[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/se3-unattended/var/se3/unattended/install/linuxaux/opt/perl/lib/site_perl/5.10.0/XML/XPath/ -> XMLParser.pm (source)

   1  # $Id: XMLParser.pm,v 1.49 2001/03/14 17:13:57 matt Exp $
   2  
   3  package XML::XPath::XMLParser;
   4  
   5  use strict;
   6  
   7  use XML::Parser;
   8  #use XML::XPath;
   9  use XML::XPath::Node;
  10  use XML::XPath::Node::Element;
  11  use XML::XPath::Node::Text;
  12  use XML::XPath::Node::Comment;
  13  use XML::XPath::Node::PI;
  14  use XML::XPath::Node::Attribute;
  15  use XML::XPath::Node::Namespace;
  16  
  17  my @options = qw(
  18          filename
  19          xml
  20          parser
  21          ioref
  22          );
  23  
  24  my ($_current, $_namespaces_on);
  25  my %IdNames;
  26  
  27  use vars qw/$xmlns_ns $xml_ns/;
  28  
  29  $xmlns_ns = "http://www.w3.org/2000/xmlns/";
  30  $xml_ns = "http://www.w3.org/XML/1998/namespace";
  31  
  32  sub new {
  33      my $proto = shift;
  34      my $class = ref($proto) || $proto;
  35      my %args = @_;
  36      my %hash = map(( "_$_" => $args{$_} ), @options);
  37      bless \%hash, $class;
  38  }
  39  
  40  sub parse {
  41      my $self = shift;
  42      
  43      $self->{IdNames} = {};
  44      $self->{InScopeNamespaceStack} = [ { 
  45              '_Default' => undef,
  46              'xmlns' => $xmlns_ns,
  47              'xml' => $xml_ns,
  48          } ];
  49      
  50      $self->{NodeStack} = [ ];
  51      
  52      $self->set_xml($_[0]) if $_[0];
  53      
  54      my $parser = $self->get_parser || XML::Parser->new(
  55              ErrorContext => 2,
  56              ParseParamEnt => 1,
  57              );
  58      
  59      $parser->setHandlers(
  60              Init => sub { $self->parse_init(@_) },
  61              Char => sub { $self->parse_char(@_) },
  62              Start => sub { $self->parse_start(@_) },
  63              End => sub { $self->parse_end(@_) },
  64              Final => sub { $self->parse_final(@_) },
  65              Proc => sub { $self->parse_pi(@_) },
  66              Comment => sub { $self->parse_comment(@_) },
  67              Attlist => sub { $self->parse_attlist(@_) },
  68              );
  69      
  70      my $toparse;
  71      if ($toparse = $self->get_filename) {
  72          return $parser->parsefile($toparse);
  73      }
  74      else {
  75          return $parser->parse($self->get_xml || $self->get_ioref);
  76      }
  77  }
  78  
  79  sub parsefile {
  80      my $self = shift;
  81      my ($filename) = @_;
  82      $self->set_filename($filename);
  83      $self->parse;
  84  }
  85  
  86  sub parse_init {
  87      my $self = shift;
  88      my $e = shift;
  89      my $document = XML::XPath::Node::Element->new();
  90      my $newns = XML::XPath::Node::Namespace->new('xml', $xml_ns);
  91      $document->appendNamespace($newns);
  92      $self->{current} = $self->{DOC_Node} = $document;
  93  }
  94  
  95  sub parse_final {
  96      my $self = shift;
  97      return $self->{DOC_Node};
  98  }
  99  
 100  sub parse_char {
 101      my $self = shift;
 102      my $e = shift;
 103      my $text = shift;
 104      
 105      my $parent = $self->{current};
 106      
 107      my $last = $parent->getLastChild;
 108      if ($last && $last->isTextNode) {
 109          # append to previous text node
 110          $last->appendText($text);
 111          return;
 112      }
 113      
 114      my $node = XML::XPath::Node::Text->new($text);
 115      $parent->appendChild($node, 1);
 116  }
 117  
 118  sub parse_start {
 119      my $self = shift;
 120      my $e = shift;
 121      my $tag = shift;
 122      
 123      push @{ $self->{InScopeNamespaceStack} },
 124           { %{ $self->{InScopeNamespaceStack}[-1] } };
 125      $self->_scan_namespaces(@_);
 126      
 127      my ($prefix, $namespace) = $self->_namespace($tag);
 128      
 129      my $node = XML::XPath::Node::Element->new($tag, $prefix);
 130      
 131      my @attributes;
 132      for (my $ii = 0; $ii < $#_; $ii += 2) {
 133      my ($name, $value) = ($_[$ii], $_[$ii+1]);
 134          if ($name =~ /^xmlns(:(.*))?$/) {
 135              # namespace node
 136              my $prefix = $2 || '#default';
 137  #            warn "Creating NS node: $prefix = $value\n";
 138              my $newns = XML::XPath::Node::Namespace->new($prefix, $value);
 139              $node->appendNamespace($newns);
 140          }
 141          else {
 142          my ($prefix, $namespace) = $self->_namespace($name);
 143              undef $namespace unless $prefix;
 144  
 145              my $newattr = XML::XPath::Node::Attribute->new($name, $value, $prefix);
 146              $node->appendAttribute($newattr, 1);
 147              if (exists($self->{IdNames}{$tag}) && ($self->{IdNames}{$tag} eq $name)) {
 148      #            warn "appending Id Element: $val for ", $node->getName, "\n";
 149                  $self->{DOC_Node}->appendIdElement($value, $node);
 150              }
 151          }
 152      }
 153          
 154      $self->{current}->appendChild($node, 1);
 155      $self->{current} = $node;
 156  }
 157  
 158  sub parse_end {
 159      my $self = shift;
 160      my $e = shift;
 161      $self->{current} = $self->{current}->getParentNode;
 162  }
 163  
 164  sub parse_pi {
 165      my $self = shift;
 166      my $e = shift;
 167      my ($target, $data) = @_;
 168      my $node = XML::XPath::Node::PI->new($target, $data);
 169      $self->{current}->appendChild($node, 1);
 170  }
 171  
 172  sub parse_comment {
 173      my $self = shift;
 174      my $e = shift;
 175      my ($data) = @_;
 176      my $node = XML::XPath::Node::Comment->new($data);
 177      $self->{current}->appendChild($node, 1);
 178  }
 179  
 180  sub parse_attlist {
 181      my $self = shift;
 182      my $e = shift;
 183      my ($elname, $attname, $type, $default, $fixed) = @_;
 184      if ($type eq 'ID') {
 185          $self->{IdNames}{$elname} = $attname;
 186      }
 187  }
 188  
 189  sub _scan_namespaces {
 190      my ($self, %attributes) = @_;
 191  
 192      while (my ($attr_name, $value) = each %attributes) {
 193      if ($attr_name eq 'xmlns') {
 194          $self->{InScopeNamespaceStack}[-1]{'_Default'} = $value;
 195      } elsif ($attr_name =~ /^xmlns:(.*)$/) {
 196          my $prefix = $1;
 197          $self->{InScopeNamespaceStack}[-1]{$prefix} = $value;
 198      }
 199      }
 200  }
 201  
 202  sub _namespace {
 203      my ($self, $name) = @_;
 204  
 205      my ($prefix, $localname) = split(/:/, $name);
 206      if (!defined($localname)) {
 207      if ($prefix eq 'xmlns') {
 208          return '', undef;
 209      } else {
 210          return '', $self->{InScopeNamespaceStack}[-1]{'_Default'};
 211      }
 212      } else {
 213      return $prefix, $self->{InScopeNamespaceStack}[-1]{$prefix};
 214      }
 215  }
 216  
 217  sub as_string {
 218      my $node = shift;
 219      $node->toString;
 220  }
 221  
 222  sub get_parser { shift->{_parser}; }
 223  sub get_filename { shift->{_filename}; }
 224  sub get_xml { shift->{_xml}; }
 225  sub get_ioref { shift->{_ioref}; }
 226  
 227  sub set_parser { $_[0]->{_parser} = $_[1]; }
 228  sub set_filename { $_[0]->{_filename} = $_[1]; }
 229  sub set_xml { $_[0]->{_xml} = $_[1]; }
 230  sub set_ioref { $_[0]->{_ioref} = $_[1]; }
 231  
 232  1;
 233  
 234  __END__
 235  
 236  =head1 NAME
 237  
 238  XML::XPath::XMLParser - The default XML parsing class that produces a node tree
 239  
 240  =head1 SYNOPSIS
 241  
 242      my $parser = XML::XPath::XMLParser->new(
 243                  filename => $self->get_filename,
 244                  xml => $self->get_xml,
 245                  ioref => $self->get_ioref,
 246                  parser => $self->get_parser,
 247              );
 248      my $root_node = $parser->parse;
 249  
 250  =head1 DESCRIPTION
 251  
 252  This module generates a node tree for use as the context node for XPath processing.
 253  It aims to be a quick parser, nothing fancy, and yet has to store more information
 254  than most parsers. To achieve this I've used array refs everywhere - no hashes.
 255  I don't have any performance figures for the speedups achieved, so I make no
 256  appologies for anyone not used to using arrays instead of hashes. I think they
 257  make good sense here where we know the attributes of each type of node.
 258  
 259  =head1 Node Structure
 260  
 261  All nodes have the same first 2 entries in the array: node_parent
 262  and node_pos. The type of the node is determined using the ref() function.
 263  The node_parent always contains an entry for the parent of the current
 264  node - except for the root node which has undef in there. And node_pos is the
 265  position of this node in the array that it is in (think: 
 266  $node == $node->[node_parent]->[node_children]->[$node->[node_pos]] )
 267  
 268  Nodes are structured as follows:
 269  
 270  =head2 Root Node
 271  
 272  The root node is just an element node with no parent.
 273  
 274      [
 275        undef, # node_parent - check for undef to identify root node
 276        undef, # node_pos
 277        undef, # node_prefix
 278        [ ... ], # node_children (see below)
 279      ]
 280  
 281  =head2 Element Node
 282  
 283      [
 284        $parent, # node_parent
 285        <position in current array>, # node_pos
 286        'xxx', # node_prefix - namespace prefix on this element
 287        [ ... ], # node_children
 288        'yyy', # node_name - element tag name
 289        [ ... ], # node_attribs - attributes on this element
 290        [ ... ], # node_namespaces - namespaces currently in scope
 291      ]
 292  
 293  =head2 Attribute Node
 294  
 295      [
 296        $parent, # node_parent - the element node
 297        <position in current array>, # node_pos
 298        'xxx', # node_prefix - namespace prefix on this element
 299        'href', # node_key - attribute name
 300        'ftp://ftp.com/', # node_value - value in the node
 301      ]
 302  
 303  =head2 Namespace Nodes
 304  
 305  Each element has an associated set of namespace nodes that are currently
 306  in scope. Each namespace node stores a prefix and the expanded name (retrieved
 307  from the xmlns:prefix="..." attribute).
 308  
 309      [
 310        $parent,
 311        <pos>,
 312        'a', # node_prefix - the namespace as it was written as a prefix
 313        'http://my.namespace.com', # node_expanded - the expanded name.
 314      ]
 315  
 316  =head2 Text Nodes
 317  
 318      [
 319        $parent,
 320        <pos>,
 321        'This is some text' # node_text - the text in the node
 322      ]
 323  
 324  =head2 Comment Nodes
 325  
 326      [
 327        $parent,
 328        <pos>,
 329        'This is a comment' # node_comment
 330      ]
 331  
 332  =head2 Processing Instruction Nodes
 333  
 334      [
 335        $parent,
 336        <pos>,
 337        'target', # node_target
 338        'data', # node_data
 339      ]
 340  
 341  =head1 Usage
 342  
 343  If you feel the need to use this module outside of XML::XPath (for example
 344  you might use this module directly so that you can cache parsed trees), you
 345  can follow the following API:
 346  
 347  =head2 new
 348  
 349  The new method takes either no parameters, or any of the following parameters:
 350  
 351          filename
 352          xml
 353          parser
 354          ioref
 355  
 356  This uses the familiar hash syntax, so an example might be:
 357  
 358      use XML::XPath::XMLParser;
 359      
 360      my $parser = XML::XPath::XMLParser->new(filename => 'example.xml');
 361  
 362  The parameters represent a filename, a string containing XML, an XML::Parser
 363  instance and an open filehandle ref respectively. You can also set or get all
 364  of these properties using the get_ and set_ functions that have the same
 365  name as the property: e.g. get_filename, set_ioref, etc.
 366  
 367  =head2 parse
 368  
 369  The parse method generally takes no parameters, however you are free to
 370  pass either an open filehandle reference or an XML string if you so require.
 371  The return value is a tree that XML::XPath can use. The parse method will
 372  die if there is an error in your XML, so be sure to use perl's exception
 373  handling mechanism (eval{};) if you want to avoid this.
 374  
 375  =head2 parsefile
 376  
 377  The parsefile method is identical to parse() except it expects a single
 378  parameter that is a string naming a file to open and parse. Again it
 379  returns a tree and also dies if there are XML errors.
 380  
 381  =head1 NOTICES
 382  
 383  This file is distributed as part of the XML::XPath module, and is copyright
 384  2000 Fastnet Software Ltd. Please see the documentation for the module as a
 385  whole for licencing information.


Generated: Tue Mar 17 22:47:18 2015 Cross-referenced by PHPXref 0.7.1