[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/se3-unattended/var/se3/unattended/install/linuxaux/opt/perl/lib/site_perl/5.10.0/XML/ -> XPath.pm (source)

   1  # $Id: XPath.pm,v 1.56 2003/01/26 19:33:17 matt Exp $
   2  
   3  package XML::XPath;
   4  
   5  use strict;
   6  use vars qw($VERSION $AUTOLOAD $revision);
   7  
   8  $VERSION = '1.13';
   9  
  10  $XML::XPath::Namespaces = 1;
  11  $XML::XPath::Debug = 0;
  12  
  13  use XML::XPath::XMLParser;
  14  use XML::XPath::Parser;
  15  use IO::File;
  16  
  17  # For testing
  18  #use Data::Dumper;
  19  #$Data::Dumper::Indent = 1;
  20  
  21  # Parameters for new()
  22  my @options = qw(
  23          filename
  24          parser
  25          xml
  26          ioref
  27          context
  28          );
  29  
  30  sub new {
  31      my $proto = shift;
  32      my $class = ref($proto) || $proto;
  33  
  34      my(%args);
  35      # Try to figure out what the user passed
  36      if ($#_ == 0) { # passed a scalar
  37          my $string = $_[0];
  38          if ($string =~ m{<.*?>}s) { # it's an XML string
  39              $args{'xml'} = $string;
  40          } elsif (ref($string)) {    # read XML from file handle
  41              $args{'ioref'} = $string;
  42          } elsif ($string eq '-') {  # read XML from stdin
  43              $args{'ioref'} = IO::File->new($string);
  44          } else {                    # read XML from a file
  45              $args{'filename'} = $string;
  46          }
  47      } else {        # passed a hash or hash reference
  48          # just pass the parameters on to the XPath constructor
  49          %args = ((ref($_[0]) eq "HASH") ? %{$_[0]} : @_);
  50      }
  51  
  52      if ($args{filename} && (!-e $args{filename} || !-r $args{filename})) {
  53          die "Cannot open file '$args{filename}'";
  54      }
  55      my %hash = map(( "_$_" => $args{$_} ), @options);
  56      $hash{path_parser} = XML::XPath::Parser->new();
  57      return bless \%hash, $class;
  58  }
  59  
  60  sub find {
  61      my $self = shift;
  62      my $path = shift;
  63      my $context = shift;
  64      die "No path to find" unless $path;
  65      
  66      if (!defined $context) {
  67          $context = $self->get_context;
  68      }
  69      if (!defined $context) {
  70          # Still no context? Need to parse...
  71          my $parser = XML::XPath::XMLParser->new(
  72                  filename => $self->get_filename,
  73                  xml => $self->get_xml,
  74                  ioref => $self->get_ioref,
  75                  parser => $self->get_parser,
  76                  );
  77          $context = $parser->parse;
  78          $self->set_context($context);
  79  #        warn "CONTEXT:\n", Data::Dumper->Dumpxs([$context], ['context']);
  80      }
  81      
  82      my $parsed_path = $self->{path_parser}->parse($path);
  83  #    warn "\n\nPATH: ", $parsed_path->as_string, "\n\n";
  84      
  85  #    warn "evaluating path\n";
  86      return $parsed_path->evaluate($context);
  87  }
  88  
  89  # sub memsize {
  90  #     print STDERR @_, "\t";
  91  #     open(FH, '/proc/self/status');
  92  #     while(<FH>) {
  93  #         print STDERR $_ if /^VmSize/;
  94  #     }
  95  #     close FH;
  96  # }
  97  # 
  98  sub findnodes {
  99      my $self = shift;
 100      my ($path, $context) = @_;
 101      
 102      my $results = $self->find($path, $context);
 103      
 104      if ($results->isa('XML::XPath::NodeSet')) {
 105          return wantarray ? $results->get_nodelist : $results;
 106  #        return $results->get_nodelist;
 107      }
 108      
 109  #    warn("findnodes returned a ", ref($results), " object\n") if $XML::XPath::Debug;
 110      return wantarray ? () : XML::XPath::NodeSet->new();
 111  }
 112  
 113  sub matches {
 114      my $self = shift;
 115      my ($node, $path, $context) = @_;
 116  
 117      my @nodes = $self->findnodes($path, $context);
 118  
 119      if (grep { "$node" eq "$_" } @nodes) {
 120          return 1;
 121      }
 122      return;
 123  }
 124  
 125  sub findnodes_as_string {
 126      my $self = shift;
 127      my ($path, $context) = @_;
 128      
 129      my $results = $self->find($path, $context);
 130      
 131      if ($results->isa('XML::XPath::NodeSet')) {
 132          return join('', map { $_->toString } $results->get_nodelist);
 133      }
 134      elsif ($results->isa('XML::XPath::Node')) {
 135          return $results->toString;
 136      }
 137      else {
 138          return XML::XPath::Node::XMLescape($results->value);
 139      }
 140  }
 141  
 142  sub findvalue {
 143      my $self = shift;
 144      my ($path, $context) = @_;
 145      
 146      my $results = $self->find($path, $context);
 147      
 148      if ($results->isa('XML::XPath::NodeSet')) {
 149          return $results->to_literal;
 150      }
 151      
 152      return $results;
 153  }
 154  
 155  sub exists
 156  {
 157      my $self = shift;
 158      my ($path, $context) = @_;
 159      $path = '/' if (!defined $path);
 160      my @nodeset = $self->findnodes($path, $context);
 161      return 1 if (scalar( @nodeset ));
 162      return 0;
 163  }
 164  
 165  sub getNodeAsXML {
 166    my $self = shift;
 167    my $node_path = shift;
 168    $node_path = '/' if (!defined $node_path);
 169    if (ref($node_path)) {
 170      return $node_path->as_string();
 171    } else {
 172      return $self->findnodes_as_string($node_path);
 173    }
 174  }
 175  
 176  sub getNodeText {
 177    my $self = shift;
 178    my $node_path = shift;
 179    if (ref($node_path)) {
 180      return $node_path->string_value();
 181    } else {
 182      return $self->findvalue($node_path);
 183    }
 184  }
 185  
 186  sub setNodeText {
 187    my $self = shift;
 188    my($node_path, $new_text) = @_;
 189    my $nodeset = $self->findnodes($node_path);
 190    return undef if (!defined $nodeset); # could not find node
 191    my @nodes = $nodeset->get_nodelist;
 192    if ($#nodes < 0) {
 193      if ($node_path =~ m|/@([^/]+)$|) {
 194        # attribute not found, so try to create it
 195        my $parent_path = $`;
 196        my $attr = $1;
 197        $nodeset = $self->findnodes($parent_path);
 198        return undef if (!defined $nodeset); # could not find node
 199        foreach my $node ($nodeset->get_nodelist) {
 200          my $newnode = XML::XPath::Node::Attribute->new($attr, $new_text);
 201          return undef if (!defined $newnode); # could not create new node
 202          $node->appendAttribute($newnode);
 203        }
 204      } else {
 205        return undef; # could not find node
 206      }
 207    }
 208    foreach my $node (@nodes) {
 209      if ($node->getNodeType == XML::XPath::Node::ATTRIBUTE_NODE) {
 210        $node->setNodeValue($new_text);
 211      } else {
 212        foreach my $delnode ($node->getChildNodes()) {
 213          $node->removeChild($delnode);
 214        }
 215        my $newnode = XML::XPath::Node::Text->new($new_text);
 216        return undef if (!defined $newnode); # could not create new node
 217        $node->appendChild($newnode);
 218      }
 219    }
 220    return 1;
 221  }
 222  
 223  sub createNode {
 224    my $self = shift;
 225    my($node_path) = @_;
 226    my $path_steps = $self->{path_parser}->parse($node_path);
 227    my @path_steps = ();
 228    foreach my $step (@{$path_steps->get_lhs()}) {
 229      my $string = $step->as_string();
 230      push(@path_steps, $string) if (defined $string && $string ne "");
 231    }
 232    my $prev_node = undef;
 233    my $nodeset = undef;
 234    my $nodes = undef;
 235    my $p = undef;
 236    my $test_path = "";
 237    # Start with the deepest node, working up the path (right to left),
 238    # trying to find a node that exists.
 239    for ($p = $#path_steps; $p >= 0; $p--) {
 240      my $path = $path_steps[$p];
 241      $test_path = "(/" . join("/", @path_steps[0..$p]) . ")";
 242      $nodeset = $self->findnodes($test_path);
 243      return undef if (!defined $nodeset); # error looking for node
 244      $nodes = $nodeset->size;
 245      return undef if ($nodes > 1); # too many paths - path not specific enough
 246      if ($nodes == 1) { # found a node -- need to create nodes below it
 247        $prev_node = $nodeset->get_node(1);
 248        last;
 249      }
 250    }
 251    if (!defined $prev_node) {
 252      my @root_nodes = $self->findnodes('/')->get_nodelist();
 253      $prev_node = $root_nodes[0];
 254    }
 255    # We found a node that exists, or we'll start at the root.
 256    # Create all lower nodes working left to right along the path.
 257    for ($p++ ; $p <= $#path_steps; $p++) {
 258      my $path = $path_steps[$p];
 259      my $newnode = undef;
 260      my($axis,$name) = ($path =~ /^(.*?)::(.*)$/);
 261      if ($axis =~ /^child$/i) {
 262        $newnode = XML::XPath::Node::Element->new($name);
 263        return undef if (!defined $newnode); # could not create new node
 264        $prev_node->appendChild($newnode);
 265      } elsif ($axis =~ /^attribute$/i) {
 266        $newnode = XML::XPath::Node::Attribute->new($name, "");
 267        return undef if (!defined $newnode); # could not create new node
 268        $prev_node->appendAttribute($newnode);
 269      }
 270      $prev_node = $newnode;
 271    }
 272    return $prev_node;
 273  }
 274  
 275  sub get_filename {
 276      my $self = shift;
 277      $self->{_filename};
 278  }
 279  
 280  sub set_filename {
 281      my $self = shift;
 282      $self->{_filename} = shift;
 283  }
 284  
 285  sub get_parser {
 286      my $self = shift;
 287      $self->{_parser};
 288  }
 289  
 290  sub set_parser {
 291      my $self = shift;
 292      $self->{_parser} = shift;
 293  }
 294  
 295  sub get_xml {
 296      my $self = shift;
 297      $self->{_xml};
 298  }
 299  
 300  sub set_xml {
 301      my $self = shift;
 302      $self->{_xml} = shift;
 303  }
 304  
 305  sub get_ioref {
 306      my $self = shift;
 307      $self->{_ioref};
 308  }
 309  
 310  sub set_ioref {
 311      my $self = shift;
 312      $self->{_ioref} = shift;
 313  }
 314  
 315  sub get_context {
 316      my $self = shift;
 317      $self->{_context};
 318  }
 319  
 320  sub set_context {
 321      my $self = shift;
 322      $self->{_context} = shift;
 323  }
 324  
 325  sub cleanup {
 326      my $self = shift;
 327      if ($XML::XPath::SafeMode) {
 328          my $context = $self->get_context;
 329          return unless $context;
 330          $context->dispose;
 331      }
 332  }
 333  
 334  sub set_namespace {
 335      my $self = shift;
 336      my ($prefix, $expanded) = @_;
 337      $self->{path_parser}->set_namespace($prefix, $expanded);
 338  }
 339  
 340  sub clear_namespaces {
 341      my $self = shift;
 342      $self->{path_parser}->clear_namespaces();
 343  }
 344  
 345  1;
 346  __END__
 347  
 348  =head1 NAME
 349  
 350  XML::XPath - a set of modules for parsing and evaluating XPath statements
 351  
 352  =head1 DESCRIPTION
 353  
 354  This module aims to comply exactly to the XPath specification at
 355  http://www.w3.org/TR/xpath and yet allow extensions to be added in the
 356  form of functions. Modules such as XSLT and XPointer may need to do
 357  this as they support functionality beyond XPath.
 358  
 359  =head1 SYNOPSIS
 360  
 361      use XML::XPath;
 362      use XML::XPath::XMLParser;
 363      
 364      my $xp = XML::XPath->new(filename => 'test.xhtml');
 365      
 366      my $nodeset = $xp->find('/html/body/p'); # find all paragraphs
 367      
 368      foreach my $node ($nodeset->get_nodelist) {
 369          print "FOUND\n\n", 
 370              XML::XPath::XMLParser::as_string($node),
 371              "\n\n";
 372      }
 373  
 374  =head1 DETAILS
 375  
 376  There's an awful lot to all of this, so bear with it - if you stick it
 377  out it should be worth it. Please get a good understanding of XPath
 378  by reading the spec before asking me questions. All of the classes
 379  and parts herein are named to be synonimous with the names in the
 380  specification, so consult that if you don't understand why I'm doing
 381  something in the code.
 382  
 383  =head1 API
 384  
 385  The API of XML::XPath itself is extremely simple to allow you to get
 386  going almost immediately. The deeper API's are more complex, but you
 387  shouldn't have to touch most of that.
 388  
 389  =head2 new()
 390  
 391  This constructor follows the often seen named parameter method call.
 392  Parameters you can use are: filename, parser, xml, ioref and context.
 393  The filename parameter specifies an XML file to parse. The xml
 394  parameter specifies a string to parse, and the ioref parameter
 395  specifies an ioref to parse. The context option allows you to 
 396  specify a context node. The context node has to be in the format 
 397  of a node as specified in L<XML::XPath::XMLParser>. The 4 parameters
 398  filename, xml, ioref and context are mutually exclusive - you should
 399  only specify one (if you specify anything other than context, the
 400  context node is the root of your document).
 401  The parser option allows you to pass in an already prepared 
 402  XML::Parser object, to save you having to create more than one
 403  in your application (if, for example, you're doing more than just XPath).
 404  
 405      my $xp = XML::XPath->new( context => $node );
 406  
 407  It is very much recommended that you use only 1 XPath object throughout 
 408  the life of your application. This is because the object (and it's sub-objects)
 409  maintain certain bits of state information that will be useful (such
 410  as XPath variables) to later calls to find(). It's also a good idea because
 411  you'll use less memory this way.
 412  
 413  =head2 I<nodeset> = find($path, [$context])
 414  
 415  The find function takes an XPath expression (a string) and returns either an
 416  XML::XPath::NodeSet object containing the nodes it found (or empty if
 417  no nodes matched the path), or one of XML::XPath::Literal (a string),
 418  XML::XPath::Number, or XML::XPath::Boolean. It should always return 
 419  something - and you can use ->isa() to find out what it returned. If you
 420  need to check how many nodes it found you should check $nodeset->size.
 421  See L<XML::XPath::NodeSet>. An optional second parameter of a context
 422  node allows you to use this method repeatedly, for example XSLT needs
 423  to do this.
 424  
 425  =head2 findnodes($path, [$context])
 426  
 427  Returns a list of nodes found by $path, optionally in context $context. 
 428  In scalar context returns an XML::XPath::NodeSet object.
 429  
 430  =head2 findnodes_as_string($path, [$context])
 431  
 432  Returns the nodes found reproduced as XML. The result is not guaranteed
 433  to be valid XML though.
 434  
 435  =head2 findvalue($path, [$context])
 436  
 437  Returns either a C<XML::XPath::Literal>, a C<XML::XPath::Boolean> or a
 438  C<XML::XPath::Number> object. If the path returns a NodeSet,
 439  $nodeset->to_literal is called automatically for you (and thus a
 440  C<XML::XPath::Literal> is returned). Note that
 441  for each of the objects stringification is overloaded, so you can just
 442  print the value found, or manipulate it in the ways you would a normal
 443  perl value (e.g. using regular expressions).
 444  
 445  =head2 exists($path, [$context])
 446  
 447  Returns true if the given path exists.
 448  
 449  =head2 matches($node, $path, [$context])
 450  
 451  Returns true if the node matches the path (optionally in context $context).
 452  
 453  =head2 getNodeText($path)
 454  
 455  Returns the text string for a particular XML node.  Returns a string,
 456  or undef if the node doesn't exist.
 457  
 458  =head2 setNodeText($path, $text)
 459  
 460  Sets the text string for a particular XML node.  The node can be an
 461  element or an attribute.  If the node to be set is an attribute, and
 462  the attribute node does not exist, it will be created automatically.
 463  
 464  =head2 createNode($path)
 465  
 466  Creates the node matching the path given.  If part of the path given, or
 467  all of the path do not exist, the necessary nodes will be created
 468  automatically.
 469  
 470  =head2 set_namespace($prefix, $uri)
 471  
 472  Sets the namespace prefix mapping to the uri.
 473  
 474  Normally in XML::XPath the prefixes in XPath node tests take their
 475  context from the current node. This means that foo:bar will always
 476  match an element <foo:bar> regardless of the namespace that the prefix
 477  foo is mapped to (which might even change within the document, resulting
 478  in unexpected results). In order to make prefixes in XPath node tests
 479  actually map to a real URI, you need to enable that via a call
 480  to the set_namespace method of your XML::XPath object.
 481  
 482  =head2 clear_namespaces()
 483  
 484  Clears all previously set namespace mappings.
 485  
 486  =head2 $XML::XPath::Namespaces
 487  
 488  Set this to 0 if you I<don't> want namespace processing to occur. This
 489  will make everything a little (tiny) bit faster, but you'll suffer for it,
 490  probably.
 491  
 492  =head1 Node Object Model
 493  
 494  See L<XML::XPath::Node>, L<XML::XPath::Node::Element>, 
 495  L<XML::XPath::Node::Text>, L<XML::XPath::Node::Comment>,
 496  L<XML::XPath::Node::Attribute>, L<XML::XPath::Node::Namespace>,
 497  and L<XML::XPath::Node::PI>.
 498  
 499  =head1 On Garbage Collection
 500  
 501  XPath nodes work in a special way that allows circular references, and 
 502  yet still lets Perl's reference counting garbage collector to clean up
 503  the nodes after use. This should be totally transparent to the user,
 504  with one caveat: B<If you free your tree before letting go of a sub-tree,
 505  consider that playing with fire and you may get burned>. What does this
 506  mean to the average user? Not much. Provided you don't free (or let go
 507  out of scope) either the tree you passed to XML::XPath->new, or if you
 508  didn't pass a tree, and passed a filename or IO-ref, then provided you
 509  don't let the XML::XPath object go out of scope before you let results
 510  of find() and its friends go out of scope, then you'll be fine. Even if
 511  you B<do> let the tree go out of scope before results, you'll probably
 512  still be fine. The only case where you may get stung is when the last
 513  part of your path/query is either an ancestor or parent axis. In that
 514  case the worst that will happen is you'll end up with a circular reference
 515  that won't get cleared until interpreter destruction time. You can get
 516  around that by explicitly calling $node->DESTROY on each of your result
 517  nodes, if you really need to do that.
 518  
 519  Mail me direct if that's not clear. Note that it's not doom and gloom. It's
 520  by no means perfect, but the worst that will happen is a long running process
 521  could leak memory. Most long running processes will therefore be able to
 522  explicitly be careful not to free the tree (or XML::XPath object) before
 523  freeing results. AxKit, an application that uses XML::XPath, does this and
 524  I didn't have to make any changes to the code - it's already sensible
 525  programming.
 526  
 527  If you I<really> don't want all this to happen, then set the variable
 528  $XML::XPath::SafeMode, and call $xp->cleanup() on the XML::XPath object
 529  when you're finished, or $tree->dispose() if you have a tree instead.
 530  
 531  =head1 Example
 532  
 533  Please see the test files in t/ for examples on how to use XPath.
 534  
 535  =head1 Support/Author
 536  
 537  This module is copyright 2000 AxKit.com Ltd. This is free
 538  software, and as such comes with NO WARRANTY. No dates are used in this
 539  module. You may distribute this module under the terms of either the
 540  Gnu GPL,  or the Artistic License (the same terms as Perl itself).
 541  
 542  For support, please subscribe to the Perl-XML mailing list at the URL 
 543  http://listserv.activestate.com/mailman/listinfo/perl-xml
 544  
 545  Matt Sergeant, matt@sergeant.org
 546  
 547  =head1 SEE ALSO
 548  
 549  L<XML::XPath::Literal>, L<XML::XPath::Boolean>, L<XML::XPath::Number>,
 550  L<XML::XPath::XMLParser>, L<XML::XPath::NodeSet>, L<XML::XPath::PerlSAX>,
 551  L<XML::XPath::Builder>.
 552  
 553  =cut


Generated: Tue Mar 17 22:47:18 2015 Cross-referenced by PHPXref 0.7.1