[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
1 # $Id: XPath.pm,v 1.56 2003/01/26 19:33:17 matt Exp $ 2 3 package XML::XPath; 4 5 use strict; 6 use vars qw($VERSION $AUTOLOAD $revision); 7 8 $VERSION = '1.13'; 9 10 $XML::XPath::Namespaces = 1; 11 $XML::XPath::Debug = 0; 12 13 use XML::XPath::XMLParser; 14 use XML::XPath::Parser; 15 use IO::File; 16 17 # For testing 18 #use Data::Dumper; 19 #$Data::Dumper::Indent = 1; 20 21 # Parameters for new() 22 my @options = qw( 23 filename 24 parser 25 xml 26 ioref 27 context 28 ); 29 30 sub new { 31 my $proto = shift; 32 my $class = ref($proto) || $proto; 33 34 my(%args); 35 # Try to figure out what the user passed 36 if ($#_ == 0) { # passed a scalar 37 my $string = $_[0]; 38 if ($string =~ m{<.*?>}s) { # it's an XML string 39 $args{'xml'} = $string; 40 } elsif (ref($string)) { # read XML from file handle 41 $args{'ioref'} = $string; 42 } elsif ($string eq '-') { # read XML from stdin 43 $args{'ioref'} = IO::File->new($string); 44 } else { # read XML from a file 45 $args{'filename'} = $string; 46 } 47 } else { # passed a hash or hash reference 48 # just pass the parameters on to the XPath constructor 49 %args = ((ref($_[0]) eq "HASH") ? %{$_[0]} : @_); 50 } 51 52 if ($args{filename} && (!-e $args{filename} || !-r $args{filename})) { 53 die "Cannot open file '$args{filename}'"; 54 } 55 my %hash = map(( "_$_" => $args{$_} ), @options); 56 $hash{path_parser} = XML::XPath::Parser->new(); 57 return bless \%hash, $class; 58 } 59 60 sub find { 61 my $self = shift; 62 my $path = shift; 63 my $context = shift; 64 die "No path to find" unless $path; 65 66 if (!defined $context) { 67 $context = $self->get_context; 68 } 69 if (!defined $context) { 70 # Still no context? Need to parse... 71 my $parser = XML::XPath::XMLParser->new( 72 filename => $self->get_filename, 73 xml => $self->get_xml, 74 ioref => $self->get_ioref, 75 parser => $self->get_parser, 76 ); 77 $context = $parser->parse; 78 $self->set_context($context); 79 # warn "CONTEXT:\n", Data::Dumper->Dumpxs([$context], ['context']); 80 } 81 82 my $parsed_path = $self->{path_parser}->parse($path); 83 # warn "\n\nPATH: ", $parsed_path->as_string, "\n\n"; 84 85 # warn "evaluating path\n"; 86 return $parsed_path->evaluate($context); 87 } 88 89 # sub memsize { 90 # print STDERR @_, "\t"; 91 # open(FH, '/proc/self/status'); 92 # while(<FH>) { 93 # print STDERR $_ if /^VmSize/; 94 # } 95 # close FH; 96 # } 97 # 98 sub findnodes { 99 my $self = shift; 100 my ($path, $context) = @_; 101 102 my $results = $self->find($path, $context); 103 104 if ($results->isa('XML::XPath::NodeSet')) { 105 return wantarray ? $results->get_nodelist : $results; 106 # return $results->get_nodelist; 107 } 108 109 # warn("findnodes returned a ", ref($results), " object\n") if $XML::XPath::Debug; 110 return wantarray ? () : XML::XPath::NodeSet->new(); 111 } 112 113 sub matches { 114 my $self = shift; 115 my ($node, $path, $context) = @_; 116 117 my @nodes = $self->findnodes($path, $context); 118 119 if (grep { "$node" eq "$_" } @nodes) { 120 return 1; 121 } 122 return; 123 } 124 125 sub findnodes_as_string { 126 my $self = shift; 127 my ($path, $context) = @_; 128 129 my $results = $self->find($path, $context); 130 131 if ($results->isa('XML::XPath::NodeSet')) { 132 return join('', map { $_->toString } $results->get_nodelist); 133 } 134 elsif ($results->isa('XML::XPath::Node')) { 135 return $results->toString; 136 } 137 else { 138 return XML::XPath::Node::XMLescape($results->value); 139 } 140 } 141 142 sub findvalue { 143 my $self = shift; 144 my ($path, $context) = @_; 145 146 my $results = $self->find($path, $context); 147 148 if ($results->isa('XML::XPath::NodeSet')) { 149 return $results->to_literal; 150 } 151 152 return $results; 153 } 154 155 sub exists 156 { 157 my $self = shift; 158 my ($path, $context) = @_; 159 $path = '/' if (!defined $path); 160 my @nodeset = $self->findnodes($path, $context); 161 return 1 if (scalar( @nodeset )); 162 return 0; 163 } 164 165 sub getNodeAsXML { 166 my $self = shift; 167 my $node_path = shift; 168 $node_path = '/' if (!defined $node_path); 169 if (ref($node_path)) { 170 return $node_path->as_string(); 171 } else { 172 return $self->findnodes_as_string($node_path); 173 } 174 } 175 176 sub getNodeText { 177 my $self = shift; 178 my $node_path = shift; 179 if (ref($node_path)) { 180 return $node_path->string_value(); 181 } else { 182 return $self->findvalue($node_path); 183 } 184 } 185 186 sub setNodeText { 187 my $self = shift; 188 my($node_path, $new_text) = @_; 189 my $nodeset = $self->findnodes($node_path); 190 return undef if (!defined $nodeset); # could not find node 191 my @nodes = $nodeset->get_nodelist; 192 if ($#nodes < 0) { 193 if ($node_path =~ m|/@([^/]+)$|) { 194 # attribute not found, so try to create it 195 my $parent_path = $`; 196 my $attr = $1; 197 $nodeset = $self->findnodes($parent_path); 198 return undef if (!defined $nodeset); # could not find node 199 foreach my $node ($nodeset->get_nodelist) { 200 my $newnode = XML::XPath::Node::Attribute->new($attr, $new_text); 201 return undef if (!defined $newnode); # could not create new node 202 $node->appendAttribute($newnode); 203 } 204 } else { 205 return undef; # could not find node 206 } 207 } 208 foreach my $node (@nodes) { 209 if ($node->getNodeType == XML::XPath::Node::ATTRIBUTE_NODE) { 210 $node->setNodeValue($new_text); 211 } else { 212 foreach my $delnode ($node->getChildNodes()) { 213 $node->removeChild($delnode); 214 } 215 my $newnode = XML::XPath::Node::Text->new($new_text); 216 return undef if (!defined $newnode); # could not create new node 217 $node->appendChild($newnode); 218 } 219 } 220 return 1; 221 } 222 223 sub createNode { 224 my $self = shift; 225 my($node_path) = @_; 226 my $path_steps = $self->{path_parser}->parse($node_path); 227 my @path_steps = (); 228 foreach my $step (@{$path_steps->get_lhs()}) { 229 my $string = $step->as_string(); 230 push(@path_steps, $string) if (defined $string && $string ne ""); 231 } 232 my $prev_node = undef; 233 my $nodeset = undef; 234 my $nodes = undef; 235 my $p = undef; 236 my $test_path = ""; 237 # Start with the deepest node, working up the path (right to left), 238 # trying to find a node that exists. 239 for ($p = $#path_steps; $p >= 0; $p--) { 240 my $path = $path_steps[$p]; 241 $test_path = "(/" . join("/", @path_steps[0..$p]) . ")"; 242 $nodeset = $self->findnodes($test_path); 243 return undef if (!defined $nodeset); # error looking for node 244 $nodes = $nodeset->size; 245 return undef if ($nodes > 1); # too many paths - path not specific enough 246 if ($nodes == 1) { # found a node -- need to create nodes below it 247 $prev_node = $nodeset->get_node(1); 248 last; 249 } 250 } 251 if (!defined $prev_node) { 252 my @root_nodes = $self->findnodes('/')->get_nodelist(); 253 $prev_node = $root_nodes[0]; 254 } 255 # We found a node that exists, or we'll start at the root. 256 # Create all lower nodes working left to right along the path. 257 for ($p++ ; $p <= $#path_steps; $p++) { 258 my $path = $path_steps[$p]; 259 my $newnode = undef; 260 my($axis,$name) = ($path =~ /^(.*?)::(.*)$/); 261 if ($axis =~ /^child$/i) { 262 $newnode = XML::XPath::Node::Element->new($name); 263 return undef if (!defined $newnode); # could not create new node 264 $prev_node->appendChild($newnode); 265 } elsif ($axis =~ /^attribute$/i) { 266 $newnode = XML::XPath::Node::Attribute->new($name, ""); 267 return undef if (!defined $newnode); # could not create new node 268 $prev_node->appendAttribute($newnode); 269 } 270 $prev_node = $newnode; 271 } 272 return $prev_node; 273 } 274 275 sub get_filename { 276 my $self = shift; 277 $self->{_filename}; 278 } 279 280 sub set_filename { 281 my $self = shift; 282 $self->{_filename} = shift; 283 } 284 285 sub get_parser { 286 my $self = shift; 287 $self->{_parser}; 288 } 289 290 sub set_parser { 291 my $self = shift; 292 $self->{_parser} = shift; 293 } 294 295 sub get_xml { 296 my $self = shift; 297 $self->{_xml}; 298 } 299 300 sub set_xml { 301 my $self = shift; 302 $self->{_xml} = shift; 303 } 304 305 sub get_ioref { 306 my $self = shift; 307 $self->{_ioref}; 308 } 309 310 sub set_ioref { 311 my $self = shift; 312 $self->{_ioref} = shift; 313 } 314 315 sub get_context { 316 my $self = shift; 317 $self->{_context}; 318 } 319 320 sub set_context { 321 my $self = shift; 322 $self->{_context} = shift; 323 } 324 325 sub cleanup { 326 my $self = shift; 327 if ($XML::XPath::SafeMode) { 328 my $context = $self->get_context; 329 return unless $context; 330 $context->dispose; 331 } 332 } 333 334 sub set_namespace { 335 my $self = shift; 336 my ($prefix, $expanded) = @_; 337 $self->{path_parser}->set_namespace($prefix, $expanded); 338 } 339 340 sub clear_namespaces { 341 my $self = shift; 342 $self->{path_parser}->clear_namespaces(); 343 } 344 345 1; 346 __END__ 347 348 =head1 NAME 349 350 XML::XPath - a set of modules for parsing and evaluating XPath statements 351 352 =head1 DESCRIPTION 353 354 This module aims to comply exactly to the XPath specification at 355 http://www.w3.org/TR/xpath and yet allow extensions to be added in the 356 form of functions. Modules such as XSLT and XPointer may need to do 357 this as they support functionality beyond XPath. 358 359 =head1 SYNOPSIS 360 361 use XML::XPath; 362 use XML::XPath::XMLParser; 363 364 my $xp = XML::XPath->new(filename => 'test.xhtml'); 365 366 my $nodeset = $xp->find('/html/body/p'); # find all paragraphs 367 368 foreach my $node ($nodeset->get_nodelist) { 369 print "FOUND\n\n", 370 XML::XPath::XMLParser::as_string($node), 371 "\n\n"; 372 } 373 374 =head1 DETAILS 375 376 There's an awful lot to all of this, so bear with it - if you stick it 377 out it should be worth it. Please get a good understanding of XPath 378 by reading the spec before asking me questions. All of the classes 379 and parts herein are named to be synonimous with the names in the 380 specification, so consult that if you don't understand why I'm doing 381 something in the code. 382 383 =head1 API 384 385 The API of XML::XPath itself is extremely simple to allow you to get 386 going almost immediately. The deeper API's are more complex, but you 387 shouldn't have to touch most of that. 388 389 =head2 new() 390 391 This constructor follows the often seen named parameter method call. 392 Parameters you can use are: filename, parser, xml, ioref and context. 393 The filename parameter specifies an XML file to parse. The xml 394 parameter specifies a string to parse, and the ioref parameter 395 specifies an ioref to parse. The context option allows you to 396 specify a context node. The context node has to be in the format 397 of a node as specified in L<XML::XPath::XMLParser>. The 4 parameters 398 filename, xml, ioref and context are mutually exclusive - you should 399 only specify one (if you specify anything other than context, the 400 context node is the root of your document). 401 The parser option allows you to pass in an already prepared 402 XML::Parser object, to save you having to create more than one 403 in your application (if, for example, you're doing more than just XPath). 404 405 my $xp = XML::XPath->new( context => $node ); 406 407 It is very much recommended that you use only 1 XPath object throughout 408 the life of your application. This is because the object (and it's sub-objects) 409 maintain certain bits of state information that will be useful (such 410 as XPath variables) to later calls to find(). It's also a good idea because 411 you'll use less memory this way. 412 413 =head2 I<nodeset> = find($path, [$context]) 414 415 The find function takes an XPath expression (a string) and returns either an 416 XML::XPath::NodeSet object containing the nodes it found (or empty if 417 no nodes matched the path), or one of XML::XPath::Literal (a string), 418 XML::XPath::Number, or XML::XPath::Boolean. It should always return 419 something - and you can use ->isa() to find out what it returned. If you 420 need to check how many nodes it found you should check $nodeset->size. 421 See L<XML::XPath::NodeSet>. An optional second parameter of a context 422 node allows you to use this method repeatedly, for example XSLT needs 423 to do this. 424 425 =head2 findnodes($path, [$context]) 426 427 Returns a list of nodes found by $path, optionally in context $context. 428 In scalar context returns an XML::XPath::NodeSet object. 429 430 =head2 findnodes_as_string($path, [$context]) 431 432 Returns the nodes found reproduced as XML. The result is not guaranteed 433 to be valid XML though. 434 435 =head2 findvalue($path, [$context]) 436 437 Returns either a C<XML::XPath::Literal>, a C<XML::XPath::Boolean> or a 438 C<XML::XPath::Number> object. If the path returns a NodeSet, 439 $nodeset->to_literal is called automatically for you (and thus a 440 C<XML::XPath::Literal> is returned). Note that 441 for each of the objects stringification is overloaded, so you can just 442 print the value found, or manipulate it in the ways you would a normal 443 perl value (e.g. using regular expressions). 444 445 =head2 exists($path, [$context]) 446 447 Returns true if the given path exists. 448 449 =head2 matches($node, $path, [$context]) 450 451 Returns true if the node matches the path (optionally in context $context). 452 453 =head2 getNodeText($path) 454 455 Returns the text string for a particular XML node. Returns a string, 456 or undef if the node doesn't exist. 457 458 =head2 setNodeText($path, $text) 459 460 Sets the text string for a particular XML node. The node can be an 461 element or an attribute. If the node to be set is an attribute, and 462 the attribute node does not exist, it will be created automatically. 463 464 =head2 createNode($path) 465 466 Creates the node matching the path given. If part of the path given, or 467 all of the path do not exist, the necessary nodes will be created 468 automatically. 469 470 =head2 set_namespace($prefix, $uri) 471 472 Sets the namespace prefix mapping to the uri. 473 474 Normally in XML::XPath the prefixes in XPath node tests take their 475 context from the current node. This means that foo:bar will always 476 match an element <foo:bar> regardless of the namespace that the prefix 477 foo is mapped to (which might even change within the document, resulting 478 in unexpected results). In order to make prefixes in XPath node tests 479 actually map to a real URI, you need to enable that via a call 480 to the set_namespace method of your XML::XPath object. 481 482 =head2 clear_namespaces() 483 484 Clears all previously set namespace mappings. 485 486 =head2 $XML::XPath::Namespaces 487 488 Set this to 0 if you I<don't> want namespace processing to occur. This 489 will make everything a little (tiny) bit faster, but you'll suffer for it, 490 probably. 491 492 =head1 Node Object Model 493 494 See L<XML::XPath::Node>, L<XML::XPath::Node::Element>, 495 L<XML::XPath::Node::Text>, L<XML::XPath::Node::Comment>, 496 L<XML::XPath::Node::Attribute>, L<XML::XPath::Node::Namespace>, 497 and L<XML::XPath::Node::PI>. 498 499 =head1 On Garbage Collection 500 501 XPath nodes work in a special way that allows circular references, and 502 yet still lets Perl's reference counting garbage collector to clean up 503 the nodes after use. This should be totally transparent to the user, 504 with one caveat: B<If you free your tree before letting go of a sub-tree, 505 consider that playing with fire and you may get burned>. What does this 506 mean to the average user? Not much. Provided you don't free (or let go 507 out of scope) either the tree you passed to XML::XPath->new, or if you 508 didn't pass a tree, and passed a filename or IO-ref, then provided you 509 don't let the XML::XPath object go out of scope before you let results 510 of find() and its friends go out of scope, then you'll be fine. Even if 511 you B<do> let the tree go out of scope before results, you'll probably 512 still be fine. The only case where you may get stung is when the last 513 part of your path/query is either an ancestor or parent axis. In that 514 case the worst that will happen is you'll end up with a circular reference 515 that won't get cleared until interpreter destruction time. You can get 516 around that by explicitly calling $node->DESTROY on each of your result 517 nodes, if you really need to do that. 518 519 Mail me direct if that's not clear. Note that it's not doom and gloom. It's 520 by no means perfect, but the worst that will happen is a long running process 521 could leak memory. Most long running processes will therefore be able to 522 explicitly be careful not to free the tree (or XML::XPath object) before 523 freeing results. AxKit, an application that uses XML::XPath, does this and 524 I didn't have to make any changes to the code - it's already sensible 525 programming. 526 527 If you I<really> don't want all this to happen, then set the variable 528 $XML::XPath::SafeMode, and call $xp->cleanup() on the XML::XPath object 529 when you're finished, or $tree->dispose() if you have a tree instead. 530 531 =head1 Example 532 533 Please see the test files in t/ for examples on how to use XPath. 534 535 =head1 Support/Author 536 537 This module is copyright 2000 AxKit.com Ltd. This is free 538 software, and as such comes with NO WARRANTY. No dates are used in this 539 module. You may distribute this module under the terms of either the 540 Gnu GPL, or the Artistic License (the same terms as Perl itself). 541 542 For support, please subscribe to the Perl-XML mailing list at the URL 543 http://listserv.activestate.com/mailman/listinfo/perl-xml 544 545 Matt Sergeant, matt@sergeant.org 546 547 =head1 SEE ALSO 548 549 L<XML::XPath::Literal>, L<XML::XPath::Boolean>, L<XML::XPath::Number>, 550 L<XML::XPath::XMLParser>, L<XML::XPath::NodeSet>, L<XML::XPath::PerlSAX>, 551 L<XML::XPath::Builder>. 552 553 =cut
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Tue Mar 17 22:47:18 2015 | Cross-referenced by PHPXref 0.7.1 |