[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
# XML::Parser
#
# Copyright (c) 1998-2000 Larry Wall and Clark Cooper
# All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.

package XML::Parser;

use Carp;

# The version check runs at compile time (and before "use strict" takes
# effect) so that a mismatched Expat.pm is detected as early as possible.
BEGIN {
    require XML::Parser::Expat;
    $VERSION = '2.36';
    die "Parser.pm and Expat.pm versions don't match"
        unless $VERSION eq $XML::Parser::Expat::VERSION;
}

use strict;

use vars qw($VERSION $LWP_load_failed);

# Set to true the first time loading the LWP based external entity
# handler fails; from then on the file based handler is used.
$LWP_load_failed = 0;

# new(%args)
#
# Class method; constructor. Options are keyword => value pairs. Style,
# Handlers, NoLWP and Non_Expat_Options are consumed here; everything
# else is handed to the Expat constructor on each parse call.
# Returns the option hash blessed into $class.
sub new {
    my ($class, %args) = @_;
    my $style = $args{Style};

    # Keys listed here are never forwarded to the Expat constructor.
    my $nonexopt = $args{Non_Expat_Options} ||= {};
    $nonexopt->{$_} = 1
        for qw(Style Non_Expat_Options Handlers _HNDL_TYPES NoLWP);

    # Valid handler types: whatever Expat can set, plus Init and Final
    # which are handled by this module.
    $args{_HNDL_TYPES} = { %XML::Parser::Expat::Handler_Setters };
    $args{_HNDL_TYPES}->{Init}  = 1;
    $args{_HNDL_TYPES}->{Final} = 1;

    $args{Handlers} ||= {};
    my $handlers = $args{Handlers};

    if (defined($style)) {
        my $stylepkg = $style;

        # A style name without '::' names a built-in style.
        if ($stylepkg !~ /::/) {
            $stylepkg = "\u$style";

            eval {
                my $fullpkg   = 'XML::Parser::Style::' . $stylepkg;
                my $stylefile = $fullpkg;
                $stylefile =~ s/::/\//g;
                require "$stylefile.pm";
                $stylepkg = $fullpkg;
            };
            if ($@) {
                # fallback to old behaviour
                $stylepkg = 'XML::Parser::' . $stylepkg;
            }
        }

        foreach my $htype (keys %{ $args{_HNDL_TYPES} }) {

            # Handlers explicitly given override
            # handlers from the Style package
            next if defined($handlers->{$htype});

            # A handler in the style package must either have
            # exactly the right case as the type name or a
            # completely lower case version of it.
            # ${stylepkg} is written with braces so that the package
            # name is delimited from the following '::'.
            foreach my $hname ("${stylepkg}::$htype",
                               "${stylepkg}::\L$htype") {
                if (defined(&$hname)) {
                    $handlers->{$htype} = \&$hname;
                    last;
                }
            }
        }
    }

    unless (defined($handlers->{ExternEnt})
            or defined($handlers->{ExternEntFin})) {

        if ($args{NoLWP} or $LWP_load_failed) {
            $handlers->{ExternEnt}    = \&file_ext_ent_handler;
            $handlers->{ExternEntFin} = \&file_ext_ent_cleanup;
        }
        else {
            # The following just bootstraps the real LWP external entity
            # handler

            $handlers->{ExternEnt} = \&initial_ext_ent_handler;

            # No cleanup function available until LWPExternEnt.pl loaded
        }
    }

    $args{Pkg} ||= caller;
    return bless \%args, $class;
}    # End of new

# setHandlers(TYPE => HANDLER, ...)
#
# Registers handlers on the parser object. Croaks on an odd number of
# arguments or on an unknown handler type. Returns a list of
# type => previous-handler pairs corresponding to the input.
sub setHandlers {
    my ($self, @handler_pairs) = @_;

    croak("Uneven number of arguments to setHandlers method")
        if @handler_pairs % 2;

    my @ret;
    while (@handler_pairs) {
        my $type    = shift @handler_pairs;
        my $handler = shift @handler_pairs;

        unless (defined($self->{_HNDL_TYPES}->{$type})) {
            my @types = sort keys %{ $self->{_HNDL_TYPES} };

            croak("Unknown Parser handler type: $type\n Valid types: @types");
        }

        push(@ret, $type, $self->{Handlers}->{$type});
        $self->{Handlers}->{$type} = $handler;
    }

    return @ret;
}

# _expat_options($parser)
#
# Private helper (called as a plain function so subclasses cannot
# accidentally override it). Returns the key/value pairs of the parser
# object that are not listed in Non_Expat_Options, i.e. the options to
# forward to the Expat constructor.
sub _expat_options {
    my ($self) = @_;
    my $skip = $self->{Non_Expat_Options};

    return map { ($_, $self->{$_}) }
           grep { !exists $skip->{$_} }
           keys %{$self};
}

# parse_start([OPT => VALUE, ...])
#
# Creates and returns a new XML::Parser::ExpatNB object configured with
# this parser's options and handlers. The Init handler, if any, is called
# before returning; the Final handler is stored on the ExpatNB object
# under the FinalHandler key.
sub parse_start {
    my $self = shift;

    my %handlers = %{ $self->{Handlers} };
    my $init     = delete $handlers{Init};
    my $final    = delete $handlers{Final};

    # Options passed to this call come last so they override the
    # options stored on the parser object.
    my $expatnb = XML::Parser::ExpatNB->new(_expat_options($self), @_);
    $expatnb->setHandlers(%handlers);

    $init->($expatnb) if defined($init);

    $expatnb->{_State_} = 1;

    $expatnb->{FinalHandler} = $final if defined($final);

    return $expatnb;
}

# parse(SOURCE [, OPT => VALUE, ...])
#
# Parses SOURCE with a fresh XML::Parser::Expat object. Dies (re-throwing
# the original error) if the parse or the Final handler fails; the Expat
# object is released in every case. Returns the result of the Final
# handler when one is installed and the parse succeeded, otherwise the
# result of Expat's parse (scalar context) or an empty list (list context).
sub parse {
    my $self = shift;
    my $arg  = shift;

    # wantarray inside eval {} reports the context of the eval, not of
    # this sub, so the caller's context is captured up front.
    my $want = wantarray;

    my $expat    = XML::Parser::Expat->new(_expat_options($self), @_);
    my %handlers = %{ $self->{Handlers} };
    my $init     = delete $handlers{Init};
    my $final    = delete $handlers{Final};

    $expat->setHandlers(%handlers);

    $expat->base($self->{Base}) if $self->{Base};

    $init->($expat) if defined($init);

    my @result = ();
    my $result;
    eval {
        $result = $expat->parse($arg);

        if ($result and defined($final)) {
            if ($want) {
                @result = $final->($expat);
            }
            else {
                $result = $final->($expat);
            }
        }
    };
    my $err = $@;

    # Release on success and on failure alike, so a dying Final handler
    # cannot leak the Expat object.
    $expat->release;
    die $err if $err;

    return unless defined $want;
    return $want ? @result : $result;
}

# parsestring(...)
#
# Alias for parse, kept for backwards compatibility.
sub parsestring {
    my $self = shift;
    return $self->parse(@_);
}

# parsefile(FILE [, OPT => VALUE, ...])
#
# Opens FILE for reading in binary mode, records it as the parser's Base
# and parses it. The file is closed however parse returns. Croaks if the
# file cannot be opened; re-throws any error from parse. Returns what
# parse returns.
sub parsefile {
    my $self = shift;
    my $file = shift;
    my $want = wantarray;

    local (*FILE);

    # Three-argument open: $file is always treated as a plain file name,
    # never as a mode prefix or a pipe.
    open(FILE, '<', $file) or croak "Couldn't open $file:\n$!";
    binmode(FILE);

    $self->{Base} = $file;

    my @ret;
    my $ret;
    eval {
        if ($want) {
            @ret = $self->parse(*FILE, @_);
        }
        else {
            $ret = $self->parse(*FILE, @_);
        }
    };
    my $err = $@;
    close(FILE);
    die $err if $err;

    return unless defined $want;
    return $want ? @ret : $ret;
}

# initial_ext_ent_handler(Expat, Base, Sysid, Pubid)
#
# Bootstrap ExternEnt handler. On first use it tries to load the LWP
# based handler; on success it installs and tail-calls that handler.
# Otherwise it warns once, installs the file based handlers, and
# tail-calls file_ext_ent_handler.
sub initial_ext_ent_handler {
    # This just bootstraps in the real lwp_ext_ent_handler which
    # also loads the URI and LWP modules.

    unless ($LWP_load_failed) {
        local ($^W) = 0;

        my $stat = eval { require('XML/Parser/LWPExternEnt.pl'); };

        if ($stat) {
            $_[0]->setHandlers(
                ExternEnt    => \&lwp_ext_ent_handler,
                ExternEntFin => \&lwp_ext_ent_cleanup
            );

            # goto &sub keeps @_ intact for the real handler.
            goto &lwp_ext_ent_handler;
        }

        # Failed to load lwp handler, act as if NoLWP

        $LWP_load_failed = 1;

        my $cmsg = "Couldn't load LWP based external entity handler\n";
        $cmsg .= "Switching to file-based external entity handler\n";
        $cmsg .= " (To avoid this message, use NoLWP option to XML::Parser)\n";
        warn($cmsg);
    }

    $_[0]->setHandlers(
        ExternEnt    => \&file_ext_ent_handler,
        ExternEntFin => \&file_ext_ent_cleanup
    );
    goto &file_ext_ent_handler;
}

# file_ext_ent_handler(Expat, Base, Sysid)
#
# File based ExternEnt handler. Resolves a relative system id against
# Base, opens the resulting path read-only and returns the IO::File
# handle. On failure appends to the Expat object's ErrorMessage and
# returns undef. The previous base and the handle are pushed on stacks
# kept on the Expat object for file_ext_ent_cleanup.
sub file_ext_ent_handler {
    my ($xp, $base, $path) = @_;

    # Prepend base only for relative paths

    if (defined($base)
        and not($path =~ m!^(?:[\\/]|\w+:)!))
    {
        my $newpath = $base;
        $newpath =~ s![^\\/:]*$!$path!;
        $path = $newpath;
    }

    # The system id comes from the document being parsed, so it is
    # untrusted: refuse anything that looks like an open() mode or pipe.
    if (   $path =~ /^\s*[|>+]/
        or $path =~ /\|\s*$/)
    {
        $xp->{ErrorMessage}
            .= "System ID ($path) contains Perl IO control characters";
        return undef;    # explicit undef: the handler's documented failure value
    }

    require IO::File;

    # An explicit read mode makes IO::File treat $path as a literal file
    # name (defence in depth on top of the check above).
    my $fh = IO::File->new($path, '<');
    unless (defined $fh) {
        $xp->{ErrorMessage} .= "Failed to open $path:\n$!";
        return undef;
    }

    $xp->{_BaseStack} ||= [];
    $xp->{_FhStack}   ||= [];

    push(@{ $xp->{_BaseStack} }, $base);
    push(@{ $xp->{_FhStack} },   $fh);

    $xp->base($path);

    return $fh;
}

# file_ext_ent_cleanup(Expat)
#
# ExternEntFin counterpart of file_ext_ent_handler: closes the most
# recently opened entity file and restores the previous base.
sub file_ext_ent_cleanup {
    my ($xp) = @_;

    my $fh = pop(@{ $xp->{_FhStack} });

    # Guard against an empty stack (no matching successful open).
    $fh->close if defined $fh;

    my $base = pop(@{ $xp->{_BaseStack} });
    $xp->base($base);
}

1;

__END__

=head1 NAME

XML::Parser - A perl module for parsing XML documents

=head1 SYNOPSIS

  use XML::Parser;

  $p1 = new XML::Parser(Style => 'Debug');
$p1->parsefile('REC-xml-19980210.xml'); 342 $p1->parse('<foo id="me">Hello World</foo>'); 343 344 # Alternative 345 $p2 = new XML::Parser(Handlers => {Start => \&handle_start, 346 End => \&handle_end, 347 Char => \&handle_char}); 348 $p2->parse($socket); 349 350 # Another alternative 351 $p3 = new XML::Parser(ErrorContext => 2); 352 353 $p3->setHandlers(Char => \&text, 354 Default => \&other); 355 356 open(FOO, 'xmlgenerator |'); 357 $p3->parse(*FOO, ProtocolEncoding => 'ISO-8859-1'); 358 close(FOO); 359 360 $p3->parsefile('junk.xml', ErrorContext => 3); 361 362 =begin man 363 .ds PI PI 364 365 =end man 366 367 =head1 DESCRIPTION 368 369 This module provides ways to parse XML documents. It is built on top of 370 L<XML::Parser::Expat>, which is a lower level interface to James Clark's 371 expat library. Each call to one of the parsing methods creates a new 372 instance of XML::Parser::Expat which is then used to parse the document. 373 Expat options may be provided when the XML::Parser object is created. 374 These options are then passed on to the Expat object on each parse call. 375 They can also be given as extra arguments to the parse methods, in which 376 case they override options given at XML::Parser creation time. 377 378 The behavior of the parser is controlled either by C<L</Style>> and/or 379 C<L</Handlers>> options, or by L</setHandlers> method. These all provide 380 mechanisms for XML::Parser to set the handlers needed by XML::Parser::Expat. 381 If neither C<Style> nor C<Handlers> are specified, then parsing just 382 checks the document for being well-formed. 383 384 When underlying handlers get called, they receive as their first parameter 385 the I<Expat> object, not the Parser object. 386 387 =head1 METHODS 388 389 =over 4 390 391 =item new 392 393 This is a class method, the constructor for XML::Parser. Options are passed 394 as keyword value pairs. 
Recognized options are: 395 396 =over 4 397 398 =item * Style 399 400 This option provides an easy way to create a given style of parser. The 401 built in styles are: L<"Debug">, L<"Subs">, L<"Tree">, L<"Objects">, 402 and L<"Stream">. These are all defined in separate packages under 403 C<XML::Parser::Style::*>, and you can find further documentation for 404 each style both below, and in those packages. 405 406 Custom styles can be provided by giving a full package name containing 407 at least one '::'. This package should then have subs defined for each 408 handler it wishes to have installed. See L<"STYLES"> below 409 for a discussion of each built in style. 410 411 =item * Handlers 412 413 When provided, this option should be an anonymous hash containing as 414 keys the type of handler and as values a sub reference to handle that 415 type of event. All the handlers get passed as their 1st parameter the 416 instance of expat that is parsing the document. Further details on 417 handlers can be found in L<"HANDLERS">. Any handler set here 418 overrides the corresponding handler set with the Style option. 419 420 =item * Pkg 421 422 Some styles will refer to subs defined in this package. If not provided, 423 it defaults to the package which called the constructor. 424 425 =item * ErrorContext 426 427 This is an Expat option. When this option is defined, errors are reported 428 in context. The value should be the number of lines to show on either side 429 of the line in which the error occurred. 430 431 =item * ProtocolEncoding 432 433 This is an Expat option. This sets the protocol encoding name. It defaults 434 to none. The built-in encodings are: C<UTF-8>, C<ISO-8859-1>, C<UTF-16>, and 435 C<US-ASCII>. Other encodings may be used if they have encoding maps in one 436 of the directories in the @Encoding_Path list. Check L<"ENCODINGS"> for 437 more information on encoding maps. Setting the protocol encoding overrides 438 any encoding in the XML declaration. 
439 440 =item * Namespaces 441 442 This is an Expat option. If this is set to a true value, then namespace 443 processing is done during the parse. See L<XML::Parser::Expat/"Namespaces"> 444 for further discussion of namespace processing. 445 446 =item * NoExpand 447 448 This is an Expat option. Normally, the parser will try to expand references 449 to entities defined in the internal subset. If this option is set to a true 450 value, and a default handler is also set, then the default handler will be 451 called when an entity reference is seen in text. This has no effect if a 452 default handler has not been registered, and it has no effect on the expansion 453 of entity references inside attribute values. 454 455 =item * Stream_Delimiter 456 457 This is an Expat option. It takes a string value. When this string is found 458 alone on a line while parsing from a stream, then the parse is ended as if it 459 saw an end of file. The intended use is with a stream of xml documents in a 460 MIME multipart format. The string should not contain a trailing newline. 461 462 =item * ParseParamEnt 463 464 This is an Expat option. Unless standalone is set to "yes" in the XML 465 declaration, setting this to a true value allows the external DTD to be read, 466 and parameter entities to be parsed and expanded. 467 468 =item * NoLWP 469 470 This option has no effect if the ExternEnt or ExternEntFin handlers are 471 directly set. Otherwise, if true, it forces the use of a file based external 472 entity handler. 473 474 =item * Non_Expat_Options 475 476 If provided, this should be an anonymous hash whose keys are options that 477 shouldn't be passed to Expat. This should only be of concern to those 478 subclassing XML::Parser. 479 480 =back 481 482 =item setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]]) 483 484 This method registers handlers for various parser events. 
It overrides any 485 previous handlers registered through the Style or Handler options or through 486 earlier calls to setHandlers. By providing a false or undefined value as 487 the handler, the existing handler can be unset. 488 489 This method returns a list of type, handler pairs corresponding to the 490 input. The handlers returned are the ones that were in effect prior to 491 the call. 492 493 See a description of the handler types in L<"HANDLERS">. 494 495 =item parse(SOURCE [, OPT => OPT_VALUE [...]]) 496 497 The SOURCE parameter should either be a string containing the whole XML 498 document, or it should be an open IO::Handle. Constructor options to 499 XML::Parser::Expat given as keyword-value pairs may follow the SOURCE 500 parameter. These override, for this call, any options or attributes passed 501 through from the XML::Parser instance. 502 503 A die call is thrown if a parse error occurs. Otherwise it will return 1 504 or whatever is returned from the B<Final> handler, if one is installed. 505 In other words, what parse may return depends on the style. 506 507 =item parsestring 508 509 This is just an alias for parse for backwards compatibility. 510 511 =item parsefile(FILE [, OPT => OPT_VALUE [...]]) 512 513 Open FILE for reading, then call parse with the open handle. The file 514 is closed no matter how parse returns. Returns what parse returns. 515 516 =item parse_start([ OPT => OPT_VALUE [...]]) 517 518 Create and return a new instance of XML::Parser::ExpatNB. Constructor 519 options may be provided. If an init handler has been provided, it is 520 called before returning the ExpatNB object. Documents are parsed by 521 making incremental calls to the parse_more method of this object, which 522 takes a string. A single call to the parse_done method of this object, 523 which takes no arguments, indicates that the document is finished. 
524 525 If there is a final handler installed, it is executed by the parse_done 526 method before returning and the parse_done method returns whatever is 527 returned by the final handler. 528 529 =back 530 531 =head1 HANDLERS 532 533 Expat is an event based parser. As the parser recognizes parts of the 534 document (say the start or end tag for an XML element), then any handlers 535 registered for that type of an event are called with suitable parameters. 536 All handlers receive an instance of XML::Parser::Expat as their first 537 argument. See L<XML::Parser::Expat/"METHODS"> for a discussion of the 538 methods that can be called on this object. 539 540 =head2 Init (Expat) 541 542 This is called just before the parsing of the document starts. 543 544 =head2 Final (Expat) 545 546 This is called just after parsing has finished, but only if no errors 547 occurred during the parse. Parse returns what this returns. 548 549 =head2 Start (Expat, Element [, Attr, Val [,...]]) 550 551 This event is generated when an XML start tag is recognized. Element is the 552 name of the XML element type that is opened with the start tag. The Attr & 553 Val pairs are generated for each attribute in the start tag. 554 555 =head2 End (Expat, Element) 556 557 This event is generated when an XML end tag is recognized. Note that 558 an XML empty tag (<foo/>) generates both a start and an end event. 559 560 =head2 Char (Expat, String) 561 562 This event is generated when non-markup is recognized. The non-markup 563 sequence of characters is in String. A single non-markup sequence of 564 characters may generate multiple calls to this handler. Whatever the 565 encoding of the string in the original document, this is given to the 566 handler in UTF-8. 567 568 =head2 Proc (Expat, Target, Data) 569 570 This event is generated when a processing instruction is recognized. 571 572 =head2 Comment (Expat, Data) 573 574 This event is generated when a comment is recognized. 
575 576 =head2 CdataStart (Expat) 577 578 This is called at the start of a CDATA section. 579 580 =head2 CdataEnd (Expat) 581 582 This is called at the end of a CDATA section. 583 584 =head2 Default (Expat, String) 585 586 This is called for any characters that don't have a registered handler. 587 This includes both characters that are part of markup for which no 588 events are generated (markup declarations) and characters that 589 could generate events, but for which no handler has been registered. 590 591 Whatever the encoding in the original document, the string is returned to 592 the handler in UTF-8. 593 594 =head2 Unparsed (Expat, Entity, Base, Sysid, Pubid, Notation) 595 596 This is called for a declaration of an unparsed entity. Entity is the name 597 of the entity. Base is the base to be used for resolving a relative URI. 598 Sysid is the system id. Pubid is the public id. Notation is the notation 599 name. Base and Pubid may be undefined. 600 601 =head2 Notation (Expat, Notation, Base, Sysid, Pubid) 602 603 This is called for a declaration of notation. Notation is the notation name. 604 Base is the base to be used for resolving a relative URI. Sysid is the system 605 id. Pubid is the public id. Base, Sysid, and Pubid may all be undefined. 606 607 =head2 ExternEnt (Expat, Base, Sysid, Pubid) 608 609 This is called when an external entity is referenced. Base is the base to be 610 used for resolving a relative URI. Sysid is the system id. Pubid is the public 611 id. Base, and Pubid may be undefined. 612 613 This handler should either return a string, which represents the contents of 614 the external entity, or return an open filehandle that can be read to obtain 615 the contents of the external entity, or return undef, which indicates the 616 external entity couldn't be found and will generate a parse error. 617 618 If an open filehandle is returned, it must be returned as either a glob 619 (*FOO) or as a reference to a glob (e.g. 
an instance of IO::Handle). 620 621 A default handler is installed for this event. The default handler is 622 XML::Parser::lwp_ext_ent_handler unless the NoLWP option was provided with 623 a true value, in which case XML::Parser::file_ext_ent_handler is the default 624 handler for external entities. Even without the NoLWP option, if the 625 URI or LWP modules are missing, the file based handler ends up being used 626 after giving a warning on the first external entity reference. 627 628 The LWP external entity handler will use proxies defined in the environment 629 (http_proxy, ftp_proxy, etc.). 630 631 Please note that the LWP external entity handler reads the entire 632 entity into a string and returns it, whereas the file handler opens a 633 filehandle. 634 635 Also note that the file external entity handler will likely choke on 636 absolute URIs or file names that don't fit the conventions of the local 637 operating system. 638 639 The expat base method can be used to set a basename for 640 relative pathnames. If no basename is given, or if the basename is itself 641 a relative name, then it is relative to the current working directory. 642 643 =head2 ExternEntFin (Expat) 644 645 This is called after parsing an external entity. It's not called unless 646 an ExternEnt handler is also set. There is a default handler installed 647 that pairs with the default ExternEnt handler. 648 649 If you're going to install your own ExternEnt handler, then you should 650 set (or unset) this handler too. 651 652 =head2 Entity (Expat, Name, Val, Sysid, Pubid, Ndata, IsParam) 653 654 This is called when an entity is declared. For internal entities, the Val 655 parameter will contain the value and the remaining three parameters will be 656 undefined. 
For external entities, the Val parameter will be undefined, the 657 Sysid parameter will have the system id, the Pubid parameter will have the 658 public id if it was provided (it will be undefined otherwise), the Ndata 659 parameter will contain the notation for unparsed entities. If this is a 660 parameter entity declaration, then the IsParam parameter is true. 661 662 Note that this handler and the Unparsed handler above overlap. If both are 663 set, then this handler will not be called for unparsed entities. 664 665 =head2 Element (Expat, Name, Model) 666 667 The element handler is called when an element declaration is found. Name 668 is the element name, and Model is the content model as an XML::Parser::Content 669 object. See L<XML::Parser::Expat/"XML::Parser::ContentModel Methods"> 670 for methods available for this class. 671 672 =head2 Attlist (Expat, Elname, Attname, Type, Default, Fixed) 673 674 This handler is called for each attribute in an ATTLIST declaration. 675 So an ATTLIST declaration that has multiple attributes will generate multiple 676 calls to this handler. The Elname parameter is the name of the element with 677 which the attribute is being associated. The Attname parameter is the name 678 of the attribute. Type is the attribute type, given as a string. Default is 679 the default value, which will either be "#REQUIRED", "#IMPLIED" or a quoted 680 string (i.e. the returned string will begin and end with a quote character). 681 If Fixed is true, then this is a fixed attribute. 682 683 =head2 Doctype (Expat, Name, Sysid, Pubid, Internal) 684 685 This handler is called for DOCTYPE declarations. Name is the document type 686 name. Sysid is the system id of the document type, if it was provided, 687 otherwise it's undefined. Pubid is the public id of the document type, 688 which will be undefined if no public id was given. Internal is the internal 689 subset, given as a string. If there was no internal subset, it will be 690 undefined. 
Internal will contain all whitespace, comments, processing 691 instructions, and declarations seen in the internal subset. The declarations 692 will be there whether or not they have been processed by another handler 693 (except for unparsed entities processed by the Unparsed handler). However, 694 comments and processing instructions will not appear if they've been processed 695 by their respective handlers. 696 697 =head2 DoctypeFin (Expat) 698 699 This handler is called after parsing of the DOCTYPE declaration has finished, 700 including any internal or external DTD declarations. 701 702 =head2 XMLDecl (Expat, Version, Encoding, Standalone) 703 704 This handler is called for xml declarations. Version is a string containing 705 the version. Encoding is either undefined or contains an encoding string. 706 Standalone will be either true, false, or undefined if the standalone attribute 707 is yes, no, or not given, respectively. 708 709 =head1 STYLES 710 711 =head2 Debug 712 713 This just prints out the document in outline form. Nothing special is 714 returned by parse. 715 716 =head2 Subs 717 718 Each time an element starts, a sub by that name in the package specified 719 by the Pkg option is called with the same parameters that the Start 720 handler gets called with. 721 722 Each time an element ends, a sub with that name appended with an underscore 723 ("_"), is called with the same parameters that the End handler gets called 724 with. 725 726 Nothing special is returned by parse. 727 728 =head2 Tree 729 730 Parse will return a parse tree for the document. Each node in the tree 731 takes the form of a tag, content pair. Text nodes are represented with 732 a pseudo-tag of "0" and the string that is their content. For elements, 733 the content is an array reference. The first item in the array is a 734 (possibly empty) hash reference containing attributes. The remainder of 735 the array is a sequence of tag-content pairs representing the content 736 of the element. 
737 738 So for example the result of parsing: 739 740 <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo> 741 742 would be: 743 744 Tag Content 745 ================================================================== 746 [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], 747 bar, [ {}, 0, "Howdy", ref, [{}]], 748 0, "do" 749 ] 750 ] 751 752 The root element "foo" has 3 children: a "head" element, a "bar" 753 element and the text "do". After the empty attribute hash, these are 754 represented in its contents by 3 tag-content pairs. 755 756 =head2 Objects 757 758 This is similar to the Tree style, except that a hash object is created for 759 each element. The corresponding object will be in the class whose name 760 is created by appending "::" and the element name to the package set with 761 the Pkg option. Non-markup text will be in the ::Characters class. The 762 contents of the corresponding object will be in an anonymous array that 763 is the value of the Kids property for that object. 764 765 =head2 Stream 766 767 This style also uses the Pkg package. If none of the subs that this 768 style looks for is there, then the effect of parsing with this style is 769 to print a canonical copy of the document without comments or declarations. 770 All the subs receive as their 1st parameter the Expat instance for the 771 document they're parsing. 772 773 It looks for the following routines: 774 775 =over 4 776 777 =item * StartDocument 778 779 Called at the start of the parse. 780 781 =item * StartTag 782 783 Called for every start tag with a second parameter of the element type. The $_ 784 variable will contain a copy of the tag and the %_ variable will contain 785 attribute values supplied for that element. 786 787 =item * EndTag 788 789 Called for every end tag with a second parameter of the element type. The $_ 790 variable will contain a copy of the end tag. 
791 792 =item * Text 793 794 Called just before start or end tags with accumulated non-markup text in 795 the $_ variable. 796 797 =item * PI 798 799 Called for processing instructions. The $_ variable will contain a copy of 800 the PI and the target and data are sent as 2nd and 3rd parameters 801 respectively. 802 803 =item * EndDocument 804 805 Called at conclusion of the parse. 806 807 =back 808 809 =head1 ENCODINGS 810 811 XML documents may be encoded in character sets other than Unicode as 812 long as they may be mapped into the Unicode character set. Expat has 813 further restrictions on encodings. Read the xmlparse.h header file in 814 the expat distribution to see details on these restrictions. 815 816 Expat has built-in encodings for: C<UTF-8>, C<ISO-8859-1>, C<UTF-16>, and 817 C<US-ASCII>. Encodings are set either through the XML declaration 818 encoding attribute or through the ProtocolEncoding option to XML::Parser 819 or XML::Parser::Expat. 820 821 For encodings other than the built-ins, expat calls the function 822 load_encoding in the Expat package with the encoding name. This function 823 looks for a file in the path list @XML::Parser::Expat::Encoding_Path, that 824 matches the lower-cased name with a '.enc' extension. The first one it 825 finds, it loads. 826 827 If you wish to build your own encoding maps, check out the XML::Encoding 828 module from CPAN. 829 830 =head1 AUTHORS 831 832 Larry Wall <F<larry@wall.org>> wrote version 1.0. 833 834 Clark Cooper <F<coopercc@netheaven.com>> picked up support, changed the API 835 for this version (2.x), provided documentation, 836 and added some standard package features. 837 838 Matt Sergeant <F<matt@sergeant.org>> is now maintaining XML::Parser 839 840 =cut
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Tue Mar 17 22:47:18 2015 | Cross-referenced by PHPXref 0.7.1 |