[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/se3-unattended/var/se3/unattended/install/linuxaux/opt/perl/lib/site_perl/5.10.0/i586-linux-thread-multi/XML/Parser/ -> Expat.pm (source)

   1  package XML::Parser::Expat;
   2  
   3  require 5.004;
   4  
   5  use strict;
   6  use vars qw($VERSION @ISA %Handler_Setters %Encoding_Table @Encoding_Path
   7              $have_File_Spec);
   8  use Carp;
   9  
  10  require DynaLoader;
  11  
  12  @ISA = qw(DynaLoader);
  13  $VERSION = "2.36" ;
  14  
  # Record whether File::Spec can be used: either it is already loaded or
  # we try to load it here.  A false value selects the plain '/' fallbacks
  # used below and in load_encoding.
  $have_File_Spec = $INC{'File/Spec.pm'} || do 'File/Spec.pm';

  %Encoding_Table = ();

  # Encoding maps are searched for in every existing XML/Parser/Encodings
  # directory found under @INC and, last of all, in the current directory.
  {
    my (@candidates, $here);

    if ($have_File_Spec) {
      @candidates = map { File::Spec->catdir($_, qw(XML Parser Encodings)) } @INC;
      $here       = File::Spec->curdir;
    }
    else {
      @candidates = map { $_ . '/XML/Parser/Encodings' } @INC;
      $here       = '.';
    }

    @Encoding_Path = ((grep { -d $_ } @candidates), $here);
  }

  # bootstrap is inherited from DynaLoader (see @ISA) and is given the
  # module version.
  XML::Parser::Expat->bootstrap($VERSION);
  30  
  # Dispatch table used by setHandlers: handler type name => the routine
  # that installs a callback of that type on a parser.
  %Handler_Setters = (
    # Elements, text and other document content
    Start        => \&SetStartElementHandler,
    End          => \&SetEndElementHandler,
    Char         => \&SetCharacterDataHandler,
    Proc         => \&SetProcessingInstructionHandler,
    Comment      => \&SetCommentHandler,
    CdataStart   => \&SetStartCdataHandler,
    CdataEnd     => \&SetEndCdataHandler,
    Default      => \&SetDefaultHandler,

    # External entity references
    ExternEnt    => \&SetExternalEntityRefHandler,
    ExternEntFin => \&SetExtEntFinishHandler,

    # Declarations
    Unparsed     => \&SetUnparsedEntityDeclHandler,
    Notation     => \&SetNotationDeclHandler,
    Entity       => \&SetEntityDeclHandler,
    Element      => \&SetElementDeclHandler,
    Attlist      => \&SetAttListDeclHandler,
    Doctype      => \&SetDoctypeHandler,
    DoctypeFin   => \&SetEndDoctypeHandler,
    XMLDecl      => \&SetXMLDeclHandler,
  );
  51  
  # new(CLASS, OPTION => VALUE, ...)
  #
  # Constructor.  The option hash itself is blessed and becomes the object;
  # bookkeeping fields are then added to it and the underlying parser is
  # created with ParserCreate.
  sub new {
    my ($class, %args) = @_;
    my $self = bless \%args, $class;

    $self->{_State_}        = 0;    # parse() moves this to 1, then 2
    $self->{Context}        = [];
    $self->{Namespaces}   ||= 0;
    $self->{ErrorMessage} ||= '';

    # Namespace bookkeeping is only set up when namespace processing was
    # requested.
    if ($self->{Namespaces}) {
      $self->{Namespace_Table} = {};
      $self->{Namespace_List}  = [undef];
      $self->{Prefix_Table}    = {};
      $self->{New_Prefixes}    = [];
    }

    $self->{_Setters} = \%Handler_Setters;
    $self->{Parser}   = ParserCreate($self, $self->{ProtocolEncoding},
                                     $self->{Namespaces});

    return $self;
  }
  70  
  # load_encoding(NAME)
  #
  # Read an encoding map file, hand its contents to LoadEncoding and return
  # the name LoadEncoding reports.
  #
  # The last path component of NAME is lower-cased and '.enc' is appended
  # when it is missing.  Unless NAME starts with '/', each directory in
  # @Encoding_Path is tried in order and the first existing file is used;
  # if none exists the name is used as given.
  #
  # Croaks if the file cannot be opened or read, or if LoadEncoding returns
  # undef for its contents.
  sub load_encoding {
    my ($file) = @_;

    $file =~ s!([^/]+)$!\L$1\E!;
    $file .= '.enc' unless $file =~ /\.enc$/;
    unless ($file =~ m!^/!) {
      foreach my $dir (@Encoding_Path) {
        my $tmp = ($have_File_Spec
                   ? File::Spec->catfile($dir, $file)
                   : "$dir/$file");
        if (-e $tmp) {
          $file = $tmp;
          last;
        }
      }
    }

    # sysopen takes the name literally.  Two-argument open would treat
    # leading or trailing '<', '>', '|' in the name as a mode or a pipe, so
    # a name such as '>x' would create a file instead of failing.
    require Fcntl;
    local(*ENC);
    sysopen(ENC, $file, Fcntl::O_RDONLY())
      or croak("Couldn't open encmap $file:\n$!\n");
    binmode(ENC);

    # Size the read from the handle that is already open rather than
    # looking the path up a second time.
    my $data;
    my $br = sysread(ENC, $data, (-s ENC) || 0);
    croak("Trouble reading $file:\n$!\n")
      unless defined($br);
    close(ENC);

    my $name = LoadEncoding($data, $br);
    croak("$file isn't an encmap file")
      unless defined($name);

    $name;
  }  # End load_encoding
 103  
 # setHandlers(TYPE => HANDLER, ...)
 #
 # Install each HANDLER through the setter registered for TYPE in
 # $self->{_Setters}.  Returns a flat list of TYPE, previous-handler pairs
 # in the order given.  Croaks on an odd number of arguments, on a true
 # HANDLER that is not a code reference, and on an unknown TYPE.
 sub setHandlers {
   my ($self, @pairs) = @_;

   croak("Uneven number of arguments to setHandlers method")
     if @pairs % 2;

   my @previous;

   while (my ($type, $handler) = splice(@pairs, 0, 2)) {
     # Any false value (including undef) is accepted; setters receive it
     # unchanged.
     if ($handler and ref($handler) ne 'CODE') {
       croak "Handler for $type not a Code ref";
     }

     my $setter = $self->{_Setters}->{$type};

     if (!defined($setter)) {
       my @types = sort keys %{$self->{_Setters}};
       croak("Unknown Expat handler type: $type\n Valid types: @types");
     }

     my $old = $setter->($self->{Parser}, $handler);
     push(@previous, $type, $old);
   }

   return @previous;
 }
 131  
 # xpcroak(MESSAGE)
 #
 # Croak with MESSAGE followed by the parser's current line number and,
 # when the ErrorContext option is defined, the text returned by
 # position_in_context.
 sub xpcroak {
   my ($self, $message) = @_;

   my $context_lines = $self->{ErrorContext};

   $message .= " at line " . GetCurrentLineNumber($self->{Parser});
   if (defined($context_lines)) {
     $message .= ":\n" . $self->position_in_context($context_lines);
   }

   croak $message;
 }
 143  
 # xpcarp(MESSAGE)
 #
 # Carp with MESSAGE followed by the parser's current line number and,
 # when the ErrorContext option is defined, the text returned by
 # position_in_context.
 sub xpcarp {
   my ($self, $message) = @_;

   my $context_lines = $self->{ErrorContext};

   $message .= " at line " . GetCurrentLineNumber($self->{Parser});
   if (defined($context_lines)) {
     $message .= ":\n" . $self->position_in_context($context_lines);
   }

   carp $message;
 }
 154  
 # Call DefaultCurrent on the underlying parser, but only while a parse is
 # in progress (_State_ == 1).
 sub default_current {
   my ($self) = @_;
   return DefaultCurrent($self->{Parser}) if $self->{_State_} == 1;
 }
 161  
 # Return RecognizedString for the underlying parser, but only while a
 # parse is in progress (_State_ == 1).
 sub recognized_string {
   my ($self) = @_;
   return RecognizedString($self->{Parser}) if $self->{_State_} == 1;
 }
 168  
 # Return OriginalString for the underlying parser, but only while a parse
 # is in progress (_State_ == 1).
 sub original_string {
   my ($self) = @_;
   return OriginalString($self->{Parser}) if $self->{_State_} == 1;
 }
 175  
 # Return GetCurrentLineNumber for the underlying parser, but only while a
 # parse is in progress (_State_ == 1).
 sub current_line {
   my ($self) = @_;
   return GetCurrentLineNumber($self->{Parser}) if $self->{_State_} == 1;
 }
 182  
 # Return GetCurrentColumnNumber for the underlying parser, but only while
 # a parse is in progress (_State_ == 1).
 sub current_column {
   my ($self) = @_;
   return GetCurrentColumnNumber($self->{Parser}) if $self->{_State_} == 1;
 }
 189  
 # Return GetCurrentByteIndex for the underlying parser, but only while a
 # parse is in progress (_State_ == 1).
 sub current_byte {
   my ($self) = @_;
   return GetCurrentByteIndex($self->{Parser}) if $self->{_State_} == 1;
 }
 196  
 # base([NEWBASE])
 #
 # Return the parser's base as it was before this call.  When an argument
 # is supplied (even undef) the base is set to it afterwards.
 sub base {
   my ($self, @new) = @_;
   my $parser = $self->{Parser};

   my $previous = GetBase($parser);
   if (@new) {
     SetBase($parser, $new[0]);
   }

   return $previous;
 }
 204  
 # Return the contents of the Context stack (its size in scalar context).
 sub context {
   my ($self) = @_;
   return @{ $self->{Context} };
 }
 209  
 # Return the last entry of the Context stack, or undef when it is empty.
 sub current_element {
   my ($self) = @_;
   my $open = $self->{Context};

   return undef unless @$open;    # explicit undef, as before, in any context
   return $open->[-1];
 }
 214  
 # in_element(NAME)
 #
 # Compare NAME with the last entry of the Context stack using eq_name.
 # Returns undef when the stack is empty.
 sub in_element {
   my ($self, $element) = @_;
   my $open = $self->{Context};

   return undef unless @$open;
   return $self->eq_name($open->[-1], $element);
 }
 220  
 # within_element(NAME)
 #
 # Count the entries of the Context stack that eq_name considers equal to
 # NAME.
 sub within_element {
   my ($self, $element) = @_;

   return scalar grep { $self->eq_name($_, $element) } @{$self->{Context}};
 }
 229  
 # Return the number of entries on the Context stack.
 sub depth {
   my ($self) = @_;
   return scalar @{ $self->{Context} };
 }
 234  
 # Return ElementIndex for the underlying parser, but only while a parse
 # is in progress (_State_ == 1).
 sub element_index {
   my $self = shift;

   return ElementIndex($self->{Parser}) if $self->{_State_} == 1;
 }
 242  
 243  ################
 244  # Namespace methods
 245  
 # namespace(NAME)
 #
 # Use the numeric value of NAME as an index into Namespace_List and
 # return that entry.
 sub namespace {
   my ($self, $name) = @_;

   # Silence the "isn't numeric" warning for names that are plain strings.
   local $^W = 0;

   my $index = int($name);
   return $self->{Namespace_List}->[$index];
 }
 251  
 # eq_name(NAME1, NAME2)
 #
 # True when the two names agree both as integers and as strings.
 sub eq_name {
   my ($self, $left, $right) = @_;

   # Silence the "isn't numeric" warning for names that are plain strings.
   local $^W = 0;

   return (int($left) == int($right) && $left eq $right);
 }
 258  
 # generate_ns_name(NAME, NAMESPACE)
 #
 # With a true NAMESPACE, return the result of GenerateNSName for this
 # object's namespace table and list; otherwise return NAME unchanged.
 sub generate_ns_name {
   my ($self, $name, $namespace) = @_;

   return $name unless $namespace;

   return GenerateNSName($name, $namespace,
                         $self->{Namespace_Table},
                         $self->{Namespace_List});
 }
 267  
 # Return the New_Prefixes list when namespace processing is on, and an
 # empty list otherwise.
 sub new_ns_prefixes {
   my ($self) = @_;

   return $self->{Namespaces} ? @{$self->{New_Prefixes}} : ();
 }
 275  
 # expand_ns_prefix(PREFIX)
 #
 # Return the most recent entry recorded for PREFIX in Prefix_Table.
 # Returns undef when namespace processing is off or PREFIX has no
 # entries.
 sub expand_ns_prefix {
   my ($self, $prefix) = @_;

   return undef unless $self->{Namespaces};

   my $bindings = $self->{Prefix_Table}->{$prefix};
   return undef unless (defined($bindings) and @$bindings);

   return $bindings->[-1];
 }
 286  
 # Return the keys of Prefix_Table, leaving out '#default' when its most
 # recent entry is undef.  Returns an empty list when namespace processing
 # is off.
 sub current_ns_prefixes {
   my ($self) = @_;

   return () unless $self->{Namespaces};

   # Work on a copy so the object's own table is never touched.
   my %bound = %{$self->{Prefix_Table}};

   delete $bound{'#default'}
     if (exists $bound{'#default'} and not defined($bound{'#default'}->[-1]));

   return keys %bound;
 }
 302  
 303  
 304  ################################################################
 305  # Namespace declaration handlers
 306  #
 307  
 # NamespaceStart(PREFIX, URI)
 #
 # Record URI as the newest entry for PREFIX in Prefix_Table and note
 # PREFIX in New_Prefixes.  An undefined PREFIX is stored as '#default'.
 sub NamespaceStart {
   my ($self, $prefix, $uri) = @_;

   $prefix = '#default' unless defined $prefix;

   # Pushing through the reference creates the per-prefix list on first
   # use.
   push(@{$self->{Prefix_Table}->{$prefix}}, $uri);

   # The New_Prefixes list gets emptied at end of startElement function
   # in Expat.xs

   push(@{$self->{New_Prefixes}}, $prefix);
 }
 326  
 # NamespaceEnd(PREFIX)
 #
 # Drop the newest entry for PREFIX from Prefix_Table, removing the
 # prefix's key altogether once no entries would remain.  An undefined
 # PREFIX means '#default'.
 sub NamespaceEnd {
   my ($self, $prefix) = @_;

   $prefix = '#default' unless defined $prefix;

   my $bindings = $self->{Prefix_Table}->{$prefix};
   if (@$bindings <= 1) {
     delete $self->{Prefix_Table}->{$prefix};
   }
   else {
     pop(@$bindings);
   }
 }
 340  
 341  ################
 342  
 # Return GetSpecifiedAttributeCount for the underlying parser, but only
 # while a parse is in progress (_State_ == 1).
 sub specified_attr {
   my ($self) = @_;

   return GetSpecifiedAttributeCount($self->{Parser})
     if $self->{_State_} == 1;
 }
 350  
 # While a parse is in progress (_State_ == 1), call UnsetAllHandlers on
 # the underlying parser; otherwise do nothing.
 sub finish {
   my $self = shift;

   UnsetAllHandlers($self->{Parser}) if $self->{_State_} == 1;
 }
 358  
 # position_in_context(LINES)
 #
 # While a parse is in progress (_State_ == 1), return the text obtained
 # from PositionContext with a marker line ('=' characters ending in '^')
 # inserted at the offset PositionContext reports, or appended when that
 # offset is not inside the text.  Returns '' when PositionContext yields
 # no text.
 sub position_in_context {
   my ($self, $lines) = @_;

   if ($self->{_State_} == 1) {
     my $parser = $self->{Parser};
     my ($text, $split_at) = PositionContext($parser, $lines);

     return '' unless defined($text);

     my $marker = ('=' x (GetCurrentColumnNumber($parser) - 1)) . "^\n";

     # Decide where the marker goes before a newline is possibly added.
     my $inside = $split_at < length($text);

     $text .= "\n" unless $text =~ /\n$/;

     return $inside
       ? substr($text, 0, $split_at) . $marker . substr($text, $split_at)
       : $text . $marker;
   }
 }
 384  
 # xml_escape(TEXT [, CHAR ...])
 #
 # Return a copy of TEXT in which every '&' and '<' is replaced by '&amp;'
 # and '&lt;'.  Every occurrence of each additional CHAR is escaped as
 # well: '>', '"' and "'" become '&gt;', '&quot;' and '&apos;', and any
 # other character becomes a hexadecimal character reference such as
 # '&#x2E;'.
 #
 # Croaks if a CHAR is not exactly one character long.
 sub xml_escape {
   my $self = shift;
   my $text = shift;

   my %entity_for = ('&' => '&amp;', '<' => '&lt;');
   my %named      = ('>' => '&gt;', '"' => '&quot;', "'" => '&apos;');

   foreach my $char (@_) {
     croak "xml_escape: '$char' isn't a single character"
       unless length($char) == 1;

     # '&' and '<' are always escaped by name; asking again changes nothing.
     next if exists $entity_for{$char};

     $entity_for{$char} = exists $named{$char}
       ? $named{$char}
       : '&#' . sprintf('x%X', ord($char)) . ';';
   }

   # A single pass over the text, so characters that belong to an entity
   # inserted here ('&', '#', ';', ...) are never escaped a second time.
   my $class = join('', map { quotemeta($_) } keys %entity_for);
   $text =~ s/([$class])/$entity_for{$1}/g;

   $text;
 }
 417  
 # skip_until(INDEX)
 #
 # Pass INDEX to SkipUntil on the underlying parser unless the parse has
 # already finished (_State_ greater than 1).
 sub skip_until {
   my ($self, $index) = @_;

   SkipUntil($self->{Parser}, $index) if $self->{_State_} <= 1;
 }
 424  
 # Call ParserRelease on the underlying parser.
 sub release {
   my ($self) = @_;
   return ParserRelease($self->{Parser});
 }
 429  
 # Destructor: call ParserFree on the underlying parser.
 sub DESTROY {
   my ($self) = @_;
   return ParserFree($self->{Parser});
 }
 434  
 # parse(SOURCE)
 #
 # Parse SOURCE, which is either a string holding the document or
 # something readable: an IO::Handle object, a tied value whose class
 # defines TIEHANDLE, or a glob (or glob name) with an IO slot.  Streams
 # go through ParseStream, everything else through ParseString.
 #
 # When reading a stream and the Stream_Delimiter option is defined, the
 # handle class's input record separator is switched to
 # "\nDELIMITER\n" for the duration of the parse and restored afterwards.
 #
 # _State_ is 1 while parsing and 2 afterwards.  Croaks if _State_ is
 # already true on entry, and croaks with ErrorMessage when the parse
 # routine returns a false value; otherwise returns that value.
 sub parse {
   my $self = shift;
   my $arg = shift;
   croak "Parse already in progress (Expat)" if $self->{_State_};
   $self->{_State_} = 1;
   my $parser = $self->{Parser};
   my $ioref;
   my $result = 0;

   if (defined $arg) {
     if (ref($arg) and UNIVERSAL::isa($arg, 'IO::Handle')) {
       $ioref = $arg;
     } elsif (tied($arg)) {
       my $class = ref($arg);
       no strict 'refs';
       # The braces delimit the variable name.  Written as "$class}::..."
       # the lookup was for a sub literally named "Class}::TIEHANDLE",
       # which can never exist.
       $ioref = $arg if defined &{"${class}::TIEHANDLE"};
     }
     else {
       require IO::Handle;
       # Best effort: anything that is not a usable glob simply leaves
       # $ioref undefined and is parsed as a string below.
       eval {
         no strict 'refs';
         $ioref = *{$arg}{IO} if defined *{$arg};
       };
       undef $@;
     }
   }

   if (defined($ioref)) {
     my $delim = $self->{Stream_Delimiter};
     my $prev_rs;

     $prev_rs = ref($ioref)->input_record_separator("\n$delim\n")
       if defined($delim);

     $result = ParseStream($parser, $ioref, $delim);

     ref($ioref)->input_record_separator($prev_rs)
       if defined($delim);
   } else {
     $result = ParseString($parser, $arg);
   }

   $self->{_State_} = 2;
   $result or croak $self->{ErrorMessage};
 }
 480  
 # Alias for parse: every argument is passed straight through.
 sub parsestring {
   my ($self, @args) = @_;
   return $self->parse(@args);
 }
 485  
 # parsefile(FILENAME)
 #
 # Open FILENAME read-only in binary mode, hand the handle to parse and
 # return what parse returns.  Croaks if _State_ is already true or if
 # the file cannot be opened.
 sub parsefile {
   my $self = shift;
   croak "Parser has already been used" if $self->{_State_};

   # sysopen takes the name literally.  Two-argument open would treat
   # leading or trailing '<', '>', '|' in the name as a mode or a pipe, so
   # a name such as '>doc.xml' would truncate the file instead of failing.
   require Fcntl;
   local(*FILE);
   sysopen(FILE, $_[0], Fcntl::O_RDONLY())
     or croak "Couldn't open $_[0]:\n$!";
   binmode(FILE);
   my $ret = $self->parse(*FILE);
   close(FILE);
   $ret;
 }
 496  
 497  ################################################################
 package XML::Parser::ContentModel;

 # A content model stringifies through asString and is compared with 'eq'
 # through thiseq.
 use overload '""' => \&asString, 'eq' => \&thiseq;

 # Values of an object's Type field.
 sub EMPTY  () { 1 }
 sub ANY    () { 2 }
 sub MIXED  () { 3 }
 sub NAME   () { 4 }
 sub CHOICE () { 5 }
 sub SEQ    () { 6 }

 # Type predicates: each compares the Type field with one constant.
 sub isempty {
   my ($model) = @_;
   return $model->{Type} == EMPTY;
 }

 sub isany {
   my ($model) = @_;
   return $model->{Type} == ANY;
 }

 sub ismixed {
   my ($model) = @_;
   return $model->{Type} == MIXED;
 }

 sub isname {
   my ($model) = @_;
   return $model->{Type} == NAME;
 }

 # Return the Tag field.
 sub name {
   my ($model) = @_;
   return $model->{Tag};
 }

 sub ischoice {
   my ($model) = @_;
   return $model->{Type} == CHOICE;
 }

 sub isseq {
   my ($model) = @_;
   return $model->{Type} == SEQ;
 }

 # Return the Quant field.
 sub quant {
   my ($model) = @_;
   return $model->{Quant};
 }

 # Return the elements of the Children field, or undef when there is no
 # such field.
 sub children {
   my ($model) = @_;
   my $kids = $model->{Children};

   return undef unless defined $kids;
   return @$kids;
 }

 # Render the model: 'EMPTY' and 'ANY' as those words, a NAME model as its
 # Tag, a MIXED model as '(#PCDATA|child|...)', and any other model as its
 # children's renderings joined by '|' (CHOICE) or ',' inside parentheses.
 # A true Quant is appended, except for EMPTY and ANY.
 sub asString {
   my ($self) = @_;
   my $type = $self->{Type};
   my $text;

   if ($type == NAME) {
     $text = $self->{Tag};
   }
   elsif ($type == EMPTY) {
     return "EMPTY";
   }
   elsif ($type == ANY) {
     return "ANY";
   }
   elsif ($type == MIXED) {
     $text = '(#PCDATA';
     foreach my $child (@{$self->{Children}}) {
       $text .= '|' . $child;
     }
     $text .= ')';
   }
   else {
     my $separator = ($type == CHOICE) ? '|' : ',';
     my @parts = map { $_->asString } @{$self->{Children}};
     $text = '(' . join($separator, @parts) . ')';
   }

   if ($self->{Quant}) {
     $text .= $self->{Quant};
   }
   return $text;
 }

 # 'eq' overload: compare this model's rendering with the other operand.
 sub thiseq {
   my ($self, $other) = @_;

   return $self->asString eq $other;
 }
 583  
 584  ################################################################
 package XML::Parser::ExpatNB;

 use vars qw(@ISA);
 use Carp;

 @ISA = qw(XML::Parser::Expat);

 # The three entry points inherited from XML::Parser::Expat are replaced
 # by versions that always croak; input is supplied through parse_more
 # and parse_done instead.
 sub parse {
   my ($self) = @_;
   my $class = ref($self);
   croak "parse method not supported in $class";
 }

 sub parsestring {
   my ($self) = @_;
   my $class = ref($self);
   croak "parsestring method not supported in $class";
 }

 sub parsefile {
   my ($self) = @_;
   my $class = ref($self);
   croak "parsefile method not supported in $class";
 }

 # parse_more(DATA)
 #
 # Mark the parse as in progress and pass DATA to ParsePartial.  Croaks
 # with ErrorMessage when ParsePartial returns a false value.
 sub parse_more {
   my ($self, $data) = @_;

   $self->{_State_} = 1;
   my $ok = XML::Parser::Expat::ParsePartial($self->{Parser}, $data);

   croak $self->{ErrorMessage} unless $ok;
 }

 # parse_done
 #
 # Finish the parse with ParseDone.  On failure the parser is released and
 # ErrorMessage is croaked.  On success _State_ becomes 2, the
 # FinalHandler (if any) is called in the caller's context, the parser is
 # released and the handler's result is returned.  Without a FinalHandler
 # the scalar result is ParseDone's value and the list result is empty.
 sub parse_done {
   my ($self) = @_;

   my $ok = XML::Parser::Expat::ParseDone($self->{Parser});
   if (!$ok) {
     my $why = $self->{ErrorMessage};
     $self->release;
     croak $why;
   }

   $self->{_State_} = 2;

   my $scalar_value = $ok;
   my @list_value   = ();
   my $finisher     = $self->{FinalHandler};
   if (defined $finisher) {
     # In void context the handler is called in scalar context.
     if (wantarray) {
       @list_value = $finisher->($self);
     }
     else {
       $scalar_value = $finisher->($self);
     }
   }

   $self->release;

   return unless defined wantarray;
   return wantarray ? @list_value : $scalar_value;
 }
 648  
 649  ################################################################
 650  
 package XML::Parser::Encinfo;

 # Destructor: pass the object to XML::Parser::Expat::FreeEncoding.
 sub DESTROY {
   my ($self) = @_;
   return XML::Parser::Expat::FreeEncoding($self);
 }
 657  
 658  1;
 659  
 660  __END__
 661  
 662  =head1 NAME
 663  
 664  XML::Parser::Expat - Lowlevel access to James Clark's expat XML parser
 665  
 666  =head1 SYNOPSIS
 667  
 668   use XML::Parser::Expat;
 669  
 670   $parser = new XML::Parser::Expat;
 671   $parser->setHandlers('Start' => \&sh,
 672                        'End'   => \&eh,
 673                        'Char'  => \&ch);
 674   open(FOO, 'info.xml') or die "Couldn't open";
 675   $parser->parse(*FOO);
 676   close(FOO);
 677   # $parser->parse('<foo id="me"> here <em>we</em> go </foo>');
 678  
 679   sub sh
 680   {
 681     my ($p, $el, %atts) = @_;
 682     $p->setHandlers('Char' => \&spec)
 683       if ($el eq 'special');
 684     ...
 685   }
 686  
 687   sub eh
 688   {
 689     my ($p, $el) = @_;
 690     $p->setHandlers('Char' => \&ch)  # Special elements won't contain
 691       if ($el eq 'special');         # other special elements
 692     ...
 693   } 
 694  
 695  =head1 DESCRIPTION
 696  
 697  This module provides an interface to James Clark's XML parser, expat. As in
 698  expat, a single instance of the parser can only parse one document. Calls
 699  to parsestring after the first for a given instance will die.
 700  
 701  Expat (and XML::Parser::Expat) are event based. As the parser recognizes
 702  parts of the document (say the start or end of an XML element), then any
 703  handlers registered for that type of an event are called with suitable
 704  parameters.
 705  
 706  =head1 METHODS
 707  
 708  =over 4
 709  
 710  =item new
 711  
 712  This is a class method, the constructor for XML::Parser::Expat. Options are
 713  passed as keyword value pairs. The recognized options are:
 714  
 715  =over 4
 716  
 717  =item * ProtocolEncoding
 718  
 719  The protocol encoding name. The default is none. The expat built-in
 720  encodings are: C<UTF-8>, C<ISO-8859-1>, C<UTF-16>, and C<US-ASCII>.
 721  Other encodings may be used if they have encoding maps in one of the
 722  directories in the @Encoding_Path list. Setting the protocol encoding
 723  overrides any encoding in the XML declaration.
 724  
 725  =item * Namespaces
 726  
 727  When this option is given with a true value, then the parser does namespace
 728  processing. By default, namespace processing is turned off. When it is
 729  turned on, the parser consumes I<xmlns> attributes and strips off prefixes
 730  from element and attributes names where those prefixes have a defined
 731  namespace. A name's namespace can be found using the L<"namespace"> method
 732  and two names can be checked for absolute equality with the L<"eq_name">
 733  method.
 734  
 735  =item * NoExpand
 736  
 737  Normally, the parser will try to expand references to entities defined in
 738  the internal subset. If this option is set to a true value, and a default
 739  handler is also set, then the default handler will be called when an
 740  entity reference is seen in text. This has no effect if a default handler
 741  has not been registered, and it has no effect on the expansion of entity
 742  references inside attribute values.
 743  
 744  =item * Stream_Delimiter
 745  
 746  This option takes a string value. When this string is found alone on a line
 747  while parsing from a stream, then the parse is ended as if it saw an end of
 748  file. The intended use is with a stream of xml documents in a MIME multipart
 749  format. The string should not contain a trailing newline.
 750  
 751  =item * ErrorContext
 752  
 753  When this option is defined, errors are reported in context. The value
 754  of ErrorContext should be the number of lines to show on either side of
 755  the line in which the error occurred.
 756  
 757  =item * ParseParamEnt
 758  
 759  Unless standalone is set to "yes" in the XML declaration, setting this to
 760  a true value allows the external DTD to be read, and parameter entities
 761  to be parsed and expanded.
 762  
 763  =item * Base
 764  
 765  The base to use for relative pathnames or URLs. This can also be done by
 766  using the base method.
 767  
 768  =back
 769  
 770  =item setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]])
 771  
 772  This method registers handlers for the various events. If no handlers are
 773  registered, then a call to parsestring or parsefile will only determine if
 774  the corresponding XML document is well formed (by returning without error.)
 775  This may be called from within a handler, after the parse has started.
 776  
 777  Setting a handler to something that evaluates to false unsets that
 778  handler.
 779  
 780  This method returns a list of type, handler pairs corresponding to the
 781  input. The handlers returned are the ones that were in effect before the
 782  call to setHandlers.
 783  
 784  The recognized events and the parameters passed to the corresponding
 785  handlers are:
 786  
 787  =over 4
 788  
 789  =item * Start             (Parser, Element [, Attr, Val [,...]])
 790  
 791  This event is generated when an XML start tag is recognized. Parser is
 792  an XML::Parser::Expat instance. Element is the name of the XML element that
 793  is opened with the start tag. The Attr & Val pairs are generated for each
 794  attribute in the start tag.
 795  
 796  =item * End               (Parser, Element)
 797  
 798  This event is generated when an XML end tag is recognized. Note that
 799  an XML empty tag (<foo/>) generates both a start and an end event.
 800  
 801  There is always a lower level start and end handler installed that wrap
 802  the corresponding callbacks. This is to handle the context mechanism.
 803  A consequence of this is that the default handler (see below) will not
 804  see a start tag or end tag unless the default_current method is called.
 805  
 806  =item * Char              (Parser, String)
 807  
 808  This event is generated when non-markup is recognized. The non-markup
 809  sequence of characters is in String. A single non-markup sequence of
 810  characters may generate multiple calls to this handler. Whatever the
 811  encoding of the string in the original document, this is given to the
 812  handler in UTF-8.
 813  
 814  =item * Proc              (Parser, Target, Data)
 815  
 816  This event is generated when a processing instruction is recognized.
 817  
 818  =item * Comment           (Parser, String)
 819  
 820  This event is generated when a comment is recognized.
 821  
 822  =item * CdataStart        (Parser)
 823  
 824  This is called at the start of a CDATA section.
 825  
 826  =item * CdataEnd          (Parser)
 827  
 828  This is called at the end of a CDATA section.
 829  
 830  =item * Default           (Parser, String)
 831  
 832  This is called for any characters that don't have a registered handler.
 833  This includes both characters that are part of markup for which no
 834  events are generated (markup declarations) and characters that
 835  could generate events, but for which no handler has been registered.
 836  
 837  Whatever the encoding in the original document, the string is returned to
 838  the handler in UTF-8.
 839  
 840  =item * Unparsed          (Parser, Entity, Base, Sysid, Pubid, Notation)
 841  
 842  This is called for a declaration of an unparsed entity. Entity is the name
 843  of the entity. Base is the base to be used for resolving a relative URI.
 844  Sysid is the system id. Pubid is the public id. Notation is the notation
 845  name. Base and Pubid may be undefined.
 846  
 847  =item * Notation          (Parser, Notation, Base, Sysid, Pubid)
 848  
 849  This is called for a declaration of notation. Notation is the notation name.
 850  Base is the base to be used for resolving a relative URI. Sysid is the system
 851  id. Pubid is the public id. Base, Sysid, and Pubid may all be undefined.
 852  
 853  =item * ExternEnt         (Parser, Base, Sysid, Pubid)
 854  
 855  This is called when an external entity is referenced. Base is the base to be
 856  used for resolving a relative URI. Sysid is the system id. Pubid is the public
 857  id. Base, and Pubid may be undefined.
 858  
 859  This handler should either return a string, which represents the contents of
 860  the external entity, or return an open filehandle that can be read to obtain
 861  the contents of the external entity, or return undef, which indicates the
 862  external entity couldn't be found and will generate a parse error.
 863  
 864  If an open filehandle is returned, it must be returned as either a glob
 865  (*FOO) or as a reference to a glob (e.g. an instance of IO::Handle).
 866  
 867  =item * ExternEntFin      (Parser)
 868  
 869  This is called after an external entity has been parsed. It allows
 870  applications to perform cleanup on actions performed in the above
 871  ExternEnt handler.
 872  
 873  =item * Entity            (Parser, Name, Val, Sysid, Pubid, Ndata, IsParam)
 874  
 875  This is called when an entity is declared. For internal entities, the Val
 876  parameter will contain the value and the remaining three parameters will
 877  be undefined. For external entities, the Val parameter
 878  will be undefined, the Sysid parameter will have the system id, the Pubid
 879  parameter will have the public id if it was provided (it will be undefined
 880  otherwise), the Ndata parameter will contain the notation for unparsed
 881  entities. If this is a parameter entity declaration, then the IsParam
 882  parameter is true.
 883  
 884  Note that this handler and the Unparsed handler above overlap. If both are
 885  set, then this handler will not be called for unparsed entities.
 886  
 887  =item * Element           (Parser, Name, Model)
 888  
 889  The element handler is called when an element declaration is found. Name is
 890  the element name, and Model is the content model as an
 891  XML::Parser::ContentModel object. See L<"XML::Parser::ContentModel Methods">
 892  for methods available for this class.
 893  
 894  =item * Attlist           (Parser, Elname, Attname, Type, Default, Fixed)
 895  
 896  This handler is called for each attribute in an ATTLIST declaration.
 897  So an ATTLIST declaration that has multiple attributes
 898  will generate multiple calls to this handler. The Elname parameter is the
 899  name of the element with which the attribute is being associated. The Attname
 900  parameter is the name of the attribute. Type is the attribute type, given as
 901  a string. Default is the default value, which will either be "#REQUIRED",
 902  "#IMPLIED" or a quoted string (i.e. the returned string will begin and end
 903  with a quote character). If Fixed is true, then this is a fixed attribute.
 904  
 905  =item * Doctype           (Parser, Name, Sysid, Pubid, Internal)
 906  
 907  This handler is called for DOCTYPE declarations. Name is the document type
 908  name. Sysid is the system id of the document type, if it was provided,
 909  otherwise it's undefined. Pubid is the public id of the document type,
 910  which will be undefined if no public id was given. Internal will be
 911  true or false, indicating whether or not the doctype declaration contains
 912  an internal subset.
 913  
 914  =item * DoctypeFin        (Parser)
 915  
 916  This handler is called after parsing of the DOCTYPE declaration has finished,
 917  including any internal or external DTD declarations.
 918  
 919  =item * XMLDecl           (Parser, Version, Encoding, Standalone)
 920  
 921  This handler is called for XML declarations. Version is a string containing
 922  the version. Encoding is either undefined or contains an encoding string.
 923  Standalone is either undefined, or true or false. Undefined indicates
 924  that no standalone parameter was given in the XML declaration. True or
 925  false indicates "yes" or "no" respectively.
 926  
 927  =back
 928  
 929  =item namespace(name)
 930  
 931  Return the URI of the namespace that the name belongs to. If the name doesn't
 932  belong to any namespace, an undef is returned. This is only valid on names
 933  received through the Start or End handlers from a single document, or through
 934  a call to the generate_ns_name method. In other words, don't use names
 935  generated from one instance of XML::Parser::Expat with other instances.
 936  
 937  =item eq_name(name1, name2)
 938  
 939  Return true if name1 and name2 are identical (i.e. same name and from
 940  the same namespace.) This is only meaningful if both names were obtained
 941  through the Start or End handlers from a single document, or through
 942  a call to the generate_ns_name method.
 943  
 944  =item generate_ns_name(name, namespace)
 945  
 946  Return a name, associated with a given namespace, good for using with the
 947  above 2 methods. The namespace argument should be the namespace URI, not
 948  a prefix.
 949  
 950  =item new_ns_prefixes
 951  
 952  When called from a start tag handler, returns namespace prefixes declared
 953  with this start tag. If called elsewhere (or if there were no namespace
 954  prefixes declared), it returns an empty list. Setting of the default
 955  namespace is indicated with '#default' as a prefix.
 956  
 957  =item expand_ns_prefix(prefix)
 958  
 959  Return the uri to which the given prefix is currently bound. Returns
 960  undef if the prefix isn't currently bound. Use '#default' to find the
 961  current binding of the default namespace (if any).
 962  
 963  =item current_ns_prefixes
 964  
 965  Return a list of currently bound namespace prefixes. The order of the
 966  prefixes in the list has no meaning. If the default namespace is
 967  currently bound, '#default' appears in the list.
 968  
 969  =item recognized_string
 970  
 971  Returns the string from the document that was recognized in order to call
 972  the current handler. For instance, when called from a start handler, it
 973  will give us the start-tag string. The string is encoded in UTF-8.
 974  This method doesn't return a meaningful string inside declaration handlers.
 975  
 976  =item original_string
 977  
 978  Returns the verbatim string from the document that was recognized in
 979  order to call the current handler. The string is in the original document
 980  encoding. This method doesn't return a meaningful string inside declaration
 981  handlers.
 982  
 983  =item default_current
 984  
 985  When called from a handler, causes the sequence of characters that generated
 986  the corresponding event to be sent to the default handler (if one is
 987  registered). Use of this method is deprecated in favor of the recognized_string
 988  method, which you can use without installing a default handler. This
 989  method doesn't deliver a meaningful string to the default handler when
 990  called from inside declaration handlers.
 991  
 992  =item xpcroak(message)
 993  
 994  Concatenate onto the given message the current line number within the
 995  XML document plus the message implied by ErrorContext. Then croak with
 996  the formed message.
 997  
 998  =item xpcarp(message)
 999  
1000  Concatenate onto the given message the current line number within the
1001  XML document plus the message implied by ErrorContext. Then carp with
1002  the formed message.
1003  
1004  =item current_line
1005  
1006  Returns the line number of the current position of the parse.
1007  
1008  =item current_column
1009  
1010  Returns the column number of the current position of the parse.
1011  
1012  =item current_byte
1013  
1014  Returns the current position of the parse.
1015  
1016  =item base([NEWBASE]);
1017  
1018  Returns the current value of the base for resolving relative URIs. If
1019  NEWBASE is supplied, changes the base to that value.
1020  
1021  =item context
1022  
1023  Returns a list of element names that represent open elements, with the
1024  last one being the innermost. Inside start and end tag handlers, the
1025  last one will be the tag of the parent element.
1026  
1027  =item current_element
1028  
1029  Returns the name of the innermost currently opened element. Inside
1030  start or end handlers, returns the parent of the element associated
1031  with those tags.
1032  
1033  =item in_element(NAME)
1034  
1035  Returns true if NAME is equal to the name of the innermost currently opened
1036  element. If namespace processing is being used and you want to check
1037  against a name that may be in a namespace, then use the generate_ns_name
1038  method to create the NAME argument.
1039  
1040  =item within_element(NAME)
1041  
1042  Returns the number of times the given name appears in the context list.
1043  If namespace processing is being used and you want to check
1044  against a name that may be in a namespace, then use the generate_ns_name
1045  method to create the NAME argument.
1046  
1047  =item depth
1048  
1049  Returns the size of the context list.
1050  
1051  =item element_index
1052  
1053  Returns an integer that is the depth-first visit order of the current
1054  element. This will be zero outside of the root element. For example,
1055  this will return 1 when called from the start handler for the root element
1056  start tag.
1057  
1058  =item skip_until(INDEX)
1059  
1060  INDEX is an integer that represents an element index. When this method
1061  is called, all handlers are suspended until the start tag for an element
1062  that has an index number equal to INDEX is seen. If a start handler has
1063  been set, then this is the first tag that the start handler will see
1064  after skip_until has been called.
1065  
1066  
1067  =item position_in_context(LINES)
1068  
1069  Returns a string that shows the current parse position. LINES should be
1070  an integer >= 0 that represents the number of lines on either side of the
1071  current parse line to place into the returned string.
1072  
1073  =item xml_escape(TEXT [, CHAR [, CHAR ...]])
1074  
1075  Returns TEXT with markup characters turned into character entities. Any
1076  additional characters provided as arguments are also turned into character
1077  references where found in TEXT.
1078  
1079  =item parse (SOURCE)
1080  
1081  The SOURCE parameter should either be a string containing the whole XML
1082  document, or it should be an open IO::Handle. Only a single document
1083  may be parsed for a given instance of XML::Parser::Expat, so this will croak
1084  if it's been called previously for this instance.
1085  
1086  =item parsestring(XML_DOC_STRING)
1087  
1088  Parses the given string as an XML document. Only a single document may be
1089  parsed for a given instance of XML::Parser::Expat, so this will die if either
1090  parsestring or parsefile has been called for this instance previously.
1091  
1092  This method is deprecated in favor of the parse method.
1093  
1094  =item parsefile(FILENAME)
1095  
1096  Parses the XML document in the given file. Will die if parsestring or
1097  parsefile has been called previously for this instance.
1098  
1099  =item is_defaulted(ATTNAME)
1100  
1101  NO LONGER WORKS. To find out if an attribute is defaulted please use
1102  the specified_attr method.
1103  
1104  =item specified_attr
1105  
1106  When the start handler receives lists of attributes and values, the
1107  non-defaulted (i.e. explicitly specified) attributes occur in the list
1108  first. This method returns the number of specified items in the list.
1109  So if this number is equal to the length of the list, there were no
1110  defaulted values. Otherwise the number points to the index of the
1111  first defaulted attribute name.
1112  
1113  =item finish
1114  
1115  Unsets all handlers (including internal ones that set context), but expat
1116  continues parsing to the end of the document or until it finds an error.
1117  It should finish up a lot faster than with the handlers set.
1118  
1119  =item release
1120  
1121  There are data structures used by XML::Parser::Expat that have circular
1122  references. This means that these structures will never be garbage
1123  collected unless these references are explicitly broken. Calling this
1124  method breaks those references (and makes the instance unusable.)
1125  
1126  Normally, higher level calls handle this for you, but if you are using
1127  XML::Parser::Expat directly, then it's your responsibility to call it.
1128  
1129  =back
1130  
1131  =head2 XML::Parser::ContentModel Methods
1132  
1133  The element declaration handlers are passed objects of this class as the
1134  content model of the element declaration. They also represent content
1135  particles, components of a content model.
1136  
1137  When referred to as a string, these objects are automagically converted to a
1138  string representation of the model (or content particle).
1139  
1140  =over 4
1141  
1142  =item isempty
1143  
1144  This method returns true if the object is "EMPTY", false otherwise.
1145  
1146  =item isany
1147  
1148  This method returns true if the object is "ANY", false otherwise.
1149  
1150  =item ismixed
1151  
1152  This method returns true if the object is "(#PCDATA)" or "(#PCDATA|...)*",
1153  false otherwise.
1154  
1155  =item isname
1156  
1157  This method returns true if the object is an element name.
1158  
1159  =item ischoice
1160  
1161  This method returns true if the object is a choice of content particles.
1162  
1163  
1164  =item isseq
1165  
1166  This method returns true if the object is a sequence of content particles.
1167  
1168  =item quant
1169  
1170  This method returns undef or a string representing the quantifier
1171  ('?', '*', '+') associated with the model or particle.
1172  
1173  =item children
1174  
1175  This method returns undef or (for mixed, choice, and sequence types)
1176  an array of component content particles. There will always be at least
1177  one component for choices and sequences, but for a mixed content model
1178  of pure PCDATA, "(#PCDATA)", undef is returned.
1179  
1180  =back
1181  
1182  =head2 XML::Parser::ExpatNB Methods
1183  
1184  The class XML::Parser::ExpatNB is a subclass of XML::Parser::Expat used
1185  for non-blocking access to the expat library. It does not support the parse,
1186  parsestring, or parsefile methods, but it does have these additional methods:
1187  
1188  =over 4
1189  
1190  =item parse_more(DATA)
1191  
1192  Feed expat more text to munch on.
1193  
1194  =item parse_done
1195  
1196  Tell expat that it's gotten the whole document.
1197  
1198  =back
1199  
1200  =head1 FUNCTIONS
1201  
1202  =over 4
1203  
1204  =item XML::Parser::Expat::load_encoding(ENCODING)
1205  
1206  Load an external encoding. ENCODING is either the name of an encoding or
1207  the name of a file. The basename is converted to lowercase and a '.enc'
1208  extension is appended unless there's one already there. Then, unless
1209  it's an absolute pathname (i.e. begins with '/'), the first file by that
1210  name discovered in the @Encoding_Path path list is used.
1211  
1212  The encoding in the file is loaded and kept in the %Encoding_Table
1213  table. Earlier encodings of the same name are replaced.
1214  
1215  This function is automatically called by expat when it encounters an encoding
1216  it doesn't know about. Expat shouldn't call this twice for the same
1217  encoding name. The only reason users should use this function is to
1218  explicitly load an encoding not contained in the @Encoding_Path list.
1219  
1220  =back
1221  
1222  =head1 AUTHORS
1223  
1224  Larry Wall <F<larry@wall.org>> wrote version 1.0.
1225  
1226  Clark Cooper <F<coopercc@netheaven.com>> picked up support, changed the API
1227  for this version (2.x), provided documentation, and added some standard
1228  package features.
1229  
1230  =cut


Generated: Tue Mar 17 22:47:18 2015 Cross-referenced by PHPXref 0.7.1