[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/se3-unattended/var/se3/unattended/install/linuxaux/opt/perl/lib/5.10.0/Text/ -> ParseWords.pm (source)

   1  package Text::ParseWords;
   2  
   3  use vars qw($VERSION @ISA @EXPORT $PERL_SINGLE_QUOTE);
   4  $VERSION = "3.26";
   5  
   6  require 5.000;
   7  
   8  use Exporter;
   9  @ISA = qw(Exporter);
  10  @EXPORT = qw(shellwords quotewords nested_quotewords parse_line);
  11  @EXPORT_OK = qw(old_shellwords);
  12  
  13  
  14  sub shellwords {
  15      my (@lines) = @_;
  16      my @allwords;
  17  
  18      foreach my $line (@lines) {
  19      $line =~ s/^\s+//;
  20      my @words = parse_line('\s+', 0, $line);
  21      pop @words if (@words and !defined $words[-1]);
  22      return() unless (@words || !length($line));
  23      push(@allwords, @words);
  24      }
  25      return(@allwords);
  26  }
  27  
  28  
  29  
  30  sub quotewords {
  31      my($delim, $keep, @lines) = @_;
  32      my($line, @words, @allwords);
  33  
  34      foreach $line (@lines) {
  35      @words = parse_line($delim, $keep, $line);
  36      return() unless (@words || !length($line));
  37      push(@allwords, @words);
  38      }
  39      return(@allwords);
  40  }
  41  
  42  
  43  
  44  sub nested_quotewords {
  45      my($delim, $keep, @lines) = @_;
  46      my($i, @allwords);
  47  
  48      for ($i = 0; $i < @lines; $i++) {
  49      @{$allwords[$i]} = parse_line($delim, $keep, $lines[$i]);
  50      return() unless (@{$allwords[$i]} || !length($lines[$i]));
  51      }
  52      return(@allwords);
  53  }
  54  
  55  
  56  
  57  sub parse_line {
  58      my($delimiter, $keep, $line) = @_;
  59      my($word, @pieces);
  60  
  61      no warnings 'uninitialized';    # we will be testing undef strings
  62  
  63      while (length($line)) {
  64          # This pattern is optimised to be stack conservative on older perls.
  65          # Do not refactor without being careful and testing it on very long strings.
  66          # See Perl bug #42980 for an example of a stack busting input.
  67          $line =~ s/^
  68                      (?: 
  69                          # double quoted string
  70                          (")                             # $quote
  71                          ((?>[^\\"]*(?:\\.[^\\"]*)*))"   # $quoted 
  72              |    # --OR--
  73                          # singe quoted string
  74                          (')                             # $quote
  75                          ((?>[^\\']*(?:\\.[^\\']*)*))'   # $quoted
  76                      |   # --OR--
  77                          # unquoted string
  78                  (                               # $unquoted 
  79                              (?:\\.|[^\\"'])*?           
  80                          )        
  81                          # followed by
  82                  (                               # $delim
  83                              \Z(?!\n)                    # EOL
  84                          |   # --OR--
  85                              (?-x:$delimiter)            # delimiter
  86                          |   # --OR--                    
  87                              (?!^)(?=["'])               # a quote
  88                          )  
  89              )//xs or return;        # extended layout                  
  90          my ($quote, $quoted, $unquoted, $delim) = (($1 ? ($1,$2) : ($3,$4)), $5, $6);
  91  
  92  
  93      return() unless( defined($quote) || length($unquoted) || length($delim));
  94  
  95          if ($keep) {
  96          $quoted = "$quote$quoted$quote";
  97      }
  98          else {
  99          $unquoted =~ s/\\(.)/$1/sg;
 100          if (defined $quote) {
 101          $quoted =~ s/\\(.)/$1/sg if ($quote eq '"');
 102          $quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'");
 103              }
 104      }
 105          $word .= substr($line, 0, 0);    # leave results tainted
 106          $word .= defined $quote ? $quoted : $unquoted;
 107   
 108          if (length($delim)) {
 109              push(@pieces, $word);
 110              push(@pieces, $delim) if ($keep eq 'delimiters');
 111              undef $word;
 112          }
 113          if (!length($line)) {
 114              push(@pieces, $word);
 115      }
 116      }
 117      return(@pieces);
 118  }
 119  
 120  
 121  
 122  sub old_shellwords {
 123  
 124      # Usage:
 125      #    use ParseWords;
 126      #    @words = old_shellwords($line);
 127      #    or
 128      #    @words = old_shellwords(@lines);
 129      #    or
 130      #    @words = old_shellwords();    # defaults to $_ (and clobbers it)
 131  
 132      no warnings 'uninitialized';    # we will be testing undef strings
 133      local *_ = \join('', @_) if @_;
 134      my (@words, $snippet);
 135  
 136      s/\A\s+//;
 137      while ($_ ne '') {
 138      my $field = substr($_, 0, 0);    # leave results tainted
 139      for (;;) {
 140          if (s/\A"(([^"\\]|\\.)*)"//s) {
 141          ($snippet = $1) =~ s#\\(.)#$1#sg;
 142          }
 143          elsif (/\A"/) {
 144          require Carp;
 145          Carp::carp("Unmatched double quote: $_");
 146          return();
 147          }
 148          elsif (s/\A'(([^'\\]|\\.)*)'//s) {
 149          ($snippet = $1) =~ s#\\(.)#$1#sg;
 150          }
 151          elsif (/\A'/) {
 152          require Carp;
 153          Carp::carp("Unmatched single quote: $_");
 154          return();
 155          }
 156          elsif (s/\A\\(.?)//s) {
 157          $snippet = $1;
 158          }
 159          elsif (s/\A([^\s\\'"]+)//) {
 160          $snippet = $1;
 161          }
 162          else {
 163          s/\A\s+//;
 164          last;
 165          }
 166          $field .= $snippet;
 167      }
 168      push(@words, $field);
 169      }
 170      return @words;
 171  }
 172  
 173  1;
 174  
 175  __END__
 176  
 177  =head1 NAME
 178  
 179  Text::ParseWords - parse text into an array of tokens or array of arrays
 180  
 181  =head1 SYNOPSIS
 182  
 183    use Text::ParseWords;
 184    @lists = &nested_quotewords($delim, $keep, @lines);
 185    @words = &quotewords($delim, $keep, @lines);
 186    @words = &shellwords(@lines);
 187    @words = &parse_line($delim, $keep, $line);
 188    @words = &old_shellwords(@lines); # DEPRECATED!
 189  
 190  =head1 DESCRIPTION
 191  
 192  The &nested_quotewords() and &quotewords() functions accept a delimiter 
 193  (which can be a regular expression)
 194  and a list of lines and then breaks those lines up into a list of
 195  words ignoring delimiters that appear inside quotes.  &quotewords()
 196  returns all of the tokens in a single long list, while &nested_quotewords()
 197  returns a list of token lists corresponding to the elements of @lines.
 198  &parse_line() does tokenizing on a single string.  The &*quotewords()
 199  functions simply call &parse_line(), so if you're only splitting
 200  one line you can call &parse_line() directly and save a function
 201  call.
 202  
 203  The $keep argument is a boolean flag.  If true, then the tokens are
 204  split on the specified delimiter, but all other characters (quotes,
 205  backslashes, etc.) are kept in the tokens.  If $keep is false then the
 206  &*quotewords() functions remove all quotes and backslashes that are
 207  not themselves backslash-escaped or inside of single quotes (i.e.,
 208  &quotewords() tries to interpret these characters just like the Bourne
 209  shell).  NB: these semantics are significantly different from the
 210  original version of this module shipped with Perl 5.000 through 5.004.
 211  As an additional feature, $keep may be the keyword "delimiters" which
 212  causes the functions to preserve the delimiters in each string as
 213  tokens in the token lists, in addition to preserving quote and
 214  backslash characters.
 215  
 216  &shellwords() is written as a special case of &quotewords(), and it
 217  does token parsing with whitespace as a delimiter-- similar to most
 218  Unix shells.
 219  
 220  =head1 EXAMPLES
 221  
 222  The sample program:
 223  
 224    use Text::ParseWords;
 225    @words = &quotewords('\s+', 0, q{this   is "a test" of\ quotewords \"for you});
 226    $i = 0;
 227    foreach (@words) {
 228        print "$i: <$_>\n";
 229        $i++;
 230    }
 231  
 232  produces:
 233  
 234    0: <this>
 235    1: <is>
 236    2: <a test>
 237    3: <of quotewords>
 238    4: <"for>
 239    5: <you>
 240  
 241  demonstrating:
 242  
 243  =over 4
 244  
 245  =item 0
 246  
 247  a simple word
 248  
 249  =item 1
 250  
 251  multiple spaces are skipped because of our $delim
 252  
 253  =item 2
 254  
 255  use of quotes to include a space in a word
 256  
 257  =item 3
 258  
 259  use of a backslash to include a space in a word
 260  
 261  =item 4
 262  
 263  use of a backslash to remove the special meaning of a double-quote
 264  
 265  =item 5
 266  
 267  another simple word (note the lack of effect of the
 268  backslashed double-quote)
 269  
 270  =back
 271  
 272  Replacing C<&quotewords('\s+', 0, q{this   is...})>
 273  with C<&shellwords(q{this   is...})>
 274  is a simpler way to accomplish the same thing.
 275  
 276  =head1 AUTHORS
 277  
 278  Maintainer is Hal Pomeranz <pomeranz@netcom.com>, 1994-1997 (Original
 279  author unknown).  Much of the code for &parse_line() (including the
 280  primary regexp) from Joerk Behrends <jbehrends@multimediaproduzenten.de>.
 281  
 282  Examples section another documentation provided by John Heidemann 
 283  <johnh@ISI.EDU>
 284  
 285  Bug reports, patches, and nagging provided by lots of folks-- thanks
 286  everybody!  Special thanks to Michael Schwern <schwern@envirolink.org>
 287  for assuring me that a &nested_quotewords() would be useful, and to 
 288  Jeff Friedl <jfriedl@yahoo-inc.com> for telling me not to worry about
 289  error-checking (sort of-- you had to be there).
 290  
 291  =cut


Generated: Tue Mar 17 22:47:18 2015 Cross-referenced by PHPXref 0.7.1