[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/se3-unattended/var/se3/unattended/install/linuxaux/opt/perl/lib/5.10.0/i586-linux-thread-multi/Encode/CN/ -> HZ.pm (source)

   1  package Encode::CN::HZ;
   2  
   3  use strict;
   4  use warnings;
   5  
   6  use vars qw($VERSION);
   7  $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
   8  
   9  use Encode qw(:fallbacks);
  10  
  11  use base qw(Encode::Encoding);
  12  __PACKAGE__->Define('hz');
  13  
  14  # HZ is a combination of ASCII and escaped GB, so we implement it
  15  # with the GB2312(raw) encoding here. Cf. RFCs 1842 & 1843.
  16  
  17  # not ported for EBCDIC.  Which should be used, "~" or "\x7E"?
  18  
  19  sub needs_lines { 1 }
  20  
  21  sub decode ($$;$) {
  22      my ( $obj, $str, $chk ) = @_;
  23  
  24      my $GB  = Encode::find_encoding('gb2312-raw');
  25      my $ret = '';
  26      my $in_ascii = 1;    # default mode is ASCII.
  27  
  28      while ( length $str ) {
  29          if ($in_ascii) {    # ASCII mode
  30              if ( $str =~ s/^([\x00-\x7D\x7F]+)// ) {    # no '~' => ASCII
  31                  $ret .= $1;
  32  
  33                  # EBCDIC should need ascii2native, but not ported.
  34              }
  35              elsif ( $str =~ s/^\x7E\x7E// ) {           # escaped tilde
  36                  $ret .= '~';
  37              }
  38              elsif ( $str =~ s/^\x7E\cJ// ) {    # '\cJ' == LF in ASCII
  39                  1;                              # no-op
  40              }
  41              elsif ( $str =~ s/^\x7E\x7B// ) {    # '~{'
  42                  $in_ascii = 0;                   # to GB
  43              }
  44              else {    # encounters an invalid escape, \x80 or greater
  45                  last;
  46              }
  47          }
  48          else {        # GB mode; the byte ranges are as in RFC 1843.
  49              no warnings 'uninitialized';
  50              if ( $str =~ s/^((?:[\x21-\x77][\x21-\x7E])+)// ) {
  51                  $ret .= $GB->decode( $1, $chk );
  52              }
  53              elsif ( $str =~ s/^\x7E\x7D// ) {    # '~}'
  54                  $in_ascii = 1;
  55              }
  56              else {                               # invalid
  57                  last;
  58              }
  59          }
  60      }
  61      $_[1] = '' if $chk;    # needs_lines guarantees no partial character
  62      return $ret;
  63  }
  64  
  65  sub cat_decode {
  66      my ( $obj, undef, $src, $pos, $trm, $chk ) = @_;
  67      my ( $rdst, $rsrc, $rpos ) = \@_[ 1 .. 3 ];
  68  
  69      my $GB  = Encode::find_encoding('gb2312-raw');
  70      my $ret = '';
  71      my $in_ascii = 1;      # default mode is ASCII.
  72  
  73      my $ini_pos = pos($$rsrc);
  74  
  75      substr( $src, 0, $pos ) = '';
  76  
  77      my $ini_len = bytes::length($src);
  78  
  79      # $trm is the first of the pair '~~', then 2nd tilde is to be removed.
  80      # XXX: Is better C<$src =~ s/^\x7E// or die if ...>?
  81      $src =~ s/^\x7E// if $trm eq "\x7E";
  82  
  83      while ( length $src ) {
  84          my $now;
  85          if ($in_ascii) {    # ASCII mode
  86              if ( $src =~ s/^([\x00-\x7D\x7F])// ) {    # no '~' => ASCII
  87                  $now = $1;
  88              }
  89              elsif ( $src =~ s/^\x7E\x7E// ) {          # escaped tilde
  90                  $now = '~';
  91              }
  92              elsif ( $src =~ s/^\x7E\cJ// ) {    # '\cJ' == LF in ASCII
  93                  next;
  94              }
  95              elsif ( $src =~ s/^\x7E\x7B// ) {    # '~{'
  96                  $in_ascii = 0;                   # to GB
  97                  next;
  98              }
  99              else {    # encounters an invalid escape, \x80 or greater
 100                  last;
 101              }
 102          }
 103          else {        # GB mode; the byte ranges are as in RFC 1843.
 104              if ( $src =~ s/^((?:[\x21-\x77][\x21-\x7F])+)// ) {
 105                  $now = $GB->decode( $1, $chk );
 106              }
 107              elsif ( $src =~ s/^\x7E\x7D// ) {    # '~}'
 108                  $in_ascii = 1;
 109                  next;
 110              }
 111              else {                               # invalid
 112                  last;
 113              }
 114          }
 115  
 116          next if !defined $now;
 117  
 118          $ret .= $now;
 119  
 120          if ( $now eq $trm ) {
 121              $$rdst .= $ret;
 122              $$rpos = $ini_pos + $pos + $ini_len - bytes::length($src);
 123              pos($$rsrc) = $ini_pos;
 124              return 1;
 125          }
 126      }
 127  
 128      $$rdst .= $ret;
 129      $$rpos = $ini_pos + $pos + $ini_len - bytes::length($src);
 130      pos($$rsrc) = $ini_pos;
 131      return '';    # terminator not found
 132  }
 133  
 134  sub encode($$;$) {
 135      my ( $obj, $str, $chk ) = @_;
 136  
 137      my $GB  = Encode::find_encoding('gb2312-raw');
 138      my $ret = '';
 139      my $in_ascii = 1;    # default mode is ASCII.
 140  
 141      no warnings 'utf8';  # $str may be malformed UTF8 at the end of a chunk.
 142  
 143      while ( length $str ) {
 144          if ( $str =~ s/^([[:ascii:]]+)// ) {
 145              my $tmp = $1;
 146              $tmp =~ s/~/~~/g;    # escapes tildes
 147              if ( !$in_ascii ) {
 148                  $ret .= "\x7E\x7D";    # '~}'
 149                  $in_ascii = 1;
 150              }
 151              $ret .= pack 'a*', $tmp;    # remove UTF8 flag.
 152          }
 153          elsif ( $str =~ s/(.)// ) {
 154              my $s = $1;
 155              my $tmp = $GB->encode( $s, $chk );
 156              last if !defined $tmp;
 157              if ( length $tmp == 2 ) {    # maybe a valid GB char (XXX)
 158                  if ($in_ascii) {
 159                      $ret .= "\x7E\x7B";    # '~{'
 160                      $in_ascii = 0;
 161                  }
 162                  $ret .= $tmp;
 163              }
 164              elsif ( length $tmp ) {        # maybe FALLBACK in ASCII (XXX)
 165                  if ( !$in_ascii ) {
 166                      $ret .= "\x7E\x7D";    # '~}'
 167                      $in_ascii = 1;
 168                  }
 169                  $ret .= $tmp;
 170              }
 171          }
 172          else {    # if $str is malformed UTF8 *and* if length $str != 0.
 173              last;
 174          }
 175      }
 176      $_[1] = $str if $chk;
 177  
 178      # The state at the end of the chunk is discarded, even if in GB mode.
 179      # That results in the combination of GB-OUT and GB-IN, i.e. "~}~{".
 180      # Parhaps it is harmless, but further investigations may be required...
 181  
 182      if ( !$in_ascii ) {
 183          $ret .= "\x7E\x7D";    # '~}'
 184          $in_ascii = 1;
 185      }
 186      return $ret;
 187  }
 188  
 189  1;
 190  __END__
 191  
 192  =head1 NAME
 193  
 194  Encode::CN::HZ -- internally used by Encode::CN
 195  
 196  =cut


Generated: Tue Mar 17 22:47:18 2015 Cross-referenced by PHPXref 0.7.1