[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/se3-unattended/var/se3/unattended/install/linuxaux/opt/perl/lib/5.10.0/i586-linux-thread-multi/Encode/ -> Alias.pm (source)

   1  package Encode::Alias;
   2  use strict;
   3  use warnings;
   4  no warnings 'redefine';
   5  use Encode;
   6  our $VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
   7  sub DEBUG () { 0 }
   8  
   9  use base qw(Exporter);
  10  
  11  # Public, encouraged API is exported by default
  12  
  13  our @EXPORT =
  14    qw (
  15    define_alias
  16    find_alias
  17  );
  18  
  19  our @Alias;    # ordered matching list
  20  our %Alias;    # cached known aliases
  21  
  22  sub find_alias {
  23      my $class = shift;
  24      my $find  = shift;
  25      unless ( exists $Alias{$find} ) {
  26          $Alias{$find} = undef;    # Recursion guard
  27          for ( my $i = 0 ; $i < @Alias ; $i += 2 ) {
  28              my $alias = $Alias[$i];
  29              my $val   = $Alias[ $i + 1 ];
  30              my $new;
  31              if ( ref($alias) eq 'Regexp' && $find =~ $alias ) {
  32                  DEBUG and warn "eval $val";
  33                  $new = eval $val;
  34                  DEBUG and $@ and warn "$val, $@";
  35              }
  36              elsif ( ref($alias) eq 'CODE' ) {
  37                  DEBUG and warn "$alias", "->", "($find)";
  38                  $new = $alias->($find);
  39              }
  40              elsif ( lc($find) eq lc($alias) ) {
  41                  $new = $val;
  42              }
  43              if ( defined($new) ) {
  44                  next if $new eq $find;    # avoid (direct) recursion on bugs
  45                  DEBUG and warn "$alias, $new";
  46                  my $enc =
  47                    ( ref($new) ) ? $new : Encode::find_encoding($new);
  48                  if ($enc) {
  49                      $Alias{$find} = $enc;
  50                      last;
  51                  }
  52              }
  53          }
  54  
  55          # case insensitive search when canonical is not in all lowercase
  56          # RT ticket #7835
  57          unless ( $Alias{$find} ) {
  58              my $lcfind = lc($find);
  59              for my $name ( keys %Encode::Encoding, keys %Encode::ExtModule )
  60              {
  61                  $lcfind eq lc($name) or next;
  62                  $Alias{$find} = Encode::find_encoding($name);
  63                  DEBUG and warn "$find => $name";
  64              }
  65          }
  66      }
  67      if (DEBUG) {
  68          my $name;
  69          if ( my $e = $Alias{$find} ) {
  70              $name = $e->name;
  71          }
  72          else {
  73              $name = "";
  74          }
  75          warn "find_alias($class, $find)->name = $name";
  76      }
  77      return $Alias{$find};
  78  }
  79  
  80  sub define_alias {
  81      while (@_) {
  82          my ( $alias, $name ) = splice( @_, 0, 2 );
  83          unshift( @Alias, $alias => $name );    # newer one has precedence
  84          if ( ref($alias) ) {
  85  
  86              # clear %Alias cache to allow overrides
  87              my @a = keys %Alias;
  88              for my $k (@a) {
  89                  if ( ref($alias) eq 'Regexp' && $k =~ $alias ) {
  90                      DEBUG and warn "delete \$Alias\{$k\}";
  91                      delete $Alias{$k};
  92                  }
  93                  elsif ( ref($alias) eq 'CODE' ) {
  94                      DEBUG and warn "delete \$Alias\{$k\}";
  95                      delete $Alias{ $alias->($name) };
  96                  }
  97              }
  98          }
  99          else {
 100              DEBUG and warn "delete \$Alias\{$alias\}";
 101              delete $Alias{$alias};
 102          }
 103      }
 104  }
 105  
 106  # Allow latin-1 style names as well
 107  # 0  1  2  3  4  5   6   7   8   9  10
 108  our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 );
 109  
 110  # Allow winlatin1 style names as well
 111  our %Winlatin2cp = (
 112      'latin1'     => 1252,
 113      'latin2'     => 1250,
 114      'cyrillic'   => 1251,
 115      'greek'      => 1253,
 116      'turkish'    => 1254,
 117      'hebrew'     => 1255,
 118      'arabic'     => 1256,
 119      'baltic'     => 1257,
 120      'vietnamese' => 1258,
 121  );
 122  
 123  init_aliases();
 124  
 125  sub undef_aliases {
 126      @Alias = ();
 127      %Alias = ();
 128  }
 129  
 130  sub init_aliases {
 131      undef_aliases();
 132  
 133      # Try all-lower-case version should all else fails
 134      define_alias( qr/^(.*)$/ => '"\L$1"' );
 135  
 136      # UTF/UCS stuff
 137      define_alias( qr/^UTF-?7$/i     => '"UTF-7"' );
 138      define_alias( qr/^UCS-?2-?LE$/i => '"UCS-2LE"' );
 139      define_alias(
 140          qr/^UCS-?2-?(BE)?$/i    => '"UCS-2BE"',
 141          qr/^UCS-?4-?(BE|LE)?$/i => 'uc("UTF-32$1")',
 142          qr/^iso-10646-1$/i      => '"UCS-2BE"'
 143      );
 144      define_alias(
 145          qr/^UTF-?(16|32)-?BE$/i => '"UTF-$1BE"',
 146          qr/^UTF-?(16|32)-?LE$/i => '"UTF-$1LE"',
 147          qr/^UTF-?(16|32)$/i     => '"UTF-$1"',
 148      );
 149  
 150      # ASCII
 151      define_alias( qr/^(?:US-?)ascii$/i       => '"ascii"' );
 152      define_alias( 'C'                        => 'ascii' );
 153      define_alias( qr/\bISO[-_]?646[-_]?US$/i => '"ascii"' );
 154  
 155      # Allow variants of iso-8859-1 etc.
 156      define_alias( qr/\biso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' );
 157  
 158      # At least HP-UX has these.
 159      define_alias( qr/\biso8859(\d+)$/i => '"iso-8859-$1"' );
 160  
 161      # More HP stuff.
 162      define_alias(
 163          qr/\b(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i =>
 164            '"$1}8"' );
 165  
 166      # The Official name of ASCII.
 167      define_alias( qr/\bANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' );
 168  
 169      # This is a font issue, not an encoding issue.
 170      # (The currency symbol of the Latin 1 upper half
 171      #  has been redefined as the euro symbol.)
 172      define_alias( qr/^(.+)\@euro$/i => '"$1"' );
 173  
 174      define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i =>
 175  'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef'
 176      );
 177  
 178      define_alias(
 179          qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish|
 180               hebrew|arabic|baltic|vietnamese)$/ix =>
 181            '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}'
 182      );
 183  
 184      # Common names for non-latin preferred MIME names
 185      define_alias(
 186          'ascii'    => 'US-ascii',
 187          'cyrillic' => 'iso-8859-5',
 188          'arabic'   => 'iso-8859-6',
 189          'greek'    => 'iso-8859-7',
 190          'hebrew'   => 'iso-8859-8',
 191          'thai'     => 'iso-8859-11',
 192      );
 193      # RT #20781
 194      define_alias(qr/\btis-?620\b/i  => '"iso-8859-11"');
 195  
 196      # At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
 197      # And Microsoft has their own naming (again, surprisingly).
 198      # And windows-* is registered in IANA!
 199      define_alias(
 200          qr/\b(?:cp|ibm|ms|windows)[-_ ]?(\d{2,4})$/i => '"cp$1"' );
 201  
 202      # Sometimes seen with a leading zero.
 203      # define_alias( qr/\bcp037\b/i => '"cp37"');
 204  
 205      # Mac Mappings
 206      # predefined in *.ucm; unneeded
 207      # define_alias( qr/\bmacIcelandic$/i => '"macIceland"');
 208      define_alias( qr/^mac_(.*)$/i => '"mac$1"' );
 209  
 210      # Ououououou. gone.  They are differente!
 211      # define_alias( qr/\bmacRomanian$/i => '"macRumanian"');
 212  
 213      # Standardize on the dashed versions.
 214      define_alias( qr/\bkoi8[\s\-_]*([ru])$/i => '"koi8-$1"' );
 215  
 216      unless ($Encode::ON_EBCDIC) {
 217  
 218          # for Encode::CN
 219          define_alias( qr/\beuc.*cn$/i => '"euc-cn"' );
 220          define_alias( qr/\bcn.*euc$/i => '"euc-cn"' );
 221  
 222          # define_alias( qr/\bGB[- ]?(\d+)$/i => '"euc-cn"' )
 223          # CP936 doesn't have vendor-addon for GBK, so they're identical.
 224          define_alias( qr/^gbk$/i => '"cp936"' );
 225  
 226          # This fixes gb2312 vs. euc-cn confusion, practically
 227          define_alias( qr/\bGB[-_ ]?2312(?!-?raw)/i => '"euc-cn"' );
 228  
 229          # for Encode::JP
 230          define_alias( qr/\bjis$/i         => '"7bit-jis"' );
 231          define_alias( qr/\beuc.*jp$/i     => '"euc-jp"' );
 232          define_alias( qr/\bjp.*euc$/i     => '"euc-jp"' );
 233          define_alias( qr/\bujis$/i        => '"euc-jp"' );
 234          define_alias( qr/\bshift.*jis$/i  => '"shiftjis"' );
 235          define_alias( qr/\bsjis$/i        => '"shiftjis"' );
 236          define_alias( qr/\bwindows-31j$/i => '"cp932"' );
 237  
 238          # for Encode::KR
 239          define_alias( qr/\beuc.*kr$/i => '"euc-kr"' );
 240          define_alias( qr/\bkr.*euc$/i => '"euc-kr"' );
 241  
 242          # This fixes ksc5601 vs. euc-kr confusion, practically
 243          define_alias( qr/(?:x-)?uhc$/i         => '"cp949"' );
 244          define_alias( qr/(?:x-)?windows-949$/i => '"cp949"' );
 245          define_alias( qr/\bks_c_5601-1987$/i   => '"cp949"' );
 246  
 247          # for Encode::TW
 248          define_alias( qr/\bbig-?5$/i              => '"big5-eten"' );
 249          define_alias( qr/\bbig5-?et(?:en)?$/i     => '"big5-eten"' );
 250          define_alias( qr/\btca[-_]?big5$/i        => '"big5-eten"' );
 251          define_alias( qr/\bbig5-?hk(?:scs)?$/i    => '"big5-hkscs"' );
 252          define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' );
 253      }
 254  
 255      # utf8 is blessed :)
 256      define_alias( qr/^UTF-8$/i => '"utf-8-strict"' );
 257  
 258      # At last, Map white space and _ to '-'
 259      define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' );
 260  }
 261  
 262  1;
 263  __END__
 264  
 265  # TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8
 266  # TODO: HP-UX '15' encodings japanese15 korean15 roi15
 267  # TODO: Cyrillic encoding ISO-IR-111 (useful?)
 268  # TODO: Armenian encoding ARMSCII-8
 269  # TODO: Hebrew encoding ISO-8859-8-1
 270  # TODO: Thai encoding TCVN
 271  # TODO: Vietnamese encodings VPS
 272  # TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese
 273  #       ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic
 274  #       Farsi Georgian Gujarati Gurmukhi Hebrew Japanese
 275  #       Kannada Khmer Korean Laotian Malayalam Mongolian
 276  #       Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese
 277  
 278  =head1 NAME
 279  
 280  Encode::Alias - alias definitions to encodings
 281  
 282  =head1 SYNOPSIS
 283  
 284    use Encode;
 285    use Encode::Alias;
 286    define_alias( newName => ENCODING);
 287  
 288  =head1 DESCRIPTION
 289  
 290  Allows newName to be used as an alias for ENCODING. ENCODING may be
 291  either the name of an encoding or an encoding object (as described 
 292  in L<Encode>).
 293  
 294  Currently I<newName> can be specified in the following ways:
 295  
 296  =over 4
 297  
 298  =item As a simple string.
 299  
 300  =item As a qr// compiled regular expression, e.g.:
 301  
 302    define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' );
 303  
 304  In this case, if I<ENCODING> is not a reference, it is C<eval>-ed
 305  in order to allow C<$1> etc. to be substituted.  The example is one
 306  way to alias names as used in X11 fonts to the MIME names for the
 307  iso-8859-* family.  Note the double quotes inside the single quotes.
 308  
 309  (or, you don't have to do this yourself because this example is predefined)
 310  
 311  If you are using a regex here, you have to use the quotes as shown or
 312  it won't work.  Also note that regex handling is tricky even for the
 313  experienced.  Use this feature with caution.
 314  
 315  =item As a code reference, e.g.:
 316  
 317    define_alias( sub {shift =~ /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
 318  
 319  The same effect as the example above in a different way.  The coderef
 320  takes the alias name as an argument and returns a canonical name on
 321  success or undef if not.  Note the second argument is not required.
 322  Use this with even more caution than the regex version.
 323  
 324  =back
 325  
 326  =head3 Changes in code reference aliasing
 327  
 328  As of Encode 1.87, the older form
 329  
 330    define_alias( sub { return  /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
 331  
 332  no longer works. 
 333  
 334  Encode up to 1.86 internally used "local $_" to implement ths older
 335  form.  But consider the code below;
 336  
 337    use Encode;
 338    $_ = "eeeee" ;
 339    while (/(e)/g) {
 340      my $utf = decode('aliased-encoding-name', $1);
 341      print "position:",pos,"\n";
 342    }
 343  
 344  Prior to Encode 1.86 this fails because of "local $_".
 345  
 346  =head2 Alias overloading
 347  
 348  You can override predefined aliases by simply applying define_alias().
 349  The new alias is always evaluated first, and when necessary,
 350  define_alias() flushes the internal cache to make the new definition
 351  available.
 352  
 353    # redirect SHIFT_JIS to MS/IBM Code Page 932, which is a
 354    # superset of SHIFT_JIS
 355  
 356    define_alias( qr/shift.*jis$/i  => '"cp932"' );
 357    define_alias( qr/sjis$/i        => '"cp932"' );
 358  
 359  If you want to zap all predefined aliases, you can use
 360  
 361    Encode::Alias->undef_aliases;
 362  
 363  to do so.  And
 364  
 365    Encode::Alias->init_aliases;
 366  
 367  gets the factory settings back.
 368  
 369  =head1 SEE ALSO
 370  
 371  L<Encode>, L<Encode::Supported>
 372  
 373  =cut
 374  


Generated: Tue Mar 17 22:47:18 2015 Cross-referenced by PHPXref 0.7.1