[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/se3-unattended/var/se3/unattended/install/linuxaux/opt/perl/lib/5.10.0/i586-linux-thread-multi/Encode/ -> GSM0338.pm (source)

   1  #
   2  # $Id: GSM0338.pm,v 2.0 2007/04/22 14:54:22 dankogai Exp $
   3  #
   4  package Encode::GSM0338;
   5  
   6  use strict;
   7  use warnings;
   8  use Carp;
   9  
  10  use vars qw($VERSION);
  11  $VERSION = do { my @r = ( q$Revision: 2.0 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
  12  
  13  use Encode qw(:fallbacks);
  14  
  15  use base qw(Encode::Encoding);
  16  __PACKAGE__->Define('gsm0338');
  17  
  18  sub needs_lines { 1 }
  19  sub perlio_ok   { 0 }
  20  
  21  use utf8;
  22  our %UNI2GSM = (
  23      "\x{0040}" => "\x00",        # COMMERCIAL AT
  24      "\x{000A}" => "\x0A",        # LINE FEED
  25      "\x{000C}" => "\x1B\x0A",    # FORM FEED
  26      "\x{000D}" => "\x0D",        # CARRIAGE RETURN
  27      "\x{0020}" => "\x20",        # SPACE
  28      "\x{0021}" => "\x21",        # EXCLAMATION MARK
  29      "\x{0022}" => "\x22",        # QUOTATION MARK
  30      "\x{0023}" => "\x23",        # NUMBER SIGN
  31      "\x{0024}" => "\x02",        # DOLLAR SIGN
  32      "\x{0025}" => "\x25",        # PERCENT SIGN
  33      "\x{0026}" => "\x26",        # AMPERSAND
  34      "\x{0027}" => "\x27",        # APOSTROPHE
  35      "\x{0028}" => "\x28",        # LEFT PARENTHESIS
  36      "\x{0029}" => "\x29",        # RIGHT PARENTHESIS
  37      "\x{002A}" => "\x2A",        # ASTERISK
  38      "\x{002B}" => "\x2B",        # PLUS SIGN
  39      "\x{002C}" => "\x2C",        # COMMA
  40      "\x{002D}" => "\x2D",        # HYPHEN-MINUS
  41      "\x{002E}" => "\x2E",        # FULL STOP
  42      "\x{002F}" => "\x2F",        # SOLIDUS
  43      "\x{0030}" => "\x30",        # DIGIT ZERO
  44      "\x{0031}" => "\x31",        # DIGIT ONE
  45      "\x{0032}" => "\x32",        # DIGIT TWO
  46      "\x{0033}" => "\x33",        # DIGIT THREE
  47      "\x{0034}" => "\x34",        # DIGIT FOUR
  48      "\x{0035}" => "\x35",        # DIGIT FIVE
  49      "\x{0036}" => "\x36",        # DIGIT SIX
  50      "\x{0037}" => "\x37",        # DIGIT SEVEN
  51      "\x{0038}" => "\x38",        # DIGIT EIGHT
  52      "\x{0039}" => "\x39",        # DIGIT NINE
  53      "\x{003A}" => "\x3A",        # COLON
  54      "\x{003B}" => "\x3B",        # SEMICOLON
  55      "\x{003C}" => "\x3C",        # LESS-THAN SIGN
  56      "\x{003D}" => "\x3D",        # EQUALS SIGN
  57      "\x{003E}" => "\x3E",        # GREATER-THAN SIGN
  58      "\x{003F}" => "\x3F",        # QUESTION MARK
  59      "\x{0041}" => "\x41",        # LATIN CAPITAL LETTER A
  60      "\x{0042}" => "\x42",        # LATIN CAPITAL LETTER B
  61      "\x{0043}" => "\x43",        # LATIN CAPITAL LETTER C
  62      "\x{0044}" => "\x44",        # LATIN CAPITAL LETTER D
  63      "\x{0045}" => "\x45",        # LATIN CAPITAL LETTER E
  64      "\x{0046}" => "\x46",        # LATIN CAPITAL LETTER F
  65      "\x{0047}" => "\x47",        # LATIN CAPITAL LETTER G
  66      "\x{0048}" => "\x48",        # LATIN CAPITAL LETTER H
  67      "\x{0049}" => "\x49",        # LATIN CAPITAL LETTER I
  68      "\x{004A}" => "\x4A",        # LATIN CAPITAL LETTER J
  69      "\x{004B}" => "\x4B",        # LATIN CAPITAL LETTER K
  70      "\x{004C}" => "\x4C",        # LATIN CAPITAL LETTER L
  71      "\x{004D}" => "\x4D",        # LATIN CAPITAL LETTER M
  72      "\x{004E}" => "\x4E",        # LATIN CAPITAL LETTER N
  73      "\x{004F}" => "\x4F",        # LATIN CAPITAL LETTER O
  74      "\x{0050}" => "\x50",        # LATIN CAPITAL LETTER P
  75      "\x{0051}" => "\x51",        # LATIN CAPITAL LETTER Q
  76      "\x{0052}" => "\x52",        # LATIN CAPITAL LETTER R
  77      "\x{0053}" => "\x53",        # LATIN CAPITAL LETTER S
  78      "\x{0054}" => "\x54",        # LATIN CAPITAL LETTER T
  79      "\x{0055}" => "\x55",        # LATIN CAPITAL LETTER U
  80      "\x{0056}" => "\x56",        # LATIN CAPITAL LETTER V
  81      "\x{0057}" => "\x57",        # LATIN CAPITAL LETTER W
  82      "\x{0058}" => "\x58",        # LATIN CAPITAL LETTER X
  83      "\x{0059}" => "\x59",        # LATIN CAPITAL LETTER Y
  84      "\x{005A}" => "\x5A",        # LATIN CAPITAL LETTER Z
  85      "\x{005F}" => "\x11",        # LOW LINE
  86      "\x{0061}" => "\x61",        # LATIN SMALL LETTER A
  87      "\x{0062}" => "\x62",        # LATIN SMALL LETTER B
  88      "\x{0063}" => "\x63",        # LATIN SMALL LETTER C
  89      "\x{0064}" => "\x64",        # LATIN SMALL LETTER D
  90      "\x{0065}" => "\x65",        # LATIN SMALL LETTER E
  91      "\x{0066}" => "\x66",        # LATIN SMALL LETTER F
  92      "\x{0067}" => "\x67",        # LATIN SMALL LETTER G
  93      "\x{0068}" => "\x68",        # LATIN SMALL LETTER H
  94      "\x{0069}" => "\x69",        # LATIN SMALL LETTER I
  95      "\x{006A}" => "\x6A",        # LATIN SMALL LETTER J
  96      "\x{006B}" => "\x6B",        # LATIN SMALL LETTER K
  97      "\x{006C}" => "\x6C",        # LATIN SMALL LETTER L
  98      "\x{006D}" => "\x6D",        # LATIN SMALL LETTER M
  99      "\x{006E}" => "\x6E",        # LATIN SMALL LETTER N
 100      "\x{006F}" => "\x6F",        # LATIN SMALL LETTER O
 101      "\x{0070}" => "\x70",        # LATIN SMALL LETTER P
 102      "\x{0071}" => "\x71",        # LATIN SMALL LETTER Q
 103      "\x{0072}" => "\x72",        # LATIN SMALL LETTER R
 104      "\x{0073}" => "\x73",        # LATIN SMALL LETTER S
 105      "\x{0074}" => "\x74",        # LATIN SMALL LETTER T
 106      "\x{0075}" => "\x75",        # LATIN SMALL LETTER U
 107      "\x{0076}" => "\x76",        # LATIN SMALL LETTER V
 108      "\x{0077}" => "\x77",        # LATIN SMALL LETTER W
 109      "\x{0078}" => "\x78",        # LATIN SMALL LETTER X
 110      "\x{0079}" => "\x79",        # LATIN SMALL LETTER Y
 111      "\x{007A}" => "\x7A",        # LATIN SMALL LETTER Z
 112      "\x{000C}" => "\x1B\x0A",    # FORM FEED
 113      "\x{005B}" => "\x1B\x3C",    # LEFT SQUARE BRACKET
 114      "\x{005C}" => "\x1B\x2F",    # REVERSE SOLIDUS
 115      "\x{005D}" => "\x1B\x3E",    # RIGHT SQUARE BRACKET
 116      "\x{005E}" => "\x1B\x14",    # CIRCUMFLEX ACCENT
 117      "\x{007B}" => "\x1B\x28",    # LEFT CURLY BRACKET
 118      "\x{007C}" => "\x1B\x40",    # VERTICAL LINE
 119      "\x{007D}" => "\x1B\x29",    # RIGHT CURLY BRACKET
 120      "\x{007E}" => "\x1B\x3D",    # TILDE
 121      "\x{00A0}" => "\x1B",        # NO-BREAK SPACE
 122      "\x{00A1}" => "\x40",        # INVERTED EXCLAMATION MARK
 123      "\x{00A3}" => "\x01",        # POUND SIGN
 124      "\x{00A4}" => "\x24",        # CURRENCY SIGN
 125      "\x{00A5}" => "\x03",        # YEN SIGN
 126      "\x{00A7}" => "\x5F",        # SECTION SIGN
 127      "\x{00BF}" => "\x60",        # INVERTED QUESTION MARK
 128      "\x{00C4}" => "\x5B",        # LATIN CAPITAL LETTER A WITH DIAERESIS
 129      "\x{00C5}" => "\x0E",        # LATIN CAPITAL LETTER A WITH RING ABOVE
 130      "\x{00C6}" => "\x1C",        # LATIN CAPITAL LETTER AE
 131      "\x{00C9}" => "\x1F",        # LATIN CAPITAL LETTER E WITH ACUTE
 132      "\x{00D1}" => "\x5D",        # LATIN CAPITAL LETTER N WITH TILDE
 133      "\x{00D6}" => "\x5C",        # LATIN CAPITAL LETTER O WITH DIAERESIS
 134      "\x{00D8}" => "\x0B",        # LATIN CAPITAL LETTER O WITH STROKE
 135      "\x{00DC}" => "\x5E",        # LATIN CAPITAL LETTER U WITH DIAERESIS
 136      "\x{00DF}" => "\x1E",        # LATIN SMALL LETTER SHARP S
 137      "\x{00E0}" => "\x7F",        # LATIN SMALL LETTER A WITH GRAVE
 138      "\x{00E4}" => "\x7B",        # LATIN SMALL LETTER A WITH DIAERESIS
 139      "\x{00E5}" => "\x0F",        # LATIN SMALL LETTER A WITH RING ABOVE
 140      "\x{00E6}" => "\x1D",        # LATIN SMALL LETTER AE
 141      "\x{00E7}" => "\x09",        # LATIN SMALL LETTER C WITH CEDILLA
 142      "\x{00E8}" => "\x04",        # LATIN SMALL LETTER E WITH GRAVE
 143      "\x{00E9}" => "\x05",        # LATIN SMALL LETTER E WITH ACUTE
 144      "\x{00EC}" => "\x07",        # LATIN SMALL LETTER I WITH GRAVE
 145      "\x{00F1}" => "\x7D",        # LATIN SMALL LETTER N WITH TILDE
 146      "\x{00F2}" => "\x08",        # LATIN SMALL LETTER O WITH GRAVE
 147      "\x{00F6}" => "\x7C",        # LATIN SMALL LETTER O WITH DIAERESIS
 148      "\x{00F8}" => "\x0C",        # LATIN SMALL LETTER O WITH STROKE
 149      "\x{00F9}" => "\x06",        # LATIN SMALL LETTER U WITH GRAVE
 150      "\x{00FC}" => "\x7E",        # LATIN SMALL LETTER U WITH DIAERESIS
 151      "\x{0393}" => "\x13",        # GREEK CAPITAL LETTER GAMMA
 152      "\x{0394}" => "\x10",        # GREEK CAPITAL LETTER DELTA
 153      "\x{0398}" => "\x19",        # GREEK CAPITAL LETTER THETA
 154      "\x{039B}" => "\x14",        # GREEK CAPITAL LETTER LAMDA
 155      "\x{039E}" => "\x1A",        # GREEK CAPITAL LETTER XI
 156      "\x{03A0}" => "\x16",        # GREEK CAPITAL LETTER PI
 157      "\x{03A3}" => "\x18",        # GREEK CAPITAL LETTER SIGMA
 158      "\x{03A6}" => "\x12",        # GREEK CAPITAL LETTER PHI
 159      "\x{03A8}" => "\x17",        # GREEK CAPITAL LETTER PSI
 160      "\x{03A9}" => "\x15",        # GREEK CAPITAL LETTER OMEGA
 161      "\x{20AC}" => "\x1B\x65",    # EURO SIGN
 162  );
 163  our %GSM2UNI = reverse %UNI2GSM;
 164  our $ESC    = "\x1b";
 165  our $ATMARK = "\x40";
 166  our $FBCHAR = "\x3F";
 167  our $NBSP   = "\x{00A0}";
 168  
 169  #define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode"
 170  
 171  sub decode ($$;$) {
 172      my ( $obj, $bytes, $chk ) = @_;
 173      my $str;
 174      while ( length $bytes ) {
 175          my $c = substr( $bytes, 0, 1, '' );
 176          my $u;
 177          if ( $c eq "\x00" ) {
 178              my $c2 = substr( $bytes, 0, 1, '' );
 179              $u =
 180                  !length $c2 ? $ATMARK
 181                : $c2 eq "\x00" ? "\x{0000}"
 182                : exists $GSM2UNI{$c2} ? $ATMARK . $GSM2UNI{$c2}
 183                : $chk
 184                ? croak sprintf( "\\x%02X\\x%02X does not map to Unicode",
 185                     ord($c), ord($c2) )
 186                : $ATMARK . $FBCHAR;
 187  
 188          }
 189          elsif ( $c eq $ESC ) {
 190              my $c2 = substr( $bytes, 0, 1, '' );
 191              $u =
 192                  exists $GSM2UNI{ $c . $c2 } ? $GSM2UNI{ $c . $c2 }
 193                : exists $GSM2UNI{$c2}        ? $NBSP . $GSM2UNI{$c2}
 194                : $chk
 195                ? croak sprintf( "\\x%02X\\x%02X does not map to Unicode",
 196                     ord($c), ord($c2) )
 197                : $NBSP . $FBCHAR;
 198          }
 199          else {
 200              $u =
 201                exists $GSM2UNI{$c} ? $GSM2UNI{$c}
 202                : $chk
 203                ? croak sprintf( "\\x%02X does not map to Unicode", ord($c) )
 204                : $FBCHAR;
 205          }
 206          $str .= $u;
 207      }
 208      $_[1] = $bytes if $chk;
 209      return $str;
 210  }
 211  
 212  #define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s"
 213  
 214  sub encode($$;$) {
 215      my ( $obj, $str, $chk ) = @_;
 216      my $bytes;
 217      while ( length $str ) {
 218          my $u = substr( $str, 0, 1, '' );
 219          my $c;
 220          $bytes .=
 221            exists $UNI2GSM{$u} ? $UNI2GSM{$u}
 222            : $chk
 223            ? croak sprintf( "\\x{%04x} does not map to %s", 
 224                 ord($u), $obj->name )
 225            : $FBCHAR;
 226      }
 227      $_[1] = $str if $chk;
 228      return $bytes;
 229  }
 230  
 231  1;
 232  __END__
 233  
 234  =head1 NAME
 235  
 236  Encode::GSM0338 -- ESTI GSM 03.38 Encoding
 237  
 238  =head1 SYNOPSIS
 239  
 240    use Encode qw/encode decode/; 
 241    $gsm0338 = encode("gsm0338", $utf8);    # loads Encode::GSM0338 implicitly
 242    $utf8    = decode("gsm0338", $gsm0338); # ditto
 243  
 244  =head1 DESCRIPTION
 245  
 246  GSM0338 is for GSM handsets. Though it shares alphanumerals with ASCII,
 247  control character ranges and other parts are mapped very differently,
 248  mainly to store Greek characters.  There are also escape sequences
 249  (starting with 0x1B) to cover e.g. the Euro sign.
 250  
 251  This was once handled by L<Encode::Bytes> but because of all those
 252  unusual specifications, Encode 2.20 has relocated the support to
 253  this module.
 254  
 255  =head1 NOTES
 256  
 257  Unlike most other encodings,  the following aways croaks on error
 258  for any $chk that evaluates to true.
 259  
 260    $gsm0338 = encode("gsm0338", $utf8      $chk);
 261    $utf8    = decode("gsm0338", $gsm0338,  $chk);
 262  
 263  So if you want to check the validity of the encoding, surround the
 264  expression with C<eval {}> block as follows;
 265  
 266    eval {
 267      $utf8    = decode("gsm0338", $gsm0338,  $chk);
 268    };
 269    if ($@){
 270      # handle exception here
 271    }
 272  
 273  =head1 BUGS
 274  
 275  ESTI GSM 03.38 Encoding itself.
 276  
 277  Mapping \x00 to '@' causes too much pain everywhere.
 278  
 279  Its use of \x1b (escape) is also very questionable.  
 280  
 281  Because of those two, the code paging approach used use in ucm-based
 282  Encoding SOMETIMES fails so this module was written.
 283  
 284  =head1 SEE ALSO
 285  
 286  L<Encode>
 287  
 288  =cut


Generated: Tue Mar 17 22:47:18 2015 Cross-referenced by PHPXref 0.7.1