[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
require 5;
package I18N::LangTags::List;
#  Time-stamp: "2004-10-06 23:26:21 ADT"
use strict;
use vars qw(%Name %Is_Disrec $Debug $VERSION);
$VERSION = '0.35';
# POD at the end.

#----------------------------------------------------------------------
# Load-time initialisation: fill %Name (tag => English name) and
# %Is_Disrec (tag => 1 for tags not meant for general use) by parsing
# the language list in this file's own POD, which follows __DATA__.
{
  my $seeking   = 1;   # true until the "=for woohah" start marker is seen
  my $count     = 0;   # number of {tag} entries accepted
  my $last_name = '';  # most recent name, for 'Formerly "xx"' notes

  # Read into a lexical instead of $_ : this block runs whenever the
  # module is loaded, and an implicit assignment to the global $_ would
  # clobber whatever the loading code currently has in it.
  while(defined(my $line = <I18N::LangTags::List::DATA>)) {
    if($seeking) {
      $seeking = 0 if $line =~ m/=for woohah/;
      next;
    }

    # An entry looks like "{tag} : Name" or "{tag} Name"; a leading "["
    # marks the tag as disrecommended.
    if( my($disrec, $tag, $name) =
          $line =~ m/(\[?)\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/
    ) {
      $name =~ s/\s*[;\.]*\s*$//g;   # drop trailing ";", "." and whitespace
      next unless $name;
      ++$count;
      print "<$tag> <$name>\n" if $Debug;
      $last_name = $Name{$tag} = $name;
      $Is_Disrec{$tag} = 1 if $disrec;
    } elsif($line =~ m/[Ff]ormerly \"([-a-z0-9]+)\"/) {
      # A superseded tag: it keeps the current entry's name, marked as
      # old, and is always disrecommended.
      my $old_tag = $1;
      $Name{$old_tag} = "$last_name (old tag)" if $last_name;
      $Is_Disrec{$old_tag} = 1;
    }
  }

  # perldata: a program should close DATA once it is done reading it.
  close(I18N::LangTags::List::DATA);

  die "No tags read??" unless $count;
}
#----------------------------------------------------------------------

# name( $langtag )
#
# Returns an English name for the language tag, or nothing (undef in
# scalar context, the empty list in list context) if the tag is false
# or no name can be found for it or for any of its prefixes.
#
# The tag is lowercased and stripped of surrounding whitespace.  If the
# whole tag is not in %Name, trailing "-subtag" parts are removed one at
# a time until a known prefix is found; the removed parts are then
# reported as:  Name (Subform "removed-parts").
# A tag starting with "x-" is also looked up with "i-" in its place,
# and vice versa.
sub name {
  my $tag = $_[0];
  return unless $tag;
  $tag = lc $tag;
  $tag =~ s/^\s+//s;
  $tag =~ s/\s+$//s;

  # The same tag with the x- / i- prefix swapped ('' when neither applies).
  my $alt = '';
  if($tag =~ m/^x-(.+)/) {
    $alt = "i-$1";
  } elsif($tag =~ m/^i-(.+)/) {
    $alt = "x-$1";
  }

  my $subform = '';
  my $name;
  print "Input: {$tag}\n" if $Debug;
  while(length $tag) {
    $name = $Name{$tag} || $Name{$alt};
    last if $name;

    if($tag =~ s/(-[a-z0-9]+)$//s) {
      my $shaved = $1;
      print "Shaving off: $shaved leaving $tag\n" if $Debug;
      $subform = "$shaved$subform";
      # Shorten the alternate form in step, then loop around again.
      if($alt =~ s/(-[a-z0-9]+)$//s) {
        print " alt -> $alt\n" if $Debug;
      }
    } else {
      # We're trying to pull a subform off a primary tag.  TILT!
      print "Aborting on: {}{$subform}\n" if $Debug;   # no name at this point
      last;
    }
  }
  $name = '' unless defined $name;   # keep the debug output warning-free
  print "Output: {$name}{$subform}\n" if $Debug;

  return unless $name;            # Failure
  return $name unless $subform;   # Exact match
  $subform =~ s/^-//s;
  $subform =~ s/-$//s;
  return "$name (Subform \"$subform\")";
}

#--------------------------------------------------------------------------

# is_decent( $langtag )
#
# Returns
#   0  if the tag is false, is syntactically invalid, consists only of
#      "i" or "x", or if it (or one of its prefixes, tested after the
#      tag itself) is marked in %Is_Disrec;
#   2  if the tag or one of its prefixes has an entry in %Name;
#   1  if the tag is syntactically fine but nothing is known about it.
sub is_decent {
  my $tag = $_[0];
  return 0 unless $tag;
  $tag = lc $tag;
  #require I18N::LangTags;

  # \A and \z rather than ^ and $ : "$" would also match before a
  # trailing newline and so let "en\n" through as well-formed.
  return 0 unless
    $tag =~
    /\A(?:                # First subtag
         [xi] | [a-z]{2,3}
      )
      (?:                 # Subtags thereafter
         -                # separator
         [a-z0-9]{1,8}    # subtag
      )*
    \z/x;

  # Every prefix of the tag:  "a-b-c"  =>  ("a", "a-b", "a-b-c").
  # The pattern above guarantees at least one element.
  my @supers = ();
  foreach my $bit (split('-', $tag)) {
    push @supers,
      scalar(@supers) ? ($supers[-1] . '-' . $bit) : $bit;
  }
  # A bare "i", "x" or "sgn" first subtag is not a prefix worth testing.
  shift @supers if $supers[0] =~ m<\A(?:i|x|sgn)\z>;
  return 0 unless @supers;

  # The full tag is tested first, so a tag that is itself listed and
  # not disrecommended is answered before its shorter prefixes are
  # looked at: decent subforms of indecent tags are decent.
  foreach my $f ($tag, @supers) {
    return 0 if $Is_Disrec{$f};
    return 2 if $Name{$f};   # not only is it decent, it's known!
  }
  return 1;
}

#--------------------------------------------------------------------------
1;

__DATA__

=head1 NAME

I18N::LangTags::List -- tags and names for human languages

=head1 SYNOPSIS

  use I18N::LangTags::List;
  print "Parlez-vous... ", join(', ',
      I18N::LangTags::List::name('elx') || 'unknown_language',
      I18N::LangTags::List::name('ar-Kw') || 'unknown_language',
      I18N::LangTags::List::name('en') || 'unknown_language',
      I18N::LangTags::List::name('en-CA') || 'unknown_language',
    ), "?\n";

prints:

  Parlez-vous... Elamite, Kuwait Arabic, English, Canadian English?

136 137 =head1 DESCRIPTION 138 139 This module provides a function 140 C<I18N::LangTags::List::name( I<langtag> ) > that takes 141 a language tag (see L<I18N::LangTags|I18N::LangTags>) 142 and returns the best attempt at an English name for it, or 143 undef if it can't make sense of the tag. 144 145 The function I18N::LangTags::List::name(...) is not exported. 146 147 This module also provides a function 148 C<I18N::LangTags::List::is_decent( I<langtag> )> that returns true iff 149 the language tag is syntactically valid and is for general use (like 150 "fr" or "fr-ca", below). That is, it returns false for tags that are 151 syntactically invalid and for tags, like "aus", that are listed in 152 brackets below. This function is not exported. 153 154 The map of tags-to-names that it uses is accessible as 155 %I18N::LangTags::List::Name, and it's the same as the list 156 that follows in this documentation, which should be useful 157 to you even if you don't use this module. 158 159 =head1 ABOUT LANGUAGE TAGS 160 161 Internet language tags, as defined in RFC 3066, are a formalism 162 for denoting human languages. The two-letter ISO 639-1 language 163 codes are well known (as "en" for English), as are their forms 164 when qualified by a country code ("en-US"). Less well-known are the 165 arbitrary-length non-ISO codes (like "i-mingo"), and the 166 recently (in 2001) introduced three-letter ISO-639-2 codes. 167 168 Remember these important facts: 169 170 =over 171 172 =item * 173 174 Language tags are not locale IDs. A locale ID is written with a "_" 175 instead of a "-", (almost?) always matches C<m/^\w\w_\w\w\b/>, and 176 I<means> something different than a language tag. A language tag 177 denotes a language. A locale ID denotes a language I<as used in> 178 a particular place, in combination with non-linguistic 179 location-specific information such as what currency is used 180 there. Locales I<also> often denote character set information, 181 as in "en_US.ISO8859-1". 
182 183 =item * 184 185 Language tags are not for computer languages. 186 187 =item * 188 189 "Dialect" is not a useful term, since there is no objective 190 criterion for establishing when two language-forms are 191 dialects of each other, or are separate languages. 192 193 =item * 194 195 Language tags are not case-sensitive. en-US, en-us, En-Us, etc., 196 are all the same tag, and denote the same language. 197 198 =item * 199 200 Not every language tag really refers to a single language. Some 201 language tags refer to conditions: i-default (system-message text 202 in English plus maybe other languages), und (undetermined 203 language). Others (notably lots of the three-letter codes) are 204 bibliographic tags that classify whole groups of languages, as 205 with cus "Cushitic (Other)" (i.e., a 206 language that has been classed as Cushitic, but which has no more 207 specific code) or the even less linguistically coherent 208 sai for "South American Indian (Other)". Though useful in 209 bibliography, B<SUCH TAGS ARE NOT 210 FOR GENERAL USE>. For further guidance, email me. 211 212 =item * 213 214 Language tags are not country codes. In fact, they are often 215 distinct codes, as with language tag ja for Japanese, and 216 ISO 3166 country code C<.jp> for Japan. 217 218 =back 219 220 =head1 LIST OF LANGUAGES 221 222 The first part of each item is the language tag, between 223 {...}. It 224 is followed by an English name for the language or language-group. 225 Language tags that I judge to be not for general use, are bracketed. 226 227 This list is in alphabetical order by English name of the language. 228 229 =for reminder 230 The name in the =item line MUST NOT have E<...>'s in it!! 
231 232 =for woohah START 233 234 =over 235 236 =item {ab} : Abkhazian 237 238 eq Abkhaz 239 240 =item {ace} : Achinese 241 242 =item {ach} : Acoli 243 244 =item {ada} : Adangme 245 246 =item {ady} : Adyghe 247 248 eq Adygei 249 250 =item {aa} : Afar 251 252 =item {afh} : Afrihili 253 254 (Artificial) 255 256 =item {af} : Afrikaans 257 258 =item [{afa} : Afro-Asiatic (Other)] 259 260 =item {ak} : Akan 261 262 (Formerly "aka".) 263 264 =item {akk} : Akkadian 265 266 (Historical) 267 268 =item {sq} : Albanian 269 270 =item {ale} : Aleut 271 272 =item [{alg} : Algonquian languages] 273 274 NOT Algonquin! 275 276 =item [{tut} : Altaic (Other)] 277 278 =item {am} : Amharic 279 280 NOT Aramaic! 281 282 =item {i-ami} : Ami 283 284 eq Amis. eq 'Amis. eq Pangca. 285 286 =item [{apa} : Apache languages] 287 288 =item {ar} : Arabic 289 290 Many forms are mutually un-intelligible in spoken media. 291 Notable forms: 292 {ar-ae} UAE Arabic; 293 {ar-bh} Bahrain Arabic; 294 {ar-dz} Algerian Arabic; 295 {ar-eg} Egyptian Arabic; 296 {ar-iq} Iraqi Arabic; 297 {ar-jo} Jordanian Arabic; 298 {ar-kw} Kuwait Arabic; 299 {ar-lb} Lebanese Arabic; 300 {ar-ly} Libyan Arabic; 301 {ar-ma} Moroccan Arabic; 302 {ar-om} Omani Arabic; 303 {ar-qa} Qatari Arabic; 304 {ar-sa} Sauda Arabic; 305 {ar-sy} Syrian Arabic; 306 {ar-tn} Tunisian Arabic; 307 {ar-ye} Yemen Arabic. 308 309 =item {arc} : Aramaic 310 311 NOT Amharic! NOT Samaritan Aramaic! 312 313 =item {arp} : Arapaho 314 315 =item {arn} : Araucanian 316 317 =item {arw} : Arawak 318 319 =item {hy} : Armenian 320 321 =item {an} : Aragonese 322 323 =item [{art} : Artificial (Other)] 324 325 =item {ast} : Asturian 326 327 eq Bable. 328 329 =item {as} : Assamese 330 331 =item [{ath} : Athapascan languages] 332 333 eq Athabaskan. eq Athapaskan. eq Athabascan. 334 335 =item [{aus} : Australian languages] 336 337 =item [{map} : Austronesian (Other)] 338 339 =item {av} : Avaric 340 341 (Formerly "ava".) 
342 343 =item {ae} : Avestan 344 345 eq Zend 346 347 =item {awa} : Awadhi 348 349 =item {ay} : Aymara 350 351 =item {az} : Azerbaijani 352 353 eq Azeri 354 355 Notable forms: 356 {az-Arab} Azerbaijani in Arabic script; 357 {az-Cyrl} Azerbaijani in Cyrillic script; 358 {az-Latn} Azerbaijani in Latin script. 359 360 =item {ban} : Balinese 361 362 =item [{bat} : Baltic (Other)] 363 364 =item {bal} : Baluchi 365 366 =item {bm} : Bambara 367 368 (Formerly "bam".) 369 370 =item [{bai} : Bamileke languages] 371 372 =item {bad} : Banda 373 374 =item [{bnt} : Bantu (Other)] 375 376 =item {bas} : Basa 377 378 =item {ba} : Bashkir 379 380 =item {eu} : Basque 381 382 =item {btk} : Batak (Indonesia) 383 384 =item {bej} : Beja 385 386 =item {be} : Belarusian 387 388 eq Belarussian. eq Byelarussian. 389 eq Belorussian. eq Byelorussian. 390 eq White Russian. eq White Ruthenian. 391 NOT Ruthenian! 392 393 =item {bem} : Bemba 394 395 =item {bn} : Bengali 396 397 eq Bangla. 398 399 =item [{ber} : Berber (Other)] 400 401 =item {bho} : Bhojpuri 402 403 =item {bh} : Bihari 404 405 =item {bik} : Bikol 406 407 =item {bin} : Bini 408 409 =item {bi} : Bislama 410 411 eq Bichelamar. 412 413 =item {bs} : Bosnian 414 415 =item {bra} : Braj 416 417 =item {br} : Breton 418 419 =item {bug} : Buginese 420 421 =item {bg} : Bulgarian 422 423 =item {i-bnn} : Bunun 424 425 =item {bua} : Buriat 426 427 =item {my} : Burmese 428 429 =item {cad} : Caddo 430 431 =item {car} : Carib 432 433 =item {ca} : Catalan 434 435 eq CatalE<aacute>n. eq Catalonian. 436 437 =item [{cau} : Caucasian (Other)] 438 439 =item {ceb} : Cebuano 440 441 =item [{cel} : Celtic (Other)] 442 443 Notable forms: 444 {cel-gaulish} Gaulish (Historical) 445 446 =item [{cai} : Central American Indian (Other)] 447 448 =item {chg} : Chagatai 449 450 (Historical?) 
451 452 =item [{cmc} : Chamic languages] 453 454 =item {ch} : Chamorro 455 456 =item {ce} : Chechen 457 458 =item {chr} : Cherokee 459 460 eq Tsalagi 461 462 =item {chy} : Cheyenne 463 464 =item {chb} : Chibcha 465 466 (Historical) NOT Chibchan (which is a language family). 467 468 =item {ny} : Chichewa 469 470 eq Nyanja. eq Chinyanja. 471 472 =item {zh} : Chinese 473 474 Many forms are mutually un-intelligible in spoken media. 475 Notable forms: 476 {zh-Hans} Chinese, in simplified script; 477 {zh-Hant} Chinese, in traditional script; 478 {zh-tw} Taiwan Chinese; 479 {zh-cn} PRC Chinese; 480 {zh-sg} Singapore Chinese; 481 {zh-mo} Macau Chinese; 482 {zh-hk} Hong Kong Chinese; 483 {zh-guoyu} Mandarin [Putonghua/Guoyu]; 484 {zh-hakka} Hakka [formerly "i-hakka"]; 485 {zh-min} Hokkien; 486 {zh-min-nan} Southern Hokkien; 487 {zh-wuu} Shanghaiese; 488 {zh-xiang} Hunanese; 489 {zh-gan} Gan; 490 {zh-yue} Cantonese. 491 492 =for etc 493 {i-hakka} Hakka (old tag) 494 495 =item {chn} : Chinook Jargon 496 497 eq Chinook Wawa. 498 499 =item {chp} : Chipewyan 500 501 =item {cho} : Choctaw 502 503 =item {cu} : Church Slavic 504 505 eq Old Church Slavonic. 506 507 =item {chk} : Chuukese 508 509 eq Trukese. eq Chuuk. eq Truk. eq Ruk. 510 511 =item {cv} : Chuvash 512 513 =item {cop} : Coptic 514 515 =item {kw} : Cornish 516 517 =item {co} : Corsican 518 519 eq Corse. 520 521 =item {cr} : Cree 522 523 NOT Creek! (Formerly "cre".) 524 525 =item {mus} : Creek 526 527 NOT Cree! 528 529 =item [{cpe} : English-based Creoles and pidgins (Other)] 530 531 =item [{cpf} : French-based Creoles and pidgins (Other)] 532 533 =item [{cpp} : Portuguese-based Creoles and pidgins (Other)] 534 535 =item [{crp} : Creoles and pidgins (Other)] 536 537 =item {hr} : Croatian 538 539 eq Croat. 540 541 =item [{cus} : Cushitic (Other)] 542 543 =item {cs} : Czech 544 545 =item {dak} : Dakota 546 547 eq Nakota. eq Latoka. 
548 549 =item {da} : Danish 550 551 =item {dar} : Dargwa 552 553 =item {day} : Dayak 554 555 =item {i-default} : Default (Fallthru) Language 556 557 Defined in RFC 2277, this is for tagging text 558 (which must include English text, and might/should include text 559 in other appropriate languages) that is emitted in a context 560 where language-negotiation wasn't possible -- in SMTP mail failure 561 messages, for example. 562 563 =item {del} : Delaware 564 565 =item {din} : Dinka 566 567 =item {dv} : Divehi 568 569 eq Maldivian. (Formerly "div".) 570 571 =item {doi} : Dogri 572 573 NOT Dogrib! 574 575 =item {dgr} : Dogrib 576 577 NOT Dogri! 578 579 =item [{dra} : Dravidian (Other)] 580 581 =item {dua} : Duala 582 583 =item {nl} : Dutch 584 585 eq Netherlander. Notable forms: 586 {nl-nl} Netherlands Dutch; 587 {nl-be} Belgian Dutch. 588 589 =item {dum} : Middle Dutch (ca.1050-1350) 590 591 (Historical) 592 593 =item {dyu} : Dyula 594 595 =item {dz} : Dzongkha 596 597 =item {efi} : Efik 598 599 =item {egy} : Ancient Egyptian 600 601 (Historical) 602 603 =item {eka} : Ekajuk 604 605 =item {elx} : Elamite 606 607 (Historical) 608 609 =item {en} : English 610 611 Notable forms: 612 {en-au} Australian English; 613 {en-bz} Belize English; 614 {en-ca} Canadian English; 615 {en-gb} UK English; 616 {en-ie} Irish English; 617 {en-jm} Jamaican English; 618 {en-nz} New Zealand English; 619 {en-ph} Philippine English; 620 {en-tt} Trinidad English; 621 {en-us} US English; 622 {en-za} South African English; 623 {en-zw} Zimbabwe English. 624 625 =item {enm} : Old English (1100-1500) 626 627 (Historical) 628 629 =item {ang} : Old English (ca.450-1100) 630 631 eq Anglo-Saxon. (Historical) 632 633 =item {i-enochian} : Enochian (Artificial) 634 635 =item {myv} : Erzya 636 637 =item {eo} : Esperanto 638 639 (Artificial) 640 641 =item {et} : Estonian 642 643 =item {ee} : Ewe 644 645 (Formerly "ewe".) 
646 647 =item {ewo} : Ewondo 648 649 =item {fan} : Fang 650 651 =item {fat} : Fanti 652 653 =item {fo} : Faroese 654 655 =item {fj} : Fijian 656 657 =item {fi} : Finnish 658 659 =item [{fiu} : Finno-Ugrian (Other)] 660 661 eq Finno-Ugric. NOT Ugaritic! 662 663 =item {fon} : Fon 664 665 =item {fr} : French 666 667 Notable forms: 668 {fr-fr} France French; 669 {fr-be} Belgian French; 670 {fr-ca} Canadian French; 671 {fr-ch} Swiss French; 672 {fr-lu} Luxembourg French; 673 {fr-mc} Monaco French. 674 675 =item {frm} : Middle French (ca.1400-1600) 676 677 (Historical) 678 679 =item {fro} : Old French (842-ca.1400) 680 681 (Historical) 682 683 =item {fy} : Frisian 684 685 =item {fur} : Friulian 686 687 =item {ff} : Fulah 688 689 (Formerly "ful".) 690 691 =item {gaa} : Ga 692 693 =item {gd} : Scots Gaelic 694 695 NOT Scots! 696 697 =item {gl} : Gallegan 698 699 eq Galician 700 701 =item {lg} : Ganda 702 703 (Formerly "lug".) 704 705 =item {gay} : Gayo 706 707 =item {gba} : Gbaya 708 709 =item {gez} : Geez 710 711 eq Ge'ez 712 713 =item {ka} : Georgian 714 715 =item {de} : German 716 717 Notable forms: 718 {de-at} Austrian German; 719 {de-be} Belgian German; 720 {de-ch} Swiss German; 721 {de-de} Germany German; 722 {de-li} Liechtenstein German; 723 {de-lu} Luxembourg German. 724 725 =item {gmh} : Middle High German (ca.1050-1500) 726 727 (Historical) 728 729 =item {goh} : Old High German (ca.750-1050) 730 731 (Historical) 732 733 =item [{gem} : Germanic (Other)] 734 735 =item {gil} : Gilbertese 736 737 =item {gon} : Gondi 738 739 =item {gor} : Gorontalo 740 741 =item {got} : Gothic 742 743 (Historical) 744 745 =item {grb} : Grebo 746 747 =item {grc} : Ancient Greek 748 749 (Historical) (Until 15th century or so.) 750 751 =item {el} : Modern Greek 752 753 (Since 15th century or so.) 
754 755 =item {gn} : Guarani 756 757 GuaranE<iacute> 758 759 =item {gu} : Gujarati 760 761 =item {gwi} : Gwich'in 762 763 eq Gwichin 764 765 =item {hai} : Haida 766 767 =item {ht} : Haitian 768 769 eq Haitian Creole 770 771 =item {ha} : Hausa 772 773 =item {haw} : Hawaiian 774 775 Hawai'ian 776 777 =item {he} : Hebrew 778 779 (Formerly "iw".) 780 781 =for etc 782 {iw} Hebrew (old tag) 783 784 =item {hz} : Herero 785 786 =item {hil} : Hiligaynon 787 788 =item {him} : Himachali 789 790 =item {hi} : Hindi 791 792 =item {ho} : Hiri Motu 793 794 =item {hit} : Hittite 795 796 (Historical) 797 798 =item {hmn} : Hmong 799 800 =item {hu} : Hungarian 801 802 =item {hup} : Hupa 803 804 =item {iba} : Iban 805 806 =item {is} : Icelandic 807 808 =item {io} : Ido 809 810 (Artificial) 811 812 =item {ig} : Igbo 813 814 (Formerly "ibo".) 815 816 =item {ijo} : Ijo 817 818 =item {ilo} : Iloko 819 820 =item [{inc} : Indic (Other)] 821 822 =item [{ine} : Indo-European (Other)] 823 824 =item {id} : Indonesian 825 826 (Formerly "in".) 827 828 =for etc 829 {in} Indonesian (old tag) 830 831 =item {inh} : Ingush 832 833 =item {ia} : Interlingua (International Auxiliary Language Association) 834 835 (Artificial) NOT Interlingue! 836 837 =item {ie} : Interlingue 838 839 (Artificial) NOT Interlingua! 840 841 =item {iu} : Inuktitut 842 843 A subform of "Eskimo". 844 845 =item {ik} : Inupiaq 846 847 A subform of "Eskimo". 848 849 =item [{ira} : Iranian (Other)] 850 851 =item {ga} : Irish 852 853 =item {mga} : Middle Irish (900-1200) 854 855 (Historical) 856 857 =item {sga} : Old Irish (to 900) 858 859 (Historical) 860 861 =item [{iro} : Iroquoian languages] 862 863 =item {it} : Italian 864 865 Notable forms: 866 {it-it} Italy Italian; 867 {it-ch} Swiss Italian. 868 869 =item {ja} : Japanese 870 871 (NOT "jp"!) 872 873 =item {jv} : Javanese 874 875 (Formerly "jw" because of a typo.) 
876 877 =item {jrb} : Judeo-Arabic 878 879 =item {jpr} : Judeo-Persian 880 881 =item {kbd} : Kabardian 882 883 =item {kab} : Kabyle 884 885 =item {kac} : Kachin 886 887 =item {kl} : Kalaallisut 888 889 eq Greenlandic "Eskimo" 890 891 =item {xal} : Kalmyk 892 893 =item {kam} : Kamba 894 895 =item {kn} : Kannada 896 897 eq Kanarese. NOT Canadian! 898 899 =item {kr} : Kanuri 900 901 (Formerly "kau".) 902 903 =item {krc} : Karachay-Balkar 904 905 =item {kaa} : Kara-Kalpak 906 907 =item {kar} : Karen 908 909 =item {ks} : Kashmiri 910 911 =item {csb} : Kashubian 912 913 eq Kashub 914 915 =item {kaw} : Kawi 916 917 =item {kk} : Kazakh 918 919 =item {kha} : Khasi 920 921 =item {km} : Khmer 922 923 eq Cambodian. eq Kampuchean. 924 925 =item [{khi} : Khoisan (Other)] 926 927 =item {kho} : Khotanese 928 929 =item {ki} : Kikuyu 930 931 eq Gikuyu. 932 933 =item {kmb} : Kimbundu 934 935 =item {rw} : Kinyarwanda 936 937 =item {ky} : Kirghiz 938 939 =item {i-klingon} : Klingon 940 941 =item {kv} : Komi 942 943 =item {kg} : Kongo 944 945 (Formerly "kon".) 946 947 =item {kok} : Konkani 948 949 =item {ko} : Korean 950 951 =item {kos} : Kosraean 952 953 =item {kpe} : Kpelle 954 955 =item {kro} : Kru 956 957 =item {kj} : Kuanyama 958 959 =item {kum} : Kumyk 960 961 =item {ku} : Kurdish 962 963 =item {kru} : Kurukh 964 965 =item {kut} : Kutenai 966 967 =item {lad} : Ladino 968 969 eq Judeo-Spanish. NOT Ladin (a minority language in Italy). 970 971 =item {lah} : Lahnda 972 973 NOT Lamba! 974 975 =item {lam} : Lamba 976 977 NOT Lahnda! 978 979 =item {lo} : Lao 980 981 eq Laotian. 982 983 =item {la} : Latin 984 985 (Historical) NOT Ladin! NOT Ladino! 986 987 =item {lv} : Latvian 988 989 eq Lettish. 990 991 =item {lb} : Letzeburgesch 992 993 eq Luxemburgian, eq Luxemburger. (Formerly "i-lux".) 994 995 =for etc 996 {i-lux} Letzeburgesch (old tag) 997 998 =item {lez} : Lezghian 999 1000 =item {li} : Limburgish 1001 1002 eq Limburger, eq Limburgan. NOT Letzeburgesch! 
1003 1004 =item {ln} : Lingala 1005 1006 =item {lt} : Lithuanian 1007 1008 =item {nds} : Low German 1009 1010 eq Low Saxon. eq Low German. eq Low Saxon. 1011 1012 =item {art-lojban} : Lojban (Artificial) 1013 1014 =item {loz} : Lozi 1015 1016 =item {lu} : Luba-Katanga 1017 1018 (Formerly "lub".) 1019 1020 =item {lua} : Luba-Lulua 1021 1022 =item {lui} : Luiseno 1023 1024 eq LuiseE<ntilde>o. 1025 1026 =item {lun} : Lunda 1027 1028 =item {luo} : Luo (Kenya and Tanzania) 1029 1030 =item {lus} : Lushai 1031 1032 =item {mk} : Macedonian 1033 1034 eq the modern Slavic language spoken in what was Yugoslavia. 1035 NOT the form of Greek spoken in Greek Macedonia! 1036 1037 =item {mad} : Madurese 1038 1039 =item {mag} : Magahi 1040 1041 =item {mai} : Maithili 1042 1043 =item {mak} : Makasar 1044 1045 =item {mg} : Malagasy 1046 1047 =item {ms} : Malay 1048 1049 NOT Malayalam! 1050 1051 =item {ml} : Malayalam 1052 1053 NOT Malay! 1054 1055 =item {mt} : Maltese 1056 1057 =item {mnc} : Manchu 1058 1059 =item {mdr} : Mandar 1060 1061 NOT Mandarin! 1062 1063 =item {man} : Mandingo 1064 1065 =item {mni} : Manipuri 1066 1067 eq Meithei. 1068 1069 =item [{mno} : Manobo languages] 1070 1071 =item {gv} : Manx 1072 1073 =item {mi} : Maori 1074 1075 NOT Mari! 1076 1077 =item {mr} : Marathi 1078 1079 =item {chm} : Mari 1080 1081 NOT Maori! 1082 1083 =item {mh} : Marshall 1084 1085 eq Marshallese. 1086 1087 =item {mwr} : Marwari 1088 1089 =item {mas} : Masai 1090 1091 =item [{myn} : Mayan languages] 1092 1093 =item {men} : Mende 1094 1095 =item {mic} : Micmac 1096 1097 =item {min} : Minangkabau 1098 1099 =item {i-mingo} : Mingo 1100 1101 eq the Irquoian language West Virginia Seneca. NOT New York Seneca! 1102 1103 =item [{mis} : Miscellaneous languages] 1104 1105 Don't use this. 1106 1107 =item {moh} : Mohawk 1108 1109 =item {mdf} : Moksha 1110 1111 =item {mo} : Moldavian 1112 1113 eq Moldovan. 
1114 1115 =item [{mkh} : Mon-Khmer (Other)] 1116 1117 =item {lol} : Mongo 1118 1119 =item {mn} : Mongolian 1120 1121 eq Mongol. 1122 1123 =item {mos} : Mossi 1124 1125 =item [{mul} : Multiple languages] 1126 1127 Not for normal use. 1128 1129 =item [{mun} : Munda languages] 1130 1131 =item {nah} : Nahuatl 1132 1133 =item {nap} : Neapolitan 1134 1135 =item {na} : Nauru 1136 1137 =item {nv} : Navajo 1138 1139 eq Navaho. (Formerly "i-navajo".) 1140 1141 =for etc 1142 {i-navajo} Navajo (old tag) 1143 1144 =item {nd} : North Ndebele 1145 1146 =item {nr} : South Ndebele 1147 1148 =item {ng} : Ndonga 1149 1150 =item {ne} : Nepali 1151 1152 eq Nepalese. Notable forms: 1153 {ne-np} Nepal Nepali; 1154 {ne-in} India Nepali. 1155 1156 =item {new} : Newari 1157 1158 =item {nia} : Nias 1159 1160 =item [{nic} : Niger-Kordofanian (Other)] 1161 1162 =item [{ssa} : Nilo-Saharan (Other)] 1163 1164 =item {niu} : Niuean 1165 1166 =item {nog} : Nogai 1167 1168 =item {non} : Old Norse 1169 1170 (Historical) 1171 1172 =item [{nai} : North American Indian] 1173 1174 Do not use this. 1175 1176 =item {no} : Norwegian 1177 1178 Note the two following forms: 1179 1180 =item {nb} : Norwegian Bokmal 1181 1182 eq BokmE<aring>l, (A form of Norwegian.) (Formerly "no-bok".) 1183 1184 =for etc 1185 {no-bok} Norwegian Bokmal (old tag) 1186 1187 =item {nn} : Norwegian Nynorsk 1188 1189 (A form of Norwegian.) (Formerly "no-nyn".) 1190 1191 =for etc 1192 {no-nyn} Norwegian Nynorsk (old tag) 1193 1194 =item [{nub} : Nubian languages] 1195 1196 =item {nym} : Nyamwezi 1197 1198 =item {nyn} : Nyankole 1199 1200 =item {nyo} : Nyoro 1201 1202 =item {nzi} : Nzima 1203 1204 =item {oc} : Occitan (post 1500) 1205 1206 eq ProvenE<ccedil>al, eq Provencal 1207 1208 =item {oj} : Ojibwa 1209 1210 eq Ojibwe. (Formerly "oji".) 
1211 1212 =item {or} : Oriya 1213 1214 =item {om} : Oromo 1215 1216 =item {osa} : Osage 1217 1218 =item {os} : Ossetian; Ossetic 1219 1220 =item [{oto} : Otomian languages] 1221 1222 Group of languages collectively called "OtomE<iacute>". 1223 1224 =item {pal} : Pahlavi 1225 1226 eq Pahlevi 1227 1228 =item {i-pwn} : Paiwan 1229 1230 eq Pariwan 1231 1232 =item {pau} : Palauan 1233 1234 =item {pi} : Pali 1235 1236 (Historical?) 1237 1238 =item {pam} : Pampanga 1239 1240 =item {pag} : Pangasinan 1241 1242 =item {pa} : Panjabi 1243 1244 eq Punjabi 1245 1246 =item {pap} : Papiamento 1247 1248 eq Papiamentu. 1249 1250 =item [{paa} : Papuan (Other)] 1251 1252 =item {fa} : Persian 1253 1254 eq Farsi. eq Iranian. 1255 1256 =item {peo} : Old Persian (ca.600-400 B.C.) 1257 1258 =item [{phi} : Philippine (Other)] 1259 1260 =item {phn} : Phoenician 1261 1262 (Historical) 1263 1264 =item {pon} : Pohnpeian 1265 1266 NOT Pompeiian! 1267 1268 =item {pl} : Polish 1269 1270 =item {pt} : Portuguese 1271 1272 eq Portugese. Notable forms: 1273 {pt-pt} Portugal Portuguese; 1274 {pt-br} Brazilian Portuguese. 1275 1276 =item [{pra} : Prakrit languages] 1277 1278 =item {pro} : Old Provencal (to 1500) 1279 1280 eq Old ProvenE<ccedil>al. (Historical.) 1281 1282 =item {ps} : Pushto 1283 1284 eq Pashto. eq Pushtu. 1285 1286 =item {qu} : Quechua 1287 1288 eq Quecha. 1289 1290 =item {rm} : Raeto-Romance 1291 1292 eq Romansh. 1293 1294 =item {raj} : Rajasthani 1295 1296 =item {rap} : Rapanui 1297 1298 =item {rar} : Rarotongan 1299 1300 =item [{qaa - qtz} : Reserved for local use.] 1301 1302 =item [{roa} : Romance (Other)] 1303 1304 NOT Romanian! NOT Romany! NOT Romansh! 1305 1306 =item {ro} : Romanian 1307 1308 eq Rumanian. NOT Romany! 1309 1310 =item {rom} : Romany 1311 1312 eq Rom. NOT Romanian! 1313 1314 =item {rn} : Rundi 1315 1316 =item {ru} : Russian 1317 1318 NOT White Russian! NOT Rusyn! 1319 1320 =item [{sal} : Salishan languages] 1321 1322 Large language group. 
1323 1324 =item {sam} : Samaritan Aramaic 1325 1326 NOT Aramaic! 1327 1328 =item {se} : Northern Sami 1329 1330 eq Lappish. eq Lapp. eq (Northern) Saami. 1331 1332 =item {sma} : Southern Sami 1333 1334 =item {smn} : Inari Sami 1335 1336 =item {smj} : Lule Sami 1337 1338 =item {sms} : Skolt Sami 1339 1340 =item [{smi} : Sami languages (Other)] 1341 1342 =item {sm} : Samoan 1343 1344 =item {sad} : Sandawe 1345 1346 =item {sg} : Sango 1347 1348 =item {sa} : Sanskrit 1349 1350 (Historical) 1351 1352 =item {sat} : Santali 1353 1354 =item {sc} : Sardinian 1355 1356 eq Sard. 1357 1358 =item {sas} : Sasak 1359 1360 =item {sco} : Scots 1361 1362 NOT Scots Gaelic! 1363 1364 =item {sel} : Selkup 1365 1366 =item [{sem} : Semitic (Other)] 1367 1368 =item {sr} : Serbian 1369 1370 eq Serb. NOT Sorbian. 1371 1372 Notable forms: 1373 {sr-Cyrl} : Serbian in Cyrillic script; 1374 {sr-Latn} : Serbian in Latin script. 1375 1376 =item {srr} : Serer 1377 1378 =item {shn} : Shan 1379 1380 =item {sn} : Shona 1381 1382 =item {sid} : Sidamo 1383 1384 =item {sgn-...} : Sign Languages 1385 1386 Always use with a subtag. Notable forms: 1387 {sgn-gb} British Sign Language (BSL); 1388 {sgn-ie} Irish Sign Language (ESL); 1389 {sgn-ni} Nicaraguan Sign Language (ISN); 1390 {sgn-us} American Sign Language (ASL). 1391 1392 (And so on with other country codes as the subtag.) 1393 1394 =item {bla} : Siksika 1395 1396 eq Blackfoot. eq Pikanii. 1397 1398 =item {sd} : Sindhi 1399 1400 =item {si} : Sinhalese 1401 1402 eq Sinhala. 1403 1404 =item [{sit} : Sino-Tibetan (Other)] 1405 1406 =item [{sio} : Siouan languages] 1407 1408 =item {den} : Slave (Athapascan) 1409 1410 ("Slavey" is a subform.) 1411 1412 =item [{sla} : Slavic (Other)] 1413 1414 =item {sk} : Slovak 1415 1416 eq Slovakian. 1417 1418 =item {sl} : Slovenian 1419 1420 eq Slovene. 
1421 1422 =item {sog} : Sogdian 1423 1424 =item {so} : Somali 1425 1426 =item {son} : Songhai 1427 1428 =item {snk} : Soninke 1429 1430 =item {wen} : Sorbian languages 1431 1432 eq Wendish. eq Sorb. eq Lusatian. eq Wend. NOT Venda! NOT Serbian! 1433 1434 =item {nso} : Northern Sotho 1435 1436 =item {st} : Southern Sotho 1437 1438 eq Sutu. eq Sesotho. 1439 1440 =item [{sai} : South American Indian (Other)] 1441 1442 =item {es} : Spanish 1443 1444 Notable forms: 1445 {es-ar} Argentine Spanish; 1446 {es-bo} Bolivian Spanish; 1447 {es-cl} Chilean Spanish; 1448 {es-co} Colombian Spanish; 1449 {es-do} Dominican Spanish; 1450 {es-ec} Ecuadorian Spanish; 1451 {es-es} Spain Spanish; 1452 {es-gt} Guatemalan Spanish; 1453 {es-hn} Honduran Spanish; 1454 {es-mx} Mexican Spanish; 1455 {es-pa} Panamanian Spanish; 1456 {es-pe} Peruvian Spanish; 1457 {es-pr} Puerto Rican Spanish; 1458 {es-py} Paraguay Spanish; 1459 {es-sv} Salvadoran Spanish; 1460 {es-us} US Spanish; 1461 {es-uy} Uruguayan Spanish; 1462 {es-ve} Venezuelan Spanish. 1463 1464 =item {suk} : Sukuma 1465 1466 =item {sux} : Sumerian 1467 1468 (Historical) 1469 1470 =item {su} : Sundanese 1471 1472 =item {sus} : Susu 1473 1474 =item {sw} : Swahili 1475 1476 eq Kiswahili 1477 1478 =item {ss} : Swati 1479 1480 =item {sv} : Swedish 1481 1482 Notable forms: 1483 {sv-se} Sweden Swedish; 1484 {sv-fi} Finland Swedish. 1485 1486 =item {syr} : Syriac 1487 1488 =item {tl} : Tagalog 1489 1490 =item {ty} : Tahitian 1491 1492 =item [{tai} : Tai (Other)] 1493 1494 NOT Thai! 1495 1496 =item {tg} : Tajik 1497 1498 =item {tmh} : Tamashek 1499 1500 =item {ta} : Tamil 1501 1502 =item {i-tao} : Tao 1503 1504 eq Yami. 1505 1506 =item {tt} : Tatar 1507 1508 =item {i-tay} : Tayal 1509 1510 eq Atayal. eq Atayan. 1511 1512 =item {te} : Telugu 1513 1514 =item {ter} : Tereno 1515 1516 =item {tet} : Tetum 1517 1518 =item {th} : Thai 1519 1520 NOT Tai! 
1521 1522 =item {bo} : Tibetan 1523 1524 =item {tig} : Tigre 1525 1526 =item {ti} : Tigrinya 1527 1528 =item {tem} : Timne 1529 1530 eq Themne. eq Timene. 1531 1532 =item {tiv} : Tiv 1533 1534 =item {tli} : Tlingit 1535 1536 =item {tpi} : Tok Pisin 1537 1538 =item {tkl} : Tokelau 1539 1540 =item {tog} : Tonga (Nyasa) 1541 1542 NOT Tsonga! 1543 1544 =item {to} : Tonga (Tonga Islands) 1545 1546 (Pronounced "Tong-a", not "Tong-ga") 1547 1548 NOT Tsonga! 1549 1550 =item {tsi} : Tsimshian 1551 1552 eq Sm'algyax 1553 1554 =item {ts} : Tsonga 1555 1556 NOT Tonga! 1557 1558 =item {i-tsu} : Tsou 1559 1560 =item {tn} : Tswana 1561 1562 Same as Setswana. 1563 1564 =item {tum} : Tumbuka 1565 1566 =item [{tup} : Tupi languages] 1567 1568 =item {tr} : Turkish 1569 1570 (Typically in Roman script) 1571 1572 =item {ota} : Ottoman Turkish (1500-1928) 1573 1574 (Typically in Arabic script) (Historical) 1575 1576 =item {crh} : Crimean Turkish 1577 1578 eq Crimean Tatar 1579 1580 =item {tk} : Turkmen 1581 1582 eq Turkmeni. 1583 1584 =item {tvl} : Tuvalu 1585 1586 =item {tyv} : Tuvinian 1587 1588 eq Tuvan. eq Tuvin. 1589 1590 =item {tw} : Twi 1591 1592 =item {udm} : Udmurt 1593 1594 =item {uga} : Ugaritic 1595 1596 NOT Ugric! 1597 1598 =item {ug} : Uighur 1599 1600 =item {uk} : Ukrainian 1601 1602 =item {umb} : Umbundu 1603 1604 =item {und} : Undetermined 1605 1606 Not a tag for normal use. 1607 1608 =item {ur} : Urdu 1609 1610 =item {uz} : Uzbek 1611 1612 eq E<Ouml>zbek 1613 1614 Notable forms: 1615 {uz-Cyrl} Uzbek in Cyrillic script; 1616 {uz-Latn} Uzbek in Latin script. 1617 1618 =item {vai} : Vai 1619 1620 =item {ve} : Venda 1621 1622 NOT Wendish! NOT Wend! NOT Avestan! (Formerly "ven".) 1623 1624 =item {vi} : Vietnamese 1625 1626 eq Viet. 1627 1628 =item {vo} : Volapuk 1629 1630 eq VolapE<uuml>k. (Artificial) 1631 1632 =item {vot} : Votic 1633 1634 eq Votian. eq Vod. 
1635 1636 =item [{wak} : Wakashan languages] 1637 1638 =item {wa} : Walloon 1639 1640 =item {wal} : Walamo 1641 1642 eq Wolaytta. 1643 1644 =item {war} : Waray 1645 1646 Presumably the Philippine language Waray-Waray (SamareE<ntilde>o), 1647 not the smaller Philippine language Waray Sorsogon, nor the extinct 1648 Australian language Waray. 1649 1650 =item {was} : Washo 1651 1652 eq Washoe 1653 1654 =item {cy} : Welsh 1655 1656 =item {wo} : Wolof 1657 1658 =item {x-...} : Unregistered (Semi-Private Use) 1659 1660 "x-" is a prefix for language tags that are not registered with ISO 1661 or IANA. Example, x-double-dutch 1662 1663 =item {xh} : Xhosa 1664 1665 =item {sah} : Yakut 1666 1667 =item {yao} : Yao 1668 1669 (The Yao in Malawi?) 1670 1671 =item {yap} : Yapese 1672 1673 eq Yap 1674 1675 =item {ii} : Sichuan Yi 1676 1677 =item {yi} : Yiddish 1678 1679 Formerly "ji". Usually in Hebrew script. 1680 1681 Notable forms: 1682 {yi-latn} Yiddish in Latin script 1683 1684 =item {yo} : Yoruba 1685 1686 =item [{ypk} : Yupik languages] 1687 1688 Several "Eskimo" languages. 1689 1690 =item {znd} : Zande 1691 1692 =item [{zap} : Zapotec] 1693 1694 (A group of languages.) 1695 1696 =item {zen} : Zenaga 1697 1698 NOT Zend. 1699 1700 =item {za} : Zhuang 1701 1702 =item {zu} : Zulu 1703 1704 =item {zun} : Zuni 1705 1706 eq ZuE<ntilde>i 1707 1708 =back 1709 1710 =for woohah END 1711 1712 =head1 SEE ALSO 1713 1714 L<I18N::LangTags|I18N::LangTags> and its "See Also" section. 1715 1716 =head1 COPYRIGHT AND DISCLAIMER 1717 1718 Copyright (c) 2001+ Sean M. Burke. All rights reserved. 1719 1720 You can redistribute and/or 1721 modify this document under the same terms as Perl itself. 1722 1723 This document is provided in the hope that it will be 1724 useful, but without any warranty; 1725 without even the implied warranty of accuracy, authoritativeness, 1726 completeness, merchantability, or fitness for a particular purpose. 1727 1728 Email any corrections or questions to me. 
1729 1730 =head1 AUTHOR 1731 1732 Sean M. Burke, sburkeE<64>cpan.org 1733 1734 =cut 1735 1736 1737 # To generate a list of just the two and three-letter codes: 1738 1739 #!/usr/local/bin/perl -w 1740 1741 require 5; # Time-stamp: "2001-03-13 21:53:39 MST" 1742 # Sean M. Burke, sburke@cpan.org 1743 # This program is for generating the language_codes.txt file 1744 use strict; 1745 use LWP::Simple; 1746 use HTML::TreeBuilder 3.10; 1747 my $root = HTML::TreeBuilder->new(); 1748 my $url = 'http://lcweb.loc.gov/standards/iso639-2/bibcodes.html'; 1749 $root->parse(get($url) || die "Can't get $url"); 1750 $root->eof(); 1751 1752 my @codes; 1753 1754 foreach my $tr ($root->find_by_tag_name('tr')) { 1755 my @f = map $_->as_text(), $tr->content_list(); 1756 #print map("<$_> ", @f), "\n"; 1757 next unless @f == 5; 1758 pop @f; # nix the French name 1759 next if $f[-1] eq 'Language Name (English)'; # it's a header line 1760 my $xx = splice(@f, 2,1); # pull out the two-letter code 1761 $f[-1] =~ s/^\s+//; 1762 $f[-1] =~ s/\s+$//; 1763 if($xx =~ m/[a-zA-Z]/) { # there's a two-letter code for it 1764 push @codes, [ lc($f[-1]), "$xx\t$f[-1]\n" ]; 1765 } else { # print the three-letter codes. 1766 if($f[0] eq $f[1]) { 1767 push @codes, [ lc($f[-1]), "$f[1]\t$f[2]\n" ]; 1768 } else { # shouldn't happen 1769 push @codes, [ lc($f[-1]), "@f !!!!!!!!!!\n" ]; 1770 } 1771 } 1772 } 1773 1774 print map $_->[1], sort {; $a->[0] cmp $b->[0] } @codes; 1775 print "[ based on $url\n at ", scalar(localtime), "]\n", 1776 "[Note: doesn't include IANA-registered codes.]\n"; 1777 exit; 1778 __END__ 1779
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Tue Mar 17 22:47:18 2015 | Cross-referenced by PHPXref 0.7.1 |