package Lire::WWW::UserAgent::Language;
#
# $Id: Language.pm,v 1.2 2006/07/23 13:16:36 vanbaal Exp $
#
# Copyright (C) 2001 Stichting LogReport Foundation <logreport@logreport.org>
# Copyright (C) 2001, 2002 Stichting LogReport Foundation <logreport@logreport.org>
#
#     This file is part of Lire.
#
#     Lire is free software; you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation; either version 2 of the License, or
#     (at your option) any later version.
#
#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.
#
#     You should have received a copy of the GNU General Public License
#     along with this program (see COPYING); if not, check with
#     http://www.gnu.org/copyleft/gpl.html.
#
use strict;
use vars qw/ $REVISION/;

# Store the version-control identifier string of this file in the package
# variable $REVISION at compile time.
BEGIN {
    $REVISION  = '$Id: Language.pm,v 1.2 2006/07/23 13:16:36 vanbaal Exp $';
}

# -----------------------------------------------------------------------------
# Database
# -----------------------------------------------------------------------------

#
# Generated from I18N::LangTags::List using
# perl -MI18N::LangTags::List -e 'foreach my $lang ( sort keys %I18N::LangTags::List::Name ) { printf "%-16s => \"%s\",\n", "\"$lang\"", $I18N::LangTags::List::Name{$lang} }'
#
# The I18N::LangTags::List module (c) Sean Burke and can be distributed
# under the same terms as perl

# Lookup table mapping language tags (e.g. "en", "en-us", "zh-min-nan") to
# their English language names.  getLanguage() lower-cases every candidate
# tag before looking it up here, so all keys must be kept in lower case.
my %LanguageHash = (
"aa"             => "Afar",
"ab"             => "Abkhazian",
"ace"            => "Achinese",
"ach"            => "Acoli",
"ada"            => "Adangme",
"ae"             => "Avestan",
"af"             => "Afrikaans",
"afa"            => "Afro-Asiatic (Other)",
"afh"            => "Afrihili",
"aka"            => "Akan",
"akk"            => "Akkadian",
"ale"            => "Aleut",
"alg"            => "Algonquian languages",
"am"             => "Amharic",
"ang"            => "Old English (ca.450-1100)",
"apa"            => "Apache languages",
"ar"             => "Arabic",
"ar-ae"          => "UAE Arabic",
"ar-bh"          => "Bahrain Arabic",
"ar-dz"          => "Algerian Arabic",
"ar-eg"          => "Egyptian Arabic",
"ar-iq"          => "Iraqi Arabic",
"ar-jo"          => "Jordanian Arabic",
"ar-kw"          => "Kuwait Arabic",
"ar-lb"          => "Lebanese Arabic",
"ar-ly"          => "Libyan Arabic",
"ar-ma"          => "Moroccan Arabic",
"ar-om"          => "Omani Arabic",
"ar-qa"          => "Qatari Arabic",
"ar-sa"          => "Sauda Arabic",
"ar-sy"          => "Syrian Arabic",
"ar-tn"          => "Tunisian Arabic",
"ar-ye"          => "Yemen Arabic",
"arc"            => "Aramaic",
"arn"            => "Araucanian",
"arp"            => "Arapaho",
"art"            => "Artificial (Other)",
"arw"            => "Arawak",
"as"             => "Assamese",
"ath"            => "Athapascan languages",
"aus"            => "Australian languages",
"ava"            => "Avaric",
"awa"            => "Awadhi",
"ay"             => "Aymara",
"az"             => "Azerbaijani",
"ba"             => "Bashkir",
"bad"            => "Banda",
"bai"            => "Bamileke languages",
"bal"            => "Baluchi",
"bam"            => "Bambara",
"ban"            => "Balinese",
"bas"            => "Basa",
"bat"            => "Baltic (Other)",
"be"             => "Belarusian",
"bej"            => "Beja",
"bem"            => "Bemba",
"ber"            => "Berber (Other)",
"bg"             => "Bulgarian",
"bh"             => "Bihari",
"bho"            => "Bhojpuri",
"bi"             => "Bislama",
"bik"            => "Bikol",
"bin"            => "Bini",
"bla"            => "Siksika",
"bn"             => "Bengali",
"bnt"            => "Bantu (Other)",
"bo"             => "Tibetan",
"br"             => "Breton",
"bra"            => "Braj",
"bs"             => "Bosnian",
"btk"            => "Batak (Indonesia)",
"bua"            => "Buriat",
"bug"            => "Buginese",
"ca"             => "Catalan",
"cad"            => "Caddo",
"cai"            => "Central American Indian (Other)",
"car"            => "Carib",
"cau"            => "Caucasian (Other)",
"ce"             => "Chechen",
"ceb"            => "Cebuano",
"cel"            => "Celtic (Other)",
"cel-gaulish"    => "Gaulish (Historical)",
"ch"             => "Chamorro",
"chb"            => "Chibcha",
"chg"            => "Chagatai",
"chk"            => "Chuukese",
"chm"            => "Mari",
"chn"            => "Chinook Jargon",
"cho"            => "Choctaw",
"chp"            => "Chipewyan",
"chr"            => "Cherokee",
"chy"            => "Cheyenne",
"cmc"            => "Chamic languages",
"co"             => "Corsican",
"cop"            => "Coptic",
"cpe"            => "English-based Creoles and pidgins (Other)",
"cpf"            => "French-based Creoles and pidgins (Other)",
"cpp"            => "Portuguese-based Creoles and pidgins (Other)",
"cre"            => "Cree",
"crp"            => "Creoles and pidgins (Other)",
"cs"             => "Czech",
"cu"             => "Church Slavic",
"cus"            => "Cushitic (Other)",
"cv"             => "Chuvash",
"cy"             => "Welsh",
"da"             => "Danish",
"dak"            => "Dakota",
"day"            => "Dayak",
"de"             => "German",
"de-at"          => "Austrian German",
"de-be"          => "Belgian German",
"de-ch"          => "Swiss German",
"de-de"          => "Germany German",
"de-li"          => "Liechtenstein German",
"de-lu"          => "Luxembourg German",
"del"            => "Delaware",
"den"            => "Slave (Athapascan)",
"dgr"            => "Dogrib",
"din"            => "Dinka",
"div"            => "Divehi",
"doi"            => "Dogri",
"dra"            => "Dravidian (Other)",
"dua"            => "Duala",
"dum"            => "Middle Dutch (ca.1050-1350)",
"dyu"            => "Dyula",
"dz"             => "Dzongkha",
"efi"            => "Efik",
"egy"            => "Ancient Egyptian",
"eka"            => "Ekajuk",
"el"             => "Modern Greek",
"elx"            => "Elamite",
"en"             => "English",
"en-au"          => "Australian English",
"en-bz"          => "Belize English",
"en-ca"          => "Canadian English",
"en-gb"          => "UK English",
"en-ie"          => "Irish English",
"en-jm"          => "Jamaican English",
"en-nz"          => "New Zealand English",
"en-ph"          => "Philippine English",
"en-tt"          => "Trinidad English",
"en-us"          => "US English",
"en-za"          => "South African English",
"en-zw"          => "Zimbabwe English",
"enm"            => "Old English (1100-1500)",
"eo"             => "Esperanto",
"es"             => "Spanish",
"es-ar"          => "Argentine Spanish",
"es-bo"          => "Bolivian Spanish",
"es-cl"          => "Chilean Spanish",
"es-co"          => "Colombian Spanish",
"es-do"          => "Dominican Spanish",
"es-ec"          => "Ecuadorian Spanish",
"es-es"          => "Spain Spanish",
"es-gt"          => "Guatemalan Spanish",
"es-hn"          => "Honduran Spanish",
"es-mx"          => "Mexican Spanish",
"es-pa"          => "Panamanian Spanish",
"es-pe"          => "Peruvian Spanish",
"es-pr"          => "Puerto Rican Spanish",
"es-py"          => "Paraguay Spanish",
"es-sv"          => "Salvadoran Spanish",
"es-us"          => "US Spanish",
"es-uy"          => "Uruguayan Spanish",
"es-ve"          => "Venezuelan Spanish",
"et"             => "Estonian",
"eu"             => "Basque",
"ewe"            => "Ewe",
"ewo"            => "Ewondo",
"fa"             => "Persian",
"fan"            => "Fang",
"fat"            => "Fanti",
"fi"             => "Finnish",
"fiu"            => "Finno-Ugrian (Other)",
"fj"             => "Fijian",
"fo"             => "Faroese",
"fon"            => "Fon",
"fr"             => "French",
"fr-be"          => "Belgian French",
"fr-ca"          => "Canadian French",
"fr-ch"          => "Swiss French",
"fr-fr"          => "France French",
"fr-lu"          => "Luxembourg French",
"fr-mc"          => "Monaco French",
"frm"            => "Middle French (ca.1400-1600)",
"fro"            => "Old French (842-ca.1400)",
"ful"            => "Fulah",
"fur"            => "Friulian",
"fy"             => "Frisian",
"ga"             => "Irish",
"gaa"            => "Ga",
"gay"            => "Gayo",
"gba"            => "Gbaya",
"gd"             => "Scots Gaelic",
"gem"            => "Germanic (Other)",
"gez"            => "Geez",
"gil"            => "Gilbertese",
"gl"             => "Gallegan",
"gmh"            => "Middle High German (ca.1050-1500)",
"gn"             => "Guarani",
"goh"            => "Old High German (ca.750-1050)",
"gon"            => "Gondi",
"gor"            => "Gorontalo",
"got"            => "Gothic",
"grb"            => "Grebo",
"grc"            => "Ancient Greek",
"gu"             => "Gujarati",
"gv"             => "Manx",
"gwi"            => "Gwich'in",
"ha"             => "Hausa",
"hai"            => "Haida",
"haw"            => "Hawaiian",
"he"             => "Hebrew",
"hi"             => "Hindi",
"hil"            => "Hiligaynon",
"him"            => "Himachali",
"hit"            => "Hittite",
"hmn"            => "Hmong",
"ho"             => "Hiri Motu",
"hr"             => "Croatian",
"hu"             => "Hungarian",
"hup"            => "Hupa",
"hy"             => "Armenian",
"hz"             => "Herero",
"i-ami"          => "Ami",
"i-bnn"          => "Bunun",
"i-default"      => "Default (Fallthru) Language",
"i-hakka"        => "Hakka (old tag)",
"i-klingon"      => "Klingon",
"i-lux"          => "Letzeburgesch (old tag)",
"i-mingo"        => "Mingo",
"i-navajo"       => "Navajo (old tag)",
"i-pwn"          => "Paiwan",
"i-tao"          => "Tao",
"i-tay"          => "Tayal",
"i-tsu"          => "Tsou",
"ia"             => "Interlingua (International Auxiliary Language Association)",
"iba"            => "Iban",
"ibo"            => "Igbo",
"id"             => "Indonesian",
"ie"             => "Interlingue",
"ijo"            => "Ijo",
"ik"             => "Inupiaq",
"ilo"            => "Iloko",
"in"             => "Indonesian (old tag)",
"inc"            => "Indic (Other)",
"ine"            => "Indo-European (Other)",
"ira"            => "Iranian (Other)",
"iro"            => "Iroquoian languages",
"is"             => "Icelandic",
"it"             => "Italian",
"it-ch"          => "Swiss Italian",
"it-it"          => "Italy Italian",
"iu"             => "Inuktitut",
"iw"             => "Hebrew (old tag)",
"ja"             => "Japanese",
"ji"             => "Yiddish (old tag)",
"jpr"            => "Judeo-Persian",
"jrb"            => "Judeo-Arabic",
"jw"             => "Javanese",
"ka"             => "Georgian",
"kaa"            => "Kara-Kalpak",
"kab"            => "Kabyle",
"kac"            => "Kachin",
"kam"            => "Kamba",
"kar"            => "Karen",
"kau"            => "Kanuri",
"kaw"            => "Kawi",
"kha"            => "Khasi",
"khi"            => "Khoisan (Other)",
"kho"            => "Khotanese",
"ki"             => "Kikuyu",
"kj"             => "Kuanyama",
"kk"             => "Kazakh",
"kl"             => "Kalaallisut",
"km"             => "Khmer",
"kmb"            => "Kimbundu",
"kn"             => "Kannada",
"ko"             => "Korean",
"kok"            => "Konkani",
"kon"            => "Kongo",
"kos"            => "Kosraean",
"kpe"            => "Kpelle",
"kro"            => "Kru",
"kru"            => "Kurukh",
"ks"             => "Kashmiri",
"ku"             => "Kurdish",
"kum"            => "Kumyk",
"kut"            => "Kutenai",
"kv"             => "Komi",
"kw"             => "Cornish",
"ky"             => "Kirghiz",
"la"             => "Latin",
"lad"            => "Ladino",
"lah"            => "Lahnda",
"lam"            => "Lamba",
"lb"             => "Letzeburgesch",
"lez"            => "Lezghian",
"ln"             => "Lingala",
"lo"             => "Lao",
"lol"            => "Mongo",
"loz"            => "Lozi",
"lt"             => "Lithuanian",
"lua"            => "Luba-Lulua",
"lub"            => "Luba-Katanga",
"lug"            => "Ganda",
"lui"            => "Luiseno",
"lun"            => "Lunda",
"luo"            => "Luo (Kenya and Tanzania)",
"lus"            => "Lushai",
"lv"             => "Latvian",
"mad"            => "Madurese",
"mag"            => "Magahi",
"mai"            => "Maithili",
"mak"            => "Makasar",
"man"            => "Mandingo",
"map"            => "Austronesian (Other)",
"mas"            => "Masai",
"mdr"            => "Mandar",
"men"            => "Mende",
"mg"             => "Malagasy",
"mga"            => "Middle Irish (900-1200)",
"mh"             => "Marshall",
"mi"             => "Maori",
"mic"            => "Micmac",
"min"            => "Minangkabau",
"mis"            => "Miscellaneous languages",
"mk"             => "Macedonian",
"mkh"            => "Mon-Khmer (Other)",
"ml"             => "Malayalam",
"mn"             => "Mongolian",
"mnc"            => "Manchu",
"mni"            => "Manipuri",
"mno"            => "Manobo languages",
"mo"             => "Moldavian",
"moh"            => "Mohawk",
"mos"            => "Mossi",
"mr"             => "Marathi",
"ms"             => "Malay",
"mt"             => "Maltese",
"mul"            => "Multiple languages",
"mun"            => "Munda languages",
"mus"            => "Creek",
"mwr"            => "Marwari",
"my"             => "Burmese",
"myn"            => "Mayan languages",
"na"             => "Nauru",
"nah"            => "Nahuatl",
"nai"            => "North American Indian",
"nb"             => "Norwegian Bokmal",
"nd"             => "North Ndebele",
"nds"            => "Low German",
"ne"             => "Nepali",
"ne-in"          => "India Nepali",
"ne-np"          => "Nepal Nepali",
"new"            => "Newari",
"ng"             => "Ndonga",
"nia"            => "Nias",
"nic"            => "Niger-Kordofanian (Other)",
"niu"            => "Niuean",
"nl"             => "Dutch",
"nl-be"          => "Belgian Dutch",
"nl-nl"          => "Netherlands Dutch",
"nn"             => "Norwegian Nynorsk",
"no"             => "Norwegian",
"no-bok"         => "Norwegian Bokmal (old tag)",
"no-nyn"         => "Norwegian Nynorsk (old tag)",
"non"            => "Old Norse",
"nr"             => "South Ndebele",
"nso"            => "Northern Sotho",
"nub"            => "Nubian languages",
"nv"             => "Navajo",
"ny"             => "Chichewa",
"nym"            => "Nyamwezi",
"nyn"            => "Nyankole",
"nyo"            => "Nyoro",
"nzi"            => "Nzima",
"oc"             => "Occitan (post 1500)",
"oji"            => "Ojibwa",
"om"             => "Oromo",
"or"             => "Oriya",
"os"             => "Ossetian; Ossetic",
"osa"            => "Osage",
"ota"            => "Ottoman Turkish (1500-1928)",
"oto"            => "Otomian languages",
"pa"             => "Panjabi",
"paa"            => "Papuan (Other)",
"pag"            => "Pangasinan",
"pal"            => "Pahlavi",
"pam"            => "Pampanga",
"pap"            => "Papiamento",
"pau"            => "Palauan",
"peo"            => "Old Persian (ca.600-400 B.C.)",
"phi"            => "Philippine (Other)",
"phn"            => "Phoenician",
"pi"             => "Pali",
"pl"             => "Polish",
"pon"            => "Pohnpeian",
"pra"            => "Prakrit languages",
"pro"            => "Old Provencal (to 1500)",
"ps"             => "Pushto",
"pt"             => "Portuguese",
"pt-br"          => "Brazilian Portuguese",
"pt-pt"          => "Portugal Portuguese",
"qu"             => "Quechua",
"raj"            => "Rajasthani",
"rap"            => "Rapanui",
"rar"            => "Rarotongan",
"rm"             => "Raeto-Romance",
"rn"             => "Rundi",
"ro"             => "Romanian",
"roa"            => "Romance (Other)",
"rom"            => "Romany",
"ru"             => "Russian",
"rw"             => "Kinyarwanda",
"sa"             => "Sanskrit",
"sad"            => "Sandawe",
"sah"            => "Yakut",
"sai"            => "South American Indian (Other)",
"sal"            => "Salishan languages",
"sam"            => "Samaritan Aramaic",
"sas"            => "Sasak",
"sat"            => "Santali",
"sc"             => "Sardinian",
"sco"            => "Scots",
"sd"             => "Sindhi",
"se"             => "Northern Sami",
"sel"            => "Selkup",
"sem"            => "Semitic (Other)",
"sg"             => "Sango",
"sga"            => "Old Irish (to 900)",
"sgn-gb"         => "British Sign Language (BSL)",
"sgn-ie"         => "Irish Sign Language (ESL)",
"sgn-ni"         => "Nicaraguan Sign Language (ISN)",
"sgn-us"         => "American Sign Language (ASL)",
"shn"            => "Shan",
"si"             => "Sinhalese",
"sid"            => "Sidamo",
"sio"            => "Siouan languages",
"sit"            => "Sino-Tibetan (Other)",
"sk"             => "Slovak",
"sl"             => "Slovenian",
"sla"            => "Slavic (Other)",
"sm"             => "Samoan",
"smi"            => "Sami languages (Other)",
"sn"             => "Shona",
"snk"            => "Soninke",
"so"             => "Somali",
"sog"            => "Sogdian",
"son"            => "Songhai",
"sq"             => "Albanian",
"sr"             => "Serbian",
"srr"            => "Serer",
"ss"             => "Swati",
"ssa"            => "Nilo-Saharan (Other)",
"st"             => "Southern Sotho",
"su"             => "Sundanese",
"suk"            => "Sukuma",
"sus"            => "Susu",
"sux"            => "Sumerian",
"sv"             => "Swedish",
"sv-fi"          => "Finland Swedish",
"sv-se"          => "Sweden Swedish",
"sw"             => "Swahili",
"syr"            => "Syriac",
"ta"             => "Tamil",
"tai"            => "Tai (Other)",
"te"             => "Telugu",
"tem"            => "Timne",
"ter"            => "Tereno",
"tet"            => "Tetum",
"tg"             => "Tajik",
"th"             => "Thai",
"ti"             => "Tigrinya",
"tig"            => "Tigre",
"tiv"            => "Tiv",
"tk"             => "Turkmen",
"tkl"            => "Tokelau",
"tl"             => "Tagalog",
"tli"            => "Tlingit",
"tmh"            => "Tamashek",
"tn"             => "Tswana",
"to"             => "Tonga (Tonga Islands)",
"tog"            => "Tonga (Nyasa)",
"tpi"            => "Tok Pisin",
"tr"             => "Turkish",
"ts"             => "Tsonga",
"tsi"            => "Tsimshian",
"tt"             => "Tatar",
"tum"            => "Tumbuka",
"tut"            => "Altaic (Other)",
"tvl"            => "Tuvalu",
"tw"             => "Twi",
"ty"             => "Tahitian",
"tyv"            => "Tuvinian",
"ug"             => "Uighur",
"uga"            => "Ugaritic",
"uk"             => "Ukrainian",
"umb"            => "Umbundu",
"und"            => "Undetermined",
"ur"             => "Urdu",
"uz"             => "Uzbek",
"vai"            => "Vai",
"ven"            => "Venda",
"vi"             => "Vietnamese",
"vo"             => "Volapuk",
"vot"            => "Votic",
"wak"            => "Wakashan languages",
"wal"            => "Walamo",
"war"            => "Waray",
"was"            => "Washo",
"wen"            => "Sorbian languages",
"wo"             => "Wolof",
"xh"             => "Xhosa",
"yao"            => "Yao",
"yap"            => "Yapese",
"yi"             => "Yiddish",
"yo"             => "Yoruba",
"ypk"            => "Yupik languages",
"za"             => "Zhuang",
"zap"            => "Zapotec",
"zen"            => "Zenaga",
"zh"             => "Chinese",
"zh-cn"          => "PRC Chinese",
"zh-gan"         => "Gan",
"zh-guoyu"       => "Mandarin",
"zh-hakka"       => "Hakka",
"zh-hk"          => "Hong Kong Chinese",
"zh-min"         => "Hokkien",
"zh-min-nan"     => "Southern Hokkien",
"zh-mo"          => "Macau Chinese",
"zh-sg"          => "Singapore Chinese",
"zh-tw"          => "Taiwan Chinese",
"zh-wuu"         => "Shanghaiese",
"zh-xiang"       => "Hunanese",
"zh-yue"         => "Cantonese",
"znd"            => "Zande",
"zu"             => "Zulu",
"zun"            => "Zuni",
);


# Precompiled pattern for an RFC 3066-style language tag: a primary subtag
# of two or three ASCII letters, followed by any number of "-subtag" parts
# of one to eight ASCII letters or digits each.
# We ignore the i- and x- extensions: their primary subtag is a single
# letter, which this pattern does not accept.
my $rfc3066_re = qr/(?:[a-zA-Z]{2,3})(?:-[a-zA-Z0-9]{1,8})*/;

# -----------------------------------------------------------------------------
# Functions
# -----------------------------------------------------------------------------

# getLanguage( $ua )
#
# Guess the language advertised in a User-Agent string.
#
# Every substring of $ua that is shaped like a language tag (see
# $rfc3066_re), is preceded by one of ' ', '_', ';' or '[' and is followed
# by one of ';', ')' or ']' is treated as a candidate.  Candidates are
# lower-cased and looked up in %LanguageHash in order of appearance.
#
# Returns the language name of the first candidate that is a known tag.
# Returns undef when $ua is undefined, when it contains no candidate, or
# when none of the candidates is a known tag.
#
# Note: "return undef" (rather than a bare "return") is kept on purpose so
# that callers using the result in list context still get exactly one value.
sub getLanguage {
    my ( $ua ) = @_;

    return undef unless defined $ua;

    # Collect *all* candidates.  The /g modifier is essential: without it a
    # list-context match only yields the first candidate, so a leading
    # non-language token such as "PPC" in "(Macintosh; U; PPC; en-US)"
    # would hide the real tag that follows it.
    # The closing delimiter is a lookahead so that it is not consumed and
    # can also serve as the opening delimiter of the next candidate
    # (e.g. "[en;fr]").
    my @tags = map { lc $_ } $ua =~ /[ _;\[]($rfc3066_re)(?=[;\)\]])/g;

    # Return the first candidate that is a known language tag.
    foreach my $tag ( @tags ) {
        return $LanguageHash{$tag} if exists $LanguageHash{$tag};
    }

    # FIXME: Either no candidate was found or none of the candidates is a
    # defined language tag.  Determine if the latter can be considered a bug.
    return undef;
}

1;

