#!/usr/bin/env perl
# Build a hash of country-code records and print it as a Perl data dump.
#
# Input:  tab-separated lines read via <> (files named on the command line,
#         or standard input); lines starting with '#' are ignored.
# Output: a "# automatically generated by ..." header line followed by a
#         Data::Dump rendering of the %cc hash.
#
# Each %cc value is an array reference:
#   [0] full name or description
#   [1] space-separated class tokens; parsed rows get "c-" plus the
#       lowercased continent column (meaning of "Xr"/"Xi" is not visible
#       in this file -- TODO confirm against the consumer)
#   [2] short display name (optional; derived below when absent)
#   [3] optional extra codes: related codes in the static table, or the
#       TLD (without its dot) for parsed rows when it differs from the key
use 5.012;
use warnings;

my %cc; # map of country code to info array

# Static entries for codes that are reserved or deliberately unused.
# Any parsed input row with the same lowercased code replaces its entry here.
%cc = (
    # transitional reservations
    # NOTE(review): the trailing numbers look like years; their exact meaning
    # is not stated in this file -- confirm before relying on them.
    an => ["transitionally reserved: Netherlands Antilles", "c-na Xr", "Netherlands Antilles", 'bq cw sx'], # 2060
    bu => ["transitionally reserved: Burma", "c-as Xr", "Burma", 'mm'], # 2039
    cs => ["transitionally reserved: Serbia and Montenegro", "c-eu Xr", "Serbia and Montenegro", 'rs me'], # 2056
    nt => ["transitionally reserved: Neutral Zone", "c-as Xr", "N. Zone", 'iq sa'], # 2043
    tp => ["transitionally reserved: East Timor", "c-oc Xr", "(East Timor)", 'tl'], # 2052
    yu => ["transitionally reserved: Yugoslavia", "c-eu Xr", "Yugoslavia", 'cs ba hr mk si'], # 2053
    zr => ["transitionally reserved: Zaire", "c-af Xr", "Zaire", 'cd'], # 2047

    # exceptional reservations
    ac => ["exceptionally reserved: Ascension Island", "c-oc Xr", "Ascension Island", 'sh'],
    cp => ["exceptionally reserved: Clipperton Island", "c-na Xr", "Clipperton Island"],
    dg => ["exceptionally reserved: Diego Garcia", "c-as Xr", "Diego Garcia", 'io'],
    ea => ["exceptionally reserved: Ceuta and Melilla", "c-af Xr", "Ceuta and Melilla"],
    eu => ["exceptionally reserved: European Union", "c-eu Xr", "European Union"],
    ez => ["exceptionally reserved: European OTC derivatives", "c-eu Xr", "European OTC"],
    fx => ["exceptionally reserved: Metropolitan France", "c-eu Xr", "Metropolitan France", 'fr'],
    ic => ["exceptionally reserved: Canary Islands", "c-af Xr", "Canary Islands"],
    su => ["exceptionally reserved: former USSR", "c-eu Xr", "USSR"],
    ta => ["exceptionally reserved: Tristan da Cunha", "c-oc Xr", "Tristan da Cunha", 'sh'],
    uk => ["exceptionally reserved: United Kingdom", "c-eu Xr", "(United Kingdom)", 'gb'],
    un => ["exceptionally reserved: United Nations", "Xi Xr", "United Nations"],

    # indeterminate reservations
    dy => ["indeterminately reserved: Benin", "c-af Xr", "(Benin)", 'bj'],
    ew => ["indeterminately reserved: Estonia", "c-eu Xr", "(Estonia)", 'ee'],
    fl => ["indeterminately reserved: Liechtenstein", "c-eu Xr", "(Liechtenstein)", 'li'],
    ja => ["indeterminately reserved: Jamaica", "c-na Xr", "(Jamaica)", 'jm'],
    lf => ["indeterminately reserved: Libya Fezzan", "c-af Xr", "Fezzan", 'ly'],
    pi => ["indeterminately reserved: Philippines", "c-as Xr", "(Philippines)", 'ph'],
    ra => ["indeterminately reserved: Argentina", "c-sa Xr", "(Argentina)", 'ar'],
    rb => ["indeterminately reserved: Botswana, Bolivia", "c-sa Xr", "(Bots...)"],
    rc => ["indeterminately reserved: Republic of China", "c-as Xr", "(RoC)", 'tw'],
    rh => ["indeterminately reserved: Haiti", "c-na Xr", "(Haiti)", 'ht'],
    ri => ["indeterminately reserved: Indonesia", "c-as Xr", "(Indonesia)", 'id'],
    rl => ["indeterminately reserved: Lebanon", "c-as Xr", "(Lebanon)", 'lb'],
    rm => ["indeterminately reserved: Madagascar", "c-af Xr", "(Madagascar)", 'mg'],
    rn => ["indeterminately reserved: Niger", "c-af Xr", "(Niger)", 'ne'],
    rp => ["indeterminately reserved: Philippines", "c-as Xr", "(Philippines)", 'ph'],
    sf => ["indeterminately reserved: Finland", "c-eu Xr", "(Finland)", 'fi'],
    wg => ["indeterminately reserved: Grenada", "c-na Xr", "(Grenada)", 'gd'],
    wl => ["indeterminately reserved: Saint Lucia", "c-na Xr", "(Saint Luc.)", 'lc'],
    wv => ["indeterminately reserved: Saint Vincent", "c-na Xr", "(Saint Vin.)", 'vc'],
    yv => ["indeterminately reserved: Venezuela", "c-sa Xr", "(Venezuela)", 've'],

    # WIPO, agreed not to use
    # (the trailing "c-.." remarks are comments only; these entries carry
    # just the "Xi" token in element [1])
    ap => ["not used: African Regional Industrial Property Organization", "Xi", "ARIPO"], # c-af
    bx => ["not used: Benelux Office for Intellectual Property", "Xi", "BOIP"], # c-eu
    ef => ["not used: European Community Patent Convention", "Xi", "CPC"], # c-eu
    em => ["not used: European Trademark Office", "Xi", "OHIM"], # c-eu
    ep => ["not used: European Patent Organization", "Xi", "EPOrg"], # c-eu
    ev => ["not used: Eurasian Patent Organization", "Xi", "EAPO"], # c-as
    gc => ["not used: Gulf Patent Office", "Xi", "GCCPO"], # c-as
    ib => ["not used: International Bureau of WIPO", "Xi", "IB"],
    oa => ["not used: African Intellectual Property Organization", "Xi", "OAPI"], # c-af
    wo => ["not used: World Intellectual Property Organization", "Xi", "WIPO"],
);

# Read the tab-separated input, one record per line.
while (<>) {
    /^#/ and next; # skip comments
    # Fields 0, 4, 8 and 9 (zero-based) are taken as the code, the name,
    # the continent code and the TLD respectively.
    my ($iso, $name, $cont, $tld) = (split /\t/)[0, 4, 8, 9];
    # \L lowercases the continent code inside the string, e.g. "c-eu".
    my @info = ($name, "c-\L$cont");
    # Record the TLD only if a leading dot was stripped from it and the
    # remainder differs from the lowercased code; element [2] stays undefined.
    $info[3] = $tld if $tld =~ s/\A\.// and $tld ne lc $iso;
    # Keys are lowercased codes; this replaces any static entry for the key.
    $cc{ lc $iso } = \@info;
}

# Hand-picked short names; they still pass through the abbreviation loop
# below because it starts from element [2] whenever that is defined.
$cc{io}->[2] = "Chagos Islands";
$cc{um}->[2] = "U.S. isl.";

# Derive a compact short name for every entry.
for (values %cc) {
    my $abbr = do {
        # Work on a copy: the existing short name if any, else the full name.
        local $_ = $_->[2] // $_->[0];
        s/,.*//;                    # drop everything from the first comma on
        s/(?<=.)\(.*\)\s*//;        # drop a parenthesised part, unless the name starts with it
        s/ republic\b//gi;          # drop the word "republic" (any case)
        s/ islands?\b//gi;          # drop "island"/"islands" (any case)
        s/\bthe //g;                # drop lowercase "the "
        # After turning " and " into " & ", reduce the name to initials by
        # removing every run of lowercase letters and spaces that is not at
        # the very start (e.g. "Bosnia and Herzegovina" becomes "B&H").
        s/ and / & /g and s/(?<=.)[a-z ]+//g;
        s/ of / /g;                 # drop "of" between words
        s/\bsa?int /st /gi;         # "Saint "/"Sint " become "st "
        s/United /Un. /gi;
        s/South(?:ern)? /S-/g;      # "South "/"Southern " become the prefix "S-"
        s/North(?:ern)? /N-/g;      # "North "/"Northern " become the prefix "N-"
        s/New /n./g;                # "New " becomes the prefix "n."
        s/(\S)(\S+)-/$1-/g; # strip most chars preceding dash
        # NOTE(review): the next statement is damaged in this copy of the file.
        # The rest of the substitution, the value and closing of the do-block,
        # and the target of the "[2]" assignment are missing. Restore them from
        # the original script before running; the text is kept verbatim here.
        s/(\S{4}[b-df-hj-np-tv-xz])((?[2] = $abbr if $abbr ne $_->[0]; # short name if different
}

# Emit the result: a header comment naming this script, then the dump.
say "# automatically generated by $0";
use Data::Dump 'dd';
$Data::Dump::INDENT = ''; # empty indent string, so nested levels are not indented
dd \%cc;

# NOTE(review): the POD NAME below ("mkcountries-geonames") and the script
# path in the SYNOPSIS ("tools/mkcountryinfo") differ -- confirm which is current.
__END__

=head1 NAME

mkcountries-geonames - Create Perl include of country info from GeoNames data

=head1 SYNOPSIS

    curl http://download.geonames.org/export/dump/countryInfo.txt |
    tools/mkcountryinfo > countries.inc.pl