X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/ca8caa9227ad9151bd57ab7eccab2c8947ea55da..c133e6ad3027e095811931f3e0d21b818865e0e8:/tools/mkcountries-geonames diff --git a/tools/mkcountries-geonames b/tools/mkcountries-geonames index 325379c..b7fb27e 100755 --- a/tools/mkcountries-geonames +++ b/tools/mkcountries-geonames @@ -4,64 +4,83 @@ use warnings; my %cc; # map of country code to info array %cc = ( + # transitional reservations + an => ["transitionally reserved: Netherlands Antilles", "c-na Xr", "Netherlands Antilles", 'bq cw sx'], # 2060 + bu => ["transitionally reserved: Burma", "c-as Xr", "Burma", 'mm'], # 2039 + cs => ["transitionally reserved: Serbia and Montenegro", "c-eu Xr", "Serbia and Montenegro", 'rs me'], # 2056 + nt => ["transitionally reserved: Neutral Zone", "c-as Xr", "N. Zone", 'iq sa'], # 2043 + tp => ["transitionally reserved: East Timor", "c-oc Xr", "(East Timor)", 'tl'], # 2052 + yu => ["transitionally reserved: Yugoslavia", "c-eu Xr", "Yugoslavia", 'cs ba hr mk si'], # 2053 + zr => ["transitionally reserved: Zaire", "c-af Xr", "Zaire", 'cd'], # 2047 + # exceptional reservations - ac => ["Ascension Island", "c-oc Xr", undef, 'sh'], - cp => ["Clipperton Island", "c-na Xr"], - dg => ["Diego Garcia", "c-as Xr", undef, 'io'], - ea => ["Ceuta and Melilla", "c-af Xr"], - eu => ["European Union", "c-eu Xr"], - fx => ["Metropolitan France", "c-eu Xr", undef, 'fr'], - ic => ["Canary Islands", "c-af Xr"], - su => ["former USSR", "c-eu Xr"], - ta => ["Tristan da Cunha", "c-oc Xr", undef, 'sh'], - uk => ["(United Kingdom)", "c-eu Xr", undef, 'gb'], + ac => ["exceptionally reserved: Ascension Island", "c-oc Xr", "Ascension Island", 'sh'], + cp => ["exceptionally reserved: Clipperton Island", "c-na Xr", "Clipperton Island"], + dg => ["exceptionally reserved: Diego Garcia", "c-as Xr", "Diego Garcia", 'io'], + ea => ["exceptionally reserved: Ceuta and Melilla", "c-af Xr", "Ceuta and Melilla"], + eu => ["exceptionally reserved: European Union", "c-eu Xr", "European Union"], + ez => ["exceptionally reserved: European OTC derivatives", "c-eu Xr", "Eurozone"], + fx => ["exceptionally reserved: Metropolitan France", "c-eu Xr", "Metropolitan France", 'fr'], + ic => ["exceptionally reserved: Canary Islands", "c-af Xr", "Canary Islands"], + su => ["exceptionally reserved: former USSR", "c-eu Xr", "USSR"], + ta => ["exceptionally reserved: Tristan da Cunha", "c-oc Xr", "Tristan da Cunha", 'sh'], + uk => ["exceptionally reserved: United Kingdom", "c-eu Xr", "(United Kingdom)", 'gb'], + un => ["exceptionally reserved: United Nations", "Xi Xr", "United Nations"], # indeterminate reservations - dy => ["(Benin)", "c-af Xr", undef, 'bj'], - ew => ["(Estonia)", "c-eu Xr", undef, 'ee'], - fl => ["(Liechtenstein)", "c-eu Xr", undef, 'li'], - ja => ["(Jamaica)", "c-na Xr", undef, 'jm'], - lf => ["Libya Fezzan", "c-af Xr", "Fezzan", 'ly'], - pi => ["(Philippines)", "c-as Xr", undef, 'ph'], - ra => ["(Argentina)", "c-sa Xr", undef, 'ar'], - rb => ["Bolivia/Botswana", "c-xx Xr"], - rc => ["(RoC)", "c-as Xr", undef, 'tw'], - rh => ["(Haiti)", "c-na Xr",undef, 'ht'], - ri => ["(Indonesia)", "c-as Xr", undef, 'id'], - rl => ["(Lebanon)", "c-as Xr", undef, 'lb'], - rm => ["(Madagascar)", "c-af Xr", undef, 'mg'], - rn => ["(Niger)", "c-af Xr",undef, 'ne'], - rp => ["(Philippines)", "c-as Xr", undef, 'ph'], - wg => ["(Grenada)", "c-na Xr", undef, 'gd'], - wl => ["(Saint Lucia)", "c-na Xr", "(Saint Luc.)", 'lc'], - wv => ["(Saint Vincent)", "c-na Xr", "(Saint Vin.)", 'vc'], - yv => ["(Venezuela)", "c-sa Xr", undef, 've'], + dy => ["indeterminately reserved: Benin", "c-af Xr", "(Benin)", 'bj'], + ew => ["indeterminately reserved: Estonia", "c-eu Xr", "(Estonia)", 'ee'], + fl => ["indeterminately reserved: Liechtenstein", "c-eu Xr", "(Liechtenstein)", 'li'], + ja => ["indeterminately reserved: Jamaica", "c-na Xr", "(Jamaica)", 'jm'], + lf => ["indeterminately reserved: Libya Fezzan", "c-af Xr", "Fezzan", 'ly'], + pi => ["indeterminately reserved: Philippines", "c-as Xr", "(Philippines)", 'ph'], + ra => ["indeterminately reserved: Argentina", "c-sa Xr", "(Argentina)", 'ar'], + rb => ["indeterminately reserved: Botswana, Bolivia", "c-sa Xr", "(Bots...)", 'bw bo'], + rc => ["indeterminately reserved: Republic of China", "c-as Xr", "(RoC)", 'tw'], + rh => ["indeterminately reserved: Haiti", "c-na Xr", "(Haiti)", 'ht'], + ri => ["indeterminately reserved: Indonesia", "c-as Xr", "(Indonesia)", 'id'], + rl => ["indeterminately reserved: Lebanon", "c-as Xr", "(Lebanon)", 'lb'], + rm => ["indeterminately reserved: Madagascar", "c-af Xr", "(Madagascar)", 'mg'], + rn => ["indeterminately reserved: Niger", "c-af Xr", "(Niger)", 'ne'], + rp => ["indeterminately reserved: Philippines", "c-as Xr", "(Philippines)", 'ph'], + sf => ["indeterminately reserved: Finland", "c-eu Xr", "(Finland)", 'fi'], + wg => ["indeterminately reserved: Grenada", "c-na Xr", "(Grenada)", 'gd'], + wl => ["indeterminately reserved: Saint Lucia", "c-na Xr", "(Saint Luc.)", 'lc'], + wv => ["indeterminately reserved: Saint Vincent", "c-na Xr", "(Saint Vin.)", 'vc'], + yv => ["indeterminately reserved: Venezuela", "c-sa Xr", "(Venezuela)", 've'], # WIPO, agreed not to use - ap => ["African Regional Industrial Property Organization", "Xi", "ARIPO"], # c-af - bx => ["Benelux Office for Intellectual Property", "Xi", "BOIP"], # c-eu - ef => ["European Community Patent Convention", "Xi", "CPC"], # c-eu - em => ["European Trademark Office", "Xi", "OHIM"], # c-eu - ep => ["European Patent Organization", "Xi", "EPOrg"], # c-eu - ev => ["Eurasian Patent Organization", "Xi", "EAPO"], # c-as - gc => ["Gulf Patent Office", "Xi", "GCCPO"], # c-as - ib => ["International Bureau of WIPO", "Xi", "IB WIPO"], - oa => ["African Intellectual Property Organization", "Xi", "OAPI"], # c-af - wo => ["World Intellectual Property Organization", "Xi", "WIPO"], + ap => ["not used: African Regional Industrial Property Organization", "Xi", "ARIPO"], # c-af + bx => ["not used: Benelux Office for Intellectual Property", "Xi", "BOIP"], # c-eu + ef => ["not used: European Community Patent Convention", "Xi", "CPC"], # c-eu + em => ["not used: European Trademark Office", "Xi", "OHIM"], # c-eu + ep => ["not used: European Patent Organization", "Xi", "EPOrg"], # c-eu + ev => ["not used: Eurasian Patent Organization", "Xi", "EAPO"], # c-as + gc => ["not used: Gulf Patent Office", "Xi", "GCCPO"], # c-as + ib => ["not used: International Bureau of WIPO", "Xi", "IB"], + oa => ["not used: African Intellectual Property Organization", "Xi", "OAPI"], # c-af + wo => ["not used: World Intellectual Property Organization", "Xi", "WIPO"], + + # unicode identifiers + qo => ["Unicode semantics: Outlying Oceania", "c-oc Co Xi", "Oceania"], + qu => ["Unicode semantics: European Union deprecated reserve", "c-eu Co Xi", "EU", 'eu'], + zz => ["Unicode semantics: unknown or invalid territory", "Co Xi","unknown"], ); while (<>) { /^#/ and next; # skip comments - my ($iso, $name, $cont) = (split /\t/)[0, 4, 8]; - my $class = "c-\L$cont"; - $cc{ lc $iso } = [ $name, $class ]; + my ($iso, $name, $cont, $tld) = (split /\t/)[0, 4, 8, 9]; + my @info = ($name, "c-\L$cont"); + $info[3] = $tld if $tld =~ s/\A\.// and $tld ne lc $iso; + $cc{ lc $iso } //= \@info; } $cc{io}->[2] = "Chagos Islands"; $cc{um}->[2] = "U.S. isl."; for (values %cc) { - for ($_->[2] //= $_->[0]) { + my $abbr = do { + local $_ = $_->[2] // $_->[0]; s/,.*//; s/(?<=.)\(.*\)\s*//; s/ republic\b//gi; @@ -76,7 +95,9 @@ for (values %cc) { s/New /n./g; s/(\S)(\S+)-/$1-/g; # strip most chars preceding dash s/(\S{4}[b-df-hj-np-tv-xz])((?[2] = $abbr if $abbr ne $_->[0]; # short name if different } say "# automatically generated by $0";