X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/49069763e748ef46dcb2587647a6f5c26bf04214..e7c68cd940f8ab1a8de85f6685367cfb7eb6378f:/tools/mkcountries-geonames diff --git a/tools/mkcountries-geonames b/tools/mkcountries-geonames index 0eaf880..91daae7 100755 --- a/tools/mkcountries-geonames +++ b/tools/mkcountries-geonames @@ -3,12 +3,81 @@ use 5.012; use warnings; my %cc; # map of country code to info array +%cc = ( + # exceptional reservations + ac => ["Ascension Island", "c-oc Xr", undef, 'sh'], + cp => ["Clipperton Island", "c-na Xr"], + dg => ["Diego Garcia", "c-as Xr", undef, 'io'], + ea => ["Ceuta and Melilla", "c-af Xr"], + eu => ["European Union", "c-eu Xr"], + fx => ["Metropolitan France", "c-eu Xr", undef, 'fr'], + ic => ["Canary Islands", "c-af Xr"], + su => ["former USSR", "c-eu Xr"], + ta => ["Tristan da Cunha", "c-oc Xr", undef, 'sh'], + uk => ["(United Kingdom)", "c-eu Xr", undef, 'gb'], + + # indeterminate reservations + dy => ["(Benin)", "c-af Xr", undef, 'bj'], + ew => ["(Estonia)", "c-eu Xr", undef, 'ee'], + fl => ["(Liechtenstein)", "c-eu Xr", undef, 'li'], + ja => ["(Jamaica)", "c-na Xr", undef, 'jm'], + lf => ["Libya Fezzan", "c-af Xr", "Fezzan", 'ly'], + pi => ["(Philippines)", "c-as Xr", undef, 'ph'], + ra => ["(Argentina)", "c-sa Xr", undef, 'ar'], + rb => ["Bolivia/Botswana", "c-xx Xr"], + rc => ["(RoC)", "c-as Xr", undef, 'tw'], + rh => ["(Haiti)", "c-na Xr",undef, 'ht'], + ri => ["(Indonesia)", "c-as Xr", undef, 'id'], + rl => ["(Lebanon)", "c-as Xr", undef, 'lb'], + rm => ["(Madagascar)", "c-af Xr", undef, 'mg'], + rn => ["(Niger)", "c-af Xr",undef, 'ne'], + rp => ["(Philippines)", "c-as Xr", undef, 'ph'], + wg => ["(Grenada)", "c-na Xr", undef, 'gd'], + wl => ["(Saint Lucia)", "c-na Xr", "(Saint Luc.)", 'lc'], + wv => ["(Saint Vincent)", "c-na Xr", "(Saint Vin.)", 'vc'], + yv => ["(Venezuela)", "c-sa Xr", undef, 've'], + + # WIPO, agreed not to use + ap => ["African Regional Industrial Property Organization", "Xi", "ARIPO"], # c-af + bx => ["Benelux Office for Intellectual Property", "Xi", "BOIP"], # c-eu + ef => ["European Community Patent Convention", "Xi", "CPC"], # c-eu + em => ["European Trademark Office", "Xi", "OHIM"], # c-eu + ep => ["European Patent Organization", "Xi", "EPOrg"], # c-eu + ev => ["Eurasian Patent Organization", "Xi", "EAPO"], # c-as + gc => ["Gulf Patent Office", "Xi", "GCCPO"], # c-as + ib => ["International Bureau of WIPO", "Xi", "IB WIPO"], + oa => ["African Intellectual Property Organization", "Xi", "OAPI"], # c-af + wo => ["World Intellectual Property Organization", "Xi", "WIPO"], +); while (<>) { /^#/ and next; # skip comments - my ($iso, $name, $cont) = (split /\t/)[0, 4, 8]; - my $class = "c-\L$cont"; - $cc{ lc $iso } = [ $name, $class ]; + my ($iso, $name, $cont, $tld) = (split /\t/)[0, 4, 8, 9]; + my @info = ($name, "c-\L$cont"); + $info[3] = $tld if $tld =~ s/\A\.// and $tld ne lc $iso; + $cc{ lc $iso } = \@info; +} + +$cc{io}->[2] = "Chagos Islands"; +$cc{um}->[2] = "U.S. isl."; + +for (values %cc) { + for ($_->[2] //= $_->[0]) { + s/,.*//; + s/(?<=.)\(.*\)\s*//; + s/ republic\b//gi; + s/ islands?\b//gi; + s/\bthe //g; + s/ and / & /g and s/(?<=.)[a-z ]+//g; + s/ of / /g; + s/\bsa?int /st /gi; + s/United /Un. /gi; + s/South(?:ern)? /S-/g; + s/North(?:ern)? /N-/g; + s/New /n./g; + s/(\S)(\S+)-/$1-/g; # strip most chars preceding dash + s/(\S{4}[b-df-hj-np-tv-xz])((?