#!/usr/bin/env perl
-use 5.012;
+use 5.014;
use warnings;
my %cc; # map of country code to info array
dg => ["exceptionally reserved: Diego Garcia", "c-as Xr", "Diego Garcia", 'io'],
ea => ["exceptionally reserved: Ceuta and Melilla", "c-af Xr", "Ceuta and Melilla"],
eu => ["exceptionally reserved: European Union", "c-eu Xr", "European Union"],
- ez => ["exceptionally reserved: European OTC derivatives", "c-eu Xr", "European OTC"],
+ ez => ["exceptionally reserved: European OTC derivatives", "c-eu Xr", "Eurozone"],
fx => ["exceptionally reserved: Metropolitan France", "c-eu Xr", "Metropolitan France", 'fr'],
ic => ["exceptionally reserved: Canary Islands", "c-af Xr", "Canary Islands"],
su => ["exceptionally reserved: former USSR", "c-eu Xr", "USSR"],
lf => ["indeterminately reserved: Libya Fezzan", "c-af Xr", "Fezzan", 'ly'],
pi => ["indeterminately reserved: Philippines", "c-as Xr", "(Philippines)", 'ph'],
ra => ["indeterminately reserved: Argentina", "c-sa Xr", "(Argentina)", 'ar'],
- rb => ["indeterminately reserved: Botswana, Bolivia", "c-sa Xr", "(Bots...)"],
+ rb => ["indeterminately reserved: Botswana, Bolivia", "c-sa Xr", "(Bots...)", 'bw bo'],
rc => ["indeterminately reserved: Republic of China", "c-as Xr", "(RoC)", 'tw'],
rh => ["indeterminately reserved: Haiti", "c-na Xr", "(Haiti)", 'ht'],
ri => ["indeterminately reserved: Indonesia", "c-as Xr", "(Indonesia)", 'id'],
ib => ["not used: International Bureau of WIPO", "Xi", "IB"],
oa => ["not used: African Intellectual Property Organization", "Xi", "OAPI"], # c-af
wo => ["not used: World Intellectual Property Organization", "Xi", "WIPO"],
+
+ # commonly used user-assigned codes
+ xz => ["UN/LOCODE semantics: international waters", "Co Xi", "international"],
+ qo => ["Unicode semantics: Outlying Oceania", "c-oc Co Xi", "Oceania"],
+ qu => ["Unicode semantics: European Union deprecated reserve", "c-eu Co Xi", "EU", 'eu'],
+ zz => ["Unicode semantics: unknown or invalid territory", "Co Xi","unknown"],
);
# Shorten the text held in $_ by applying the substitutions below, in order.
# Order matters: later patterns see the output of earlier ones.
# NOTE(review): this excerpt looks like diff output ("-" = old line, "+" = new
# line); the loop header and the body may come from different hunks - confirm.
while (<>) {
s/(?<=.)\(.*\)\s*//; # drop a parenthesised part (first "(" to last ")") plus trailing space, unless it starts the text
s/ republic\b//gi; # drop the word "republic" (any case) together with its leading space
s/ islands?\b//gi; # likewise drop "island" or "islands"
- s/\bthe //g; # old: only lowercase "the " was removed
+ s/\bthe //gi; # new: remove "the " in any case
s/ and / & /g and s/(?<=.)[a-z ]+//g; # if any " and " became " & ", also delete every run of lowercase letters and spaces that follows a character
s/ of / /g; # drop the word "of" between two spaces
s/\bsa?int /st /gi; # "saint " or "sint " (any case) becomes lowercase "st "
- s/United /Un. /gi; # old: replacement always spelled "Un. ", whatever the input case
+ s/Un\Kited /. /gi; # new: \K keeps the matched "Un" as written and replaces only "ited " with ". "
s/South(?:ern)? /S-/g; # "South " or "Southern " becomes the prefix "S-" (case-sensitive)
s/North(?:ern)? /N-/g; # same for North(ern), giving "N-"
+ s/West(?:ern)? /W-/g; # new: same for West(ern), giving "W-"
+ s/East(?:ern)? /E-/g; # new: same for East(ern), giving "E-"
s/New /n./g; # "New " becomes "n." (case-sensitive)
- s/(\S)(\S+)-/$1-/g; # strip most chars preceding dash (old: any non-space characters)
+ s/(\w)(\w+)-/$1-/g; # strip most chars preceding dash (new: word characters only, keeping the first one)
s/(\S{4}[b-df-hj-np-tv-xz])((?<!Austr)(?!land)\w{2,})/$1./g; # abbreviate (at consonant): after 4 non-space chars plus a lowercase consonant (not y), replace 2+ further word chars with "."; skipped right after "Austr" or when the remainder starts with "land"
$_; # bare $_ as the final statement; presumably the value of an enclosing block not visible here - verify
};