countries: note reservation status in descriptions
[sheet.git] / tools / mkcountries-geonames
1 #!/usr/bin/env perl
2 use 5.012;
3 use warnings;
4
# Map of lowercase two-letter code to an info array:
#   [0] description (full name, prefixed with the reservation status here)
#   [1] space-separated class string: "c-" plus a lowercase continent code,
#       and/or a status marker ("Xr", "Xi" -- presumably reserved/ignored
#       style classes used by the consumer of the output; confirm there)
#   [2] optional short display name
#   [3] optional related code (for these entries the code of the country
#       the reservation belongs to)
# Entries for assigned countries are added later from the GeoNames input.
my %cc = (
        # exceptional reservations
        # ac and ta are parts of Saint Helena (sh), which GeoNames files under
        # continent AF, so they are classed as Africa rather than Oceania.
        ac => ["exceptionally reserved: Ascension Island", "c-af Xr", "Ascension Island", 'sh'],
        cp => ["exceptionally reserved: Clipperton Island", "c-na Xr", "Clipperton Island"],
        dg => ["exceptionally reserved: Diego Garcia", "c-as Xr", "Diego Garcia", 'io'],
        ea => ["exceptionally reserved: Ceuta and Melilla", "c-af Xr", "Ceuta and Melilla"],
        eu => ["exceptionally reserved: European Union", "c-eu Xr", "European Union"],
        fx => ["exceptionally reserved: Metropolitan France", "c-eu Xr", "Metropolitan France", 'fr'],
        ic => ["exceptionally reserved: Canary Islands", "c-af Xr", "Canary Islands"],
        su => ["exceptionally reserved: former USSR", "c-eu Xr", "USSR"],
        ta => ["exceptionally reserved: Tristan da Cunha", "c-af Xr", "Tristan da Cunha", 'sh'],
        uk => ["exceptionally reserved: United Kingdom", "c-eu Xr", "(United Kingdom)", 'gb'],

        # indeterminate reservations
        dy => ["indeterminately reserved: Benin", "c-af Xr", "(Benin)", 'bj'],
        ew => ["indeterminately reserved: Estonia", "c-eu Xr", "(Estonia)", 'ee'],
        fl => ["indeterminately reserved: Liechtenstein", "c-eu Xr", "(Liechtenstein)", 'li'],
        ja => ["indeterminately reserved: Jamaica", "c-na Xr", "(Jamaica)", 'jm'],
        lf => ["indeterminately reserved: Libya Fezzan", "c-af Xr", "Fezzan", 'ly'],
        pi => ["indeterminately reserved: Philippines", "c-as Xr", "(Philippines)", 'ph'],
        ra => ["indeterminately reserved: Argentina", "c-sa Xr", "(Argentina)", 'ar'],
        rb => ["indeterminately reserved: Botswana, Bolivia", "c-sa Xr", "(Bots...)"],
        rc => ["indeterminately reserved: Republic of China", "c-as Xr", "(RoC)", 'tw'],
        rh => ["indeterminately reserved: Haiti", "c-na Xr", "(Haiti)", 'ht'],
        ri => ["indeterminately reserved: Indonesia", "c-as Xr", "(Indonesia)", 'id'],
        rl => ["indeterminately reserved: Lebanon", "c-as Xr", "(Lebanon)", 'lb'],
        rm => ["indeterminately reserved: Madagascar", "c-af Xr", "(Madagascar)", 'mg'],
        rn => ["indeterminately reserved: Niger", "c-af Xr", "(Niger)", 'ne'],
        rp => ["indeterminately reserved: Philippines", "c-as Xr", "(Philippines)", 'ph'],
        wg => ["indeterminately reserved: Grenada", "c-na Xr", "(Grenada)", 'gd'],
        wl => ["indeterminately reserved: Saint Lucia", "c-na Xr", "(Saint Luc.)", 'lc'],
        wv => ["indeterminately reserved: Saint Vincent", "c-na Xr", "(Saint Vin.)", 'vc'],
        yv => ["indeterminately reserved: Venezuela", "c-sa Xr", "(Venezuela)", 've'],

        # WIPO, agreed not to use
        ap => ["not used: African Regional Industrial Property Organization", "Xi", "ARIPO"], # c-af
        bx => ["not used: Benelux Office for Intellectual Property", "Xi", "BOIP"], # c-eu
        ef => ["not used: European Community Patent Convention", "Xi", "CPC"], # c-eu
        em => ["not used: European Trademark Office", "Xi", "OHIM"], # c-eu
        ep => ["not used: European Patent Organization", "Xi", "EPOrg"], # c-eu
        ev => ["not used: Eurasian Patent Organization", "Xi", "EAPO"], # c-as
        gc => ["not used: Gulf Patent Office", "Xi", "GCCPO"], # c-as
        ib => ["not used: International Bureau of WIPO", "Xi", "IB"],
        oa => ["not used: African Intellectual Property Organization", "Xi", "OAPI"], # c-af
        wo => ["not used: World Intellectual Property Organization", "Xi", "WIPO"],
);
52
# Read tab-separated GeoNames country rows from the files named on the
# command line (or standard input) and store one info array per country in
# %cc, keyed by the lowercase ISO code.  Rows replace any existing entry
# with the same code.
while (my $line = <>) {
        next if $line =~ /^#/;  # skip comments
        chomp $line;  # keep the newline out of the last field used

        # columns used: ISO code, country name, continent, top-level domain
        my ($iso, $name, $cont, $tld) = (split /\t/, $line)[0, 4, 8, 9];

        # blank or truncated lines would otherwise create a bogus entry
        next unless defined $cont and length $iso;

        my @info = ($name, "c-\L$cont");
        # remember the TLD (without leading dot) only if it differs from the code
        $info[3] = $tld if defined $tld and $tld =~ s/\A\.// and $tld ne lc $iso;
        $cc{ lc $iso } = \@info;
}
60
# Manual short names; the abbreviation pass starts from slot 2 when it is
# set instead of from the full name.  Only touch codes that were actually
# loaded: assigning through a missing key would autovivify an entry without
# a name or class and it would end up in the generated output.
$cc{io}[2] = "Chagos Islands" if exists $cc{io};
$cc{um}[2] = "U.S. isl."      if exists $cc{um};
63
# Derive a compact display name for every entry and store it in slot 2
# whenever it differs from the full description in slot 0.
for my $info (values %cc) {
        # start from an explicit short name if there is one, else the full name
        my $short = $info->[2] // $info->[0];

        for ($short) {  # topicalize: the substitutions below edit $short in place
                s/,.*//;                 # drop everything from the first comma
                s/(?<=.)\(.*\)\s*//;     # drop a parenthesized part, unless leading
                s/ republic\b//gi;
                s/ islands?\b//gi;
                s/\bthe //g;
                # with "and" present, reduce the name to initials around "&"
                s/ and / & /g and s/(?<=.)[a-z ]+//g;
                s/ of / /g;
                s/\bsa?int /st /gi;
                s/United /Un. /gi;
                s/South(?:ern)? /S-/g;
                s/North(?:ern)? /N-/g;
                s/New /n./g;
                s/(\S)(\S+)-/$1-/g;  # strip most chars preceding dash
                s/(\S{4}[b-df-hj-np-tv-xz])((?<!Austr)(?!land)\w{2,})/$1./g;  # abbreviate (at consonant)
        }

        # short name if different
        $info->[2] = $short if $short ne $info->[0];
}
85
use Data::Dump qw(dd);

# Emit a header comment naming the generating script, followed by the
# complete table as a Perl data structure on standard output.
say "# automatically generated by $0";
$Data::Dump::INDENT = '';  # empty indent string for nested lines
dd(\%cc);
90
91 __END__
92
93 =head1 NAME
94
95 mkcountries-geonames - Create Perl include of country info from GeoNames data
96
97 =head1 SYNOPSIS
98
99         curl http://download.geonames.org/export/dump/countryInfo.txt |
100         tools/mkcountries-geonames > countries.inc.pl
101