countries: store abbreviations in include
[sheet.git] / tools / mkcountries-geonames
index 0eaf88006fff31cac9cebf5805419d317a288af9..d9b938a03ce070c4a10df882ba21107f948eddf6 100755 (executable)
@@ -11,6 +11,23 @@ while (<>) {
        $cc{ lc $iso } = [ $name, $class ];
 }
 
+for (values %cc) {  # derive an abbreviated name for every record; records are built above as [ name, class ]
+       for ($_->[2] //= $_->[0]) {  # slot 2 defaults to the full name (unless already defined); $_ is aliased to it, so the s/// below edit it in place
+               s/,.*//;  # drop everything from the first comma onwards
+               s/(?<=.)\(.*\)\s*//;  # drop a parenthesised part (first "(" to last ")") plus trailing whitespace, unless it starts the string
+               s/ republic\b//gi;  # drop the word " republic" (any case); needs a leading space, so a leading "Republic" stays
+               s/ islands?\b//gi;  # likewise drop " island" / " islands"
+               s/\bthe //g;  # drop "the " -- case-sensitive, so a capitalised "The " is kept
+               s/ and / & /g and s/(?<=.)[a-z ]+//g;  # only if " and " occurred: also strip lowercase letters and spaces after the first char, e.g. "Trinidad & Tobago" -> "T&T"
+               s/\bsaint /st /gi;  # "saint " in any case becomes lowercase "st "
+               s/South(?:ern)? /S-/g;  # "South " / "Southern " -> "S-" (case-sensitive)
+               s/North(?:ern)? /N-/g;  # "North " / "Northern " -> "N-" (case-sensitive)
+               s/New /n./g;  # "New " -> "n." (lowercase n)
+               s/(\S)(\S+)-/$1-/g;  # strip most chars preceding dash: keep only the first char, e.g. "Guinea-Bissau" -> "G-Bissau"; one-char prefixes such as "S-" do not match
+               s/(\S{4}[b-df-hj-np-tv-xz])((?<!Austr)(?!land)\w{2,})/$1./g;  # abbreviate (at consonant): keep 4 non-space chars + a lowercase consonant (y excluded), replace the 2+ following word chars by "."; no cut directly after "Austr" or directly before "land"
+       }
+}
+
 say "# automatically generated by $0";
 use Data::Dump 'dd';
 $Data::Dump::INDENT = '';