countries: reference tld if different from iso code

[sheet.git] / tools / mkcountries-geonames
diff --git a/tools/mkcountries-geonames b/tools/mkcountries-geonames

index 592ce752113eed8a84777857d56230303e2b6932..91daae753722d030e40af9340c1d6221f59377c0 100755 (executable)
--- a/tools/mkcountries-geonames
+++ b/tools/mkcountries-geonames
@@ -36,13 +36,26 @@ my %cc;  # map of country code to info array
         wl => ["(Saint Lucia)", "c-na Xr", "(Saint Luc.)", 'lc'],
         wv => ["(Saint Vincent)", "c-na Xr", "(Saint Vin.)", 'vc'],
         yv => ["(Venezuela)", "c-sa Xr", undef, 've'],
+
+       # WIPO, agreed not to use
+       ap => ["African Regional Industrial Property Organization", "Xi", "ARIPO"], # c-af
+       bx => ["Benelux Office for Intellectual Property", "Xi", "BOIP"], # c-eu
+       ef => ["European Community Patent Convention", "Xi", "CPC"], # c-eu
+       em => ["European Trademark Office", "Xi", "OHIM"], # c-eu
+       ep => ["European Patent Organization", "Xi", "EPOrg"], # c-eu
+       ev => ["Eurasian Patent Organization", "Xi", "EAPO"], # c-as
+       gc => ["Gulf Patent Office", "Xi", "GCCPO"], # c-as
+       ib => ["International Bureau of WIPO", "Xi", "IB WIPO"],
+       oa => ["African Intellectual Property Organization", "Xi", "OAPI"], # c-af
+       wo => ["World Intellectual Property Organization", "Xi", "WIPO"],
  );
  
  while (<>) {
         /^#/ and next;  # skip comments
-       my ($iso, $name, $cont) = (split /\t/)[0, 4, 8];
-       my $class = "c-\L$cont";
-       $cc{ lc $iso } = [ $name, $class ];
+       my ($iso, $name, $cont, $tld) = (split /\t/)[0, 4, 8, 9];  # pick tab-separated columns 0, 4, 8 and 9 (presumably the geonames country table layout -- verify against the input file)
+       my @info = ($name, "c-\L$cont");  # slot 0: name, slot 1: class built from the lowercased ("\L") continent value
+       $info[3] = $tld if $tld =~ s/\A\.// and $tld ne lc $iso;  # strip the leading dot; slot 3 is set only if a dot was removed and the result differs from the lowercased iso code (slot 2 stays undef here)
+       $cc{ lc $iso } = \@info;  # store under the lowercased iso code, replacing any existing %cc entry for that key
  }
  
  $cc{io}->[2] = "Chagos Islands";
@@ -56,7 +69,9 @@ for (values %cc) {
                 s/ islands?\b//gi;
                 s/\bthe //g;
                 s/ and / & /g and s/(?<=.)[a-z ]+//g;
-               s/\bsaint /st /gi;
+               s/ of / /g;
+               s/\bsa?int /st /gi;
+               s/United /Un. /gi;
                 s/South(?:ern)? /S-/g;
                 s/North(?:ern)? /N-/g;
                 s/New /n./g;