d9b938a03ce070c4a10df882ba21107f948eddf6
[sheet.git] / tools / mkcountries-geonames
1 #!/usr/bin/env perl
2 use 5.012;
3 use warnings;
4
5 my %cc;  # map of country code to info array
6
# Read GeoNames country rows (files named on the command line, or STDIN) and
# record one entry per country in %cc, keyed by the lowercased first field.
# NOTE(review): lines are not checked for their field count; a blank or short
# line would leave $name/$cont undefined here -- confirm the input has none.
7 while (<>) {
8         /^#/ and next;  # skip comment lines (those starting with '#')
9         my ($iso, $name, $cont) = (split /\t/)[0, 4, 8];  # tab-separated fields 0, 4 and 8
10         my $class = "c-\L$cont";  # "c-" followed by the lowercased continent field
11         $cc{ lc $iso } = [ $name, $class ];  # info array: [ full name, class ]
12 }
13
# Derive a short display name for every country and store it as element 2 of
# its info array.  The substitutions run in order and each one works on the
# result of the previous one, so their sequence matters.
14 for (values %cc) {
15         for ($_->[2] //= $_->[0]) {  # default element 2 to the full name, then alias $_ to it
16                 s/,.*//;  # drop the first comma and everything after it
17                 s/(?<=.)\(.*\)\s*//;  # drop text in parentheses (not when "(" is the first char) plus following whitespace
18                 s/ republic\b//gi;  # drop " republic" (any case) when preceded by a space
19                 s/ islands?\b//gi;  # drop " island" / " islands" (any case)
20                 s/\bthe //g;  # drop "the " (lowercase only; this match is case-sensitive)
                 # If " and " was replaced by " & ", reduce the name to initials:
                 # delete all lowercase letters and spaces after the first
                 # character, e.g. "Trinidad and Tobago" becomes "T&T".
21                 s/ and / & /g and s/(?<=.)[a-z ]+//g;
22                 s/\bsaint /st /gi;  # "saint " (any case) becomes "st "
23                 s/South(?:ern)? /S-/g;  # "South " / "Southern " becomes "S-"
24                 s/North(?:ern)? /N-/g;  # "North " / "Northern " becomes "N-"
25                 s/New /n./g;  # "New " becomes "n."
26                 s/(\S)(\S+)-/$1-/g;  # strip most chars preceding dash: of 2+ non-space chars only the first is kept
                 # Keep four non-space characters plus the lowercase consonant
                 # that follows them (y is not in the class) and replace the 2+
                 # word characters after that with ".", e.g. "Afghanistan"
                 # becomes "Afghan.".  Not applied directly after "Austr" or
                 # when the remainder starts with "land".
27                 s/(\S{4}[b-df-hj-np-tv-xz])((?<!Austr)(?!land)\w{2,})/$1./g;  # abbreviate (at consonant)
28         }
29 }
30
# Emit the result on STDOUT: a header comment naming this script, then a dump of %cc.
31 say "# automatically generated by $0";  # $0 is the script name as invoked
32 use Data::Dump 'dd';  # loaded at compile time, regardless of its position here
33 $Data::Dump::INDENT = '';  # empty indent string, so nested data is not indented
34 dd \%cc;  # print the dump of the hash reference
35
36 __END__
37
38 =head1 NAME
39
40 mkcountries-geonames - Create Perl include of country info from GeoNames data
41
42 =head1 SYNOPSIS
43
44         curl http://download.geonames.org/export/dump/countryInfo.txt |
45         tools/mkcountries-geonames > countries.inc.pl
46