tools: rename convert tools to regular mk*
[sheet.git] / tools / mkusage-wikimedia
diff --git a/tools/mkusage-wikimedia b/tools/mkusage-wikimedia
new file mode 100755 (executable)
index 0000000..c775a4a
--- /dev/null
@@ -0,0 +1,97 @@
+#!/usr/bin/perl -n
+use 5.010; use strict; use warnings;
+# Package globals rather than lexicals: under -n this code is the body of
+# the implicit per-line loop, and both must keep their values across lines.
+our %count;   # collected result, dumped by the END block
+our $mobile;  # set on rows labelled &nbsp;: true if the row mentions " mobile" but not "non mobile"
+
+# Report header (from the highlighted cell up to the next <td>/</td>):
+# record the fixed source and title, pick up the reporting period when the
+# line mentions one, and move on to the next input line.
+if (m{<td class=hl>} .. m{</?td>}) {
+       @count{qw(-source -title)} = (
+               'http://stats.wikimedia.org/archive/squid_reports/',
+               'Wikimedia',
+       );
+       if (m{ period: (?:\d+ )?(\w+ \d+) }) {
+               $count{-date} = $1;
+       }
+       next;
+}
+
+# select relevant columns
+# Lines are processed from the "Browser versions" heading onwards; a later
+# line containing </table> ends input processing (last leaves the -n loop),
+# and every line outside that range is skipped.
+/>Browser versions(.*)/ ... m{</table>} && last or next;
+# Cells are whatever text sits between runs of HTML tags.
+my ($tr, $id, $count2, $count) = split /(?:<[^>]*>)+/;
+# Plain string comparisons instead of the smartmatch operator (~~), which
+# warns as experimental on perl 5.18+ and is deprecated as of perl 5.38.
+# A missing label compares as the empty string, so it matches neither value.
+my $label = $id // '';
+# A row labelled &nbsp; decides whether the rows that follow count as mobile.
+$mobile = $count2 =~ /(?<!non) mobile/ if $label eq '&nbsp;';
+next if $label eq 'Total' or $label eq '&nbsp;';
+
+# convert to usable syntax
+# The first word of the label is the browser, the remainder its version
+# (0 when the label is a single word).
+my ($browser, $version) = split /\h+/, $id, 2;
+$version //= 0;
+# Remove commas and a trailing percent sign from the figure.
+for ($count) {
+       s/,//g;
+       s/%$//;
+}
+# Normalise the browser name and version to the keys used in the result.
+# Written as a plain if/elsif chain: given/when is experimental (it warns
+# under 'use warnings' on perl 5.18+) and deprecated as of perl 5.38.
+my $name = $browser // '';  # compared below while $browser itself is rewritten
+my $strip_zero = 1;         # a trailing ".0" is dropped unless a branch opts out
+if ($name eq 'Firefox' or $name eq 'Iceweasel') {
+       $browser = 'firefox';
+}
+elsif ($name eq 'MSIE') {
+       $browser = 'ie';
+}
+elsif ($name eq 'Opera') {
+       $browser = $mobile ? 'op_mob' : 'opera';
+       if ($version =~ m{\(Mini(.*)\)$}) {
+               my $mini = $1;
+               $browser = 'op_mini';
+               # keep the major version only; fall back to 0 when none is
+               # given, like any other missing version
+               $version = $mini =~ m{^/(\d+)} ? $1 : 0;
+       }
+       else {
+               $version =~ s/^\d*\.\d\K.*//;  # one significant digit
+       }
+}
+elsif ($name eq 'Safari') {
+       $strip_zero = 0;  # the mapped release name is kept as is
+       $browser = $mobile ? 'ios_saf' : 'safari';
+       # zero-pad every component so build numbers compare as strings
+       my $numversion = join('.', map { sprintf '%03d', $_ } split /\./, $version);
+       $numversion =~ s/6(?=\d{3})//;  # drop the extra leading 6: 6532.22 -> 532.022
+       # [ lower-bound build number => release ], in ascending order
+       # http://en.wikipedia.org/wiki/Safari_version_history
+       my @releases = $mobile ? (
+               [ '413'     => '1'   ],
+               [ '419'     => '1.1' ],
+               [ '525'     => '2'   ],
+               [ '528'     => '3'   ],
+               [ '531'     => '3.2' ],
+               [ '531.022' => '4'   ],
+               [ '533'     => '4.2' ],
+               [ '534'     => '4x'  ],
+       ) : (
+               [ '413'     => '2'   ],
+               [ '522'     => '3'   ],
+               [ '525.013' => '3.1' ],
+               [ '525.026' => '3.2' ],
+               [ '526'     => '4'   ],
+               [ '533'     => '5'   ],
+               [ '534'     => '5x'  ],
+       );
+       # take the last release whose lower bound this build reaches; builds
+       # below the first bound keep their raw version
+       for my $release (@releases) {
+               last if $numversion lt $release->[0];
+               $version = $release->[1];
+       }
+}
+elsif ($name eq 'Chrome') {
+       $strip_zero = 0;
+       $browser = 'chrome';
+       $version =~ s/\.\d+$//;  # drop the last numeric component
+}
+elsif ($name eq 'Android') {
+       $strip_zero = 0;
+       $browser = 'android';
+}
+$version =~ s/\.0$// if $strip_zero;
+
+# Add this row's figure to the running total for the browser and version.
+$count{$browser}{$version} += $count;
+
+# Once input is exhausted (or </table> was reached), write the collected
+# data to standard output as a Perl expression. pp() returns the text
+# without a final newline, so one is added to end the generated file with
+# a complete line.
+END {
+       use Data::Dump 'pp';
+       print pp(\%count), "\n";
+}
+
+__END__
+
+=head1 USAGE
+
+       curl http://stats.wikimedia.org/archive/squid_reports/2010-10/SquidReportClients.htm |
+       ./mkusage-wikimedia > browser-usage.inc.pl
+