browser: script to parse wikimedia statistics
author Mischa POSLAWSKY <perl@shiar.org>
Wed, 10 Nov 2010 04:32:52 +0000 (05:32 +0100)
committer Mischa POSLAWSKY <perl@shiar.org>
Wed, 10 Nov 2010 21:40:55 +0000 (22:40 +0100)
tools/convert-stats-wikimedia.pl [new file with mode: 0644]

diff --git a/tools/convert-stats-wikimedia.pl b/tools/convert-stats-wikimedia.pl
new file mode 100644 (file)
index 0000000..d65e5eb
--- /dev/null
@@ -0,0 +1,72 @@
+#!perl -n
+use 5.010; use strict; use warnings;
+our %count;  # running totals as $count{$browser}{$version}; a package variable so it survives every pass of the implicit -n loop and is visible in END
+
+# select relevant columns: process only lines from the one containing ">Browser versions" up to the next </table>, and stop reading there
+/>Browser versions/ ... m{</table>} && last or next;  # parsed as (start ... (end && last)) or next: leave the loop on the closing tag, skip lines outside the range
+my ($tr, $id, $count2, $count) = split /(?:<[^>]*>)+/;  # split the current line ($_) on runs of HTML tags; $tr is the text before the first tag, $count2 is not used below
+next if $id ~~ ['Total', '&nbsp;'];  # ignore rows whose first cell is exactly 'Total' or '&nbsp;'
+
+# convert to usable syntax: split the label into name and version, strip formatting from the count, and map browser names onto engine keys with coarser version buckets
+my ($browser, $version) = split /\h+/, $id, 2;  # first word is the browser name, everything after the first horizontal whitespace is the version
+$count =~ s/,//g;  # remove all commas (presumably thousands separators)
+$count =~ s/%$//;  # remove a trailing percent sign
+$version //= 0;  # labels without a version part are filed under version 0
+given ($browser) {
+       when (['Firefox', 'Iceweasel']) {
+               $browser = 'gecko';
+               continue;  # carry on with the rest of the given block so the final ".0" cleanup still runs
+       }
+       when ('MSIE') {
+               $browser = 'trident';
+               continue;  # as above: fall through to the final ".0" cleanup
+       }
+       when ('Opera') {
+               $browser = 'presto';
+               for ($version) {  # alias $version to $_ for the substitutions below
+                       s/\d\d\.\d\K.*// or do {  # two digits, a dot and one digit: keep that much and cut the rest; otherwise apply the rule below
+                               # major part only, except for ≥v9.6
+                               $_ ge '9.6' ? ($_ = '9.6') : (s/\..*//);  # 'ge' compares as strings, so e.g. '9.10' sorts below '9.6' and is reduced to '9'
+                       };
+               }
+               continue;  # as above: fall through to the final ".0" cleanup
+       }
+       when ('Safari') {
+               $browser = 'webkit_saf';
+               my $numversion = join('.', map { sprintf '%03d', $_ } split /\./, $version);  # zero-pad each dotted component to three digits so the string comparison below orders like a numeric one
+               $numversion =~ s/6(?=\d{3})//;  # remove the first 6 that is directly followed by three digits, e.g. erroneous(?) 6532.22 → 532.022
+               for (  # [ threshold => version label ] pairs in ascending order; the last threshold not above $numversion decides the label
+                       # http://en.wikipedia.org/wiki/Safari_version_history
+                       [ '413'     => '2'   ],
+                       [ '522'     => '3'   ],
+                       [ '525.013' => '3.1' ],
+                       [ '525.026' => '3.2' ],
+                       [ '526'     => '4'   ],
+                       [ '533'     => '5'   ],
+                       [ '534'     => '5x'  ],
+               ) {
+                       last if $numversion lt $_->[0];  # stop at the first threshold above this version
+                       $version = $_->[1];  # not reached when below the first threshold: $version then keeps its original value
+               }
+       }
+       when ('Chrome') {
+               $browser = 'webkit_chr';
+               s/\.\d+$// for $version;  # drop the last dotted numeric component
+       }
+       s/\.0$// for $version;  # strip a trailing ".0"; reached only when no when matched or the matching when ended with continue (so not for Safari or Chrome)
+}
+
+$count{$browser}{$version} += $count;  # several raw labels can end up under the same key, so their counts are summed
+
+END {  # runs once when the program exits, i.e. after the input loop has finished or was left via last
+       use Data::Dump 'pp';
+       print pp(\%count);  # write the collected totals to standard output as formatted by Data::Dump's pp
+}
+
+__END__
+
+=head1 USAGE
+
+       curl http://stats.wikimedia.org/archive/squid_reports/2010-10/SquidReportClients.htm |
+       convert-stats-wikimedia.pl > browser-usage.inc.pl
+