From 24d8e7421e79afd514ce6b9035783e4231dabc84 Mon Sep 17 00:00:00 2001
From: Mischa POSLAWSKY
Date: Wed, 10 Nov 2010 05:32:52 +0100
Subject: [PATCH] browser: script to parse wikimedia statistics

---
 tools/convert-stats-wikimedia.pl | 104 +++++++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)
 create mode 100644 tools/convert-stats-wikimedia.pl

diff --git a/tools/convert-stats-wikimedia.pl b/tools/convert-stats-wikimedia.pl
new file mode 100644
index 0000000..d65e5eb
--- /dev/null
+++ b/tools/convert-stats-wikimedia.pl
@@ -0,0 +1,104 @@
+#!perl -n
+# Tally browser usage from a Wikimedia squid report, read line by line (-n).
+#
+# Rows of the "Browser versions" table are split into columns; the browser
+# name is mapped to an engine key, the version is coarsened, and the value
+# of the last column read is summed per engine key and version in %count.
+# The END block prints %count as a Perl data structure.
+use 5.010; use strict; use warnings;
+
+# Package variable on purpose: under -n the script body is the body of the
+# implicit read loop, and the totals must survive until END.
+our %count;
+
+# select relevant columns
+# The range is true from the line containing ">Browser versions" up to the
+# next line containing "</table>".  Lines outside it are skipped; reaching
+# the closing line ends the read loop, so that line is never parsed.
+# NOTE(review): the "</table>" end marker is reconstructed -- confirm it
+# against the report markup.
+/>Browser versions/ ... m{</table>} && last or next;
+
+# Split on runs of tags.  The first field is whatever precedes the first
+# tag and the third field is not used.
+my (undef, $id, undef, $count) = split /(?:<[^>]*>)+/;
+
+# A line without a label field (blank line, lone tag) carries no data.
+next if !defined $id;
+# Skip the summary row and rows with a blank label.
+# NOTE(review): '&nbsp;' is accepted next to ' ' on the assumption that the
+# report writes blank cells as an HTML entity -- confirm.
+next if $id eq 'Total' || $id eq ' ' || $id eq '&nbsp;';
+
+# convert to usable syntax
+my ($browser, $version) = split /\h+/, $id, 2;
+$count =~ s/,//g;  # drop thousands separators
+$count =~ s/%$//;  # drop a trailing percent sign
+$version //= 0;    # label without a version part
+
+# Safari and Chrome set their final version themselves; every other label
+# gets a trailing ".0" trimmed at the end.
+my $trim_dot_zero = 1;
+if ($browser eq 'Firefox' || $browser eq 'Iceweasel') {
+    $browser = 'gecko';
+}
+elsif ($browser eq 'MSIE') {
+    $browser = 'trident';
+}
+elsif ($browser eq 'Opera') {
+    $browser = 'presto';
+    # A version with two digits before a dot is cut after its first
+    # decimal.  Otherwise keep the major part only, except that anything
+    # sorting at or after '9.6' becomes '9.6'.
+    unless ($version =~ s/\d\d\.\d\K.*//) {
+        if ($version ge '9.6') {
+            $version = '9.6';
+        }
+        else {
+            $version =~ s/\..*//;
+        }
+    }
+}
+elsif ($browser eq 'Safari') {
+    $browser = 'webkit_saf';
+    $trim_dot_zero = 0;
+    # Zero-pad every component to three digits so that plain string
+    # comparison orders the build numbers.
+    my $numversion = join('.', map { sprintf '%03d', $_ } split /\./, $version);
+    $numversion =~ s/6(?=\d{3})//;  # erroneous(?) 6532.22 -> 532.022
+    # Thresholds in ascending order: the last one not above $numversion
+    # names the release; builds below the first keep the reported version.
+    for my $release (
+        # http://en.wikipedia.org/wiki/Safari_version_history
+        [ '413'     => '2'   ],
+        [ '522'     => '3'   ],
+        [ '525.013' => '3.1' ],
+        [ '525.026' => '3.2' ],
+        [ '526'     => '4'   ],
+        [ '533'     => '5'   ],
+        [ '534'     => '5x'  ],
+    ) {
+        last if $numversion lt $release->[0];
+        $version = $release->[1];
+    }
+}
+elsif ($browser eq 'Chrome') {
+    $browser = 'webkit_chr';
+    $trim_dot_zero = 0;
+    $version =~ s/\.\d+$//;  # drop the last version component
+}
+$version =~ s/\.0$// if $trim_dot_zero;
+
+$count{$browser}{$version} += $count;
+
+END {
+    use Data::Dump 'pp';
+    print pp(\%count);
+}
+
+__END__
+
+=head1 USAGE
+
+    curl http://stats.wikimedia.org/archive/squid_reports/2010-10/SquidReportClients.htm |
+    perl tools/convert-stats-wikimedia.pl > browser-usage.inc.pl
+
-- 
2.30.0