#!/usr/bin/perl -n
# Convert a Wikimedia squid client report (HTML read line by line from the
# input) into a dump of browser usage counts, printed at exit.
#
# The -n switch wraps everything below in an implicit "while (<>)" loop:
# the body runs once per input line with the line in $_, and the bare
# next/last statements act on that loop.  State that has to survive from
# one line to the next therefore lives in package variables, a state
# variable, or in the flip-flop (range) operators.
use 5.014;
use warnings;

our %count;   # result: $count{browser id}{version} plus -source/-title/-date
our $mobile;  # true while the current version table is not the "non mobile" one

our $VERSION = '1.02';

# NOTE(review): both range operands are empty patterns here.  In Perl an
# empty pattern reuses the last successfully matched regex; this looks like
# markup delimiters were lost from the patterns -- confirm against upstream.
if (m{} .. m{}) {
    $count{-source} = 'http://stats.wikimedia.org/archive/squid_reports/';
    $count{-title } = 'Wikimedia';
    $count{-date} = $1 if m{ period: (?:\d+ )?(\w+ \d+) };
    next;
}

# select relevant data
/>In alphabetical order/ .. eof or next;  # second table

# Cells of the current row with all markup removed; a row without a
# </td> separator leaves $count2 (and $count) undefined.
my ($id, $count2, $count) = map { s/<[^>]*>//gr } split /<\/td>/;

# NOTE(review): the closing operand of this range is an empty pattern as
# well (see the note above) -- confirm against upstream.
if (/>Browser engines/ ... m{}) {
    my $apple = $id =~ s/^AppleWebKit ?//;

    # Skip rows until the first AppleWebKit one.  A constant range operand
    # is compared with $. (see perlop), so in practice the range stays open
    # once it has started.
    $apple .. 1 or next;

    my ($abs) = m/>showCount\((\d+),/;
    state %version;  # absolute count per engine version, kept across lines
    if (defined $count2) {
        $version{$id} = $abs;
        next;
    }

    # replace safari total percentage by individual version counts
    # have to use combined distribution for multiple platforms :(
    for my $browser (qw/ safari ios_saf /) {
        my $multiplier = delete $count{$browser}{'0.0'} or next;
        $multiplier /= $abs;  # total number
        while (my ($verwebkit, $hits) = each %version) {
            my $ver = safariver($verwebkit, $browser eq 'ios_saf');
            $count{$browser}{$ver} += $hits * $multiplier;
        }
    }

    last;  # nothing after this row is processed
}

# select version data
/>Browser versions(.*)/ ... !defined $count2 or next;

unless (defined $count2) {
    # header row if no td separator
    $mobile = $id !~ /non mobile/;
    next;
}

# convert to usable syntax
my ($browser, $version) = split /\h+/, $id || ' ', 2;
$count =~ s/,//g;
$count =~ s/%$//;
$version //= 0;

# Map the reported browser name to its identifier.  A trailing ".0" is
# dropped from the version for Firefox/Iceweasel, MSIE, Opera and any
# unrecognised name; the other recognised browsers keep the version as is
# (Chrome only loses its last numeric component).
my $strip_zero = 1;
if ($browser eq 'Firefox' || $browser eq 'Iceweasel') {
    $browser = $mobile ? 'and_ff' : 'firefox';
}
elsif ($browser eq 'MSIE') {
    $browser = $mobile ? 'ie_mob' : 'ie';
}
elsif ($browser eq 'Opera') {
    $browser = $mobile ? 'op_mob' : 'opera';
    if ($version =~ m{\(Mini(.*)\)$}) {
        $browser = 'op_mini';
        # leading number after "Mini/"; undef if there is none
        ($version) = $1 =~ m{^/(\d+)};
    }
    else {
        $version =~ s/^\d*\.\d\K.*//;  # one significant digit
    }
}
elsif ($browser eq 'Safari') {
    $browser = $mobile ? 'ios_saf' : 'safari';
    $strip_zero = 0;
}
elsif ($browser eq 'Chrome') {
    $browser = $mobile ? 'android' : 'chrome';
    $version =~ s/\.\d+$//;
    $strip_zero = 0;
}
elsif ($browser eq 'Android') {
    $browser = 'android';
    $strip_zero = 0;
}
elsif ($browser eq 'BlackBerry') {
    $browser = 'bb';
    $strip_zero = 0;
}
elsif ($browser eq 'UCWEB') {
    $browser = 'and_uc';
    $strip_zero = 0;
}
$version =~ s/\.0$// if $strip_zero;

$count{$browser}{$version} += $count;

# Print the collected counts once, when the program exits.
END {
    use Data::Dump 'pp';
    print pp(\%count);
}

# safariver($webkitnum, $ios)
#
# Translate a WebKit build number into a Safari version string.
#   $webkitnum  build number; only its leading "major[.minor]" is used
#   $ios        true to use the iOS table, false for the desktop one
# Returns the version of the last table entry whose build number does not
# exceed $webkitnum, or '' if the build is older than every entry or does
# not start with a number.
sub safariver {
    my ($webkitnum, $ios) = @_;

    # Compare numerically: a string comparison misorders builds whose major
    # number is not three digits long (e.g. 85 or 1000).
    my ($build) = ($webkitnum // '') =~ m{^(\d+(?:\.\d+)?)}
        or return '';

    my @table = $ios ? (
        [ 413 => '1'   ],
        [ 419 => '1.1' ],
        [ 525 => '2'   ],
        [ 528 => '3'   ],
        [ 531 => '3.2' ],  # or 4 from 531.022
        [ 532 => '4'   ],
        [ 533 => '4.2' ],
        [ 534 => '5'   ],
        [ 536 => '7'   ],
        [ 537 => '8'   ],
        [ 600 => '8.1' ],
    ) : (
        # http://en.wikipedia.org/wiki/Safari_version_history
        [ 412 => '2'   ],
        [ 522 => '3'   ],
        [ 525 => '3.1' ],  # 3.2 from 525.026
        [ 526 => '4'   ],
        [ 533 => '5'   ],  # or 4.1
        [ 534 => '5.1' ],
        [ 536 => '6'   ],
        [ 537 => '7.1' ],  # or 6.1 or 7.0
        [ 538 => '8'   ],
    );

    my $safarinum = '';
    for my $entry (@table) {
        last if $build < $entry->[0];
        $safarinum = $entry->[1];
    }
    return $safarinum;
}

__END__

=head1 USAGE

    curl http://stats.wikimedia.org/archive/squid_reports/2010-10/SquidReportClients.htm | ./mkusage-wikimedia > browser-usage.inc.pl