#!/usr/bin/perl -n
use 5.014;
use warnings;
our %count;
our $mobile;
our $VERSION = '1.02';
# Report preamble: while the range (flip-flop) between the two patterns below
# is active, (re)record the report metadata in %count and skip the line.
# NOTE(review): both delimiting patterns look truncated in this copy (the
# closing one starts with a bare "?" quantifier) -- presumably HTML tags were
# stripped from them; restore from the upstream script before running.
if (m{
} .. m{?td>}) {
$count{-source} = 'http://stats.wikimedia.org/archive/squid_reports/';
$count{-title } = 'Wikimedia';
# keep the two-part value after " period: " (word, then number); an optional
# leading number is matched but not captured
$count{-date} = $1 if m{ period: (?:\d+ )?(\w+ \d+) };
next;
}
# select relevant data: skip every line before the one containing
# ">In alphabetical order"; from there the range stays active until eof
# reports the end of the input being read
/>In alphabetical order/ .. eof or next; # second table
# split the row into cells at each closing </td> and strip any remaining tags
# from every cell; $count2 is undefined when the line yields fewer than two
# cells
my ($id, $count2, $count) = map { s/<[^>]*>//gr } split /<\/td>/;
# engine table: the range opens on the line containing ">Browser engines"
# NOTE(review): the closing pattern is empty in this copy, which Perl treats
# as "reuse the last successfully matched pattern" (see perlop) -- presumably
# a closing tag was lost here; confirm against the upstream script.
if (/>Browser engines/ ... m{}) {
# true when the row id began with "AppleWebKit"; the prefix is removed so that
# $id holds only what followed it
my $apple = $id =~ s/^AppleWebKit ?//;
# skip engine rows until the first AppleWebKit one
# NOTE(review): a constant operand of a scalar range is compared against $.
# (see perlop), so this presumably never switches off again within a single
# input stream -- confirm
$apple .. 1 or next;
# first numeric argument of the showCount(...) call on this line, if any
my ($abs) = m/>showCount\((\d+),/;
# WebKit version => absolute count; `state` keeps it across input lines
state %version;
if (defined $count2) {
# regular data row: remember its count and move on to the next line
$version{$id} = $abs;
next;
}
# reached on a row without a second cell; $abs taken from that row serves as
# the divisor ("total number") below
# replace safari total percentage by individual version counts
# have to use combined distribution for multiple platforms :(
for my $browser (qw/ safari ios_saf /) {
# take the aggregate stored under version '0.0' out of the results; nothing
# to redistribute when it is missing or zero
my $multiplier = delete $count{$browser}{'0.0'} or next;
$multiplier /= $abs; # total number
# spread the aggregate over Safari versions in proportion to the recorded
# WebKit counts
while (my ($verwebkit, $count) = each %version) {
my $ver = safariver($verwebkit, $browser eq 'ios_saf');
$count{$browser}{$ver} += $count * $multiplier;
}
}
# leave the implicit -n line loop: no further input is processed (the END
# block still runs at exit)
last;
}
# select version data: the range opens on a line containing
# ">Browser versions" and closes on a later line that has no second cell
# (the three-dot form does not test the closing condition on the opening
# line); lines outside the range are skipped
/>Browser versions(.*)/ ... !defined $count2 or next;
unless (defined $count2) {
# header row if no td separator
# rows that follow count as mobile unless the header says "non mobile"
$mobile = $id !~ /non mobile/;
next;
}
# convert to usable syntax
# first whitespace-separated word is the browser name, the remainder its
# version; an empty id is replaced by a single space before splitting
my ($browser, $version) = split /\h+/, $id || ' ', 2;
# drop thousands separators and a trailing percent sign from the count
$count =~ s/,//g;
$count =~ s/%$//;
$version //= 0;
# Translate the report's browser name into the short key used in %count.
# A `when` block ending in `continue` carries on with the statements after
# it, so those browsers (and any name matching no `when`) also reach the
# final ".0" trimming; the other `when` blocks leave `given` directly.
# NOTE(review): given/when is experimental and deprecated in newer Perl
# releases -- check the interpreter this is run with.
given ($browser) {
when (['Firefox', 'Iceweasel']) {
$browser = $mobile ? 'and_ff' : 'firefox';
continue;
}
when ('MSIE') {
$browser = $mobile ? 'ie_mob' : 'ie';
continue;
}
when ('Opera') {
$browser = $mobile ? 'op_mob' : 'opera';
# single-pass loop used only to alias $_ to $version
for ($version) {
if (m{\(Mini(.*)\)$}) {
# "(Mini...)" suffix: file under op_mini and keep only the digits that
# follow a leading slash inside the parentheses (undef when there are none)
$browser = 'op_mini';
($_) = $1 =~ m{^/(\d+)};
# NOTE(review): this `continue` sits inside the aliasing loop; it appears
# meant to skip the truncation below and fall through like the other
# branches -- confirm the behaviour on the target Perl
continue;
}
s/^\d*\.\d\K.*//; # one significant digit
}
continue;
}
when ('Safari') {
$browser = $mobile ? 'ios_saf' : 'safari';
}
when ('Chrome') {
# mobile Chrome is counted under the 'android' key
$browser = $mobile ? 'android' : 'chrome';
# drop the last dot-separated numeric component of the version
s/\.\d+$// for $version;
}
when ('Android') {
$browser = 'android';
}
when ('BlackBerry') {
$browser = 'bb';
}
when ('UCWEB') {
$browser = 'and_uc';
}
# strip a trailing ".0" from the version
s/\.0$// for $version;
}
# accumulate the count per browser key and version
$count{$browser}{$version} += $count;
END {
    # Runs once at program exit: serialise everything collected in %count
    # as a Perl data structure and write it to the selected output handle.
    use Data::Dump 'pp';
    my $dumped = pp(\%count);
    print $dumped;
}
# Map an AppleWebKit build number to the Safari release that shipped it.
#
# Arguments:
#   $webkitnum - WebKit version string; only its leading integer component
#                (e.g. 533 of "533.17.9") takes part in the lookup
#   $ios       - true to use the iOS Safari table, false for desktop Safari
#
# Returns the Safari version of the newest table entry whose WebKit build
# does not exceed the given one, or '' when the build predates the table or
# the argument has no leading digits.
sub safariver {
    my ($webkitnum, $ios) = @_;

    # Builds are compared numerically: a string comparison would sort a build
    # such as "85.7" after "600" and report the newest Safari for it.
    my ($build) = ($webkitnum // '') =~ /^(\d+)/
        or return '';

    # Each entry is [ first WebKit build => Safari version ], ascending.
    my @releases = $ios
        ? (
            [ 413 => '1'   ],
            [ 419 => '1.1' ],
            [ 525 => '2'   ],
            [ 528 => '3'   ],
            [ 531 => '3.2' ], # or 4 from 531.022
            [ 532 => '4'   ],
            [ 533 => '4.2' ],
            [ 534 => '5'   ],
            [ 536 => '7'   ],
            [ 537 => '8'   ],
            [ 600 => '8.1' ],
        )
        : (
            # http://en.wikipedia.org/wiki/Safari_version_history
            [ 412 => '2'   ],
            [ 522 => '3'   ],
            [ 525 => '3.1' ], # 3.2 from 525.026
            [ 526 => '4'   ],
            [ 533 => '5'   ], # or 4.1
            [ 534 => '5.1' ],
            [ 536 => '6'   ],
            [ 537 => '7.1' ], # or 6.1 or 7.0
            [ 538 => '8'   ],
        );

    my $safarinum = '';
    for my $release (@releases) {
        my ($first_build, $safari) = @{$release};
        # stop at the first entry that is newer than the given build
        last if $build < $first_build;
        $safarinum = $safari;
    }
    return $safarinum;
}
__END__
=head1 USAGE

    curl http://stats.wikimedia.org/archive/squid_reports/2010-10/SquidReportClients.htm |
    ./mkusage-wikimedia > browser-usage.inc.pl
|