#!/usr/bin/perl -n use 5.010; use strict; use warnings; our %count; our $mobile; if (m{} .. m{}) { $count{-source} = 'http://stats.wikimedia.org/archive/squid_reports/'; $count{-title } = 'Wikimedia'; $count{-date} = $1 if m{ period: (?:\d+ )?(\w+ \d+) }; next; } # select relevant columns />Browser versions(.*)/ ... m{} && last or next; my ($tr, $id, $count2, $count) = split /(?:<[^>]*>)+/; $mobile = $count2 =~ /(? '1' ], [ '419' => '1.1' ], [ '525' => '2' ], [ '528' => '3' ], [ '531' => '3.2' ], [ '531.022' => '4' ], [ '533' => '4.2' ], [ '534' => '4x' ], ) : ( [ '413' => '2' ], [ '522' => '3' ], [ '525.013' => '3.1' ], [ '525.026' => '3.2' ], [ '526' => '4' ], [ '533' => '5' ], [ '534' => '5x' ], ) ) { last if $numversion lt $_->[0]; $version = $_->[1]; } } when ('Chrome') { $browser = 'chrome'; s/\.\d+$// for $version; } when ('Android') { $browser = 'android'; } s/\.0$// for $version; } $count{$browser}{$version} += $count; END { use Data::Dump 'pp'; print pp(\%count); } __END__ =head1 USAGE curl http://stats.wikimedia.org/archive/squid_reports/2010-10/SquidReportClients.htm | ./mkusage-wikimedia > browser-usage.inc.pl
# NOTE(review): this script appears to have been damaged before it reached
# this file; the observations below are based on the visible text only.
#
# - Every statement shares one physical line with the `#!` marker, and `#`
#   starts a comment that runs to end of line, so as laid out none of the
#   statements is parsed as code.  Presumably the original had one statement
#   per line -- confirm against an intact copy.
# - The `m{}` patterns are empty.  Perl treats an empty pattern as "reuse the
#   last successfully matched pattern", which is unlikely to be intended
#   here; they presumably held markup that was stripped -- TODO restore.
# - The regex that begins `/(?` is never completed; the text jumps straight
#   into a list of [ threshold => version ] pairs.  The `given`/`for` openers
#   that the visible `when` blocks and `last if $numversion lt $_->[0]`
#   belong to are not visible, nor are the declarations of $browser,
#   $version and $numversion (required under `use strict`).
# - The visible `when` blocks rely on the `switch` feature (enabled by
#   `use 5.010`), which is deprecated in current Perl releases.
#
# What the visible text does show: each input line is split on runs of tags
# into ($tr, $id, $count2, $count); $count is added to
# $count{$browser}{$version}; a trailing ".0" is removed from $version; and
# the END block prints \%count using pp from Data::Dump.