X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/6a67847f60b0a80a9026f8d219476acf92cc4d37..4a28039ff22d6402f923488c0681f640b6fa14a6:/tools/mkusage-wikimedia diff --git a/tools/mkusage-wikimedia b/tools/mkusage-wikimedia index 41ed977..a50d899 100755 --- a/tools/mkusage-wikimedia +++ b/tools/mkusage-wikimedia @@ -1,112 +1,64 @@ -#!/usr/bin/perl -n -use 5.012; +#!/usr/bin/perl +use 5.014; use warnings; -our %count; -our $mobile; -our $VERSION = '1.02'; +use Data::Dump 'pp'; -if (m{} .. m{}) { - $count{-source} = 'http://stats.wikimedia.org/archive/squid_reports/'; - $count{-title } = 'Wikimedia'; - $count{-date} = $1 if m{ period: (?:\d+ )?(\w+ \d+) }; - next; -} +our $VERSION = '1.00'; -# select relevant data -/>In alphabetical order/ .. eof or next; # second table -my ($id, $count2, $count) = map { s/<[^>]*>//gr } split /<\/td>/; +my %BROWSERID = qw( + IE ie + IE-Mobile ie_mob + Edge ie + Edge-Mobile ie + Firefox firefox + Firefox-Mobile and_ff + Safari safari + Mobile-Safari ios_saf + Mobile-Safari-UIWebView ios_saf + Chrome chrome + Chromium chrome + Chrome-Mobile android + Chrome-Mobile-iOS android + Android android + Opera opera + Opera-Mini op_mini + BlackBerry-WebKit bb + UC-Browser and_uc +); -# select version data -/>Browser versions(.*)/ ... !defined $count2 or next; -unless (defined $count2) { - # header row if no td separator - $mobile = $id !~ /non mobile/; - next; -} +my %count = ( + -title => 'Wikimedia', + -site => 'https://analytics.wikimedia.org/', +); -# convert to usable syntax -my ($browser, $version) = split /\h+/, $id || ' ', 2; -$count =~ s/,//g; -$count =~ s/%$//; -$version //= 0; -given ($browser) { - when (['Firefox', 'Iceweasel']) { - $browser = $mobile ? 'and_ff' : 'firefox'; - continue; - } - when ('MSIE') { - $browser = $mobile ? 'ie_mob' : 'ie'; - continue; - } - when ('Opera') { - $browser = $mobile ? 'op_mob' : 'opera'; - for ($version) { - if (m{\(Mini(.*)\)$}) { - $browser = 'op_mini'; - ($_) = $1 =~ m{^/(\d+)}; - continue; - } - s/^\d*\.\d\K.*//; # one significant digit - } - continue; - } - when ('Safari') { - $browser = $mobile ? 'ios_saf' : 'safari'; - my $numversion = join('.', map { sprintf '%03d', $_ } split /\./, $version); - $numversion =~ s/6(?=\d{3})//; # incomparable 6532.22 → 523 - for ( - # http://en.wikipedia.org/wiki/Safari_version_history - $mobile ? ( - [ '413' => '1' ], - [ '419' => '1.1' ], - [ '525' => '2' ], - [ '528' => '3' ], - [ '531' => '3.2' ], - [ '531.022' => '4' ], - [ '533' => '4.2' ], - [ '534' => '4x' ], - ) : ( - [ '413' => '2' ], - [ '522' => '3' ], - [ '525.013' => '3.1' ], - [ '525.026' => '3.2' ], - [ '526' => '4' ], - [ '533' => '5' ], - [ '534' => '5x' ], - ) - ) { - last if $numversion lt $_->[0]; - $version = $_->[1]; - } - } - when ('Chrome') { - $browser = $mobile ? 'and_chr' : 'chrome'; - s/\.\d+$// for $version; - } - when ('Android') { - $browser = 'android'; - } - when ('BlackBerry') { - $browser = 'bb'; - } - when ('UCWEB') { - $browser = 'and_uc'; - } - s/\.0$// for $version; -} +my $recent = qr/^2017-/; -$count{$browser}{$version} += $count; +(readline =~ y/\t//) == 3 or die "unexpected amount of columns in header\n"; -END { - use Data::Dump 'pp'; - print pp(\%count); +while (my $row = readline) { + my ($date, $name, $version, $pct) = split /\t/, $row; + $date =~ $recent or next; + $name =~ y/ /-/; + my $browser = $BROWSERID{$name} or next; + $version =~ s/\A-\z/0/; + $count{$browser}{$version} += $pct; + $count{-total} += $pct; + $count{-date} = $date; } +my $mult = 100 / delete $count{-total}; +for (values %count) { + ref $_ eq 'HASH' or next; + $_ *= $mult for values %{$_}; +} + +say '+', pp(\%count); + __END__ =head1 USAGE - curl http://stats.wikimedia.org/archive/squid_reports/2010-10/SquidReportClients.htm | - ./mkusage-wikimedia > browser-usage.inc.pl + curl https://analytics.wikimedia.org/datasets/periodic/reports/metrics/browser/all_sites_by_browser_family_and_major_percent.tsv | + ./mkusage-wikimedia >browser-usage.inc.pl