X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/1b3b68fb314d302d3dfed68613b4276a1ab1369c..4a28039ff22d6402f923488c0681f640b6fa14a6:/tools/mkusage-wikimedia diff --git a/tools/mkusage-wikimedia b/tools/mkusage-wikimedia index 63cded2..a50d899 100755 --- a/tools/mkusage-wikimedia +++ b/tools/mkusage-wikimedia @@ -1,145 +1,64 @@ -#!/usr/bin/perl -n -use 5.012; +#!/usr/bin/perl +use 5.014; use warnings; -our %count; -our $mobile; -our $VERSION = '1.02'; +use Data::Dump 'pp'; -if (m{} .. m{}) { - $count{-source} = 'http://stats.wikimedia.org/archive/squid_reports/'; - $count{-title } = 'Wikimedia'; - $count{-date} = $1 if m{ period: (?:\d+ )?(\w+ \d+) }; - next; -} - -# select relevant data -/>In alphabetical order/ .. eof or next; # second table -my ($id, $count2, $count) = map { s/<[^>]*>//gr } split /<\/td>/; +our $VERSION = '1.00'; -if (/>Browser engines/ ... m{}) { - my $apple = $id =~ s/^AppleWebKit ?//; - $apple .. 1 or next; - my ($abs) = m/>showCount\((\d+),/; +my %BROWSERID = qw( + IE ie + IE-Mobile ie_mob + Edge ie + Edge-Mobile ie + Firefox firefox + Firefox-Mobile and_ff + Safari safari + Mobile-Safari ios_saf + Mobile-Safari-UIWebView ios_saf + Chrome chrome + Chromium chrome + Chrome-Mobile android + Chrome-Mobile-iOS android + Android android + Opera opera + Opera-Mini op_mini + BlackBerry-WebKit bb + UC-Browser and_uc +); - state %version; - if (defined $count2) { - $version{$id} = $abs; - next; - } +my %count = ( + -title => 'Wikimedia', + -site => 'https://analytics.wikimedia.org/', +); - # replace safari total percentage by individual version counts - # have to use combined distribution for multiple platforms :( - for my $browser (qw/ safari ios_saf /) { - my $multiplier = delete $count{$browser}{'0.0'} or next; - $multiplier /= $abs; # total number - while (my ($verwebkit, $count) = each %version) { - my $ver = safariver($verwebkit, $browser eq 'ios_saf'); - $count{$browser}{$ver} += $count * $multiplier; - } - } - last; -} +my $recent = qr/^2017-/; -# select version data -/>Browser versions(.*)/ ... !defined $count2 or next; -unless (defined $count2) { - # header row if no td separator - $mobile = $id !~ /non mobile/; - next; -} +(readline =~ y/\t//) == 3 or die "unexpected amount of columns in header\n"; -# convert to usable syntax -my ($browser, $version) = split /\h+/, $id || ' ', 2; -$count =~ s/,//g; -$count =~ s/%$//; -$version //= 0; -given ($browser) { - when (['Firefox', 'Iceweasel']) { - $browser = $mobile ? 'and_ff' : 'firefox'; - continue; - } - when ('MSIE') { - $browser = $mobile ? 'ie_mob' : 'ie'; - continue; - } - when ('Opera') { - $browser = $mobile ? 'op_mob' : 'opera'; - for ($version) { - if (m{\(Mini(.*)\)$}) { - $browser = 'op_mini'; - ($_) = $1 =~ m{^/(\d+)}; - continue; - } - s/^\d*\.\d\K.*//; # one significant digit - } - continue; - } - when ('Safari') { - $browser = $mobile ? 'ios_saf' : 'safari'; - } - when ('Chrome') { - $browser = $mobile ? 'and_chr' : 'chrome'; - s/\.\d+$// for $version; - } - when ('Android') { - $browser = 'android'; - } - when ('BlackBerry') { - $browser = 'bb'; - } - when ('UCWEB') { - $browser = 'and_uc'; - } - s/\.0$// for $version; +while (my $row = readline) { + my ($date, $name, $version, $pct) = split /\t/, $row; + $date =~ $recent or next; + $name =~ y/ /-/; + my $browser = $BROWSERID{$name} or next; + $version =~ s/\A-\z/0/; + $count{$browser}{$version} += $pct; + $count{-total} += $pct; + $count{-date} = $date; } -$count{$browser}{$version} += $count; - -END { - use Data::Dump 'pp'; - print pp(\%count); +my $mult = 100 / delete $count{-total}; +for (values %count) { + ref $_ eq 'HASH' or next; + $_ *= $mult for values %{$_}; } -sub safariver { - my ($webkitnum, $ios) = @_; - my $safarinum = ''; - for ( - $ios ? ( - [ 413 => '1' ], - [ 419 => '1.1' ], - [ 525 => '2' ], - [ 528 => '3' ], - [ 531 => '3.2' ], # or 4 from 531.022 - [ 532 => '4' ], - [ 533 => '4.2' ], - [ 534 => '5' ], - [ 536 => '7' ], - [ 537 => '8' ], - [ 600 => '8.1' ], - ) : ( - # http://en.wikipedia.org/wiki/Safari_version_history - [ 412 => '2' ], - [ 522 => '3' ], - [ 525 => '3.1' ], # 3.2 from 525.026 - [ 526 => '4' ], - [ 533 => '5' ], # or 4.1 - [ 534 => '5.1' ], - [ 536 => '6' ], - [ 537 => '7.1' ], # or 6.1 or 7.0 - [ 538 => '8' ], - ) - ) { - last if $webkitnum lt $_->[0]; - $safarinum = $_->[1]; - } - return $safarinum; -} +say '+', pp(\%count); __END__ =head1 USAGE - curl http://stats.wikimedia.org/archive/squid_reports/2010-10/SquidReportClients.htm | - ./mkusage-wikimedia > browser-usage.inc.pl + curl https://analytics.wikimedia.org/datasets/periodic/reports/metrics/browser/all_sites_by_browser_family_and_major_percent.tsv | + ./mkusage-wikimedia >browser-usage.inc.pl