X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/4a28039ff22d6402f923488c0681f640b6fa14a6..HEAD:/tools/mkusage-wikimedia diff --git a/tools/mkusage-wikimedia b/tools/mkusage-wikimedia index a50d899..71d6e5f 100755 --- a/tools/mkusage-wikimedia +++ b/tools/mkusage-wikimedia @@ -4,13 +4,13 @@ use warnings; use Data::Dump 'pp'; -our $VERSION = '1.00'; +our $VERSION = '1.03'; my %BROWSERID = qw( - IE ie + IE edge IE-Mobile ie_mob - Edge ie - Edge-Mobile ie + Edge edge + Edge-Mobile edge Firefox firefox Firefox-Mobile and_ff Safari safari @@ -18,13 +18,16 @@ my %BROWSERID = qw( Mobile-Safari-UIWebView ios_saf Chrome chrome Chromium chrome - Chrome-Mobile android - Chrome-Mobile-iOS android - Android android + Chrome-Mobile and_chr + Chrome-Mobile-iOS and_chr + Android and_chr Opera opera Opera-Mini op_mini BlackBerry-WebKit bb UC-Browser and_uc + Samsung-Internet samsung + Google 0 + Other 0 ); my %count = ( @@ -32,15 +35,28 @@ my %count = ( -site => 'https://analytics.wikimedia.org/', ); -my $recent = qr/^2017-/; - (readline =~ y/\t//) == 3 or die "unexpected amount of columns in header\n"; +my @lines = readline; + +my $recent; # minimum date to include +for (reverse @lines) { + my ($date) = /(\S+)/; + $recent ne $date or next if $recent; # same day + $recent = $date; # override older date + last if state $i++ >= 2; # repeat twice +} -while (my $row = readline) { +for my $row (@lines) { + $row =~ s/\r?\n\z//; my ($date, $name, $version, $pct) = split /\t/, $row; - $date =~ $recent or next; + $date ge $recent or next; $name =~ y/ /-/; - my $browser = $BROWSERID{$name} or next; + my $browser = $BROWSERID{$name}; + if (not $browser) { + warn "unknown browser: $name v$version ($pct)\n" + unless defined $browser or $pct < .005; + next; + } $version =~ s/\A-\z/0/; $count{$browser}{$version} += $pct; $count{-total} += $pct; @@ -59,6 +75,5 @@ __END__ =head1 USAGE - curl https://analytics.wikimedia.org/datasets/periodic/reports/metrics/browser/all_sites_by_browser_family_and_major_percent.tsv | - ./mkusage-wikimedia >browser-usage.inc.pl + ./mkusage-wikimedia wikipedia-analytics.tsv >browser-usage.inc.pl