X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/4567809ced8050d78560dfe77fef5edfc58dc1c7..802e8d5b79d157f75c639c11b6cd1cacf4b0ec8a:/tools/mkusage-wikimedia?ds=sidebyside

diff --git a/tools/mkusage-wikimedia b/tools/mkusage-wikimedia
index 6ce3652..bf893ce 100755
--- a/tools/mkusage-wikimedia
+++ b/tools/mkusage-wikimedia
@@ -4,7 +4,7 @@ use warnings;
 
 use Data::Dump 'pp';
 
-our $VERSION = '1.00';
+our $VERSION = '1.01';
 
 my %BROWSERID = qw(
 	IE ie
@@ -25,6 +25,7 @@ my %BROWSERID = qw(
 	Opera-Mini op_mini
 	BlackBerry-WebKit bb
 	UC-Browser and_uc
+	Other 0
 );
 
 my %count = (
@@ -32,23 +33,27 @@ my %count = (
 	-site => 'https://analytics.wikimedia.org/',
 );
 
-my $recent = qr/^2017-/;
+my $recent = qr/^2018-/;
 
 (readline =~ y/\t//) == 3 or die "unexpected amount of columns in header\n";
 
 while (my $row = readline) {
+	$row =~ s/\r?\n\z//;
 	my ($date, $name, $version, $pct) = split /\t/, $row;
 	$date =~ $recent or next;
 	$name =~ y/ /-/;
-	my $browser = $BROWSERID{$name} or next;
+	my $browser = $BROWSERID{$name};
+	if (not $browser) {
+		warn "unknown browser: $name v$version ($pct)\n"
+			unless defined $browser or $pct < .005;
+		next;
+	}
 	$version =~ s/\A-\z/0/;
 	$count{$browser}{$version} += $pct;
 	$count{-total} += $pct;
-	$count{-date}->{$date}++;
+	$count{-date} = $date;
 }
 
-$_ = join ' to ', (sort keys %{$_})[0, -1] for $count{-date};
-
 my $mult = 100 / delete $count{-total};
 for (values %count) {
 	ref $_ eq 'HASH' or next;
@@ -61,6 +66,5 @@ __END__
 
 =head1 USAGE
 
-	curl https://analytics.wikimedia.org/datasets/periodic/reports/metrics/browser/all_sites_by_browser_family_and_major_percent.tsv |
-	./mkusage-wikimedia >browser-usage.inc.pl
+	./mkusage-wikimedia wikipedia-analytics.tsv >browser-usage.inc.pl
 