X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/4567809ced8050d78560dfe77fef5edfc58dc1c7..HEAD:/tools/mkusage-wikimedia diff --git a/tools/mkusage-wikimedia b/tools/mkusage-wikimedia index 6ce3652..71d6e5f 100755 --- a/tools/mkusage-wikimedia +++ b/tools/mkusage-wikimedia @@ -4,13 +4,13 @@ use warnings; use Data::Dump 'pp'; -our $VERSION = '1.00'; +our $VERSION = '1.03'; my %BROWSERID = qw( - IE ie + IE edge IE-Mobile ie_mob - Edge ie - Edge-Mobile ie + Edge edge + Edge-Mobile edge Firefox firefox Firefox-Mobile and_ff Safari safari @@ -18,13 +18,16 @@ my %BROWSERID = qw( Mobile-Safari-UIWebView ios_saf Chrome chrome Chromium chrome - Chrome-Mobile android - Chrome-Mobile-iOS android - Android android + Chrome-Mobile and_chr + Chrome-Mobile-iOS and_chr + Android and_chr Opera opera Opera-Mini op_mini BlackBerry-WebKit bb UC-Browser and_uc + Samsung-Internet samsung + Google 0 + Other 0 ); my %count = ( @@ -32,23 +35,34 @@ my %count = ( -site => 'https://analytics.wikimedia.org/', ); -my $recent = qr/^2017-/; - (readline =~ y/\t//) == 3 or die "unexpected amount of columns in header\n"; +my @lines = readline; + +my $recent; # minimum date to include +for (reverse @lines) { + my ($date) = /(\S+)/; + $recent ne $date or next if $recent; # same day + $recent = $date; # override older date + last if state $i++ >= 2; # repeat twice +} -while (my $row = readline) { +for my $row (@lines) { + $row =~ s/\r?\n\z//; my ($date, $name, $version, $pct) = split /\t/, $row; - $date =~ $recent or next; + $date ge $recent or next; $name =~ y/ /-/; - my $browser = $BROWSERID{$name} or next; + my $browser = $BROWSERID{$name}; + if (not $browser) { + warn "unknown browser: $name v$version ($pct)\n" + unless defined $browser or $pct < .005; + next; + } $version =~ s/\A-\z/0/; $count{$browser}{$version} += $pct; $count{-total} += $pct; - $count{-date}->{$date}++; + $count{-date} = $date; } -$_ = join ' to ', (sort keys %{$_})[0, -1] for $count{-date}; - my $mult = 100 / delete $count{-total}; for (values %count) { ref $_ eq 'HASH' or next; @@ -61,6 +75,5 @@ __END__ =head1 USAGE - curl https://analytics.wikimedia.org/datasets/periodic/reports/metrics/browser/all_sites_by_browser_family_and_major_percent.tsv | - ./mkusage-wikimedia >browser-usage.inc.pl + ./mkusage-wikimedia wikipedia-analytics.tsv >browser-usage.inc.pl