#!/usr/bin/perl
# mkusage-wikimedia - translate the Wikimedia browser share export (TSV) into
# a usage table keyed by caniuse agent identifiers.
#
# Commit metadata carried along with this script:
#   From: Mischa POSLAWSKY
#   Date: Thu, 27 Apr 2017 00:33:39 +0000 (+0200)
#   Subject: browser: new mkusage-wikipedia to parse analytics tsv
#   X-Git-Tag: v1.10~63
#   X-Git-Url: http://git.shiar.nl/sheet.git/commitdiff_plain/916e4f76f6d478e51b42b25f5c9e0ec7373749e5?ds=sidebyside
#
#   browser: new mkusage-wikipedia to parse analytics tsv
#
#   Amazing export from analytics.wikimedia.org continuing previous squid
#   stats but all nicely prepared, only needing translation to caniuse agent
#   identifiers.  Contains all samples, so restrict by year for now.
#
#   diff --git a/tools/mkusage-wikimedia b/tools/mkusage-wikimedia
#   new file mode 100755
#   index 0000000..5e264ed
#   --- /dev/null
#   +++ b/tools/mkusage-wikimedia
#   @@ -0,0 +1,65 @@
#
# NOTE: the commit subject says "mkusage-wikipedia"; the script itself is
# tools/mkusage-wikimedia.

use 5.014;    # requiring 5.12+ also enables strict and the say feature
use warnings;

use Data::Dump 'pp';

our $VERSION = '1.00';

# Browser name from the export (spaces replaced by dashes) => caniuse agent id.
# Names that are not listed here are left out of the result entirely, so they
# do not count towards the total either.
my %BROWSERID = qw(
    IE                       ie
    IE-Mobile                ie_mob
    Edge                     edge
    Edge-Mobile              edge
    Firefox                  firefox
    Firefox-Mobile           and_ff
    Safari                   safari
    Mobile-Safari            ios_saf
    Mobile-Safari-UIWebView  ios_saf
    Chrome                   chrome
    Chromium                 chrome
    Chrome-Mobile            and_chr
    Chrome-Mobile-iOS        and_chr
    Android                  android
    Opera                    opera
    Opera-Mini               op_mini
    BlackBerry-WebKit        bb
    UC-Browser               and_uc
);

# Result structure: dash-prefixed keys hold metadata, every other key is an
# agent id pointing to a hash of { version => share }.
my %count = (
    -title => 'Wikimedia',
    -site  => 'https://analytics.wikimedia.org/',
);

# The export contains all samples ever taken; only rows dated in this year
# are used for now.
my $recent = qr/^2017-/;

# Input comes from the files named on the command line, or STDIN if none.
# An empty input is treated like a bad header rather than an undef warning.
my $header = readline // '';
($header =~ y/\t//) == 3 or die "unexpected amount of columns in header\n";

my $total = 0;    # sum of all accepted shares, used to normalise afterwards
my %seen_date;    # sample dates that contributed at least one row

while (my $row = readline) {
    chomp $row;
    my @fields = split /\t/, $row, -1;
    if (@fields < 4) {
        # Blank lines are ignored silently; truncated rows are reported
        # instead of polluting the table with undefined shares.
        warn "skipping short row at line $.\n" if length $row;
        next;
    }

    my ($date, $name, $version, $pct) = @fields;
    $date =~ $recent or next;
    $name =~ y/ /-/;
    my $browser = $BROWSERID{$name} or next;

    $count{$browser}{$version} += $pct;
    $total += $pct;
    $seen_date{$date}++;
}

# Without any accepted share there is nothing to normalise; bail out with a
# clear message instead of an "Illegal division by zero".
$total or die "no samples for known browsers in the selected period\n";

# Describe the covered period as "first to last" sample date (the same date
# appears twice when only a single date was seen).
my @dates = sort keys %seen_date;
$count{-date} = join ' to ', @dates[0, -1];

# Rescale every share so that all included versions together add up to 100.
my $scale = 100 / $total;
for my $versions (grep { ref $_ eq 'HASH' } values %count) {
    $_ *= $scale for values %{$versions};
}

# The leading '+' presumably makes the dumped hash parse as an expression
# when the generated .inc.pl file is loaded -- confirm against its consumer.
say '+', pp(\%count);

__END__

=head1 NAME

mkusage-wikimedia - convert Wikimedia browser statistics to caniuse agent ids

=head1 USAGE

    curl https://analytics.wikimedia.org/datasets/periodic/reports/metrics/browser/all_sites_by_browser_family_and_major_percent.tsv |
    ./mkusage-wikimedia >browser-usage.inc.pl
