From 916e4f76f6d478e51b42b25f5c9e0ec7373749e5 Mon Sep 17 00:00:00 2001
From: Mischa POSLAWSKY
Date: Thu, 27 Apr 2017 02:33:39 +0200
Subject: [PATCH] browser: new mkusage-wikimedia to parse analytics tsv

Amazing export from analytics.wikimedia.org continuing previous squid
stats but all nicely prepared, only needing translation to caniuse agent
identifiers.  Contains all samples, so restrict by year for now.
---
 tools/mkusage-wikimedia | 81 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100755 tools/mkusage-wikimedia

diff --git a/tools/mkusage-wikimedia b/tools/mkusage-wikimedia
new file mode 100755
index 0000000..5e264ed
--- /dev/null
+++ b/tools/mkusage-wikimedia
@@ -0,0 +1,81 @@
+#!/usr/bin/perl
+use 5.014;
+use warnings;
+
+use Data::Dump 'pp';
+
+our $VERSION = '1.00';
+
+# Wikimedia browser family names (spaces turned into dashes) mapped to
+# caniuse agent identifiers; families not listed here are skipped.
+my %BROWSERID = qw(
+    IE                      ie
+    IE-Mobile               ie_mob
+    Edge                    edge
+    Edge-Mobile             edge
+    Firefox                 firefox
+    Firefox-Mobile          and_ff
+    Safari                  safari
+    Mobile-Safari           ios_saf
+    Mobile-Safari-UIWebView ios_saf
+    Chrome                  chrome
+    Chromium                chrome
+    Chrome-Mobile           and_chr
+    Chrome-Mobile-iOS       and_chr
+    Android                 android
+    Opera                   opera
+    Opera-Mini              op_mini
+    BlackBerry-WebKit       bb
+    UC-Browser              and_uc
+);
+
+# Result: dash-prefixed keys are metadata, any other key is an agent
+# identifier holding the usage percentage of each of its versions.
+my %count = (
+    -title => 'Wikimedia',
+    -site  => 'https://analytics.wikimedia.org/',
+);
+
+# The export contains all samples, so only aggregate those of this year.
+my $recent = qr/^2017-/;
+
+my $header = <> // die "no input\n";
+($header =~ y/\t//) == 3 or die "unexpected amount of columns in header\n";
+
+my $total = 0;  # sum of all accepted percentages
+my %seen_date;  # dates of the samples which were accepted
+
+while (my $row = <>) {
+    chomp $row;
+    my ($date, $name, $version, $pct) = split /\t/, $row;
+    defined $pct or next;  # blank or truncated row
+    $date =~ $recent or next;
+    $name =~ y/ /-/;  # keys of the qw() list above cannot contain spaces
+    my $browser = $BROWSERID{$name} or next;
+    $count{$browser}{$version} += $pct;
+    $total += $pct;
+    $seen_date{$date}++;
+}
+
+# Scaling below divides by the total, so refuse to continue without data.
+$total or die "no recent samples of known browsers in input\n";
+
+$count{-date} = join ' to ', (sort keys %seen_date)[0, -1];
+
+# Normalise the sums so that all reported values add up to 100.
+my $mult = 100 / $total;
+for my $versions (grep { ref $_ eq 'HASH' } values %count) {
+    $_ *= $mult for values %{$versions};
+}
+
+# NOTE(review): the leading '+' presumably lets the consumer parse the dump
+# as an anonymous hash rather than a code block; confirm with its loader.
+say '+', pp(\%count);
+
+__END__
+
+=head1 USAGE
+
+    curl https://analytics.wikimedia.org/datasets/periodic/reports/metrics/browser/all_sites_by_browser_family_and_major_percent.tsv |
+    ./mkusage-wikimedia >browser-usage.inc.pl
+
-- 
2.30.0