#!/usr/bin/perl
use 5.014;
use warnings;

use Data::Dump 'pp';

# Map browser family names from the input (spaces replaced by dashes)
# to the browser identifiers used as keys in the generated table.
# Families not listed here are ignored entirely, including for the total.
my %BROWSERID = qw(
	IE                      ie
	IE-Mobile               ie_mob
	Edge                    ie
	Edge-Mobile             ie
	Firefox                 firefox
	Firefox-Mobile          and_ff
	Safari                  safari
	Mobile-Safari           ios_saf
	Mobile-Safari-UIWebView ios_saf
	Chrome                  chrome
	Chromium                chrome
	Chrome-Mobile           android
	Chrome-Mobile-iOS       android
	Android                 android
	Opera                   opera
	Opera-Mini              op_mini
	BlackBerry-WebKit       bb
	UC-Browser              and_uc
);

# Resulting table: metadata under dash-prefixed keys,
# usage percentages under {browser id}{version}.
my %count = (
	-title => 'Wikimedia',
	-site  => 'https://analytics.wikimedia.org/',
);

# Only rows whose date column matches this pattern are counted.
my $recent = qr/^2017-/;

# The first input line is a header which must describe exactly
# four tab-separated columns (date, name, version, percentage).
my $header = readline(*ARGV);
defined $header or die "no input\n";
($header =~ y/\t//) == 3 or die "unexpected amount of columns in header\n";

# Sum of all counted percentages, used to rescale the results afterwards.
my $total = 0;

while (my $row = readline(*ARGV)) {
	chomp $row;
	my ($date, $name, $version, $pct) = split /\t/, $row;
	unless (defined $pct) {
		# incomplete rows cannot be counted; blank lines are skipped silently
		warn "skipping malformed row at line $.\n" if length $row;
		next;
	}

	$date =~ $recent or next;
	$name =~ y/ /-/;
	my $browser = $BROWSERID{$name} or next;
	$version =~ s/\A-\z/0/;  # a lone dash is stored as version 0

	$count{$browser}{$version} += $pct;
	$total += $pct;
	$count{-date} = $date;  # ends up as the date of the last counted row
}

# Without any counted rows there is nothing to scale (and the division
# below would fail), so report that explicitly.
$total > 0 or die "no recent usage of known browsers found in input\n";

# Rescale so the counted versions add up to 100,
# regardless of any ignored browser families.
my $mult = 100 / $total;
for my $versions (values %count) {
	ref $versions eq 'HASH' or next;  # leave metadata strings untouched
	$_ *= $mult for values %{$versions};
}

# The leading plus presumably makes the dumped hash parse as an
# expression (not a block) when the output file is evaluated.
say '+', pp(\%count);

__END__

=head1 NAME

mkusage-wikimedia - convert Wikimedia browser statistics to a usage table

=head1 USAGE

	curl https://analytics.wikimedia.org/datasets/periodic/reports/metrics/browser/all_sites_by_browser_family_and_major_percent.tsv |
	./mkusage-wikimedia >browser-usage.inc.pl

=head1 DESCRIPTION

Reads tab-separated rows of date, browser family, major version, and
percentage from standard input (or files given as arguments),
keeps recent rows of known browsers, and prints a Perl hash
with the usage per browser version scaled to a total of 100.