#!/usr/bin/perl
# Convert a tab-separated browser usage export into a Perl data structure
# of usage percentages per browser and version.
#
# Input (files named on the command line, or STDIN): one header line with
# four tab-separated columns, followed by rows of date, browser name,
# version and usage share.  Only rows from the most recent dates (see $DAYS)
# are counted, and the shares are rescaled so the counted ones sum to 100.
#
# Output (STDOUT): the pretty-printed %count hash, prefixed with '+'.
use 5.014;
use warnings;
use Data::Dump 'pp';

our $VERSION = '1.03';

# Number of most recent distinct dates whose rows are included.
my $DAYS = 3;

# Output key for each input browser name (after spaces are replaced by
# dashes).  A value of 0 marks names that are dropped without a warning;
# names missing from this table are dropped with a warning.
# NOTE(review): IE maps to the same key as Edge -- confirm this is intended.
my %BROWSERID = qw(
	IE                      edge
	IE-Mobile               ie_mob
	Edge                    edge
	Edge-Mobile             edge
	Firefox                 firefox
	Firefox-Mobile          and_ff
	Safari                  safari
	Mobile-Safari           ios_saf
	Mobile-Safari-UIWebView ios_saf
	Chrome                  chrome
	Chromium                chrome
	Chrome-Mobile           and_chr
	Chrome-Mobile-iOS       and_chr
	Android                 and_chr
	Opera                   opera
	Opera-Mini              op_mini
	BlackBerry-WebKit       bb
	UC-Browser              and_uc
	Samsung-Internet        samsung
	Google                  0
	Other                   0
);

# Result structure: metadata under dash-prefixed keys, plus one hash of
# version => share for every browser key encountered.
my %count = (
	-title => 'Wikimedia',
	-site  => 'https://analytics.wikimedia.org/',
);

# The header is only used to verify the expected four-column layout.
my $header = <>;
defined $header
	or die "no input\n";
($header =~ tr/\t//) == 3
	or die "unexpected amount of columns in header\n";
my @lines = <>;

# Collect the distinct dates (first non-blank token of each row) so the
# cutoff does not depend on the order of rows in the input.
my %seen;
for my $line (@lines) {
	my ($date) = $line =~ /(\S+)/ or next;  # skip blank lines
	$seen{$date} = 1;
}
my @dates = sort { $b cmp $a } keys %seen;  # newest first (string order)
@dates
	or die "no data rows in input\n";
splice @dates, $DAYS if @dates > $DAYS;
my $recent = $dates[-1];  # minimum date to include

my $total = 0;  # sum of all counted shares, for normalisation
for my $row (@lines) {
	$row =~ s/\r?\n\z//;
	my ($date, $name, $version, $pct) = split /\t/, $row;
	defined $date or next;  # blank line
	$date ge $recent or next;
	if (not defined $pct) {
		warn "malformed row: $row\n";
		next;
	}

	$name =~ tr/ /-/;
	my $browser = $BROWSERID{$name};
	if (not $browser) {
		# explicitly ignored names (0) and negligible shares stay silent
		warn "unknown browser: $name v$version ($pct)\n"
			unless defined $browser or $pct < .005;
		next;
	}

	$version =~ s/\A-\z/0/;  # a lone dash means no version; store as 0
	$count{$browser}{$version} += $pct;
	$total += $pct;
	$count{-date} = $date
		if not defined $count{-date} or $date gt $count{-date};
}

$total
	or die "no usage recorded for any known browser\n";

# Rescale so that the shares of all counted browsers add up to 100.
my $mult = 100 / $total;
for my $versions (values %count) {
	ref $versions eq 'HASH' or next;  # skip scalar metadata entries
	$_ *= $mult for values %{$versions};
}

say '+', pp(\%count);

__END__

=head1 USAGE

	./mkusage-wikimedia wikipedia-analytics.tsv >browser-usage.inc.pl