use Data::Dump 'pp';
-our $VERSION = '1.00';
+our $VERSION = '1.03';
my %BROWSERID = qw(
- IE ie
+ IE edge
IE-Mobile ie_mob
- Edge ie
- Edge-Mobile ie
+ Edge edge
+ Edge-Mobile edge
Firefox firefox
Firefox-Mobile and_ff
Safari safari
Mobile-Safari-UIWebView ios_saf
Chrome chrome
Chromium chrome
- Chrome-Mobile android
- Chrome-Mobile-iOS android
- Android android
+ Chrome-Mobile and_chr
+ Chrome-Mobile-iOS and_chr
+ Android and_chr
Opera opera
Opera-Mini op_mini
BlackBerry-WebKit bb
UC-Browser and_uc
+ Samsung-Internet samsung
+ Google 0
+ Other 0
);
my %count = (
-site => 'https://analytics.wikimedia.org/',
);
-my $recent = qr/^2017-/;
-
(readline =~ y/\t//) == 3 or die "unexpected amount of columns in header\n";
+my @lines = readline; # list context: read every remaining input line (header was consumed above)
+
+my $recent; # minimum date to include
+for (reverse @lines) { # scan from the last input line backwards
+ my ($date) = /(\S+)/; # first run of non-whitespace on the line
+ $recent ne $date or next if $recent; # same day
+ $recent = $date; # override older date
+ last if state $i++ >= 2; # repeat twice: stop once $recent has been assigned three times; NOTE(review): `state` needs `use feature 'state'` or `use v5.10`+, not visible in this excerpt - confirm
+}
-while (my $row = readline) {
+for my $row (@lines) {
+ $row =~ s/\r?\n\z//; # strip the trailing LF or CRLF line ending
my ($date, $name, $version, $pct) = split /\t/, $row;
- $date =~ $recent or next;
+ $date ge $recent or next; # string comparison: skip rows whose date sorts before $recent
$name =~ y/ /-/;
- my $browser = $BROWSERID{$name} or next;
+ my $browser = $BROWSERID{$name}; # undef when the name is not listed; '0' (false) for names mapped to 0
+ if (not $browser) {
+ warn "unknown browser: $name v$version ($pct)\n"
+ unless defined $browser or $pct < .005; # warn only for unlisted names whose share is at least .005
+ next; # neither unlisted nor 0-mapped names are counted
+ }
+ $version =~ s/\A-\z/0/; # a version that is exactly "-" is recorded as 0
$count{$browser}{$version} += $pct;
$count{-total} += $pct;
- $count{-date}->{$date}++;
+ $count{-date} = $date; # overwritten on every counted row, so it ends up as the date of the last one
}
-$_ = join ' to ', (sort keys %{$_})[0, -1] for $count{-date};
-
my $mult = 100 / delete $count{-total};
for (values %count) {
ref $_ eq 'HASH' or next;
=head1 USAGE
- curl https://analytics.wikimedia.org/datasets/periodic/reports/metrics/browser/all_sites_by_browser_family_and_major_percent.tsv |
- ./mkusage-wikimedia >browser-usage.inc.pl
+ ./mkusage-wikimedia wikipedia-analytics.tsv >browser-usage.inc.pl