#!/usr/bin/perl
# mkusage-wikimedia: read a tab-separated browser usage table (file given
# as argument, or standard input) and print a Perl hash of usage
# percentages per browser and version, normalised to a total of 100.
use 5.014;
use warnings;

use Data::Dump 'pp';

# Number of most recent distinct dates (first column) to aggregate.
my $DAYS = 3;

# Browser names as found in the input (spaces replaced by dashes) mapped
# to the identifiers used as keys in the output.  A value of 0 marks
# names that are recognised but deliberately left out of the results.
my %BROWSERID = qw(
    IE                      edge
    IE-Mobile               ie_mob
    Edge                    edge
    Edge-Mobile             edge
    Firefox                 firefox
    Firefox-Mobile          and_ff
    Safari                  safari
    Mobile-Safari           ios_saf
    Mobile-Safari-UIWebView ios_saf
    Chrome                  chrome
    Chromium                chrome
    Chrome-Mobile           and_chr
    Chrome-Mobile-iOS       and_chr
    Android                 and_chr
    Opera                   opera
    Opera-Mini              op_mini
    BlackBerry-WebKit       bb
    UC-Browser              and_uc
    Samsung-Internet        samsung
    Google                  0
    Other                   0
);

my %count = (
    -title => 'Wikimedia',
    -site  => 'https://analytics.wikimedia.org/',
);

# The first line is a header which must describe exactly four
# tab-separated columns (date, browser name, version, percentage).
my $header = readline;
defined $header
    or die "empty input: missing header line\n";
($header =~ tr/\t//) == 3
    or die "unexpected amount of columns in header\n";
my @lines = readline;

# Walk backwards through the rows to find the oldest of the $DAYS most
# recent distinct dates.  NOTE(review): this relies on rows of the same
# date being adjacent and on dates ascending through the file -- confirm
# against the actual export.
my $recent;  # minimum date to include
my $days_seen = 0;
for my $line (reverse @lines) {
    my ($date) = $line =~ /(\S+)/
        or next;  # blank line
    next if defined $recent and $recent eq $date;  # same day
    $recent = $date;  # override older date
    last if ++$days_seen >= $DAYS;
}
defined $recent
    or die "no data rows found\n";

for my $row (@lines) {
    $row =~ s/\r?\n\z//;
    my ($date, $name, $version, $pct) = split /\t/, $row;

    # Dates are compared as strings, so they must sort lexically.
    defined $date and $date ge $recent
        or next;
    if (not defined $pct) {
        # Incomplete rows cannot be counted; report instead of
        # tripping over undefined values further down.
        warn "malformed row skipped: $row\n";
        next;
    }

    $name =~ y/ /-/;
    my $browser = $BROWSERID{$name};
    if (not $browser) {
        # Stay quiet about names explicitly mapped to 0 and about
        # unknown names with a negligible share.
        warn "unknown browser: $name v$version ($pct)\n"
            unless defined $browser or $pct < .005;
        next;
    }

    $version =~ s/\A-\z/0/;  # a lone dash stands for "no version"
    $count{$browser}{$version} += $pct;
    $count{-total} += $pct;
    $count{-date} = $date;
}

# Rescale so the shares of all included browsers add up to 100; rows
# that were skipped above are thereby distributed proportionally.
my $total = delete $count{-total}
    or die "no usable browser data found\n";
my $mult = 100 / $total;
for my $versions (values %count) {
    ref $versions eq 'HASH' or next;  # skip -title, -site, -date
    $_ *= $mult for values %{$versions};
}

say '+', pp(\%count);

__END__

=head1 USAGE

    ./mkusage-wikimedia wikipedia-analytics.tsv >browser-usage.inc.pl