X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/c98bda172fec13970df56f9bec501b420c3b51f5..9197e3b6b3d5421bd29f6a2efbf9caf3bbaccf26:/tools/mkusage-wikimedia?ds=inline
diff --git a/tools/mkusage-wikimedia b/tools/mkusage-wikimedia
index 7f9a3a5..4470c50 100755
--- a/tools/mkusage-wikimedia
+++ b/tools/mkusage-wikimedia
@@ -1,145 +1,78 @@
-#!/usr/bin/perl -n
+#!/usr/bin/perl
use 5.014;
use warnings;
-our %count;
-our $mobile;
-our $VERSION = '1.02';
+use Data::Dump 'pp';
-if (m{
} .. m{?td>}) {
- $count{-source} = 'http://stats.wikimedia.org/archive/squid_reports/';
- $count{-title } = 'Wikimedia';
- $count{-date} = $1 if m{ period: (?:\d+ )?(\w+ \d+) };
- next;
-}
+our $VERSION = '1.01';
-# select relevant data
-/>In alphabetical order/ .. eof or next; # second table
-my ($id, $count2, $count) = map { s/<[^>]*>//gr } split /<\/td>/;
+my %BROWSERID = qw(
+ IE ie
+ IE-Mobile ie_mob
+ Edge ie
+ Edge-Mobile ie
+ Firefox firefox
+ Firefox-Mobile and_ff
+ Safari safari
+ Mobile-Safari ios_saf
+ Mobile-Safari-UIWebView ios_saf
+ Chrome chrome
+ Chromium chrome
+ Chrome-Mobile android
+ Chrome-Mobile-iOS android
+ Android android
+ Opera opera
+ Opera-Mini op_mini
+ BlackBerry-WebKit bb
+ UC-Browser and_uc
+ Samsung-Internet samsung
+ Other 0
+); # input browser name (spaces replaced by dashes) => output browser id; a false id (0) marks a known name that is skipped without a warning
-if (/>Browser engines/ ... m{}) {
- my $apple = $id =~ s/^AppleWebKit ?//;
- $apple .. 1 or next;
- my ($abs) = m/>showCount\((\d+),/;
+my %count = ( # result structure; dash-prefixed keys hold scalar metadata
+ -title => 'Wikimedia',
+ -site => 'https://analytics.wikimedia.org/',
+); # browser ids are added further down as browser => { version => share }
- state %version;
- if (defined $count2) {
- $version{$id} = $abs;
- next;
- }
+(readline =~ y/\t//) == 3 or die "unexpected amount of columns in header\n"; # first input line (header) must contain exactly 3 tabs, i.e. 4 columns
+my @lines = readline; # slurp all remaining input lines (bare readline reads from ARGV)
- # replace safari total percentage by individual version counts
- # have to use combined distribution for multiple platforms :(
- for my $browser (qw/ safari ios_saf /) {
- my $multiplier = delete $count{$browser}{'0.0'} or next;
- $multiplier /= $abs; # total number
- while (my ($verwebkit, $count) = each %version) {
- my $ver = safariver($verwebkit, $browser eq 'ios_saf');
- $count{$browser}{$ver} += $count * $multiplier;
- }
- }
- last;
+my $recent; # minimum date to include
+for (reverse @lines) { # scan rows from last to first; presumably input is sorted by ascending date - TODO confirm
+ my ($date) = /(\S+)/; # first run of non-whitespace on the line
+ $recent ne $date or next if $recent; # same day
+ $recent = $date; # date differs from the previous pick: adopt it
+ last if state $i++ >= 2; # stop once a third date has been picked up
}
-# select version data
-/>Browser versions(.*)/ ... !defined $count2 or next;
-unless (defined $count2) {
- # header row if no td separator
- $mobile = $id !~ /non mobile/;
- next;
-}
-
-# convert to usable syntax
-my ($browser, $version) = split /\h+/, $id || ' ', 2;
-$count =~ s/,//g;
-$count =~ s/%$//;
-$version //= 0;
-given ($browser) {
- when (['Firefox', 'Iceweasel']) {
- $browser = $mobile ? 'and_ff' : 'firefox';
- continue;
- }
- when ('MSIE') {
- $browser = $mobile ? 'ie_mob' : 'ie';
- continue;
- }
- when ('Opera') {
- $browser = $mobile ? 'op_mob' : 'opera';
- for ($version) {
- if (m{\(Mini(.*)\)$}) {
- $browser = 'op_mini';
- ($_) = $1 =~ m{^/(\d+)};
- continue;
- }
- s/^\d*\.\d\K.*//; # one significant digit
- }
- continue;
- }
- when ('Safari') {
- $browser = $mobile ? 'ios_saf' : 'safari';
- }
- when ('Chrome') {
- $browser = $mobile ? 'and_chr' : 'chrome';
- s/\.\d+$// for $version;
- }
- when ('Android') {
- $browser = 'android';
- }
- when ('BlackBerry') {
- $browser = 'bb';
- }
- when ('UCWEB') {
- $browser = 'and_uc';
+for my $row (@lines) { # one tab-separated record per input line
+ $row =~ s/\r?\n\z//; # strip a trailing LF or CRLF
+ my ($date, $name, $version, $pct) = split /\t/, $row;
+ $date ge $recent or next; # skip rows dated before the cutoff (string comparison)
+ $name =~ y/ /-/; # keys of %BROWSERID use dashes instead of spaces
+ my $browser = $BROWSERID{$name};
+ if (not $browser) { # name is either unmapped (undef) or mapped to a false id
+ warn "unknown browser: $name v$version ($pct)\n"
+ unless defined $browser or $pct < .005; # report only unmapped names whose value is at least .005
+ next;
 }
- s/\.0$// for $version;
+ $version =~ s/\A-\z/0/; # a version of just "-" is stored as 0
+ $count{$browser}{$version} += $pct;
+ $count{-total} += $pct; # running sum of everything counted, consumed by the rescaling step
+ $count{-date} = $date; # ends up holding the date of the last counted row
}
-$count{$browser}{$version} += $count;
-
-END {
- use Data::Dump 'pp';
- print pp(\%count);
+my $mult = 100 / (delete $count{-total} or die "no usable recent rows in input\n"); # factor that rescales the counted values to sum to 100; a missing or zero total would otherwise be an illegal division by zero
+for (values %count) {
+ ref $_ eq 'HASH' or next; # leave scalar metadata such as -title alone
+ $_ *= $mult for values %{$_}; # values() yields aliases, so this rescales %count in place
}
-sub safariver {
- my ($webkitnum, $ios) = @_;
- my $safarinum = '';
- for (
- $ios ? (
- [ 413 => '1' ],
- [ 419 => '1.1' ],
- [ 525 => '2' ],
- [ 528 => '3' ],
- [ 531 => '3.2' ], # or 4 from 531.022
- [ 532 => '4' ],
- [ 533 => '4.2' ],
- [ 534 => '5' ],
- [ 536 => '7' ],
- [ 537 => '8' ],
- [ 600 => '8.1' ],
- ) : (
- # http://en.wikipedia.org/wiki/Safari_version_history
- [ 412 => '2' ],
- [ 522 => '3' ],
- [ 525 => '3.1' ], # 3.2 from 525.026
- [ 526 => '4' ],
- [ 533 => '5' ], # or 4.1
- [ 534 => '5.1' ],
- [ 536 => '6' ],
- [ 537 => '7.1' ], # or 6.1 or 7.0
- [ 538 => '8' ],
- )
- ) {
- last if $webkitnum lt $_->[0];
- $safarinum = $_->[1];
- }
- return $safarinum;
-}
+say '+', pp(\%count); # print a literal '+' followed by the Data::Dump rendering of the result hash
__END__
=head1 USAGE
- curl http://stats.wikimedia.org/archive/squid_reports/2010-10/SquidReportClients.htm |
- ./mkusage-wikimedia > browser-usage.inc.pl
+ ./mkusage-wikimedia wikipedia-analytics.tsv >browser-usage.inc.pl
|