X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/dd0fba64e720055cf4733f422efef687a16d49fa..40575fa484663def379178642270af334291f385:/tools/mkusage-wikimedia?ds=sidebyside
diff --git a/tools/mkusage-wikimedia b/tools/mkusage-wikimedia
index 901ec15..71d6e5f 100755
--- a/tools/mkusage-wikimedia
+++ b/tools/mkusage-wikimedia
@@ -1,105 +1,79 @@
-#!/usr/bin/perl -n
-use 5.010; use strict; use warnings;
-our %count;
-our $mobile;
+#!/usr/bin/perl
+use 5.014;
+use warnings;
-our $VERSION = '1.01';
+use Data::Dump 'pp';
-if (m{
} .. m{?td>}) {
- $count{-source} = 'http://stats.wikimedia.org/archive/squid_reports/';
- $count{-title } = 'Wikimedia';
- $count{-date} = $1 if m{ period: (?:\d+ )?(\w+ \d+) };
- next;
-}
+our $VERSION = '1.03';
-# select relevant columns
-/>Browser versions(.*)/ ... m{} && last or next;
-my ($tr, $id, $count2, $count) = split /(?:<[^>]*>)+/;
-$mobile = $count2 !~ /non mobile/ if $id ~~ ' ';
-next if $id ~~ ['Total', ' '];
+# Map browser names as found in the input (after spaces are turned into
+# hyphens) to the key their share is accumulated under in %count.
+# A false id (0) marks names which are skipped without an "unknown browser"
+# warning.
+# NOTE(review): IE maps to edge, the same id as Edge itself -- confirm this
+# merge is intended.
+my %BROWSERID = qw(
+ IE edge
+ IE-Mobile ie_mob
+ Edge edge
+ Edge-Mobile edge
+ Firefox firefox
+ Firefox-Mobile and_ff
+ Safari safari
+ Mobile-Safari ios_saf
+ Mobile-Safari-UIWebView ios_saf
+ Chrome chrome
+ Chromium chrome
+ Chrome-Mobile and_chr
+ Chrome-Mobile-iOS and_chr
+ Android and_chr
+ Opera opera
+ Opera-Mini op_mini
+ BlackBerry-WebKit bb
+ UC-Browser and_uc
+ Samsung-Internet samsung
+ Google 0
+ Other 0
+);
-# convert to usable syntax
-my ($browser, $version) = split /\h+/, $id, 2;
-$count =~ s/,//g;
-$count =~ s/%$//;
-$version //= 0;
-given ($browser) {
- when (['Firefox', 'Iceweasel']) {
- $browser = $mobile ? 'and_ff' : 'firefox';
- continue;
- }
- when ('MSIE') {
- $browser = $mobile ? 'ie_mob' : 'ie';
- continue;
- }
- when ('Opera') {
- $browser = $mobile ? 'op_mob' : 'opera';
- for ($version) {
- if (m{\(Mini(.*)\)$}) {
- $browser = 'op_mini';
- ($_) = $1 =~ m{^/(\d+)};
- continue;
- }
- s/^\d*\.\d\K.*//; # one significant digit
- }
- continue;
- }
- when ('Safari') {
- $browser = $mobile ? 'ios_saf' : 'safari';
- my $numversion = join('.', map { sprintf '%03d', $_ } split /\./, $version);
- $numversion =~ s/6(?=\d{3})//; # incomparable 6532.22 → 523
- for (
- # http://en.wikipedia.org/wiki/Safari_version_history
- $mobile ? (
- [ '413' => '1' ],
- [ '419' => '1.1' ],
- [ '525' => '2' ],
- [ '528' => '3' ],
- [ '531' => '3.2' ],
- [ '531.022' => '4' ],
- [ '533' => '4.2' ],
- [ '534' => '4x' ],
- ) : (
- [ '413' => '2' ],
- [ '522' => '3' ],
- [ '525.013' => '3.1' ],
- [ '525.026' => '3.2' ],
- [ '526' => '4' ],
- [ '533' => '5' ],
- [ '534' => '5x' ],
- )
- ) {
- last if $numversion lt $_->[0];
- $version = $_->[1];
- }
- }
- when ('Chrome') {
- $browser = $mobile ? 'and_chr' : 'chrome';
- s/\.\d+$// for $version;
- }
- when ('Android') {
- $browser = 'android';
- }
- when ('BlackBerry') {
- $browser = 'bb';
- }
- when ('UCWEB') {
- $browser = 'and_uc';
- }
- s/\.0$// for $version;
+# Result structure: metadata lives under dash-prefixed keys, browser shares
+# are added further down as $count{browser id}{version}.
+my %count = (
+ -title => 'Wikimedia',
+ -site => 'https://analytics.wikimedia.org/',
+);
+
+# The first input line is the header; it has to hold exactly three tabs
+# (i.e. four columns) or the input is rejected.
+(readline =~ y/\t//) == 3 or die "unexpected amount of columns in header\n";
+# Slurp all remaining lines; they are scanned twice below.
+my @lines = readline;
+
+# Walk the rows from last to first to find the cut-off date: $recent moves
+# on each time the leading field differs from it, and the scan stops at the
+# third such date.  This assumes rows are ordered by date, oldest first --
+# TODO confirm against the input.
+my $recent; # minimum date to include
+for (reverse @lines) {
+ my ($date) = /(\S+)/;
+ $recent ne $date or next if $recent; # same day
+ $recent = $date; # override older date
+ last if state $i++ >= 2; # repeat twice
 }
-$count{$browser}{$version} += $count;
+# Accumulate the share of every recognised browser version over all rows
+# dated $recent or later.
+for my $row (@lines) {
+ $row =~ s/\r?\n\z//;
+ my ($date, $name, $version, $pct) = split /\t/, $row;
+ $date ge $recent or next; # string comparison of the date fields
+ $name =~ y/ /-/; # %BROWSERID keys use hyphens for spaces
+ my $browser = $BROWSERID{$name};
+ if (not $browser) {
+ # report names missing from the table, unless their share is below
+ # .005; names mapped to a false id are dropped silently
+ warn "unknown browser: $name v$version ($pct)\n"
+ unless defined $browser or $pct < .005;
+ next;
+ }
+ $version =~ s/\A-\z/0/; # a lone dash is stored as version 0
+ $count{$browser}{$version} += $pct;
+ $count{-total} += $pct; # sum of all counted shares
+ $count{-date} = $date; # ends up as the date of the last counted row
+}
-END {
- use Data::Dump 'pp';
- print pp(\%count);
+# Rescale all counted shares so they add up to 100, then print the result.
+# -total is removed from the output.  Without any counted row it is unset
+# (or zero), so bail out with a clear message instead of letting the
+# division below die with "Illegal division by zero".
+my $total = delete $count{-total}
+ or die "no known browser rows found in input\n";
+my $mult = 100 / $total;
+for (values %count) {
+ ref $_ eq 'HASH' or next; # skip the scalar metadata entries
+ $_ *= $mult for values %{$_};
 }
+say '+', pp(\%count);
+
__END__
=head1 USAGE
- curl http://stats.wikimedia.org/archive/squid_reports/2010-10/SquidReportClients.htm |
- ./mkusage-wikimedia > browser-usage.inc.pl
+ ./mkusage-wikimedia wikipedia-analytics.tsv >browser-usage.inc.pl
|