git.shiar.nl
/
sheet.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
unicode: extipa glyphs introduced in unicode 14.0
[sheet.git]
/
tools
/
mkusage-wikimedia
diff --git a/tools/mkusage-wikimedia b/tools/mkusage-wikimedia
index 5e264ed295ed0bba44b22ddd616a1a6d304a68f6..88b7cef99abf0f2e28430c0af373dac792e27bdf 100755
(executable)
--- a/tools/mkusage-wikimedia
+++ b/tools/mkusage-wikimedia
@@ -4,10 +4,10 @@
use warnings;
use Data::Dump 'pp';
use Data::Dump 'pp';
-our $VERSION = '1.00';
+our $VERSION = '1.02';
my %BROWSERID = qw(
my %BROWSERID = qw(
- IE ie
+ IE edge
IE-Mobile ie_mob
Edge edge
Edge-Mobile edge
IE-Mobile ie_mob
Edge edge
Edge-Mobile edge
@@ -20,11 +20,13 @@
my %BROWSERID = qw(
Chromium chrome
Chrome-Mobile and_chr
Chrome-Mobile-iOS and_chr
Chromium chrome
Chrome-Mobile and_chr
Chrome-Mobile-iOS and_chr
- Android android
+ Android and_chr
Opera opera
Opera-Mini op_mini
BlackBerry-WebKit bb
UC-Browser and_uc
Opera opera
Opera-Mini op_mini
BlackBerry-WebKit bb
UC-Browser and_uc
+ Samsung-Internet samsung
+ Other 0
);
my %count = (
);
my %count = (
@@ -32,22 +34,34 @@
my %count = (
-site => 'https://analytics.wikimedia.org/',
);
-site => 'https://analytics.wikimedia.org/',
);
-my $recent = qr/^2017-/;
-
(readline =~ y/\t//) == 3 or die "unexpected amount of columns in header\n";
(readline =~ y/\t//) == 3 or die "unexpected amount of columns in header\n";
+my @lines = readline;
+
+my $recent; # minimum date to include
+for (reverse @lines) {
+ my ($date) = /(\S+)/;
+ $recent ne $date or next if $recent; # same day
+ $recent = $date; # override older date
+ last if state $i++ >= 2; # repeat twice
+}
-while (my $row = readline) {
+for my $row (@lines) {
+ $row =~ s/\r?\n\z//;
my ($date, $name, $version, $pct) = split /\t/, $row;
my ($date, $name, $version, $pct) = split /\t/, $row;
- $date =~ $recent or next;
+ $date ge $recent or next;
$name =~ y/ /-/;
$name =~ y/ /-/;
- my $browser = $BROWSERID{$name} or next;
+ my $browser = $BROWSERID{$name};
+ if (not $browser) {
+ warn "unknown browser: $name v$version ($pct)\n"
+ unless defined $browser or $pct < .005;
+ next;
+ }
+ $version =~ s/\A-\z/0/;
$count{$browser}{$version} += $pct;
$count{-total} += $pct;
$count{$browser}{$version} += $pct;
$count{-total} += $pct;
- $count{-date}->{$date}++;
+ $count{-date} = $date;
}
}
-$_ = join ' to ', (sort keys %{$_})[0, -1] for $count{-date};
-
my $mult = 100 / delete $count{-total};
for (values %count) {
ref $_ eq 'HASH' or next;
my $mult = 100 / delete $count{-total};
for (values %count) {
ref $_ eq 'HASH' or next;
@@ -60,6 +74,5 @@
__END__
=head1 USAGE
=head1 USAGE
- curl https://analytics.wikimedia.org/datasets/periodic/reports/metrics/browser/all_sites_by_browser_family_and_major_percent.tsv |
- ./mkusage-wikimedia >browser-usage.inc.pl
+ ./mkusage-wikimedia wikipedia-analytics.tsv >browser-usage.inc.pl