From: Mischa POSLAWSKY Date: Wed, 4 Feb 2015 12:08:46 +0000 (+0100) Subject: browsers: improve wikimedia usage parser X-Git-Tag: v1.6~9 X-Git-Url: http://git.shiar.nl/sheet.git/commitdiff_plain/6a67847f60b0a80a9026f8d219476acf92cc4d37 browsers: improve wikimedia usage parser Alphabetical column contains more versions (due to phone and tablet counts being combined?). More reliable matching. --- diff --git a/tools/mkusage-wikimedia b/tools/mkusage-wikimedia index 901ec15..41ed977 100755 --- a/tools/mkusage-wikimedia +++ b/tools/mkusage-wikimedia @@ -1,9 +1,10 @@ #!/usr/bin/perl -n -use 5.010; use strict; use warnings; +use 5.012; +use warnings; our %count; our $mobile; -our $VERSION = '1.01'; +our $VERSION = '1.02'; if (m{} .. m{}) { $count{-source} = 'http://stats.wikimedia.org/archive/squid_reports/'; @@ -12,14 +13,20 @@ if (m{} .. m{}) { next; } -# select relevant columns -/>Browser versions(.*)/ ... m{} && last or next; -my ($tr, $id, $count2, $count) = split /(?:<[^>]*>)+/; -$mobile = $count2 !~ /non mobile/ if $id ~~ ' '; -next if $id ~~ ['Total', ' ']; +# select relevant data +/>In alphabetical order/ .. eof or next; # second table +my ($id, $count2, $count) = map { s/<[^>]*>//gr } split /<\/td>/; + +# select version data +/>Browser versions(.*)/ ... !defined $count2 or next; +unless (defined $count2) { + # header row if no td separator + $mobile = $id !~ /non mobile/; + next; +} # convert to usable syntax -my ($browser, $version) = split /\h+/, $id || ' ', 2; $count =~ s/,//g; $count =~ s/%$//; $version //= 0;