browsers: improve wikimedia usage parser

author Mischa POSLAWSKY <perl@shiar.org>

Wed, 4 Feb 2015 12:08:46 +0000 (13:08 +0100)

committer Mischa POSLAWSKY <perl@shiar.org>

Thu, 5 Feb 2015 21:46:21 +0000 (22:46 +0100)
author Mischa POSLAWSKY <perl@shiar.org>
Wed, 4 Feb 2015 12:08:46 +0000 (13:08 +0100)
committer Mischa POSLAWSKY <perl@shiar.org>
Thu, 5 Feb 2015 21:46:21 +0000 (22:46 +0100)
diff --git a/tools/mkusage-wikimedia b/tools/mkusage-wikimedia

index 901ec15c6d39b12b47ac52c22941a931ab167a6b..41ed97735c6b6d390c9f941388651550c3d91a7c 100755 (executable)
--- a/tools/mkusage-wikimedia
+++ b/tools/mkusage-wikimedia
@@ -1,9 +1,10 @@
  #!/usr/bin/perl -n
-use 5.010; use strict; use warnings;
+use 5.012;
+use warnings;
  our %count;
  our $mobile;
  
-our $VERSION = '1.01';
+our $VERSION = '1.02';
  
  if (m{<td class=hl>} .. m{</?td>}) {
         $count{-source} = 'http://stats.wikimedia.org/archive/squid_reports/';
@@ -12,14 +13,20 @@ if (m{<td class=hl>} .. m{</?td>}) {
         next;
  }
  
-# select relevant columns
-/>Browser versions(.*)/ ... m{</table>} && last or next;
-my ($tr, $id, $count2, $count) = split /(?:<[^>]*>)+/;
-$mobile = $count2 !~ /non mobile/ if $id ~~ '&nbsp;';
-next if $id ~~ ['Total', '&nbsp;'];
+# select relevant data
+/>In alphabetical order/ .. eof or next;  # second table
+my ($id, $count2, $count) = map { s/<[^>]*>//gr } split /<\/td>/;
+
+# select version data
+/>Browser versions(.*)/ ... !defined $count2 or next;
+unless (defined $count2) {
+       # header row if no td separator
+       $mobile = $id !~ /non mobile/;
+       next;
+}
  
  # convert to usable syntax
-my ($browser, $version) = split /\h+/, $id, 2;
+my ($browser, $version) = split /\h+/, $id || ' ', 2;
  $count =~ s/,//g;
  $count =~ s/%$//;
  $version //= 0;
author	Mischa POSLAWSKY <perl@shiar.org>
	Wed, 4 Feb 2015 12:08:46 +0000 (13:08 +0100)
committer	Mischa POSLAWSKY <perl@shiar.org>
	Thu, 5 Feb 2015 21:46:21 +0000 (22:46 +0100)