writing: mkcharver script to parse unicode version history
author Mischa POSLAWSKY <perl@shiar.org>
Wed, 11 Feb 2015 21:40:04 +0000 (22:40 +0100)
committer Mischa POSLAWSKY <perl@shiar.org>
Fri, 13 Feb 2015 16:46:33 +0000 (17:46 +0100)
Prepare include of unicode character versions.

.gitignore
Makefile
tools/mkcharver [new file with mode: 0755]

index b117a195a6a878d1181073a54d8b3a42e039ac83..b458fc20f01caff2f99b3de6223e058ac1b1bdf3 100644 (file)
@@ -6,6 +6,7 @@
 /data/countryInfo.txt
 /data/rfc1345.txt
 /data/caniuse*.json
+/data/DerivedAge.txt
 
 # data includes automatically generated by tools/
 /countries.inc.pl
@@ -14,6 +15,8 @@
        # tools/mkdigraphlist
 /unicode-char.inc.pl
        # tools/mkcharinfo
+/unicode-age.inc.pl
+       # tools/mkcharver
 /ttfsupport/
        # tools/convert-allfonts
 /unicode-cover.inc.pl
index 30f62a9808420be2098106ee3dcd08af6b5d5dc4..8d71274e0645fc8048cbd679326aa5dcf0d29cd3 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,11 @@
 all: unicode-char.inc.pl unicode-cover.inc.pl countries.inc.pl
 
+data/DerivedAge.txt:
+       wget http://www.unicode.org/Public/UNIDATA/DerivedAge.txt -O $@
+
+unicode-age.inc.pl: data/DerivedAge.txt
+       tools/mkcharver data/DerivedAge.txt >$@
+
 data/rfc1345.txt:
        wget http://www.ietf.org/rfc/rfc1345.txt -O $@
 
@@ -25,6 +31,7 @@ countries.inc.pl: data/countryInfo.txt
 clean:
        -rm digraphs.inc.pl
        -rm unicode-char.inc.pl
+       -rm unicode-age.inc.pl
        -rm -rf ttfsupport/
        -rm unicode-cover.inc.pl
 
diff --git a/tools/mkcharver b/tools/mkcharver
new file mode 100755 (executable)
index 0000000..f8fc3e8
--- /dev/null
@@ -0,0 +1,43 @@
+#!/usr/bin/env perl
+use 5.012;
+use warnings;
+use utf8;
+
+our $VERSION = '1.00';
+
+# data line: hex code point or first..last range, then "; major.minor"
+my $age_entry = qr{
+       \A (?<start> [0-9A-F]+)
+       (?: \.\. (?<end> [0-9A-F]+) )?
+       \h+ ; \h+ (?<major> [0-9]+) \. (?<minor> [0-9])
+}x;
+
+# output is a single anonymous hash expression keyed by decimal code point
+say for '# automatically generated by tools/mkcharver', '+{';
+while (defined(my $row = readline)) {
+       next unless $row =~ $age_entry;  # anything that is not a data line
+       my $first = hex($+{start});
+       my $last  = hex($+{end} // $+{start});  # lone code point: same as first
+       # version digits are joined without the dot, so 6.1 is emitted as 61
+       my $age = join '', $+{major}, $+{minor};
+       say "(map {\$_ => $age} $first .. $last),";
+}
+say '}';
+__END__
+
+=head1 NAME
+
+mkcharver - Create Perl include of Unicode character versions
+
+=head1 SYNOPSIS
+
+    mkcharver DerivedAge.txt >unicode-age.inc.pl
+
+=head1 AUTHOR
+
+Mischa POSLAWSKY <perl@shiar.org>
+
+=head1 LICENSE
+
+Licensed under the GNU Affero General Public License version 3.
+