From 71ebec1cf2c6050f2b92f4b46bc1f59b45414fb7 Mon Sep 17 00:00:00 2001
From: Mischa POSLAWSKY
Date: Wed, 11 Feb 2015 22:40:04 +0100
Subject: [PATCH] writing: mkcharver script to parse unicode version history

Prepare include of unicode character versions.
---
 .gitignore      |  3 +++
 Makefile        |  7 +++++++
 tools/mkcharver | 43 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 53 insertions(+)
 create mode 100755 tools/mkcharver

diff --git a/.gitignore b/.gitignore
index b117a19..b458fc2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@
 /data/countryInfo.txt
 /data/rfc1345.txt
 /data/caniuse*.json
+/data/DerivedAge.txt
 
 # data includes automatically generated by tools/
 /countries.inc.pl
@@ -14,6 +15,8 @@
 	# tools/mkdigraphlist
 /unicode-char.inc.pl
 	# tools/mkcharinfo
+/unicode-age.inc.pl
+	# tools/mkcharver
 /ttfsupport/
 	# tools/convert-allfonts
 /unicode-cover.inc.pl
diff --git a/Makefile b/Makefile
index 30f62a9..8d71274 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,11 @@
 all: unicode-char.inc.pl unicode-cover.inc.pl countries.inc.pl
 
+data/DerivedAge.txt:
+	wget http://www.unicode.org/Public/UNIDATA/DerivedAge.txt -O $@
+
+unicode-age.inc.pl: data/DerivedAge.txt
+	tools/mkcharver data/DerivedAge.txt >$@
+
 data/rfc1345.txt:
 	wget http://www.ietf.org/rfc/rfc1345.txt -O $@
 
@@ -25,6 +31,7 @@ countries.inc.pl: data/countryInfo.txt
 clean:
 	-rm digraphs.inc.pl
 	-rm unicode-char.inc.pl
+	-rm unicode-age.inc.pl
 	-rm -rf ttfsupport/
 	-rm unicode-cover.inc.pl
 
diff --git a/tools/mkcharver b/tools/mkcharver
new file mode 100755
index 0000000..f8fc3e8
--- /dev/null
+++ b/tools/mkcharver
@@ -0,0 +1,43 @@
+#!/usr/bin/env perl
+use 5.012;
+use warnings;
+use utf8;
+
+our $VERSION = '1.00';
+
+say '# automatically generated by tools/mkcharver';
+say '+{';  # the whole output is one anonymous hash expression
+
+while (my $line = readline) {
+	$line =~ m{
+		\A (?<start> [0-9A-F]+)         # first or only code point, in hex
+		(?: \.\. (?<end> [0-9A-F]+) )?  # optional upper bound of a range
+		\h+ ; \h+ (?<major> [0-9]+) \. (?<minor> [0-9])  # version field
+	}x or next;  # skip every line that is not a data record
+
+	my $start = hex $+{start};
+	my $end = defined $+{end} ? hex $+{end} : $start;  # no range: one code point
+	my $version = $+{major} . $+{minor};  # digits joined without the dot
+	say "(map {\$_ => $version} $start .. $end),";
+}
+
+say '}';
+
+__END__
+
+=head1 NAME
+
+mkcharver - Create Perl include of Unicode character versions
+
+=head1 SYNOPSIS
+
+	mkcharver DerivedAge.txt >unicode-age.inc.pl
+
+=head1 AUTHOR
+
+Mischa POSLAWSKY
+
+=head1 LICENSE
+
+Licensed under the GNU Affero General Public License version 3.
+
-- 
2.30.0