From: Mischa POSLAWSKY
Date: Sat, 21 Feb 2015 02:03:54 +0000 (+0100)
Subject: digraphs: intermediate parse of shiar.inc.txt proposals
X-Git-Tag: v1.7~123
X-Git-Url: http://git.shiar.nl/sheet.git/commitdiff_plain/7e678eaffb1a57d300e8aa77078d93ab481f8006

digraphs: intermediate parse of shiar.inc.txt proposals
---

Review notes (free text ahead of the first file header; patch tools skip it):
 * Added do() calls name their file with a leading "./" because Perl 5.26
   and later no longer keep "." in @INC, so a bare relative path is not found.
 * Added error reports use "$@ || $!": $@ is an empty (defined) string when
   the file cannot be read, so "//" would never reach $!.
 * The blob ids on the index lines below are those of the original commit.

diff --git a/Makefile b/Makefile
index 46d6817..6314e18 100644
--- a/Makefile
+++ b/Makefile
@@ -15,7 +15,10 @@ data/rfc1345.txt:
 data/digraphs-rfc.inc.pl: tools/mkdigraphs-rfc data/rfc1345.txt
 	$< $(word 2,$^) >$@
 
-digraphs.inc.pl: tools/mkdigraphlist data/digraphs-rfc.inc.pl shiar.inc.txt
+data/digraphs-shiar.inc.pl: tools/mkdigraphs-shiar shiar.inc.txt
+	$< $(word 2,$^) >$@
+
+digraphs.inc.pl: tools/mkdigraphlist data/digraphs-rfc.inc.pl data/digraphs-shiar.inc.pl
 	$< >$@
 
 unicode-char.inc.pl: tools/mkcharinfo digraphs.inc.pl unicode-age.inc.pl
diff --git a/tools/mkdigraphlist b/tools/mkdigraphlist
index 66ffd2a..4cacc6c 100755
--- a/tools/mkdigraphlist
+++ b/tools/mkdigraphlist
@@ -14,17 +14,9 @@ my $di = do 'data/digraphs-rfc.inc.pl'
 	or die "error reading digraphs include: ", $@ // $!;
 
 # personal addendums
-my @extra;
-if (-r 'shiar.inc.txt') {
-	open my $include, '<:utf8', 'shiar.inc.txt';
-	for (readline $include) {
-		m{^(\$?[!"%'-Z_a-z]{2}) (.)} or next;
-		warn("$1 already defined"), next if defined $di->{$1};
-		$di->{$1} = ord $2;
-		push @extra, $1;
-	}
-}
-warn $@ if $@;
+my $extra = do './data/digraphs-shiar.inc.pl'  # explicit ./ keeps do() from searching @INC
+	or warn "could not include shiar proposals: ", $@ || $!;  # $@ is '' (not undef) on read failure
+$di = { %{$di}, %{$extra // {}} };  # proposals are merged over the official table
 
 $di->{chr $_} = $_ for 32 .. 126;
 $di->{'\\'.$_} = delete $di->{$_} for '{', '}', '\\';
@@ -45,7 +37,7 @@ for (values %info) {
 }
 
 # mark unofficial extras as such
-$info{$_}->{category} .= ' Xz' for @extra;
+$info{$_}->{category} .= ' Xz' for keys %{$extra};
 
 for (keys %{$di}) {
 	$info{$_}->{string} = chr(9676) . chr($di->{$_}) if $info{$_}->{combining};
diff --git a/tools/mkdigraphs-shiar b/tools/mkdigraphs-shiar
new file mode 100755
index 0000000..4f2b75a
--- /dev/null
+++ b/tools/mkdigraphs-shiar
@@ -0,0 +1,48 @@
+#!/usr/bin/env perl
+use 5.010;
+use strict;
+use warnings;
+use open IN => ':utf8', ':std';
+
+our $VERSION = '1.00';
+
+my $di = do "./data/digraphs-rfc.inc.pl"  # explicit ./ keeps do() from searching @INC
+	or warn "official digraphs not included for comparison: ", $@ || $!;  # $@ is '' (not undef) on read failure
+
+say "# automatically generated by $0";
+say '+{';
+for (readline) {  # no handle given: reads the input named on the command line
+	my ($mnem, $chr) = m{^([!"%'-Z_a-z]{2}) (.)} or next;  # digraph, space, literal character
+	warn("$mnem already defined"), next if defined $di->{$mnem};
+	my $chrnum = ord $chr;
+	say "q{$mnem} => $chrnum,";  # q{} is safe: the class above admits no braces or backslash
+}
+say '}';
+
+__END__
+
+=head1 NAME
+
+mkdigraphs-shiar - Output digraph data from proposal text
+
+=head1 SYNOPSIS
+
+	mkdigraphs-shiar shiar.inc.txt >digraphs-shiar.inc.pl
+	perl -e'$di = do "./digraphs-shiar.inc.pl"; print chr $di->{":)"}'
+
+=head1 DESCRIPTION
+
+Parses a text file containing digraphs followed by
+the literal Unicode character and optional comments.
+If successful, Perl code is output resulting in a hash
+with Unicode code points keyed by digraph.
+Any errors and warnings are given at STDERR.
+
+=head1 AUTHOR
+
+Mischa POSLAWSKY
+
+=head1 LICENSE
+
+Licensed under the GNU Affero General Public License version 3.
+