From c1b893b857c7f9a001c401cf45deec6daa2a9aa4 Mon Sep 17 00:00:00 2001 From: Mischa POSLAWSKY Date: Sun, 22 Feb 2015 02:17:28 +0100 Subject: [PATCH] digraphs: mkdigraphlist: redetermine source class --- digraphs.plp | 2 +- tools/mkdigraphlist | 22 ++++++++++++---------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/digraphs.plp b/digraphs.plp index ce14962..18123a6 100644 --- a/digraphs.plp +++ b/digraphs.plp @@ -66,7 +66,7 @@ for my $c1group (@chars) { my @class = ('X', grep {$_} $prop, $script); $glyph = EscapeHTML($glyph); - $glyph = "$glyph" if $prop =~ /\bZs\b/; + $glyph = "$glyph" if $script =~ /\bZs\b/; printf "\n".'%s', join(' ', @class), EscapeHTML($desc), $glyph; diff --git a/tools/mkdigraphlist b/tools/mkdigraphlist index 8a05fa4..e472ce0 100755 --- a/tools/mkdigraphlist +++ b/tools/mkdigraphlist @@ -6,18 +6,16 @@ use utf8; use open OUT => ':utf8', ':std'; -our $VERSION = '1.03'; +our $VERSION = '1.04'; -# create a hash of desired input -my $di = do 'data/digraphs-rfc.inc.pl' +# import and combine various digraph data +my $rfc = do 'data/digraphs-rfc.inc.pl' or die "error reading digraphs include: ", $@ // $!; - -# personal addendums my $extra = do 'data/digraphs-shiar.inc.pl' or warn "could not include shiar proposals: ", $@ // $!; my $vim = do 'data/digraphs-vim.inc.pl' or warn "could not include vim extensions ", $@ // $!; -$di = { %{$vim // {}}, %{$di}, %{$extra // {}} }; +my $di = { %{$vim // {}}, %{$rfc}, %{$extra // {}} }; # optionally get unicode character information my $uninfo = do 'unicode-char.inc.pl' @@ -36,8 +34,11 @@ printf "q{%s}=>[%s],\n", $_, join(',', $di->{$_}, # original code point map {"'$_'"} $uninfo->{ chr $di->{$_} }->[1], # name - $uninfo->{ chr $di->{$_} }->[0], # class - (map {('', $_)} $uninfo->{ chr $di->{$_} }->[4] // ()), # string + $vim->{$_} + ? $rfc->{$_} ? 'u-di' : 'u-prop' # vim+rfc or vim-only + : 'u-prop ex', # neither + $uninfo->{ chr $di->{$_} }->[0] =~ s/ u-di| u-prop| ex//gr, # class + $uninfo->{ chr $di->{$_} }->[4] // (), # string ) for sort keys %{$di}; print "}\n"; @@ -61,11 +62,12 @@ The value can either be a scalar string containing another digraph which can be considered identical (usually inverted), or an array ref containing at least the resulting character's Unicode code point value. If available, the following UCD data -is appended: character name, category, script, and output string. +is appended: character name, usage classes, unicode classes, +and replacement output string. For example: +{ - AE => [198, 'LATIN CAPITAL LETTER AE', 'Lu Xl', 'Latin'], + AE => [198, 'LATIN CAPITAL LETTER AE', 'u-di', 'Latin Lu Xl u-v11'], EA => 'AE', } -- 2.30.0