From: Mischa POSLAWSKY Date: Wed, 11 Feb 2015 22:33:07 +0000 (+0100) Subject: tools/mkcharinfo: unicode age classes X-Git-Tag: v1.7~166 X-Git-Url: http://git.shiar.nl/sheet.git/commitdiff_plain/70e3b473d744176d9692900114c8beefc0527150 tools/mkcharinfo: unicode age classes Append Unicode version data as .u-v00, currently unused. --- diff --git a/Makefile b/Makefile index 8d71274..d7c1003 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ data/rfc1345.txt: digraphs.inc.pl: data/rfc1345.txt tools/mkdigraphlist data/rfc1345.txt >$@ -unicode-char.inc.pl: digraphs.inc.pl +unicode-char.inc.pl: digraphs.inc.pl unicode-age.inc.pl tools/mkcharinfo >$@ ttfsupport: diff --git a/Shiar_Sheet/FormatChar.pm b/Shiar_Sheet/FormatChar.pm index d3926bc..35f6076 100644 --- a/Shiar_Sheet/FormatChar.pm +++ b/Shiar_Sheet/FormatChar.pm @@ -80,12 +80,13 @@ sub cell { $input =~ s/^\\//; # escaped char ($cell, $title, my $class, $mnem, $entity) = $self->glyphs_html($input); + my $codepoint = ord(substr $input, 0, 1); if ($self->{style} eq 'univer') { if ($input =~ /\p{age=unassigned}/) { # check include for assignments after unicode 6.0 (perl v5.14) state $agemap = do 'unicode-age.inc.pl'; - my $version = $agemap->{ord $input}; + my $version = $agemap->{$codepoint}; push @class, $version ? 'l2' : 'l1'; } elsif ($input =~ /^\p{in=1.1}*$/) { @@ -120,7 +121,6 @@ sub cell { } } else { - my $codepoint = ord(substr $input, 0, 1); if ($codepoint <= 0xFF) { push @class, 'l3', 'u-lat1'; # latin1 } diff --git a/tools/mkcharinfo b/tools/mkcharinfo index b673105..c6c8900 100755 --- a/tools/mkcharinfo +++ b/tools/mkcharinfo @@ -61,6 +61,16 @@ for (keys %diinc) { } } +eval { + # read introducing unicode versions for known characters + my $agemap = do 'unicode-age.inc.pl' or die $@ || $!; + for my $chr (keys %info) { + my $version = $agemap->{ord $chr} or next; + $info{$chr}->{class}->{'u-v'.$version}++ + } + 1; +} or warn "Failed including unicode version data $@"; + for my $chr (keys %info) { my $cp = ord $chr; # attempt to get unicode character information @@ -70,7 +80,7 @@ for my $chr (keys %info) { || { block => '?', category => 'Xn', name => '', script => '' } } or next; - $info->{$_} = $info{$chr}->{$_} for qw(di html class string); + $info->{$_} = $info{$chr}->{$_} for keys %{ $info{$chr} }; # categorise by unicode types and writing script $info->{class}->{$_}++ for $info->{category};