tools/mkcharinfo: unicode age classes

author Mischa POSLAWSKY <perl@shiar.org>

Wed, 11 Feb 2015 22:33:07 +0000 (23:33 +0100)

committer Mischa POSLAWSKY <perl@shiar.org>

Fri, 13 Feb 2015 16:46:33 +0000 (17:46 +0100)
author Mischa POSLAWSKY <perl@shiar.org>
Wed, 11 Feb 2015 22:33:07 +0000 (23:33 +0100)
committer Mischa POSLAWSKY <perl@shiar.org>
Fri, 13 Feb 2015 16:46:33 +0000 (17:46 +0100)
diff --git a/Makefile b/Makefile

index 8d71274e0645fc8048cbd679326aa5dcf0d29cd3..d7c10031a5ae2953ad1540d6b7fff08c04767731 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,7 @@ data/rfc1345.txt:
  digraphs.inc.pl: data/rfc1345.txt
         tools/mkdigraphlist data/rfc1345.txt >$@
  
-unicode-char.inc.pl: digraphs.inc.pl
+unicode-char.inc.pl: digraphs.inc.pl unicode-age.inc.pl
         tools/mkcharinfo >$@
  
  ttfsupport:
diff --git a/Shiar_Sheet/FormatChar.pm b/Shiar_Sheet/FormatChar.pm

index d3926bca4e09e26576f054a74aa825029a47ac45..35f60765c7c394022482159c694629272b13fb2c 100644 (file)
--- a/Shiar_Sheet/FormatChar.pm
+++ b/Shiar_Sheet/FormatChar.pm
@@ -80,12 +80,13 @@ sub cell {
  
                 $input =~ s/^\\//;  # escaped char
                 ($cell, $title, my $class, $mnem, $entity) = $self->glyphs_html($input);
+               my $codepoint = ord(substr $input, 0, 1);
  
                 if ($self->{style} eq 'univer') {
                         if ($input =~ /\p{age=unassigned}/) {
                                 # check include for assignments after unicode 6.0 (perl v5.14)
                                 state $agemap = do 'unicode-age.inc.pl';
-                               my $version = $agemap->{ord $input};
+                               my $version = $agemap->{$codepoint};
                                 push @class, $version ? 'l2' : 'l1';
                         }
                         elsif ($input =~ /^\p{in=1.1}*$/) {
@@ -120,7 +121,6 @@ sub cell {
                         }
                 }
                 else {
-                       my $codepoint = ord(substr $input, 0, 1);
                         if ($codepoint <= 0xFF) {
                                 push @class, 'l3', 'u-lat1';  # latin1
                         }
diff --git a/tools/mkcharinfo b/tools/mkcharinfo

index b673105339f79357b741efc19c8e671eb3b24680..c6c890011abb33a33a873de67c9edaa4a40c9a59 100755 (executable)
--- a/tools/mkcharinfo
+++ b/tools/mkcharinfo
@@ -61,6 +61,16 @@ for (keys %diinc) {
         }
  }
  
+eval {
+       # read introducing unicode versions for known characters
+       my $agemap = do 'unicode-age.inc.pl' or die $@ || $!;
+       for my $chr (keys %info) {
+               my $version = $agemap->{ord $chr} or next;
+               $info{$chr}->{class}->{'u-v'.$version}++
+       }
+       1;
+} or warn "Failed including unicode version data $@";
+
  for my $chr (keys %info) {
         my $cp = ord $chr;
         # attempt to get unicode character information
@@ -70,7 +80,7 @@ for my $chr (keys %info) {
                         || { block => '?', category => 'Xn', name => '', script => '' }
         } or next;
  
-       $info->{$_} = $info{$chr}->{$_} for qw(di html class string);
+       $info->{$_} = $info{$chr}->{$_} for keys %{ $info{$chr} };
  
         # categorise by unicode types and writing script
         $info->{class}->{$_}++ for $info->{category};
author	Mischa POSLAWSKY <perl@shiar.org>
	Wed, 11 Feb 2015 22:33:07 +0000 (23:33 +0100)
committer	Mischa POSLAWSKY <perl@shiar.org>
	Fri, 13 Feb 2015 16:46:33 +0000 (17:46 +0100)
Makefile		patch | blob | history
Shiar_Sheet/FormatChar.pm		patch | blob | history
tools/mkcharinfo		patch | blob | history