tools/mkcharinfo: unicode age classes
[sheet.git] / Shiar_Sheet / FormatChar.pm
index f22f42bebd517cc1ffbd5e43a0d86b6ccd7595ad..35f60765c7c394022482159c694629272b13fb2c 100644 (file)
@@ -1,5 +1,6 @@
 package Shiar_Sheet::FormatChar;
 
+use 5.010;
 use strict;
 use warnings;
 
@@ -29,7 +30,7 @@ sub glyph_html {
        my ($self, $char) = @_;
        my $codepoint = ord $char;
        my $info = $self->glyph_info($codepoint);
-       my ($class, $name, $mnem, $string) = @$info;
+       my ($class, $name, $mnem, $html, $string) = @$info;
 
        my $cell = EscapeHTML($string || $char);
        my $title = sprintf 'U+%04X%s', $codepoint, $name && " ($name)";
@@ -37,7 +38,7 @@ sub glyph_html {
        $cell = "<span>$cell</span>" if $class =~ /\bZs\b/;
        $cell = '&nbsp;' if $cell eq '';
 
-       return ($cell, EscapeHTML($title), "X $class", $mnem);
+       return ($cell, EscapeHTML($title), "X $class", $mnem, $html);
 }
 
 sub glyphs_html {
@@ -61,7 +62,7 @@ sub glyph_cell {
 
 sub cell {
        my ($self, $input, $html) = @_;
-       my (@class, $title, $cell, $mnem);
+       my (@class, $title, $cell, $mnem, $entity);
 
        if ($input eq '-') {
                $cell = '';
@@ -70,7 +71,7 @@ sub cell {
                push @class, 'u-invalid';
                $cell = '';
        }
-       else {
+       else {{
                push @class, 'X';
 
                if ($input =~ s/^-//) {
@@ -78,9 +79,35 @@ sub cell {
                }
 
                $input =~ s/^\\//;  # escaped char
-               ($cell, $title, my $class, $mnem) = $self->glyphs_html($input);
+               ($cell, $title, my $class, $mnem, $entity) = $self->glyphs_html($input);
+               my $codepoint = ord(substr $input, 0, 1);  # first character only
+
+               if ($self->{style} eq 'univer') {  # class the cell by unicode age
+                       if ($input =~ /\p{age=unassigned}/) {
+                               # check include for assignments after unicode 6.0 (perl v5.14)
+                               state $agemap = do 'unicode-age.inc.pl'  # do FILE yields undef on failure
+                                       or warn "cannot load unicode-age.inc.pl: " . ($@ || $!);
+                               push @class, $agemap->{$codepoint} ? 'l2' : 'l1';  # l1 if not in the map
+                       }
+                       elsif ($input =~ /^\p{in=1.1}*$/) {
+                               push @class, 'l5';  # first release 1993
+                       }
+                       elsif ($input =~ /^\p{in=3.0}*$/) {
+                               push @class, 'l4';  # 20th century
+                       }
+                       elsif ($input =~ /^\p{in=4.1}*$/) {
+                               push @class, 'l4';  # over 10 years ago; NOTE(review): same class as the 3.0 tier - confirm intended
+                       }
+                       elsif ($input =~ /^\p{in=6.0}*$/) {
+                               push @class, 'l3';  # before 2012
+                       }
+                       else {
+                               push @class, 'l2';  # more recent
+                       }
+                       next;  # exits the enclosing bare block, skipping the other style checks
+               }
 
-               if ($self->{style} = 'di') {
+               if ($self->{style} eq 'di') {
                        if ($class =~ /\bu-di\b/) {
                                push @class, ('l3', 'u-di'); # standard digraph
                        }
@@ -88,8 +115,12 @@ sub cell {
                                push @class, ('l2', 'u-prop'); # unofficial
                        }
                }
+               elsif ($self->{style} eq 'html') {
+                       if (defined $entity) {
+                               push @class, ('l3', 'u-html');
+                       }
+               }
                else {
-                       my $codepoint = ord(substr $input, 0, 1);
                        if ($codepoint <= 0xFF) {
                                push @class, 'l3', 'u-lat1';  # latin1
                        }
@@ -104,15 +135,14 @@ sub cell {
                else {
                        push @class, 'l1'; # basic unicode
                }
-       }
+       }}
 
        my $anno = '';
        if ($cell ne '') {
                for (@{ $self->{anno} }) {
                        if (/html$/) {
-                               require HTML::Entities;
-                               if (my $entity = $HTML::Entities::char2entity{$cell}) {
-                                       $entity = substr($entity, 1, -1) unless /^&/;
+                               if (defined $entity) {
+                                       $entity = "&$entity;" if /^&/;
                                        $anno = sprintf(' <small class="digraph">%s</small>', EscapeHTML($entity));
                                        last;
                                }
@@ -178,7 +208,8 @@ sub table {
                        if ($cell =~ s/^>//) {
                                # header cell text follows
                                $cell =~ s/_/ /g;  # underscores may be used instead of whitespace (for qw//ability)
-                               $rows[-1] .= '<th>'.($cell || '&nbsp;');
+                               my $class = $cell =~ s/^-// && ' class="ex"';
+                               $rows[-1] .= "<th$class>".($cell || '&nbsp;');
                        }
                        next;
                }