digits: split up table method to display rows
[sheet.git] / Shiar_Sheet / FormatChar.pm
index 30aab5ed68d853141c0bd1b844c813eaf34215e6..0a8a50a0876560bfa954c16d1a16b5d9160005e3 100644 (file)
@@ -1,47 +1,45 @@
 package Shiar_Sheet::FormatChar;
 
+use 5.010;
 use strict;
 use warnings;
+use utf8;
 
 use Data::Dump 'pp';
 use PLP::Functions 'EscapeHTML';
 
-our $VERSION = '1.04';
+our $VERSION = '1.08';
 
-our $diinfo = do 'digraphs.inc.pl';
-our %di = map { $diinfo->{$_}->[0] => $_ } grep { ref $diinfo->{$_} }
-       sort { length $a <=> length $b } keys %$diinfo;
+our $uc = do 'unicode-char.inc.pl';
 
 sub new {
        my ($class) = @_;
-       bless { unicode => 0, anno => 'di', style => 'di' }, $class;
+       bless { anno => ['di', 0], style => 'di' }, $class;  # anno: annotation modes that cell() tries in order; style: scheme that cell() uses to pick level classes
 }
 
 sub glyph_info {
        my ($self, $codepoint) = @_;
-       if (defined (my $mnem = $di{$codepoint})) {
-               return ($diinfo->{$mnem}, length $mnem == 2 ? $mnem : undef);
-       }
-       require Unicode::UCD;
-       if (my $fullinfo = Unicode::UCD::charinfo($codepoint)) {
-               return [$codepoint, @$fullinfo{qw/name category script string/}];
-       }
-       return [$codepoint];
+       return $uc->{chr $codepoint} || eval {  # prefer the table loaded from unicode-char.inc.pl, keyed by the character itself
+               require Unicode::UCD;  # loaded lazily, only when the table has no entry
+               if (my $fullinfo = Unicode::UCD::charinfo($codepoint)) {
+                       return [@$fullinfo{qw/category name - string/}];  # NOTE(review): 4 slots here, but glyph_html unpacks 5 (class, name, mnem, entity, string), so 'string' lands in the entity slot - confirm intended; '-' is presumably a placeholder key yielding undef
+               }
+       } || [];  # neither lookup produced anything (or the eval died): empty info list
 }
 
 sub glyph_html {
        my ($self, $char) = @_;
-       my ($info, $mnem) = $self->glyph_info(ord $char);
-       my ($codepoint, $name, $prop, $script, $string) = @$info;
+       my $codepoint = ord $char;  # code point of the first character of $char
+       my $info = $self->glyph_info($codepoint);
+       my ($class, $name, $mnem, $entity, $string) = @$info;  # any of these may be undef; glyph_info can return an empty list
 
        my $cell = EscapeHTML($string || $char);
-       my $title = sprintf 'U+%04X%s', $codepoint, $name && " ($name)";
-       my @class = ('X', grep {$_} $prop, $script);
+       my $title = sprintf 'U+%04X%s', $codepoint, !!$name && " ($name)";  # !! turns a missing name into '' so %s never receives undef
 
-       $cell = "<span>$cell</span>" if $prop and $prop eq 'Zs';
+       $cell = "<span>$cell</span>" if $class and $class =~ /\bZs\b/;  # wrap the glyph when Zs appears as a word in the class string
        $cell = '&nbsp;' if $cell eq '';
 
-       return ($cell, EscapeHTML($title), join(' ', @class), $mnem);
+       return ($cell, EscapeHTML($title), !!$class && "X $class", $mnem, $entity);  # (cell html, escaped title, class string or '', mnemonic, entity)
 }
 
 sub glyphs_html {
@@ -54,7 +52,7 @@ sub glyphs_html {
                EscapeHTML($_[0]), # cell
                join(' | ', map { $_->[1] } @chars), # title
                $chars[0][2], # class
-               join(' ',  grep { defined } map { $_->[3] } @chars), # digraph
+               join(' ',  map { $_->[3] // '…' } @chars), # digraph
        );
 }
 
@@ -65,7 +63,7 @@ sub glyph_cell {
 
 sub cell {
        my ($self, $input, $html) = @_;
-       my (@class, $title, $cell, $mnem);
+       my (@class, $title, $cell, $mnem, $entity);
 
        if ($input eq '-') {
                $cell = '';
@@ -74,79 +72,140 @@ sub cell {
                push @class, 'u-invalid';
                $cell = '';
        }
-       else {
+       else {{
                push @class, 'X';
 
                if ($input =~ s/^-//) {
                        push @class, 'ex'; # discouraged
                }
 
-               ($cell, $title, my $class, $mnem) = $self->glyphs_html($input);
+               $input =~ s/^\\//;  # escaped char
+               ($cell, $title, my $class, $mnem, $entity) = $self->glyphs_html($input);
+               my $codepoint = ord $input;
 
-               if ($self->{style} = 'di') {
-                       if (defined $mnem) {
-                               push @class, $class =~ /\bXz\b/ ? ('l2', 'u-prop') # unofficial
-                                       : ('l3', 'u-di'); # standard digraph
+               if ($self->{style} eq 'univer') {
+                       if ($input =~ /\p{age=unassigned}/) {
+                               # check include for assignments after unicode 6.0 (perl v5.14)
+                               state $agemap = do 'unicode-age.inc.pl';
+                               my $version = $agemap->{$codepoint};
+                               push @class, $version ? 'l2' : 'l1';
+                       }
+                       elsif ($input =~ /^\p{in=1.1}*$/) {
+                               push @class, 'l5';  # first release 1993
+                       }
+                       elsif ($input =~ /^\p{in=3.0}*$/) {
+                               push @class, 'l4';  # 20th century
+                       }
+                       elsif ($input =~ /^\p{in=4.1}*$/) {
+                               push @class, 'l4';  # over 10 years ago
+                       }
+                       elsif ($input =~ /^\p{in=6.0}*$/) {
+                               push @class, 'l3';  # before 2012
+                       }
+                       else {
+                               push @class, 'l2';  # more recent
+                       }
+                       next;
+               }
+
+               if ($self->{style} eq 'di') {
+                       if ($mnem and $mnem =~ /…/) {
+                               # incomplete representation, usually partial
+                       }
+                       elsif ($class =~ /\bu-di\b/) {
+                               push @class, ('l4', 'u-di'); # standard digraph
+                       }
+                       elsif ($class =~ /\bu-prop\b/) {
+                               push @class, ('l3', 'u-prop'); # unofficial
+                       }
+               }
+               elsif ($self->{style} eq 'html') {
+                       if (defined $entity) {
+                               push @class, ($codepoint <= 0xFF ? 'l4' : 'l3', 'u-html');
                        }
                }
                else {
-                       my $codepoint = ord(substr $input, 0, 1);
                        if ($codepoint <= 0xFF) {
-                               push @class, 'l3', 'u-lat1';  # latin1
+                               push @class, 'l4', 'u-lat1';  # latin1
                        }
                        elsif ($codepoint <= 0xD7FF) {
-                               push @class, 'l2', 'u-bmp';  # bmp
+                               push @class, 'l3', 'u-bmp';  # bmp
                        }
                }
 
                if ($input =~ /[ -~]/) {
-                       push @class, 'l4', 'u-ascii'; # ascii
+                       push @class, 'l5', 'u-ascii'; # ascii
+               }
+               elsif ($input =~ /^\p{in=6.0}+$/) {
+                       push @class, 'l2'; # in unicode 6.0
                }
                else {
-                       push @class, 'l1'; # basic unicode
+                       push @class, 'l1'; # any unicode
+               }
+       }}
+
+       my $anno = '';
+       if ($cell ne '') {
+               for (@{ $self->{anno} }) {
+                       if (/html$/) {
+                               if (defined $entity) {
+                                       $entity = "&$entity;" if /^&/;
+                                       $anno = sprintf(' <small class="digraph">%s</small>', EscapeHTML($entity));
+                                       last;
+                               }
+                       }
+                       elsif ($_ eq 'xml') {
+                               $anno = sprintf(' <small class="digraph">%s</small>',
+                                       sprintf '#%d', ord($cell)
+                               );
+                               last;
+                       }
+                       elsif ($_ eq '&xml') {
+                               $anno = sprintf(' <small class="digraph">%s</small>',
+                                       sprintf '&amp;#%d;', ord($cell)
+                               );
+                               last;
+                       }
+                       elsif ($_ eq 'di') {
+                               if (defined $mnem and length $mnem) {
+                                       $anno = sprintf(' <small class="digraph">%s</small>', EscapeHTML($mnem));
+                                       last;
+                               }
+                       }
+                       else {
+                               if ($_ eq 'hex' or $cell =~ /^[^a-zA-Z]$/) {
+                                       $anno = sprintf(' <small class="%s">%04X</small>', 'value', ord $cell);
+                                       last;
+                               }
+                       }
                }
        }
 
-       return sprintf('<td%s%s%s>%s%s',
-               defined $title  ? qq{ title="$title"}  : '',
-               @class ? sprintf(' class="%s"', join ' ', @class) : '',
-               $html || '',
-               $cell eq '' ? ('&nbsp;', '') : ($cell,
-                       $self->{anno} eq 'di' && defined $mnem && length $mnem
-                       ? sprintf(' <small class="digraph">%s</small>', EscapeHTML($mnem))
-                       : $self->{unicode} + $cell =~ /^[^a-zA-Z]$/ > 0
-                               ? sprintf(' <small class="%s">%04X</small>', 'value', ord $cell)
-                               : ''
+       return sprintf('<%s>%s%s',
+               join(' ', 'td',
+                       defined $title ? qq{title="$title"}  : (),
+                       @class ? sprintf('class="%s"', join ' ', @class) : (),
+                       $html || (),
                ),
+               $cell eq '' ? '&nbsp;' : $cell,
+               $anno,
        );
 }
 
-sub table {
-       my ($self, $digraphs) = @_;
-
-       my @rows;
-
-       my @colheads;
-       while ($digraphs->[0] !~ /^\./) {
-               my $cell = shift @$digraphs or last;
-               push @colheads, sprintf(
-                       '<%s%s>%s',
-                       $cell =~ s/^-// ? 'td' : 'th',
-                       $cell =~ s/:(.*)// ? qq{ title="$1"} : '',
-                       $cell eq '_' ? '&nbsp;' : $cell
-               );
-       }
-       push @rows, sprintf '<thead><tr>%s<tbody>', join '', @colheads if @colheads;
+sub row {
+       my ($self, $cells) = @_;
+       my @html;
 
        my $colspan = 1;
-       for my $cell (@$digraphs) {
+       for my $cell (@{$cells}) {
                if ($cell =~ s/^\.//) {
                        # dot indicates start of a new row
-                       push @rows, '<tr>';
+                       push @html, '<tr>';
                        if ($cell =~ s/^>//) {
                                # header cell text follows
                                $cell =~ s/_/ /g;  # underscores may be used instead of whitespace (for qw//ability)
-                               $rows[-1] .= '<th>'.($cell || '&nbsp;');
+                               my $class = $cell =~ s/^-// && ' class="ex"';
+                               $html[-1] .= "<th$class>".($cell || '&nbsp;');
                        }
                        next;
                }
@@ -155,27 +214,63 @@ sub table {
                        $colspan++;
                        next;
                }
+               elsif ($cell eq '>-') {
+                       $html[-1] .= '<th>';
+                       next;
+               }
+               elsif ($cell =~ m/^</) {
+                       $html[-1] .= '<td>'.$cell;
+                       next;
+               }
 
-               $rows[-1] .= $self->cell($cell,
-                       $colspan > 1 && qq{ colspan="$colspan"},
+               $html[-1] .= $self->cell($cell,
+                       $colspan > 1 && qq{colspan="$colspan"},
                );
 
                $colspan = 1;
        }
 
-       return sprintf qq{<table class="glyphs%s">\n%s</table>\n},
-               $self->{anno} || $self->{unicode} >= 0 ? ' dilabel' : '',
-               join '', map {"$_\n"} @rows;
+       return @html;
+}
+
+# Build the opening <table> tag for a glyph table.  The "dilabel" class
+# is appended whenever the object's anno list is non-empty.
+sub tabletag {
+       my $self = shift;
+       my $extra = @{ $self->{anno} } ? ' dilabel' : '';
+       return sprintf '<table class="%s">', 'glyphs' . $extra;
+}
+
+# Render a complete glyph table as an HTML string.
+#
+# $digraphs is an array ref of cell specs and is consumed destructively:
+# leading entries up to the first one starting with "." are shifted off
+# and become the <thead> column headers; what remains is passed to row().
+# Header syntax: a leading "-" makes the cell a <td> instead of a <th>,
+# ":text" sets a title attribute, and "_" alone renders as &nbsp;.
+# An empty or undefined entry ends the header list and is dropped.
+# Every emitted piece (table tag, rows, closing tag) ends with a newline.
+sub table {
+       my ($self, $digraphs) = @_;
+
+       my @rows;
+
+       my @colheads;
+       while (@$digraphs) {
+               # a leading dot marks the first data row; stop without consuming it
+               my $peek = $digraphs->[0];
+               last if defined $peek and $peek =~ /^\./;
+
+               # test definedness and length rather than truth, so that a
+               # column header of "0" is kept instead of ending the header
+               my $cell = shift @$digraphs;
+               last unless defined $cell and length $cell;
+
+               # the substitutions strip the markers from $cell, so they must
+               # run before the remaining text is used as the cell content
+               my $tag   = $cell =~ s/^-// ? 'td' : 'th';
+               my $title = $cell =~ s/:(.*)// ? qq{ title="$1"} : '';
+               push @colheads, sprintf(
+                       '<%s%s>%s',
+                       $tag,
+                       $title,
+                       $cell eq '_' ? '&nbsp;' : $cell
+               );
+       }
+       push @rows, sprintf '<thead><tr>%s<tbody>', join '', @colheads if @colheads;
+       push @rows, $self->row($digraphs);
+
+       return join '', map {"$_\n"} $self->tabletag, @rows, '</table>';
+}
 
 sub print {
        my $self = shift;
        while (@_) {
-               printf '<div class="section"><h2>%s</h2>'."\n\n", shift;
+               print '<div class="section">';  # one section wrapper per pass of the outer loop
+               printf '<h2>%s</h2>', shift unless ref $_[0];  # heading is optional: a leading reference means the section starts directly with a table
+               print "\n\n";
                while (ref $_[0] and $_ = shift) {
                        print $self->table($_);
                }
-               print '</div>';
+               print "\n</div>";  # closing tag now starts on its own line
        }
 }