X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/a605211aaba5d956789c1d30c09a55494858d086..0dd2b5ce328daf14b51f94c951acef2ca40a5fd4:/Shiar_Sheet/FormatChar.pm diff --git a/Shiar_Sheet/FormatChar.pm b/Shiar_Sheet/FormatChar.pm index 6b5fb6c..78e4a21 100644 --- a/Shiar_Sheet/FormatChar.pm +++ b/Shiar_Sheet/FormatChar.pm @@ -3,6 +3,7 @@ package Shiar_Sheet::FormatChar; use 5.010; use strict; use warnings; +use utf8; use Data::Dump 'pp'; use PLP::Functions 'EscapeHTML'; @@ -51,7 +52,7 @@ sub glyphs_html { EscapeHTML($_[0]), # cell join(' | ', map { $_->[1] } @chars), # title $chars[0][2], # class - join(' ', grep { defined } map { $_->[3] } @chars), # digraph + join(' ', map { $_->[3] // '…' } @chars), # digraph ); } @@ -80,23 +81,25 @@ sub cell { $input =~ s/^\\//; # escaped char ($cell, $title, my $class, $mnem, $entity) = $self->glyphs_html($input); + my $codepoint = ord $input; if ($self->{style} eq 'univer') { - state $agemap = do 'unicode-age.inc.pl' or die $!; - my $version = $agemap->{ord $input}; - if (!$version) { - push @class, 'l1'; # no known unicode assignment + if ($input =~ /\p{age=unassigned}/) { + # check include for assignments after unicode 6.0 (perl v5.14) + state $agemap = do 'unicode-age.inc.pl'; + my $version = $agemap->{$codepoint}; + push @class, $version ? 'l2' : 'l1'; } - elsif ($version < 20) { + elsif ($input =~ /^\p{in=1.1}*$/) { push @class, 'l5'; # first release 1993 } - elsif ($version < 31) { + elsif ($input =~ /^\p{in=3.0}*$/) { push @class, 'l4'; # 20th century } - elsif ($version < 50) { + elsif ($input =~ /^\p{in=4.1}*$/) { push @class, 'l4'; # over 10 years ago } - elsif ($version < 61) { + elsif ($input =~ /^\p{in=6.0}*$/) { push @class, 'l3'; # before 2012 } else { @@ -106,33 +109,38 @@ sub cell { } if ($self->{style} eq 'di') { - if ($class =~ /\bu-di\b/) { - push @class, ('l3', 'u-di'); # standard digraph + if ($mnem =~ /…/) { + # incomplete representation, usually partial + } + elsif ($class =~ /\bu-di\b/) { + push @class, ('l4', 'u-di'); # standard digraph } elsif ($class =~ /\bu-prop\b/) { - push @class, ('l2', 'u-prop'); # unofficial + push @class, ('l3', 'u-prop'); # unofficial } } elsif ($self->{style} eq 'html') { if (defined $entity) { - push @class, ('l3', 'u-html'); + push @class, ($codepoint <= 0xFF ? 'l4' : 'l3', 'u-html'); } } else { - my $codepoint = ord(substr $input, 0, 1); if ($codepoint <= 0xFF) { - push @class, 'l3', 'u-lat1'; # latin1 + push @class, 'l4', 'u-lat1'; # latin1 } elsif ($codepoint <= 0xD7FF) { - push @class, 'l2', 'u-bmp'; # bmp + push @class, 'l3', 'u-bmp'; # bmp } } if ($input =~ /[ -~]/) { - push @class, 'l4', 'u-ascii'; # ascii + push @class, 'l5', 'u-ascii'; # ascii + } + elsif ($input =~ /^\p{in=6.0}+$/) { + push @class, 'l2'; # in unicode 6.0 } else { - push @class, 'l1'; # basic unicode + push @class, 'l1'; # any unicode } }} @@ -173,10 +181,12 @@ sub cell { } } - return sprintf('%s%s', - defined $title ? qq{ title="$title"} : '', - @class ? sprintf(' class="%s"', join ' ', @class) : '', - $html || '', + return sprintf('<%s>%s%s', + join(' ', 'td', + defined $title ? qq{ title="$title"} : (), + @class ? sprintf('class="%s"', join ' ', @class) : (), + $html || (), + ), $cell eq '' ? ' ' : $cell, $anno, ); @@ -217,9 +227,17 @@ sub table { $colspan++; next; } + elsif ($cell eq '>-') { + $rows[-1] .= ''; + next; + } + elsif ($cell =~ m/^'.$cell; + next; + } $rows[-1] .= $self->cell($cell, - $colspan > 1 && qq{ colspan="$colspan"}, + $colspan > 1 && qq{colspan="$colspan"}, ); $colspan = 1;