X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/28ac2dea2dcafa09f6d7246c8703225fb96efeea..HEAD:/Shiar_Sheet/FormatChar.pm diff --git a/Shiar_Sheet/FormatChar.pm b/Shiar_Sheet/FormatChar.pm index ef0445d..f471497 100644 --- a/Shiar_Sheet/FormatChar.pm +++ b/Shiar_Sheet/FormatChar.pm @@ -8,23 +8,34 @@ use utf8; use Data::Dump 'pp'; use PLP::Functions 'EscapeHTML'; -our $VERSION = '1.07'; +our $VERSION = '1.10'; -our $uc = do 'unicode-char.inc.pl'; +our $uc = do 'data/unicode-char.inc.pl'; sub new { my ($class) = @_; bless { anno => ['di', 0], style => 'di' }, $class; } -sub glyph_info { +sub glyph_mkinfo { my ($self, $codepoint) = @_; - return $uc->{chr $codepoint} || eval { + # attempt to get unicode character information + my $info = eval { require Unicode::UCD; - if (my $fullinfo = Unicode::UCD::charinfo($codepoint)) { - return [@$fullinfo{qw/category name - string/}]; - } - } || []; + Unicode::UCD::charinfo($codepoint) + || { category => 'Xn', name => '' }; + } or return; + my $string; + if ($info->{combining}) { + # overlay combining diacritics + $string = chr(9676) . chr($codepoint); + } + return [@$info{qw( category name )}, undef, $string]; +} + +sub glyph_info { + my ($self, $codepoint) = @_; + return $uc->{chr $codepoint} || $self->glyph_mkinfo($codepoint) || []; } sub glyph_html { @@ -34,7 +45,7 @@ sub glyph_html { my ($class, $name, $mnem, $entity, $string) = @$info; my $cell = EscapeHTML($string || $char); - my $title = sprintf 'U+%04X%s', $codepoint, !!$name && " ($name)"; + my $title = sprintf 'U+%04X%s', $codepoint, !!$name && " $name"; $cell = "$cell" if $class and $class =~ /\bZs\b/; $cell = ' ' if $cell eq ''; @@ -61,6 +72,31 @@ sub glyph_cell { return sprintf('%s', $self->glyph_html($char)); } +sub glyph_level_univer { + my ($self, $input) = @_; + if ($input =~ /\p{age=unassigned}/) { + # check include for assignments after unicode 6.0 (perl v5.14) + state $agemap = do 'data/unicode-age.inc.pl'; + my $version = $agemap->{ord $input}; + return $version ? 'l2' : 'l1'; + } + elsif ($input =~ /^\p{in=1.1}*$/) { + return 'l5'; # first release 1993 + } + elsif ($input =~ /^\p{in=3.0}*$/) { + return 'l4'; # 20th century + } + elsif ($input =~ /^\p{in=4.1}*$/) { + return 'l4'; # over 10 years ago + } + elsif ($input =~ /^\p{in=6.0}*$/) { + return 'l3'; # before 2012 + } + else { + return 'l2'; # more recent + } +} + sub cell { my ($self, $input, $html) = @_; my (@class, $title, $cell, $mnem, $entity); @@ -81,33 +117,13 @@ sub cell { $input =~ s/^\\//; # escaped char ($cell, $title, my $class, $mnem, $entity) = $self->glyphs_html($input); - my $codepoint = ord $input; if ($self->{style} eq 'univer') { - if ($input =~ /\p{age=unassigned}/) { - # check include for assignments after unicode 6.0 (perl v5.14) - state $agemap = do 'unicode-age.inc.pl'; - my $version = $agemap->{$codepoint}; - push @class, $version ? 'l2' : 'l1'; - } - elsif ($input =~ /^\p{in=1.1}*$/) { - push @class, 'l5'; # first release 1993 - } - elsif ($input =~ /^\p{in=3.0}*$/) { - push @class, 'l4'; # 20th century - } - elsif ($input =~ /^\p{in=4.1}*$/) { - push @class, 'l4'; # over 10 years ago - } - elsif ($input =~ /^\p{in=6.0}*$/) { - push @class, 'l3'; # before 2012 - } - else { - push @class, 'l2'; # more recent - } + push @class, $self->glyph_level_univer($input); next; } + my $codepoint = ord $input; if ($self->{style} eq 'di') { if ($mnem and $mnem =~ /…/) { # incomplete representation, usually partial @@ -136,7 +152,7 @@ sub cell { if ($input =~ /[ -~]/) { push @class, 'l5', 'u-ascii'; # ascii } - elsif ($input =~ /^\p{in=6.0}+$/) { + elsif ($input =~ /^\p{in=6.0}+$/ and $input !~ /\p{Co}/) { push @class, 'l2'; # in unicode 6.0 } else { @@ -173,8 +189,8 @@ sub cell { } } else { - if ($_ eq 'hex' or $cell =~ /^[^a-zA-Z]$/) { - $anno = sprintf(' %04X', 'value', ord $cell); + if ($_ eq 'hex' or $input =~ /^[^a-zA-Z]$/) { + $anno = sprintf(' %04X', 'value', ord $input); last; } } @@ -183,7 +199,7 @@ sub cell { return sprintf('<%s>%s%s', join(' ', 'td', - defined $title ? qq{ title="$title"} : (), + defined $title ? qq{title="$title"} : (), @class ? sprintf('class="%s"', join ' ', @class) : (), $html || (), ), @@ -192,33 +208,20 @@ sub cell { ); } -sub table { - my ($self, $digraphs) = @_; - - my @rows; - - my @colheads; - while ($digraphs->[0] !~ /^\./) { - my $cell = shift @$digraphs or last; - push @colheads, sprintf( - '<%s%s>%s', - $cell =~ s/^-// ? 'td' : 'th', - $cell =~ s/:(.*)// ? qq{ title="$1"} : '', - $cell eq '_' ? ' ' : $cell - ); - } - push @rows, sprintf '%s', join '', @colheads if @colheads; +sub row { + my ($self, $cells) = @_; + my @html; my $colspan = 1; - for my $cell (@$digraphs) { + for my $cell (@{$cells}) { if ($cell =~ s/^\.//) { # dot indicates start of a new row - push @rows, ''; + push @html, ''; if ($cell =~ s/^>//) { # header cell text follows $cell =~ s/_/ /g; # underscores may be used instead of whitespace (for qw//ability) my $class = $cell =~ s/^-// && ' class="ex"'; - $rows[-1] .= "".($cell || ' '); + $html[-1] .= "".($cell || ' '); } next; } @@ -228,24 +231,56 @@ sub table { next; } elsif ($cell eq '>-') { - $rows[-1] .= ''; + $html[-1] .= ''; next; } elsif ($cell =~ m/^'.$cell; + $html[-1] .= ''.$cell; next; } - $rows[-1] .= $self->cell($cell, + $html[-1] .= $self->cell($cell, $colspan > 1 && qq{colspan="$colspan"}, ); $colspan = 1; } - return sprintf qq{\n%s
\n}, - @{ $self->{anno} } ? ' dilabel' : '', - join '', map {"$_\n"} @rows; + return @html; +} + +sub tabletag { + my ($self) = @_; + my $class = 'glyphs'; + $class .= ' dilabel' if @{ $self->{anno} }; + return sprintf '', $class; +} + +sub table { + my ($self, $digraphs) = @_; + + my @rows; + + my @colheads; + while ($digraphs->[0] !~ /^\./) { + my $cell = shift @$digraphs or last; + if ($cell eq '>') { + push @colheads, ''; + next; + } + push @colheads, join('', + '<', + $cell =~ s/^-// ? 'td' : 'th', + $cell =~ s/:(.*)// && qq{ title="$1"}, + $cell =~ s/^(>+)// && ' colspan='.(length($1) + 1), + '>', + $cell eq '_' ? ' ' : $cell + ); + } + push @rows, sprintf '%s', join '', @colheads if @colheads; + push @rows, $self->row($digraphs); + + return join '', map {"$_\n"} $self->tabletag, @rows, '
'; } sub print { @@ -261,5 +296,30 @@ sub print { } } +sub legend { + my $self = shift; + my @classes = $self->{style} eq 'univer' ? ( + [l5 => 'unicode 1.1'], + [l4 => '20th century'], + [l3 => 'in 6.0 (2010)'], + [l2 => 'recent assignments'], + [l1 => 'proposed'], + [ex => 'irregular'], + ) : ( + [l5 => 'ascii'], + [l4 => $self->{style} eq 'di' ? 'digraph' : 'latin1'], + [l3 => $self->{style} eq 'di' ? 'proposed' : 'HTML4'], + [l2 => 'unicode ≤6.0'], + [l1 => 'other unicode'], + [ex => 'discouraged'], + ); + + return ( + '
', + (map { sprintf '
%s', @{$_} } @classes), + '
', + ); +} + 1;