X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/6c08f60465c441baff92074c3122fcb62457658c..5f257afb85ce61ae765dffad95ce755acb760234:/Shiar_Sheet/FormatChar.pm diff --git a/Shiar_Sheet/FormatChar.pm b/Shiar_Sheet/FormatChar.pm index 62f6791..0a8a50a 100644 --- a/Shiar_Sheet/FormatChar.pm +++ b/Shiar_Sheet/FormatChar.pm @@ -1,16 +1,16 @@ package Shiar_Sheet::FormatChar; +use 5.010; use strict; use warnings; +use utf8; use Data::Dump 'pp'; use PLP::Functions 'EscapeHTML'; -our $VERSION = '1.04'; +our $VERSION = '1.08'; -our $diinfo = do 'digraphs.inc.pl'; -our %di = map { $diinfo->{$_}->[0] => $_ } grep { ref $diinfo->{$_} } - sort { length $a <=> length $b } keys %$diinfo; +our $uc = do 'unicode-char.inc.pl'; sub new { my ($class) = @_; @@ -19,29 +19,27 @@ sub new { sub glyph_info { my ($self, $codepoint) = @_; - if (defined (my $mnem = $di{$codepoint})) { - return ($diinfo->{$mnem}, length $mnem == 2 ? $mnem : undef); - } - require Unicode::UCD; - if (my $fullinfo = Unicode::UCD::charinfo($codepoint)) { - return [$codepoint, @$fullinfo{qw/name category script string/}]; - } - return [$codepoint]; + return $uc->{chr $codepoint} || eval { + require Unicode::UCD; + if (my $fullinfo = Unicode::UCD::charinfo($codepoint)) { + return [@$fullinfo{qw/category name - string/}]; + } + } || []; } sub glyph_html { my ($self, $char) = @_; - my ($info, $mnem) = $self->glyph_info(ord $char); - my ($codepoint, $name, $prop, $script, $string) = @$info; + my $codepoint = ord $char; + my $info = $self->glyph_info($codepoint); + my ($class, $name, $mnem, $entity, $string) = @$info; my $cell = EscapeHTML($string || $char); - my $title = sprintf 'U+%04X%s', $codepoint, $name && " ($name)"; - my @class = ('X', grep {$_} $prop, $script); + my $title = sprintf 'U+%04X%s', $codepoint, !!$name && " ($name)"; - $cell = "$cell" if $prop and $prop eq 'Zs'; + $cell = "$cell" if $class and $class =~ /\bZs\b/; $cell = ' ' if $cell eq ''; - return ($cell, EscapeHTML($title), join(' ', @class), $mnem); + return ($cell, EscapeHTML($title), !!$class && "X $class", $mnem, $entity); } sub glyphs_html { @@ -54,7 +52,7 @@ sub glyphs_html { EscapeHTML($_[0]), # cell join(' | ', map { $_->[1] } @chars), # title $chars[0][2], # class - join(' ', grep { defined } map { $_->[3] } @chars), # digraph + join(' ', map { $_->[3] // '…' } @chars), # digraph ); } @@ -65,7 +63,7 @@ sub glyph_cell { sub cell { my ($self, $input, $html) = @_; - my (@class, $title, $cell, $mnem); + my (@class, $title, $cell, $mnem, $entity); if ($input eq '-') { $cell = ''; @@ -74,110 +72,140 @@ sub cell { push @class, 'u-invalid'; $cell = ''; } - else { + else {{ push @class, 'X'; if ($input =~ s/^-//) { push @class, 'ex'; # discouraged } - ($cell, $title, my $class, $mnem) = $self->glyphs_html($input); + $input =~ s/^\\//; # escaped char + ($cell, $title, my $class, $mnem, $entity) = $self->glyphs_html($input); + my $codepoint = ord $input; - if ($self->{style} = 'di') { - if (defined $mnem) { - push @class, $class =~ /\bXz\b/ ? ('l2', 'u-prop') # unofficial - : ('l3', 'u-di'); # standard digraph + if ($self->{style} eq 'univer') { + if ($input =~ /\p{age=unassigned}/) { + # check include for assignments after unicode 6.0 (perl v5.14) + state $agemap = do 'unicode-age.inc.pl'; + my $version = $agemap->{$codepoint}; + push @class, $version ? 'l2' : 'l1'; + } + elsif ($input =~ /^\p{in=1.1}*$/) { + push @class, 'l5'; # first release 1993 + } + elsif ($input =~ /^\p{in=3.0}*$/) { + push @class, 'l4'; # 20th century + } + elsif ($input =~ /^\p{in=4.1}*$/) { + push @class, 'l4'; # over 10 years ago + } + elsif ($input =~ /^\p{in=6.0}*$/) { + push @class, 'l3'; # before 2012 + } + else { + push @class, 'l2'; # more recent + } + next; + } + + if ($self->{style} eq 'di') { + if ($mnem and $mnem =~ /…/) { + # incomplete representation, usually partial + } + elsif ($class =~ /\bu-di\b/) { + push @class, ('l4', 'u-di'); # standard digraph + } + elsif ($class =~ /\bu-prop\b/) { + push @class, ('l3', 'u-prop'); # unofficial + } + } + elsif ($self->{style} eq 'html') { + if (defined $entity) { + push @class, ($codepoint <= 0xFF ? 'l4' : 'l3', 'u-html'); } } else { - my $codepoint = ord(substr $input, 0, 1); if ($codepoint <= 0xFF) { - push @class, 'l3', 'u-lat1'; # latin1 + push @class, 'l4', 'u-lat1'; # latin1 } elsif ($codepoint <= 0xD7FF) { - push @class, 'l2', 'u-bmp'; # bmp + push @class, 'l3', 'u-bmp'; # bmp } } if ($input =~ /[ -~]/) { - push @class, 'l4', 'u-ascii'; # ascii + push @class, 'l5', 'u-ascii'; # ascii + } + elsif ($input =~ /^\p{in=6.0}+$/) { + push @class, 'l2'; # in unicode 6.0 } else { - push @class, 'l1'; # basic unicode + push @class, 'l1'; # any unicode } - } + }} my $anno = ''; - for (@{ $self->{anno} }) { - if (/html$/) { - require HTML::Entities; - if (my $entity = $HTML::Entities::char2entity{$cell}) { - $entity = substr($entity, 1, -1) unless /^&/; - $anno = sprintf(' %s', EscapeHTML($entity)); - last; + if ($cell ne '') { + for (@{ $self->{anno} }) { + if (/html$/) { + if (defined $entity) { + $entity = "&$entity;" if /^&/; + $anno = sprintf(' %s', EscapeHTML($entity)); + last; + } } - } - elsif ($_ eq 'xml') { - $anno = sprintf(' %s', - sprintf '#%d', ord($cell) - ); - last; - } - elsif ($_ eq '&xml') { - $anno = sprintf(' %s', - sprintf '&#%d;', ord($cell) - ); - last; - } - elsif ($_ eq 'di') { - if (defined $mnem and length $mnem) { - $anno = sprintf(' %s', EscapeHTML($mnem)); + elsif ($_ eq 'xml') { + $anno = sprintf(' %s', + sprintf '#%d', ord($cell) + ); last; } - } - else { - if ($_ eq 'hex' or $cell =~ /^[^a-zA-Z]$/) { - $anno = sprintf(' %04X', 'value', ord $cell); + elsif ($_ eq '&xml') { + $anno = sprintf(' %s', + sprintf '&#%d;', ord($cell) + ); last; } + elsif ($_ eq 'di') { + if (defined $mnem and length $mnem) { + $anno = sprintf(' %s', EscapeHTML($mnem)); + last; + } + } + else { + if ($_ eq 'hex' or $cell =~ /^[^a-zA-Z]$/) { + $anno = sprintf(' %04X', 'value', ord $cell); + last; + } + } } } - return sprintf('%s%s', - defined $title ? qq{ title="$title"} : '', - @class ? sprintf(' class="%s"', join ' ', @class) : '', - $html || '', + return sprintf('<%s>%s%s', + join(' ', 'td', + defined $title ? qq{title="$title"} : (), + @class ? sprintf('class="%s"', join ' ', @class) : (), + $html || (), + ), $cell eq '' ? ' ' : $cell, $anno, ); } -sub table { - my ($self, $digraphs) = @_; - - my @rows; - - my @colheads; - while ($digraphs->[0] !~ /^\./) { - my $cell = shift @$digraphs or last; - push @colheads, sprintf( - '<%s%s>%s', - $cell =~ s/^-// ? 'td' : 'th', - $cell =~ s/:(.*)// ? qq{ title="$1"} : '', - $cell eq '_' ? ' ' : $cell - ); - } - push @rows, sprintf '%s', join '', @colheads if @colheads; +sub row { + my ($self, $cells) = @_; + my @html; my $colspan = 1; - for my $cell (@$digraphs) { + for my $cell (@{$cells}) { if ($cell =~ s/^\.//) { # dot indicates start of a new row - push @rows, ''; + push @html, ''; if ($cell =~ s/^>//) { # header cell text follows $cell =~ s/_/ /g; # underscores may be used instead of whitespace (for qw//ability) - $rows[-1] .= ''.($cell || ' '); + my $class = $cell =~ s/^-// && ' class="ex"'; + $html[-1] .= "".($cell || ' '); } next; } @@ -186,27 +214,63 @@ sub table { $colspan++; next; } + elsif ($cell eq '>-') { + $html[-1] .= ''; + next; + } + elsif ($cell =~ m/^'.$cell; + next; + } - $rows[-1] .= $self->cell($cell, - $colspan > 1 && qq{ colspan="$colspan"}, + $html[-1] .= $self->cell($cell, + $colspan > 1 && qq{colspan="$colspan"}, ); $colspan = 1; } - return sprintf qq{\n%s
\n}, - @{ $self->{anno} } ? ' dilabel' : '', - join '', map {"$_\n"} @rows; + return @html; +} + +sub tabletag { + my ($self) = @_; + my $class = 'glyphs'; + $class .= ' dilabel' if @{ $self->{anno} }; + return sprintf '', $class; +} + +sub table { + my ($self, $digraphs) = @_; + + my @rows; + + my @colheads; + while ($digraphs->[0] !~ /^\./) { + my $cell = shift @$digraphs or last; + push @colheads, sprintf( + '<%s%s>%s', + $cell =~ s/^-// ? 'td' : 'th', + $cell =~ s/:(.*)// ? qq{ title="$1"} : '', + $cell eq '_' ? ' ' : $cell + ); + } + push @rows, sprintf '%s', join '', @colheads if @colheads; + push @rows, $self->row($digraphs); + + return join '', map {"$_\n"} $self->tabletag, @rows, '
'; } sub print { my $self = shift; while (@_) { - printf '

%s

'."\n\n", shift; + print '
'; + printf '

%s

', shift unless ref $_[0]; + print "\n\n"; while (ref $_[0] and $_ = shift) { print $self->table($_); } - print '
'; + print "\n
"; } }