package Shiar_Sheet::FormatChar;
+use 5.010;
use strict;
use warnings;
+use utf8;
use Data::Dump 'pp';
use PLP::Functions 'EscapeHTML';
-our $VERSION = '1.01';
+our $VERSION = '1.10';
-our $diinfo = do 'digraphs.inc.pl';
-our %di = map { $diinfo->{$_}->[0] => $_ } grep { ref $diinfo->{$_} }
- sort { length $a <=> length $b } keys %$diinfo;
+our $uc = do 'data/unicode-char.inc.pl';
sub new {
my ($class) = @_;
- bless { digraph => 1, unicode => 0 }, $class;
+ bless { anno => ['di', 0], style => 'di' }, $class; # defaults: 'anno' is the ordered annotation preference list walked by cell(), 'style' selects its colouring scheme
}
-sub glyph_info {
+# Build a character info record at runtime from Unicode::UCD.
+#   $codepoint  numeric code point to describe
+# Returns an array ref laid out as [class, name, mnemonic, entity, string],
+# the same five slots glyph_html unpacks, or nothing when Unicode::UCD
+# cannot be loaded or the lookup dies.
+sub glyph_mkinfo {
my ($self, $codepoint) = @_;
- if (defined (my $mnem = $di{$codepoint})) {
- return ($diinfo->{$mnem}, length $mnem == 2 ? $mnem : undef);
- }
- require Unicode::UCD;
- if (my $fullinfo = Unicode::UCD::charinfo($codepoint)) {
- return [$codepoint, @$fullinfo{qw/name category script string/}];
+ # attempt to get unicode character information
+ # (a placeholder record is substituted when charinfo has no data)
+ my $info = eval {
+ require Unicode::UCD;
+ Unicode::UCD::charinfo($codepoint)
+ || { category => 'Xn', name => '' };
+ } or return;
+ my $string;
+ if ($info->{combining}) {
+ # overlay combining diacritics on U+25CC (dotted circle)
+ $string = chr(9676) . chr($codepoint);
}
- return [$codepoint];
+ # mnemonic and entity are unknown here; both slots stay undef so that
+ # the display string lands in the fifth position read by glyph_html
+ return [@$info{qw( category name )}, undef, undef, $string];
+}
+
+# Look up the info record for a code point.
+# The table loaded into $uc (keyed by the character itself) is consulted
+# first, then glyph_mkinfo; an empty array ref is returned when neither
+# yields a record.
+sub glyph_info {
+	my ($self, $codepoint) = @_;
+
+	my $known = $uc->{chr $codepoint};
+	return $known if $known;
+
+	return $self->glyph_mkinfo($codepoint) || [];
+}
sub glyph_html {
my ($self, $char) = @_;
- my ($info, $mnem) = $self->glyph_info(ord $char);
- my ($codepoint, $name, $prop, $script, $string) = @$info;
+ my $codepoint = ord $char; # numeric code point, used for the lookup and the U+XXXX title
+ my $info = $self->glyph_info($codepoint);
+ my ($class, $name, $mnem, $entity, $string) = @$info; # record slots: class, name, mnemonic, entity, display string override
my $cell = EscapeHTML($string || $char);
- my $title = sprintf 'U+%04X%s', $codepoint, $name && " ($name)";
- my @class = ('X', grep {$_} $prop, $script);
+ my $title = sprintf 'U+%04X%s', $codepoint, !!$name && " $name"; # the name is appended only when it is non-empty
- $cell = "<span>$cell</span>" if $prop and $prop eq 'Zs';
+ $cell = "<span>$cell</span>" if $class and $class =~ /\bZs\b/; # wrap the glyph when its class list contains Zs (space separator category)
$cell = ' ' if $cell eq '';
- return ($cell, EscapeHTML($title), join(' ', @class), $mnem);
+ return ($cell, EscapeHTML($title), !!$class && "X $class", $mnem, $entity); # list of (cell html, escaped title, class string, mnemonic, entity)
}
sub glyphs_html {
EscapeHTML($_[0]), # cell
join(' | ', map { $_->[1] } @chars), # title
$chars[0][2], # class
- join(' ', grep { defined } map { $_->[3] } @chars), # digraph
+ join(' ', map { $_->[3] // '…' } @chars), # digraph
);
}
return sprintf('<td class="%3$s" title="%2$s">%s', $self->glyph_html($char));
}
+# Classify $input by the Unicode version its characters were introduced in.
+# Returns one of the level class names 'l5' (oldest) through 'l1'.
+sub glyph_level_univer {
+	my ($self, $input) = @_;
+
+	if ($input =~ /\p{age=unassigned}/) {
+		# unknown to this perl's unicode tables: consult the include for
+		# assignments after unicode 6.0 (perl v5.14)
+		state $agemap = do 'data/unicode-age.inc.pl';
+		return $agemap->{ord $input} ? 'l2' : 'l1';
+	}
+
+	# the first version range containing every character decides the level
+	return 'l5' if $input =~ /^\p{in=1.1}*$/;  # first release 1993
+	return 'l4' if $input =~ /^\p{in=3.0}*$/;  # 20th century
+	return 'l4' if $input =~ /^\p{in=4.1}*$/;  # over 10 years ago
+	return 'l3' if $input =~ /^\p{in=6.0}*$/;  # before 2012
+
+	return 'l2';  # more recent
+}
+
+
+# Render one table cell for a character specification.
+#   $input  character(s) to show; '-' yields an empty cell, '=' an empty
+#           cell classed u-invalid, a leading '-' marks the glyph as
+#           discouraged and a leading backslash escapes what follows
+#   $html   optional extra attribute text placed inside the <td> tag
+# Returns the opening <td ...> tag followed by the cell contents and, for
+# non-empty cells, the first annotation from $self->{anno} that applies.
sub cell {
my ($self, $input, $html) = @_;
- my (@class, $title, $cell, $mnem);
+ my (@class, $title, $cell, $mnem, $entity);
if ($input eq '-') {
$cell = '';
}
elsif ($input eq '=') {
- push @class, 'di-invalid';
+ push @class, 'u-invalid';
$cell = '';
}
- else {
+ else {{
+ # the doubled braces form a bare block so 'next' below can leave it early
push @class, 'X';
if ($input =~ s/^-//) {
- push @class, 'di-rare'; # discouraged
+ push @class, 'ex'; # discouraged
}
- ($cell, $title, my $class, $mnem) = $self->glyphs_html($input);
+ $input =~ s/^\\//; # escaped char
+ ($cell, $title, my $class, $mnem, $entity) = $self->glyphs_html($input);
- if (defined $mnem) {
- push @class, 'di-d'; # digraph
- push @class, 'di-prop' if $class =~ /\bXz\b/; # unofficial
+ if ($self->{style} eq 'univer') {
+ push @class, $self->glyph_level_univer($input);
+ next;
+ }
+
+ my $codepoint = ord $input;
+ if ($self->{style} eq 'di') {
+ if ($mnem and $mnem =~ /…/) {
+ # incomplete representation, usually partial
+ }
+ elsif ($class =~ /\bu-di\b/) {
+ push @class, ('l4', 'u-di'); # standard digraph
+ }
+ elsif ($class =~ /\bu-prop\b/) {
+ push @class, ('l3', 'u-prop'); # unofficial
+ }
+ }
+ elsif ($self->{style} eq 'html') {
+ if (defined $entity) {
+ push @class, ($codepoint <= 0xFF ? 'l4' : 'l3', 'u-html');
+ }
+ }
+ else {
+ if ($codepoint <= 0xFF) {
+ push @class, 'l4', 'u-lat1'; # latin1
+ }
+ elsif ($codepoint <= 0xD7FF) {
+ push @class, 'l3', 'u-bmp'; # bmp
+ }
}
if ($input =~ /[ -~]/) {
- push @class, 'di-a'; # ascii
+ push @class, 'l5', 'u-ascii'; # ascii
+ }
+ elsif ($input =~ /^\p{in=6.0}+$/ and $input !~ /\p{Co}/) {
+ push @class, 'l2'; # in unicode 6.0
}
else {
- push @class, 'di-b'; # basic unicode
+ push @class, 'l1'; # any unicode
+ }
+ }}
+
+ # pick the first annotation kind from the preference list that applies
+ my $anno = '';
+ if ($cell ne '') {
+ for (@{ $self->{anno} }) {
+ if (/html$/) {
+ if (defined $entity) {
+ $entity = "&$entity;" if /^&/;
+ $anno = sprintf(' <small class="digraph">%s</small>', EscapeHTML($entity));
+ last;
+ }
+ }
+ elsif ($_ eq 'xml') {
+ # number the character itself: $cell holds generated HTML by now,
+ # so its first character may be markup rather than the glyph
+ $anno = sprintf(' <small class="digraph">%s</small>',
+ sprintf '#%d', ord($input)
+ );
+ last;
+ }
+ elsif ($_ eq '&xml') {
+ # same as above, written as a complete numeric reference
+ $anno = sprintf(' <small class="digraph">%s</small>',
+ sprintf '&#%d;', ord($input)
+ );
+ last;
+ }
+ elsif ($_ eq 'di') {
+ if (defined $mnem and length $mnem) {
+ $anno = sprintf(' <small class="digraph">%s</small>', EscapeHTML($mnem));
+ last;
+ }
+ }
+ else {
+ if ($_ eq 'hex' or $input =~ /^[^a-zA-Z]$/) {
+ $anno = sprintf(' <small class="%s">%04X</small>', 'value', ord $input);
+ last;
+ }
+ }
}
}
- return sprintf('<td%s%s%s>%s%s',
- defined $title ? qq{ title="$title"} : '',
- @class ? sprintf(' class="%s"', join ' ', @class) : '',
- $html || '',
+ return sprintf('<%s>%s%s',
+ join(' ', 'td',
+ defined $title ? qq{title="$title"} : (),
+ @class ? sprintf('class="%s"', join ' ', @class) : (),
+ $html || (),
+ ),
$cell eq '' ? ' ' : $cell,
- $self->{digraph} && defined $mnem && length $mnem
- ? sprintf(' <small class="digraph">%s</small>', EscapeHTML($mnem))
- : $self->{unicode} + $cell =~ /^[^a-zA-Z]$/ > 0
- ? sprintf(' <small class="%s">%04X</small>', 'value', ord $cell)
- : '',
+ $anno,
);
}
-sub table {
- my ($self, $digraphs) = @_;
-
- my @rows;
-
- my @colheads;
- while ($digraphs->[0] !~ /^\./) {
- my $cell = shift @$digraphs or last;
- push @colheads, sprintf(
- '<%s%s>%s',
- $cell =~ s/^-// ? 'td' : 'th',
- $cell =~ s/:(.*)// ? qq{ title="$1"} : '',
- $cell eq '_' ? ' ' : $cell
- );
- }
- push @rows, sprintf '<thead><tr>%s<tbody>', join '', @colheads if @colheads;
+sub row { # Render a flat list of cell specifications into HTML row strings
+ my ($self, $cells) = @_;
+ my @html; # one string per table row, in input order
my $colspan = 1;
- for my $cell (@$digraphs) {
+ for my $cell (@{$cells}) {
if ($cell =~ s/^\.//) {
# dot indicates start of a new row
- push @rows, '';
+ push @html, '<tr>'; # every row string opens with its own <tr>
if ($cell =~ s/^>//) {
# header cell text follows
$cell =~ s/_/ /g; # underscores may be used instead of whitespace (for qw//ability)
- $rows[-1] .= '<th>'.($cell || ' ');
+ my $class = $cell =~ s/^-// && ' class="ex"'; # a leading '-' gives the header the "ex" class; otherwise this is the empty string
+ $html[-1] .= "<th$class>".($cell || ' ');
}
next;
}
$colspan++;
next;
}
+ elsif ($cell eq '>-') { # appends an empty header cell to the current row
+ $html[-1] .= '<th>';
+ next;
+ }
+ elsif ($cell =~ m/^</) { # cell text starting with '<' is appended as-is after a plain <td>
+ $html[-1] .= '<td>'.$cell;
+ next;
+ }
- $rows[-1] .= $self->cell($cell,
- $colspan > 1 && qq{ colspan="$colspan"},
+ $html[-1] .= $self->cell($cell,
+ $colspan > 1 && qq{colspan="$colspan"}, # attribute is passed only when $colspan was incremented above
);
$colspan = 1;
}
- return sprintf qq{<table class="glyphs%s">\n%s</table>\n},
- $self->{digraph} || $self->{unicode} >= 0 ? ' dilabel' : '',
- join '', map {"<tr>$_\n"} @rows;
+ return @html; # list of row strings; no surrounding <table> markup is added here
+}
+
+# Opening <table> tag for a glyph table.
+# The 'dilabel' class is added whenever the annotation list is non-empty.
+sub tabletag {
+	my ($self) = @_;
+
+	my @classes = ('glyphs');
+	push @classes, 'dilabel' if @{ $self->{anno} };
+
+	return sprintf '<table class="%s">', join ' ', @classes;
+}
+
+# Render a complete glyph table.
+#   $digraphs  array ref of cell specifications; leading entries up to the
+#              first one starting with '.' are column headers and are
+#              shifted off the caller's array, the rest is handed to row()
+# Returns the table as a single string, one line per row.
+sub table {
+	my ($self, $digraphs) = @_;
+
+	my @colheads;
+	while ($digraphs->[0] !~ /^\./) {
+		my $cell = shift @$digraphs or last;
+		if ($cell eq '>') {
+			# a lone '>' starts another header row
+			push @colheads, '<tr>';
+			next;
+		}
+
+		# each substitution strips its marker, so the order matters:
+		# '-' prefix, then ':title' suffix, then '>' colspan prefix
+		my $tag     = $cell =~ s/^-// ? 'td' : 'th';
+		my $title   = $cell =~ s/:(.*)// && qq{ title="$1"};
+		my $colspan = $cell =~ s/^(>+)// && ' colspan='.(length($1) + 1);
+		my $label   = $cell eq '_' ? ' ' : $cell;
+
+		push @colheads, "<$tag$title$colspan>$label";
+	}
+
+	my @rows;
+	push @rows, sprintf '<thead><tr>%s<tbody>', join '', @colheads if @colheads;
+	push @rows, $self->row($digraphs);
+
+	return join '', map {"$_\n"} $self->tabletag, @rows, '</table>';
}
sub print {
my $self = shift;
while (@_) {
- printf '<div class="section"><h2>%s</h2>'."\n\n", shift;
+ print '<div class="section">'; # each section is one optional title followed by table specifications
+ printf '<h2>%s</h2>', shift unless ref $_[0]; # the heading is skipped when the next argument is already a reference
+ print "\n\n";
while (ref $_[0] and $_ = shift) {
print $self->table($_);
}
- print '</div>';
+ print "\n</div>";
}
}
+# Build the legend explaining the level classes of the current style.
+# Returns a list of HTML fragments: the opening markup, one cell per
+# class, and the closing markup.
+sub legend {
+	my $self = shift;
+
+	my @classes;
+	if ($self->{style} eq 'univer') {
+		@classes = (
+			[l5 => 'unicode 1.1'],
+			[l4 => '20th century'],
+			[l3 => 'in 6.0 (2010)'],
+			[l2 => 'recent assignments'],
+			[l1 => 'proposed'],
+			[ex => 'irregular'],
+		);
+	}
+	else {
+		# only the two middle levels differ between 'di' and other styles
+		my $is_di = $self->{style} eq 'di';
+		@classes = (
+			[l5 => 'ascii'],
+			[l4 => $is_di ? 'digraph' : 'latin1'],
+			[l3 => $is_di ? 'proposed' : 'HTML4'],
+			[l2 => 'unicode ≤6.0'],
+			[l1 => 'other unicode'],
+			[ex => 'discouraged'],
+		);
+	}
+
+	my @cells = map { sprintf '<td class="X %s">%s', @{$_} } @classes;
+
+	return (
+		'<div class="legend"><table class="glyphs"><tr>',
+		@cells,
+		'</table></div>',
+	);
+}
+
+
1;