package Shiar_Sheet::FormatChar;
use 5.010;
use strict;
use warnings;
use utf8;
use Data::Dump 'pp';
use PLP::Functions 'EscapeHTML';
# Version of this module.
our $VERSION = '1.06';
# Glyph data evaluated from 'unicode-char.inc.pl'. glyph_info() looks entries
# up by character (chr of the code point) and glyph_html() dereferences each
# entry as an array.
# NOTE(review): the result of `do` is not checked, so $uc is undef when the
# file is missing or fails to compile; glyph_info() then relies solely on its
# Unicode::UCD fallback -- confirm this silent fallback is intended.
our $uc = do 'unicode-char.inc.pl';
# Constructor. Returns a new formatter object with the default settings:
#   style - 'di', the class scheme cell() applies
#   anno  - ['di', 0], the annotation kinds cell() tries in order
sub new {
    my $class = shift;
    my %defaults = (
        anno  => ['di', 0],
        style => 'di',
    );
    return bless {%defaults}, $class;
}
# Look up the description record for a code point.
#
# $codepoint - numeric code point.
#
# Returns an array reference: the entry from the preloaded $uc table when it
# has a true value for the character, otherwise a record built from
# Unicode::UCD::charinfo(), otherwise an empty array. Any failure while
# loading or querying Unicode::UCD is swallowed and yields the empty record.
sub glyph_info {
    my ($self, $codepoint) = @_;

    my $known = $uc->{chr $codepoint};
    return $known if $known;

    my $fallback = eval {
        require Unicode::UCD;
        my $fullinfo = Unicode::UCD::charinfo($codepoint);
        # Slice in the same slot order as the preloaded records.
        # NOTE(review): charinfo() does not appear to document '-' or
        # 'string' keys, so those slots are presumably always undef -- confirm.
        $fullinfo ? [@$fullinfo{qw/category name - string/}] : undef;
    };
    return $fallback || [];
}
# Build the display parts for a single character.
#
# $char - the character to describe; only its first character is used for
#         the code point.
#
# Returns the list (cell, title, class, mnemonic, html):
#   cell     - HTML-escaped display string (the record's own string when it
#              has a true one, else the character itself)
#   title    - escaped 'U+XXXX' label, followed by ' (name)' when a name is known
#   class    - 'X ' followed by the record's class field (empty if unknown)
#   mnemonic - third field of the glyph_info() record, passed through
#   html     - fourth field of the glyph_info() record, passed through
sub glyph_html {
    my ($self, $char) = @_;
    my $codepoint = ord $char;
    my $info = $self->glyph_info($codepoint);
    my ($class, $name, $mnem, $html, $string) = @$info;

    # glyph_info() returns an empty record for unknown characters; default
    # the fields used in string operations so they do not raise
    # "uninitialized value" warnings.
    $class //= '';
    my $label = defined $name && length $name ? " ($name)" : '';

    my $cell = EscapeHTML($string || $char);
    my $title = sprintf 'U+%04X%s', $codepoint, $label;
    # NOTE(review): as written this assignment leaves $cell unchanged;
    # presumably wrapping markup for space separators (Zs) is missing -- confirm.
    $cell = "$cell" if $class =~ /\bZs\b/;
    $cell = ' ' if $cell eq '';
    return ($cell, EscapeHTML($title), "X $class", $mnem, $html);
}
# Build the display parts for a string of one or more characters.
#
# With at most one character the call is forwarded unchanged to glyph_html().
# Otherwise every character is formatted separately and the results are
# combined into the list (cell, title, class, digraph):
#   cell    - the whole escaped input
#   title   - the per-character titles joined by ' | '
#   class   - the class of the first character
#   digraph - the per-character mnemonics joined by a space, with '…'
#             standing in for any character that has none
sub glyphs_html {
    my $self = shift;

    if (length $_[0] <= 1) {
        return $self->glyph_html(@_);
    }

    my @glyphs;
    for my $char (split //, $_[0]) {
        push @glyphs, [ $self->glyph_html($char) ];
    }
    my @titles    = map { $_->[1] } @glyphs;
    my @mnemonics = map { defined $_->[3] ? $_->[3] : '…' } @glyphs;

    return (
        EscapeHTML($_[0]),      # cell
        join(' | ', @titles),   # title
        $glyphs[0][2],          # class
        join(' ', @mnemonics),  # digraph
    );
}
# Format a single character as a standalone cell string, built from the
# parts returned by glyph_html().
# NOTE(review): the format has a single placeholder, so only the first part
# (the cell text) is emitted and the remaining parts are ignored; presumably
# markup using them is missing from the format -- confirm.
sub glyph_cell {
    my $self = shift;
    my ($char) = @_;
    my @parts = $self->glyph_html($char);
    return sprintf('
%s', @parts);
}
# Render one table cell for a glyph specification.
#
# $input - cell specification:
#            '-'  an empty cell
#            '='  an empty cell classed 'u-invalid'
#            anything else is one or more characters to display; a leading
#            '-' is stripped and adds class 'ex', after which one leading
#            backslash is stripped (so '\-' gives a literal dash)
# $html  - optional extra string handed to the final sprintf; table()
#          passes a colspan attribute here
#
# The classes added depend on $self->{style} ('univer', 'di', 'html' or
# anything else), and the annotation on the first entry of $self->{anno}
# that applies to the glyph.
#
# Returns the formatted cell string.
sub cell {
    my ($self, $input, $html) = @_;
    my (@class, $title, $cell, $mnem, $entity, $codepoint);
    if ($input eq '-') {
        $cell = '';
    }
    elsif ($input eq '=') {
        push @class, 'u-invalid';
        $cell = '';
    }
    else {{
        # Doubled braces: the inner bare block lets `next` below skip the
        # remaining classification.
        push @class, 'X';
        if ($input =~ s/^-//) {
            push @class, 'ex';  # discouraged
        }
        $input =~ s/^\\//;  # escaped char
        ($cell, $title, my $class, $mnem, $entity) = $self->glyphs_html($input);

        # Code point of the first raw character. Kept in the outer scope so
        # the annotations below can report it: $cell is HTML-escaped (and may
        # be a substituted display string), so ord($cell) would give 38 for
        # '<', '>' and '"'.
        $codepoint = ord $input;

        if ($self->{style} eq 'univer') {
            if ($input =~ /\p{age=unassigned}/) {
                # check include for assignments after unicode 6.0 (perl v5.14)
                state $agemap = do 'unicode-age.inc.pl';
                my $version = $agemap->{$codepoint};
                push @class, $version ? 'l2' : 'l1';
            }
            elsif ($input =~ /^\p{in=1.1}*$/) {
                push @class, 'l5';  # first release 1993
            }
            elsif ($input =~ /^\p{in=3.0}*$/) {
                push @class, 'l4';  # 20th century
            }
            elsif ($input =~ /^\p{in=4.1}*$/) {
                push @class, 'l4';  # over 10 years ago
            }
            elsif ($input =~ /^\p{in=6.0}*$/) {
                push @class, 'l3';  # before 2012
            }
            else {
                push @class, 'l2';  # more recent
            }
            next;
        }

        if ($self->{style} eq 'di') {
            # $mnem is undef for glyphs without a mnemonic; test definedness
            # first so the match does not warn.
            if (defined $mnem and $mnem =~ /…/) {
                # incomplete representation, usually partial
            }
            elsif ($class =~ /\bu-di\b/) {
                push @class, ('l4', 'u-di');  # standard digraph
            }
            elsif ($class =~ /\bu-prop\b/) {
                push @class, ('l3', 'u-prop');  # unofficial
            }
        }
        elsif ($self->{style} eq 'html') {
            if (defined $entity) {
                push @class, ($codepoint <= 0xFF ? 'l4' : 'l3', 'u-html');
            }
        }
        else {
            if ($codepoint <= 0xFF) {
                push @class, 'l4', 'u-lat1';  # latin1
            }
            elsif ($codepoint <= 0xD7FF) {
                push @class, 'l3', 'u-bmp';  # bmp
            }
        }

        # Fallback level, added after any style-specific classes.
        if ($input =~ /[ -~]/) {
            push @class, 'l5', 'u-ascii';  # ascii
        }
        elsif ($input =~ /^\p{in=6.0}+$/) {
            push @class, 'l2';  # in unicode 6.0
        }
        else {
            push @class, 'l1';  # any unicode
        }
    }}

    # Annotation: the first configured kind that applies wins.
    my $anno = '';
    if ($cell ne '') {
        for (@{ $self->{anno} }) {
            if (/html$/) {
                if (defined $entity) {
                    # a kind starting with '&' asks for the full entity form
                    $entity = "&$entity;" if /^&/;
                    $anno = sprintf(' %s', EscapeHTML($entity));
                    last;
                }
            }
            elsif ($_ eq 'xml') {
                $anno = sprintf(' %s',
                    sprintf '#%d', $codepoint
                );
                last;
            }
            elsif ($_ eq '&xml') {
                $anno = sprintf(' %s',
                    sprintf '&#%d;', $codepoint
                );
                last;
            }
            elsif ($_ eq 'di') {
                if (defined $mnem and length $mnem) {
                    $anno = sprintf(' %s', EscapeHTML($mnem));
                    last;
                }
            }
            else {
                if ($_ eq 'hex' or $cell =~ /^[^a-zA-Z]$/) {
                    # NOTE(review): the single %04X placeholder consumes the
                    # string 'value', not the code point, so this formats as
                    # 0000; presumably markup with a further placeholder is
                    # missing from the format -- confirm.
                    $anno = sprintf(' %04X', 'value', $codepoint);
                    last;
                }
            }
        }
    }

    # NOTE(review): five arguments are supplied for two placeholders, so the
    # $html, cell and annotation values are not emitted; presumably the
    # format lost its markup -- confirm against the intended output.
    return sprintf(' | %s%s',
        defined $title ? qq{ title="$title"} : '',
        @class ? sprintf(' class="%s"', join ' ', @class) : '',
        $html || '',
        $cell eq '' ? ' ' : $cell,
        $anno,
    );
}
# Render a table from a flat list of cell specifications.
#
# $digraphs - array reference of entries:
#   * leading entries up to the first one starting with '.' are column
#     headings: a '-' prefix selects 'td' instead of 'th', ':text' supplies
#     a title attribute and '_' stands for a blank heading
#   * '.'      starts a new row; '.>text' also gives the row a header text,
#              where '_' stands for a space and a '-' prefix is stripped
#   * '>'      merges the next cell with this column (increments colspan)
#   * anything else is passed to cell()
#
# The list is copied first, so the caller's array is left untouched and can
# be rendered again.
#
# Returns the formatted table string.
sub table {
    my ($self, $digraphs) = @_;
    my @cells = @{ $digraphs // [] };
    my @rows;

    my @colheads;
    while (@cells) {
        last if defined $cells[0] and $cells[0] =~ /^\./;
        my $cell = shift @cells;
        # An undefined entry still ends the headings, but '0' and '' are
        # now accepted as heading text instead of ending the section.
        last unless defined $cell;
        # The two substitutions must run in this order, before the text is used.
        my $tag   = $cell =~ s/^-// ? 'td' : 'th';
        my $title = $cell =~ s/:(.*)// ? qq{ title="$1"} : '';
        push @colheads, sprintf(
            '<%s%s>%s',
            $tag,
            $title,
            $cell eq '_' ? ' ' : $cell
        );
    }
    push @rows, sprintf '%s | ', join '', @colheads if @colheads;

    my $colspan = 1;
    for my $cell (@cells) {
        if ($cell =~ s/^\.//) {
            # dot indicates start of a new row
            push @rows, '';
            if ($cell =~ s/^>//) {
                # header cell text follows
                $cell =~ s/_/ /g;  # underscores may be used instead of whitespace (for qw//ability)
                # NOTE(review): $class is computed but never used below;
                # presumably the row-header markup that carried it is
                # missing -- confirm. The substitution itself is still needed
                # to strip the '-' prefix.
                my $class = $cell =~ s/^-// && ' class="ex"';
                $rows[-1] .= "".($cell || ' ');
            }
            next;
        }
        elsif ($cell eq '>') {
            # merge this cell to the next column
            $colspan++;
            next;
        }
        $rows[-1] .= $self->cell($cell,
            $colspan > 1 && qq{ colspan="$colspan"},
        );
        $colspan = 1;
    }

    # NOTE(review): the format contains no placeholders, so neither the
    # label class nor the joined rows are emitted; presumably the table
    # markup is missing from the format -- confirm.
    return sprintf qq{\n},
        @{ $self->{anno} } ? ' dilabel' : '',
        join '', map {"$_\n"} @rows;
}
# Print one or more sections to the currently selected output handle.
#
# Arguments are consumed in groups: an optional plain (non-reference)
# caption followed by any number of array references, each of which is
# rendered with table().
sub print {
    my $self = shift;
    while (@_) {
        print ' ';
        printf ' %s', shift unless ref $_[0];
        print "\n\n";
        # Use a lexical rather than assigning to the global $_, which may be
        # aliased to an element of a loop in the caller.
        while (ref $_[0]) {
            my $digraphs = shift;
            print $self->table($digraphs);
        }
        print "\n";
    }
}
1;
|