use 5.010;
use strict;
use warnings;
+use utf8;
use Data::Dump 'pp';
use PLP::Functions 'EscapeHTML';
EscapeHTML($_[0]), # cell
join(' | ', map { $_->[1] } @chars), # title
$chars[0][2], # class
- join(' ', grep { defined } map { $_->[3] } @chars), # digraph
+ join(' ', map { $_->[3] // '…' } @chars), # digraph
);
}
$input =~ s/^\\//; # escaped char
($cell, $title, my $class, $mnem, $entity) = $self->glyphs_html($input);
- my $codepoint = ord(substr $input, 0, 1);
+ my $codepoint = ord $input;
if ($self->{style} eq 'univer') {
if ($input =~ /\p{age=unassigned}/) {
}
if ($self->{style} eq 'di') {
- if ($class =~ /\bu-di\b/) {
- push @class, ('l3', 'u-di'); # standard digraph
+ if ($mnem =~ /…/) {
+ # incomplete representation, usually partial
+ }
+ elsif ($class =~ /\bu-di\b/) {
+ push @class, ('l4', 'u-di'); # standard digraph
}
elsif ($class =~ /\bu-prop\b/) {
- push @class, ('l2', 'u-prop'); # unofficial
+ push @class, ('l3', 'u-prop'); # unofficial
}
}
elsif ($self->{style} eq 'html') {
if (defined $entity) {
- push @class, ('l3', 'u-html');
+ push @class, ($codepoint <= 0xFF ? 'l4' : 'l3', 'u-html');
}
}
else {
if ($codepoint <= 0xFF) {
- push @class, 'l3', 'u-lat1'; # latin1
+ push @class, 'l4', 'u-lat1'; # latin1
}
elsif ($codepoint <= 0xD7FF) {
- push @class, 'l2', 'u-bmp'; # bmp
+ push @class, 'l3', 'u-bmp'; # bmp
}
}
if ($input =~ /[ -~]/) {
- push @class, 'l4', 'u-ascii'; # ascii
+ push @class, 'l5', 'u-ascii'; # ascii
+ }
+ elsif ($input =~ /^\p{in=6.0}+$/) {
+ push @class, 'l2'; # in unicode 6.0
}
else {
- push @class, 'l1'; # basic unicode
+ push @class, 'l1'; # any unicode
}
}}