# character formatting helpers; HTML escaping is delegated to PLP's EscapeHTML
1 package Shiar_Sheet::FormatChar;
9 use PLP::Functions 'EscapeHTML';
11 our $VERSION = '1.10';
# package-level character table loaded from a relative include path;
# code further down indexes it by the character itself ($uc->{chr $codepoint})
# NOTE(review): `do FILE` yields undef on failure and no check is visible here,
# so a missing include would leave $uc undefined - confirm this is acceptable
13 our $uc = do 'data/unicode-char.inc.pl';
# create the object as a hash blessed into $class with two defaults:
# anno (annotation types, ['di', 0]) and style ('di'); both keys are read
# by other code in this file via $self->{anno} and $self->{style}
# NOTE(review): presumably the constructor body; the sub header is not visible
17 bless { anno => ['di', 0], style => 'di' }, $class;
# build an info record for a codepoint from Unicode::UCD
# NOTE(review): presumably the body of glyph_mkinfo (called with a codepoint
# elsewhere in this file); the sub header and several lines are not visible
# params: $self, $codepoint (integer code point)
# returns: array ref [category, name, undef, $string]
21 my ($self, $codepoint) = @_;
22 # attempt to get unicode character information
25 Unicode::UCD::charinfo($codepoint)
# substitute a stub record (category 'Xn', empty name) if charinfo is false
26 || { category => 'Xn', name => '' };
29 if ($info->{combining}) {
30 # overlay combining diacritics
# chr(9676) is U+25CC; it is put in front of the combining character
31 $string = chr(9676) . chr($codepoint);
# NOTE(review): this record has four elements with $string at index 3, while
# the consumer in this file unpacks five ($class, $name, $mnem, $entity,
# $string); $string would then land in the $entity slot - confirm
33 return [@$info{qw( category name )}, undef, $string];
# look up the info record for a codepoint
# params: $self, $codepoint (integer code point)
# returns: the first true value of: the $uc table entry keyed by the character
# itself, the result of glyph_mkinfo($codepoint), or an empty array ref
# NOTE(review): presumably the body of glyph_info; sub header not visible
37 my ($self, $codepoint) = @_;
38 return $uc->{chr $codepoint} || $self->glyph_mkinfo($codepoint) || [];
# render a single character as html fragments
# params: $self, $char (the character to render)
# returns: list of (cell html, escaped title, class string, $mnem, $entity)
# NOTE(review): presumably the body of glyph_html; sub header not visible
42 my ($self, $char) = @_;
43 my $codepoint = ord $char;
44 my $info = $self->glyph_info($codepoint);
# info records are positional; missing fields simply stay undefined
45 my ($class, $name, $mnem, $entity, $string) = @$info;
# prefer the record's display string, otherwise show the character itself
47 my $cell = EscapeHTML($string || $char);
# title is the hex codepoint, followed by the name only when one is known
48 my $title = sprintf 'U+%04X%s', $codepoint, !!$name && " $name";
# wrap in a span when the class contains Zs as a separate word
50 $cell = "<span>$cell</span>" if $class and $class =~ /\bZs\b/;
# never return an empty cell; use the placeholder literal instead
51 $cell = ' ' if $cell eq '';
# the class is prefixed with 'X ' when set, otherwise a false value is returned
53 return ($cell, EscapeHTML($title), !!$class && "X $class", $mnem, $entity);
# render a string of one or more characters as html fragments
# NOTE(review): presumably the body of glyphs_html; $_[0] is used as the input
# string here, so $self was presumably shifted off @_ on a line not visible
# input of at most one character is handled by glyph_html directly
59 return $self->glyph_html(@_) if length $_[0] <= 1;
# otherwise render every character separately, keeping each result list
61 my @chars = map { [ $self->glyph_html($_) ] } split //, $_[0];
# visible parts of the returned list (other elements are not visible here):
# the whole input escaped, the per-character titles, and the mnemonics
63 EscapeHTML($_[0]), # cell
64 join(' | ', map { $_->[1] } @chars), # title
# characters without a mnemonic (undefined) are shown as an ellipsis
66 join(' ', map { $_->[3] // '…' } @chars), # digraph
# format a character as an opening <td> with class, title and contents
# params: $self, $char (passed on to glyph_html)
# returns: html string '<td class="..." title="...">' followed by the cell
# NOTE(review): sub header not visible; the name of this sub is not confirmed
71 my ($self, $char) = @_;
# %3$s and %2$s pick the class and title by position; the plain %s still
# takes the first value returned by glyph_html (the cell contents)
72 return sprintf('<td class="%3$s" title="%2$s">%s', $self->glyph_html($char));
# classify input by the unicode version its characters were introduced in
# params: $self, $input (string of one or more characters)
# returns: a level class name from 'l1' to 'l5'
# NOTE(review): closing braces and the final else are not visible in this view
75 sub glyph_level_univer {
76 my ($self, $input) = @_;
# any character this perl considers unassigned triggers the include lookup
77 if ($input =~ /\p{age=unassigned}/) {
78 # check include for assignments after unicode 6.0 (perl v5.14)
# loaded once and kept across calls; `state` needs the feature to be enabled,
# presumably on a line not visible here
79 state $agemap = do 'data/unicode-age.inc.pl';
# only the first character of $input is looked up
80 my $version = $agemap->{ord $input};
81 return $version ? 'l2' : 'l1';
# the patterns below use *, so an empty $input matches the first of them
83 elsif ($input =~ /^\p{in=1.1}*$/) {
84 return 'l5'; # first release 1993
86 elsif ($input =~ /^\p{in=3.0}*$/) {
87 return 'l4'; # 20th century
# same level as the 3.0 branch above
89 elsif ($input =~ /^\p{in=4.1}*$/) {
90 return 'l4'; # over 10 years ago
92 elsif ($input =~ /^\p{in=6.0}*$/) {
93 return 'l3'; # before 2012
96 return 'l2'; # more recent
# render one table cell for an input specification
# NOTE(review): presumably the body of cell (called with a cell spec and an
# attribute string elsewhere in this file); many lines are not visible, so
# the notes below only describe what the visible lines do
# params: $self, $input (cell spec), $html (extra value; its use is not
# visible here)
# returns: html string built by the final sprintf
101 my ($self, $input, $html) = @_;
102 my (@class, $title, $cell, $mnem, $entity);
# a lone '=' is flagged with the u-invalid class
107 elsif ($input eq '=') {
108 push @class, 'u-invalid';
# a leading dash is removed from the spec and marks the cell as 'ex'
114 if ($input =~ s/^-//) {
115 push @class, 'ex'; # discouraged
118 $input =~ s/^\\//; # escaped char
# $class here is the class string from glyphs_html, distinct from @class
119 ($cell, $title, my $class, $mnem, $entity) = $self->glyphs_html($input);
# univer style: level is decided by glyph_level_univer alone
121 if ($self->{style} eq 'univer') {
122 push @class, $self->glyph_level_univer($input);
# code point of the first character of $input
126 my $codepoint = ord $input;
# di style: level follows markers found in the class string
127 if ($self->{style} eq 'di') {
128 if ($mnem and $mnem =~ /…/) {
129 # incomplete representation, usually partial
131 elsif ($class =~ /\bu-di\b/) {
132 push @class, ('l4', 'u-di'); # standard digraph
134 elsif ($class =~ /\bu-prop\b/) {
135 push @class, ('l3', 'u-prop'); # unofficial
# html style: any defined entity counts, ranked by code point range
138 elsif ($self->{style} eq 'html') {
139 if (defined $entity) {
140 push @class, ($codepoint <= 0xFF ? 'l4' : 'l3', 'u-html');
144 if ($codepoint <= 0xFF) {
145 push @class, 'l4', 'u-lat1'; # latin1
147 elsif ($codepoint <= 0xD7FF) {
148 push @class, 'l3', 'u-bmp'; # bmp
# NOTE(review): this pattern is unanchored, so it matches when any character
# of $input lies between space and tilde, unlike the anchored test below -
# confirm this is intended for multi-character input
152 if ($input =~ /[ -~]/) {
153 push @class, 'l5', 'u-ascii'; # ascii
# every character present in unicode 6.0 and none in category Co
155 elsif ($input =~ /^\p{in=6.0}+$/ and $input !~ /\p{Co}/) {
156 push @class, 'l2'; # in unicode 6.0
159 push @class, 'l1'; # any unicode
# walk the configured annotation types; $_ is the current type
165 for (@{ $self->{anno} }) {
167 if (defined $entity) {
# the bare regex tests $_ (the annotation type), not $entity
168 $entity = "&$entity;" if /^&/;
169 $anno = sprintf(' <small class="digraph">%s</small>', EscapeHTML($entity));
# NOTE(review): in both xml branches ord() is applied to $cell, which is the
# rendered html returned by glyphs_html, while the hex annotation further
# down uses ord $input; the two can differ - confirm which is intended
173 elsif ($_ eq 'xml') {
174 $anno = sprintf(' <small class="digraph">%s</small>',
175 sprintf '#%d', ord($cell)
179 elsif ($_ eq '&xml') {
180 $anno = sprintf(' <small class="digraph">%s</small>',
181 sprintf '&#%d;', ord($cell)
# only a defined, non-empty mnemonic is shown
186 if (defined $mnem and length $mnem) {
187 $anno = sprintf(' <small class="digraph">%s</small>', EscapeHTML($mnem));
# hex value for the 'hex' type, or for a single character that is not a-z/A-Z
192 if ($_ eq 'hex' or $input =~ /^[^a-zA-Z]$/) {
193 $anno = sprintf(' <small class="%s">%04X</small>', 'value', ord $input);
# assemble the opening tag; attributes are left out when they have no value
200 return sprintf('<%s>%s%s',
202 defined $title ? qq{title="$title"} : (),
203 @class ? sprintf('class="%s"', join ' ', @class) : (),
# an empty cell is replaced by the placeholder literal
206 $cell eq '' ? ' ' : $cell,
# convert a list of cell specifications into table row html
# NOTE(review): presumably the body of row; the declarations of @html and
# $colspan and the return statement are not visible here
# params: $self, $cells (array ref of cell spec strings)
212 my ($self, $cells) = @_;
# $cell aliases the array elements, so the substitutions below modify the
# caller's array in place
216 for my $cell (@{$cells}) {
217 if ($cell =~ s/^\.//) {
218 # dot indicates start of a new row
220 if ($cell =~ s/^>//) {
221 # header cell text follows
222 $cell =~ s/_/ /g; # underscores may be used instead of whitespace (for qw//ability)
# $class is the attribute text if a leading dash was removed, otherwise a
# false value that interpolates as nothing
223 my $class = $cell =~ s/^-// && ' class="ex"';
# a false header text (including '0') is replaced by the placeholder literal
224 $html[-1] .= "<th$class>".($cell || ' ');
228 elsif ($cell eq '>') {
229 # merge this cell to the next column
233 elsif ($cell eq '>-') {
# specs starting with < are appended verbatim after an opening <td>
237 elsif ($cell =~ m/^</) {
238 $html[-1] .= '<td>'.$cell;
# anything else is rendered by cell(), with a colspan attribute only when
# more than one column is spanned
242 $html[-1] .= $self->cell($cell,
243 $colspan > 1 && qq{colspan="$colspan"},
# build the opening <table> tag for a glyph table
# returns: '<table class="glyphs">', with ' dilabel' appended to the class
# when the annotation list in $self->{anno} is not empty
# NOTE(review): presumably the body of tabletag; sub header not visible
254 my $class = 'glyphs';
255 $class .= ' dilabel' if @{ $self->{anno} };
256 return sprintf '<table class="%s">', $class;
# render a complete table from a list of header and cell specifications
# NOTE(review): presumably the body of table; the declarations of @colheads
# and @rows and some lines of the header loop are not visible here
# params: $self, $digraphs (array ref; leading entries are column headers)
# returns: html string with one line per element, from the table tag
# through '</table>'
260 my ($self, $digraphs) = @_;
# entries up to the first one starting with a dot are column headers
265 while ($digraphs->[0] !~ /^\./) {
# headers are removed from the caller's array; a false entry (empty, '0' or
# none left) ends the loop
266 my $cell = shift @$digraphs or last;
268 push @colheads, '<tr>';
271 push @colheads, join('',
# each substitution strips its marker from $cell and yields the related markup
# leading dash: plain td instead of th
273 $cell =~ s/^-// ? 'td' : 'th',
# text after a colon becomes the title
# NOTE(review): $1 is interpolated into the attribute without escaping - confirm
274 $cell =~ s/:(.*)// && qq{ title="$1"},
# n leading > characters give a colspan of n + 1
275 $cell =~ s/^(>+)// && ' colspan='.(length($1) + 1),
# a lone underscore stands for the placeholder literal
277 $cell eq '_' ? ' ' : $cell
# the header section is only emitted when headers were found
280 push @rows, sprintf '<thead><tr>%s<tbody>', join '', @colheads if @colheads;
# the remaining entries are rendered by row()
281 push @rows, $self->row($digraphs);
283 return join '', map {"$_\n"} $self->tabletag, @rows, '</table>';
# print a section: opening div, optional heading, then one table per array ref
# NOTE(review): sub header not visible; $self is used below, so it was
# presumably taken from @_ on a line not visible here
289 print '<div class="section">';
# a leading non-reference argument is used as the heading text
290 printf '<h2>%s</h2>', shift unless ref $_[0];
# consume the following reference arguments, rendering each with table()
# NOTE(review): this assigns to the global $_ and no `local` is visible - confirm
292 while (ref $_[0] and $_ = shift) {
293 print $self->table($_);
301 my @classes = $self->{style} eq 'univer' ? (
302 [l5 => 'unicode 1.1'],
303 [l4 => '20th century'],
304 [l3 => 'in 6.0 (2010)'],
305 [l2 => 'recent assignments'],
310 [l4 => $self->{style} eq 'di' ? 'digraph' : 'latin1'],
311 [l3 => $self->{style} eq 'di' ? 'proposed' : 'HTML4'],
312 [l2 => 'unicode ≤6.0'],
313 [l1 => 'other unicode'],
314 [ex => 'discouraged'],
318 '<div class="legend"><table class="glyphs"><tr>',
319 (map { sprintf '<td class="X %s">%s', @{$_} } @classes),