keyboard: separate method to determine univer styling
[sheet.git] / Shiar_Sheet / FormatChar.pm
1 package Shiar_Sheet::FormatChar;
2
3 use 5.010;
4 use strict;
5 use warnings;
6 use utf8;
7
8 use Data::Dump 'pp';
9 use PLP::Functions 'EscapeHTML';
10
11 our $VERSION = '1.09';
12
13 our $uc = do 'data/unicode-char.inc.pl';
14
# Constructor.  A fresh formatter uses the 'di' class style and carries
# the annotation list ['di', 0], which cell() walks in order.
sub new {
	my ($class) = @_;
	my %self = (
		anno  => ['di', 0],
		style => 'di',
	);
	return bless \%self, $class;
}
19
# Look up descriptive data for a codepoint.
#
# Returns the array ref stored in the $uc include for that character if
# there is a true entry.  Otherwise Unicode::UCD is loaded on demand and
# a list of its category and name fields (followed by the '-' and
# 'string' slots of the same slice) is returned.  If that lookup fails
# or finds nothing, the result is an empty array ref.
sub glyph_info {
	my ($self, $codepoint) = @_;

	my $known = $uc->{chr $codepoint};
	return $known if $known;

	# eval guards against Unicode::UCD being unavailable
	my $fallback = eval {
		require Unicode::UCD;
		my $fullinfo = Unicode::UCD::charinfo($codepoint);
		$fullinfo ? [ @$fullinfo{qw/category name - string/} ] : undef;
	};
	return $fallback || [];
}
29
# Produce the HTML building blocks for a single character.
#
# Returns a list of: the escaped cell contents, the escaped title
# ("U+XXXX" plus the name if known), the class string ("X <class>", or
# empty when the character has no class), the mnemonic, and the entity.
sub glyph_html {
	my ($self, $char) = @_;

	my $codepoint = ord $char;
	my ($class, $name, $mnem, $entity, $string)
		= @{ $self->glyph_info($codepoint) };

	my $title = sprintf 'U+%04X', $codepoint;
	$title .= " $name" if $name;

	# an alternative display string takes precedence over the raw character
	my $cell = EscapeHTML($string || $char);
	if ($class and $class =~ /\bZs\b/) {
		# space separators are wrapped in a span
		$cell = "<span>$cell</span>";
	}
	elsif ($cell eq '') {
		$cell = '&nbsp;';
	}

	return (
		$cell,
		EscapeHTML($title),
		$class ? "X $class" : '',
		$mnem,
		$entity,
	);
}
44
# Like glyph_html(), but also accepts a string of several characters.
#
# Input of at most one character is passed straight to glyph_html().
# For longer input the result is a four element list: the escaped
# string, the individual titles joined by ' | ', the class of the first
# character, and the mnemonics joined by spaces (with '…' standing in
# for any character lacking one).
sub glyphs_html {
	my ($self, @args) = @_;
	my $text = $args[0];

	return $self->glyph_html(@args) if length $text <= 1;

	my (@titles, @mnems, $lead_class);
	for my $char (split //, $text) {
		my @glyph = $self->glyph_html($char);
		$lead_class = $glyph[2] unless @titles;  # first character only
		push @titles, $glyph[1];
		push @mnems,  $glyph[3] // '…';
	}

	return (
		EscapeHTML($text),      # cell
		join(' | ', @titles),   # title
		$lead_class,            # class
		join(' ', @mnems),      # digraph
	);
}
58
# Format one character as an (unclosed) <td> carrying the class and
# title that glyph_html() reports for it.
sub glyph_cell {
	my ($self, $char) = @_;
	my ($cell, $title, $class) = $self->glyph_html($char);
	return sprintf '<td class="%s" title="%s">%s', $class, $title, $cell;
}
63
# Determine the level class (l1..l5) for the 'univer' style, based on
# the Unicode version in which the input's characters are present.
sub glyph_level_univer {
	my ($self, $input) = @_;

	if ($input =~ /\p{age=unassigned}/) {
		# check include for assignments after unicode 6.0 (perl v5.14)
		state $agemap = do 'data/unicode-age.inc.pl';
		return $agemap->{ord $input} ? 'l2' : 'l1';
	}

	return 'l5' if $input =~ /^\p{in=1.1}*$/;  # first release 1993
	return 'l4' if $input =~ /^\p{in=3.0}*$/;  # 20th century
	return 'l4' if $input =~ /^\p{in=4.1}*$/;  # over 10 years ago
	return 'l3' if $input =~ /^\p{in=6.0}*$/;  # before 2012
	return 'l2';                               # more recent
}
88
# Format a single table cell as an (unclosed) <td> element.
#
# $input selects the contents: '-' gives an empty cell, '=' an empty
# cell of class u-invalid, and anything else is rendered through
# glyphs_html() after stripping an optional leading '-' (which adds
# class ex) and an optional leading backslash.  $html, if true, is
# appended verbatim to the tag as additional attributes.
#
# Level classes (l1..l5) depend on $self->{style}.  The trailing <small>
# annotation comes from the first entry of $self->{anno} that applies.
# Returns the HTML string.
sub cell {
	my ($self, $input, $html) = @_;
	my (@class, $title, $cell, $mnem, $entity);

	if ($input eq '-') {
		$cell = '';
	}
	elsif ($input eq '=') {
		push @class, 'u-invalid';
		$cell = '';
	}
	else {{
		# the inner bare block lets `next` skip the remaining class logic
		push @class, 'X';

		if ($input =~ s/^-//) {
			push @class, 'ex'; # discouraged
		}

		$input =~ s/^\\//;  # escaped char
		($cell, $title, my $class, $mnem, $entity) = $self->glyphs_html($input);

		if ($self->{style} eq 'univer') {
			push @class, $self->glyph_level_univer($input);
			next;
		}

		my $codepoint = ord $input;
		if ($self->{style} eq 'di') {
			if ($mnem and $mnem =~ /…/) {
				# incomplete representation, usually partial
			}
			elsif ($class =~ /\bu-di\b/) {
				push @class, ('l4', 'u-di'); # standard digraph
			}
			elsif ($class =~ /\bu-prop\b/) {
				push @class, ('l3', 'u-prop'); # unofficial
			}
		}
		elsif ($self->{style} eq 'html') {
			if (defined $entity) {
				push @class, ($codepoint <= 0xFF ? 'l4' : 'l3', 'u-html');
			}
		}
		else {
			if ($codepoint <= 0xFF) {
				push @class, 'l4', 'u-lat1';  # latin1
			}
			elsif ($codepoint <= 0xD7FF) {
				push @class, 'l3', 'u-bmp';  # bmp
			}
		}

		# unanchored: true if any character of the input is printable ascii
		if ($input =~ /[ -~]/) {
			push @class, 'l5', 'u-ascii'; # ascii
		}
		elsif ($input =~ /^\p{in=6.0}+$/ and $input !~ /\p{Co}/) {
			push @class, 'l2'; # in unicode 6.0
		}
		else {
			push @class, 'l1'; # any unicode
		}
	}}

	my $anno = '';
	if ($cell ne '') {
		# Numeric annotations must describe the character itself.  $cell
		# holds rendered HTML (escaped, possibly wrapped in a <span>), so
		# its first character is not necessarily the glyph.
		my $value = ord $input;

		for my $kind (@{ $self->{anno} }) {
			if ($kind =~ /html$/) {
				if (defined $entity) {
					# '&html' shows the full entity reference
					$entity = "&$entity;" if $kind =~ /^&/;
					$anno = sprintf(' <small class="digraph">%s</small>', EscapeHTML($entity));
					last;
				}
			}
			elsif ($kind eq 'xml') {
				$anno = sprintf(' <small class="digraph">%s</small>',
					sprintf '#%d', $value
				);
				last;
			}
			elsif ($kind eq '&xml') {
				$anno = sprintf(' <small class="digraph">%s</small>',
					sprintf '&amp;#%d;', $value
				);
				last;
			}
			elsif ($kind eq 'di') {
				if (defined $mnem and length $mnem) {
					$anno = sprintf(' <small class="digraph">%s</small>', EscapeHTML($mnem));
					last;
				}
			}
			else {
				# fallback: hex value, for 'hex' or any single non-letter
				if ($kind eq 'hex' or $input =~ /^[^a-zA-Z]$/) {
					$anno = sprintf(' <small class="%s">%04X</small>', 'value', $value);
					last;
				}
			}
		}
	}

	return sprintf('<%s>%s%s',
		join(' ', 'td',
			defined $title ? qq{title="$title"}  : (),
			@class ? sprintf('class="%s"', join ' ', @class) : (),
			$html || (),
		),
		$cell eq '' ? '&nbsp;' : $cell,
		$anno,
	);
}
199
# Convert a list of cell specifications into HTML table rows.
#
# $cells is an array ref of strings:
#   .         starts a new row
#   .>text    starts a new row with a header cell; underscores in the
#             text become spaces and a leading '-' adds class ex
#   >         merges the following cell with one more column (colspan)
#   >-        adds an empty header cell
#   <...      is inserted as literal HTML inside a plain <td>
# Anything else is formatted by cell().  Returns a list with one HTML
# string per row.  The caller's array is left unmodified.
sub row {
	my ($self, $cells) = @_;
	my @html;

	my $colspan = 1;
	for my $spec (@{$cells}) {
		# The loop variable aliases the caller's array element, so work
		# on a copy.  The substitutions below would otherwise rewrite
		# the input and make a second call give different output.
		my $cell = $spec;

		if ($cell =~ s/^\.//) {
			# dot indicates start of a new row
			push @html, '<tr>';
			if ($cell =~ s/^>//) {
				# header cell text follows
				$cell =~ s/_/ /g;  # underscores may be used instead of whitespace (for qw//ability)
				my $class = $cell =~ s/^-// && ' class="ex"';
				$html[-1] .= "<th$class>".($cell || '&nbsp;');
			}
			next;
		}
		elsif ($cell eq '>') {
			# merge this cell to the next column
			$colspan++;
			next;
		}
		elsif ($cell eq '>-') {
			$html[-1] .= '<th>';
			next;
		}
		elsif ($cell =~ m/^</) {
			$html[-1] .= '<td>'.$cell;
			next;
		}

		$html[-1] .= $self->cell($cell,
			$colspan > 1 && qq{colspan="$colspan"},
		);

		$colspan = 1;
	}

	return @html;
}
240
# Opening <table> tag for a glyph table.  The dilabel class is added
# whenever any annotation is configured.
sub tabletag {
	my ($self) = @_;
	my @classes = ('glyphs');
	push @classes, 'dilabel' if @{ $self->{anno} };
	return sprintf '<table class="%s">', join ' ', @classes;
}
247
# Build a complete HTML table from a list of cell specifications.
#
# Entries of $digraphs up to the first one starting with a dot are
# column headers:
#   >          starts an additional header row
#   -text      emits a <td> instead of a <th>
#   text:tip   sets the title attribute to the part after the colon
#   >>text     spans one column more than the number of leading '>'
#   _          is an empty (&nbsp;) header
# The remaining entries are formatted by row().  Returns the table as a
# single string with one line per row.  The caller's array is left
# unmodified.
sub table {
	my ($self, $digraphs) = @_;

	# Work on a copy so header cells are not shifted off the caller's
	# list.  The explicit emptiness check also avoids matching undef
	# once the list runs out.
	my @cells = @{$digraphs};

	my @colheads;
	while (@cells and $cells[0] !~ /^\./) {
		my $cell = shift @cells;
		if ($cell eq '>') {
			push @colheads, '<tr>';
			next;
		}

		# prefixes are stripped in this order; each test modifies $cell
		my $tag   = $cell =~ s/^-// ? 'td' : 'th';
		my $title = $cell =~ s/:(.*)// ? qq{ title="$1"} : '';
		my $span  = $cell =~ s/^(>+)// ? ' colspan='.(length($1) + 1) : '';
		my $text  = $cell eq '_' ? '&nbsp;' : $cell;

		push @colheads, "<$tag$title$span>$text";
	}

	my @rows;
	push @rows, sprintf '<thead><tr>%s<tbody>', join '', @colheads if @colheads;
	push @rows, $self->row(\@cells);

	return join '', map {"$_\n"} $self->tabletag, @rows, '</table>';
}
274
# Output one or more sections to the currently selected filehandle.
#
# Arguments are an optional plain-string title followed by any number of
# array refs, each rendered by table(); this pattern may repeat.  Every
# title (or leading run of array refs) opens a new <div class="section">.
sub print {
	my $self = shift;
	while (@_) {
		print '<div class="section">';
		printf '<h2>%s</h2>', shift unless ref $_[0];
		print "\n\n";
		# Take each table into a lexical.  Assigning to the global $_
		# would overwrite (or die on) a caller's aliased loop variable.
		while (ref $_[0]) {
			my $table = shift;
			print $self->table($table);
		}
		print "\n</div>";
	}
}
287
# Legend explaining the level classes of the current style.
#
# Returns a list of HTML fragments: the opening markup, one cell per
# class (l5 down to l1, then ex), and the closing markup.
sub legend {
	my $self = shift;

	my @classes;
	if ($self->{style} eq 'univer') {
		@classes = (
			[l5 => 'unicode 1.1'],
			[l4 => '20th century'],
			[l3 => 'in 6.0 (2010)'],
			[l2 => 'recent assignments'],
			[l1 => 'proposed'],
			[ex => 'irregular'],
		);
	}
	else {
		# the two middle levels are labelled differently for digraphs
		my $digraphs = $self->{style} eq 'di';
		@classes = (
			[l5 => 'ascii'],
			[l4 => $digraphs ? 'digraph'  : 'latin1'],
			[l3 => $digraphs ? 'proposed' : 'HTML4'],
			[l2 => 'unicode ≤6.0'],
			[l1 => 'other unicode'],
			[ex => 'discouraged'],
		);
	}

	my @cells = map { sprintf '<td class="X %s">%s', @{$_} } @classes;

	return (
		'<div class="legend"><table class="glyphs"><tr>',
		@cells,
		'</table></div>',
	);
}
312
313 1;
314