unicode: prepend unicode 6.0 support level
[sheet.git] / Shiar_Sheet / FormatChar.pm
1 package Shiar_Sheet::FormatChar;
2
3 use 5.010;
4 use strict;
5 use warnings;
6
7 use Data::Dump 'pp';
8 use PLP::Functions 'EscapeHTML';
9
10 our $VERSION = '1.06';
11
12 our $uc = do 'unicode-char.inc.pl';
13
# Constructor: returns a formatter preset to the 'di' classification
# style, annotating cells with digraph mnemonics first and falling back
# to the generic (value) annotation handled by cell().
sub new {
        my $class = shift;

        my %defaults = (
                anno  => ['di', 0],
                style => 'di',
        );

        return bless { %defaults }, $class;
}
18
# Look up the description of a character given its numeric codepoint.
#
# The preloaded $uc table (keyed by the character itself) is consulted
# first.  If it has no entry, Unicode::UCD is loaded on demand and its
# charinfo() record is reduced to the fields selected below.  Any
# failure in that fallback is swallowed by the eval.
#
# Always returns an array reference; it is empty if nothing is known.
sub glyph_info {
        my ($self, $codepoint) = @_;

        my $known = $uc->{chr $codepoint};
        return $known if $known;

        my $looked_up = eval {
                require Unicode::UCD;
                my $fullinfo = Unicode::UCD::charinfo($codepoint);
                $fullinfo ? [ @{$fullinfo}{qw/category name - string/} ] : undef;
        };
        return $looked_up if $looked_up;

        return [];
}
28
# Prepare the HTML fragments describing a single character.
#
# Takes the character itself (not its codepoint) and returns a list:
#   1. the escaped cell contents (the info record's display string if
#      it has one, otherwise the character),
#   2. the escaped title, "U+XXXX" optionally followed by " (NAME)",
#   3. the class string, "X " followed by the info record's class,
#   4. the mnemonic from the info record (may be undef),
#   5. the html entry from the info record (may be undef).
sub glyph_html {
        my ($self, $char) = @_;
        my $codepoint = ord $char;
        my $info = $self->glyph_info($codepoint);
        my ($class, $name, $mnem, $html, $string) = @$info;

        # glyph_info() returns an empty record for unknown characters;
        # default the fields used in string context so that those
        # characters do not trigger "uninitialized" warnings
        $class //= '';
        my $label = $name ? " ($name)" : '';

        my $cell = EscapeHTML($string || $char);
        my $title = sprintf 'U+%04X%s', $codepoint, $label;

        # wrap space separators so they can be styled
        $cell = "<span>$cell</span>" if $class =~ /\bZs\b/;
        $cell = '&nbsp;' if $cell eq '';

        return ($cell, EscapeHTML($title), "X $class", $mnem, $html);
}
43
# Like glyph_html(), but also accepts a string of several characters.
#
# Input of at most one character is handed to glyph_html() unchanged.
# For longer input the result is a four element list: the escaped
# string, the per-character titles joined by ' | ', the class of the
# first character, and the defined mnemonics joined by a space.
sub glyphs_html {
        my ($self, @args) = @_;
        my $text = $args[0];

        if (length($text) <= 1) {
                return $self->glyph_html(@args);
        }

        my (@titles, @mnemonics, $first_class);
        my $seen = 0;
        for my $char (split //, $text) {
                my @part = $self->glyph_html($char);
                push @titles, $part[1];
                $first_class = $part[2] unless $seen++;
                push @mnemonics, $part[3] if defined $part[3];
        }

        my $cell    = EscapeHTML($text);
        my $title   = join ' | ', @titles;
        my $digraph = join ' ', @mnemonics;

        return ($cell, $title, $first_class, $digraph);
}
57
# Render one character as an opening table cell: the class and title
# come from glyph_html() and are followed by the cell contents.  The
# <td> is left unclosed.
sub glyph_cell {
        my ($self, $char) = @_;

        # only the first three fields of glyph_html() are displayed
        my ($contents, $title, $class) = $self->glyph_html($char);

        return qq{<td class="$class" title="$title">$contents};
}
62
# Format a single table cell for a glyph specification.
#
# $input is one of:
#   '-'  an empty cell,
#   '='  an empty cell marked with the "u-invalid" class,
#   otherwise the character(s) to display, optionally prefixed by "-"
#   (adds the "ex" class) and then by a backslash, which is stripped so
#   that the markers above can be displayed literally.
# $html is optional raw attribute text placed inside the <td> tag.
#
# The classes added depend on the object's "style"; the annotation
# appended after the glyph is the first applicable entry of its "anno"
# list.  Returns the opening <td> tag followed by the cell contents and
# annotation (the cell is left unclosed).
sub cell {
        my ($self, $input, $html) = @_;
        my (@class, $title, $cell, $mnem, $entity);

        # Codepoint of the first input character.  Kept at function scope
        # because numeric annotations must describe the character itself:
        # $cell holds escaped HTML, so ord($cell) would report "&" or "<"
        # for characters such as <, " or spaces wrapped in a <span>.
        my $codepoint;

        if ($input eq '-') {
                $cell = '';
        }
        elsif ($input eq '=') {
                push @class, 'u-invalid';
                $cell = '';
        }
        else {{
                # inner bare block allows leaving the classification by 'next'
                push @class, 'X';

                if ($input =~ s/^-//) {
                        push @class, 'ex'; # discouraged
                }

                $input =~ s/^\\//;  # escaped char
                ($cell, $title, my $class, $mnem, $entity) = $self->glyphs_html($input);
                $codepoint = ord(substr $input, 0, 1);

                if ($self->{style} eq 'univer') {
                        if ($input =~ /\p{age=unassigned}/) {
                                # check include for assignments after unicode 6.0 (perl v5.14)
                                state $agemap = do 'unicode-age.inc.pl';
                                my $version = $agemap->{$codepoint};
                                push @class, $version ? 'l2' : 'l1';
                        }
                        elsif ($input =~ /^\p{in=1.1}*$/) {
                                push @class, 'l5';  # first release 1993
                        }
                        elsif ($input =~ /^\p{in=3.0}*$/) {
                                push @class, 'l4';  # 20th century
                        }
                        elsif ($input =~ /^\p{in=4.1}*$/) {
                                push @class, 'l4';  # over 10 years ago
                        }
                        elsif ($input =~ /^\p{in=6.0}*$/) {
                                push @class, 'l3';  # before 2012
                        }
                        else {
                                push @class, 'l2';  # more recent
                        }
                        next;  # skip the generic classification below
                }

                if ($self->{style} eq 'di') {
                        if ($class =~ /\bu-di\b/) {
                                push @class, ('l4', 'u-di'); # standard digraph
                        }
                        elsif ($class =~ /\bu-prop\b/) {
                                push @class, ('l3', 'u-prop'); # unofficial
                        }
                }
                elsif ($self->{style} eq 'html') {
                        if (defined $entity) {
                                push @class, ($codepoint <= 0xFF ? 'l4' : 'l3', 'u-html');
                        }
                }
                else {
                        if ($codepoint <= 0xFF) {
                                push @class, 'l4', 'u-lat1';  # latin1
                        }
                        elsif ($codepoint <= 0xD7FF) {
                                push @class, 'l3', 'u-bmp';  # bmp
                        }
                }

                if ($input =~ /[ -~]/) {
                        push @class, 'l5', 'u-ascii'; # ascii
                }
                elsif ($input =~ /^\p{in=6.0}+$/) {
                        push @class, 'l2'; # in unicode 6.0
                }
                else {
                        push @class, 'l1'; # any unicode
                }
        }}

        # the first annotation type able to describe this cell wins
        my $anno = '';
        if ($cell ne '') {
                for (@{ $self->{anno} }) {
                        if (/html$/) {
                                if (defined $entity) {
                                        $entity = "&$entity;" if /^&/;
                                        $anno = sprintf(' <small class="digraph">%s</small>', EscapeHTML($entity));
                                        last;
                                }
                        }
                        elsif ($_ eq 'xml') {
                                $anno = sprintf(' <small class="digraph">%s</small>',
                                        sprintf '#%d', $codepoint
                                );
                                last;
                        }
                        elsif ($_ eq '&xml') {
                                $anno = sprintf(' <small class="digraph">%s</small>',
                                        sprintf '&amp;#%d;', $codepoint
                                );
                                last;
                        }
                        elsif ($_ eq 'di') {
                                if (defined $mnem and length $mnem) {
                                        $anno = sprintf(' <small class="digraph">%s</small>', EscapeHTML($mnem));
                                        last;
                                }
                        }
                        else {
                                # any other entry: hexadecimal value, either explicitly
                                # requested or for a single non-letter character
                                if ($_ eq 'hex' or $cell =~ /^[^a-zA-Z]$/) {
                                        $anno = sprintf(' <small class="%s">%04X</small>', 'value', $codepoint);
                                        last;
                                }
                        }
                }
        }

        return sprintf('<td%s%s%s>%s%s',
                defined $title  ? qq{ title="$title"}  : '',
                @class ? sprintf(' class="%s"', join ' ', @class) : '',
                $html || '',
                $cell eq '' ? '&nbsp;' : $cell,
                $anno,
        );
}
188
# Build an HTML table from a flat list of cell specifications.
#
# $digraphs is an array reference.  Entries up to the first one starting
# with a dot are column headers: a leading "-" turns the header into a
# plain <td>, text after ":" becomes its title, and "_" stands for an
# empty header.  In the remainder:
#   ".TEXT"   starts a new row; ".>TEXT" additionally adds a row header
#             (underscores become spaces, a leading "-" adds class "ex"),
#   ">"       merges the following cell with this column (colspan),
#   anything else is formatted by cell().
#
# The list is not modified.  Returns the complete <table> markup.
sub table {
        my ($self, $digraphs) = @_;

        # work on a copy: entries are consumed and rewritten while parsing,
        # which must not leak into the caller's data
        my @cells = @$digraphs;

        my @rows;

        my @colheads;
        # test for remaining entries explicitly so that a header of "0"
        # is not mistaken for the end of the list
        while (@cells and $cells[0] !~ /^\./) {
                my $cell = shift @cells;
                my $tag   = $cell =~ s/^-// ? 'td' : 'th';
                my $title = $cell =~ s/:(.*)// ? qq{ title="$1"} : '';
                push @colheads, sprintf('<%s%s>%s',
                        $tag, $title, $cell eq '_' ? '&nbsp;' : $cell,
                );
        }
        push @rows, sprintf('<thead><tr>%s<tbody>', join '', @colheads) if @colheads;

        my $colspan = 1;
        for my $cell (@cells) {
                if ($cell =~ s/^\.//) {
                        # dot indicates start of a new row
                        push @rows, '<tr>';
                        if ($cell =~ s/^>//) {
                                # header cell text follows
                                $cell =~ s/_/ /g;  # underscores may be used instead of whitespace (for qw//ability)
                                my $class = $cell =~ s/^-// ? ' class="ex"' : '';
                                # compare by length so that a header of "0" is displayed
                                $rows[-1] .= "<th$class>" . (length $cell ? $cell : '&nbsp;');
                        }
                        next;
                }
                elsif ($cell eq '>') {
                        # merge this cell to the next column
                        $colspan++;
                        next;
                }

                $rows[-1] .= $self->cell($cell,
                        $colspan > 1 ? qq{ colspan="$colspan"} : '',
                );

                $colspan = 1;
        }

        return sprintf qq{<table class="glyphs%s">\n%s</table>\n},
                @{ $self->{anno} } ? ' dilabel' : '',
                join '', map {"$_\n"} @rows;
}
236
# Output sections of glyph tables to the currently selected filehandle.
#
# Arguments are processed in groups: an optional title (any non-reference
# value, printed as <h2>) followed by any number of array references,
# each of which is rendered by table().  Every group is wrapped in a
# <div class="section">.
sub print {
        my $self = shift;
        while (@_) {
                print '<div class="section">';
                printf '<h2>%s</h2>', shift unless ref $_[0];
                print "\n\n";
                while (ref $_[0]) {
                        # lexical instead of $_: assigning to the global topic
                        # would overwrite the caller's $_ (or an aliased list
                        # element when called from within a for loop)
                        my $table = shift;
                        print($self->table($table));
                }
                print "\n</div>";
        }
}
249
250 1;
251