# Use the :utf8 layer by default for input and output handles, and declare
# the response body as UTF-8 encoded HTML via the content_type header entry.
5 use open IO => ':utf8';
9 $header{content_type} = 'text/html; charset=utf-8';
11 :><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
12 "http://www.w3.org/TR/html4/loose.dtd">
16 <title>charset cheat sheet</title>
17 <meta http-equiv="content-type" content="text/html; charset=utf-8">
18 <link rel="stylesheet" type="text/css" media="all" href="/base.css">
22 <h1>Character encoding</h1>
# Digraph/character data: whatever evaluating digraphs.inc.pl returns, used
# below as a hash reference.  Entries that are array references are unpacked
# by the rendering loop as (codepoint, name, prop, script, string).
25 my $diinfo = do 'digraphs.inc.pl';
# Reverse index over the reference-valued entries: first element of the entry
# (the code point used for the $di{ord $glyph} lookup) => key in $diinfo.
26 my %di = map { $diinfo->{$_}->[0] => $_ } grep { ref $diinfo->{$_} }
29 use Encode qw(decode resolve_alias);
30 # generate character table(s)
31 # (~16x faster than decoding in loop;
32 # substr strings is twice as fast as splitting to an array)
# Named groups of encodings (presumably the entries of %ALIAS, which the
# request parser expands with @{ $ALIAS{$_} } -- TODO confirm, the opening
# line is not visible here).
# Decorations on an encoding name, as interpreted by the request parser:
#   leading "--"  start the table at offset 160 (or 48)
#   leading "-"   start the table at offset 128 (or 32)
#   trailing "-"  end the table early (at 127, 159 or 191 depending on offset)
34 # default => [qw(unicode utf-8 iso-8859-1 cp437 -cp1252- --iso-8859-15- -koi8-f)],
35 default => [qw(unicode- utf-8 iso-8859-1 -cp1252- --iso-8859-15- cp437 -cp850)],
36 0 => [qw(cp437 cp863)],
37 1 => [qw(iso-8859-1 cp1252 MacRoman cp850)],
38 2 => [qw(iso-8859-2 cp1250 cp852 MacCentralEurRoman MacCroatian MacRumanian)],
39 5 => [qw(koi8-f iso-8859-5 cp1251 MacCyrillic cp855 cp866)],
40 7 => [qw(iso-8859-7 cp1253 MacGreek cp737 cp869)],
41 8 => [qw(iso-8859-8 cp1255 MacHebrew cp862)],
# Build one description (%row) per requested encoding name.
#   offset  first byte value shown in the table (0 unless a "-" prefix is given)
#   set     canonical encoding name as returned by resolve_alias()
#   table   string holding one character per table cell
45 my %row = (offset => 0);
# A leading "--" or "-" is stripped from the name and moves the start of the
# table; which of the two offsets is used depends on the current $endpoint.
47 if ($input =~ s/^--//) {
48 $row{offset} = $endpoint > 160 ? 160 : 48;
50 elsif ($input =~ s/^-//) {
51 $row{offset} = $endpoint > 128 ? 128 : 32;
# A trailing "-" is stripped as well and lowers the end of the table:
# 127 without offset, 159 for offsets below 160, otherwise 191.
53 if ($input =~ s/-$//) {
54 $endpoint = $row{offset} ? $row{offset} < 160 ? 159 : 191 : 127;
# resolve_alias() returns a false value for names Encode does not know.
56 if ($row{set} = resolve_alias($input)) {
57 if ($row{set} eq 'Internal') {
# Placeholder of 640 or 4096 spaces; cells of the 'Unicode BMP' set are
# printed by printcell_unicode() instead of being read from this string.
58 $row{table} = ' ' x ($endpoint < 255 ? 640 : 4096);
59 $row{set} = 'Unicode BMP';
61 elsif ($row{set} eq 'utf-8-strict') {
# Decode the whole byte range offset..endpoint with a single decode() call.
66 $row{table} = decode($row{set}, pack 'C*', $row{offset} .. $endpoint);
# The name comes straight from the request path, so it is escaped with the
# same quote() helper the table cells use before being echoed into the page.
70 print "<p>Encoding ", quote($input), " unknown</p>\n";
# Requested names: group names are expanded to their member lists; path
# segments are separated by "/", "+" or whitespace.  Without a usable
# PATH_INFO (unset, or containing no word character) the 'default' group is
# shown.
77 } map { defined $ALIAS{$_} ? @{ $ALIAS{$_} } : $_ }
78 defined($ENV{PATH_INFO}) && $ENV{PATH_INFO} =~ /\w/ ? split(m{[/+\s]}, $ENV{PATH_INFO}) : 'default';
# U+FFFD; table cells equal to this character are special-cased when the
# tables are rendered.
79 my $NOCHAR = chr 0xFFFD;
# Patch the table of every request whose set is cp437, in place.
81 for my $cp437 (grep {$request[$_]->{set} eq 'cp437'} 0 .. $#request) {
# Position 237 (0xED) is overridden with U+03D5.
82 substr($request[$cp437]->{table}, 237, 1) = pack 'U*', 0x3D5; # phi sign
# Positions 0..31 are overridden with the code points listed below (hex).
83 substr($request[$cp437]->{table}, 0, 32) = pack 'U*', map {hex} qw(
84 2007 263A 263B 2665 2666 2663 2660 2022 25D8 25CB 25D9 2642 2640 266A 266B 263C
85 25BA 25C4 2195 203C 00B6 00A7 25AC 21A8 2191 2193 2192 2190 221F 2194 25B2 25BC
# Print the opening <td> that labels a Unicode block when $value is the first
# cell of that block.  Judging by the labels, $value counts columns of 16 code
# points (0x040 starts Cyrillic, i.e. U+0400), so one 16-column table row
# covers 256 code points.  colspan is the block's width in cells and rowspan
# its height in rows; the colspans within a single row must add up to 16.
# Output goes to the currently selected handle; no closing tag is printed.
97 sub printcell_unicode {
100 print "\n".'<td class="X">?';
102 elsif ($value == 0) {
103 print '<td colspan="2" class="X Cc" style="border-right:none">control';
105 elsif ($value == 2) {
106 print '<td colspan="2" class="X L Po" style="border-left:none; border-right:none">comn';
108 elsif ($value == 4) {
109 print '<td colspan="4" class="X L Latin" style="border-left:none">basic latin';
111 elsif ($value == 8) {
112 print '<td colspan="2" class="X Cc" style="border-right:none">control';
114 elsif ($value == 10) {
115 print '<td colspan="2" class="X L So" style="border-left:none; border-right:none">comn';
117 elsif ($value == 12) {
118 print '<td colspan="4" class="X L Latin" style="border-left:none">latin1';
120 elsif ($value == 0x10) {
121 print '<td colspan="8" class="X L Latin">latin extended-A';
123 elsif ($value == 0x18) {
124 print '<td colspan="8" class="X L Latin">latin extended-B';
126 elsif ($value == 0x20) {
127 print '<td colspan="5" class="X L Latin">latin ext-B';
129 elsif ($value == 0x25) {
130 print '<td colspan="6" class="X L Latin">IPA';
132 elsif ($value == 0x2B) {
133 print '<td colspan="5" class="X Sk">spacing modifier';
135 elsif ($value == 0x30) {
136 print '<td colspan="8" class="X Mn">diacritics';
138 elsif ($value == 0x38) {
139 print '<td colspan="8" class="X L Greek">greek';
141 elsif ($value == 0x40) {
142 print '<td colspan="16" class="X L Cyrillic">cyrillic';
144 elsif ($value == 0x50) {
145 print '<td colspan="3" class="X L Cyrillic">cyrillic+';
147 elsif ($value == 0x53) {
148 print '<td colspan="5" class="X L Armenian">armenian';
150 elsif ($value == 0x58) {
151 print '<td colspan="8" class="X L Hebrew">hebrew';
153 elsif ($value == 0x60) {
154 print '<td colspan="16" class="X L Arabic">arabic';
156 elsif ($value == 0x70) {
157 print '<td colspan="5" class="X L Aramaic">syriac';
159 elsif ($value == 0x75) {
160 print '<td colspan="3" class="X L Arabic">arabic+';
162 elsif ($value == 0x78) {
163 print '<td colspan="4" class="X L African">thaana';
165 elsif ($value == 0x7C) {
166 print '<td colspan="4" class="X L African">n\'ko';
168 elsif ($value == 0x80) {
169 print '<td colspan="4" class="X Xr L Hebrew">samaritan';
171 elsif ($value == 0x84) {
172 print '<td colspan="2" class="X Xr L Aramaic">manda';
# 0x086 .. 0x08F: ten cells fill the remainder of this row (4 + 2 + 10 = 16).
174 elsif ($value == 0x86) {
175 print '<td colspan="10" class="">reserved';
177 elsif ($value == 0x90) {
178 print '<td colspan="8" class="X L Brahmic">devanagari';
180 elsif ($value == 0x98) {
181 print '<td colspan="8" class="X L Brahmic">bengali';
183 elsif ($value == 0xA0) {
184 print '<td colspan="8" class="X L Brahmic">gurmukhi';
186 elsif ($value == 0xA8) {
187 print '<td colspan="8" class="X L Brahmic">gujarati';
189 elsif ($value == 0xB0) {
190 print '<td colspan="8" class="X L Brahmic">oriya';
192 elsif ($value == 0xB8) {
193 print '<td colspan="8" class="X L Brahmic">tamil';
195 elsif ($value == 0xC0) {
196 print '<td colspan="8" class="X L Brahmic">telugu';
198 elsif ($value == 0xC8) {
199 print '<td colspan="8" class="X L Brahmic">kannada';
201 elsif ($value == 0xD0) {
202 print '<td colspan="8" class="X L Brahmic">malayalam';
204 elsif ($value == 0xD8) {
205 print '<td colspan="8" class="X L Brahmic">sinhala';
207 elsif ($value == 0xE0) {
208 print '<td colspan="8" class="X L Brahmic Khmer">thai';
210 elsif ($value == 0xE8) {
211 print '<td colspan="8" class="X L Brahmic Khmer">lao';
213 elsif ($value == 0xF0) {
214 print '<td colspan="16" class="X L Brahmic">tibetan';
216 elsif ($value == 0x100) {
217 print '<td colspan="10" class="X L Brahmic">myanmar';
219 elsif ($value == 0x10A) {
220 print '<td colspan="6" class="X L Aramaic">georgian';
222 elsif ($value == 0x110) {
223 print '<td colspan="16" class="X L Hangul">hangeul jamo';
225 elsif ($value == 0x120) {
226 print '<td colspan="16" class="X L African">ethiopic';
228 elsif ($value == 0x130) {
229 print '<td colspan="8" class="X L African">ethiopic';
231 elsif ($value == 0x138) {
232 print '<td colspan="2" class="X L African">eth+';
234 elsif ($value == 0x13A) {
235 print '<td colspan="6" class="X L Syllabic">cherokee';
237 elsif ($value == 0x140) {
238 print '<td colspan="16" rowspan="2" class="X L Syllabic">unified canadian aboriginal syllabics';
240 elsif ($value == 0x160) {
241 print '<td colspan="8" class="X L Syllabic">unified canadian syllabics';
243 elsif ($value == 0x168) {
244 print '<td colspan="2" class="X L Alpha">ogham';
246 elsif ($value == 0x16A) {
247 print '<td colspan="6" class="X L Alpha">runic';
249 elsif ($value == 0x170) {
250 print '<td colspan="2" class="X L Brahmic">tagalog';
252 elsif ($value == 0x172) {
253 print '<td colspan="2" class="X L Brahmic">hanun';
255 elsif ($value == 0x174) {
256 print '<td colspan="2" class="X L Brahmic">buhid';
258 elsif ($value == 0x176) {
259 print '<td colspan="2" class="X L Brahmic" title="tagbanwa">tagb';
261 elsif ($value == 0x178) {
262 print '<td colspan="8" class="X L Brahmic Khmer">khmer';
264 elsif ($value == 0x180) {
265 print '<td colspan="11" class="X L Aramaic">mongolian';
267 elsif ($value == 0x18B) {
268 print '<td colspan="5" class="X Xr L Syllabic">canadian+';
270 elsif ($value == 0x190) {
271 print '<td colspan="5" class="X L Brahmic">limbu';
# 0x195 .. 0x197: three cells, the next label starts at 0x198
# (5 + 3 + 6 + 2 = 16).
273 elsif ($value == 0x195) {
274 print '<td colspan="3" class="X L Brahmic">tai le';
276 elsif ($value == 0x198) {
277 print '<td colspan="6" class="X L Brahmic">new tai lue';
279 elsif ($value == 0x19E) {
280 print '<td colspan="2" class="X L Brahmic Khmer" title="khmer symbols">km';
282 elsif ($value == 0x1A0) {
283 print '<td colspan="2" class="X L Brahmic">lontara';
285 elsif ($value == 0x1A2) {
286 print '<td colspan="9" class="X Xr L Brahmic">tai tham';
288 elsif ($value == 0x1AB) {
289 print '<td colspan="5" class="">reserved';
291 elsif ($value == 0x1B0) {
292 print '<td colspan="8" class="X L Brahmic">balinese';
294 elsif ($value == 0x1B8) {
295 print '<td colspan="4" class="X L Brahmic">sundanese';
297 elsif ($value == 0x1BC) {
298 print '<td colspan="4" class="X Xr L Brahmic">batak';
300 elsif ($value == 0x1C0) {
301 print '<td colspan="5" class="X L Brahmic">lepcha';
303 elsif ($value == 0x1C5) {
304 print '<td colspan="3" class="X L Alpha">ol chiki';
306 elsif ($value == 0x1C8) {
307 print '<td colspan="5" class="">reserved';
309 elsif ($value == 0x1CD) {
310 print '<td colspan="3" class="X Xr Brahmic">vedic';
312 elsif ($value == 0x1D0) {
313 print '<td colspan="8" class="X L Latin">phonetic';
315 elsif ($value == 0x1D8) {
316 print '<td colspan="4" class="X L Latin">phonetic+';
318 elsif ($value == 0x1DC) {
319 print '<td colspan="4" class="X Mn">combining';
321 elsif ($value == 0x1E0) {
322 print '<td colspan="16" class="X L Latin">latin extended additional';
324 elsif ($value == 0x1F0) {
325 print '<td colspan="16" class="X L Greek">greek+';
327 elsif ($value == 0x200) {
328 print '<td colspan="7" class="X Po">general punctuation';
330 elsif ($value == 0x207) {
331 print '<td colspan="3" class="X Latin">suþscript'; # suth now means "sub and/or sup"
333 elsif ($value == 0x20A) {
334 print '<td colspan="3" class="X Sc">currency';
336 elsif ($value == 0x20D) {
337 print '<td colspan="3" class="X Mn">overlay';
339 elsif ($value == 0x210) {
340 print '<td colspan="5" class="X So">letterlike';
342 elsif ($value == 0x215) {
343 print '<td colspan="4" class="X Latin">number';
345 elsif ($value == 0x219) {
346 print '<td colspan="7" class="X So">arrows';
348 elsif ($value == 0x220) {
349 print '<td colspan="16" class="X Sm">mathematical symbols';
351 elsif ($value == 0x230) {
352 print '<td colspan="16" class="X So">miscellaneous technical';
354 elsif ($value == 0x240) {
355 print '<td colspan="4" class="X So">control';
357 elsif ($value == 0x244) {
358 print '<td colspan="2" class="X So">OCR';
360 elsif ($value == 0x246) {
361 print '<td colspan="10" class="X Latin">enclosed alphanumerics';
363 elsif ($value == 0x250) {
364 print '<td colspan="8" class="X So">box drawing';
366 elsif ($value == 0x258) {
367 print '<td colspan="2" class="X So">blocks';
369 elsif ($value == 0x25A) {
370 print '<td colspan="6" class="X So">geometric shapes';
372 elsif ($value == 0x260) {
373 print '<td colspan="16" class="X So">miscellaneous symbols';
375 elsif ($value == 0x270) {
376 print '<td colspan="12" class="X So">dingbats';
378 elsif ($value == 0x27C) {
379 print '<td colspan="3" class="X So">maths-A';
381 elsif ($value == 0x27F) {
382 print '<td colspan="1" class="X So" title="supplemental arrows-A">arr';
384 elsif ($value == 0x280) {
385 print '<td colspan="16" class="X L Alpha">braille';
387 elsif ($value == 0x290) {
388 print '<td colspan="8" class="X So">supplemental arrows-B';
390 elsif ($value == 0x298) {
391 print '<td colspan="8" class="X Sm">mathematical symbols-B';
393 elsif ($value == 0x2A0) {
394 print '<td colspan="16" class="X Sm">supplemental mathematical operators';
396 elsif ($value == 0x2B0) {
397 print '<td colspan="16" class="X So">miscellaneous symbols and arrows';
399 elsif ($value == 0x2C0) {
400 print '<td colspan="6" class="X L Cyrillic">glagolitic';
402 elsif ($value == 0x2C6) {
403 print '<td colspan="2" class="X L Latin">latin-C';
405 elsif ($value == 0x2C8) {
406 print '<td colspan="8" class="X L Greek">coptic';
408 elsif ($value == 0x2D0) {
409 print '<td colspan="3" class="X L Aramaic">georgian+';
411 elsif ($value == 0x2D3) {
412 print '<td colspan="5" class="X L Alpha">tifinagh'; #TODO: proto-canaanite
414 elsif ($value == 0x2D8) {
415 print '<td colspan="6" class="X L African">ethiopic+';
417 elsif ($value == 0x2DE) {
418 print '<td colspan="2" class="X L Cyrillic">cyrl-A';
420 elsif ($value == 0x2E0) {
421 print '<td colspan="8" class="X Po">punctuation+';
423 elsif ($value == 0x2E8) {
424 print '<td colspan="8" class="X L Han">cjk radicals';
426 elsif ($value == 0x2F0) {
427 print '<td colspan="14" class="X L Han">kangxi radicals';
429 elsif ($value == 0x2FE) {
430 print '<td colspan="1" class="">';
432 elsif ($value == 0x2FF) {
433 print '<td colspan="1" class="X So Han">idc';
435 elsif ($value == 0x300) {
436 print '<td colspan="4" class="X Po Han">cjk misc';
438 elsif ($value == 0x304) {
439 print '<td colspan="6" class="X L Hiragana">hiragana';
441 elsif ($value == 0x30A) {
442 print '<td colspan="6" class="X L Katakana">katakana';
444 elsif ($value == 0x310) {
445 print '<td colspan="3" class="X L Bopomofo">bopomofo';
# Same script class as the other hangeul labels.
447 elsif ($value == 0x313) {
448 print '<td colspan="6" class="X L Hangul">hangeul compat';
450 elsif ($value == 0x319) {
451 print '<td colspan="1" class="X L Han" title="kanbun">kbn';
453 elsif ($value == 0x31A) {
454 print '<td colspan="2" class="X L Bopomofo" title="bopomofo extended">bpmf';
456 elsif ($value == 0x31C) {
457 print '<td colspan="3" class="X L Han" title="CJK strokes">strokes';
459 elsif ($value == 0x31F) {
460 print '<td colspan="1" class="X L Katakana" title="katakana phonetic extensions">k+';
462 elsif ($value == 0x320) {
463 print '<td colspan="16" class="X L Han">enclosed cjk characters';
465 elsif ($value == 0x330) {
466 print '<td colspan="16" class="X Xd L Han">cjk compatibility';
# Multi-row blocks use rowspan for their full rows; a partial last row gets
# its own cell with the top border removed.
468 elsif ($value == 0x340) {
469 print '<td colspan="16" rowspan="25" class="X L Han">cjk unified ideographs extension A';
471 elsif ($value == 0x4D0) {
472 print '<td colspan="12" class="X L Han" style="border-top:none">cjk unified ideographs extension A';
474 elsif ($value == 0x4DC) {
475 print '<td colspan="4" class="X So">hexagrams';
477 elsif ($value == 0x4E0) {
478 print '<td colspan="16" rowspan="82" class="X L Han">cjk unified ideographs';
480 elsif ($value == 0xA00) {
481 print '<td colspan="16" rowspan="4" class="X L Syllabic">yi';
483 elsif ($value == 0xA40) {
484 print '<td colspan="9" class="X L Syllabic" style="border-top:none">yi';
486 elsif ($value == 0xA49) {
487 print '<td colspan="4" class="X L Syllabic">yi radicals';
489 elsif ($value == 0xA4D) {
490 print '<td colspan="3" class="X Xr L Latin">lisu';
492 elsif ($value == 0xA50) {
493 print '<td colspan="16" class="X L Syllabic">vai';
495 elsif ($value == 0xA60) {
496 print '<td colspan="4" class="X L Syllabic" style="border-top:none">vai';
498 elsif ($value == 0xA64) {
499 print '<td colspan="6" class="X L Cyrillic">cyrillic extended-B';
501 elsif ($value == 0xA6A) {
502 print '<td colspan="6" class="X Xr L Syllabic">bamum';
504 elsif ($value == 0xA70) {
505 print '<td colspan="2" class="X L Mn">tones';
507 elsif ($value == 0xA72) {
508 print '<td colspan="14" class="X L Latin">latin extended-D';
510 elsif ($value == 0xA80) {
511 print '<td colspan="3" class="X L Brahmic">sylheti';
513 elsif ($value == 0xA83) {
514 print '<td colspan="1" class="X Xr No">in';
516 elsif ($value == 0xA84) {
517 print '<td colspan="4" class="X L Brahmic">phags-pa';
519 elsif ($value == 0xA88) {
520 print '<td colspan="6" class="X L Brahmic">saurashtra';
522 elsif ($value == 0xA8E) {
523 print '<td colspan="2" class="X Xr L Brahmic">deva+';
525 elsif ($value == 0xA90) {
526 print '<td colspan="3" class="X L Brahmic">kayah li';
528 elsif ($value == 0xA93) {
529 print '<td colspan="3" class="X L Brahmic">rejang';
531 elsif ($value == 0xA96) {
532 print '<td colspan="2" class="X Xr L Hangul">jamo-A';
534 elsif ($value == 0xA98) {
535 print '<td colspan="6" class="X Xr L Brahmic">javanese';
537 elsif ($value == 0xA9E) {
538 print '<td colspan="2" class="">res';
540 elsif ($value == 0xAA0) {
541 print '<td colspan="6" class="X L Brahmic">cham';
543 elsif ($value == 0xAA6) {
544 print '<td colspan="2" class="X Xr L Brahmic" title="myanmar extended-A">mym-A';
546 elsif ($value == 0xAA8) {
547 print '<td colspan="6" class="X Xr L Brahmic Khmer">tai viet';
549 elsif ($value == 0xAAE) {
550 print '<td colspan="2" class="X Xr L Brahmic" title="meetei mayek extended">mtei+';
552 elsif ($value == 0xAB0) {
553 print '<td colspan="12" class="">reserved';
555 elsif ($value == 0xABC) {
556 print '<td colspan="4" class="X Xr L Brahmic" title="meetei mayek">manipuri';
558 elsif ($value == 0xAC0) {
559 print '<td colspan="16" rowspan="43" class="X L Hangul">hangeul syllables';
561 elsif ($value == 0xD70) {
562 print '<td colspan="11" class="X L Hangul" style="border-top:none">hangeul syllables';
564 elsif ($value == 0xD7B) {
565 print '<td colspan="5" class="X L Hangul">hangeul jamo-B';
567 elsif ($value == 0xD80) {
568 print '<td colspan="16" rowspan="4" class="X Cs">high surrogates';
570 elsif ($value == 0xDC0) {
571 print '<td colspan="16" rowspan="4" class="X Cs">low surrogates';
573 elsif ($value == 0xE00) {
574 print '<td colspan="16" rowspan="25" class="X Co">private use';
576 elsif ($value == 0xF90) {
577 print '<td colspan="16" rowspan="2" class="X L Han">cjk compatibility ideographs';
579 elsif ($value == 0xFB0) {
580 print '<td colspan="5" class="X Xd L">presentation';
582 elsif ($value == 0xFB5) {
583 print '<td colspan="11" class="X Xd L Arabic" style="border-bottom:none">';
585 elsif ($value == 0xFC0) {
586 print '<td colspan="16" class="X Xd L Arabic" style="border-top:none; border-bottom:none">arabic presentation forms A';
588 elsif ($value == 0xFD0) {
589 print '<td colspan="13" class="X Xd L Arabic" style="border-top:none">';
591 elsif ($value == 0xFDD) {
592 print '<td colspan="2" class="Xi">?';
594 elsif ($value == 0xFDF) {
595 print '<td colspan="1" class="X Xd L Arabic" style="border-top:none">';
597 elsif ($value == 0xFE0) {
598 print '<td colspan="1" class="X Cc">var';
600 elsif ($value == 0xFE1) {
601 print '<td colspan="1" class="X L Pd">ver';
603 elsif ($value == 0xFE2) {
604 print '<td colspan="1" class="X L Mn">½';
606 elsif ($value == 0xFE3) {
607 print '<td colspan="2" class="X Xd Pd Han">comp';
609 elsif ($value == 0xFE5) {
610 print '<td colspan="2" class="X Xd L Latin">small';
612 elsif ($value == 0xFE7) {
613 print '<td colspan="9" class="X Xd L Arabic">arabic presentation B';
# The ampersand is written as an entity because this text is emitted as HTML.
615 elsif ($value == 0xFF0) {
616 print '<td colspan="15" class="X L Latin">halfwidth &amp; fullwidth forms';
618 elsif ($value == 0xFFF) {
619 print '<td colspan="1" class="X Cc">sp';
# Label table cells by the role a byte value ($value, 0x00..0xFF) plays in
# UTF-8.  Presumably the body of printcell_utf8(), which the rendering loop
# calls for the 'UTF-8' set -- the sub line itself is not visible here.
# The title attributes give the code point range reachable from each group of
# lead bytes, written with a "·" before the last four hex digits.
625 if ($value <= 0x7F) {
626 print '<td rowspan="8" colspan="16" class="X di-a"',
627 ' title="U+0000 – U+007F">single byte ASCII'
630 elsif ($value <= 0xBF) {
631 print '<td rowspan="4" colspan="16" class="X di-d"',
632 '>multi-byte continuation'
# 0xC0 and 0xC1 could only start overlong encodings of U+0000..U+007F.
635 elsif ($value <= 0xC1) {
636 print '<td colspan="2" class="X di-b" style="border-right:none; border-bottom:none"',
637 ' title="U+0000 – U+007F">(overl.)'
640 elsif ($value <= 0xDF) {
641 print '<td rowspan="2" colspan="14" class="X di-prop" style="border-left:none"',
642 ' title="U+0080 – U+03FF">2-byte sequence start'
644 print '<td rowspan="1" colspan="16" class="X di-prop" style="border-top:none"',
645 ' title="U+0400 – U+07FF">'
648 elsif ($value <= 0xEF) {
649 print '<td colspan="16" class="X di-prop"',
650 ' title="U+0800 – U+FFFF">3-byte sequence start'
653 elsif ($value <= 0xF4) {
654 print '<td colspan="5" class="X di-prop" style="border-right:none"',
655 ' title="U+1·0000 – U+10·FFFF">4-byte sequence'
# A 4-byte sequence carries 21 payload bits, so it ends at 0x1FFFFF.
658 elsif ($value <= 0xF7) {
659 print '<td colspan="3" class="X di-b" style="border-left:none"',
660 ' title="U+11·0000 – U+1F·FFFF">(overflow)'
# 5-byte form: 26 payload bits, 0x200000 .. 0x3FFFFFF.
663 elsif ($value <= 0xFB) {
664 print '<td colspan="4" class="X di-b"',
665 ' title="U+20·0000 – U+3FF·FFFF">5-byte'
# 6-byte form: 31 payload bits, 0x4000000 .. 0x7FFFFFFF.
668 elsif ($value <= 0xFD) {
669 print '<td colspan="2" class="X di-b"',
670 ' title="U+400·0000 – U+7FFF·FFFF">6-byte'
673 elsif ($value <= 0xFF) {
674 print '<td colspan="2" class="di-invalid">invalid'
678 print "\n".'<td class="X">?';
# Hex digits 0..F: column headings, and the low nibble of each cell index.
684 my @nibble = (0..9, 'A'..'F');
# Emit one list item holding a glyph table per requested character set.
685 for my $row (@request) {
686 print '<li><table class="glyphs">';
687 printf '<caption>%s</caption>', $row->{set};
# Header row: a corner cell followed by one heading per hex digit.
689 for my $section (qw{thead}) {
690 print "<$section><tr><th>↱";
691 print '<th>', $_ for @nibble;
# One table row per 16 cells; a missing or empty table string counts as 256
# cells.  Note that "-" binds tighter than ">>", so this is (cells - 1) >> 4.
695 for my $msb (0 .. (length($row->{table}) || 256) - 1 >> 4) {
# Row heading: the row index shifted by the table offset, in hex.
696 printf '<tr><th>%X', $msb + ($row->{offset} >> 4);
697 for my $lsb (0 .. $#nibble) {
# The UTF-8 and Unicode BMP sets are labelled by dedicated printers, which
# receive the cell index instead of a character.
698 if ($row->{set} eq 'UTF-8') {
699 printcell_utf8(($msb<<4) + $lsb);
702 elsif ($row->{set} eq 'Unicode BMP') {
703 printcell_unicode(($msb<<4) + $lsb);
# All other sets: take the single character stored at this cell index.
707 my $glyph = substr $row->{table}, ($msb<<4) + $lsb, 1;
# Cells holding $NOCHAR are special-cased.
708 if ($glyph eq $NOCHAR) {
# Default info is just the code point; a digraph entry, when one exists for
# this code point, supplies name, property, script and display string too.
712 my $info = [ord $glyph];
713 if (defined (my $mnem = $di{ord $glyph})) {
714 $info = $diinfo->{$mnem};
716 my ($codepoint, $name, $prop, $script, $string) = @$info;
# Show the entry's display string when it has one, else the character itself;
# either way passed through quote().
718 $glyph = quote($string || $glyph);
# Tooltip text: "U+XXXX", followed by " (name)" when a name is known.
719 my $desc = sprintf 'U+%04X%s', $codepoint, $name && " ($name)";
# CSS classes: always X, plus property and script when they are non-empty.
720 my @class = ('X', grep {$_} $prop, $script);
# Characters with property 'Zs' are wrapped in a span.
722 $glyph = "<span>$glyph</span>" if $prop eq 'Zs';
724 printf "\n".'<td class="%s" title="%s">%s',
725 join(' ', @class), quote($desc), $glyph;
738 <a href="http://sheet.shiar.nl/" rel="home">sheet.shiar.nl</a>/charset
739 <a href="git://git.shiar.nl/sheet" rel="vcs-git" title="Git repository"><:= "v$VERSION" :></a>
740 created by <a href="http://shiar.nl/" rel="author">Shiar</a> •
741 <a title="Licensed under the GNU Affero General Public License, version 3" rel="copyright"
742 href="http://www.fsf.org/licensing/licenses/agpl-3.0.html">AGPLv3</a> •
744 use Time::Format qw(time_format);
# Print the modification time of the running script file (element 9 of the
# stat list) formatted as yyyy-mm-dd.
745 print time_format('yyyy-mm-dd', (stat $ENV{SCRIPT_FILENAME})[9]);