c9ef2b699501c55daf574b029e2316780d75d451
[sheet.git] / charset.plp
1 <:
2 use utf8;
3 use strict;
4 use warnings;
5 use open IO => ':utf8';
6
7 our $VERSION = '1.0';
8
9 $header{content_type} = 'text/html; charset=utf-8';
10
11 :><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
12  "http://www.w3.org/TR/html4/loose.dtd">
13 <html>
14
15 <head>
16 <title>charset cheat sheet</title>
17 <meta http-equiv="content-type" content="utf-8">
18 <link rel="stylesheet" type="text/css" media="all" href="/base.css">
19 </head>
20
21 <body>
22 <h1>Character encoding</h1>
23
24 <:
25 my $diinfo = do 'digraphs.inc.pl';
26 my %di = map { $diinfo->{$_}->[0] => $_ } grep { ref $diinfo->{$_} }
27         keys %$diinfo;
28
29 use Encode qw(decode);
30 # generate character table(s)
31 # (~16x faster than decoding in loop;
32 #  substr strings is twice as fast as splitting to an array)
33 my @request = ('iso-8859-1', 'cp437');
34 my @tables = map { decode($_, pack 'C*', 0..255) } @request;
35 my $NOCHAR = chr 0xFFFD;
36
37 for my $cp437 (grep {$request[$_] eq 'cp437'} 0 .. $#request) {
38         substr($tables[$cp437], 237, 1) = pack 'U*', 0x3D5; # phi sign
39         substr($tables[$cp437], 0, 32) = pack 'U*', map {hex} qw(
40                 2007 263A 263B 2665 2666 2663 2660 2022 25D8 25CB 25D9 2642 2640 266A 266B 263C
41                 25BA 25C4 2195 203C 00B6 00A7 25AC 21A8 2191 2193 2192 2190 221F 2194 25B2 25BC
42         );
43 }
44
45 sub quote {
46         local $_ = shift;
47         s/"/&quot;/g;
48         s/</&lt;/g;
49         s/>/&gt;/g;
50         return $_;
51 }
52
53 print "<ul>\n";
54
55 my @nibble = (0..9, 'A'..'F');
56 for my $tablenum (0 .. $#tables) {
57         print '<li><table class="glyphs">';
58         printf '<caption>%s</caption>', $request[$tablenum];
59         print '<col>';
60         for my $section (qw{thead}) {
61                 print "<$section><tr><th>↱";
62                 print '<th>', $_ for @nibble;
63                 print "\n";
64         }
65         print '<tbody>';
66         for my $msb (0 .. $#nibble) {
67                 print '<tr><th>', $nibble[$msb];
68                 for my $lsb (0 .. $#nibble) {
69                         my $glyph = substr $tables[$tablenum], ($msb<<4) + $lsb, 1;
70                         if ($glyph eq $NOCHAR) {
71                                 print '<td>';
72                                 next;
73                         }
74                         my $info = [ord $glyph];
75                         if (defined (my $mnem = $di{ord $glyph})) {
76                                 $info = $diinfo->{$mnem};
77                         }
78                         my ($codepoint, $name, $prop, $script, $string) = @$info;
79
80                         $glyph = quote($string || $glyph);
81                         my $desc = sprintf 'U+%04X%s', $codepoint, $name && " ($name)";
82                         my @class = ('X', grep {$_} $prop, $script);
83
84                         $glyph = "<span>$glyph</span>" if $prop eq 'Zs';
85
86                         printf "\n".'<td class="%s" title="%s">%s',
87                                 join(' ', @class), quote($desc), $glyph;
88                 }
89                 print "\n";
90         }
91         print "</table>\n";
92 }
93
94 {
95         print '<li><table class="glyphs"><caption>UTF-8</caption><col>';
96         for my $section (qw{thead}) {
97                 print "<$section><tr><th>↱";
98                 print '<th>', $_ for @nibble;
99                 print "\n";
100         }
101         print '<tbody>';
102         print '<tr rowspan="8">';
103         for my $msb (0 .. $#nibble) {
104                 print '<tr><th>', $nibble[$msb];
105                 for my $lsb (0 .. $#nibble) {
106                         my $value = ($msb<<4) + $lsb;
107                         if ($value <= 0x7F) {
108                                 print '<td rowspan="8" colspan="16" class="X di-a"',
109                                       ' title="U+0000 – U+007F">Single byte ASCII'
110                                         if $value == 0;
111                         }
112                         elsif ($value <= 0xBF) {
113                                 print '<td rowspan="4" colspan="16" class="X di-d"',
114                                       '>Multi-byte continuation'
115                                         if $value == 0x80;
116                         }
117                         elsif ($value <= 0xC1) {
118                                 print '<td colspan="2" class="X di-b" style="border-right:none; border-bottom:none"',
119                                       ' title="U+0000 – U+007F">(Overl.)'
120                                         if $value == 0xC0;
121                         }
122                         elsif ($value <= 0xDF) {
123                                 print '<td rowspan="2" colspan="14" class="X di-prop" style="border-left:none"',
124                                       ' title="U+0080 – U+03FF">2-byte sequence start'
125                                         if $value == 0xC2;
126                                 print '<td rowspan="1" colspan="16" class="X di-prop" style="border-top:none"',
127                                       ' title="U+0400 – U+07FF">'
128                                         if $value == 0xD0;
129                         }
130                         elsif ($value <= 0xEF) {
131                                 print '<td colspan="16" class="X di-prop"',
132                                       ' title="U+0800 – U+FFFF">3-byte sequence start'
133                                         if $value == 0xE0;
134                         }
135                         elsif ($value <= 0xF4) {
136                                 print '<td colspan="5" class="X di-prop" style="border-right:none"',
137                                       ' title="U+1·0000 – U+10·FFFF">4-byte sequence'
138                                         if $value == 0xF0;
139                         }
140                         elsif ($value <= 0xF7) {
141                                 print '<td colspan="3" class="X di-b" style="border-left:none"',
142                                       ' title="U+11·0000 – U+1FF·FFFF">(Overflow)'
143                                         if $value == 0xF5;
144                         }
145                         elsif ($value <= 0xFB) {
146                                 print '<td colspan="4" class="X di-b"',
147                                       ' title="U+200·0000 – U+3FFF·FFFF">5-byte'
148                                         if $value == 0xF8;
149                         }
150                         elsif ($value <= 0xFD) {
151                                 print '<td colspan="2" class="X di-b"',
152                                       ' title="U+4000·0000 – 7FFFF·FFFF">6-byte'
153                                         if $value == 0xFC;
154                         }
155                         elsif ($value <= 0xFF) {
156                                 print '<td colspan="2" class="di-invalid">Invalid'
157                                         if $value == 0xFE;
158                         }
159                         else {
160                                 print "\n".'<td class="X">?';
161                         }
162                 }
163                 print "\n";
164         }
165         print "</table>\n";
166 }
167
168 print "</ul>\n";
169
170 :>
171 <hr>
172
173 <p class="footer">
174         <a href="http://sheet.shiar.nl/" rel="home">sheet.shiar.nl</a>/charset
175         <a href="git://git.shiar.nl/sheet" rel="vcs-git" title="Git repository"><:= "v$VERSION" :></a>
176         created by <a href="http://shiar.nl/" rel="author">Shiar</a> •
177         <a title="Licensed under the GNU Affero General Public License, version 3" rel="copyright"
178            href="http://www.fsf.org/licensing/licenses/agpl-3.0.html">AGPLv3</a> •
179         last update <:
180                 use Time::Format qw(time_format);
181                 print time_format('yyyy-mm-dd', (stat $ENV{SCRIPT_FILENAME})[9]);
182         :>
183 </p>
184
185 </html>