remember given style option in cookie
[sheet.git] / charset.plp
1 <(common.inc.plp)><:
2         our $VERSION = 'v1.0';
3
4 :><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
5  "http://www.w3.org/TR/html4/loose.dtd">
6 <html>
7
8 <head>
9 <meta http-equiv="content-type" content="<:= $header{content_type} :>">
10 <title>charset cheat sheet</title>
11 <:= stylesheet(qw'light') :>
12 </head>
13
14 <body id="charset">
15 <h1>Character encoding</h1>
16
17 <:
18 my $diinfo = do 'digraphs.inc.pl';
19 my %di = map { $diinfo->{$_}->[0] => $_ } grep { ref $diinfo->{$_} }
20         keys %$diinfo;
21
22 use Encode qw(decode resolve_alias);
23 # generate character table(s)
24 # (~16x faster than decoding in loop;
25 #  substr strings is twice as fast as splitting to an array)
26 my %ALIAS = (
27 #       default => [qw(unicode utf-8 iso-8859-1 cp437 -cp1252- --iso-8859-15- -koi8-f)],
28         default => [qw(unicode- utf-8 iso-8859-1 -cp1252- --iso-8859-15- cp437 -cp850)],
29         0 => [qw(cp437 cp863)],
30         1 => [qw(iso-8859-1 cp1252 MacRoman cp850)],
31         2 => [qw(iso-8859-2 cp1250 cp852 MacCentralEurRoman MacCroatian MacRumanian)],
32         5 => [qw(koi8-f iso-8859-5 cp1251 MacCyrillic cp855 cp866)],
33         7 => [qw(iso-8859-7 cp1253 MacGreek cp737 cp869)],
34         8 => [qw(iso-8859-8 cp1255 MacHebrew cp862)],
35 );
36 my @request = map {
37         if (my $input = $_) {
38                 my %row = (offset => 0);
39                 my $endpoint = 255;
40                 if ($input =~ s/^--//) {
41                         $row{offset} = $endpoint > 160 ? 160 : 48;
42                 }
43                 elsif ($input =~ s/^-//) {
44                         $row{offset} = $endpoint > 128 ? 128 : 32;
45                 }
46                 if ($input =~ s/-$//) {
47                         $endpoint = $row{offset} ? $row{offset} < 160 ? 159 : 191 : 127;
48                 }
49
50                 if ($input =~ /^U([0-9a-f]+)(?:-([0-9a-f]+))?/) {
51                         my $start = hex($1) << ($2 ? 4 : 8);
52                         my $end = $2 ? hex($2) << 4 : $start + 240;
53                         $row{table} = join '', map { chr } $start .. $end+15;
54                         utf8::upgrade($row{table});  # prevent latin1 output
55                         $row{set} = sprintf 'Unicode block U+%02Xxx', $start >> 8;
56                 }
57                 elsif ($input eq 'U') {
58                         $row{table} = ' ' x 512;
59                         $row{set} = 'Unicode planes';
60                         $row{cell} = do 'charset-ucplanes.inc.pl';
61                 }
62                 elsif ($row{set} = resolve_alias($input)) {
63                         if ($row{set} eq 'Internal') {
64                                 $row{table} = ' ' x ($endpoint < 255 ? 640 : 4096);
65                                 $row{set} = 'Unicode BMP';
66                                 $row{cell} = do 'charset-unicode.inc.pl';
67                         }
68                         elsif ($row{set} eq 'utf-8-strict') {
69                                 $row{table} = undef;
70                                 $row{set} = 'UTF-8';
71                                 $row{cell} = do 'charset-utf8.inc.pl';
72                         }
73                         else {
74                                 $row{table} = decode($row{set}, pack 'C*', $row{offset} .. $endpoint);
75                         }
76                 }
77                 else {
78                         print "<p>Encoding $input unknown</p>\n";
79                 }
80                 \%row;
81         }
82         else {
83                 ();
84         }
85 } map { defined $ALIAS{$_} ? @{ $ALIAS{$_} } : $_ }
86         $ENV{PATH_INFO} =~ /\w/ ? split(m{[/+\s]}, $ENV{PATH_INFO}) : 'default';
87 my $NOCHAR = chr 0xFFFD;
88
89 for my $cp437 (grep {$request[$_]->{set} eq 'cp437'} 0 .. $#request) {
90         substr($request[$cp437]->{table}, 237, 1) = pack 'U*', 0x3D5; # phi sign
91         substr($request[$cp437]->{table}, 0, 32) = pack 'U*', map {hex} qw(
92                 2007 263A 263B 2665 2666 2663 2660 2022 25D8 25CB 25D9 2642 2640 266A 266B 263C
93                 25BA 25C4 2195 203C 00B6 00A7 25AC 21A8 2191 2193 2192 2190 221F 2194 25B2 25BC
94         );
95 }
96
97 sub quote {
98         local $_ = shift;
99         s/"/&quot;/g;
100         s/</&lt;/g;
101         s/>/&gt;/g;
102         return $_;
103 }
104
105 my @nibble = (0..9, 'A'..'F');
106 for my $row (@request) {
107         printf '<div class="section"><table class="glyphs%s">', !$row->{cell} && ' charmap';
108         printf '<caption>%s</caption>', $row->{set};
109         print '<col>' x 17;
110         for my $section (qw{thead}) {
111                 print "<$section><tr><th>↱";
112                 print '<th>', $_ for @nibble;
113                 print "\n";
114         }
115         print '<tbody>';
116         for my $msb (0 .. (length($row->{table}) || 256) - 1 >> 4) {
117                 printf '<tr><th>%X', $msb + ($row->{offset} >> 4);
118                 for my $lsb (0 .. $#nibble) {
119                         if ($row->{cell}) {
120                                 print $row->{cell}->(($msb<<4) + $lsb);
121                                 next;
122                         }
123
124                         my $glyph = substr $row->{table}, ($msb<<4) + $lsb, 1;
125                         if ($glyph eq $NOCHAR) {
126                                 print '<td>';
127                                 next;
128                         }
129
130                         my $info = [ord $glyph];
131                         if (defined (my $mnem = $di{ord $glyph})) {
132                                 $info = $diinfo->{$mnem};
133                         }
134                         else {
135                                 require Unicode::UCD;
136                                 my $fullinfo = Unicode::UCD::charinfo(ord $glyph);
137                                 $info = [@$fullinfo{qw/code name category script string/}] if $fullinfo;
138                         }
139                         my ($codepoint, $name, $prop, $script, $string) = @$info;
140
141                         $glyph = quote($string || $glyph);
142                         my $desc = sprintf 'U+%04X%s', $codepoint, $name && " ($name)";
143                         my @class = ('X', grep {$_} $prop, $script);
144
145                         $glyph = "<span>$glyph</span>" if $prop eq 'Zs';
146
147                         printf "\n".'<td class="%s" title="%s">%s',
148                                 join(' ', @class), quote($desc), $glyph;
149                 }
150                 print "\n";
151         }
152         print "</table></div>\n";
153 }
154
155 :>
156 <hr>
157
158 <div class="legend">
159         <table class="glyphs"><tr>
160         <td class="X Cc">control
161         <td class="X Zs"><span>whitespace</span>
162         <td class="X Mn">diacritic<table class="glyphs"><tr>
163                 <td class="X Sk">letter
164                 </table>
165         <td class="X Po">punctuation<table class="glyphs"><tr>
166                 <td class="X Pf">quote
167                 </table>
168         <td class="X So">symbol<table class="glyphs"><tr>
169                 <td class="X Sm">math
170                 <td class="X Sc">currency
171                 </table>
172         <td class="X No">numeric
173         <td class="X Greek">greek<table class="glyphs"><tr>
174                 <td class="X Latin">latin
175                 <td class="X Cyrillic">cyrillic
176                 </table>
177         <td class="X Aramaic">aramaic<table class="glyphs"><tr>
178                 <td class="X Brahmic">brahmic
179                 <td class="X Arabic">arabic
180                 </table>
181         <td class="X Syllabic">syllabic<table class="glyphs"><tr>
182                 <td class="X African">african
183                 <td class="X Hiragana">japanese
184                 <td class="X Han">cjk
185                 <td class="X Bopomofo">chinese
186                 </table>
187         <td class="X Alpha">alphabetic
188         </table>
189
190         <table class="glyphs"><tr>
191         <td class="X">unicode 5.0
192         <td class="X Xr">proposed
193         <td class="X Xd">deprecated
194         <td class="">unassigned
195         <td class="X Xi">invalid
196         </table>
197 </div>
198
199 <p class="footer">
200         <a href="/" rel="home">sheet.shiar.nl</a>/charset.<a href="/source/charset.plp"
201          rel="code" title="Written in Perl">plp</a>
202         <a href="http://git.shiar.nl/sheet.git/history/HEAD:/charset.plp"
203          rel="vcs-git" title="Git repository"><:= $VERSION :></a>
204         created by <a href="http://shiar.nl/" rel="author">Shiar</a> •
205         <a href="http://www.fsf.org/licensing/licenses/agpl-3.0.html" rel="copyright"
206          title="Licensed under the GNU Affero General Public License, version 3">AGPLv3</a>
207 </p>
208
209 </html>