cc: improve abbreviations; show options legend
[sheet.git] / charset.plp
index f546f2d2c6a99fb06d4e60fd90ee22a327a99daf..73f93772f26738c1fbb37304d465841ebbe73388 100644 (file)
@@ -4,7 +4,7 @@ use strict;
 use warnings;
 use open IO => ':utf8';
 
-our $VERSION = '1.0';
+our $VERSION = 'v1.0';
 
 $header{content_type} = 'text/html; charset=utf-8';
 
@@ -13,12 +13,12 @@ $header{content_type} = 'text/html; charset=utf-8';
 <html>
 
 <head>
+<meta http-equiv="content-type" content="<:= $header{content_type} :>">
 <title>charset cheat sheet</title>
-<meta http-equiv="content-type" content="utf-8">
 <link rel="stylesheet" type="text/css" media="all" href="/base.css">
 </head>
 
-<body>
+<body id="charset">
 <h1>Character encoding</h1>
 
 <:
@@ -53,7 +53,20 @@ my @request = map {
                if ($input =~ s/-$//) {
                        $endpoint = $row{offset} ? $row{offset} < 160 ? 159 : 191 : 127;
                }
-               if ($row{set} = resolve_alias($input)) {
+
+               if ($input =~ /^U([0-9a-f]+)(?:-([0-9a-f]+))?/) {  # U<hex> or U<hex>-<hex>
+                       my $start = hex($1) << (defined $2 ? 4 : 8);  # range: rows of 16; single: block of 256
+                       my $end = defined $2 ? hex($2) << 4 : $start + 240;  # defined, since end row '0' is false
+                       $row{table} = join '', map { chr } $start .. $end+15;
+                       utf8::upgrade($row{table});  # prevent latin1 output
+                       $row{set} = sprintf 'Unicode block U+%02Xxx', $start >> 8;
+               }
+               elsif ($input eq 'U') {
+                       $row{table} = ' ' x 512;
+                       $row{set} = 'Unicode planes';
+                       $row{cell} = do 'charset-ucplanes.inc.pl';
+               }
+               elsif ($row{set} = resolve_alias($input)) {
                        if ($row{set} eq 'Internal') {
                                $row{table} = ' ' x ($endpoint < 255 ? 640 : 4096);
                                $row{set} = 'Unicode BMP';
@@ -96,13 +109,11 @@ sub quote {
        return $_;
 }
 
-print "<ul>\n";
-
 my @nibble = (0..9, 'A'..'F');
 for my $row (@request) {
-       print '<li><table class="glyphs">';
+       printf '<div class="section"><table class="glyphs%s">', !$row->{cell} && ' charmap';
        printf '<caption>%s</caption>', $row->{set};
-       print '<col>';
+       print '<col>' x 17;
        for my $section (qw{thead}) {
                print "<$section><tr><th>↱";
                print '<th>', $_ for @nibble;
@@ -127,6 +138,11 @@ for my $row (@request) {
                        if (defined (my $mnem = $di{ord $glyph})) {
                                $info = $diinfo->{$mnem};
                        }
+                       else {
+                               require Unicode::UCD;
+                               my $fullinfo = Unicode::UCD::charinfo(ord $glyph);
+                               $info = [@$fullinfo{qw/code name category script string/}] if $fullinfo;
+                       }
                        my ($codepoint, $name, $prop, $script, $string) = @$info;
 
                        $glyph = quote($string || $glyph);
@@ -140,24 +156,61 @@ for my $row (@request) {
                }
                print "\n";
        }
-       print "</table>\n";
+       print "</table></div>\n";
 }
 
-print "</ul>\n";
-
 :>
 <hr>
 
+<div class="legend">
+       <table class="glyphs"><tr>
+       <td class="X Cc">control
+       <td class="X Zs"><span>whitespace</span>
+       <td class="X Mn">diacritic<table class="glyphs"><tr>
+               <td class="X Sk">letter
+               </table>
+       <td class="X Po">punctuation<table class="glyphs"><tr>
+               <td class="X Pf">quote
+               </table>
+       <td class="X So">symbol<table class="glyphs"><tr>
+               <td class="X Sm">math
+               <td class="X Sc">currency
+               </table>
+       <td class="X No">numeric
+       <td class="X Greek">greek<table class="glyphs"><tr>
+               <td class="X Latin">latin
+               <td class="X Cyrillic">cyrillic
+               </table>
+       <td class="X Aramaic">aramaic<table class="glyphs"><tr>
+               <td class="X Brahmic">brahmic
+               <td class="X Arabic">arabic
+               </table>
+       <td class="X Syllabic">syllabic<table class="glyphs"><tr>
+               <td class="X African">african
+               <td class="X Hiragana">japanese
+               <td class="X Han">cjk
+               <td class="X Bopomofo">chinese
+               </table>
+       <td class="X Alpha">alphabetic
+       </table>
+
+       <table class="glyphs"><tr>
+       <td class="X">unicode 5.0
+       <td class="X Xr">proposed
+       <td class="X Xd">deprecated
+       <td class="">unassigned
+       <td class="X Xi">invalid
+       </table>
+</div>
+
 <p class="footer">
-       <a href="http://sheet.shiar.nl/" rel="home">sheet.shiar.nl</a>/charset
-       <a href="git://git.shiar.nl/sheet" rel="vcs-git" title="Git repository"><:= "v$VERSION" :></a>
+       <a href="/" rel="home">sheet.shiar.nl</a>/charset.<a href="/source/charset.plp"
+        rel="code" title="Written in Perl">plp</a>
+       <a href="http://git.shiar.nl/sheet.git/history/HEAD:/charset.plp"
+        rel="vcs-git" title="Git repository"><:= $VERSION :></a>
        created by <a href="http://shiar.nl/" rel="author">Shiar</a> •
-       <a title="Licensed under the GNU Affero General Public License, version 3" rel="copyright"
-          href="http://www.fsf.org/licensing/licenses/agpl-3.0.html">AGPLv3</a> •
-       last update <:
-               use Time::Format qw(time_format);
-               print time_format('yyyy-mm-dd', (stat $ENV{SCRIPT_FILENAME})[9]);
-       :>
+       <a href="http://www.fsf.org/licensing/licenses/agpl-3.0.html" rel="copyright"
+        title="Licensed under the GNU Affero General Public License, version 3">AGPLv3</a>
 </p>
 
 </html>