From: Mischa POSLAWSKY Date: Fri, 12 Sep 2008 20:32:22 +0000 (+0000) Subject: digraphs: show unicode character details X-Git-Tag: v1.2~64 X-Git-Url: http://git.shiar.nl/sheet.git/commitdiff_plain/95a62b8397c02f6103132a3c248df2c50e6c8829 digraphs: show unicode character details Get name, script, and category for each character from the Unicode Character Database. Set up various classes to decorate different properties/groups, and output char names as titles. --- diff --git a/digraphs.plp b/digraphs.plp index 9f0b370..93dfd75 100644 --- a/digraphs.plp +++ b/digraphs.plp @@ -4,6 +4,8 @@ use strict; use warnings; use open IO => ':utf8'; +use Unicode::UCD qw(charinfo); + our $VERSION = '1.0'; $header{content_type} = 'text/html; charset=utf-8'; @@ -24,6 +26,16 @@ table { border-collapse: collapse; table-layout: fixed; /* prevent resizing, notably in msie6 */ } +#legend { + margin-top: 1em; +} +#legend table { + width: 100%; + table-layout: auto; +#} +#legend td { + padding: 0 0.2em; +} thead th, td { width: 1.2em; /* msie only looks at the first row */ min-width: 1em; /* prevents gecko from restricting to page width */ @@ -35,14 +47,52 @@ td { border: 1px solid #888; background: #DDD; } -td.any { +td.X { background: #FFF; } + +td.Lm, td.Mc, td.Me, td.Zl, td.Zp, td.Cs {background:red} /* unknown */ + +/* letters */ +td.Greek {background: #FFE0CF} +td.Cyrillic {background: #FFDDA8} +td.Latin {background: #FFB} +td.Hebrew {background: #FFD} +td.Arabic {background: #EFE} +td.Hiragana {background: #DFC} +td.Katakana {background: #DFA} +td.Bopomofo {background: #BFC} + +td.Nd, td.Nl, +td.No {background: #FBB} /* number */ +td.Sc {background: #FCD} /* currency */ +td.Sm {background: #ECE} /* math */ +td.So {background: #DDCCFF} /* symbol */ +td.Cf, td.Pd, +td.Po {background: #CDF} /* punctuation */ +td.Ps, td.Pe, td.Pi, +td.Pf {background: #BEF} /* quote */ +td.Lm, +td.Sk {background: #CEE} /* spacing modifier */ +td.Mn {background: #ACC} /* modifier */ +td.Cc {background: #BBB; color: #666} /* control */ +td.Zs {background: #ACB} /* space */ +td.Zs span {background: #EEE} + +td.Xa {color: #040} /* ascii */ +td.Xl {color: #080} /* latin1 */ +td.Co {color: #800} /* private */ +td.Xz {color: #F00} /* proposed */ + +tr:hover td { + background: #FF8; +}

RFC-1345 Digraphs

+ <: my $di = do 'digraphs.inc.pl'; @@ -56,13 +106,17 @@ sub quote { my @chars = ((map {chr} ord '!' .. ord 'Z'), 'a'..'z'); splice @chars, $_, 1, () for 2, 3-1, 5-2, 31-3; # remove character exceptions # $ & @ +my @chars2 = (@chars, '_'); # trailing character (extended set) + print ''; -print ''; for my $c1 (@chars) { print "
 '; -print "$_" for @chars, '_'; +for my $section (qw{thead tfoot}) { + print "<$section>
 "; + print "$_" for @chars2; +} print '
$c1"; - for my $c2 (@chars, '_') { + for my $c2 (@chars2) { my $mnem = $c1 . $c2; if (not defined $di->{$mnem}) { print ''; @@ -71,13 +125,53 @@ for my $c1 (@chars) { my $chr = $di->{$mnem}; my $glyph = chr $chr; utf8::upgrade($glyph); # prevent latin1 output + my $info = charinfo($chr); - my @class = 'any'; + my $desc = $mnem; + $desc .= " ($_)" for $info->{name} || (); - printf '%s', - join(' ', @class), quote($mnem), quote($glyph); + my @class = 'X'; + push @class, $_ for $info->{category} || (); + push @class, $_ for $info->{script} || (); + + $glyph = quote($glyph); + $glyph = "$glyph" if $info->{category} eq 'Zs'; + + printf "\n".'%s', + join(' ', @class), quote($desc), $glyph; } - print "\n"; + print "\n$c1\n"; } print "
\n"; +:> +
+ +
control + spacing + modifier + spacing modifier + quote + punctuation + symbol + math + currency + numeric + greek + cyrillic + latin + hebrew + arabic + japanese + chinese +
+ + +
unicode + ascii + latin1 + private + proposed +
+
+