From 47ca3b887159be3f6e047d093ed1bafac4adc706 Mon Sep 17 00:00:00 2001
From: Mischa POSLAWSKY
Date: Sun, 19 Apr 2009 18:39:53 +0000
Subject: [PATCH] charset: describe unicode BMP blocks

---
# Reviewer notes. Text between the "---" line and the first "diff --git"
# line is commentary and is not part of the change itself.
#
# base.css
#   * Moves the generic `ul` reset above the "keyboard" comment, so the
#     comment now sits directly above the ul#rows / li.rowN rules.
#   * Adds background colours for more td.<script> classes; td.Hangul now
#     shares one rule with td.Syllabic (same #DEA as before).
#
# charset.plp
#   * @tables: a request named 'Internal' gets a placeholder string
#     (' ' x 512) instead of a decode()d table of bytes 0..255.
#   * printcell_unicode($value): new sub. Prints "\n".'?' when $value is
#     above 0x1FF, prints a fixed label when $value equals one of the listed
#     cell indexes, and prints nothing for every other index.
#   * Table loop: the last row index is ((length(table) || 256) - 1) >> 4
#     ('-' binds tighter than '>>', which binds tighter than '..'). The
#     fallback of 256 applies when the length is false; the 'utf-8-strict'
#     entry of @tables is undef. The row header is now printf '%X' of the
#     row number instead of $nibble[$msb], and 'Internal' cells are
#     delegated to printcell_unicode.
#
# NOTE(review): cell indexes look like code point / 16 (0x40 'cyrillic',
#   0x1E0 'latin extended additional', 0x1F0 'greek+') - confirm.
# NOTE(review): several literals are empty or bare labels (print '', ...);
#   presumably markup was lost before this copy was made - verify against
#   the repository before applying.
# NOTE(review): line breaks and indentation in this copy are reconstructed.
#   The second base.css hunk announces 12/19 lines but only 10/17 are
#   visible, presumably two blank context lines are missing.
 base.css    |  14 +++-
 charset.plp | 236 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 244 insertions(+), 6 deletions(-)

diff --git a/base.css b/base.css
index d36f4df..b535be3 100644
--- a/base.css
+++ b/base.css
@@ -63,13 +63,14 @@ hr ~ .footer {
 	margin: 0;
 }
 
-/* "keyboard" (list of keys) */
-
 ul {
 	margin: 0;
 	padding: 0;
 	list-style: none;
 }
+
+/* "keyboard" (list of keys) */
+
 ul#rows {margin-top: -5ex} /* top (esc) row fits besides header */
 li.row1 {margin-left: 7em} /* row offsets relative to ~6em key width */
 li.row2 {margin-left: 8em}
@@ -252,12 +253,19 @@ td.Xl {color: #070} /* latin1 */
 td.Lm, td.Mc, td.Me, td.Zl, td.Zp, td.Cs {background: #F00} /* unstyled */
 
 /* letter scripts */
+td.Armenian, td.Greek {background: #FFE0CF}
 td.Cyrillic {background: #FFDDA8}
 td.Latin {background: #FFB}
+td.Aramaic, td.Hebrew {background: #FFD}
 td.Arabic {background: #EFE}
-td.Hangul {background: #DEA}
+td.African {background: #DED}
+td.XXXXXX {background: #ACB} /* same as space */
+td.Brahmic {background: #FBB} /* same as number */
+td.Khmer {background: #FBA}
+td.Hangul,
+td.Syllabic {background: #DEA}
 td.Katakana {background: #DFA}
 td.Hiragana {background: #DFC}
 td.Bopomofo {background: #BFC}
diff --git a/charset.plp b/charset.plp
index e9c4fcc..25b69b9 100644
--- a/charset.plp
+++ b/charset.plp
@@ -43,7 +43,7 @@ my @request = grep { defined } map {
 	$_ ? (resolve_alias($_) or print("Encoding $_ unknown") && ()) : ();
 } map { defined $ALIAS{$_} ? @{ $ALIAS{$_} } : $_ }
 $ENV{PATH_INFO} =~ /\w/ ? split(m{[/+\s]}, $ENV{PATH_INFO}) : 'default';
-my @tables = map { $_ eq 'utf-8-strict' ? undef : decode($_, pack 'C*', 0..255) } @request;
+my @tables = map { $_ eq 'utf-8-strict' ? undef : $_ eq 'Internal' ? ' 'x512 : decode($_, pack 'C*', 0..255) } @request;
 my $NOCHAR = chr 0xFFFD;
 
 for my $cp437 (grep {$request[$_] eq 'cp437'} 0 .. $#request) {
@@ -62,6 +62,232 @@ sub quote {
 	return $_;
 }
 
+sub printcell_unicode {
+	my ($value) = @_;
+	if ($value > 0x1FF) {
+		print "\n".'?';
+	}
+	elsif ($value == 0) {
+		print 'control';
+	}
+	elsif ($value == 2) {
+		print 'latin';
+	}
+	elsif ($value == 8) {
+		print 'control';
+	}
+	elsif ($value == 10) {
+		print 'latin supplement';
+	}
+	elsif ($value == 0x10) {
+		print 'latin ext-A';
+	}
+	elsif ($value == 0x18) {
+		print 'latin ext-B';
+	}
+	elsif ($value == 0x20) {
+		print 'latin ext-B';
+	}
+	elsif ($value == 0x25) {
+		print 'IPA';
+	}
+	elsif ($value == 0x2B) {
+		print 'spacing modifier';
+	}
+	elsif ($value == 0x30) {
+		print 'diacritics';
+	}
+	elsif ($value == 0x38) {
+		print 'greek';
+	}
+	elsif ($value == 0x40) {
+		print 'cyrillic';
+	}
+	elsif ($value == 0x50) {
+		print 'cyrillic+';
+	}
+	elsif ($value == 0x53) {
+		print 'armenian';
+	}
+	elsif ($value == 0x58) {
+		print 'hebrew';
+	}
+	elsif ($value == 0x60) {
+		print 'arabic';
+	}
+	elsif ($value == 0x70) {
+		print 'syriac';
+	}
+	elsif ($value == 0x75) {
+		print 'arabic+';
+	}
+	elsif ($value == 0x78) {
+		print 'thaana';
+	}
+	elsif ($value == 0x7C) {
+		print 'nko';
+	}
+	elsif ($value == 0x80) {
+		print 'samaritan';
+	}
+	elsif ($value == 0x84) {
+		print 'manda';
+	}
+	elsif ($value == 0x86) {
+		print 'reserved';
+	}
+	elsif ($value == 0x90) {
+		print 'devanagari';
+	}
+	elsif ($value == 0x98) {
+		print 'bengali';
+	}
+	elsif ($value == 0xA0) {
+		print 'gurmukhi';
+	}
+	elsif ($value == 0xA8) {
+		print 'gujarati';
+	}
+	elsif ($value == 0xB0) {
+		print 'oriya';
+	}
+	elsif ($value == 0xB8) {
+		print 'tamil';
+	}
+	elsif ($value == 0xC0) {
+		print 'telugu';
+	}
+	elsif ($value == 0xC8) {
+		print 'kannada';
+	}
+	elsif ($value == 0xD0) {
+		print 'malayalam';
+	}
+	elsif ($value == 0xD8) {
+		print 'sinhala';
+	}
+	elsif ($value == 0xE0) {
+		print 'thai';
+	}
+	elsif ($value == 0xE8) {
+		print 'lao';
+	}
+	elsif ($value == 0xF0) {
+		print 'tibetan';
+	}
+	elsif ($value == 0x100) {
+		print 'myanmar';
+	}
+	elsif ($value == 0x10A) {
+		print 'georgian';
+	}
+	elsif ($value == 0x110) {
+		print 'hangeul jamo';
+	}
+	elsif ($value == 0x120) {
+		print 'ethiopic';
+	}
+	elsif ($value == 0x130) {
+		print 'ethiopic';
+	}
+	elsif ($value == 0x138) {
+		print 'eth+';
+	}
+	elsif ($value == 0x13A) {
+		print 'cherokee';
+	}
+	elsif ($value == 0x140) {
+		print 'unified canadian aboriginal syllabics';
+	}
+	elsif ($value == 0x160) {
+		print 'unified canadian syllabics';
+	}
+	elsif ($value == 0x168) {
+		print 'ogham';
+	}
+	elsif ($value == 0x16A) {
+		print 'runic';
+	}
+	elsif ($value == 0x170) {
+		print 'tagalog';
+	}
+	elsif ($value == 0x172) {
+		print 'hanun';
+	}
+	elsif ($value == 0x174) {
+		print 'buhid';
+	}
+	elsif ($value == 0x176) {
+		print 'tagb';
+	}
+	elsif ($value == 0x178) {
+		print 'khmer';
+	}
+	elsif ($value == 0x180) {
+		print 'mongolian';
+	}
+	elsif ($value == 0x18B) {
+		print 'canadian+';
+	}
+	elsif ($value == 0x190) {
+		print 'limbu';
+	}
+	elsif ($value == 0x195) {
+		print 'tai le';
+	}
+	elsif ($value == 0x198) {
+		print 'new tai lue';
+	}
+	elsif ($value == 0x19E) {
+		print 'km';
+	}
+	elsif ($value == 0x1A0) {
+		print 'lontara';
+	}
+	elsif ($value == 0x1A2) {
+		print 'tai tham';
+	}
+	elsif ($value == 0x1AB) {
+		print 'reserved';
+	}
+	elsif ($value == 0x1B0) {
+		print 'balinese';
+	}
+	elsif ($value == 0x1B8) {
+		print 'sundanese';
+	}
+	elsif ($value == 0x1BC) {
+		print 'batak';
+	}
+	elsif ($value == 0x1C0) {
+		print 'lepcha';
+	}
+	elsif ($value == 0x1C5) {
+		print 'ol chiki';
+	}
+	elsif ($value == 0x1C8) {
+		print 'reserved';
+	}
+	elsif ($value == 0x1CD) {
+		print 'vedic';
+	}
+	elsif ($value == 0x1D0) {
+		print 'phonetic';
+	}
+	elsif ($value == 0x1D8) {
+		print 'phonetic+';
+	}
+	elsif ($value == 0x1DC) {
+		print 'combining';
+	}
+	elsif ($value == 0x1E0) {
+		print 'latin extended additional';
+	}
+	elsif ($value == 0x1F0) {
+		print 'greek+';
+	}
+}
+
 sub printcell_utf8 {
 	my ($value) = @_;
 	if ($value <= 0x7F) {
@@ -134,13 +360,17 @@ for my $tablenum (0 .. $#tables) {
 		print "\n";
 	}
 	print '';
-	for my $msb (0 .. $#nibble) {
-		print '', $nibble[$msb];
+	for my $msb (0 .. (length($tables[$tablenum]) || 256) - 1 >> 4) {
+		printf '%X', $msb;
 		for my $lsb (0 .. $#nibble) {
 			if ($request[$tablenum] eq 'utf-8-strict') {
 				printcell_utf8(($msb<<4) + $lsb);
 				next;
 			}
+			elsif ($request[$tablenum] eq 'Internal') {
+				printcell_unicode(($msb<<4) + $lsb);
+				next;
+			}
 
 			my $glyph = substr $tables[$tablenum], ($msb<<4) + $lsb, 1;
 			if ($glyph eq $NOCHAR) {
-- 
2.30.0