X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/92240b0d1e4434e0981b3326ca4d7ad8673dbf71..47ca3b887159be3f6e047d093ed1bafac4adc706:/charset.plp diff --git a/charset.plp b/charset.plp index d3ddea7..25b69b9 100644 --- a/charset.plp +++ b/charset.plp @@ -26,13 +26,34 @@ my $diinfo = do 'digraphs.inc.pl'; my %di = map { $diinfo->{$_}->[0] => $_ } grep { ref $diinfo->{$_} } keys %$diinfo; -use Encode qw(decode); +use Encode qw(decode resolve_alias); # generate character table(s) # (~16x faster than decoding in loop; # substr strings is twice as fast as splitting to an array) -my @tables = map { decode($_, pack 'C*', 0..255) } 'iso-8859-1'; +my %ALIAS = ( + default => [qw(utf-8 iso-8859-1 cp437)], + 0 => [qw(cp437 cp863)], + 1 => [qw(iso-8859-1 cp1252 MacRoman cp850)], + 2 => [qw(iso-8859-2 cp1250 cp852 MacCentralEurRoman MacCroatian MacRumanian)], + 5 => [qw(koi8-f iso-8859-5 cp1251 MacCyrillic cp855 cp866)], + 7 => [qw(iso-8859-7 cp1253 MacGreek cp737 cp869)], + 8 => [qw(iso-8859-8 cp1255 MacHebrew cp862)], +); +my @request = grep { defined } map { + $_ ? (resolve_alias($_) or print("Encoding $_ unknown") && ()) : (); +} map { defined $ALIAS{$_} ? @{ $ALIAS{$_} } : $_ } + $ENV{PATH_INFO} =~ /\w/ ? split(m{[/+\s]}, $ENV{PATH_INFO}) : 'default'; +my @tables = map { $_ eq 'utf-8-strict' ? undef : $_ eq 'Internal' ? ' 'x512 : decode($_, pack 'C*', 0..255) } @request; my $NOCHAR = chr 0xFFFD; +for my $cp437 (grep {$request[$_] eq 'cp437'} 0 .. $#request) { + substr($tables[$cp437], 237, 1) = pack 'U*', 0x3D5; # phi sign + substr($tables[$cp437], 0, 32) = pack 'U*', map {hex} qw( + 2007 263A 263B 2665 2666 2663 2660 2022 25D8 25CB 25D9 2642 2640 266A 266B 263C + 25BA 25C4 2195 203C 00B6 00A7 25AC 21A8 2191 2193 2192 2190 221F 2194 25B2 25BC + ); +} + sub quote { local $_ = shift; s/"/"/g; @@ -41,19 +62,317 @@ sub quote { return $_; } +sub printcell_unicode { + my ($value) = @_; + if ($value > 0x1FF) { + print "\n".'?'; + } + elsif ($value == 0) { + print 'control'; + } + elsif ($value == 2) { + print 'latin'; + } + elsif ($value == 8) { + print 'control'; + } + elsif ($value == 10) { + print 'latin supplement'; + } + elsif ($value == 0x10) { + print 'latin ext-A'; + } + elsif ($value == 0x18) { + print 'latin ext-B'; + } + elsif ($value == 0x20) { + print 'latin ext-B'; + } + elsif ($value == 0x25) { + print 'IPA'; + } + elsif ($value == 0x2B) { + print 'spacing modifier'; + } + elsif ($value == 0x30) { + print 'diacritics'; + } + elsif ($value == 0x38) { + print 'greek'; + } + elsif ($value == 0x40) { + print 'cyrillic'; + } + elsif ($value == 0x50) { + print 'cyrillic+'; + } + elsif ($value == 0x53) { + print 'armenian'; + } + elsif ($value == 0x58) { + print 'hebrew'; + } + elsif ($value == 0x60) { + print 'arabic'; + } + elsif ($value == 0x70) { + print 'syriac'; + } + elsif ($value == 0x75) { + print 'arabic+'; + } + elsif ($value == 0x78) { + print 'thaana'; + } + elsif ($value == 0x7C) { + print 'nko'; + } + elsif ($value == 0x80) { + print 'samaritan'; + } + elsif ($value == 0x84) { + print 'manda'; + } + elsif ($value == 0x86) { + print 'reserved'; + } + elsif ($value == 0x90) { + print 'devanagari'; + } + elsif ($value == 0x98) { + print 'bengali'; + } + elsif ($value == 0xA0) { + print 'gurmukhi'; + } + elsif ($value == 0xA8) { + print 'gujarati'; + } + elsif ($value == 0xB0) { + print 'oriya'; + } + elsif ($value == 0xB8) { + print 'tamil'; + } + elsif ($value == 0xC0) { + print 'telugu'; + } + elsif ($value == 0xC8) { + print 'kannada'; + } + elsif ($value == 0xD0) { + print 'malayalam'; + } + elsif ($value == 0xD8) { + print 'sinhala'; + } + elsif ($value == 0xE0) { + print 'thai'; + } + elsif ($value == 0xE8) { + print 'lao'; + } + elsif ($value == 0xF0) { + print 'tibetan'; + } + elsif ($value == 0x100) { + print 'myanmar'; + } + elsif ($value == 0x10A) { + print 'georgian'; + } + elsif ($value == 0x110) { + print 'hangeul jamo'; + } + elsif ($value == 0x120) { + print 'ethiopic'; + } + elsif ($value == 0x130) { + print 'ethiopic'; + } + elsif ($value == 0x138) { + print 'eth+'; + } + elsif ($value == 0x13A) { + print 'cherokee'; + } + elsif ($value == 0x140) { + print 'unified canadian aboriginal syllabics'; + } + elsif ($value == 0x160) { + print 'unified canadian syllabics'; + } + elsif ($value == 0x168) { + print 'ogham'; + } + elsif ($value == 0x16A) { + print 'runic'; + } + elsif ($value == 0x170) { + print 'tagalog'; + } + elsif ($value == 0x172) { + print 'hanun'; + } + elsif ($value == 0x174) { + print 'buhid'; + } + elsif ($value == 0x176) { + print 'tagb'; + } + elsif ($value == 0x178) { + print 'khmer'; + } + elsif ($value == 0x180) { + print 'mongolian'; + } + elsif ($value == 0x18B) { + print 'canadian+'; + } + elsif ($value == 0x190) { + print 'limbu'; + } + elsif ($value == 0x195) { + print 'tai le'; + } + elsif ($value == 0x198) { + print 'new tai lue'; + } + elsif ($value == 0x19E) { + print 'km'; + } + elsif ($value == 0x1A0) { + print 'lontara'; + } + elsif ($value == 0x1A2) { + print 'tai tham'; + } + elsif ($value == 0x1AB) { + print 'reserved'; + } + elsif ($value == 0x1B0) { + print 'balinese'; + } + elsif ($value == 0x1B8) { + print 'sundanese'; + } + elsif ($value == 0x1BC) { + print 'batak'; + } + elsif ($value == 0x1C0) { + print 'lepcha'; + } + elsif ($value == 0x1C5) { + print 'ol chiki'; + } + elsif ($value == 0x1C8) { + print 'reserved'; + } + elsif ($value == 0x1CD) { + print 'vedic'; + } + elsif ($value == 0x1D0) { + print 'phonetic'; + } + elsif ($value == 0x1D8) { + print 'phonetic+'; + } + elsif ($value == 0x1DC) { + print 'combining'; + } + elsif ($value == 0x1E0) { + print 'latin extended additional'; + } + elsif ($value == 0x1F0) { + print 'greek+'; + } +} + +sub printcell_utf8 { + my ($value) = @_; + if ($value <= 0x7F) { + print 'Single byte ASCII' + if $value == 0; + } + elsif ($value <= 0xBF) { + print 'Multi-byte continuation' + if $value == 0x80; + } + elsif ($value <= 0xC1) { + print '(Overl.)' + if $value == 0xC0; + } + elsif ($value <= 0xDF) { + print '2-byte sequence start' + if $value == 0xC2; + print '' + if $value == 0xD0; + } + elsif ($value <= 0xEF) { + print '3-byte sequence start' + if $value == 0xE0; + } + elsif ($value <= 0xF4) { + print '4-byte sequence' + if $value == 0xF0; + } + elsif ($value <= 0xF7) { + print '(Overflow)' + if $value == 0xF5; + } + elsif ($value <= 0xFB) { + print '5-byte' + if $value == 0xF8; + } + elsif ($value <= 0xFD) { + print '6-byte' + if $value == 0xFC; + } + elsif ($value <= 0xFF) { + print 'Invalid' + if $value == 0xFE; + } + else { + print "\n".'?'; + } +} + +print "