X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/8f45336102805af6d836a6ffcffc3583bfa2add8..2a2ae5d0bf7f0f0d0cf2377702fdf6f8827740ae:/charset.plp diff --git a/charset.plp b/charset.plp index e5ca8bf..41af494 100644 --- a/charset.plp +++ b/charset.plp @@ -32,7 +32,7 @@ use Encode qw(decode resolve_alias); # substr strings is twice as fast as splitting to an array) my %ALIAS = ( # default => [qw(unicode utf-8 iso-8859-1 cp437 -cp1252- --iso-8859-15- -koi8-f)], - default => [qw(unicode utf-8 iso-8859-1 -cp1252- --iso-8859-15- cp437 -cp850)], + default => [qw(unicode- utf-8 iso-8859-1 -cp1252- --iso-8859-15- cp437 -cp850)], 0 => [qw(cp437 cp863)], 1 => [qw(iso-8859-1 cp1252 MacRoman cp850)], 2 => [qw(iso-8859-2 cp1250 cp852 MacCentralEurRoman MacCroatian MacRumanian)], @@ -55,12 +55,14 @@ my @request = map { } if ($row{set} = resolve_alias($input)) { if ($row{set} eq 'Internal') { - $row{table} = ' 'x512; + $row{table} = ' ' x ($endpoint < 255 ? 640 : 4096); $row{set} = 'Unicode BMP'; + $row{cell} = do 'charset-unicode.inc.pl'; } elsif ($row{set} eq 'utf-8-strict') { $row{table} = undef; $row{set} = 'UTF-8'; + $row{cell} = do 'charset-utf8.inc.pl'; } else { $row{table} = decode($row{set}, pack 'C*', $row{offset} .. $endpoint); @@ -94,291 +96,6 @@ sub quote { return $_; } -sub printcell_unicode { - my ($value) = @_; - if ($value > 0x1FF) { - print "\n".'?'; - } - elsif ($value == 0) { - print 'control'; - } - elsif ($value == 2) { - print 'latin'; - } - elsif ($value == 8) { - print 'control'; - } - elsif ($value == 10) { - print 'latin supplement'; - } - elsif ($value == 0x10) { - print 'latin ext-A'; - } - elsif ($value == 0x18) { - print 'latin ext-B'; - } - elsif ($value == 0x20) { - print 'latin ext-B'; - } - elsif ($value == 0x25) { - print 'IPA'; - } - elsif ($value == 0x2B) { - print 'spacing modifier'; - } - elsif ($value == 0x30) { - print 'diacritics'; - } - elsif ($value == 0x38) { - print 'greek'; - } - elsif ($value == 0x40) { - print 'cyrillic'; - } - elsif ($value == 0x50) { - print 'cyrillic+'; - } - elsif ($value == 0x53) { - print 'armenian'; - } - elsif ($value == 0x58) { - print 'hebrew'; - } - elsif ($value == 0x60) { - print 'arabic'; - } - elsif ($value == 0x70) { - print 'syriac'; - } - elsif ($value == 0x75) { - print 'arabic+'; - } - elsif ($value == 0x78) { - print 'thaana'; - } - elsif ($value == 0x7C) { - print 'nko'; - } - elsif ($value == 0x80) { - print 'samaritan'; - } - elsif ($value == 0x84) { - print 'manda'; - } - elsif ($value == 0x86) { - print 'reserved'; - } - elsif ($value == 0x90) { - print 'devanagari'; - } - elsif ($value == 0x98) { - print 'bengali'; - } - elsif ($value == 0xA0) { - print 'gurmukhi'; - } - elsif ($value == 0xA8) { - print 'gujarati'; - } - elsif ($value == 0xB0) { - print 'oriya'; - } - elsif ($value == 0xB8) { - print 'tamil'; - } - elsif ($value == 0xC0) { - print 'telugu'; - } - elsif ($value == 0xC8) { - print 'kannada'; - } - elsif ($value == 0xD0) { - print 'malayalam'; - } - elsif ($value == 0xD8) { - print 'sinhala'; - } - elsif ($value == 0xE0) { - print 'thai'; - } - elsif ($value == 0xE8) { - print 'lao'; - } - elsif ($value == 0xF0) { - print 'tibetan'; - } - elsif ($value == 0x100) { - print 'myanmar'; - } - elsif ($value == 0x10A) { - print 'georgian'; - } - elsif ($value == 0x110) { - print 'hangeul jamo'; - } - elsif ($value == 0x120) { - print 'ethiopic'; - } - elsif ($value == 0x130) { - print 'ethiopic'; - } - elsif ($value == 0x138) { - print 'eth+'; - } - elsif ($value == 0x13A) { - print 'cherokee'; - } - elsif ($value == 0x140) { - print 'unified canadian aboriginal syllabics'; - } - elsif ($value == 0x160) { - print 'unified canadian syllabics'; - } - elsif ($value == 0x168) { - print 'ogham'; - } - elsif ($value == 0x16A) { - print 'runic'; - } - elsif ($value == 0x170) { - print 'tagalog'; - } - elsif ($value == 0x172) { - print 'hanun'; - } - elsif ($value == 0x174) { - print 'buhid'; - } - elsif ($value == 0x176) { - print 'tagb'; - } - elsif ($value == 0x178) { - print 'khmer'; - } - elsif ($value == 0x180) { - print 'mongolian'; - } - elsif ($value == 0x18B) { - print 'canadian+'; - } - elsif ($value == 0x190) { - print 'limbu'; - } - elsif ($value == 0x195) { - print 'tai le'; - } - elsif ($value == 0x198) { - print 'new tai lue'; - } - elsif ($value == 0x19E) { - print 'km'; - } - elsif ($value == 0x1A0) { - print 'lontara'; - } - elsif ($value == 0x1A2) { - print 'tai tham'; - } - elsif ($value == 0x1AB) { - print 'reserved'; - } - elsif ($value == 0x1B0) { - print 'balinese'; - } - elsif ($value == 0x1B8) { - print 'sundanese'; - } - elsif ($value == 0x1BC) { - print 'batak'; - } - elsif ($value == 0x1C0) { - print 'lepcha'; - } - elsif ($value == 0x1C5) { - print 'ol chiki'; - } - elsif ($value == 0x1C8) { - print 'reserved'; - } - elsif ($value == 0x1CD) { - print 'vedic'; - } - elsif ($value == 0x1D0) { - print 'phonetic'; - } - elsif ($value == 0x1D8) { - print 'phonetic+'; - } - elsif ($value == 0x1DC) { - print 'combining'; - } - elsif ($value == 0x1E0) { - print 'latin extended additional'; - } - elsif ($value == 0x1F0) { - print 'greek+'; - } -} - -sub printcell_utf8 { - my ($value) = @_; - if ($value <= 0x7F) { - print 'Single byte ASCII' - if $value == 0; - } - elsif ($value <= 0xBF) { - print 'Multi-byte continuation' - if $value == 0x80; - } - elsif ($value <= 0xC1) { - print '(Overl.)' - if $value == 0xC0; - } - elsif ($value <= 0xDF) { - print '2-byte sequence start' - if $value == 0xC2; - print '' - if $value == 0xD0; - } - elsif ($value <= 0xEF) { - print '3-byte sequence start' - if $value == 0xE0; - } - elsif ($value <= 0xF4) { - print '4-byte sequence' - if $value == 0xF0; - } - elsif ($value <= 0xF7) { - print '(Overflow)' - if $value == 0xF5; - } - elsif ($value <= 0xFB) { - print '5-byte' - if $value == 0xF8; - } - elsif ($value <= 0xFD) { - print '6-byte' - if $value == 0xFC; - } - elsif ($value <= 0xFF) { - print 'Invalid' - if $value == 0xFE; - } - else { - print "\n".'?'; - } -} - print "\n"; :>
+
+ +
control + whitespace + diacritic +
letter +
+
punctuation +
quote +
+
symbol +
math + currency +
+
numeric + greek +
latin + cyrillic +
+
aramaic +
brahmic + arabic +
+
syllabic +
african + japanese + cjk + chinese +
+
alphabetic +
+ + +
unicode 5.0 + proposed + deprecated + unassigned + invalid +
+
+