X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/138b47f5f5ac5364dbfc15075e21b0990ab919d9..98699865db67c9772d2a56571e8ddcaee5a5e7bd:/charset.plp diff --git a/charset.plp b/charset.plp index 777e0ee..2dc6964 100644 --- a/charset.plp +++ b/charset.plp @@ -32,7 +32,7 @@ use Encode qw(decode resolve_alias); # substr strings is twice as fast as splitting to an array) my %ALIAS = ( # default => [qw(unicode utf-8 iso-8859-1 cp437 -cp1252- --iso-8859-15- -koi8-f)], - default => [qw(unicode utf-8 iso-8859-1 -cp1252- --iso-8859-15- cp437 -cp850)], + default => [qw(unicode- utf-8 iso-8859-1 -cp1252- --iso-8859-15- cp437 -cp850)], 0 => [qw(cp437 cp863)], 1 => [qw(iso-8859-1 cp1252 MacRoman cp850)], 2 => [qw(iso-8859-2 cp1250 cp852 MacCentralEurRoman MacCroatian MacRumanian)], @@ -55,7 +55,7 @@ my @request = map { } if ($row{set} = resolve_alias($input)) { if ($row{set} eq 'Internal') { - $row{table} = ' 'x640; + $row{table} = ' ' x ($endpoint < 255 ? 640 : 4096); $row{set} = 'Unicode BMP'; } elsif ($row{set} eq 'utf-8-strict') { @@ -96,32 +96,38 @@ sub quote { sub printcell_unicode { my ($value) = @_; - if ($value > 0x27F) { + if ($value > 0xFFF) { print "\n".'?'; } elsif ($value == 0) { - print 'control'; + print 'control'; } elsif ($value == 2) { - print 'latin'; + print 'comn'; + } + elsif ($value == 4) { + print 'basic latin'; } elsif ($value == 8) { - print 'control'; + print 'control'; } elsif ($value == 10) { - print 'latin supplement'; + print 'comn'; + } + elsif ($value == 12) { + print 'latin1'; } elsif ($value == 0x10) { - print 'latin ext-A'; + print 'latin extended-A'; } elsif ($value == 0x18) { - print 'latin ext-B'; + print 'latin extended-B'; } elsif ($value == 0x20) { - print 'latin ext-B'; + print 'latin ext-B'; } elsif ($value == 0x25) { - print 'IPA'; + print 'IPA'; } elsif ($value == 0x2B) { print 'spacing modifier'; @@ -130,199 +136,199 @@ sub printcell_unicode { print 'diacritics'; } elsif ($value == 0x38) { - print 'greek'; + print 'greek'; } elsif ($value == 0x40) { - print 'cyrillic'; + print 'cyrillic'; } elsif ($value == 0x50) { - print 'cyrillic+'; + print 'cyrillic+'; } elsif ($value == 0x53) { - print 'armenian'; + print 'armenian'; } elsif ($value == 0x58) { - print 'hebrew'; + print 'hebrew'; } elsif ($value == 0x60) { - print 'arabic'; + print 'arabic'; } elsif ($value == 0x70) { - print 'syriac'; + print 'syriac'; } elsif ($value == 0x75) { - print 'arabic+'; + print 'arabic+'; } elsif ($value == 0x78) { - print 'thaana'; + print 'thaana'; } elsif ($value == 0x7C) { - print 'nko'; + print 'n\'ko'; } elsif ($value == 0x80) { - print 'samaritan'; + print 'samaritan'; } elsif ($value == 0x84) { - print 'manda'; + print 'manda'; } elsif ($value == 0x86) { - print 'reserved'; + print 'reserved'; } elsif ($value == 0x90) { - print 'devanagari'; + print 'devanagari'; } elsif ($value == 0x98) { - print 'bengali'; + print 'bengali'; } elsif ($value == 0xA0) { - print 'gurmukhi'; + print 'gurmukhi'; } elsif ($value == 0xA8) { - print 'gujarati'; + print 'gujarati'; } elsif ($value == 0xB0) { - print 'oriya'; + print 'oriya'; } elsif ($value == 0xB8) { - print 'tamil'; + print 'tamil'; } elsif ($value == 0xC0) { - print 'telugu'; + print 'telugu'; } elsif ($value == 0xC8) { - print 'kannada'; + print 'kannada'; } elsif ($value == 0xD0) { - print 'malayalam'; + print 'malayalam'; } elsif ($value == 0xD8) { - print 'sinhala'; + print 'sinhala'; } elsif ($value == 0xE0) { - print 'thai'; + print 'thai'; } elsif ($value == 0xE8) { - print 'lao'; + print 'lao'; } elsif ($value == 0xF0) { - print 'tibetan'; + print 'tibetan'; } elsif ($value == 0x100) { - print 'myanmar'; + print 'myanmar'; } elsif ($value == 0x10A) { - print 'georgian'; + print 'georgian'; } elsif ($value == 0x110) { - print 'hangeul jamo'; + print 'hangeul jamo'; } elsif ($value == 0x120) { - print 'ethiopic'; + print 'ethiopic'; } elsif ($value == 0x130) { - print 'ethiopic'; + print 'ethiopic'; } elsif ($value == 0x138) { - print 'eth+'; + print 'eth+'; } elsif ($value == 0x13A) { - print 'cherokee'; + print 'cherokee'; } elsif ($value == 0x140) { - print 'unified canadian aboriginal syllabics'; + print 'unified canadian aboriginal syllabics'; } elsif ($value == 0x160) { - print 'unified canadian syllabics'; + print 'unified canadian syllabics'; } elsif ($value == 0x168) { - print 'ogham'; + print 'ogham'; } elsif ($value == 0x16A) { - print 'runic'; + print 'runic'; } elsif ($value == 0x170) { - print 'tagalog'; + print 'tagalog'; } elsif ($value == 0x172) { - print 'hanun'; + print 'hanun'; } elsif ($value == 0x174) { - print 'buhid'; + print 'buhid'; } elsif ($value == 0x176) { - print 'tagb'; + print 'tagb'; } elsif ($value == 0x178) { - print 'khmer'; + print 'khmer'; } elsif ($value == 0x180) { - print 'mongolian'; + print 'mongolian'; } elsif ($value == 0x18B) { - print 'canadian+'; + print 'canadian+'; } elsif ($value == 0x190) { - print 'limbu'; + print 'limbu'; } elsif ($value == 0x195) { - print 'tai le'; + print 'tai le'; } elsif ($value == 0x198) { - print 'new tai lue'; + print 'new tai lue'; } elsif ($value == 0x19E) { - print 'km'; + print 'km'; } elsif ($value == 0x1A0) { - print 'lontara'; + print 'lontara'; } elsif ($value == 0x1A2) { - print 'tai tham'; + print 'tai tham'; } elsif ($value == 0x1AB) { - print 'reserved'; + print 'reserved'; } elsif ($value == 0x1B0) { - print 'balinese'; + print 'balinese'; } elsif ($value == 0x1B8) { - print 'sundanese'; + print 'sundanese'; } elsif ($value == 0x1BC) { - print 'batak'; + print 'batak'; } elsif ($value == 0x1C0) { - print 'lepcha'; + print 'lepcha'; } elsif ($value == 0x1C5) { - print 'ol chiki'; + print 'ol chiki'; } elsif ($value == 0x1C8) { - print 'reserved'; + print 'reserved'; } elsif ($value == 0x1CD) { - print 'vedic'; + print 'vedic'; } elsif ($value == 0x1D0) { - print 'phonetic'; + print 'phonetic'; } elsif ($value == 0x1D8) { - print 'phonetic+'; + print 'phonetic+'; } elsif ($value == 0x1DC) { print 'combining'; } elsif ($value == 0x1E0) { - print 'latin extended additional'; + print 'latin extended additional'; } elsif ($value == 0x1F0) { - print 'greek+'; + print 'greek+'; } elsif ($value == 0x200) { - print 'general punctuation'; + print 'general punctuation'; } elsif ($value == 0x207) { - print 'su[bp]script'; + print 'suþscript'; # suth now means "sub and/or sup" } elsif ($value == 0x20A) { print 'currency'; @@ -375,23 +381,260 @@ sub printcell_unicode { elsif ($value == 0x27F) { print 'arr'; } + elsif ($value == 0x280) { + print 'braille'; + } + elsif ($value == 0x290) { + print 'supplemental arrows-B'; + } + elsif ($value == 0x298) { + print 'mathematical symbols-B'; + } + elsif ($value == 0x2A0) { + print 'supplemental mathematical operators'; + } + elsif ($value == 0x2B0) { + print 'miscellaneous symbols and arrows'; + } + elsif ($value == 0x2C0) { + print 'glagolitic'; + } + elsif ($value == 0x2C6) { + print 'latin-C'; + } + elsif ($value == 0x2C8) { + print 'coptic'; + } + elsif ($value == 0x2D0) { + print 'georgian+'; + } + elsif ($value == 0x2D3) { + print 'tifinagh'; #TODO: proto-canaanite + } + elsif ($value == 0x2D8) { + print 'ethiopic+'; + } + elsif ($value == 0x2DE) { + print 'cyrl-A'; + } + elsif ($value == 0x2E0) { + print 'punctuation+'; + } + elsif ($value == 0x2E8) { + print 'cjk radicals'; + } + elsif ($value == 0x2F0) { + print 'kangxi radicals'; + } + elsif ($value == 0x2FE) { + print ''; + } + elsif ($value == 0x2FF) { + print 'idc'; + } + elsif ($value == 0x300) { + print 'cjk misc'; + } + elsif ($value == 0x304) { + print 'hiragana'; + } + elsif ($value == 0x30A) { + print 'katakana'; + } + elsif ($value == 0x310) { + print 'bopomofo'; + } + elsif ($value == 0x313) { + print 'hangeul compat'; + } + elsif ($value == 0x319) { + print 'kbn'; + } + elsif ($value == 0x31A) { + print 'bpmf'; + } + elsif ($value == 0x31C) { + print 'strokes'; + } + elsif ($value == 0x31F) { + print 'k+'; + } + elsif ($value == 0x320) { + print 'enclosed cjk characters'; + } + elsif ($value == 0x330) { + print 'cjk compatibility'; + } + elsif ($value == 0x340) { + print 'cjk unified ideographs extension A'; + } + elsif ($value == 0x4D0) { + print 'cjk unified ideographs extension A'; + } + elsif ($value == 0x4DC) { + print 'hexagrams'; + } + elsif ($value == 0x4E0) { + print 'cjk unified ideographs'; + } + elsif ($value == 0xA00) { + print 'yi'; + } + elsif ($value == 0xA40) { + print 'yi'; + } + elsif ($value == 0xA49) { + print 'yi radicals'; + } + elsif ($value == 0xA4D) { + print 'lisu'; + } + elsif ($value == 0xA50) { + print 'vai'; + } + elsif ($value == 0xA60) { + print 'vai'; + } + elsif ($value == 0xA64) { + print 'cyrillic extended-B'; + } + elsif ($value == 0xA6A) { + print 'bamum'; + } + elsif ($value == 0xA70) { + print 'tones'; + } + elsif ($value == 0xA72) { + print 'latin extended-D'; + } + elsif ($value == 0xA80) { + print 'sylheti'; + } + elsif ($value == 0xA83) { + print 'in'; + } + elsif ($value == 0xA84) { + print 'phags-pa'; + } + elsif ($value == 0xA88) { + print 'saurashtra'; + } + elsif ($value == 0xA8E) { + print 'deva+'; + } + elsif ($value == 0xA90) { + print 'kayah li'; + } + elsif ($value == 0xA93) { + print 'rejang'; + } + elsif ($value == 0xA96) { + print 'jamo-A'; + } + elsif ($value == 0xA98) { + print 'javanese'; + } + elsif ($value == 0xA9E) { + print 'res'; + } + elsif ($value == 0xAA0) { + print 'cham'; + } + elsif ($value == 0xAA6) { + print 'mym-A'; + } + elsif ($value == 0xAA8) { + print 'tai viet'; + } + elsif ($value == 0xAAE) { + print 'mtei+'; + } + elsif ($value == 0xAB0) { + print 'reserved'; + } + elsif ($value == 0xABC) { + print 'manipuri'; + } + elsif ($value == 0xAC0) { + print 'hangeul syllables'; + } + elsif ($value == 0xD70) { + print 'hangeul syllables'; + } + elsif ($value == 0xD7B) { + print 'haungeul jamo-B'; + } + elsif ($value == 0xD80) { + print 'high surrogates'; + } + elsif ($value == 0xDC0) { + print 'low surrogates'; + } + elsif ($value == 0xE00) { + print 'private use'; + } + elsif ($value == 0xF90) { + print 'cjk compatibility ideographs'; + } + elsif ($value == 0xFB0) { + print 'presentation'; + } + elsif ($value == 0xFB5) { + print ''; + } + elsif ($value == 0xFC0) { + print 'arabic presentation forms A'; + } + elsif ($value == 0xFD0) { + print ''; + } + elsif ($value == 0xFDD) { + print '?'; + } + elsif ($value == 0xFDF) { + print ''; + } + elsif ($value == 0xFE0) { + print 'var'; + } + elsif ($value == 0xFE1) { + print 'ver'; + } + elsif ($value == 0xFE2) { + print '½'; + } + elsif ($value == 0xFE3) { + print 'comp'; + } + elsif ($value == 0xFE5) { + print 'small'; + } + elsif ($value == 0xFE7) { + print 'arabic presentation B'; + } + elsif ($value == 0xFF0) { + print 'halfwidth & fullwidth forms'; + } + elsif ($value == 0xFFF) { + print 'sp'; + } } sub printcell_utf8 { my ($value) = @_; if ($value <= 0x7F) { print 'Single byte ASCII' + ' title="U+0000 – U+007F">single byte ASCII' if $value == 0; } elsif ($value <= 0xBF) { print 'Multi-byte continuation' + '>multi-byte continuation' if $value == 0x80; } elsif ($value <= 0xC1) { print '(Overl.)' + ' title="U+0000 – U+007F">(overl.)' if $value == 0xC0; } elsif ($value <= 0xDF) { @@ -414,7 +657,7 @@ sub printcell_utf8 { } elsif ($value <= 0xF7) { print '(Overflow)' + ' title="U+11·0000 – U+1FF·FFFF">(overflow)' if $value == 0xF5; } elsif ($value <= 0xFB) { @@ -428,7 +671,7 @@ sub printcell_utf8 { if $value == 0xFC; } elsif ($value <= 0xFF) { - print 'Invalid' + print 'invalid' if $value == 0xFE; } else {