X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/42302c75aa79bb1f14328d6b249379169f5a4fa1..bb412b576d883e11a4ec09d01c34a933d894af6c:/charset-unicode.inc.pl
diff --git a/charset-unicode.inc.pl b/charset-unicode.inc.pl
new file mode 100644
index 0000000..4f5a49d
--- /dev/null
+++ b/charset-unicode.inc.pl
@@ -0,0 +1,188 @@
+# Short labels for ranges of Unicode code points.
+# Keys look like the code point shifted right by four bits (0x038 sits where
+# U+0380 would be) -- NOTE(review): inferred from the values, confirm with caller.
+# No range matching happens in this file: only the exact keys below are labelled.
+my %uniblock = (
+	0x000, 'control',
+	0x002, 'comn',
+	0x004, 'basic latin',
+	0x008, 'control',
+	0x00A, 'comn',
+	0x00C, 'latin1',
+	0x010, 'latin extended-A',
+	0x018, 'latin extended-B',
+	0x020, 'latin ext-B',
+	0x025, 'IPA',
+	0x02B, 'spacing modifier',
+	0x030, 'diacritics',
+	0x038, 'greek',
+	0x040, 'cyrillic',
+	0x050, 'cyrillic+',
+	0x053, 'armenian',
+	0x058, 'hebrew',
+	0x060, 'arabic',
+	0x070, 'syriac',
+	0x075, 'arabic+',
+	0x078, 'thaana',
+	0x07C, 'n\'ko',
+	0x080, 'samaritan',
+	0x084, 'manda',
+	0x086, 'reserved',
+	0x090, 'devanagari',
+	0x098, 'bengali',
+	0x0A0, 'gurmukhi',
+	0x0A8, 'gujarati',
+	0x0B0, 'oriya',
+	0x0B8, 'tamil',
+	0x0C0, 'telugu',
+	0x0C8, 'kannada',
+	0x0D0, 'malayalam',
+	0x0D8, 'sinhala',
+	0x0E0, 'thai',
+	0x0E8, 'lao',
+	0x0F0, 'tibetan',
+	0x100, 'myanmar',
+	0x10A, 'georgian',
+	0x110, 'hangeul jamo',
+	0x120, 'ethiopic',
+	0x130, 'ethiopic',
+	0x138, 'eth+',
+	0x13A, 'cherokee',
+	0x140, 'unified canadian aboriginal syllabics',
+	0x160, 'unified canadian syllabics',
+	0x168, 'ogham',
+	0x16A, 'runic',
+	0x170, 'tagalog',
+	0x172, 'hanun',
+	0x174, 'buhid',
+	0x176, 'tagb',
+	0x178, 'khmer',
+	0x180, 'mongolian',
+	0x18B, 'canadian+',
+	0x190, 'limbu',
+	0x195, 'tai le',
+	0x198, 'new tai lue',
+	0x19E, 'km',
+	0x1A0, 'lontara',
+	0x1A2, 'tai tham',
+	0x1AB, 'reserved',
+	0x1B0, 'balinese',
+	0x1B8, 'sundanese',
+	0x1BC, 'batak',
+	0x1C0, 'lepcha',
+	0x1C5, 'ol chiki',
+	0x1C8, 'reserved',
+	0x1CD, 'vedic',
+	0x1D0, 'phonetic',
+	0x1D8, 'phonetic+',
+	0x1DC, 'combining',
+	0x1E0, 'latin extended additional',
+	0x1F0, 'greek+',
+	0x200, 'general punctuation',
+	0x207, 'suþscript', # suth now means "sub and/or sup"
+	0x20A, 'currency',
+	0x20D, 'overlay',
+	0x210, 'letterlike',
+	0x215, 'number',
+	0x219, 'arrows',
+	0x220, 'mathematical symbols',
+	0x230, 'miscellaneous technical',
+	0x240, 'control',
+	0x244, 'OCR',
+	0x246, 'enclosed alphanumerics',
+	0x250, 'box drawing',
+	0x258, 'blocks',
+	0x25A, 'geometric shapes',
+	0x260, 'miscellaneous symbols',
+	0x270, 'dingbats',
+	0x27C, 'maths-A',
+	0x27F, 'arr',
+	0x280, 'braille',
+	0x290, 'supplemental arrows-B',
+	0x298, 'mathematical symbols-B',
+	0x2A0, 'supplemental mathematical operators',
+	0x2B0, 'miscellaneous symbols and arrows',
+	0x2C0, 'glagolitic',
+	0x2C6, 'latin-C',
+	0x2C8, 'coptic',
+	0x2D0, 'georgian+',
+	0x2D3, 'tifinagh', #TODO: proto-canaanite
+	0x2D8, 'ethiopic+',
+	0x2DE, 'cyrl-A',
+	0x2E0, 'punctuation+',
+	0x2E8, 'cjk radicals',
+	0x2F0, 'kangxi radicals',
+	0x2FE, '',
+	0x2FF, 'idc',
+	0x300, 'cjk misc',
+	0x304, 'hiragana',
+	0x30A, 'katakana',
+	0x310, 'bopomofo',
+	0x313, 'hangeul compat',
+	0x319, 'kbn',
+	0x31A, 'bpmf',
+	0x31C, 'strokes',
+	0x31F, 'k+',
+	0x320, 'enclosed cjk characters',
+	0x330, 'cjk compatibility',
+	0x340, 'cjk unified ideographs extension A',
+	0x4D0, 'cjk unified ideographs extension A',
+	0x4DC, 'hexagrams',
+	0x4E0, 'cjk unified ideographs',
+	0xA00, 'yi',
+	0xA40, 'yi',
+	0xA49, 'yi radicals',
+	0xA4D, 'lisu',
+	0xA50, 'vai',
+	0xA60, 'vai',
+	0xA64, 'cyrillic extended-B',
+	0xA6A, 'bamum',
+	0xA70, 'tones',
+	0xA72, 'latin extended-D',
+	0xA80, 'sylheti',
+	0xA83, 'in',
+	0xA84, 'phags-pa',
+	0xA88, 'saurashtra',
+	0xA8E, 'deva+',
+	0xA90, 'kayah li',
+	0xA93, 'rejang',
+	0xA96, 'jamo-A',
+	0xA98, 'javanese',
+	0xA9E, 'res',
+	0xAA0, 'cham',
+	0xAA6, 'mym-A',
+	0xAA8, 'tai viet',
+	0xAAE, 'mtei+',
+	0xAB0, 'reserved',
+	0xABC, 'manipuri',
+	0xAC0, 'hangeul syllables',
+	0xD70, 'hangeul syllables',
+	0xD7B, 'hangeul jamo-B',
+	0xD80, 'high surrogates',
+	0xDC0, 'low surrogates',
+	0xE00, 'private use',
+	0xF90, 'cjk compatibility ideographs',
+	0xFB0, 'presentation',
+	0xFB5, '',
+	0xFC0, 'arabic presentation forms A',
+	0xFD0, '',
+	0xFDD, '?',
+	0xFDF, '',
+	0xFE0, 'var',
+	0xFE1, 'ver',
+	0xFE2, '½',
+	0xFE3, 'comp',
+	0xFE5, 'small',
+	0xFE7, 'arabic presentation B',
+	0xFF0, 'halfwidth & fullwidth forms',
+	0xFFF, 'sp',
+);
+
+# Lookup callback; being the file's final expression, it is presumably what
+# the loading code receives -- NOTE(review): verify at the do/require site.
+# Takes one %uniblock key in $_[0]; returns its label, or an empty list when
+# the key has no defined entry.  Empty-string labels are defined, so returned.
+sub {
+	return defined $uniblock{$_[0]} ? $uniblock{$_[0]} : ();
+}
+