X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/42302c75aa79bb1f14328d6b249379169f5a4fa1..bb412b576d883e11a4ec09d01c34a933d894af6c:/charset-unicode.inc.pl
diff --git a/charset-unicode.inc.pl b/charset-unicode.inc.pl
new file mode 100644
index 0000000..4f5a49d
--- /dev/null
+++ b/charset-unicode.inc.pl
@@ -0,0 +1,180 @@
+# Short display labels for Unicode blocks, as a flat key/value list.
+# Keys look like the code point divided by 16 (0x004 -> U+0040, 0x4E0 -> U+4E00)
+# and are sparse: only some rows between 0x000 and 0xFFF have an entry.
+# NOTE(review): the leading ' | ' in each label and the line break inside the
+# first label are part of the stored strings; they look like leftovers of
+# stripped markup -- confirm against the upstream file before relying on them.
+my %uniblock = (
+ 0x000, '
control',
+ 0x002, ' | comn',
+ 0x004, ' | basic latin',
+ 0x008, ' | control',
+ 0x00A, ' | comn',
+ 0x00C, ' | latin1',
+ 0x010, ' | latin extended-A',
+ 0x018, ' | latin extended-B',
+ 0x020, ' | latin ext-B',
+ 0x025, ' | IPA',
+ 0x02B, ' | spacing modifier',
+ 0x030, ' | diacritics',
+ 0x038, ' | greek',
+ 0x040, ' | cyrillic',
+ 0x050, ' | cyrillic+',
+ 0x053, ' | armenian',
+ 0x058, ' | hebrew',
+ 0x060, ' | arabic',
+ 0x070, ' | syriac',
+ 0x075, ' | arabic+',
+ 0x078, ' | thaana',
+ 0x07C, ' | n\'ko',
+ 0x080, ' | samaritan',
+ 0x084, ' | manda',
+ 0x086, ' | reserved',
+ 0x090, ' | devanagari',
+ 0x098, ' | bengali',
+ 0x0A0, ' | gurmukhi',
+ 0x0A8, ' | gujarati',
+ 0x0B0, ' | oriya',
+ 0x0B8, ' | tamil',
+ 0x0C0, ' | telugu',
+ 0x0C8, ' | kannada',
+ 0x0D0, ' | malayalam',
+ 0x0D8, ' | sinhala',
+ 0x0E0, ' | thai',
+ 0x0E8, ' | lao',
+ 0x0F0, ' | tibetan',
+ 0x100, ' | myanmar',
+ 0x10A, ' | georgian',
+ 0x110, ' | hangeul jamo',
+ 0x120, ' | ethiopic',
+ 0x130, ' | ethiopic',
+ 0x138, ' | eth+',
+ 0x13A, ' | cherokee',
+ 0x140, ' | unified canadian aboriginal syllabics',
+ 0x160, ' | unified canadian syllabics',
+ 0x168, ' | ogham',
+ 0x16A, ' | runic',
+ 0x170, ' | tagalog',
+ 0x172, ' | hanun',
+ 0x174, ' | buhid',
+ 0x176, ' | tagb',
+ 0x178, ' | khmer',
+ 0x180, ' | mongolian',
+ 0x18B, ' | canadian+',
+ 0x190, ' | limbu',
+ 0x195, ' | tai le',
+ 0x198, ' | new tai lue',
+ 0x19E, ' | km',
+ 0x1A0, ' | lontara',
+ 0x1A2, ' | tai tham',
+ 0x1AB, ' | reserved',
+ 0x1B0, ' | balinese',
+ 0x1B8, ' | sundanese',
+ 0x1BC, ' | batak',
+ 0x1C0, ' | lepcha',
+ 0x1C5, ' | ol chiki',
+ 0x1C8, ' | reserved',
+ 0x1CD, ' | vedic',
+ 0x1D0, ' | phonetic',
+ 0x1D8, ' | phonetic+',
+ 0x1DC, ' | combining',
+ 0x1E0, ' | latin extended additional',
+ 0x1F0, ' | greek+',
+ 0x200, ' | general punctuation',
+ 0x207, ' | suþscript', # suth now means "sub and/or sup"
+ 0x20A, ' | currency',
+ 0x20D, ' | overlay',
+ 0x210, ' | letterlike',
+ 0x215, ' | number',
+ 0x219, ' | arrows',
+ 0x220, ' | mathematical symbols',
+ 0x230, ' | miscellaneous technical',
+ 0x240, ' | control',
+ 0x244, ' | OCR',
+ 0x246, ' | enclosed alphanumerics',
+ 0x250, ' | box drawing',
+ 0x258, ' | blocks',
+ 0x25A, ' | geometric shapes',
+ 0x260, ' | miscellaneous symbols',
+ 0x270, ' | dingbats',
+ 0x27C, ' | maths-A',
+ 0x27F, ' | arr',
+ 0x280, ' | braille',
+ 0x290, ' | supplemental arrows-B',
+ 0x298, ' | mathematical symbols-B',
+ 0x2A0, ' | supplemental mathematical operators',
+ 0x2B0, ' | miscellaneous symbols and arrows',
+ 0x2C0, ' | glagolitic',
+ 0x2C6, ' | latin-C',
+ 0x2C8, ' | coptic',
+ 0x2D0, ' | georgian+',
+ 0x2D3, ' | tifinagh', #TODO: proto-canaanite
+ 0x2D8, ' | ethiopic+',
+ 0x2DE, ' | cyrl-A',
+ 0x2E0, ' | punctuation+',
+ 0x2E8, ' | cjk radicals',
+ 0x2F0, ' | kangxi radicals',
+ 0x2FE, ' | ',
+ 0x2FF, ' | idc',
+ 0x300, ' | cjk misc',
+ 0x304, ' | hiragana',
+ 0x30A, ' | katakana',
+ 0x310, ' | bopomofo',
+ 0x313, ' | hangeul compat',
+ 0x319, ' | kbn',
+ 0x31A, ' | bpmf',
+ 0x31C, ' | strokes',
+ 0x31F, ' | k+',
+ 0x320, ' | enclosed cjk characters',
+ 0x330, ' | cjk compatibility',
+ 0x340, ' | cjk unified ideographs extension A',
+ 0x4D0, ' | cjk unified ideographs extension A',
+ 0x4DC, ' | hexagrams',
+ 0x4E0, ' | cjk unified ideographs',
+ 0xA00, ' | yi',
+ 0xA40, ' | yi',
+ 0xA49, ' | yi radicals',
+ 0xA4D, ' | lisu',
+ 0xA50, ' | vai',
+ 0xA60, ' | vai',
+ 0xA64, ' | cyrillic extended-B',
+ 0xA6A, ' | bamum',
+ 0xA70, ' | tones',
+ 0xA72, ' | latin extended-D',
+ 0xA80, ' | sylheti',
+ 0xA83, ' | in',
+ 0xA84, ' | phags-pa',
+ 0xA88, ' | saurashtra',
+ 0xA8E, ' | deva+',
+ 0xA90, ' | kayah li',
+ 0xA93, ' | rejang',
+ 0xA96, ' | jamo-A',
+ 0xA98, ' | javanese',
+ 0xA9E, ' | res',
+ 0xAA0, ' | cham',
+ 0xAA6, ' | mym-A',
+ 0xAA8, ' | tai viet',
+ 0xAAE, ' | mtei+',
+ 0xAB0, ' | reserved',
+ 0xABC, ' | manipuri',
+ 0xAC0, ' | hangeul syllables',
+ 0xD70, ' | hangeul syllables',
+ 0xD7B, ' | hangeul jamo-B',
+ 0xD80, ' | high surrogates',
+ 0xDC0, ' | low surrogates',
+ 0xE00, ' | private use',
+ 0xF90, ' | cjk compatibility ideographs',
+ 0xFB0, ' | presentation',
+ 0xFB5, ' | ',
+ 0xFC0, ' | arabic presentation forms A',
+ 0xFD0, ' | ',
+ 0xFDD, ' | ?',
+ 0xFDF, ' | ',
+ 0xFE0, ' | var',
+ 0xFE1, ' | ver',
+ 0xFE2, ' | ½',
+ 0xFE3, ' | comp',
+ 0xFE5, ' | small',
+ 0xFE7, ' | arabic presentation B',
+ 0xFF0, ' | halfwidth & fullwidth forms',
+ 0xFFF, ' | sp',
+);
+
+# Anonymous lookup: takes one %uniblock key as its first argument and returns
+# the stored label, or nothing (empty list / undef in scalar context) when no
+# defined label exists for that key.
+sub {
+ my ($row) = @_;
+ my $label = $uniblock{$row};
+ return $label if defined $label;
+ return;
+}
+
|