# Sparse table of short Unicode block labels, keyed by a number in the
# range 0x000 .. 0xFFF.
#
# NOTE(review): the keys look like BMP code points with the lowest hex digit
# dropped (e.g. 0x4E0 carries 'cjk unified ideographs', cf. U+4E00) --
# confirm against the caller.
#
# Only the listed keys have a label. Every value except the first begins
# with ' | '; that prefix is part of the stored string. Numeric literals on
# the left of '=>' are not auto-quoted, so 0x00A is stored under key "10".
my %uniblock = (
    # NOTE(review): this first value starts with a line break instead of
    # the ' | ' prefix used everywhere else -- confirm it is intentional.
    0x000 => '
control',
    0x002 => ' | comn',
    0x004 => ' | basic latin',
    0x008 => ' | control',
    0x00A => ' | comn',
    0x00C => ' | latin1',
    0x010 => ' | latin extended-A',
    0x018 => ' | latin extended-B',
    0x020 => ' | latin ext-B',
    0x025 => ' | IPA',
    0x02B => ' | spacing modifier',
    0x030 => ' | diacritics',
    0x038 => ' | greek',
    0x040 => ' | cyrillic',
    0x050 => ' | cyrillic+',
    0x053 => ' | armenian',
    0x058 => ' | hebrew',
    0x060 => ' | arabic',
    0x070 => ' | syriac',
    0x075 => ' | arabic+',
    0x078 => ' | thaana',
    0x07C => ' | n\'ko',
    0x080 => ' | samaritan',
    0x084 => ' | manda',
    0x086 => ' | reserved',
    0x090 => ' | devanagari',
    0x098 => ' | bengali',
    0x0A0 => ' | gurmukhi',
    0x0A8 => ' | gujarati',
    0x0B0 => ' | oriya',
    0x0B8 => ' | tamil',
    0x0C0 => ' | telugu',
    0x0C8 => ' | kannada',
    0x0D0 => ' | malayalam',
    0x0D8 => ' | sinhala',
    0x0E0 => ' | thai',
    0x0E8 => ' | lao',
    0x0F0 => ' | tibetan',
    0x100 => ' | myanmar',
    0x10A => ' | georgian',
    0x110 => ' | hangeul jamo',
    0x120 => ' | ethiopic',
    0x130 => ' | ethiopic',
    0x138 => ' | eth+',
    0x13A => ' | cherokee',
    0x140 => ' | unified canadian aboriginal syllabics',
    0x160 => ' | unified canadian syllabics',
    0x168 => ' | ogham',
    0x16A => ' | runic',
    0x170 => ' | tagalog',
    0x172 => ' | hanun',
    0x174 => ' | buhid',
    0x176 => ' | tagb',
    0x178 => ' | khmer',
    0x180 => ' | mongolian',
    0x18B => ' | canadian+',
    0x190 => ' | limbu',
    0x195 => ' | tai le',
    0x198 => ' | new tai lue',
    0x19E => ' | khmer',
    0x1A0 => ' | lontara',
    0x1A2 => ' | tai tham',
    0x1AB => ' | reserved',
    0x1B0 => ' | balinese',
    0x1B8 => ' | sundanese',
    0x1BC => ' | batak',
    0x1C0 => ' | lepcha',
    0x1C5 => ' | ol chiki',
    0x1C8 => ' | reserved',
    0x1CD => ' | vedic',
    0x1D0 => ' | phonetic',
    0x1D8 => ' | phonetic+',
    0x1DC => ' | combining',
    0x1E0 => ' | latin extended additional',
    0x1F0 => ' | greek+',
    0x200 => ' | general punctuation',
    0x207 => ' | suþscript',    # "suth" (the thorn) stands for "sub and/or sup"
    0x20A => ' | currency',
    0x20D => ' | overlay',
    0x210 => ' | letterlike',
    0x215 => ' | number',
    0x219 => ' | arrows',
    0x220 => ' | mathematical symbols',
    0x230 => ' | miscellaneous technical',
    0x240 => ' | control',
    0x244 => ' | OCR',
    0x246 => ' | enclosed alphanumerics',
    0x250 => ' | box drawing',
    0x258 => ' | blocks',
    0x25A => ' | geometric shapes',
    0x260 => ' | miscellaneous symbols',
    0x270 => ' | dingbats',
    0x27C => ' | maths-A',
    0x27F => ' | arr',
    0x280 => ' | braille',
    0x290 => ' | supplemental arrows-B',
    0x298 => ' | mathematical symbols-B',
    0x2A0 => ' | supplemental mathematical operators',
    0x2B0 => ' | miscellaneous symbols and arrows',
    0x2C0 => ' | glagolitic',
    0x2C6 => ' | latin-C',
    0x2C8 => ' | coptic',
    0x2D0 => ' | georgian+',
    0x2D3 => ' | tifinagh',     # TODO: proto-canaanite
    0x2D8 => ' | ethiopic+',
    0x2DE => ' | cyrl-A',
    0x2E0 => ' | punctuation+',
    0x2E8 => ' | cjk radicals',
    0x2F0 => ' | kangxi radicals',
    0x2FE => ' | ',
    0x2FF => ' | idc',
    0x300 => ' | cjk misc',
    0x304 => ' | hiragana',
    0x30A => ' | katakana',
    0x310 => ' | bopomofo',
    0x313 => ' | hangeul compat',
    0x319 => ' | kbn',
    0x31A => ' | bpmf',
    0x31C => ' | strokes',
    0x31F => ' | k+',
    0x320 => ' | enclosed cjk characters',
    0x330 => ' | cjk compatibility',
    0x340 => ' | cjk unified ideographs extension A',
    0x4D0 => ' | cjk unified ideographs extension A',
    0x4DC => ' | hexagrams',
    0x4E0 => ' | cjk unified ideographs',
    0xA00 => ' | yi',
    0xA40 => ' | yi',
    0xA49 => ' | yi radicals',
    0xA4D => ' | lisu',
    0xA50 => ' | vai',
    0xA60 => ' | vai',
    0xA64 => ' | cyrillic extended-B',
    0xA6A => ' | bamum',
    0xA70 => ' | tones',
    0xA72 => ' | latin extended-D',
    0xA80 => ' | sylheti',
    0xA83 => ' | in',
    0xA84 => ' | phags-pa',
    0xA88 => ' | saurashtra',
    0xA8E => ' | deva+',
    0xA90 => ' | kayah li',
    0xA93 => ' | rejang',
    0xA96 => ' | jamo-A',
    0xA98 => ' | javanese',
    0xA9E => ' | res',
    0xAA0 => ' | cham',
    0xAA6 => ' | mym-A',
    0xAA8 => ' | tai viet',
    0xAAE => ' | mtei+',
    0xAB0 => ' | reserved',
    0xABC => ' | manipuri',
    0xAC0 => ' | hangeul syllables',
    0xD70 => ' | hangeul syllables',
    0xD7B => ' | hangeul jamo-B',
    0xD80 => ' | high surrogates',
    0xDC0 => ' | low surrogates',
    0xE00 => ' | private use',
    0xF90 => ' | cjk compatibility ideographs',
    0xFB0 => ' | presentation',
    0xFB5 => ' | ',
    0xFC0 => ' | arabic presentation forms A',
    0xFD0 => ' | ',
    0xFDD => ' | ?',
    0xFDF => ' | ',
    0xFE0 => ' | var',
    0xFE1 => ' | ver',
    0xFE2 => ' | ½',
    0xFE3 => ' | comp',
    0xFE5 => ' | small',
    0xFE7 => ' | arabic presentation B',
    0xFF0 => ' | halfwidth & fullwidth forms',
    0xFFF => ' | sp',
);
# Anonymous lookup routine over %uniblock.
#
# Takes one argument, the table key. Returns the stored label when the key
# has a defined value; otherwise returns nothing (an empty list in list
# context, undef in scalar context).
sub {
    my ($index) = @_;

    my $label = $uniblock{$index};
    return $label if defined $label;

    return;
}
|