use utf8;
# Label table: maps a numeric row key to a short block/script label.
#
# Judging by the entries (0x004 => basic latin, 0x038 => greek,
# 0x040 => cyrillic), a key appears to be a code point shifted right by
# four bits, i.e. one key per 16-character row (U+0380 >> 4 == 0x038)
# -- TODO confirm against the caller.
#
# Keys are sparse: only some rows carry an entry.  Every label except the
# first starts with ' | '; the first one starts with a line break instead.
# Presumably the caller strings the labels together into delimited output
# -- verify.
#
# The hash is lexical; the anonymous sub that follows it in this file reads it.
my %uniblock = (
0x000, '
control',
0x002, ' | comn',
0x004, ' | basic latin',
0x008, ' | control',
0x00A, ' | comn',
0x00C, ' | latin1',
0x010, ' | latin extended-A',
0x018, ' | latin extended-B',
0x020, ' | latin ext-B',
0x025, ' | IPA',
0x02B, ' | spacing modifier',
0x030, ' | diacritics',
0x038, ' | greek',
0x040, ' | cyrillic',
0x050, ' | cyrillic+',
0x053, ' | armenian',
0x058, ' | hebrew',
0x060, ' | arabic',
0x070, ' | syriac',
0x075, ' | arabic+',
0x078, ' | thaana',
0x07C, ' | n\'ko',
0x080, ' | samaritan',
0x084, ' | manda',
0x086, ' | reserved',
0x08A, ' | arabic ext-A',
0x090, ' | devanagari',
0x098, ' | bengali',
0x0A0, ' | gurmukhi',
0x0A8, ' | gujarati',
0x0B0, ' | oriya',
0x0B8, ' | tamil',
0x0C0, ' | telugu',
0x0C8, ' | kannada',
0x0D0, ' | malayalam',
0x0D8, ' | sinhala',
0x0E0, ' | thai',
0x0E8, ' | lao',
0x0F0, ' | tibetan',
0x100, ' | myanmar',
0x10A, ' | georgian',
0x110, ' | hangeul jamo',
0x120, ' | ethiopic',
0x130, ' | ethiopic',
0x138, ' | eth+',
0x13A, ' | cherokee',
0x140, ' | unified canadian aboriginal syllabics',
0x160, ' | unified canadian syllabics',
0x168, ' | ogham',
0x16A, ' | runic',
0x170, ' | tagalog',
0x172, ' | hanun',
0x174, ' | buhid',
0x176, ' | tagb',
0x178, ' | khmer',
0x180, ' | mongolian',
0x18B, ' | canadian+',
0x190, ' | limbu',
0x195, ' | tai le',
0x198, ' | new tai lue',
0x19E, ' | khmer',
0x1A0, ' | lontara',
0x1A2, ' | tai tham',
0x1AB, ' | diacritics+',
0x1B0, ' | balinese',
0x1B8, ' | sundanese',
0x1BC, ' | batak',
0x1C0, ' | lepcha',
0x1C5, ' | ol chiki',
0x1C8, ' | cyr',
0x1C9, ' | reserved',
0x1CC, ' | sn',
0x1CD, ' | vedic',
0x1D0, ' | phonetic',
0x1D8, ' | phonetic+',
0x1DC, ' | diacritics+',
0x1E0, ' | latin extended additional',
0x1F0, ' | greek+',
0x200, ' | general punctuation',
0x207, ' | suþscript', # suth now means "sub and/or sup"
0x20A, ' | currency',
0x20D, ' | overlay',
0x210, ' | letterlike',
0x215, ' | number',
0x219, ' | arrows',
0x220, ' | mathematical symbols',
0x230, ' | miscellaneous technical',
0x240, ' | control',
0x244, ' | OCR',
0x246, ' | enclosed alphanumerics',
0x250, ' | box drawing',
0x258, ' | blocks',
0x25A, ' | geometric shapes',
0x260, ' | miscellaneous symbols',
0x270, ' | dingbats',
0x27C, ' | maths-A',
0x27F, ' | arr',
0x280, ' | braille',
0x290, ' | supplemental arrows-B',
0x298, ' | mathematical symbols-B',
0x2A0, ' | supplemental mathematical operators',
0x2B0, ' | miscellaneous symbols and arrows',
0x2C0, ' | glagolitic',
0x2C6, ' | latin-C',
0x2C8, ' | coptic',
0x2D0, ' | georgian+',
0x2D3, ' | tifinagh', #TODO: proto-canaanite
0x2D8, ' | ethiopic+',
0x2DE, ' | cyrl-A',
0x2E0, ' | punctuation+',
0x2E8, ' | cjk radicals',
0x2F0, ' | kangxi radicals',
0x2FE, ' | ',
0x2FF, ' | idc',
0x300, ' | cjk misc',
0x304, ' | hiragana',
0x30A, ' | katakana',
0x310, ' | bopomofo',
0x313, ' | hangeul compat',
0x319, ' | kbn',
0x31A, ' | bpmf',
0x31C, ' | strokes',
0x31F, ' | k+',
0x320, ' | enclosed cjk characters',
0x330, ' | cjk compatibility',
0x340, ' | cjk unified ideographs extension A',
0x4D0, ' | cjk unified ideographs extension A',
0x4DC, ' | hexagrams',
0x4E0, ' | cjk unified ideographs',
0xA00, ' | yi',
0xA40, ' | yi',
0xA49, ' | yi radicals',
0xA4D, ' | lisu',
0xA50, ' | vai',
0xA60, ' | vai',
0xA64, ' | cyrillic extended-B',
0xA6A, ' | bamum',
0xA70, ' | tones',
0xA72, ' | latin extended-D',
0xA80, ' | sylheti',
0xA83, ' | in',
0xA84, ' | phags-pa',
0xA88, ' | saurashtra',
0xA8E, ' | deva+',
0xA90, ' | kayah li',
0xA93, ' | rejang',
0xA96, ' | jamo-A',
0xA98, ' | javanese',
0xA9E, ' | mm-B',
0xAA0, ' | cham',
0xAA6, ' | mm-A',
0xAA8, ' | tai viet',
0xAAE, ' | mtei+',
0xAB0, ' | ethiopic-A',
0xAB3, ' | latin ext-E',
0xAB7, ' | cherokee+',
0xABC, ' | meithei',
0xAC0, ' | hangeul syllables',
0xD70, ' | hangeul syllables',
0xD7B, ' | hangeul jamo-B',
0xD80, ' | high surrogates',
0xDC0, ' | low surrogates',
0xE00, ' | private use',
0xF90, ' | cjk compatibility ideographs',
0xFB0, ' | presentation',
0xFB5, ' | ',
0xFC0, ' | arabic presentation forms A',
0xFD0, ' | ',
0xFDD, ' | ?',
0xFDF, ' | ',
0xFE0, ' | var',
0xFE1, ' | ver',
0xFE2, ' | ½',
0xFE3, ' | comp',
0xFE5, ' | small',
0xFE7, ' | arabic presentation B',
0xFF0, ' | halfwidth & fullwidth forms',
0xFFF, ' | sp',
0x1000, ' | linear B syllabary',
0x1008, ' | linear B ideograms',
0x1010, ' | aegean num',
0x1014, ' | greek numbers',
0x1019, ' | ancient sym',
0x101D, ' | phaistos',
0x1020, ' | reserved',
0x1028, ' | lycian',
0x102A, ' | carian',
0x102E, ' | coptic',
0x1030, ' | italic',
0x1033, ' | gothic',
0x1035, ' | permic',
0x1038, ' | ugarit',
0x103A, ' | old persian',
0x103E, ' | res',
0x1040, ' | deseret',
0x1045, ' | shavian',
0x1048, ' | osmanya',
0x104B, ' | osage',
0x1050, ' | elbasan',
0x1053, ' | c albanian',
0x1057, ' | qs',
0x1058, ' | vithkuqi',
0x105D, ' | res',
0x105E, ' | iber',
0x1060, ' | linear A',
0x1070, ' | linear A',
0x1078, ' | todhri',
0x107C, ' | cyp minoan',
0x1080, ' | cypriot',
0x1084, ' | aram',
0x1086, ' | palmr',
0x1088, ' | nabataean',
0x108A, ' | res',
0x108C, ' | numid',
0x108E, ' | hatr',
0x1090, ' | phoen',
0x1092, ' | lydian',
0x1094, ' | reserved',
0x1098, ' | mero h',
0x109A, ' | meroitic cursive',
0x10A0, ' | kharoshthi',
0x10A6, ' | s arab',
0x10A8, ' | n arab',
0x10AA, ' | balti',
0x10AC, ' | manichaean',
0x10B0, ' | avestan',
0x10B4, ' | parth',
0x10B6, ' | pahlav',
0x10B8, ' | psalt pahl',
0x10BB, ' | book pahl',
0x10BE, ' | babur',
0x10C0, ' | old turkic',
0x10C5, ' | reserved',
0x10C8, ' | old hungarian',
0x10D0, ' | rohingya',
0x10D4, ' | garay',
0x10D8, ' | reserved',
0x10E0, ' | sogdian',
0x10E6, ' | rumi',
0x10E8, ' | uyghur',
0x10EE, ' | elym',
0x10F0, ' | reserved',
0x1100, ' | brahmi',
0x1108, ' | kaithi',
0x110D, ' | sora som',
0x1110, ' | chakma',
0x1115, ' | mahajani',
0x1118, ' | sharada',
0x111E, ' | sinhal',
0x1120, ' | khojki',
0x1125, ' | landa',
0x1128, ' | multani',
0x112B, ' | khudabadi',
0x1130, ' | grantha',
0x1138, ' | tulu',
0x113E, ' | shar+',
0x1140, ' | newar',
0x1148, ' | tirhuta',
0x114E, ' | tani',
0x1150, ' | ranjana',
0x1156, ' | res',
0x1158, ' | siddham',
0x1160, ' | modi',
0x1166, ' | mong',
0x1168, ' | takri',
0x116D, ' | jenticha',
0x1170, ' | ahom',
0x1174, ' | zou',
0x117A, ' | pyu',
0x1180, ' | khema',
0x1184, ' | khe prih',
0x1188, ' | reserved', #TODO
0x118A, ' | warang citi',
0x1190, ' | tolong siki',
0x1194, ' | tikamuli',
0x1198, ' | khambu rai',
0x119C, ' | kirat rai',
0x11A0, ' | zanabazar square',
0x11A5, ' | soyombo',
0x11AB, ' | res',
0x11AC, ' | pau cin hau',
0x11B0, ' | dhives akuru',
0x11B5, ' | leke',
0x11B9, ' | gondi',
0x11BF, ' | res',
0x11C0, ' | bhaiksuki',
0x11C7, ' | marchen',
0x11CC, ' | balti B',
0x11D0, ' | kawi',
0x11D5, ' | nandinagari',
0x11DB, ' | reserved',
0x11E0, ' | chalukya',
0x11E6, ' | chola',
0x11EC, ' | reserved',
0x11F0, ' | satavahana',
0x11F6, ' | turkestani',
0x11FC, ' | tamil+',
0x1200, ' | cuneiform',
0x1240, ' | cuneiform numbers',
0x1248, ' | early dynastic cuneiform',
0x1250, ' | early dynastic cuneiform',
0x1258, ' | reserved',
0x1260, ' | proto-cuneiform',
0x12E0, ' | indus',
0x12F0, ' | indus',
0x12F9, ' | reserved',
0x1300, ' | egyptian hieroglyphs',
0x1340, ' | egyptian',
0x1343, ' | reserved',
0x1350, ' | egyptian hieroglyphs extended',
0x1410, ' | pau cin hau syllabary',
0x1440, ' | anatolian hieroglyphs',
0x1460, ' | anatolian hieroglyphs',
0x1468, ' | reserved',
0x1470, ' | eskaya',
0x14B0, ' | eskaya',
0x14B6, ' | reserved',
0x14C0, ' | reserved',
0x1540, ' | maya hieroglyphs',
0x1590, ' | reserved',
0x15C0, ' | aztec pictograms',
0x1600, ' | cirth',
0x1608, ' | tengwar',
0x1610, ' | reserved',
0x161A, ' | moon',
0x1620, ' | blissymbols',
0x1670, ' | bagam',
0x167B, ' | reserved',
0x1680, ' | bamum supplement',
0x16A0, ' | bamum+',
0x16A4, ' | mro',
0x16A7, ' | mossang tangsa',
0x16AD, ' | bassa vah',
0x16B0, ' | pahawh hmong',
0x16B9, ' | woleai',
0x16C0, ' | kpelle',
0x16C8, ' | afaka',
0x16CD, ' | lk tangsa',
0x16D0, ' | loma',
0x16DD, ' | zaghawa',
0x16E0, ' | mwangwego',
0x16E5, ' | reserved',
0x16F0, ' | miao',
0x16FA, ' | reserved',
0x16FE, ' | ideo',
0x1700, ' | tangut ideographs',
0x1880, ' | reserved',
0x1890, ' | tangut radicals',
0x18C0, ' | reserved',
0x1900, ' | jurchen',
0x1950, ' | jurchen',
0x1956, ' | jurchen rad',
0x195A, ' | reserved',
0x1960, ' | khitan small',
0x1970, ' | khitan small',
0x1978, ' | reserved',
0x1980, ' | khitan large',
0x1A10, ' | reserved',
0x1A80, ' | naxi geba',
0x1AA0, ' | naxi geba',
0x1AAB, ' | res',
0x1AAC, ' | naxi dongba',
0x1AB0, ' | naxi dongba',
0x1B00, ' | kana supplement',
0x1B10, ' | nushu',
0x1B20, ' | nushu',
0x1B29, ' | reserved',
0x1B30, ' | kaida',
0x1B38, ' | reserved',
0x1B40, ' | reserved',
0x1B50, ' | proto-elamite',
0x1BC0, ' | duployan',
0x1BCA, ' | sh',
0x1BCB, ' | pitman',
0x1D00, ' | byzantine musical',
0x1D10, ' | musical symbols',
0x1D20, ' | anc greek music',
0x1D25, ' | reserved',
0x1D30, ' | tai xuan jing',
0x1D36, ' | rod',
0x1D38, ' | reserved',
0x1D40, ' | mathematical alphanumeric', # Sm
0x1D80, ' | sutton',
0x1DB0, ' | reserved',
0x1E00, ' | glagol+',
0x1E03, ' | reserved',
0x1E10, ' | reserved',
0x1E80, ' | mende kikakui',
0x1E90, ' | adlam',
0x1E96, ' | reserved',
0x1EA0, ' | reserved',
0x1EC0, ' | persian siyaq',
0x1EC7, ' | indic siyaq',
0x1ECC, ' | diwani siyaq',
0x1ED0, ' | ottoman siyaq',
0x1ED4, ' | reserved',
0x1E8E, ' | res', # NOTE(review): key is out of numeric sequence (numerically it sorts between 0x1E80 and 0x1E90); confirm whether it is merely misplaced or 0x1EDE was intended
0x1EE0, ' | arabic mathematical alphabetic', # Sm
0x1EF0, ' | reserved',
0x1F00, ' | mahjong',
0x1F03, ' | domino tiles',
0x1F0A, ' | playing cards',
0x1F10, ' | enclosed alphanumeric supplement', # So
0x1F20, ' | enclosed ideographic supplement', # So
0x1F30, ' | miscellaneous symbols and pictographs',
0x1F60, ' | emoticons',
0x1F65, ' | ornament',
0x1F68, ' | transport',
0x1F70, ' | alchemical',
0x1F78, ' | geometric shapes ext',
0x1F80, ' | supplemental arrows-C',
0x1F90, ' | reserved',
);
# Look up the label stored in %uniblock under the key passed as first argument.
# Returns the label string; when no defined entry exists for that key it
# returns an empty list (which is undef in scalar context).
sub {
    my ($row) = @_;
    my $label = $uniblock{$row};
    return $label if defined $label;
    return ();
}
|