X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/56786a220fd951f5e6dabb21ef70be1dd3b80389..d4442602046916e548aef5bd877fe0ec15b7c63d:/charset-unicode.inc.pl diff --git a/charset-unicode.inc.pl b/charset-unicode.inc.pl index 2b709c2..a0717a7 100644 --- a/charset-unicode.inc.pl +++ b/charset-unicode.inc.pl @@ -1,11 +1,12 @@ use utf8; my %uniblock = ( - 0x000, 'control', - 0x002, 'comn', - 0x004, 'basic latin', - 0x008, 'control', - 0x00A, 'comn', - 0x00C, 'latin1', + # bmp + 0x000, 'control', + 0x002, 'comn', + 0x004, 'basic latin', + 0x008, 'control', + 0x00A, 'comn', + 0x00C, 'latin1', 0x010, 'latin extended-A', 0x018, 'latin extended-B', 0x020, 'latin ext-B', @@ -24,7 +25,8 @@ my %uniblock = ( 0x07C, 'n\'ko', 0x080, 'samaritan', 0x084, 'manda', - 0x086, 'reserved', + 0x086, 'syr', + 0x087, 'reserved', 0x08A, 'arabic ext-A', 0x090, 'devanagari', 0x098, 'bengali', @@ -69,8 +71,8 @@ my %uniblock = ( 0x1BC, 'batak', 0x1C0, 'lepcha', 0x1C5, 'ol chiki', - 0x1C8, 'cyr', - 0x1C9, 'reserved', + 0x1C8, 'cyr', + 0x1C9, 'georg+', 0x1CC, 'sn', 0x1CD, 'vedic', 0x1D0, 'phonetic', @@ -126,15 +128,15 @@ my %uniblock = ( 0x320, 'enclosed cjk characters', 0x330, 'cjk compatibility', 0x340, 'cjk unified ideographs extension A', - 0x4D0, 'cjk unified ideographs extension A', + 0x4D0, 'cjk unified ideographs extension A', 0x4DC, 'hexagrams', 0x4E0, 'cjk unified ideographs', 0xA00, 'yi', - 0xA40, 'yi', + 0xA40, 'yi', 0xA49, 'yi radicals', 0xA4D, 'lisu', 0xA50, 'vai', - 0xA60, 'vai', + 0xA60, 'vai', 0xA64, 'cyrillic extended-B', 0xA6A, 'bamum', 0xA70, 'tones', @@ -155,21 +157,21 @@ my %uniblock = ( 0xAAE, 'mtei+', 0xAB0, 'ethiopic-A', 0xAB3, 'latin ext-E', - 0xAB7, 'cherokee+', + 0xAB7, 'cherokee+', 0xABC, 'meithei', 0xAC0, 'hangeul syllables', - 0xD70, 'hangeul syllables', + 0xD70, 'hangeul syllables', 0xD7B, 'haungeul jamo-B', 0xD80, 'high surrogates', 0xDC0, 'low surrogates', 0xE00, 'private use', 0xF90, 'cjk compatibility ideographs', 0xFB0, 'presentation', - 0xFB5, '', - 0xFC0, 'arabic presentation forms A', - 0xFD0, '', + 0xFB5, '', + 0xFC0, 'arabic presentation forms A', + 0xFD0, '', 0xFDD, '?', - 0xFDF, '', + 0xFDF, '', 0xFE0, 'var', 0xFE1, 'ver', 0xFE2, '½', @@ -178,6 +180,262 @@ my %uniblock = ( 0xFE7, 'arabic presentation B', 0xFF0, 'halfwidth & fullwidth forms', 0xFFF, 'sp', + + # smp + 0x1000, 'linear B syllabary', + 0x1008, 'linear B ideograms', + 0x1010, 'aegean num', + 0x1014, 'greek numbers', + 0x1019, 'ancient sym', + 0x101D, 'phaistos', + 0x1020, 'iberian', + 0x1024, 'reserved', + 0x1028, 'lycian', + 0x102A, 'carian', + 0x102E, 'coptic', + 0x1030, 'italic', + 0x1033, 'gothic', + 0x1035, 'permic', + 0x1038, 'ugarit', + 0x103A, 'old persian', + 0x103E, 'sh.qs', + 0x1040, 'deseret', + 0x1045, 'shavian', + 0x1048, 'osmanya', + 0x104B, 'osage', + 0x1050, 'elbasan', + 0x1053, 'c albanian', + 0x1057, 'vithkuqi', + 0x105C, 'todhri', + 0x1060, 'linear A', + 0x1070, 'linear A', + 0x1078, 'cypro-minoan', + 0x1080, 'cypriot', + 0x1084, 'aram', + 0x1086, 'palmr', + 0x1088, 'nabataean', + 0x108A, 'res', + 0x108C, 'numid', + 0x108E, 'hatr', + 0x1090, 'phoen', + 0x1092, 'lydian', + 0x1094, 'reserved', + 0x1098, 'mero h', + 0x109A, 'meroitic cursive', + 0x10A0, 'kharoshthi', + 0x10A6, 's arab', + 0x10A8, 'n arab', + 0x10AA, 'balti', + 0x10AC, 'manichaean', + 0x10B0, 'avestan', + 0x10B4, 'parth', + 0x10B6, 'pahlav', + 0x10B8, 'psalt pahl', + 0x10BB, 'book pahl', + 0x10BE, 'babur', + 0x10C0, 'old turkic', + 0x10C5, 'reserved', + 0x10C8, 'old hungarian', + 0x10D0, 'rohingya', + 0x10D4, 'garay', + 0x10D8, 'byblos', + 0x10E0, 'reserved', + 0x10E6, 'rumi', + 0x10E8, 'reserved', + 0x10EE, 'elym', + 0x10F0, 'old sog', + 0x10F3, 'sogdian', + 0x10F7, 'res', + 0x10F8, 'uyghur', + 0x1100, 'brahmi', + 0x1108, 'kaithi', + 0x110D, 'sora som', + 0x1110, 'chakma', + 0x1115, 'mahajani', + 0x1118, 'sharada', + 0x111E, 'sinhal', + 0x1120, 'khojki', + 0x1125, 'landa', + 0x1128, 'multani', + 0x112B, 'khudabadi', + 0x1130, 'grantha', + 0x1138, 'tulu', + 0x113E, 'shar+', + 0x1140, 'newar', + 0x1148, 'tirhuta', + 0x114E, 'tani', + 0x1150, 'ranjana', + 0x1158, 'siddham', + 0x1160, 'modi', + 0x1166, 'mong', + 0x1168, 'takri', + 0x116D, 'jenticha', + 0x1170, 'ahom', + 0x1174, 'zou', + 0x117A, 'pyu', + 0x1180, 'dogra', + 0x1185, 'sirmauri', + 0x1189, 'res', + 0x118A, 'warang citi', + 0x1190, 'tolong siki', + 0x1194, 'tikamuli', + 0x1198, 'khambu rai', + 0x119C, 'kirat rai', + 0x11A0, 'zanabazar square', + 0x11A5, 'soyombo', + 0x11AB, 'res', + 0x11AC, 'pau cin hau', + 0x11B0, 'dhives akuru', + 0x11B5, 'leke', + 0x11B9, 'nandinagari', + 0x11BF, 'res', + 0x11C0, 'bhaiksuki', + 0x11C7, 'marchen', + 0x11CC, 'balti B', + 0x11D0, 'masaram gondi', + 0x11D6, 'gunjala gondi', + 0x11DB, 'kawi', + 0x11E0, 'tocharian', + 0x11E7, 'khotanese', + 0x11ED, 'res', + 0x11EE, 'makas', + 0x11F0, 'vatteluttu', + 0x11F4, 'res', + 0x11F6, 'chola', + 0x11FC, 'tamil+', + 0x1200, 'cuneiform', + 0x1240, 'cuneiform numbers', + 0x1248, 'early dynastic cuneiform', + 0x1250, 'e. dyn. cuneiform', + 0x1255, 'reserved', + 0x1260, 'proto-cuneiform', + 0x12E0, 'indus', + 0x12F0, 'indus', + 0x12F9, 'reserved', + 0x1300, 'egyptian hieroglyphs', + 0x1340, 'egyptian', + 0x1343, 'eg.c', + 0x1344, '', + 0x1350, 'egyptian hieroglyphs extended-A', + 0x1440, 'anatolian hieroglyphs', + 0x1460, 'anatolian', + 0x1468, '', + 0x1470, 'egyptian hieroglyphs extended-B', + 0x1500, 'lampung', + 0x1504, 'kerinci', + 0x1507, 'res', + 0x1508, '', + 0x1510, 'mandombe', + 0x1550, 'maya hieroglyphs', + 0x15A0, 'reserved', + 0x15C0, 'aztec pictograms', + 0x1600, 'cirth', + 0x1608, 'tengwar', + 0x1610, 'khema', + 0x1614, 'khe prih', + 0x1618, 'res', + 0x161A, 'moon', + 0x1620, 'blissymbols', + 0x1670, 'bagam', + 0x167B, 'iban', + 0x1680, 'bamum supplement', + 0x16A0, 'bamum+', + 0x16A4, 'mro', + 0x16A7, 'mossang tangsa', + 0x16AD, 'bassa vah', + 0x16B0, 'pahawh hmong', + 0x16B9, 'woleai', + 0x16C0, 'kpelle', + 0x16C8, 'afaka', + 0x16CD, 'lk tangsa', + 0x16D0, 'reserved', + 0x16DD, 'kulitan', + 0x16E0, 'mwangwego', + 0x16E4, 'medefaidrin', + 0x16EA, 'lontara+', + 0x16F0, 'miao', + 0x16FA, 'lontara b-b', + 0x16FE, 'ideo', + 0x1700, 'tangut ideographs', + 0x1880, 'tangut components', + 0x18B0, 'khitan small', + 0x18D0, 'khitan ideographs', + 0x1960, 'jurchen', + 0x19B0, 'jurchen', + 0x19B6, 'jurchen rad', + 0x19BA, 'reserved', + 0x19C0, 'reserved', + 0x19E0, 'pau cin hau syllabary', + 0x1A30, 'eskaya', + 0x1A80, 'naxi geba', + 0x1AA0, 'naxi geba', + 0x1AAB, 'res', + 0x1AAC, '', + 0x1AB0, 'naxi dongba', + 0x1B00, 'kana supplement', + 0x1B10, 'kana+A', + 0x1B13, 'kaidā', + 0x1B17, '', + 0x1B20, 'nushu', + 0x1B30, 'shuishu', + 0x1B50, 'proto-elamite', + 0x1BC0, 'duployan', + 0x1BCA, 'sh', + 0x1BCB, 'pitman', + 0x1BD0, 'shorthands?', + 0x1C00, 'micmac hieroglyphs', + 0x1CB0, 'rongorongo', + 0x1CE0, 'reserved', + 0x1D00, 'byzantine musical', + 0x1D10, 'musical symbols', + 0x1D20, 'anc greek music', + 0x1D25, 'reserved', + 0x1D2E, 'mayan', + 0x1D30, 'tai xuan jing', + 0x1D36, 'rod', + 0x1D38, 'mathematical alphanumeric+', + 0x1D40, 'mathematical alphanumeric', + 0x1D80, 'sutton', + 0x1DA0, '', + 0x1DAB, 'reserved', + 0x1DB0, 'reserved', + 0x1E00, 'glagol+', + 0x1E03, 'pallava', + 0x1E08, 'chalukya', + 0x1E0E, 'res', + 0x1E10, 'eebee hmong', + 0x1E1B, 'cher vang hmong', + 0x1E20, 'western cham', + 0x1E27, 'beria', + 0x1E2A, 'reserved', + 0x1E30, 'loma', + 0x1E50, 'reserved', + 0x1E80, 'mende kikakui', + 0x1E8E, 'res', + 0x1E90, 'adlam', + 0x1E96, 'reserved', + 0x1EA0, 'reserved', + 0x1EC0, 'persian siyaq', + 0x1EC7, 'indic siyaq', + 0x1ECC, 'diwani siyaq', + 0x1ED0, 'ottoman siyaq', + 0x1ED5, 'reserved', + 0x1EE0, 'arabic mathematical alphabetic', # Sm + 0x1EF0, 'reserved', + 0x1F00, 'mahjong', + 0x1F03, 'domino tiles', + 0x1F0A, 'playing cards', + 0x1F10, 'enclosed alphanumeric supplement', # So + 0x1F20, 'enclosed ideographic supplement', # So + 0x1F30, 'miscellaneous symbols and pictographs', + 0x1F60, 'emoticons', + 0x1F65, 'ornament', + 0x1F68, 'transport', + 0x1F70, 'alchemical', + 0x1F78, 'geometric shapes ext', + 0x1F80, 'supplemental arrows-C', + 0x1F90, 'supplemental symbols and pictographs', + 0x1FA0, 'reserved', ); sub {