use utf8;
-my %uniblock = (
- 0x000, '<td colspan="1" class="X Po">ascii',
- 0x001, '<td colspan="4" class="X L Latin">latin',
-# 0x005, '<td colspan="5" class="X Sk">spacing modifier',
- 0x005, '<td colspan="2" class="X Mn">comb',
- 0x007, '<td colspan="1" class="X L Greek">grk',
- 0x008, '<td colspan="2" class="X L Cyrillic">cyr',
- 0x00A, '<td colspan="1" class="X L Armenian">arm',
- 0x00B, '<td colspan="1" class="X L Aramaic">heb',
- 0x00C, '<td colspan="2" class="X L Arabic">arabic',
- 0x00E, '<td colspan="3" class="X L Aramaic">aram',
- 0x010, '<td colspan="2" class="X L Aramaic">aramaic',
- 0x012, '<td colspan="14" class="X L Brahmic">brahmic',
- 0x020, '<td colspan="1" class="X L Brahmic">mm',
- 0x021, '<td colspan="1" class="X L Aramaic">geor',
- 0x022, '<td colspan="2" class="X L Hangul">jamo',
- 0x024, '<td colspan="3" class="X L African">ethiopic',
- 0x027, '<td colspan="6" class="X L Syllabic">aboriginal',
- 0x02D, '<td colspan="1" class="X L Alpha">ger',
- 0x02E, '<td colspan="2" class="X L Brahmic">brahm',
- 0x030, '<td colspan="1" class="X L Aramaic">mon',
- 0x031, '<td colspan="1" class="X L Syllabic">can',
- 0x032, '<td colspan="8" class="X L Brahmic">brahmic',
- 0x03A, '<td colspan="4" class="X L Latin">extensions',
- 0x03E, '<td colspan="2" class="X L Greek">greek',
- 0x040, '<td colspan="1" class="X Po">·…',
- 0x041, '<td colspan="3" class="X So">symbols',
- 0x044, '<td colspan="2" class="X Sm">maths',
- 0x046, '<td colspan="3" class="X So">technical',
- 0x049, '<td colspan="1" class="X Latin">()',
- 0x04A, '<td colspan="2" class="X So">draw',
- 0x04C, '<td colspan="4" class="X So">symbols',
- 0x050, '<td colspan="2" class="X L Alpha">braille',
- 0x052, '<td colspan="1" class="X So">arr',
- 0x053, '<td colspan="3" class="X Sm">maths',
- 0x056, '<td colspan="2" class="X So">misc',
- 0x058, '<td colspan="2" class="X L Greek">ancient',
- 0x05A, '<td colspan="2" class="X L Alpha">ext',
- 0x05C, '<td colspan="1" class="X Po">·+',
- 0x05D, '<td colspan="3" class="X L Han">radicals',
- 0x060, '<td colspan="2" class="X L Katakana">japanese',
- 0x062, '<td colspan="4" class="X L Han">cjk+',
- 0x066, '<td colspan="2" class="X Xd L Han">compat',
- 0x068, '<td colspan="8" class="X L Han" style="border-bottom:none">',
- 0x070, '<td colspan="16" rowspan="2" class="X L Han" style="border-top:none">cjk ideographs A', #+2
- 0x09C, '<td colspan="16" rowspan="11" class="X L Han">cjk unified ideographs',
- 0x140, '<td colspan="9" class="X L Syllabic">yi',
- 0x149, '<td colspan="1" class="X L Latin">lisu',
- 0x14A, '<td colspan="2" class="X L Syllabic">vai',
- 0x14C, '<td colspan="1" class="X L Cyrillic">cyr',
- 0x14D, '<td colspan="1" class="X L Syllabic">bam',
- 0x14E, '<td colspan="2" class="X L Latin">lat-D',
- 0x150, '<td colspan="6" class="X L Brahmic">brahmic',
- 0x156, '<td colspan="2" class="X L Alpha">ext',
- 0x158, '<td colspan="8" class="X L Hangul" style="border-bottom:none">',
- 0x160, '<td colspan="16" rowspan="5" class="X L Hangul" style="border-top:none">hangeul syllables',
- 0x1B0, '<td colspan="16" class="X Cs">surrogates',
- 0x1C0, '<td colspan="16" rowspan="3" class="X Co" style="border-bottom:none">private use',
- 0x1F0, '<td colspan="2" class="X Co" style="border-top:none">',
- 0x1F2, '<td colspan="4" class="X L Han">cjk compat',
- 0x1F6, '<td colspan="8" class="X L Arabic">presentation',
- 0x1FE, '<td colspan="2" class="X L Latin">width',
- 0xFB0, '<td colspan="5" class="X Xd L">presentation',
- 0xFB5, '<td colspan="11" class="X Xd L Arabic" style="border-bottom:none">',
- 0xFC0, '<td colspan="16" class="X Xd L Arabic" style="border-top:none; border-bottom:none">arabic presentation forms A',
- 0xFD0, '<td colspan="13" class="X Xd L Arabic" style="border-top:none">',
- 0xFDD, '<td colspan="2" class="Xi">?',
- 0xFDF, '<td colspan="1" class="X Xd L Arabic" style="border-top:none">',
- 0xFE0, '<td colspan="1" class="X Cc">var',
- 0xFE1, '<td colspan="1" class="X L Pd">ver',
- 0xFE2, '<td colspan="1" class="X L Mn">½',
- 0xFE3, '<td colspan="2" class="X Xd Pd Han">comp',
- 0xFE5, '<td colspan="2" class="X Xd L Latin">small',
- 0xFE7, '<td colspan="9" class="X Xd L Arabic">arabic presentation B',
- 0xFF0, '<td colspan="15" class="X L Latin">halfwidth & fullwidth forms',
- 0xFFF, '<td colspan="1" class="X Cc">sp',
-);
-
-sub {
- return defined $uniblock{$_[0]} ? $uniblock{$_[0]} : ();
-}
++{ # unary plus makes Perl parse the braces as an anonymous hash constructor, not a bare block
+ colsize => 0x80, # smallest range size in this table; every size below is a multiple of it (NOTE(review): presumably code points per table cell - confirm with the consumer)
+ 0x00000 => [0x0080, 'X Po', 'ascii'], # start code point => [range size in code points, space-separated class names, short label, optional longer description]
+ 0x00080 => [0x0200, 'X L Latin', 'latin'], # hex keys are numeric literals, so the stored hash key is the decimal string (here "128")
+ 0x00280 => [0x0100, 'X Mn', 'diac', 'diacritics'], # also spacing Sk
+ 0x00380 => [0x0080, 'X L Greek', 'grk', 'greek'],
+ 0x00400 => [0x0100, 'X L Cyrillic', 'cyr', 'cyrillic'],
+ 0x00500 => [0x0080, 'X L Armenian', 'arm', 'armenian'],
+ 0x00580 => [0x0080, 'X L Aramaic', 'heb', 'hebrew'],
+ 0x00600 => [0x0100, 'X L Arabic', 'arabic'],
+ 0x00700 => [0x0200, 'X L African', 'rtl', 'various right-to-left scripts'],
+ 0x00900 => [0x0500, 'X L Brahmic', 'brahmic'],
+ 0x00E00 => [0x0100, 'X L Brahmic Khmer', 's-br', 'south brahmic: lao and thai'],
+ 0x00F00 => [0x0100, 'X L Brahmic', 'tibet', 'tibetan'],
+ 0x01000 => [0x0080, 'X L Brahmic', 'mm', 'myanmar'],
+ 0x01080 => [0x0080, 'X L Aramaic', 'geor', 'georgian'],
+ 0x01100 => [0x0100, 'X L Hangul', 'jamo'],
+ 0x01200 => [0x0180, 'X L African', 'ethiopic'],
+ 0x01380 => [0x0300, 'X L Syllabic', 'aboriginal', 'cherokee and canadian syllabics'],
+ 0x01680 => [0x0080, 'X L Alpha', 'ger', 'germanic runes'],
+ 0x01700 => [0x0100, 'X L Brahmic', 'brahm', 'various brahmic'],
+ 0x01800 => [0x0080, 'X L Aramaic', 'mon', 'mongolian'],
+ 0x01880 => [0x0080, 'X L Syllabic', 'can', 'canadian supplement'],
+ 0x01900 => [0x0400, 'X L Brahmic', 'brahmic'],
+ 0x01D00 => [0x0200, 'X L Latin', 'extensions'],
+ 0x01F00 => [0x0100, 'X L Greek', 'greek'],
+ 0x02000 => [0x0080, 'X Po', '·…', 'punctuation'],
+ 0x02080 => [0x0180, 'X So', 'symbols'],
+ 0x02200 => [0x0100, 'X Sm', 'maths'],
+ 0x02300 => [0x0180, 'X So', 'technical'],
+ 0x02480 => [0x0080, 'X Latin', '()', 'enclosed alphanumerics'],
+ 0x02500 => [0x0100, 'X So', 'draw', 'box drawing and shapes'],
+ 0x02600 => [0x0200, 'X So', 'symbols'],
+ 0x02800 => [0x0100, 'X L Alpha', 'braille'],
+ 0x02900 => [0x0080, 'X So', 'arr', 'arrows'],
+ 0x02980 => [0x0180, 'X Sm', 'maths'],
+ 0x02B00 => [0x0100, 'X So', 'misc', 'miscellaneous symbols'],
+ 0x02C00 => [0x0100, 'X L Greek', 'ancient', 'glagolitic, latin, coptic'],
+ 0x02D00 => [0x0100, 'X L Alpha', 'ext', 'extensions of various alphabets'],
+ 0x02E00 => [0x0080, 'X Po', '·+', 'punctuation'],
+ 0x02E80 => [0x0180, 'X L Han', 'radicals'],
+ 0x03000 => [0x0100, 'X L Katakana', 'japanese'],
+ 0x03100 => [0x0200, 'X L Han', 'cjk+'],
+ 0x03300 => [0x0100, 'X Xd L Han', 'compat', 'cjk compatibility'],
+ 0x03400 => [0x1A00, 'X L Han', 'cjk ideographs A'],
+ 0x04E00 => [0x5200, 'X L Han', 'cjk unified ideographs'],
+ 0x0A000 => [0x0480, 'X L Syllabic', 'yi'],
+ 0x0A480 => [0x0080, 'X L Latin', 'lisu'],
+ 0x0A500 => [0x0100, 'X L Syllabic', 'vai'],
+ 0x0A600 => [0x0080, 'X L Cyrillic', 'cyr', 'cyrillic ext-B'],
+ 0x0A680 => [0x0080, 'X L Syllabic', 'bam', 'bamum'],
+ 0x0A700 => [0x0100, 'X L Latin', 'lat-D'],
+ 0x0A800 => [0x0300, 'X L Brahmic', 'brahmic'],
+ 0x0AB00 => [0x0100, 'X L Alpha', 'ext', 'various extensions'],
+ 0x0AC00 => [0x2C00, 'X L Hangul', 'hangeul syllables'],
+ 0x0D800 => [0x0800, 'X Cs', 'surrogates'],
+ 0x0E000 => [0x1000, 'X Co', 'private use'],
+ 0x0F000 => [0x0900, 'X Co', '<q>corporate use</q>'], # label carries inline markup, unlike the plain-text labels elsewhere
+ 0x0F900 => [0x0200, 'X L Han', 'cjk compat'],
+ 0x0FB00 => [0x0400, 'X L Arabic', 'presentation'],
+ 0x0FF00 => [0x0100, 'X L Latin', 'width'],
+ 0x10000 => [0x0100, 'X L Syllabic', 'linear B'], # entries from here on start at or above 0x10000
+ 0x10100 => [0x0100, 'X No', 'a num'],
+ 0x10200 => [0x0400, 'X L Alpha', 'ltr'],
+ 0x10600 => [0x0180, 'X L Syllabic', 'linear A'],
+ 0x10780 => [0x0080, 'X L Alpha', 'ltr'],
+ 0x10800 => [0x0800, 'X L Aramaic', 'rtl'],
+ 0x11000 => [0x1000, 'X L Brahmic', 'brahmic'],
+ 0x12000 => [0x0600, 'X L Syllabic', 'cuneiform'],
+ 0x12600 => [0x0800, 'X L Syllabic', 'proto-cuneiform'],
+ 0x12E00 => [0x0200, 'X L Syllabic', 'indus'],
+ 0x13000 => [0x1400, 'X L Syllabic', 'egyptian hieroglyphs'],
+ 0x14400 => [0x0280, 'X L Syllabic', 'anatolian'],
+ 0x14680 => [0x0980, 'X L Syllabic', 'egyptian'],
+ 0x15000 => [0x0080, 'X L Brahmic', 'bra', 'brahmic'],
+ 0x15080 => [0x0480, 'X L Syllabic', 'mandombe'],
+ 0x15500 => [0x0B00, 'X L Syllabic', 'american hieroglyphs'],
+ 0x16000 => [0x1000, 'X L Alpha', 'recent'],
+ 0x17000 => [0x1B00, 'X L Syllabic', 'tangut'],
+ 0x18B00 => [0x0B00, 'X L Han', 'khitan'],
+ 0x19600 => [0x0600, 'X L Han', 'jurchen'],
+ 0x19C00 => [0x1400, 'X L Han', 'southeast asian'],
+ 0x1B000 => [0x0180, 'X L Hiragana', 'kana'],
+ 0x1B180 => [0x0180, 'X L Han', 'nushu'],
+ 0x1B300 => [0x0200, 'X L Han', 'shuishu'],
+ 0x1B500 => [0x0700, 'X L Syllabic', 'proto-elamite'],
+ 0x1BC00 => [0x0400, 'X L Alpha', 'shorthands'],
+ 0x1C000 => [0x0B00, 'X L Syllabic', 'micmac hieroglyphs'],
+ 0x1CB00 => [0x0300, 'X L Syllabic', 'rongorongo'],
+ 0x1CE00 => [0x0200, '', 'large scripts'], # empty class string (also on 0x1DC00 and 0x1FA00)
+ 0x1D000 => [0x0380, 'X So', 'notational systems'],
+ 0x1D380 => [0x0480, 'X Sm', 'math alphanumeric'],
+ 0x1D800 => [0x0400, 'X L Alpha', 'sutton signs'],
+ 0x1DC00 => [0x0400, '', 'notational'],
+ 0x1E000 => [0x0800, 'X L Alpha', 'ltr'],
+ 0x1E800 => [0x0600, 'X L Alpha', 'rtl'],
+ 0x1EE00 => [0x0200, 'X L Arabic', 'arabic math'],
+ 0x1F000 => [0x0100, 'X So', 'game'],
+ 0x1F100 => [0x0200, 'X L So', 'enclosed'],
+ 0x1F300 => [0x0500, 'X So', 'pictographic'],
+ 0x1F800 => [0x0100, 'X So', 'arrows'],
+ 0x1F900 => [0x0100, 'X So', 'pict', 'supplemental pictographs'],
+ 0x1FA00 => [0x0600, '', 'unassigned'], # 0x1FA00 + 0x0600 = 0x20000: the ranges tile 0x00000-0x1FFFF with no gaps or overlaps
+}; # NOTE(review): if this is the file's last statement, this hashref is the value a do/require of the file yields - confirm with the loader