use utf8;
-my %utf8byte = (
- 0x00, '<td rowspan="8" colspan="16" class="X l4 u-ascii"'.
- ' title="U+0000 – U+007F">single byte ASCII',
- 0x80, '<td rowspan="4" colspan="16" class="X l3"'.
- '>multi-byte continuation',
- 0xC0, '<td colspan="2" class="X l1" style="border-right:none; border-bottom:none"'.
- ' title="U+0000 – U+007F">(overl.)',
- 0xC2, '<td rowspan="2" colspan="14" class="X l2 u-bmp" style="border-left:none"'.
- ' title="U+0080 – U+03FF">2-byte sequence start',
- 0xD0, '<td rowspan="1" colspan="16" class="X l2 u-bmp" style="border-top:none"'.
- ' title="U+0400 – U+07FF">',
- 0xE0, '<td colspan="16" class="X l2 u-bmp"'.
- ' title="U+0800 – U+FFFF">3-byte sequence start',
- 0xF0, '<td colspan="5" class="X l2" style="border-right:none"'.
- ' title="U+1·0000 – U+10·FFFF">4-byte sequence',
- 0xF5, '<td colspan="3" class="X l1" style="border-left:none"'.
- ' title="U+11·0000 – U+1FF·FFFF">(overflow)',
- 0xF8, '<td colspan="4" class="X l1"'.
- ' title="U+200·0000 – U+3FFF·FFFF">5-byte',
- 0xFC, '<td colspan="2" class="X l1"'.
- ' title="U+4000·0000 – 7FFFF·FFFF">6-byte',
- 0xFE, '<td colspan="2" class="di-invalid">invalid',
-);
-
-sub {
- return defined $utf8byte{$_[0]} ? $utf8byte{$_[0]} : ();
-}
-
+# Cells of the UTF-8 lead-byte overview, keyed by the first byte value of
+# each cell.  Every entry is [count, classes, label, tooltip]; the count is
+# the number of byte values the cell covers (it equals the distance to the
+# next key), and the tooltip is omitted where no code point range applies.
+# NOTE(review): the code that renders these entries is not visible here;
+# the rowspan="2" embedded in the 0xC2 class string presumably relies on
+# how that renderer emits the class text -- confirm before changing it.
+#
+# Tooltip ranges follow the original 31-bit UTF-8 definition: 4-byte forms
+# end at U+1F·FFFF, 5-byte forms span U+20·0000 - U+3FF·FFFF and 6-byte
+# forms span U+400·0000 - U+7FFF·FFFF.
++{
+ 0x00 => [0x80, 'X l4 u-ascii', 'single byte ASCII', 'U+0000 – U+007F'],
+ 0x80 => [0x40, 'X l3', 'multi-byte continuation'],
+ 0xC0 => [0x02, 'X l1 joinr joind', '(overl.)', 'U+0000 – U+007F'],
+ 0xC2 => [0x0E, 'X l2 u-bmp joinl rowspan="2"',
+ '2-byte sequence start', 'U+0080 – U+03FF'],
+ 0xD0 => [0x10, 'X l2 u-bmp joinu', '', 'U+0400 – U+07FF'],
+ 0xE0 => [0x10, 'X l2 u-bmp', '3-byte sequence start', 'U+0800 – U+FFFF'],
+ 0xF0 => [0x05, 'X l2 joinr', '4-byte sequence', 'U+1·0000 – U+10·FFFF'],
+ 0xF5 => [0x03, 'X l1 joinl', '(overflow)', 'U+11·0000 – U+1F·FFFF'],
+ 0xF8 => [0x04, 'X l1', '5-byte', 'U+20·0000 – U+3FF·FFFF'],
+ 0xFC => [0x02, 'X l1', '6-byte', 'U+400·0000 – U+7FFF·FFFF'],
+ 0xFE => [0x02, 'di-invalid', 'invalid'],
+};