X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/ba0e12a8401b0da159e17e99fea246b1e3de8378..2a99bcdadec88d7483903e2c8420ad3e78fcb805:/charset-utf8.inc.pl diff --git a/charset-utf8.inc.pl b/charset-utf8.inc.pl index 78f31af..a60dc30 100644 --- a/charset-utf8.inc.pl +++ b/charset-utf8.inc.pl @@ -1,28 +1,15 @@ -my %utf8byte = ( - 0x00, 'single byte ASCII', - 0x80, 'multi-byte continuation', - 0xC0, '(overl.)', - 0xC2, '2-byte sequence start', - 0xD0, '', - 0xE0, '3-byte sequence start', - 0xF0, '4-byte sequence', - 0xF5, '(overflow)', - 0xF8, '5-byte', - 0xFC, '6-byte', - 0xFE, 'invalid', -); - -sub { - return defined $utf8byte{$_[0]} ? $utf8byte{$_[0]} : (); -} - +use utf8; ++{ + 0x00 => [0x80, 'X l4 u-ascii', 'single byte ASCII', 'U+0000 – U+007F'], + 0x80 => [0x40, 'X l3', 'multi-byte continuation'], + 0xC0 => [0x02, 'X l1 joinr joind', '(overl.)', 'U+0000 – U+007F'], + 0xC2 => [0x0E, 'X l2 u-bmp joinl rowspan="2"', + '2-byte sequence start', 'U+0080 – U+03FF'], + 0xD0 => [0x10, 'X l2 u-bmp joinu', '', 'U+0400 – U+07FF'], + 0xE0 => [0x10, 'X l2 u-bmp', '3-byte sequence start', 'U+0800 – U+FFFF'], + 0xF0 => [0x05, 'X l2 joinr', '4-byte sequence', 'U+1·0000 – U+10·FFFF'], + 0xF5 => [0x03, 'X l1 joinl', '(overflow)', 'U+11·0000 – U+1F·FFFF'], + 0xF8 => [0x04, 'X l1', '5-byte', 'U+20·0000 – U+3FF·FFFF'], + 0xFC => [0x02, 'X l1', '6-byte', 'U+400·0000 – U+7FFF·FFFF'], + 0xFE => [0x02, 'di-invalid', 'invalid'], +};