From d5af60c1ee80a8c419489d1b2c4d8b70b4dd82a1 Mon Sep 17 00:00:00 2001 From: Mischa POSLAWSKY Date: Thu, 6 Apr 2017 00:14:24 +0200 Subject: [PATCH] charset: describe unicode plane abbreviations --- charset-ucplanes.inc.pl | 55 +++++++++++++++++++++-------------------- charset-unicode.inc.pl | 2 +- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/charset-ucplanes.inc.pl b/charset-ucplanes.inc.pl index fce05ff..7939c3a 100644 --- a/charset-ucplanes.inc.pl +++ b/charset-ucplanes.inc.pl @@ -2,55 +2,56 @@ use utf8; +{ 0x0000 => [0x008, 'X Po', 'ascii'], 0x0008 => [0x020, 'X L Latin', 'latin'], - 0x0028 => [0x010, 'X Mn', 'comb'], # also spacing Sk - 0x0038 => [0x008, 'X L Greek', 'grk'], - 0x0040 => [0x010, 'X L Cyrillic', 'cyr'], - 0x0050 => [0x008, 'X L Armenian', 'arm'], - 0x0058 => [0x008, 'X L Aramaic', 'heb'], + 0x0028 => [0x010, 'X Mn', 'diac', 'diacritics'], # also spacing Sk + 0x0038 => [0x008, 'X L Greek', 'grk', 'greek'], + 0x0040 => [0x010, 'X L Cyrillic', 'cyr', 'cyrillic'], + 0x0050 => [0x008, 'X L Armenian', 'arm', 'armenian'], + 0x0058 => [0x008, 'X L Aramaic', 'heb', 'hebrew'], 0x0060 => [0x010, 'X L Arabic', 'arabic'], - 0x0070 => [0x010, 'X L Aramaic', 'aram'], - 0x0080 => [0x010, 'X L Aramaic', 'aramaic'], - 0x0090 => [0x070, 'X L Brahmic', 'brahmic'], - 0x0100 => [0x008, 'X L Brahmic', 'mm'], - 0x0108 => [0x008, 'X L Aramaic', 'geor'], + 0x0070 => [0x020, 'X L African', 'rtl', 'various right-to-left scripts'], + 0x0090 => [0x050, 'X L Brahmic', 'brahmic'], + 0x00E0 => [0x010, 'X L Brahmic Khmer', 's-br', 'south brahmic: lao and thai'], + 0x00F0 => [0x010, 'X L Brahmic', 'tibet', 'tibetan'], + 0x0100 => [0x008, 'X L Brahmic', 'mm', 'myanmar'], + 0x0108 => [0x008, 'X L Aramaic', 'geor', 'georgian'], 0x0110 => [0x010, 'X L Hangul', 'jamo'], 0x0120 => [0x018, 'X L African', 'ethiopic'], - 0x0138 => [0x030, 'X L Syllabic', 'aboriginal'], - 0x0168 => [0x008, 'X L Alpha', 'ger'], - 0x0170 => [0x010, 'X L Brahmic', 'brahm'], - 0x0180 => [0x008, 'X L Aramaic', 'mon'], - 0x0188 => [0x008, 'X L Syllabic', 'can'], + 0x0138 => [0x030, 'X L Syllabic', 'aboriginal', 'cherokee and canadian syllabics'], + 0x0168 => [0x008, 'X L Alpha', 'ger', 'germanic runes'], + 0x0170 => [0x010, 'X L Brahmic', 'brahm', 'various brahmic'], + 0x0180 => [0x008, 'X L Aramaic', 'mon', 'mongolian'], + 0x0188 => [0x008, 'X L Syllabic', 'can', 'canadian supplement'], 0x0190 => [0x040, 'X L Brahmic', 'brahmic'], 0x01D0 => [0x020, 'X L Latin', 'extensions'], 0x01F0 => [0x010, 'X L Greek', 'greek'], - 0x0200 => [0x008, 'X Po', '·…'], + 0x0200 => [0x008, 'X Po', '·…', 'punctuation'], 0x0208 => [0x018, 'X So', 'symbols'], 0x0220 => [0x010, 'X Sm', 'maths'], 0x0230 => [0x018, 'X So', 'technical'], - 0x0248 => [0x008, 'X Latin', '()'], - 0x0250 => [0x010, 'X So', 'draw'], + 0x0248 => [0x008, 'X Latin', '()', 'enclosed alphanumerics'], + 0x0250 => [0x010, 'X So', 'draw', 'box drawing and shapes'], 0x0260 => [0x020, 'X So', 'symbols'], 0x0280 => [0x010, 'X L Alpha', 'braille'], - 0x0290 => [0x008, 'X So', 'arr'], + 0x0290 => [0x008, 'X So', 'arr', 'arrows'], 0x0298 => [0x018, 'X Sm', 'maths'], - 0x02B0 => [0x010, 'X So', 'misc'], - 0x02C0 => [0x010, 'X L Greek', 'ancient'], - 0x02D0 => [0x010, 'X L Alpha', 'ext'], - 0x02E0 => [0x008, 'X Po', '·+'], + 0x02B0 => [0x010, 'X So', 'misc', 'miscellaneous symbols'], + 0x02C0 => [0x010, 'X L Greek', 'ancient', 'glagolitic, latin, coptic'], + 0x02D0 => [0x010, 'X L Alpha', 'ext', 'extensions of various alphabets'], + 0x02E0 => [0x008, 'X Po', '·+', 'punctuation'], 0x02E8 => [0x018, 'X L Han', 'radicals'], 0x0300 => [0x010, 'X L Katakana', 'japanese'], 0x0310 => [0x020, 'X L Han', 'cjk+'], - 0x0330 => [0x010, 'X Xd L Han', 'compat'], + 0x0330 => [0x010, 'X Xd L Han', 'compat', 'cjk compatibility'], 0x0340 => [0x1A0, 'X L Han', 'cjk ideographs A'], 0x04E0 => [0x520, 'X L Han', 'cjk unified ideographs'], 0x0A00 => [0x048, 'X L Syllabic', 'yi'], 0x0A48 => [0x008, 'X L Latin', 'lisu'], 0x0A50 => [0x010, 'X L Syllabic', 'vai'], - 0x0A60 => [0x008, 'X L Cyrillic', 'cyr'], - 0x0A68 => [0x008, 'X L Syllabic', 'bam'], + 0x0A60 => [0x008, 'X L Cyrillic', 'cyr', 'cyrillic ext-B'], + 0x0A68 => [0x008, 'X L Syllabic', 'bam', 'bamum'], 0x0A70 => [0x010, 'X L Latin', 'lat-D'], 0x0A80 => [0x030, 'X L Brahmic', 'brahmic'], - 0x0AB0 => [0x010, 'X L Alpha', 'ext'], + 0x0AB0 => [0x010, 'X L Alpha', 'ext', 'various extensions'], 0x0AC0 => [0x2C0, 'X L Hangul', 'hangeul syllables'], 0x0D80 => [0x080, 'X Cs', 'surrogates'], 0x0E00 => [0x190, 'X Co', 'private use'], diff --git a/charset-unicode.inc.pl b/charset-unicode.inc.pl index 0d8685c..38449ab 100644 --- a/charset-unicode.inc.pl +++ b/charset-unicode.inc.pl @@ -22,7 +22,7 @@ use utf8; 0x0070 => [0x05, 'X L Aramaic', 'syriac'], 0x0075 => [0x03, 'X L Arabic', 'arabic+'], 0x0078 => [0x04, 'X L African', 'thaana'], - 0x007C => [0x04, 'X L African', 'n\'ko'], + 0x007C => [0x04, 'X L African', q(n'ko)], 0x0080 => [0x04, 'X L Hebrew', 'samaritan'], 0x0084 => [0x02, 'X L Aramaic', 'manda'], 0x0086 => [0x01, 'X Xr L Aramaic', 'syr'], -- 2.30.0