From e2f0b4fb89e4f7b684fe6dc8e318a44353b00f93 Mon Sep 17 00:00:00 2001 From: Mischa POSLAWSKY Date: Fri, 21 Apr 2017 17:04:48 +0200 Subject: [PATCH] charset: full titles for abbreviated unicode scripts --- charset-unicode.inc.pl | 94 +++++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/charset-unicode.inc.pl b/charset-unicode.inc.pl index 2ddc116..1d668a9 100644 --- a/charset-unicode.inc.pl +++ b/charset-unicode.inc.pl @@ -4,10 +4,10 @@ use utf8; # bmp 0x00000 => [0x020, 'X Cc joinr', 'control'], - 0x00020 => [0x020, 'X L Po joinl joinr', 'comn'], + 0x00020 => [0x020, 'X L Po joinl joinr', 'comn', 'common ascii signs'], 0x00040 => [0x040, 'X L Latin joinl', 'basic latin'], 0x00080 => [0x020, 'X Cc joinr', 'control'], - 0x000A0 => [0x020, 'X L So joinl joinr', 'comn'], + 0x000A0 => [0x020, 'X L So joinl joinr', 'comn', 'common latin1 signs'], 0x000C0 => [0x040, 'X L Latin joinl', 'latin1'], 0x00100 => [0x080, 'X L Latin', 'latin extended-A'], 0x00180 => [0x0D0, 'X L Latin', 'latin extended-B'], @@ -25,11 +25,11 @@ use utf8; 0x00780 => [0x040, 'X L African', 'thaana'], 0x007C0 => [0x040, 'X L African', q(n'ko)], 0x00800 => [0x040, 'X L Hebrew', 'samaritan'], - 0x00840 => [0x020, 'X L Aramaic', 'manda'], - 0x00860 => [0x010, 'X Xr L Aramaic', 'syr'], + 0x00840 => [0x020, 'X L Aramaic', 'manda', 'mandaic'], + 0x00860 => [0x010, 'X Xr L Aramaic', 'syr', 'syriac malayalam'], 0x00870 => [0x030], 0x008A0 => [0x060, 'X L Arabic', 'arabic ext-A'], - 0x00900 => [0x080, 'X L Brahmic', 'devanagari'], + 0x00900 => [0x080, 'X L Brahmic', 'devanāgarī'], 0x00980 => [0x080, 'X L Brahmic', 'bengali'], 0x00A00 => [0x080, 'X L Brahmic', 'gurmukhi'], 0x00A80 => [0x080, 'X L Brahmic', 'gujarati'], @@ -37,7 +37,7 @@ use utf8; 0x00B80 => [0x080, 'X L Brahmic', 'tamil'], 0x00C00 => [0x080, 'X L Brahmic', 'telugu'], 0x00C80 => [0x080, 'X L Brahmic', 'kannada'], - 0x00D00 => [0x080, 'X L Brahmic', 'malayalam'], + 0x00D00 => [0x080, 'X L Brahmic', 'malayālam'], 0x00D80 => [0x080, 'X L Brahmic', 'sinhala'], 0x00E00 => [0x080, 'X L Brahmic Khmer', 'thai'], 0x00E80 => [0x080, 'X L Brahmic Khmer', 'lao'], @@ -52,7 +52,7 @@ use utf8; 0x01680 => [0x020, 'X L Alpha', 'ogham'], 0x016A0 => [0x060, 'X L Alpha', 'runic'], 0x01700 => [0x020, 'X L Brahmic', 'tagalog'], - 0x01720 => [0x020, 'X L Brahmic', 'hanun'], + 0x01720 => [0x020, 'X L Brahmic', 'hanun', 'hanunóo'], 0x01740 => [0x020, 'X L Brahmic', 'buhid'], 0x01760 => [0x020, 'X L Brahmic', 'tagb', 'tagbanwa'], 0x01780 => [0x080, 'X L Brahmic Khmer', 'khmer'], @@ -60,35 +60,35 @@ use utf8; 0x018B0 => [0x050, 'X L Syllabic', 'canadian+'], 0x01900 => [0x050, 'X L Brahmic', 'limbu'], 0x01950 => [0x030, 'X L Brahmic', 'tai le'], - 0x01980 => [0x060, 'X L Brahmic', 'new tai lue'], + 0x01980 => [0x060, 'X L Brahmic', 'new tai lü'], 0x019E0 => [0x020, 'X L Brahmic Khmer', 'khmer', 'khmer symbols'], 0x01A00 => [0x020, 'X L Brahmic', 'lontara'], 0x01A20 => [0x090, 'X L Brahmic', 'tai tham'], - 0x01AB0 => [0x050, 'X Mn', 'diacritics+'], + 0x01AB0 => [0x050, 'X Mn', 'diacritics+', 'combining diacritical marks extended'], 0x01B00 => [0x080, 'X L Brahmic', 'balinese'], 0x01B80 => [0x040, 'X L Brahmic', 'sundanese'], 0x01BC0 => [0x040, 'X L Brahmic', 'batak'], 0x01C00 => [0x050, 'X L Brahmic', 'lepcha'], 0x01C50 => [0x030, 'X L Alpha', 'ol chiki'], - 0x01C80 => [0x010, 'X L Cyrillic', 'cyr'], - 0x01C90 => [0x030, 'X Xr L Aramaic', 'georg+'], - 0x01CC0 => [0x010, 'X L Brahmic', 'sn'], - 0x01CD0 => [0x030, 'X L Brahmic', 'vedic'], - 0x01D00 => [0x080, 'X L Latin', 'phonetic'], - 0x01D80 => [0x040, 'X L Latin', 'phonetic+'], - 0x01DC0 => [0x040, 'X Mn', 'diacritics+'], + 0x01C80 => [0x010, 'X L Cyrillic', 'cyr', 'cyrillic extended-C'], + 0x01C90 => [0x030, 'X Xr L Aramaic', 'georg+', 'georgian extended'], + 0x01CC0 => [0x010, 'X L Brahmic', 'sn', 'sundanese supplement'], + 0x01CD0 => [0x030, 'X L Brahmic', 'vedic', 'vedic extensions'], + 0x01D00 => [0x080, 'X L Latin', 'phonetic', 'phonetic extensions'], + 0x01D80 => [0x040, 'X L Latin', 'phonetic+', 'phonetic extensions supplement'], + 0x01DC0 => [0x040, 'X Mn', 'diacritics+', 'combining diacritical marks supplement'], 0x01E00 => [0x100, 'X L Latin', 'latin extended additional'], - 0x01F00 => [0x100, 'X L Greek', 'greek+'], + 0x01F00 => [0x100, 'X L Greek', 'greek+', 'greek extended'], 0x02000 => [0x070, 'X Po', 'general punctuation'], - 0x02070 => [0x030, 'X L Latin', 'suþscript'], # suth now means "sub and/or sup" + 0x02070 => [0x030, 'X L Latin', 'suþscript', 'superscripts and subscripts'], # suth now means "sub and/or sup" 0x020A0 => [0x030, 'X Sc', 'currency'], 0x020D0 => [0x030, 'X Mn', 'overlay'], 0x02100 => [0x050, 'X So', 'letterlike'], - 0x02150 => [0x040, 'X Latin', 'number'], + 0x02150 => [0x040, 'X Latin', 'number', 'number forms'], 0x02190 => [0x070, 'X So', 'arrows'], 0x02200 => [0x100, 'X Sm', 'mathematical symbols'], 0x02300 => [0x100, 'X So', 'miscellaneous technical'], - 0x02400 => [0x040, 'X So', 'control'], + 0x02400 => [0x040, 'X So', 'control', 'control pictures'], 0x02440 => [0x020, 'X So', 'OCR'], 0x02460 => [0x0A0, 'X Latin', 'enclosed alphanumerics'], 0x02500 => [0x080, 'X So', 'box drawing'], @@ -96,7 +96,7 @@ use utf8; 0x025A0 => [0x060, 'X So', 'geometric shapes'], 0x02600 => [0x100, 'X So', 'miscellaneous symbols'], 0x02700 => [0x0C0, 'X So', 'dingbats'], - 0x027C0 => [0x030, 'X Sm', 'maths-A'], + 0x027C0 => [0x030, 'X Sm', 'maths-A', 'miscellaneous mathematical symbols-A'], 0x027F0 => [0x010, 'X So', 'arr', 'supplemental arrows-A'], 0x02800 => [0x100, 'X L Alpha', 'braille'], 0x02900 => [0x080, 'X So', 'supplemental arrows-B'], @@ -106,16 +106,16 @@ use utf8; 0x02C00 => [0x060, 'X L Cyrillic', 'glagolitic'], 0x02C60 => [0x020, 'X L Latin', 'latin-C'], 0x02C80 => [0x080, 'X L Greek', 'coptic'], - 0x02D00 => [0x030, 'X L Aramaic', 'georgian+'], + 0x02D00 => [0x030, 'X L Aramaic', 'georgian+', 'georgian supplement'], 0x02D30 => [0x050, 'X L Alpha', 'tifinagh'], #TODO: proto-canaanite - 0x02D80 => [0x060, 'X L African', 'ethiopic+'], - 0x02DE0 => [0x020, 'X L Cyrillic', 'cyrl-A'], - 0x02E00 => [0x080, 'X Po', 'punctuation+'], + 0x02D80 => [0x060, 'X L African', 'ethiopic+', 'ethiopic extended'], + 0x02DE0 => [0x020, 'X L Cyrillic', 'cyrl-A', 'cyrillic extended-A'], + 0x02E00 => [0x080, 'X Po', 'punctuation+', 'supplemental punctuation'], 0x02E80 => [0x080, 'X L Han', 'cjk radicals'], 0x02F00 => [0x0E0, 'X L Han', 'kangxi radicals'], 0x02FE0 => [0x010], - 0x02FF0 => [0x010, 'X So Han', 'idc'], - 0x03000 => [0x040, 'X Po Han', 'cjk misc'], + 0x02FF0 => [0x010, 'X So Han', 'idc', 'ideographic description characters'], + 0x03000 => [0x040, 'X Po Han', 'cjk misc', 'CJK symbols and punctuation'], 0x03040 => [0x060, 'X L Hiragana', 'hiragana'], 0x030A0 => [0x060, 'X L Katakana', 'katakana'], 0x03100 => [0x030, 'X L Bopomofo', 'bopomofo'], @@ -135,16 +135,16 @@ use utf8; 0x0A500 => [0x140, 'X L Syllabic', 'vai'], 0x0A640 => [0x060, 'X L Cyrillic', 'cyrillic ext-B'], 0x0A6A0 => [0x060, 'X L Syllabic', 'bamum'], - 0x0A700 => [0x020, 'X L Mn', 'tones'], + 0x0A700 => [0x020, 'X L Mn', 'tones', 'modifier tone letters'], 0x0A720 => [0x0E0, 'X L Latin', 'latin extended-D'], - 0x0A800 => [0x030, 'X L Brahmic', 'sylheti'], - 0x0A830 => [0x010, 'X No', 'in'], + 0x0A800 => [0x030, 'X L Brahmic', 'sylheti', 'syloti nagri'], + 0x0A830 => [0x010, 'X No', 'in', 'common indic number forms'], 0x0A840 => [0x040, 'X L Brahmic', 'phags-pa'], 0x0A880 => [0x060, 'X L Brahmic', 'saurashtra'], 0x0A8E0 => [0x020, 'X L Brahmic', 'deva+'], 0x0A900 => [0x030, 'X L Brahmic', 'kayah li'], 0x0A930 => [0x030, 'X L Brahmic', 'rejang'], - 0x0A960 => [0x020, 'X L Hangul', 'jamo-A'], + 0x0A960 => [0x020, 'X L Hangul', 'jamo-A', 'hangul jamo extended-A'], 0x0A980 => [0x060, 'X L Brahmic', 'javanese'], 0x0A9E0 => [0x020, 'X L Brahmic', 'mm-B', 'myanmar extended-B'], 0x0AA00 => [0x060, 'X L Brahmic', 'cham'], @@ -163,16 +163,16 @@ use utf8; 0x0F900 => [0x200, 'X L Han', 'cjk compatibility ideographs'], 0x0FB00 => [0x050, 'X Xd L Alpha', 'presentation'], 0x0FB50 => [0x280, 'X Xd L Arabic', 'arabic presentation forms A'], - 0x0FDD0 => [0x020, 'Xi', '?'], - 0x0FDF0 => [0x010, 'X Xd L Arabic joinu', ''], # continue after brief intermission - 0x0FE00 => [0x010, 'X Cc', 'var'], - 0x0FE10 => [0x010, 'X L Pd', 'ver'], - 0x0FE20 => [0x010, 'X L Mn', '½'], - 0x0FE30 => [0x020, 'X Xd Pd Han', 'comp'], - 0x0FE50 => [0x020, 'X Xd L Latin', 'small'], + 0x0FDD0 => [0x020, 'Xi', '?', 'non-characters'], + 0x0FDF0 => [0x010, 'X Xd L Arabic joinu', '', 'arabic presentation forms A'], # continue after brief intermission + 0x0FE00 => [0x010, 'X Cc', 'vs', 'variation selectors'], + 0x0FE10 => [0x010, 'X L Pd', 'ver', 'vertical forms'], + 0x0FE20 => [0x010, 'X L Mn', '½', 'combining half marks'], + 0x0FE30 => [0x020, 'X Xd Pd Han', 'comp', 'cjk compatibility forms'], + 0x0FE50 => [0x020, 'X Xd L Latin', 'small', 'small form variants'], 0x0FE70 => [0x090, 'X Xd L Arabic', 'arabic presentation B'], 0x0FF00 => [0x0F0, 'X L Latin', 'halfwidth & fullwidth forms'], - 0x0FFF0 => [0x010, 'X Cc', 'sp'], + 0x0FFF0 => [0x010, 'X Cc', 'sp', 'specials'], # smp 0x10000 => [0x080, 'X L Syllabic', 'linear B syllabary'], @@ -191,13 +191,13 @@ use utf8; 0x10350 => [0x030, 'X L Cyrillic', 'permic'], 0x10380 => [0x020, 'X L Alpha', 'ugarit'], 0x103A0 => [0x040, 'X L Alpha', 'old persian'], - 0x103E0 => [0x020, 'X Xr L Alpha', 'sh.qs'], + 0x103E0 => [0x020, 'X Xr L Alpha', 'sh.qs', 'shavian quikscript extensions'], 0x10400 => [0x050, 'X L Alpha', 'deseret'], 0x10450 => [0x030, 'X L Alpha', 'shavian'], 0x10480 => [0x030, 'X L Alpha', 'osmanya'], 0x104B0 => [0x050, 'X L Alpha', 'osage'], 0x10500 => [0x030, 'X L Alpha', 'elbasan'], - 0x10530 => [0x040, 'X L Alpha', 'c albanian'], + 0x10530 => [0x040, 'X L Alpha', 'c albanian', 'caucasian albanian'], 0x10570 => [0x050, 'X Xr L Alpha', 'vithkuqi'], 0x105C0 => [0x040, 'X Xr L Alpha', 'todhri'], 0x10600 => [0x180, 'X L Syllabic', 'linear A'], @@ -207,9 +207,9 @@ use utf8; 0x10860 => [0x020, 'X L Aramaic', 'palmr'], 0x10880 => [0x030, 'X L Aramaic', 'nabataean'], 0x108A0 => [0x010], - 0x108C0 => [0x020, 'X Xr L Alpha', 'numid'], - 0x108E0 => [0x020, 'X L Aramaic', 'hatr'], - 0x10900 => [0x020, 'X L Aramaic', 'phoen'], + 0x108C0 => [0x020, 'X Xr L Alpha', 'numid', 'numidian'], + 0x108E0 => [0x020, 'X L Aramaic', 'hatr', 'hatran'], + 0x10900 => [0x020, 'X L Aramaic', 'phoen', 'phoenician'], 0x10920 => [0x020, 'X L Greek', 'lydian'], 0x10940 => [0x040], 0x10980 => [0x020, 'X L Alpha', 'mero h', 'meroitic hieroglyphs'], @@ -224,7 +224,7 @@ use utf8; 0x10B60 => [0x020, 'X L Aramaic', 'pahlav', 'inscriptional pahlavi'], 0x10B80 => [0x030, 'X L Aramaic', 'psalt pahl', 'psalter pahlavi'], 0x10BB0 => [0x030, 'X Xr L Aramaic', 'book pahl', 'book pahlavi'], - 0x10BE0 => [0x020, 'X Xr L Alpha', 'babur'], + 0x10BE0 => [0x020, 'X Xr L Alpha', 'babur', 'khatt-i baburi'], 0x10C00 => [0x050, 'X L Aramaic', 'old turkic'], 0x10C50 => [0x030], 0x10C80 => [0x080, 'X L Aramaic', 'old hungarian'], @@ -232,7 +232,7 @@ use utf8; 0x10D40 => [0x040, 'X Xr L Alpha', 'garay'], 0x10D80 => [0x080, 'X Xr L Syllabic', 'byblos'], 0x10E00 => [0x060], - 0x10E60 => [0x020, 'X No', 'rumi'], + 0x10E60 => [0x020, 'X No', 'rumi', 'rumi numeral symbols'], 0x10E80 => [0x060], 0x10EE0 => [0x020, 'X Xr L Aramaic', 'elym'], 0x10F00 => [0x030, 'X Xr L Aramaic', 'old sog'], @@ -356,7 +356,7 @@ use utf8; 0x1B000 => [0x100, 'X L Hiragana', 'kana supplement'], 0x1B100 => [0x030, 'X Xr L Hiragana', 'kana+A'], 0x1B130 => [0x040, 'X Xr L Syllabic', 'kaidā'], - 0x1B170 => [0x190, 'X Xr L Han', 'nushu'], + 0x1B170 => [0x190, 'X Xr L Han', 'nüshu'], 0x1B300 => [0x200, 'X Xr L Han', 'shuishu'], 0x1B500 => [0x700, 'X Xr L Syllabic', 'proto-elamite'], 0x1BC00 => [0x0A0, 'X L Alpha', 'duployan'], -- 2.30.0