unicode: one set of barb arrows from unicode 7.0
[sheet.git] / charset-encoding.inc.pl
1 use 5.014;
2 use warnings;
3 use utf8;
4
5 +{
6         default    => [qw( u+0-27F utf8+realsize iso-8859-1 iso-8859-15 cp1252 cp437 cp850 )],
7         unicode    => [qw( uu+cols=32+realsize u+0-FFF u+1000-18AF u+18B0-1FFF )],
8         us         => [qw( cp437 cp863 gsm0338 AdobeStandardEncoding )],
9         ebcdic     => [qw( cp37 cp500 cp1047 posix-bc cp1026 cp875 )],
10         iso        => [map {"iso-8859-$_"} 1 .. 11, 13 .. 16],
11         dos        => [qw( cp437 cp865 cp861 cp860 cp863 cp850 cp857 cp852 cp775
12                            cp737 cp869 cp866 MIK cp855 cp862 cp864 )],
13         aix        => [qw( cp1006 )],
14         win        => [qw( cp1252 cp1250 cp1254 cp1257 cp1258 cp1253 cp1251 cp1255 cp1256 cp874 )],
15         mac        => [qw( MacRoman MacRomanian MacRumanian MacCroatian MacCentralEurRoman MacTurkish MacIcelandic MacSami
16                            MacGreek MacCyrillic MacHebrew MacArabic MacFarsi MacThai )],
17
18         # languages
19         westeur    => [qw( iso-8859-1 iso-8859-15 cp1252 iso-8859-14 cp850 hp-roman8 nextstep MacRoman )],
20         centeur    => [qw( iso-8859-2 iso-8859-16 cp1250 cp852 MacRomanian MacCroatian MacCentralEurRoman )], # MacRumanian only for DB
21         turkish    => [qw( iso-8859-9 iso-8859-3 cp1254 cp857 MacTurkish )],
22         norteur    => [qw( baltic nordic )],
23         baltic     => [qw( iso-8859-4 iso-8859-13 cp1257 cp775 )],
24         nordic     => [qw( iso-8859-10 cp865 cp861 MacIcelandic MacSami )],
25         cyrillic   => [qw( koi8-r koi8-u koi8-f iso-8859-5 cp1251 MacCyrillic cp866 MIK cp855
26                            +400 +2DE0 +A640-A69F +500-52F )], # MacUkrainian is broken
27         arabic     => [qw( iso-8859-6 cp1256 MacArabic cp864 cp1006 MacFarsi
28                            +600 +8A0-8BF+8E0 +750-77F )],
29         greek      => [qw( iso-8859-7 cp1253 MacGreek cp737 cp869  +370 +1F00 )],
30         hebrew     => [qw( iso-8859-8 cp1255 MacHebrew cp862  +590 )],
31         thai       => [qw( iso-8859-11 cp874 MacThai )], # U+0E00 identical to iso-8859-11
32         vietnamese => [qw( viscii cp1258 MacVietnamese )],
33         symbols    => [qw( symbol dingbats MacDingbats wingdings wingdings2 wingdings3 webdings )],
34
35         # iso-code shorthand
36         1 => 'westeur',
37         2 => 'centeur',
38         3 => 'turkish',
39         4 => 'baltic',
40         5 => 'cyrillic',
41         6 => 'arabic',
42         7 => 'greek',
43         8 => 'hebrew',
44         9 => 'turkish',
45         10 => 'nordic',
46         11 => 'thai',
47
48         'ascii'        => {inherit => ['' => '00-7F']},
49         'cp437'        => {
50                 inherit => ['cp850' => 0, 'ascii' => '00-1F+80'], # ascii range overridden later
51                 replace => {
52                         0    => ' ☺☻♥♦♣♠•◘○◙♂♀♪♫☼►◄↕‼¶§▬↨↑↓→←∟↔▲▼', # visible variants of control characters
53                         0xED => 'ϕ', # non-greek usage and appearance
54                 },
55         },
56         'gsm0338'      => {inherit => ['ascii' => '00-7F']},
57         'dingbats'     => {inherit => ['' => '20-7F+A0']},
58         'macdingbats'  => {inherit => ['dingbats' => '80-9F']},
59         'adobezdingbat'=> {inherit => ['MacDingbats' => '80-9F']}, # should be identical but maps to private use
60         'symbol'       => {
61                 inherit => ['' => '20-7F+A0'],
62                 replace => {
63                         0x60 => '│', # replace radical extender by closest unicode equivalent
64                         0xBD => '⏐⎯', # arrow extenders
65                         0xD2 => '®©™', # serif variants
66                         0xE0 => '◊', # replace lookalike, should match AdobeSymbol
67                         0xE2 => '®©™', # sans-serif variants
68                         0xE6 => '⎛⎜⎝⎡⎢⎣⎧⎨⎩⎪',
69                         0xF0 => '€',
70                         0xF4 => '⎮⌡⎞⎟⎠⎤⎥⎦⎫⎬⎭',
71                 },
72         },
73         'adobesymbol'  => {inherit => ['symbol' => '20-7F+A0', '' => '20-7F+A0']}, # minor differences, irrelevant except for different '€'
74         'wingdings'    => {inherit => ['' => '20'], setup => sub {  # setup: table = chr(0)..chr(0x20), then the characters listed below in order
75                 $_[0]->{table} = [(map {chr} 0 .. 0x20), qw(
76                           🖉 ✂ ✁ 👓 🕭 🕮 🕯 🕿 ✆ 🖂 🖃 📪 📫 📬 📭 📁 📂 📄 🗏 🗐 🗄 ⌛ 🖮 🖰 🖲 🖳 🖴 🖫 🖬 ✇ ✍
77                         🖎 ✌ 👌 👍 👎 ☜ ☞ ☝ ☟ 🖐 ☺ 😐 ☹ 💣 ☠ 🏳 🏱 ✈ ☼ 💧 ❄ 🕆 ✞ 🕈 ✠ ✡ ☪ ☯ ॐ ☸ ♈ ♉
78                         ♊ ♋ ♌ ♍ ♎ ♏ ♐ ♑ ♒ ♓ 🙰 🙵 ● 🔾 ■ □ 🞐 ❑ ❒ ⬧ ⧫ ◆ ❖ ⬥ ⌧ ⮹ ⌘ 🏵 🏶 🙶 🙷 \7f
79                         ⓪ ① ② ③ ④ ⑤ ⑥ ⑦ ⑧ ⑨ ⑩ ⓿ ❶ ❷ ❸ ❹ ❺ ❻ ❼ ❽ ❾ ❿ 🙢 🙠 🙡 🙣 🙞 🙜 🙝 🙟 · •
80                         ▪ ⚪ 🞆 🞈 ◉ ◎ 🔿 ▪ ◻ 🟂 ✦ ★ ✶ ✴ ✹ ✵ ⯐ ⌖ ⟡ ⌑ ⯑ ✪ ✰ 🕐 🕑 🕒 🕓 🕔 🕕 🕖 🕗 🕘
81                         🕙 🕚 🕛 ⮰ ⮱ ⮲ ⮳ ⮴ ⮵ ⮶ ⮷ 🙪 🙫 🙕 🙔 🙗 🙖 🙐 🙑 🙒 🙓 ⌫ ⌦ ⮘ ⮚ ⮙ ⮛ ⮈ ⮊ ⮉ ⮋ 🡨
82                         🡪 🡩 🡫 🡬 🡭 🡯 🡮 🡸 🡺 🡹 🡻 🡼 🡽 🡿 🡾 ⇦ ⇨ ⇧ ⇩ ⬄ ⇳ ⬀ ⬁ ⬃ ⬂ 🢬 🢭 🗶 ✔ 🗷 🗹 
83                 )];  # NB: "\7f" above is a literal three-character word; qw() does not process escapes
84         }},
85         'wingdings2'   => {inherit => ['' => '20'], setup => sub {  # setup: table = chr(0)..chr(0x20), then the characters listed below in order
86                 $_[0]->{table} = [(map {chr} 0 .. 0x20), qw(
87                           🖊 🖋 🖌 🖍 ✄ ✀ 🕾 🕽 🗅 🗆 🗇 🗈 🗉 🗊 🗋 🗌 🗍 📋 🗑 🗔 🖵 🖶 🖷 🖸 🖭 🖯 🖱 🖒 🖓 🖘 🖙
88                         🖚 🖛 👈 👉 🖜 🖝 🖞 🖟 🖠 🖡 👆 👇 🖢 🖣 🖑 🗴 ✓ 🗵 ☑ ☒ ☒ ⮾ ⮿ ⦸ ⦸ 🙱 🙴 🙲 🙳 ‽ 🙹 🙺
89                         🙻 🙦 🙤 🙥 🙧 🙚 🙘 🙙 🙛 ⓪ ① ② ③ ④ ⑤ ⑥ ⑦ ⑧ ⑨ ⑩ ⓿ ❶ ❷ ❸ ❹ ❺ ❻ ❼ ❽ ❾ ❿ \7f
90                         ☉ 🌕 ☽ ☾ ⸿ ✝ 🕇 🕜 🕝 🕞 🕟 🕠 🕡 🕢 🕣 🕤 🕥 🕦 🕧 🙨 🙩 • ● ⚫ ⬤ 🞅 🞆 🞇 🞈 🞊 ⦿ ◾
91                         ■ ◼ ⬛ ⬜ 🞑 🞒 🞓 🞔 ▣ 🞕 🞖 🞗 ⬩ ⬥ ◆ ◇ 🞚 ◈ 🞛 🞜 🞝 ⬪ ⬧ ⧫ ◊ 🞠 ◖ ◗ ⯊ ⯋ ◼ ⬥
92                         ⬟ ⯂ ⬣ ⬢ ⯃ ⯄ 🞡 🞢 🞣 🞤 🞥 🞦 🞧 🞨 🞩 🞪 🞫 🞬 🞭 🞮 🞯 🞰 🞱 🞲 🞳 🞴 🞵 🞶 🞷 🞸 🞹 🞺
93                         🞻 🞼 🞽 🞾 🞿 🟀 🟂 🟄 ✦ 🟉 ★ ✶ 🟋 ✷ 🟏 🟒 ✹ 🟃 🟇 ✯ 🟍 🟔 ⯌ ⯍ ※ ⁂
94                 )];  # NB: "\7f" above is a literal three-character word; qw() does not process escapes
95         }},
96         'wingdings3'   => {inherit => ['' => '20'], setup => sub {  # setup: table = chr(0)..chr(0x20), then the characters listed below in order
97                 $_[0]->{table} = [(map {chr} 0 .. 0x20), qw(
98                           ⭠ ⭢ ⭡ ⭣ ⭦ ⭧ ⭩ ⭨ ⭰ ⭲ ⭱ ⭳ ⭶ ⭸ ⭻ ⭽ ⭤ ⭥ ⭪ ⭬ ⭫ ⭭ ⭍ ⮠ ⮡ ⮢ ⮣ ⮤ ⮥ ⮦ ⮧
99                         ⮐ ⮑ ⮒ ⮓ ⮀ ⮃ ⭾ ⭿ ⮄ ⮆ ⮅ ⮇ ⮏ ⮍ ⮎ ⮌ ⭮ ⭯ ⎋ ⌤ ⌃ ⌥ ⎵ ⏡ ⇪ ⮸ 🢠 🢡 🢢 🢣 🢤 🢥
100                         🢦 🢧 🢨 🢩 🢪 🢫 ← → ↑ ↓ ↖ ↗ ↙ ↘ 🡘 🡙 ▲ ▼ △ ▽ ◄ ► ◁ ▷ ◣ ◢ ◤ ◥ 🞀 🞂 🞁 \7f
101                         🞃 ▲ ▼ ◀ ▶ ⮜ ⮞ ⮝ ⮟ 🠐 🠒 🠑 🠓 🠔 🠖 🠕 🠗 🠘 🠚 🠙 🠛 🠜 🠞 🠝 🠟 🠀 🠂 🠁 🠃 🠄 🠆 🠅
102                         🠇 🠈 🠊 🠉 🠋 🠠 🠢 🠤 🠦 🠨 🠨 🠪 🢜 🢝 🢞 🢟 🠮 🠰 🠲 🠴 🠶 🠸 🠺 🠹 🠻 🢘 🢚 🢙 🢛 🠼 🠾 🠽
103                         🠿 🡀 🡂 🡁 🡃 🡄 🡆 🡅 🡇 ⮨ ⮩ ⮪ ⮫ ⮬ ⮭ ⮮ ⮯ 🡠 🡢 🡡 🡣 🡤 🡥 🡧 🡦 🡰 🡲 🡱 🡳 🡴 🡵 🡷
104                         🡶 🢀 🢂 🢁 🢃 🢄 🢅 🢇 🢆 🢐 🢒 🢑 🢓 🢔 🢖 🢕 🢗
105                 )];  # NB: "\7f" above is a literal three-character word; qw() does not process escapes
106         }},
107         'webdings'     => {inherit => ['' => '20'], setup => sub {  # setup: table = chr(0)..chr(0x20), then the characters listed below in order
108                 $_[0]->{table} = [(map {chr} 0 .. 0x20), qw(
109                           🕷 🕸 🕲 🕶 🏆 🎖 🖇 🗨 🗩 🗰 🗱 🌶 🎗 ▞ 🙼 🗕 🗖 🗗 ⏴ ⏵ ⏶ ⏷ ⏪ ⏩ ⏮ ⏭ ⏸ ⏹ ⏺ 🗚 🗳
110                         🛠 🏗 🏘 🏙 🏚 🏜 🏭 🏛 🏠 🏖 🏝 🛣 🔍 🏔 👁 👂 🏞 🏕 🛤 🏟 🛳 🕬 🕫 🕨 🔈 🎔 🎕 🗬 🙽 🗭 🗪 🗫
111                         ⮔ ✔ 🚲 □ 🛡 📦 🛱 ■ 🚑 🛈 🛩 🛰 🟈 🕴 ⚫ 🛥 🚔 🗘 🗙 ❓ 🛲 🚇 🚍 ⛳ 🛇 ⊖ 🚭 🗮 | 🗯 🗲 \7f
112                         🚹 🚺 🛉 🛊 🚼 👽 🏋 ⛷ 🏂 🏌 🏊 🏄 🏍 🏎 🚘 🗠 🛢 💰 🏷 💳 👪 🗡 🗢 🗣 ✯ 🖄 🖅 🖃 🖆 🖹 🖺 🖻
113                         🕵 🕰 🖽 🖾 📋 🗒 🗓 📖 📚 🗞 🗟 🗃 🗂 🖼 🎭 🎜 🎘 🎙 🎧 💿 🎞 📷 🎟 🎬 📽 📹 📾 📻 🎚 🎛 📺 💻
114                         🖥 🖦 🖧 🕹 🎮 🕻 🕼 📟 🖁 🖀 🖨 🖩 🖿 🖪 🗜 🔒 🔓 🗝 📥 📤 🕳 🌣 🌤 🌥 🌦 ☁ 🌧 🌨 🌩 🌪 🌬 🌫
115                         🌜 🌡 🛋 🛏 🍽 🍸 🛎 🛍 Ⓟ ♿ 🛆 🖈 🎓 🗤 🗥 🗦 🗧 🛪 🐿 🐦 🐟 🐕 🐈 🙬 🙮 🙭 🙯 🗺 🌍 🌏 🌎 🕊
116                 )];  # NB: "\7f" above is a literal three-character word; qw() does not process escapes
117         }},
118
119         'iso-8859-2'   => {inherit => ['iso-8859-1' => 'A0']},
120         'iso-8859-3'   => {inherit => ['iso-8859-1' => 'A0']}, #TODO: also apply to iso-8859-9
121         'iso-8859-4'   => {inherit => ['iso-8859-2' => 'A0']},
122         'iso-8859-5'   => {inherit => ['iso-8859-1' => 'A0']},
123         'iso-8859-6'   => {inherit => ['cp1256' => '80', 'iso-8859-1' => 'A0']},
124         'iso-8859-7'   => {inherit => ['iso-8859-1' => 'A0']},
125         'iso-8859-8'   => {inherit => ['iso-8859-1' => 'A0']},
126         'iso-8859-9'   => {inherit => ['iso-8859-1' => 'D0-DF+F0']},
127         'iso-8859-10'  => {inherit => ['iso-8859-4' => 'A0']},
128         'iso-8859-11'  => {inherit => ['iso-8859-1' => 'A0']},
129         'iso-8859-13'  => {inherit => ['iso-8859-4' => 'A0']},
130         'iso-8859-14'  => {inherit => ['iso-8859-1' => 'A0']},
131         'iso-8859-15'  => {inherit => ['iso-8859-1' => 'A0-BF']},
132         'iso-8859-16'  => {inherit => ['iso-8859-2' => 'A0']},
133         'hp-roman8'    => {inherit => ['iso-8859-1' => 'A0']},
134
135         'cp1252'       => {inherit => ['iso-8859-1' => '80-9F']},
136         'cp1250'       => {inherit => ['iso-8859-2' => '80-BF', 'cp1252' => '80']},
137         'cp1254'       => {inherit => ['iso-8859-9' => '80-9F', 'cp1252' => '80-9F+D0']},
138         'cp874'        => {inherit => ['iso-8859-11' => '80-9F', 'cp1252' => '80']}, # windows-874 actually cp1162
139         'cp1257'       => {inherit => ['iso-8859-13' => '80-9F+FF', 'cp1252' => '80']},
140         'cp1251'       => {inherit => ['cp1252' => '80']},
141         'cp1253'       => {inherit => ['cp1252' => '80']},
142         'cp1255'       => {inherit => ['iso-8859-8' => '80-DF', 'cp1252' => '80']},
143         'cp1256'       => {inherit => ['cp1252' => '80']},
144         'cp1258'       => {inherit => ['cp1252' => '80-9F+C0']},
145
146         'cp850'        => {inherit => ['cp437' => '90']},
147         'cp860'        => {inherit => ['cp437' => '80-AF']},
148         'cp861'        => {inherit => ['cp865' => '80-AF']},
149         'cp863'        => {inherit => ['cp437' => '80-AF']},
150         'cp865'        => {inherit => ['cp437' => '90-AF']},
151         'cp852'        => {inherit => ['cp850' => '80', 'cp437' => '80']},
152         'cp857'        => {inherit => ['cp850' => '80-AF+D0-EF', 'cp437' => '80']},
153         'cp775'        => {inherit => ['cp850' => '80']},  # partial cp437
154         'cp866'        => {inherit => ['cp437' => '80-AF+E0']},
155         'cp855'        => {inherit => ['cp437' => '80']},
156         'cp1006'       => {inherit => ['iso-8859-6' => 'A0', 'cp437' => '80']},
157         'cp737'        => {inherit => ['cp437' => '80-AF+E0']},
158         'cp869'        => {inherit => ['cp437' => '80']},
159         'cp862'        => {inherit => ['cp437' => '80-9F']},
160         'cp864'        => {inherit => ['MacArabic' => '80', 'iso-8859-6' => '80', 'cp437' => '80']}, #TODO: compare form variants
161
162         'koi8-u'       => {inherit => ['koi8-r' => '90-BF']},
163         'koi8-f'       => {inherit => ['koi8-u' => '90-BF']},
164         'mik'          => {inherit => ['cp437' => '80-D8', 'cp866' => 'B0'], setup => sub {  # setup: table = chr(0)..chr(0x7F), then eight rows of 16 for the upper half
165                 $_[0]->{table} = [(map {chr} 0 .. 0x7F), qw(
166                         А Б В Г Д Е Ж З И Й К Л М Н О П
167                         Р С Т У Ф Х Ц Ч Ш Щ Ъ Ы Ь Э Ю Я
168                         а б в г д е ж з и й к л м н о п
169                         р с т у ф х ц ч ш щ ъ ы ь э ю я
170                         └ ┴ ┬ ├ ─ ┼ ╣ ║ ╚ ╔ ╩ ╦ ╠ ═ ╬ ┐
171                         ░ ▒ ▓ │ ┤ № § ╗ ╝ ┘ ┌ █ ▄ ▌ ▐ ▀
172                         α ß Γ π Σ σ µ τ Φ Θ Ω δ ∞ φ ε ∩
173                         ≡ ± ≥ ≤ ⌠ ⌡ ÷ ≈ ° ∙ · √ ⁿ ² ■
174                 ), "\xA0"];  # the final entry (no-break space) is appended as an escaped string after the word list
175         }},
176
177         'macromanian'  => {inherit => ['MacRoman' => 'A0-BF+D0-DF']},
178         'macrumanian'  => {inherit => ['MacRomanian' => 'A0-BF+D0-DF', 'MacRoman' => 'A0-BF+D0-DF']},
179         'maccroatian'  => {inherit => ['MacRoman' => 'A0']},
180         'maccentraleurroman' => {inherit => ['MacRoman' => '80']},
181         'macicelandic' => {inherit => ['MacRoman' => 'A0-AF+D0-EF']},
182         'macturkish'   => {inherit => ['MacRoman' => 'D0-DF']}, # F5 is unassigned
183         'macsami'      => {inherit => ['MacIcelandic' => '90-BF+D0-DF+F0', 'MacRoman' => '90']},
184         'macgreek'     => {inherit => ['MacRoman' => '80']},
185         'maccyrillic'  => {inherit => ['MacRoman' => '80']},
186         'machebrew'    => {
187                 inherit => ['iso-8859-8' => '80', 'MacRoman' => '80-8F+A0'], # ignore partial ascii
188                 varchar => 1,
189                 replace => {
190                         # strip private use characters for unneeded roundtrip
191                         0xDE => chr(0x5C7), # qamats qatan (U+05C7)
192                         0xC0 => 'לֹ', # lamed holam
193                         #TODO: private use for canorial codes (obsolete nikud positioning)
194                 },
195         },
196         'macarabic'    => {inherit => ['iso-8859-6' => '80', 'cp864' => '80', 'MacRoman' => '80']}, #TODO: multiple parents
197         'macfarsi'     => {inherit => ['MacArabic' => 'B0-BF', 'MacRoman' => '80']},
198         'macthai'      => {
199                 inherit => ['iso-8859-11' => '80-9F+D0'],
200                 varchar => 1,
201                 replace => {
202                         # strip appended private use characters for unneeded roundtrip
203                         0x83 => "\x{E48}", 0x88 => "\x{E48}", 0x98 => "\x{E48}",
204                         0x84 => "\x{E49}", 0x89 => "\x{E49}", 0x99 => "\x{E49}",
205                         0x85 => "\x{E4A}", 0x8A => "\x{E4A}", 0x9A => "\x{E4A}",
206                         0x86 => "\x{E4B}", 0x8B => "\x{E4B}", 0x9B => "\x{E4B}",
207                         0x87 => "\x{E4C}", 0x8C => "\x{E4C}", 0x9C => "\x{E4C}",
208                         0x8F => "\x{E4D}",
209                         0x92 => "\x{E31}", 0x93 => "\x{E47}", 0x94 => "\x{E34}",
210                         0x95 => "\x{E35}", 0x96 => "\x{E36}", 0x97 => "\x{E37}",
211                 },
212         },
213
214         'cp37'         => {inherit => ['posix-bc' => '00']},
215         'posix-bc'     => {inherit => ['cp1047' => '40']},
216         'cp500'        => {inherit => ['cp37' => '40-5F+B0-BF']},
217         'cp1047'       => {inherit => ['cp37' => '10-2F+50-5F+A0-BF']},
218         'cp1026'       => {inherit => ['cp37' => '40']},
219         'cp875'        => {inherit => ['cp37' => '30']},
220
221         legacy     => [qw( cp437 ATASCII PETSCII MSX ZX-Spectrum ANSEL )],
222         'petscii'      => {inherit => ['' => '40-7F+A0-BF'], setup => sub {  # setup: table = chr(0)..chr(0x3F), then the rows below starting at 0x40
223                 $_[0]->{table} = [(map {chr} 0 .. 0x3F), qw(
224                         @ a b c d e f g h i j k l m n o p q r s t u v w x y z [ £ ] ↑ ←
225                         🭹 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ┼ 🮌 │ 🮖 🮘
226                         . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
227                           ▌ ▄ ▔ ▁ ▏ ▒ ▕ 🮏 🮙 🮇 ├ ▗ └ ┐ ▂ ┌ ┴ ┬ ┤ ▎ ▍ 🮈 🮂 🮃 ▃ ✓ ▖ ▝ ┘ ▘ ▚
228                 )];  # NOTE(review): the row of "." looks like filler for positions not described here - confirm
229         }},
230         'atascii'      => {inherit => ['' => '0-1F+60-7F'], setup => sub {  # setup: table is given in full from position 0, four rows of 32
231                 $_[0]->{table} = [qw(
232                         ♥ ├ 🮇 ┘ ┤ ┐ ╱ ╲ ◢ ▗ ◣ ▝ ▘ 🮂 ▂ ▖ ♣ ┌ ─ ┼ • ▄ ▎ ┬ ┴ ▌ └ ␛ ↑ ↓ ← →
233                         _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
234                         _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
235                         ♦ a b c d e f g h i j k l m n o p q r s t u v w x y z ♠ | 🢰 ◀ ▶
236                 )];  # NOTE(review): the rows of "_" look like filler for positions not described here - confirm
237         }},
238         'zx-spectrum'  => {
239                 inherit => ['' => '50-8F'],
240                 set => 'ascii',
241                 replace => {
242                         ord('^') => '↑',
243                         ord('`') => '£',
244                         0x7F => '© ▝▘▀▗▐▚▜▖▞▌▛▄▟▙█',
245                 },
246         },
247         'msx'          => {inherit => ['cp437' => '80-FF'], setup => sub {  # setup: table = chr(0)..chr(0x7F), then the rows below starting at 0x80
248                 $_[0]->{table} = [(map {chr} 0 .. 0x7F), qw(
249                         Ç ü é â ä à å ç ê ë è ï î ì Ä Å É æ Æ ô ö ò û ù ÿ Ö Ü ¢ £ ¥ ₧ ƒ
250                         á í ó ú ñ Ñ ª º ¿ ⌐ ¬ ½ ¼ ¡ « » Ã ã Ĩ ĩ Õ õ Ũ ũ IJ ij ¾ ∽ ◊ ‰ ¶ §
251                         ▂ ▚ ▆ 🮂 ▬ 🮅 ▎ ▞ ▊ 🮇 🮊 🮙 🮘 🭭 🭯 🭬 🭮 🮚 🮛 ▘ ▗ ▝ ▖ 🮖 Δ ‡ ω █ ▄ ▌ ▐ ▀
252                         α ß Γ π Σ σ µ τ Φ Θ Ω δ ∞ ⌀ ∈ ∩ ≡ ± ≥ ≤ ⌠ ⌡ ÷ ≈ ° ∙ · √ ⁿ ² ■
253                 )];  # the last row holds 31 words, so the table ends one position short of 0xFF
254         }},
255         'brascii'      => {  # setup builds an identity table; replace lists the two ligatures at 0xD7 and 0xF7
256                 inherit => ['' => 'D0-DF+F0-FF'],
257                 replace => {0xD7 => 'Œ', 0xF7 => 'œ'},
258                 setup   => sub {
259                         my ($row) = @_;
260                         # every position N from 0x00 to 0xFF starts out as chr(N)
261                         my @identity = map { chr } 0x00 .. 0xFF;
262                         $row->{table} = \@identity;
263                 },
264         },
265         'ansel'        => {
266                 note => '+GEDCOM',
267                 inherit => ['' => 'A0-CF+E0-FE'],
268                 setup => sub {  # builds the table positionally; undef marks positions without a character
269                         $_[0]->{table} = [
270                                 (undef) x 0xA0,  # 0x00-0x9F: nothing defined here
271                                 undef, qw( Ł Ø Đ Þ Æ Œ ʹ · ♭ ®    ±          Ơ Ư ʾ ), undef,  # 0xA0-0xAF (gaps inside qw are cosmetic only)
272                                 qw( ʿ      ł ø đ þ æ œ ʺ ı £ ð ), undef, qw( ơ ư ), undef, undef,  # 0xB0-0xBF
273                                 qw( °      ℓ ℗ © ♯ ¿ ¡ ), (undef) x 0x19,  # 0xC0-0xDF: seven symbols, the remaining 0x19 slots undefined
274                                 (map {$_ && chr}  # 0xE0-0xFF: code points turned into characters; undef entries stay undef
275                                         0x309, 0x300, 0x0301, 0x0302, 0x0303, 0x304, 0x306, 0x307,
276                                         0x308, 0x30C, 0x030A, 0xFE20, 0xFE21, 0x315, 0x30B, 0x310,
277                                         0x327, 0x328, 0x0323, 0x0324, 0x0325, 0x333, 0x332, 0x326,
278                                         0x31C, 0x32E, 0xFE22, 0xFE23, undef,  undef, 0x313, undef,
279                                 ),
280                         ];
281                 },
282                 replace => {
283                         # GEDCOM extensions
284                         0xBE => '□',
285                         0xBF => '■',
286                         0xCD => 'e', # endowment?
287                         0xCE => 'o', # ordinance?
288                         0xCF => 'ß',
289                         0xFC => "\x{338}",
290                         # MARC21 extensions
291                         0xC7 => 'ß',
292                         0xC8 => '€',
293                 },
294         },
295         'ti86'         => {
296                 note => 'similar to TI85',
297                 inherit => ['' => '0-1F+80-EC'],
298                 setup => sub {  # builds the table positionally; undef marks positions without a character
299                         $_[0]->{table} = [
300                                 undef, qw(
301                                         𝐛 𝐨 𝐝 𝐡 ▶ ⬆ ⬇ ∫ × 𝐀 𝐁 𝐂 𝐃 𝐄 𝐅
302                                         √ ⁻¹ ² ∠ ° ʳ ᵀ ≤ ≠ ≥ ⁻ ᴇ → ⏨ ↑ ↓
303                                 ),  # 0x00 undefined, then 31 entries for 0x01-0x1F (an entry may be more than one character)
304                                 (undef) x 0x60,  # 0x20-0x7F: nothing defined here
305                                 qw(
306                                         ₀ ₁ ₂ ₃ ₄ ₅ ₆ ₇ ₈ ₉ Á À Â Ä á à
307                                         â ä É È Ê Ë é è ê ë Í Ì Î Ï í ì
308                                         î ï Ó Ò Ô Ö ó ò ô ö Ú Ù Û Ü ú ù
309                                         û ü Ç ç Ñ ñ ´ ` ¨ ¿ ¡ α β γ Δ δ
310                                         ϵ θ λ μ π ρ Σ σ τ ϕ Ω x̅ y̅ ˟ … ◀
311                                         ■ ∕ ‐ ² ° ³ :⃞ ➧ ⧵ 🙽 ◥ ◣ ⊸ ∘ ⋱ █
312                                         ⇧ A⃞ a⃞ _ ⇧̲ A̲ a̲ ▒ ⬞ ˖ · ⁴ ﹦
313                                 ),  # 0x80-0xEC: six rows of 16 plus a final row of 13
314                         ];
315                 },
316         },
317         'ti89'         => {
318                 note => 'also TI92(+)',
319                 inherit => ['' => '0-1F+7F-BE'],
320                 setup => sub {  # builds the full table positionally from 0x00
321                         $_[0]->{table} = [
322                                 qw(
323                                         ▒ ␁ ␂ ␃ ␄ ␅ ␆ 🔔 ⌫ ⇥ ), chr(0xA), qw( ⬏ ⤒ ↵ 🔒 ✓
324                                         ■ ◂ ▸ ▴ ▾ ← → ↑ ↓ ◀ ▶ ⬆ ∪ ∩ ⊂ ∈
325                                 ),  # 0x00-0x1F; chr(0xA) is spliced in at position 0x0A between the two word lists
326                                 (map {chr} 0x20 .. 0x7E), '◆',  # 0x20-0x7E map to themselves, 0x7F is the diamond
327                                 qw(
328                                         α β Γ γ Δ δ ε ζ θ λ ξ ∏ π ρ ∑ σ
329                                         τ ϕ ψ Ω ω ᴇ ℯ 𝐢 ʳ ᵀ x̅ y̅ ≤ ≠ ≥ ∠
330                                         … ¡ ¢ £ ¤ ¥ ¦ § √ © ª « ¬ ⁻ ® ¯
331                                         ° ± ² ³ ⁻¹ µ ¶ · ⁺ ¹ º » 𝑑 ∫ ∞ ¿
332                                 ),  # 0x80-0xBF: four rows of 16 (an entry may be more than one character)
333                         ];
334                 },
335         },
336
337         ''             => {setup => sub {  # builds the table directly from the code points startpoint..endpoint of the given row
338                 my ($row) = @_;
339                 # a row without startpoint begins at 0 instead of raising uninitialized-value warnings
340                 my $first = $row->{offset} = delete($row->{startpoint}) // 0;
341                 my $page  = $first >> 8;  # number of the 256-character block holding the start
342                 $row->{endpoint} ||= (($page + 1) << 8) - 1;  # default: run to the end of that block
343                 my $last  = $row->{endpoint};
344                 my $single_block = $page == ($last >> 8);  # does the whole range stay inside one block?
345                 # one character per code point; unassigned ones are shown as the replacement character
346                 $row->{table} = join '', map { chr =~ s/\A\p{Unassigned}\z/�/r } $first .. $last;
347                 utf8::upgrade($row->{table});  # prevent latin1 output
348
349                 $row->{endpoint} = $last - $first;  # from here on endpoint is relative to the start
350                 $row->{set} = 'Unicode characters';
351                 if ($single_block) {  # name the block and keep only the position within it as offset
352                         $row->{set} = sprintf 'Unicode block U+%02Xxx', $page;
353                         $row->{offset} %= 0x100;
354                 }
355
356                 return $row;  # the same hash reference that was passed in, modified in place
357         }},
358         u              => {setup => sub {  # attaches the cell data from charset-unicode.inc.pl and labels the requested range
359                 my ($row) = @_;
360                 # state: the include is evaluated on the first call only and its result reused afterwards
361                 state $celldata = do 'charset-unicode.inc.pl'
362                         or Alert('Table data could not be read', $@ || $!);
363                 $row->{cell} = $celldata;
364                 $row->{endpoint} ||= 0x1FFF;  # default end when the row gives none
365                 my ($from, $upto) = @$row{qw( startpoint endpoint )};
366                 my $span = 'allocations';  # label for any range matching neither case below
367                 if    ($from <  0x1000 && $upto < 0x1000) { $span = 'BMP' }
368                 elsif ($from >= 0x1000 && $upto < 0x2000) { $span = 'SMP' }
369                 $row->{set} = 'Unicode ' . $span;
370                 return $row;  # the same hash reference that was passed in, modified in place
371         }},
372         uu             => {setup => sub {  # attaches the cell data from charset-ucplanes.inc.pl and labels the row 'Unicode planes'
373                 my ($row) = @_;
374                 # the include is evaluated on every call; a false result is reported through Alert
375                 ($row->{cell} = do 'charset-ucplanes.inc.pl')
376                         or Alert('Table data could not be read', $@ || $!);
377                 @$row{qw( endpoint set )} = ($row->{endpoint} || 0x3FF, 'Unicode planes');  # 0x3FF is the default end
378                 return $row;  # the same hash reference that was passed in, modified in place
379         }},
380         utf8           => {setup => sub {  # labels the row 'UTF-8' and attaches the cell data from charset-utf8.inc.pl
381                 my ($row) = @_;
382                 $row->{set} = 'UTF-8';
383                 $row->{cell} = do 'charset-utf8.inc.pl';  # evaluated on every call
384                 Alert('Table data could not be read', $@ || $!) unless $row->{cell};
385                 return $row;  # the same hash reference that was passed in, modified in place
386         }},
387         'utf-8'        => 'utf8',
388 };