charset: legacy map ansel (and extensions)
[sheet.git] / charset-encoding.inc.pl
1 use 5.014;
2 use warnings;
3 use utf8;
4
5 +{
6         default    => [qw( u+0-27F utf8+realsize iso-8859-1 iso-8859-15 cp1252 cp437 cp850 )],
7         unicode    => [qw( uu+cols=32+realsize u+0-FFF u+1000-18AF u+18B0-1FFF )],
8         us         => [qw( cp437 cp863 gsm0338 AdobeStandardEncoding )],
9         ebcdic     => [qw( cp37 cp500 cp1047 posix-bc cp1026 cp875 )],
10         iso        => [map {"iso-8859-$_"} 1 .. 11, 13 .. 16],
11         dos        => [qw( cp437 cp865 cp861 cp860 cp863 cp850 cp857 cp852 cp775
12                            cp737 cp869 cp866 MIK cp855 cp862 cp864 )],
13         aix        => [qw( cp1006 )],
14         win        => [qw( cp1252 cp1250 cp1254 cp1257 cp1258 cp1253 cp1251 cp1255 cp1256 cp874 )],
15         mac        => [qw( MacRoman MacRomanian MacRumanian MacCroatian MacCentralEurRoman MacTurkish MacIcelandic MacSami
16                            MacGreek MacCyrillic MacHebrew MacArabic MacFarsi MacThai )],
17
18         # languages
19         westeur    => [qw( iso-8859-1 iso-8859-15 cp1252 iso-8859-14 cp850 hp-roman8 nextstep MacRoman )],
20         centeur    => [qw( iso-8859-2 iso-8859-16 cp1250 cp852 MacRomanian MacCroatian MacCentralEurRoman )], # MacRumanian only for DB
21         turkish    => [qw( iso-8859-9 iso-8859-3 cp1254 cp857 MacTurkish )],
22         norteur    => [qw( baltic nordic )],
23         baltic     => [qw( iso-8859-4 iso-8859-13 cp1257 cp775 )],
24         nordic     => [qw( iso-8859-10 cp865 cp861 MacIcelandic MacSami )],
25         cyrillic   => [qw( koi8-r koi8-u koi8-f iso-8859-5 cp1251 MacCyrillic cp866 MIK cp855
26                            +400 +2DE0 +A640-A69F +500-52F )], # MacUkrainian is broken
27         arabic     => [qw( iso-8859-6 cp1256 MacArabic cp864 cp1006 MacFarsi
28                            +600 +8A0-8BF+8E0 +750-77F )],
29         greek      => [qw( iso-8859-7 cp1253 MacGreek cp737 cp869  +370 +1F00 )],
30         hebrew     => [qw( iso-8859-8 cp1255 MacHebrew cp862  +590 )],
31         thai       => [qw( iso-8859-11 cp874 MacThai )], # U+0E00 identical to iso-8859-11
32         vietnamese => [qw( viscii cp1258 MacVietnamese )],
33         symbols    => [qw( symbol dingbats MacDingbats wingdings wingdings2 wingdings3 webdings )],
34
35         # iso-code shorthand
36         1 => 'westeur',
37         2 => 'centeur',
38         3 => 'turkish',
39         4 => 'baltic',
40         5 => 'cyrillic',
41         6 => 'arabic',
42         7 => 'greek',
43         8 => 'hebrew',
44         9 => 'turkish',
45         10 => 'nordic',
46         11 => 'thai',
47
48         'ascii'        => {inherit => ['' => '00-7F']},
49         'cp437'        => {
50                 inherit => ['cp850' => 0, 'ascii' => '00-1F+80'], # ascii range overridden later
51                 replace => {
52                         0    => ' ☺☻♥♦♣♠•◘○◙♂♀♪♫☼►◄↕‼¶§▬↨↑↓→←∟↔▲▼', # visible variants of control characters
53                         0xED => 'ϕ', # non-greek usage and appearance
54                 },
55         },
56         'gsm0338'      => {inherit => ['ascii' => '00-7F']},
57         'dingbats'     => {inherit => ['' => '20-7F+A0']},
58         'macdingbats'  => {inherit => ['dingbats' => '80-9F']},
59         'adobezdingbat'=> {inherit => ['MacDingbats' => '80-9F']}, # should be identical but maps to private use
60         'symbol'       => {
61                 inherit => ['' => '20-7F+A0'],
62                 replace => {
63                         0x60 => '│', # replace radical extender by closest unicode equivalent
64                         0xBD => '⏐⎯', # arrow extenders
65                         0xD2 => '®©™', # serif variants
66                         0xE0 => '◊', # replace lookalike, should match AdobeSymbol
67                         0xE2 => '®©™', # sans-serif variants
68                         0xE6 => '⎛⎜⎝⎡⎢⎣⎧⎨⎩⎪',
69                         0xF0 => '€',
70                         0xF4 => '⎮⌡⎞⎟⎠⎤⎥⎦⎫⎬⎭',
71                 },
72         },
73         'adobesymbol'  => {inherit => ['symbol' => '20-7F+A0', '' => '20-7F+A0']}, # minor differences, irrelevant except for different '€'
74         'wingdings'    => {inherit => ['' => '20'], setup => sub {
75                 $_[0]->{table} = [(map {chr} 0 .. 0x20), qw(
76                           🖉 ✂ ✁ 👓 🕭 🕮 🕯 🕿 ✆ 🖂 🖃 📪 📫 📬 📭 📁 📂 📄 🗏 🗐 🗄 ⌛ 🖮 🖰 🖲 🖳 🖴 🖫 🖬 ✇ ✍
77                         🖎 ✌ 👌 👍 👎 ☜ ☞ ☝ ☟ 🖐 ☺ 😐 ☹ 💣 ☠ 🏳 🏱 ✈ ☼ 💧 ❄ 🕆 ✞ 🕈 ✠ ✡ ☪ ☯ ॐ ☸ ♈ ♉
78                         ♊ ♋ ♌ ♍ ♎ ♏ ♐ ♑ ♒ ♓ 🙰 🙵 ● 🔾 ■ □ 🞐 ❑ ❒ ⬧ ⧫ ◆ ❖ ⬥ ⌧ ⮹ ⌘ 🏵 🏶 🙶 🙷 \7f
79                         ⓪ ① ② ③ ④ ⑤ ⑥ ⑦ ⑧ ⑨ ⑩ ⓿ ❶ ❷ ❸ ❹ ❺ ❻ ❼ ❽ ❾ ❿ 🙢 🙠 🙡 🙣 🙞 🙜 🙝 🙟 · •
80                         ▪ ⚪ 🞆 🞈 ◉ ◎ 🔿 ▪ ◻ 🟂 ✦ ★ ✶ ✴ ✹ ✵ ⯐ ⌖ ⟡ ⌑ ⯑ ✪ ✰ 🕐 🕑 🕒 🕓 🕔 🕕 🕖 🕗 🕘
81                         🕙 🕚 🕛 ⮰ ⮱ ⮲ ⮳ ⮴ ⮵ ⮶ ⮷ 🙪 🙫 🙕 🙔 🙗 🙖 🙐 🙑 🙒 🙓 ⌫ ⌦ ⮘ ⮚ ⮙ ⮛ ⮈ ⮊ ⮉ ⮋ 🡨
82                         🡪 🡩 🡫 🡬 🡭 🡯 🡮 🡸 🡺 🡹 🡻 🡼 🡽 🡿 🡾 ⇦ ⇨ ⇧ ⇩ ⬄ ⇳ ⬀ ⬁ ⬃ ⬂ 🢬 🢭 🗶 ✔ 🗷 🗹 
83                 )];
84         }},
85         'wingdings2'   => {inherit => ['' => '20'], setup => sub {
86                 $_[0]->{table} = [(map {chr} 0 .. 0x20), qw(
87                           🖊 🖋 🖌 🖍 ✄ ✀ 🕾 🕽 🗅 🗆 🗇 🗈 🗉 🗊 🗋 🗌 🗍 📋 🗑 🗔 🖵 🖶 🖷 🖸 🖭 🖯 🖱 🖒 🖓 🖘 🖙
88                         🖚 🖛 👈 👉 🖜 🖝 🖞 🖟 🖠 🖡 👆 👇 🖢 🖣 🖑 🗴 ✓ 🗵 ☑ ☒ ☒ ⮾ ⮿ ⦸ ⦸ 🙱 🙴 🙲 🙳 ‽ 🙹 🙺
89                         🙻 🙦 🙤 🙥 🙧 🙚 🙘 🙙 🙛 ⓪ ① ② ③ ④ ⑤ ⑥ ⑦ ⑧ ⑨ ⑩ ⓿ ❶ ❷ ❸ ❹ ❺ ❻ ❼ ❽ ❾ ❿ \7f
90                         ☉ 🌕 ☽ ☾ ⸿ ✝ 🕇 🕜 🕝 🕞 🕟 🕠 🕡 🕢 🕣 🕤 🕥 🕦 🕧 🙨 🙩 • ● ⚫ ⬤ 🞅 🞆 🞇 🞈 🞊 ⦿ ◾
91                         ■ ◼ ⬛ ⬜ 🞑 🞒 🞓 🞔 ▣ 🞕 🞖 🞗 ⬩ ⬥ ◆ ◇ 🞚 ◈ 🞛 🞜 🞝 ⬪ ⬧ ⧫ ◊ 🞠 ◖ ◗ ⯊ ⯋ ◼ ⬥
92                         ⬟ ⯂ ⬣ ⬢ ⯃ ⯄ 🞡 🞢 🞣 🞤 🞥 🞦 🞧 🞨 🞩 🞪 🞫 🞬 🞭 🞮 🞯 🞰 🞱 🞲 🞳 🞴 🞵 🞶 🞷 🞸 🞹 🞺
93                         🞻 🞼 🞽 🞾 🞿 🟀 🟂 🟄 ✦ 🟉 ★ ✶ 🟋 ✷ 🟏 🟒 ✹ 🟃 🟇 ✯ 🟍 🟔 ⯌ ⯍ ※ ⁂
94                 )];
95         }},
96         'wingdings3'   => {inherit => ['' => '20'], setup => sub {
97                 $_[0]->{table} = [(map {chr} 0 .. 0x20), qw(
98                           ⭠ ⭢ ⭡ ⭣ ⭦ ⭧ ⭩ ⭨ ⭰ ⭲ ⭱ ⭳ ⭶ ⭸ ⭻ ⭽ ⭤ ⭥ ⭪ ⭬ ⭫ ⭭ ⭍ ⮠ ⮡ ⮢ ⮣ ⮤ ⮥ ⮦ ⮧
99                         ⮐ ⮑ ⮒ ⮓ ⮀ ⮃ ⭾ ⭿ ⮄ ⮆ ⮅ ⮇ ⮏ ⮍ ⮎ ⮌ ⭮ ⭯ ⎋ ⌤ ⌃ ⌥ ⎵ ⏡ ⇪ ⮸ 🢠 🢡 🢢 🢣 🢤 🢥
100                         🢦 🢧 🢨 🢩 🢪 🢫 ← → ↑ ↓ ↖ ↗ ↙ ↘ 🡘 🡙 ▲ ▼ △ ▽ ◄ ► ◁ ▷ ◣ ◢ ◤ ◥ 🞀 🞂 🞁 \7f
101                         🞃 ▲ ▼ ◀ ▶ ⮜ ⮞ ⮝ ⮟ 🠐 🠒 🠑 🠓 🠔 🠖 🠕 🠗 🠘 🠚 🠙 🠛 🠜 🠞 🠝 🠟 🠀 🠂 🠁 🠃 🠄 🠆 🠅
102                         🠇 🠈 🠊 🠉 🠋 🠠 🠢 🠤 🠦 🠨 🠨 🠪 🢜 🢝 🢞 🢟 🠮 🠰 🠲 🠴 🠶 🠸 🠺 🠹 🠻 🢘 🢚 🢙 🢛 🠼 🠾 🠽
103                         🠿 🡀 🡂 🡁 🡃 🡄 🡆 🡅 🡇 ⮨ ⮩ ⮪ ⮫ ⮬ ⮭ ⮮ ⮯ 🡠 🡢 🡡 🡣 🡤 🡥 🡧 🡦 🡰 🡲 🡱 🡳 🡴 🡵 🡷
104                         🡶 🢀 🢂 🢁 🢃 🢄 🢅 🢇 🢆 🢐 🢒 🢑 🢓 🢔 🢖 🢕 🢗
105                 )];
106         }},
107         'webdings'     => {inherit => ['' => '20'], setup => sub {
108                 $_[0]->{table} = [(map {chr} 0 .. 0x20), qw(
109                           🕷 🕸 🕲 🕶 🏆 🎖 🖇 🗨 🗩 🗰 🗱 🌶 🎗 ▞ 🙼 🗕 🗖 🗗 ⏴ ⏵ ⏶ ⏷ ⏪ ⏩ ⏮ ⏭ ⏸ ⏹ ⏺ 🗚 🗳
110                         🛠 🏗 🏘 🏙 🏚 🏜 🏭 🏛 🏠 🏖 🏝 🛣 🔍 🏔 👁 👂 🏞 🏕 🛤 🏟 🛳 🕬 🕫 🕨 🔈 🎔 🎕 🗬 🙽 🗭 🗪 🗫
111                         ⮔ ✔ 🚲 □ 🛡 📦 🛱 ■ 🚑 🛈 🛩 🛰 🟈 🕴 ⚫ 🛥 🚔 🗘 🗙 ❓ 🛲 🚇 🚍 ⛳ 🛇 ⊖ 🚭 🗮 | 🗯 🗲 \7f
112                         🚹 🚺 🛉 🛊 🚼 👽 🏋 ⛷ 🏂 🏌 🏊 🏄 🏍 🏎 🚘 🗠 🛢 💰 🏷 💳 👪 🗡 🗢 🗣 ✯ 🖄 🖅 🖃 🖆 🖹 🖺 🖻
113                         🕵 🕰 🖽 🖾 📋 🗒 🗓 📖 📚 🗞 🗟 🗃 🗂 🖼 🎭 🎜 🎘 🎙 🎧 💿 🎞 📷 🎟 🎬 📽 📹 📾 📻 🎚 🎛 📺 💻
114                         🖥 🖦 🖧 🕹 🎮 🕻 🕼 📟 🖁 🖀 🖨 🖩 🖿 🖪 🗜 🔒 🔓 🗝 📥 📤 🕳 🌣 🌤 🌥 🌦 ☁ 🌧 🌨 🌩 🌪 🌬 🌫
115                         🌜 🌡 🛋 🛏 🍽 🍸 🛎 🛍 Ⓟ ♿ 🛆 🖈 🎓 🗤 🗥 🗦 🗧 🛪 🐿 🐦 🐟 🐕 🐈 🙬 🙮 🙭 🙯 🗺 🌍 🌏 🌎 🕊
116                 )];
117         }},
118
119         'iso-8859-2'   => {inherit => ['iso-8859-1' => 'A0']},
120         'iso-8859-3'   => {inherit => ['iso-8859-1' => 'A0']}, #TODO: also apply to iso-8859-9
121         'iso-8859-4'   => {inherit => ['iso-8859-2' => 'A0']},
122         'iso-8859-5'   => {inherit => ['iso-8859-1' => 'A0']},
123         'iso-8859-6'   => {inherit => ['cp1256' => '80', 'iso-8859-1' => 'A0']},
124         'iso-8859-7'   => {inherit => ['iso-8859-1' => 'A0']},
125         'iso-8859-8'   => {inherit => ['iso-8859-1' => 'A0']},
126         'iso-8859-9'   => {inherit => ['iso-8859-1' => 'D0-DF+F0']},
127         'iso-8859-10'  => {inherit => ['iso-8859-4' => 'A0']},
128         'iso-8859-11'  => {inherit => ['iso-8859-1' => 'A0']},
129         'iso-8859-13'  => {inherit => ['iso-8859-4' => 'A0']},
130         'iso-8859-14'  => {inherit => ['iso-8859-1' => 'A0']},
131         'iso-8859-15'  => {inherit => ['iso-8859-1' => 'A0-BF']},
132         'iso-8859-16'  => {inherit => ['iso-8859-2' => 'A0']},
133         'hp-roman8'    => {inherit => ['iso-8859-1' => 'A0']},
134
135         'cp1252'       => {inherit => ['iso-8859-1' => '80-9F']},
136         'cp1250'       => {inherit => ['iso-8859-2' => '80-BF', 'cp1252' => '80']},
137         'cp1254'       => {inherit => ['iso-8859-9' => '80-9F', 'cp1252' => '80-9F+D0']},
138         'cp874'        => {inherit => ['iso-8859-11' => '80-9F', 'cp1252' => '80']}, # windows-874 actually cp1162
139         'cp1257'       => {inherit => ['iso-8859-13' => '80-9F+FF', 'cp1252' => '80']},
140         'cp1251'       => {inherit => ['cp1252' => '80']},
141         'cp1253'       => {inherit => ['cp1252' => '80']},
142         'cp1255'       => {inherit => ['iso-8859-8' => '80-DF', 'cp1252' => '80']},
143         'cp1256'       => {inherit => ['cp1252' => '80']},
144         'cp1258'       => {inherit => ['cp1252' => '80-9F+C0']},
145
146         'cp850'        => {inherit => ['cp437' => '90']},
147         'cp860'        => {inherit => ['cp437' => '80-AF']},
148         'cp861'        => {inherit => ['cp865' => '80-AF']},
149         'cp863'        => {inherit => ['cp437' => '80-AF']},
150         'cp865'        => {inherit => ['cp437' => '90-AF']},
151         'cp852'        => {inherit => ['cp850' => '80', 'cp437' => '80']},
152         'cp857'        => {inherit => ['cp850' => '80-AF+D0-EF', 'cp437' => '80']},
153         'cp775'        => {inherit => ['cp850' => '80']},  # partial cp437
154         'cp866'        => {inherit => ['cp437' => '80-AF+E0']},
155         'cp855'        => {inherit => ['cp437' => '80']},
156         'cp1006'       => {inherit => ['iso-8859-6' => 'A0', 'cp437' => '80']},
157         'cp737'        => {inherit => ['cp437' => '80-AF+E0']},
158         'cp869'        => {inherit => ['cp437' => '80']},
159         'cp862'        => {inherit => ['cp437' => '80-9F']},
160         'cp864'        => {inherit => ['MacArabic' => '80', 'iso-8859-6' => '80', 'cp437' => '80']}, #TODO: compare form variants
161
162         'koi8-u'       => {inherit => ['koi8-r' => '90-BF']},
163         'koi8-f'       => {inherit => ['koi8-u' => '90-BF']},
164         'mik'          => {inherit => ['cp437' => '80-D8', 'cp866' => 'B0'], setup => sub {
165                 $_[0]->{table} = [(map {chr} 0 .. 0x7F), qw(
166                         А Б В Г Д Е Ж З И Й К Л М Н О П
167                         Р С Т У Ф Х Ц Ч Ш Щ Ъ Ы Ь Э Ю Я
168                         а б в г д е ж з и й к л м н о п
169                         р с т у ф х ц ч ш щ ъ ы ь э ю я
170                         └ ┴ ┬ ├ ─ ┼ ╣ ║ ╚ ╔ ╩ ╦ ╠ ═ ╬ ┐
171                         ░ ▒ ▓ │ ┤ № § ╗ ╝ ┘ ┌ █ ▄ ▌ ▐ ▀
172                         α ß Γ π Σ σ µ τ Φ Θ Ω δ ∞ φ ε ∩
173                         ≡ ± ≥ ≤ ⌠ ⌡ ÷ ≈ ° ∙ · √ ⁿ ² ■
174                 ), "\xA0"];
175         }},
176
177         'macromanian'  => {inherit => ['MacRoman' => 'A0-BF+D0-DF']},
178         'macrumanian'  => {inherit => ['MacRomanian' => 'A0-BF+D0-DF', 'MacRoman' => 'A0-BF+D0-DF']},
179         'maccroatian'  => {inherit => ['MacRoman' => 'A0']},
180         'maccentraleurroman' => {inherit => ['MacRoman' => '80']},
181         'macicelandic' => {inherit => ['MacRoman' => 'A0-AF+D0-EF']},
182         'macturkish'   => {inherit => ['MacRoman' => 'D0-DF']}, # F5 is unassigned
183         'macsami'      => {inherit => ['MacIcelandic' => '90-BF+D0-DF+F0', 'MacRoman' => '90']},
184         'macgreek'     => {inherit => ['MacRoman' => '80']},
185         'maccyrillic'  => {inherit => ['MacRoman' => '80']},
186         'machebrew'    => {
187                 inherit => ['iso-8859-8' => '80', 'MacRoman' => '80-8F+A0'], # ignore partial ascii
188                 varchar => 1,
189                 replace => {
190                         # strip private use characters for unneeded roundtrip
191                         0xDE => chr(0x5C7), # qamats qatan
192                         0xC0 => 'לֹ', # lamed holam
193                         #TODO: private use for canoral codes (obsolete nikud positioning)
194                 },
195         },
196         'macarabic'    => {inherit => ['iso-8859-6' => '80', 'cp864' => '80', 'MacRoman' => '80']}, #TODO: multiple parents
197         'macfarsi'     => {inherit => ['MacArabic' => 'B0-BF', 'MacRoman' => '80']},
198         'macthai'      => {
199                 inherit => ['iso-8859-11' => '80-9F+D0'],
200                 varchar => 1,
201                 replace => {
202                         # strip appended private use characters for unneeded roundtrip
203                         0x83 => "\x{E48}", 0x88 => "\x{E48}", 0x98 => "\x{E48}",
204                         0x84 => "\x{E49}", 0x89 => "\x{E49}", 0x99 => "\x{E49}",
205                         0x85 => "\x{E4A}", 0x8A => "\x{E4A}", 0x9A => "\x{E4A}",
206                         0x86 => "\x{E4B}", 0x8B => "\x{E4B}", 0x9B => "\x{E4B}",
207                         0x87 => "\x{E4C}", 0x8C => "\x{E4C}", 0x9C => "\x{E4C}",
208                         0x8F => "\x{E4D}",
209                         0x92 => "\x{E31}", 0x93 => "\x{E47}", 0x94 => "\x{E34}",
210                         0x95 => "\x{E35}", 0x96 => "\x{E36}", 0x97 => "\x{E37}",
211                 },
212         },
213
214         'cp37'         => {inherit => ['posix-bc' => '00']},
215         'posix-bc'     => {inherit => ['cp1047' => '40']},
216         'cp500'        => {inherit => ['cp37' => '40-5F+B0-BF']},
217         'cp1047'       => {inherit => ['cp37' => '10-2F+50-5F+A0-BF']},
218         'cp1026'       => {inherit => ['cp37' => '40']},
219         'cp875'        => {inherit => ['cp37' => '30']},
220
221         legacy     => [qw( cp437 ATASCII PETSCII MSX ZX-Spectrum ANSEL )],
222         'petscii'      => {inherit => ['' => '40-7F+A0-BF'], setup => sub {
223                 $_[0]->{table} = [(map {chr} 0 .. 0x3F), qw(
224                         @ a b c d e f g h i j k l m n o p q r s t u v w x y z [ £ ] ↑ ←
225                         🭹 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ┼ 🮌 │ 🮖 🮘
226                         . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
227                           ▌ ▄ ▔ ▁ ▏ ▒ ▕ 🮏 🮙 🮇 ├ ▗ └ ┐ ▂ ┌ ┴ ┬ ┤ ▎ ▍ 🮈 🮂 🮃 ▃ ✓ ▖ ▝ ┘ ▘ ▚
228                 )];
229         }},
230         'atascii'      => {inherit => ['' => '0-1F+60-7F'], setup => sub {
231                 $_[0]->{table} = [qw(
232                         ♥ ├ 🮇 ┘ ┤ ┐ ╱ ╲ ◢ ▗ ◣ ▝ ▘ 🮂 ▂ ▖ ♣ ┌ ─ ┼ • ▄ ▎ ┬ ┴ ▌ └ ␛ ↑ ↓ ← →
233                         _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
234                         _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
235                         ♦ a b c d e f g h i j k l m n o p q r s t u v w x y z ♠ | 🢰 ◀ ▶
236                 )];
237         }},
238         'zx-spectrum'  => {
239                 inherit => ['' => '50-8F'],
240                 set => 'ascii',
241                 replace => {
242                         ord('^') => '↑',
243                         ord('`') => '£',
244                         0x7F => '© ▝▘▀▗▐▚▜▖▞▌▛▄▟▙█',
245                 },
246         },
247         'msx'          => {inherit => ['cp437' => '80-FF'], setup => sub {
248                 $_[0]->{table} = [(map {chr} 0 .. 0x7F), qw(
249                         Ç ü é â ä à å ç ê ë è ï î ì Ä Å É æ Æ ô ö ò û ù ÿ Ö Ü ¢ £ ¥ ₧ ƒ
250                         á í ó ú ñ Ñ ª º ¿ ⌐ ¬ ½ ¼ ¡ « » Ã ã Ĩ ĩ Õ õ Ũ ũ IJ ij ¾ ∽ ◊ ‰ ¶ §
251                         ▂ ▚ ▆ 🮂 ▬ 🮅 ▎ ▞ ▊ 🮇 🮊 🮙 🮘 🭭 🭯 🭬 🭮 🮚 🮛 ▘ ▗ ▝ ▖ 🮖 Δ ‡ ω █ ▄ ▌ ▐ ▀
252                         α ß Γ π Σ σ µ τ Φ Θ Ω δ ∞ ⌀ ∈ ∩ ≡ ± ≥ ≤ ⌠ ⌡ ÷ ≈ ° ∙ · √ ⁿ ² ■
253                 )];
254         }},
255         'brascii'      => {
256                 inherit => ['' => 'D0-DF+F0-FF'],
257                 setup => sub {
258                         $_[0]->{table} = [(map {chr} 0 .. 0xFF)];
259                 },
260                 replace => {
261                         0xD7 => 'Œ',
262                         0xF7 => 'œ',
263                 },
264         },
265         'ansel'        => {
266                 note => '+GEDCOM',
267                 inherit => ['' => 'A0-CF+E0-FE'],
268                 setup => sub {
269                         $_[0]->{table} = [
270                                 (undef) x 0xA0,
271                                 undef, qw( Ł Ø Đ Þ Æ Œ ʹ · ♭ ®    ±          Ơ Ư ʾ ), undef,
272                                 qw( ʿ      ł ø đ þ æ œ ʺ ı £ ð ), undef, qw( ơ ư ), undef, undef,
273                                 qw( °      ℓ ℗ © ♯ ¿ ¡ ), (undef) x 0x19,
274                                 (map {$_ && chr}
275                                         0x309, 0x300, 0x0301, 0x0302, 0x0303, 0x304, 0x306, 0x307,
276                                         0x308, 0x30C, 0x030A, 0xFE20, 0xFE21, 0x315, 0x30B, 0x310,
277                                         0x327, 0x328, 0x0323, 0x0324, 0x0325, 0x333, 0x332, 0x326,
278                                         0x31C, 0x32E, 0xFE22, 0xFE23, undef,  undef, 0x313, undef,
279                                 ),
280                         ];
281                 },
282                 replace => {
283                         # GEDCOM extensions
284                         0xBE => '□',
285                         0xBF => '■',
286                         0xCD => 'e', # endowment?
287                         0xCE => 'o', # ordinance?
288                         0xCF => 'ß',
289                         0xFC => "\x{338}",
290                         # MARC21 extensions
291                         0xC7 => 'ß',
292                         0xC8 => '€',
293                 },
294         },
295
296         ''             => {setup => sub {
297                 my $row = shift;
298                 $row->{offset} = delete $row->{startpoint};
299                 $row->{set} = 'Unicode characters';
300                 my $block = $row->{offset} >> 8;
301                 $row->{endpoint} ||= ($block + 1 << 8) - 1;
302                 $block == ($row->{endpoint} >> 8) or undef $block;
303
304                 $row->{table} = join '', map { chr =~ s/\A\p{Unassigned}\z/�/r }
305                         $row->{offset} .. $row->{endpoint};
306                 utf8::upgrade($row->{table});  # prevent latin1 output
307
308                 $row->{endpoint} -= $row->{offset};
309
310                 if (defined $block) {
311                         $row->{set} = sprintf 'Unicode block U+%02Xxx', $block;
312                         $row->{offset} %= 0x100;
313                 }
314
315                 return $row;
316         }},
317         u              => {setup => sub {
318                 my $row = shift;
319                 state $celldata = do 'charset-unicode.inc.pl'
320                         or Alert('Table data could not be read', $@ || $!);
321                 $row->{cell} = $celldata;
322
323                 $row->{endpoint} ||= 0x1FFF;
324                 $row->{set} = 'Unicode ' . (
325                         $row->{startpoint} <  0x1000 && $row->{endpoint} < 0x1000 ? 'BMP' :
326                         $row->{startpoint} >= 0x1000 && $row->{endpoint} < 0x2000 ? 'SMP' :
327                         'allocations'
328                 );
329                 return $row;
330         }},
331         uu             => {setup => sub {
332                 my $row = shift;
333                 $row->{cell} = do 'charset-ucplanes.inc.pl'
334                         or Alert('Table data could not be read', $@ || $!);
335                 $row->{endpoint} ||= 0x3FF;
336                 $row->{set} = 'Unicode planes';
337                 return $row;
338         }},
339         utf8           => {setup => sub {
340                 my $row = shift;
341                 $row->{set} = 'UTF-8';
342                 $row->{cell} = do 'charset-utf8.inc.pl'
343                         or Alert('Table data could not be read', $@ || $!);
344                 return $row;
345         }},
346         'utf-8'        => 'utf8',
347 };