charset: legacy map ansel (and extensions)
[sheet.git] / charset-encoding.inc.pl
index 1414f394fe1a583730ea949d87d562bf219ce89d..f4a9f26d1f9007a8153d320f467b40665a2a44ff 100644 (file)
@@ -218,6 +218,81 @@ use utf8;
        'cp1026'       => {inherit => ['cp37' => '40']},  # declared relative to cp37; the '40' spec is interpreted by inherit handling outside this view
        'cp875'        => {inherit => ['cp37' => '30']},  # likewise relative to cp37, with spec '30'
 
+       legacy     => [qw( cp437 ATASCII PETSCII MSX ZX-Spectrum ANSEL )],  # NOTE(review): mixed-case names here vs. lowercase keys below (presumably normalised on lookup - confirm); 'brascii' is defined below but not listed
+       'petscii'      => {inherit => ['' => '40-7F+A0-BF'], setup => sub {  # 0x00-0x3F identity, then rows of 32 code points each starting at 0x40
+               $_[0]->{table} = [(map {chr} 0 .. 0x3F), qw(
+                       @ a b c d e f g h i j k l m n o p q r s t u v w x y z [ £ ] ↑ ←
+                       🭹 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ┼ 🮌 │ 🮖 🮘
+                       . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+               ), "\xA0", qw( ▌ ▄ ▔ ▁ ▏ ▒ ▕ 🮏 🮙 🮇 ├ ▗ └ ┐ ▂ ┌ ┴ ┬ ┤ ▎ ▍ 🮈 🮂 🮃 ▃ ✓ ▖ ▝ ┘ ▘ ▚
+               )];  # 0xA0 (no-break space) is an explicit escape: inside qw() it looks like padding, and losing it shifts the whole A0-BF row down by one
+       }},
+       'atascii'      => {inherit => ['' => '0-1F+60-7F'], setup => sub {
+               my ($row) = @_;  # the charset row being set up
+               $row->{table} = [  # 0x00-0x1F glyphs, 0x20-0x5F placeholders, 0x60-0x7F glyphs and lowercase letters
+                       qw( ♥ ├ 🮇 ┘ ┤ ┐ ╱ ╲ ◢ ▗ ◣ ▝ ▘ 🮂 ▂ ▖ ♣ ┌ ─ ┼ • ▄ ▎ ┬ ┴ ▌ └ ␛ ↑ ↓ ← → ),
+                       ('_') x 0x40,
+                       qw( ♦ a b c d e f g h i j k l m n o p q r s t u v w x y z ♠ | 🢰 ◀ ▶ ),
+               ];
+       }},
+       'zx-spectrum'  => {
+               set     => 'ascii',
+               inherit => ['' => '50-8F'],
+               replace => {
+                       0x5E => '↑',  # position of '^'
+                       0x60 => '£',  # position of '`'
+                       0x7F => '© ▝▘▀▗▐▚▜▖▞▌▛▄▟▙█',  # 17 characters; presumably spread over 0x7F-0x8F by the replace handling - confirm
+               },
+       },
+       'msx'          => {inherit => ['cp437' => '80-FF'], setup => sub {  # 0x00-0x7F identity, then four rows from 0x80 (the last row has 31 entries, so 0xFF stays unset)
+               $_[0]->{table} = [(map {chr} 0 .. 0x7F), qw(
+                       Ç ü é â ä à å ç ê ë è ï î ì Ä Å É æ Æ ô ö ò û ù ÿ Ö Ü ¢ £ ¥ ₧ ƒ
+                       á í ó ú ñ Ñ ª º ¿ ⌐ ¬ ½ ¼ ¡ « » Ã ã Ĩ ĩ Õ õ Ũ ũ ), "\x{132}", "\x{133}", qw( ¾ ∽ ◊ ‰ ¶ §
+                       ▂ ▚ ▆ 🮂 ▬ 🮅 ▎ ▞ ▊ 🮇 🮊 🮙 🮘 🭭 🭯 🭬 🭮 🮚 🮛 ▘ ▗ ▝ ▖ 🮖 Δ ‡ ω █ ▄ ▌ ▐ ▀
+                       α ß Γ π Σ σ µ τ Φ Θ Ω δ ∞ ⌀ ∈ ∩ ≡ ± ≥ ≤ ⌠ ⌡ ÷ ≈ ° ∙ · √ ⁿ ² ■
+               )];  # 0xB8/0xB9 are the IJ ligatures U+0132/U+0133, written as escapes so each slot holds one character, not the two-letter strings "IJ"/"ij"
+       }},
+       'brascii'      => {
+               # Identity table over 0x00-0xFF; the two entries under replace differ from it.
+               # NOTE(review): how replace is merged into the table is handled outside this view.
+               inherit => ['' => 'D0-DF+F0-FF'],
+               setup   => sub { $_[0]->{table} = [map { chr } 0x00 .. 0xFF] },
+               replace => {
+                       0xD7 => 'Œ',
+                       0xF7 => 'œ',
+               },
+       },
+       'ansel'        => {  # table is built positionally: every list element below is one code point slot
+               note => '+GEDCOM',
+               inherit => ['' => 'A0-CF+E0-FE'],
+               setup => sub {
+                       $_[0]->{table} = [
+                               (undef) x 0xA0,  # 0x00-0x9F: no entries
+                               undef, qw( Ł Ø Đ Þ Æ Œ ʹ · ♭ ®    ±          Ơ Ư ʾ ), undef,  # 0xA0-0xAF (first and last slot empty)
+                               qw( ʿ      ł ø đ þ æ œ ʺ ı £ ð ), undef, qw( ơ ư ), undef, undef,  # 0xB0-0xBF
+                               qw( °      ℓ ℗ © ♯ ¿ ¡ ), (undef) x 0x19,  # 0xC0-0xC6, then 0xC7-0xDF empty
+                               (map {$_ && chr}  # 0xE0-0xFF: code points turned into characters; undef slots pass through as undef
+                                       0x309, 0x300, 0x0301, 0x0302, 0x0303, 0x304, 0x306, 0x307,
+                                       0x308, 0x30C, 0x030A, 0xFE20, 0xFE21, 0x315, 0x30B, 0x310,
+                                       0x327, 0x328, 0x0323, 0x0324, 0x0325, 0x333, 0x332, 0x326,
+                                       0x31C, 0x32E, 0xFE22, 0xFE23, undef,  undef, 0x313, undef,
+                               ),
+                       ];
+               },
+               replace => {  # keys are code points left empty by setup; presumably overlaid on the table by shared code outside this view - confirm
+                       # GEDCOM extensions
+                       0xBE => '□',
+                       0xBF => '■',
+                       0xCD => 'e', # endowment?
+                       0xCE => 'o', # ordinance?
+                       0xCF => 'ß',
+                       0xFC => "\x{338}",
+                       # MARC21 extensions
+                       0xC7 => 'ß',
+                       0xC8 => '€',
+               },
+       },
+
        ''             => {setup => sub {
                my $row = shift;
                $row->{offset} = delete $row->{startpoint};
@@ -226,7 +301,8 @@ use utf8;
                $row->{endpoint} ||= ($block + 1 << 8) - 1;
                $block == ($row->{endpoint} >> 8) or undef $block;
 
-               $row->{table} = join '', map { chr } $row->{offset} .. $row->{endpoint};
+               $row->{table} = join '', map { chr =~ s/\A\p{Unassigned}\z/�/r }
+                       $row->{offset} .. $row->{endpoint};
                utf8::upgrade($row->{table});  # prevent latin1 output
 
                $row->{endpoint} -= $row->{offset};