word/edit: read fractional crop coordinates
[sheet.git] / charset-encoding.inc.pl
index 92330ab0f876092ee425d0412a28d989d1e4ecaa..3eb7cc7f206016d6c30ddb03b36bfb44343e0455 100644 (file)
@@ -9,7 +9,7 @@ use utf8;
        ebcdic     => [qw( cp37 cp500 cp1047 posix-bc cp1026 cp875 )],
        iso        => [map {"iso-8859-$_"} 1 .. 11, 13 .. 16],
        dos        => [qw( cp437 cp865 cp861 cp860 cp863 cp850 cp857 cp852 cp775
-                          cp737 cp869 cp866 cp855 cp862 cp864 )],
+                          cp737 cp869 cp866 MIK cp855 cp862 cp864 )],  # MIK is the cp437-derived Cyrillic table defined under the 'mik' key
        aix        => [qw( cp1006 )],
        win        => [qw( cp1252 cp1250 cp1254 cp1257 cp1258 cp1253 cp1251 cp1255 cp1256 cp874 )],
        mac        => [qw( MacRoman MacRomanian MacRumanian MacCroatian MacCentralEurRoman MacTurkish MacIcelandic MacSami
@@ -22,7 +22,7 @@ use utf8;
        norteur    => [qw( baltic nordic )],
        baltic     => [qw( iso-8859-4 iso-8859-13 cp1257 cp775 )],
        nordic     => [qw( iso-8859-10 cp865 cp861 MacIcelandic MacSami )],
-       cyrillic   => [qw( koi8-r koi8-u koi8-f iso-8859-5 cp1251 MacCyrillic cp866 cp855
+       cyrillic   => [qw( koi8-r koi8-u koi8-f iso-8859-5 cp1251 MacCyrillic cp866 MIK cp855
                           +400 +2DE0 +A640-A69F +500-52F )], # MacUkrainian is broken
        arabic     => [qw( iso-8859-6 cp1256 MacArabic cp864 cp1006 MacFarsi
                           +600 +8A0-8BF+8E0 +750-77F )],
@@ -161,6 +161,18 @@ use utf8;
 
        'koi8-u'       => {inherit => ['koi8-r' => '90-BF']},
        'koi8-f'       => {inherit => ['koi8-u' => '90-BF']},
+       'mik'          => {inherit => ['cp437' => '80-D8', 'cp866' => 'B0'], setup => sub {  # MIK: identity in 0x00-0x7F, Cyrillic А-я in 0x80-0xBF, box drawing and symbols above
+               $_[0]->{table} = [(map {chr} 0 .. 0x7F), qw(
+                       А Б В Г Д Е Ж З И Й К Л М Н О П
+                       Р С Т У Ф Х Ц Ч Ш Щ Ъ Ы Ь Э Ю Я
+                       а б в г д е ж з и й к л м н о п
+                       р с т у ф х ц ч ш щ ъ ы ь э ю я
+                       └ ┴ ┬ ├ ─ ┼ ╣ ║ ╚ ╔ ╩ ╦ ╠ ═ ╬ ┐
+                       ░ ▒ ▓ │ ┤ № § ╗ ╝ ┘ ┌ █ ▄ ▌ ▐ ▀
+                       α ß Γ π Σ σ µ τ Φ Θ Ω δ ∞ φ ε ∩
+                       ≡ ± ≥ ≤ ⌠ ⌡ ÷ ≈ ° ∙ · √ ⁿ ² ■
+               ), "\xA0"];  # rows fill 0x80-0xFE; 0xFF is U+00A0, written as an escape outside qw() so the cell is visible in source
+       }},
 
        'macromanian'  => {inherit => ['MacRoman' => 'A0-BF+D0-DF']},
        'macrumanian'  => {inherit => ['MacRomanian' => 'A0-BF+D0-DF', 'MacRoman' => 'A0-BF+D0-DF']},
@@ -206,6 +218,122 @@ use utf8;
        'cp1026'       => {inherit => ['cp37' => '40']},
        'cp875'        => {inherit => ['cp37' => '30']},
 
+       legacy     => [qw( cp437 ATASCII PETSCII MSX ZX-Spectrum ANSEL )],  # NOTE(review): brascii, ti86 and ti89 are defined below but not listed here — confirm intentional
+       'petscii'      => {inherit => ['' => '40-7F+A0-BF'], setup => sub {  # table spans 0x00-0xBF; the 0x80-0x9F row is "." filler outside the inherit ranges
+               $_[0]->{table} = [(map {chr} 0 .. 0x3F), qw(
+                       @ a b c d e f g h i j k l m n o p q r s t u v w x y z [ £ ] ↑ ←
+                       🭹 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ┼ 🮌 │ 🮖 🮘
+                       . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+               ), "\xA0", qw( ▌ ▄ ▔ ▁ ▏ ▒ ▕ 🮏 🮙 🮇 ├ ▗ └ ┐ ▂ ┌ ┴ ┬ ┤ ▎ ▍ 🮈 🮂 🮃 ▃ ✓ ▖ ▝ ┘ ▘ ▚
+               )];  # 0xA0 is spelled "\xA0": whitespace inside qw() yields no element, which would leave this row one cell short and shift 0xA1-0xBF down
+       }},
+       'atascii'      => {inherit => ['' => '0-1F+60-7F'], setup => sub {  # four rows of 32 cells covering 0x00-0x7F
+               $_[0]->{table} = [qw(
+                       ♥ ├ 🮇 ┘ ┤ ┐ ╱ ╲ ◢ ▗ ◣ ▝ ▘ 🮂 ▂ ▖ ♣ ┌ ─ ┼ • ▄ ▎ ┬ ┴ ▌ └ ␛ ↑ ↓ ← →
+                       _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+                       _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+                       ♦ a b c d e f g h i j k l m n o p q r s t u v w x y z ♠ | 🢰 ◀ ▶
+               )];  # the two "_" rows (0x20-0x5F) are filler; only 0x00-0x1F and 0x60-0x7F are named in inherit
+       }},
+       'zx-spectrum'  => {  # based on the 'ascii' set with three overrides
+               inherit => ['' => '50-8F'],
+               set => 'ascii',
+               replace => {
+                       ord('^') => '↑',  # key 0x5E
+                       ord('`') => '£',  # key 0x60
+                       0x7F => '© ▝▘▀▗▐▚▜▖▞▌▛▄▟▙█',  # 17 characters; presumably fills consecutive cells 0x7F-0x8F (matches the inherit range) — confirm against the consumer
+               },
+       },
+       'msx'          => {inherit => ['cp437' => '80-FF'], setup => sub {  # identity in 0x00-0x7F, explicit rows of 32 cells from 0x80
+               $_[0]->{table} = [(map {chr} 0 .. 0x7F), qw(
+                       Ç ü é â ä à å ç ê ë è ï î ì Ä Å É æ Æ ô ö ò û ù ÿ Ö Ü ¢ £ ¥ ₧ ƒ
+                       á í ó ú ñ Ñ ª º ¿ ⌐ ¬ ½ ¼ ¡ « » Ã ã Ĩ ĩ Õ õ Ũ ũ IJ ij ¾ ∽ ◊ ‰ ¶ §
+                       ▂ ▚ ▆ 🮂 ▬ 🮅 ▎ ▞ ▊ 🮇 🮊 🮙 🮘 🭭 🭯 🭬 🭮 🮚 🮛 ▘ ▗ ▝ ▖ 🮖 Δ ‡ ω █ ▄ ▌ ▐ ▀
+                       α ß Γ π Σ σ µ τ Φ Θ Ω δ ∞ ⌀ ∈ ∩ ≡ ± ≥ ≤ ⌠ ⌡ ÷ ≈ ° ∙ · √ ⁿ ² ■
+               )];  # last row has 31 cells, so the table ends at 0xFE — NOTE(review): inherit names 80-FF; confirm 0xFF is meant to stay undefined
+       }},
+       'brascii'      => {  # 256-cell identity table (cell N holds chr N) with two overrides
+               inherit => ['' => 'D0-DF+F0-FF'],
+               setup => sub {
+                       $_[0]->{table} = [(map {chr} 0 .. 0xFF)];  # code points 0x00-0xFF mapped to themselves
+               },
+               replace => {
+                       0xD7 => 'Œ',  # identity value would be U+00D7
+                       0xF7 => 'œ',  # identity value would be U+00F7
+               },
+       },
+       'ansel'        => {  # table defines only 0xA0-0xFF; lower cells are undef
+               note => '+GEDCOM',
+               inherit => ['' => 'A0-CF+E0-FE'],
+               setup => sub {
+                       $_[0]->{table} = [
+                               (undef) x 0xA0,  # 0x00-0x9F
+                               undef, qw( Ł Ø Đ Þ Æ Œ ʹ · ♭ ®    ±          Ơ Ư ʾ ), undef,  # 0xA0-0xAF
+                               qw( ʿ      ł ø đ þ æ œ ʺ ı £ ð ), undef, qw( ơ ư ), undef, undef,  # 0xB0-0xBF
+                               qw( °      ℓ ℗ © ♯ ¿ ¡ ), (undef) x 0x19,  # 0xC0-0xC6, then 0xC7-0xDF undefined
+                               (map {$_ && chr}  # code points become characters; undef entries pass through as undef
+                                       0x309, 0x300, 0x0301, 0x0302, 0x0303, 0x304, 0x306, 0x307,  # 0xE0-0xE7
+                                       0x308, 0x30C, 0x030A, 0xFE20, 0xFE21, 0x315, 0x30B, 0x310,  # 0xE8-0xEF
+                                       0x327, 0x328, 0x0323, 0x0324, 0x0325, 0x333, 0x332, 0x326,  # 0xF0-0xF7
+                                       0x31C, 0x32E, 0xFE22, 0xFE23, undef,  undef, 0x313, undef,  # 0xF8-0xFF
+                               ),
+                       ];
+               },
+               replace => {  # overrides for cells the base table leaves undef
+                       # GEDCOM extensions
+                       0xBE => '□',
+                       0xBF => '■',
+                       0xCD => 'e', # endowment?
+                       0xCE => 'o', # ordinance?
+                       0xCF => 'ß',
+                       0xFC => "\x{338}",
+                       # MARC21 extensions
+                       0xC7 => 'ß',
+                       0xC8 => '€',
+               },
+       },
+       'ti86'         => {  # table defines 0x01-0x1F and 0x80-0xEC; everything else is undef
+               note => 'similar to TI85',
+               inherit => ['' => '0-1F+80-EC'],
+               setup => sub {
+                       $_[0]->{table} = [
+                               undef, qw(
+                                       𝐛 𝐨 𝐝 𝐡 ▶ ⬆ ⬇ ∫ × 𝐀 𝐁 𝐂 𝐃 𝐄 𝐅
+                                       √ ⁻¹ ² ∠ ° ʳ ᵀ ≤ ≠ ≥ ⁻ ᴇ → ⏨ ↑ ↓
+                               ),
+                               (undef) x 0x60,  # 0x20-0x7F left undefined
+                               qw(
+                                       ₀ ₁ ₂ ₃ ₄ ₅ ₆ ₇ ₈ ₉ Á À Â Ä á à
+                                       â ä É È Ê Ë é è ê ë Í Ì Î Ï í ì
+                                       î ï Ó Ò Ô Ö ó ò ô ö Ú Ù Û Ü ú ù
+                                       û ü Ç ç Ñ ñ ´ ` ¨ ¿ ¡ α β γ Δ δ
+                                       ϵ θ λ μ π ρ Σ σ τ ϕ Ω x̅ y̅ ˟ … ◀
+                                       ■ ∕ ‐ ² ° ³ :⃞ ➧ ⧵ 🙽 ◥ ◣ ⊸ ∘ ⋱ █
+                                       ⇧ A⃞ a⃞ _ ⇧̲ A̲ a̲ ▒ ⬞ ˖ · ⁴ ﹦
+                               ),
+                       ];  # last row has 13 cells, so the table ends at 0xEC
+               },
+       },
+       'ti89'         => {  # full 0x00-0xBF table: symbols, printable ASCII, then Greek/math/Latin-1-style rows
+               note => 'also TI92(+)',
+               inherit => ['' => '0-1F+7F-BE'],
+               setup => sub {
+                       $_[0]->{table} = [
+                               qw(
+                                       ▒ ␁ ␂ ␃ ␄ ␅ ␆ 🔔 ⌫ ⇥ ), chr(0xA), qw( ⬏ ⤒ ↵ 🔒 ✓
+                                       ■ ◂ ▸ ▴ ▾ ← → ↑ ↓ ◀ ▶ ⬆ ∪ ∩ ⊂ ∈
+                               ),  # 0x00-0x1F; cell 0x0A is a real newline via chr(0xA), which qw() cannot hold as a word
+                               (map {chr} 0x20 .. 0x7E), '◆',  # printable ASCII unchanged, then 0x7F
+                               qw(
+                                       α β Γ γ Δ δ ε ζ θ λ ξ ∏ π ρ ∑ σ
+                                       τ ϕ ψ Ω ω ᴇ ℯ 𝐢 ʳ ᵀ x̅ y̅ ≤ ≠ ≥ ∠
+                                       … ¡ ¢ £ ¤ ¥ ¦ § √ © ª « ¬ ⁻ ® ¯
+                                       ° ± ² ³ ⁻¹ µ ¶ · ⁺ ¹ º » 𝑑 ∫ ∞ ¿
+                               ),  # 0x80-0xBF
+                       ];
+               },
+       },
+
        ''             => {setup => sub {
                my $row = shift;
                $row->{offset} = delete $row->{startpoint};
@@ -214,7 +342,8 @@ use utf8;
                $row->{endpoint} ||= ($block + 1 << 8) - 1;
                $block == ($row->{endpoint} >> 8) or undef $block;
 
-               $row->{table} = join '', map { chr } $row->{offset} .. $row->{endpoint};
+               $row->{table} = join '', map { chr =~ s/\A\p{Unassigned}\z/�/r }
+                       $row->{offset} .. $row->{endpoint};
                utf8::upgrade($row->{table});  # prevent latin1 output
 
                $row->{endpoint} -= $row->{offset};