charset: generic glyph replacement from metadata
[sheet.git] / charset-encoding.inc.pl
index 0f1d2d60bb113db9929ce51e874d4533db94b2cf..71ba2bae277446e4a68666dab5081b1cdc3875ef 100644 (file)
@@ -1,8 +1,9 @@
 use 5.014;
 use warnings;
+use utf8;
 
 +{
-       default    => [qw( u+0-27F utf-8+realsize iso-8859-1 iso-8859-15 cp1252 cp437 cp850 )],
+       default    => [qw( u+0-27F utf8+realsize iso-8859-1 iso-8859-15 cp1252 cp437 cp850 )],
        unicode    => [qw( uu+cols=32+realsize u+0-FFF u+1000-18AF u+18B0-1FFF )],
        us         => [qw( cp437 cp863 gsm0338 AdobeStandardEncoding )],
        ebcdic     => [qw( cp37 cp500 cp1047 posix-bc cp1026 cp875 )],
@@ -45,12 +46,30 @@ use warnings;
        11 => 'thai',
 
        'ascii'        => {inherit => ['' => '00-7F']},
-       'cp437'        => {inherit => ['cp850' => 0, 'ascii' => '00-1F+80']}, # ascii range overridden later
+       'cp437'        => {
+               inherit => ['cp850' => 0, 'ascii' => '00-1F+80'], # ascii range overridden later
+               replace => {
+                       0    => ' ☺☻♥♦♣♠•◘○◙♂♀♪♫☼►◄↕‼¶§▬↨↑↓→←∟↔▲▼', # visible variants of control characters
+                       0xED => 'ϕ', # non-Greek usage and appearance
+               },
+       },
        'gsm0338'      => {inherit => ['ascii' => '00-7F']},
        'dingbats'     => {inherit => ['' => '20-7F+A0']},
        'macdingbats'  => {inherit => ['dingbats' => '80-9F']},
        'adobezdingbat'=> {inherit => ['MacDingbats' => '80-9F']}, # should be identical but maps to private use
-       'symbol'       => {inherit => ['' => '20-7F+A0']},
+       'symbol'       => {
+               inherit => ['' => '20-7F+A0'],
+               replace => {
+                       0x60 => '│', # replace radical extender by closest Unicode equivalent
+                       0xBD => '⏐⎯', # arrow extenders
+                       0xD2 => '®©™', # serif variants
+                       0xE0 => '◊', # replace lookalike, should match AdobeSymbol
+                       0xE2 => '®©™', # sans-serif variants
+                       0xE6 => '⎛⎜⎝⎡⎢⎣⎧⎨⎩⎪',
+                       0xF0 => '€',
+                       0xF4 => '⎮⌡⎞⎟⎠⎤⎥⎦⎫⎬⎭',
+               },
+       },
        'adobesymbol'  => {inherit => ['symbol' => '20-7F+A0', '' => '20-7F+A0']}, # minor differences, irrelevant except for different '€'
        'wingdings'    => {inherit => ['' => '20'], setup => sub {require Encode::Wingdings}},
        'wingdings2'   => {inherit => ['' => '20'], setup => sub {require Encode::Wingdings2}},
@@ -164,7 +183,6 @@ use warnings;
                $row->{set} = 'Unicode planes';
                return $row;
        }},
-       'utf-8'        => 'utf8',
        utf8           => {setup => sub {
                my $row = shift;
                $row->{set} = 'UTF-8';
@@ -173,4 +191,5 @@ use warnings;
                $row->{endpoint} = 0xFF;
                return $row;
        }},
+       'utf-8'        => 'utf8',
 };