use 5.014;
use warnings;
+use utf8;
+{
default => [qw( u+0-27F utf8+realsize iso-8859-1 iso-8859-15 cp1252 cp437 cp850 )],
11 => 'thai',
'ascii' => {inherit => ['' => '00-7F']},
- 'cp437' => {inherit => ['cp850' => 0, 'ascii' => '00-1F+80']}, # ascii range overridden later
+ 'cp437' => {
+ inherit => ['cp850' => 0, 'ascii' => '00-1F+80'], # ascii range overridden later
+ replace => {
+ 0 => ' ☺☻♥♦♣♠•◘○◙♂♀♪♫☼►◄↕‼¶§▬↨↑↓→←∟↔▲▼', # visible variants of control characters (32 glyphs, presumably assigned to consecutive codes from 00)
+ 0xED => 'ϕ', # non-greek usage and appearance
+ },
+ },
'gsm0338' => {inherit => ['ascii' => '00-7F']},
'dingbats' => {inherit => ['' => '20-7F+A0']},
'macdingbats' => {inherit => ['dingbats' => '80-9F']},
'adobezdingbat'=> {inherit => ['MacDingbats' => '80-9F']}, # should be identical but maps to private use
- 'symbol' => {inherit => ['' => '20-7F+A0']},
+ 'symbol' => {
+ inherit => ['' => '20-7F+A0'],
+ replace => {
+ 0x60 => '│', # replace radical extender by closest unicode equivalent
+ 0xBD => '⏐⎯', # arrow extenders (vertical, horizontal)
+ 0xD2 => '®©™', # serif variants
+ 0xE0 => '◊', # replace lookalike, should match AdobeSymbol
+ 0xE2 => '®©™', # sans-serif variants
+ 0xE6 => '⎛⎜⎝⎡⎢⎣⎧⎨⎩⎪', # left parenthesis, square bracket and curly bracket pieces, bracket extension
+ 0xF0 => '€', # euro sign
+ 0xF4 => '⎮⌡⎞⎟⎠⎤⎥⎦⎫⎬⎭', # integral pieces, then right parenthesis, square bracket and curly bracket pieces
+ },
+ },
'adobesymbol' => {inherit => ['symbol' => '20-7F+A0', '' => '20-7F+A0']}, # minor differences, irrelevant except for different '€'
'wingdings' => {inherit => ['' => '20'], setup => sub {require Encode::Wingdings}},
'wingdings2' => {inherit => ['' => '20'], setup => sub {require Encode::Wingdings2}},
'macsami' => {inherit => ['MacIcelandic' => '90-BF+D0-DF+F0', 'MacRoman' => '90']},
'macgreek' => {inherit => ['MacRoman' => '80']},
'maccyrillic' => {inherit => ['MacRoman' => '80']},
- 'machebrew' => {inherit => ['iso-8859-8' => '80', 'MacRoman' => '80-8F+A0']}, # partial ascii
+ 'machebrew' => {
+ inherit => ['iso-8859-8' => '80', 'MacRoman' => '80-8F+A0'], # ignore partial ascii
+ replace => {
+ # use plain unicode characters in place of private use ones (presumably only mapped that way for roundtrip conversion, which is not needed here)
+ 0xDE => chr(0x5C7), # qamats qatan (U+05C7)
+ 0xC0 => 'לֹ', # lamed holam
+ #TODO: private use for canoral codes (obsolete nikud positioning)
+ },
+ },
'macarabic' => {inherit => ['iso-8859-6' => '80', 'cp864' => '80', 'MacRoman' => '80']}, #TODO: multiple parents
'macfarsi' => {inherit => ['MacArabic' => 'B0-BF', 'MacRoman' => '80']},
$row->{set} = 'UTF-8';
$row->{cell} = do 'charset-utf8.inc.pl'
or Alert('Table data could not be read', $@ || $!);
- $row->{endpoint} = 0xFF;
return $row;
}},
'utf-8' => 'utf8',