From: Mischa POSLAWSKY
Date: Sat, 22 Apr 2017 13:21:07 +0000 (+0200)
Subject: charset: generic varchar option to identify array maps
X-Git-Tag: v1.10~89
X-Git-Url: http://git.shiar.nl/sheet.git/commitdiff_plain/5cae8635fc2bb8b3d6cd7d2d954209dfdc78228f

charset: generic varchar option to identify array maps

Clears the last hardcoded name exceptions.
---

diff --git a/charset-encoding.inc.pl b/charset-encoding.inc.pl
index b40d75a..ec53ef5 100644
--- a/charset-encoding.inc.pl
+++ b/charset-encoding.inc.pl
@@ -133,6 +133,7 @@ use utf8;
 	'maccyrillic' => {inherit => ['MacRoman' => '80']},
 	'machebrew' => {
 		inherit => ['iso-8859-8' => '80', 'MacRoman' => '80-8F+A0'], # ignore partial ascii
+		varchar => 1,
 		replace => {
 			# strip private use characters for unneeded roundtrip
 			0xDE => chr(0x5C7), # qamats qatan
@@ -144,6 +145,7 @@ use utf8;
 	'macfarsi' => {inherit => ['MacArabic' => 'B0-BF', 'MacRoman' => '80']},
 	'macthai' => {
 		inherit => ['iso-8859-11' => '80-9F+D0'],
+		varchar => 1,
 		replace => {
 			# strip appended private use characters for unneeded roundtrip
 			0x83 => "\x{E48}", 0x88 => "\x{E48}", 0x98 => "\x{E48}",
diff --git a/charset.plp b/charset.plp
index 6300622..6ec4a24 100644
--- a/charset.plp
+++ b/charset.plp
@@ -152,7 +152,7 @@ sub tabinput {
 	if ($row{set}) {}
 	elsif ($row{set} = Encode::resolve_alias($input)) {
 		$row{offset} = delete $row{startpoint};
-		if ($row{set} eq 'MacHebrew' or $row{set} eq 'MacThai') {
+		if ($charset->{varchar}) {
 			# array of possibly multiple characters per code point
 			$row{table} = [
 				map { Encode::decode($row{set}, pack 'C*', $_) } $row{offset} .. $row{endpoint}