charset: generic varchar option to identify array maps
author: Mischa POSLAWSKY <perl@shiar.org>
Sat, 22 Apr 2017 13:21:07 +0000 (15:21 +0200)
committer: Mischa POSLAWSKY <perl@shiar.org>
Thu, 25 May 2017 20:13:05 +0000 (22:13 +0200)
Clears the last hardcoded name exceptions.

charset-encoding.inc.pl
charset.plp

index b40d75a..ec53ef5 100644 (file)
@@ -133,6 +133,7 @@ use utf8;
        'maccyrillic'  => {inherit => ['MacRoman' => '80']},
        'machebrew'    => {
                inherit => ['iso-8859-8' => '80', 'MacRoman' => '80-8F+A0'], # ignore partial ascii
+               varchar => 1,
                replace => {
                        # strip private use characters for unneeded roundtrip
                        0xDE => chr(0x5C7), # qamats qatan
@@ -144,6 +145,7 @@ use utf8;
        'macfarsi'     => {inherit => ['MacArabic' => 'B0-BF', 'MacRoman' => '80']},
        'macthai'      => {
                inherit => ['iso-8859-11' => '80-9F+D0'],
+               varchar => 1,
                replace => {
                        # strip appended private use characters for unneeded roundtrip
                        0x83 => "\x{E48}", 0x88 => "\x{E48}", 0x98 => "\x{E48}",
index 6300622..6ec4a24 100644 (file)
@@ -152,7 +152,7 @@ sub tabinput {
        if ($row{set}) {}
        elsif ($row{set} = Encode::resolve_alias($input)) {
                $row{offset} = delete $row{startpoint};
-               if ($row{set} eq 'MacHebrew' or $row{set} eq 'MacThai') {
+               if ($charset->{varchar}) {
                        # array of possibly multiple characters per code point
                        $row{table} = [
                                map { Encode::decode($row{set}, pack 'C*', $_) } $row{offset} .. $row{endpoint}