From: Mischa POSLAWSKY Date: Sun, 9 Apr 2017 00:36:43 +0000 (+0200) Subject: charset: avoid encode aliases for special requests X-Git-Tag: v1.10~192 X-Git-Url: http://git.shiar.nl/sheet.git/commitdiff_plain/d26b60713dda612610cea4344acb625a68bfeb4a charset: avoid encode aliases for special requests --- diff --git a/charset.plp b/charset.plp index 9dd98a4..aa81d0b 100644 --- a/charset.plp +++ b/charset.plp @@ -262,62 +262,60 @@ sub tabinput { or Alert('Table data could not be read', $@ || $!); $row{endpoint} = 1023 * $row{cell}->{colsize}; } + elsif ($input eq 'unicode') { + $row{cell} = do 'charset-unicode.inc.pl' + or Alert('Table data could not be read', $@ || $!); + + $row{endpoint} ||= 8191; + $row{endpoint} *= $row{cell}->{colsize}; + $row{startpoint} = $row{cell}->{colsize} * $row{offset}; + $row{offset} = 0; + $row{set} = 'Unicode ' . ( + $row{startpoint} < 0x10000 && $row{endpoint} < 0x10000 ? 'BMP' : + $row{startpoint} >= 0x10000 && $row{endpoint} < 0x20000 ? 'SMP' : + 'allocations' + ); + } + elsif ($input =~ m/^utf-*8$/i) { + $row{set} = 'UTF-8'; + $row{cell} = do 'charset-utf8.inc.pl' + or Alert('Table data could not be read', $@ || $!); + $row{endpoint} = 255; + } elsif ($row{set} = Encode::resolve_alias($input)) { - if ($row{set} eq 'Internal') { - $row{cell} = do 'charset-unicode.inc.pl' - or Alert('Table data could not be read', $@ || $!); - - $row{endpoint} ||= 8191; - $row{endpoint} *= $row{cell}->{colsize}; - $row{startpoint} = $row{cell}->{colsize} * $row{offset}; - $row{offset} = 0; - $row{set} = 'Unicode ' . ( - $row{startpoint} < 0x10000 && $row{endpoint} < 0x10000 ? 'BMP' : - $row{startpoint} >= 0x10000 && $row{endpoint} < 0x20000 ? 'SMP' : - 'allocations' - ); - } - elsif ($row{set} eq 'utf-8-strict') { - $row{set} = 'UTF-8'; - $row{cell} = do 'charset-utf8.inc.pl' - or Alert('Table data could not be read', $@ || $!); - $row{endpoint} = 255; + $row{endpoint} ||= 255; + if ($row{set} eq 'MacHebrew' or $row{set} eq 'MacThai') { + # array of possibly multiple characters per code point + $row{table} = [ + map { Encode::decode($row{set}, pack 'C*', $_) } $row{offset} .. $row{endpoint} + ]; } else { - $row{endpoint} ||= 255; - if ($row{set} eq 'MacHebrew' or $row{set} eq 'MacThai') { - # array of possibly multiple characters per code point - $row{table} = [ - map { Encode::decode($row{set}, pack 'C*', $_) } $row{offset} .. $row{endpoint} - ]; - } - else { - # ~16x faster than decoding in loop; - # substr strings is twice as fast as splitting to an array - $row{table} = Encode::decode($row{set}, pack 'C*', $row{offset} .. $row{endpoint}); - } - $row{endpoint} -= $row{offset}; + # ~16x faster than decoding in loop; + # substr strings is twice as fast as splitting to an array + $row{table} = Encode::decode($row{set}, pack 'C*', $row{offset} .. $row{endpoint}); + } + $row{endpoint} -= $row{offset}; - if ($row{set} eq 'cp437') { - for my $phipos (237 - $row{offset}) { - next if $phipos < 0 or $phipos > $row{endpoint}; - # replace phi glyph - substr($row{table}, $phipos, 1) = pack 'U*', 0x3D5; - } - if ($row{offset} == 0) { - # replace control characters by visible variants - substr($row{table}, 0, 32) = pack 'U*', map {hex} qw( - 2007 263A 263B 2665 2666 2663 2660 2022 - 25D8 25CB 25D9 2642 2640 266A 266B 263C - 25BA 25C4 2195 203C 00B6 00A7 25AC 21A8 - 2191 2193 2192 2190 221F 2194 25B2 25BC - ); - } + if ($row{set} eq 'cp437') { + for my $phipos (237 - $row{offset}) { + next if $phipos < 0 or $phipos > $row{endpoint}; + # replace phi glyph + substr($row{table}, $phipos, 1) = pack 'U*', 0x3D5; + } + if ($row{offset} == 0) { + # replace control characters by visible variants + substr($row{table}, 0, 32) = pack 'U*', map {hex} qw( + 2007 263A 263B 2665 2666 2663 2660 2022 + 25D8 25CB 25D9 2642 2640 266A 266B 263C + 25BA 25C4 2195 203C 00B6 00A7 25AC 21A8 + 2191 2193 2192 2190 221F 2194 25B2 25BC + ); } - - $visible->{ascii} = # assume common base - $visible->{ $row{set} } = 1; } + + $visible->{ascii} = # assume common base + $visible->{ $row{set} } = 1; } else { Alert("Encoding $input unknown");