X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/3e2e19818a6102eff505984ca2430ae3ac901d08..0c9d5cd332b3a9124f75926de5094c90b353e238:/charset.plp diff --git a/charset.plp b/charset.plp index 4f60765..7f19bc1 100644 --- a/charset.plp +++ b/charset.plp @@ -143,50 +143,12 @@ sub tabinput { } } - if ($input =~ m{ \A (?:wing|web)dings \d* \z }ix) { - eval "require Encode::\u$input"; + if ($charset->{setup}) { + eval { $charset->{setup}->(\%row) } + or Alert("Incomplete setup of $input", $@); } - if ($input eq '') { - $row{offset} = delete $row{startpoint}; - $row{set} = 'Unicode characters'; - my $block = $row{offset} >> 8; - $row{endpoint} ||= ($block + 1 << 8) - 1; - $block == ($row{endpoint} >> 8) or undef $block; - - $row{table} = join '', map { chr } $row{offset} .. $row{endpoint}; - utf8::upgrade($row{table}); # prevent latin1 output - - $row{endpoint} -= $row{offset}; - - if (defined $block) { - $row{set} = sprintf 'Unicode block U+%02Xxx', $block; - $row{offset} %= 0x100; - } - } - elsif (lc $input eq 'uu') { - $row{cell} = do 'charset-ucplanes.inc.pl' - or Alert('Table data could not be read', $@ || $!); - $row{endpoint} ||= 0x3FF; - $row{set} = 'Unicode planes'; - } - elsif (lc $input eq 'u') { - $row{cell} = do 'charset-unicode.inc.pl' - or Alert('Table data could not be read', $@ || $!); - - $row{endpoint} ||= 0x1FFF; - $row{set} = 'Unicode ' . ( - $row{startpoint} < 0x1000 && $row{endpoint} < 0x1000 ? 'BMP' : - $row{startpoint} >= 0x1000 && $row{endpoint} < 0x2000 ? 'SMP' : - 'allocations' - ); - } - elsif ($input =~ m/^utf-*8$/i) { - $row{set} = 'UTF-8'; - $row{cell} = do 'charset-utf8.inc.pl' - or Alert('Table data could not be read', $@ || $!); - $row{endpoint} = 0xFF; - } + if ($row{set}) {} elsif ($row{set} = Encode::resolve_alias($input)) { $row{offset} = delete $row{startpoint}; $row{endpoint} ||= 0xFF; @@ -202,33 +164,6 @@ sub tabinput { $row{table} = Encode::decode($row{set}, pack 'C*', $row{offset} .. $row{endpoint}); } - if ($row{set} eq 'cp437') { - if ($row{offset} <= 0xED and $row{endpoint} >= 0xED) { - # replace phi glyph - substr($row{table}, 0xED - $row{offset}, 1) = 'ϕ'; - } - if ($row{offset} < 0x20) { - # replace control characters by visible variants - my $sub = substr ' ☺☻♥♦♣♠•◘○◙♂♀♪♫☼►◄↕‼¶§▬↨↑↓→←∟↔▲▼', $row{offset}; - substr($row{table}, 0, length $sub) = $sub; - } - } - elsif ($row{set} eq 'symbol') { - if ($row{offset} <= 0x60 and $row{endpoint} >= 0x60) { - # replace radical extender by closest unicode equivalent - substr($row{table}, 0x60 - $row{offset}, 1) = '│'; - } - if ($row{offset} <= 0xBD and $row{endpoint} >= 0xFF) { - substr($row{table}, 0xBD - $row{offset}, 2) = '⏐⎯'; # arrow extenders - substr($row{table}, 0xD2 - $row{offset}, 3) = '®©™'; # serif variants - substr($row{table}, 0xE0 - $row{offset}, 1) = '◊'; # replace lookalike, should match AdobeSymbol - substr($row{table}, 0xE2 - $row{offset}, 3) = '®©™'; # sans-serif variants - substr($row{table}, 0xE6 - $row{offset}, 10) = '⎛⎜⎝⎡⎢⎣⎧⎨⎩⎪'; - substr($row{table}, 0xF0 - $row{offset}, 1) = '€'; - substr($row{table}, 0xF4 - $row{offset}, 11) = '⎮⌡⎞⎟⎠⎤⎥⎦⎫⎬⎭'; - } - } - $row{endpoint} -= $row{offset}; $visible->{ascii} = # assume common base @@ -238,6 +173,32 @@ sub tabinput { Alert("Encoding $input unknown"); return; } + + if (my $replace = $charset->{replace}) { + while (my ($offset, $sub) = each %{$replace}) { + $offset -= $row{offset}; + + my $length = length $sub; + + if ($offset < 0) { + $offset > -$length or next; # at least one character after start + # trim leftmost part to start at offset + substr($sub, 0, -$offset) = ''; + $length += $offset; + $offset = 0; + } + + if ((my $excess = $row{endpoint} - $offset - $length + 1) < 0) { + $excess > -$length or next; + # trim rightmost part to prevent overflow + substr($sub, $excess) = ''; + $length += $excess; + } + + substr($row{table}, $offset, $length) = $sub; + } + } + push @request, \%row; } tabinput($_) for @tablist;