From 0c9d5cd332b3a9124f75926de5094c90b353e238 Mon Sep 17 00:00:00 2001 From: Mischa POSLAWSKY Date: Sat, 22 Apr 2017 12:32:37 +0200 Subject: [PATCH] charset: generic glyph replacement from metadata Common format with full support for partial overlaps. --- charset-encoding.inc.pl | 23 ++++++++++++++++-- charset.plp | 53 ++++++++++++++++++++--------------------- 2 files changed, 47 insertions(+), 29 deletions(-) diff --git a/charset-encoding.inc.pl b/charset-encoding.inc.pl index eee5aa2..71ba2ba 100644 --- a/charset-encoding.inc.pl +++ b/charset-encoding.inc.pl @@ -1,5 +1,6 @@ use 5.014; use warnings; +use utf8; +{ default => [qw( u+0-27F utf8+realsize iso-8859-1 iso-8859-15 cp1252 cp437 cp850 )], @@ -45,12 +46,30 @@ use warnings; 11 => 'thai', 'ascii' => {inherit => ['' => '00-7F']}, - 'cp437' => {inherit => ['cp850' => 0, 'ascii' => '00-1F+80']}, # ascii range overridden later + 'cp437' => { + inherit => ['cp850' => 0, 'ascii' => '00-1F+80'], # ascii range overridden later + replace => { + 0 => ' ☺☻♥♦♣♠•◘○◙♂♀♪♫☼►◄↕‼¶§▬↨↑↓→←∟↔▲▼', # visible variants of control characters + 0xED => 'ϕ', # non-greek usage and appearance + }, + }, 'gsm0338' => {inherit => ['ascii' => '00-7F']}, 'dingbats' => {inherit => ['' => '20-7F+A0']}, 'macdingbats' => {inherit => ['dingbats' => '80-9F']}, 'adobezdingbat'=> {inherit => ['MacDingbats' => '80-9F']}, # should be identical but maps to private use - 'symbol' => {inherit => ['' => '20-7F+A0']}, + 'symbol' => { + inherit => ['' => '20-7F+A0'], + replace => { + 0x60 => '│', # replace radical extender by closest unicode equivalent + 0xBD => '⏐⎯', # arrow extenders + 0xD2 => '®©™', # serif variants + 0xE0 => '◊', # replace lookalike, should match AdobeSymbol + 0xE2 => '®©™', # sans-serif variants + 0xE6 => '⎛⎜⎝⎡⎢⎣⎧⎨⎩⎪', + 0xF0 => '€', + 0xF4 => '⎮⌡⎞⎟⎠⎤⎥⎦⎫⎬⎭', + }, + }, 'adobesymbol' => {inherit => ['symbol' => '20-7F+A0', '' => '20-7F+A0']}, # minor differences, irrelevant except for different '€' 'wingdings' => {inherit => ['' => '20'], setup => sub {require Encode::Wingdings}}, 'wingdings2' => {inherit => ['' => '20'], setup => sub {require Encode::Wingdings2}}, diff --git a/charset.plp b/charset.plp index ed495fe..7f19bc1 100644 --- a/charset.plp +++ b/charset.plp @@ -164,33 +164,6 @@ sub tabinput { $row{table} = Encode::decode($row{set}, pack 'C*', $row{offset} .. $row{endpoint}); } - if ($row{set} eq 'cp437') { - if ($row{offset} <= 0xED and $row{endpoint} >= 0xED) { - # replace phi glyph - substr($row{table}, 0xED - $row{offset}, 1) = 'ϕ'; - } - if ($row{offset} < 0x20) { - # replace control characters by visible variants - my $sub = substr ' ☺☻♥♦♣♠•◘○◙♂♀♪♫☼►◄↕‼¶§▬↨↑↓→←∟↔▲▼', $row{offset}; - substr($row{table}, 0, length $sub) = $sub; - } - } - elsif ($row{set} eq 'symbol') { - if ($row{offset} <= 0x60 and $row{endpoint} >= 0x60) { - # replace radical extender by closest unicode equivalent - substr($row{table}, 0x60 - $row{offset}, 1) = '│'; - } - if ($row{offset} <= 0xBD and $row{endpoint} >= 0xFF) { - substr($row{table}, 0xBD - $row{offset}, 2) = '⏐⎯'; # arrow extenders - substr($row{table}, 0xD2 - $row{offset}, 3) = '®©™'; # serif variants - substr($row{table}, 0xE0 - $row{offset}, 1) = '◊'; # replace lookalike, should match AdobeSymbol - substr($row{table}, 0xE2 - $row{offset}, 3) = '®©™'; # sans-serif variants - substr($row{table}, 0xE6 - $row{offset}, 10) = '⎛⎜⎝⎡⎢⎣⎧⎨⎩⎪'; - substr($row{table}, 0xF0 - $row{offset}, 1) = '€'; - substr($row{table}, 0xF4 - $row{offset}, 11) = '⎮⌡⎞⎟⎠⎤⎥⎦⎫⎬⎭'; - } - } - $row{endpoint} -= $row{offset}; $visible->{ascii} = # assume common base @@ -200,6 +173,32 @@ sub tabinput { Alert("Encoding $input unknown"); return; } + + if (my $replace = $charset->{replace}) { + while (my ($offset, $sub) = each %{$replace}) { + $offset -= $row{offset}; + + my $length = length $sub; + + if ($offset < 0) { + $offset > -$length or next; # at least one character after start + # trim leftmost part to start at offset + substr($sub, 0, -$offset) = ''; + $length += $offset; + $offset = 0; + } + + if ((my $excess = $row{endpoint} - $offset - $length + 1) < 0) { + $excess > -$length or next; + # trim rightmost part to prevent overflow + substr($sub, $excess) = ''; + $length += $excess; + } + + substr($row{table}, $offset, $length) = $sub; + } + } + push @request, \%row; } tabinput($_) for @tablist; -- 2.30.0