From cc77ead3a819db005fce5de21c36aaa000885a30 Mon Sep 17 00:00:00 2001 From: Mischa POSLAWSKY Date: Sat, 22 Apr 2017 14:36:03 +0200 Subject: [PATCH] charset: replace private use glyphs in MacHebrew --- charset-encoding.inc.pl | 10 +++++++++- charset.plp | 6 ++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/charset-encoding.inc.pl b/charset-encoding.inc.pl index c1fd74c..c402187 100644 --- a/charset-encoding.inc.pl +++ b/charset-encoding.inc.pl @@ -131,7 +131,15 @@ use utf8; 'macsami' => {inherit => ['MacIcelandic' => '90-BF+D0-DF+F0', 'MacRoman' => '90']}, 'macgreek' => {inherit => ['MacRoman' => '80']}, 'maccyrillic' => {inherit => ['MacRoman' => '80']}, - 'machebrew' => {inherit => ['iso-8859-8' => '80', 'MacRoman' => '80-8F+A0']}, # partial ascii + 'machebrew' => { + inherit => ['iso-8859-8' => '80', 'MacRoman' => '80-8F+A0'], # ignore partial ascii + replace => { + # strip private use characters for unneeded roundtrip + 0xDE => chr(0x5C7), # qamats qatan + 0xC0 => 'לֹ', # lamed holam + #TODO: private use for canorial codes (obsolete nikud positioning) + }, + }, 'macarabic' => {inherit => ['iso-8859-6' => '80', 'cp864' => '80', 'MacRoman' => '80']}, #TODO: multiple parents 'macfarsi' => {inherit => ['MacArabic' => 'B0-BF', 'MacRoman' => '80']}, diff --git a/charset.plp b/charset.plp index 5b415ee..6300622 100644 --- a/charset.plp +++ b/charset.plp @@ -178,6 +178,12 @@ sub tabinput { while (my ($offset, $sub) = each %{$replace}) { $offset -= $row{offset}; + if (ref $row{table} eq 'ARRAY') { + $row{table}->[$offset] = $sub + if $offset >= 0 and $offset <= $row{endpoint}; + next; + } + my $length = length $sub; if ($offset < 0) { -- 2.30.0