From cc3aea5b110f944ce10a22cdea96a0b3f6da15c9 Mon Sep 17 00:00:00 2001 From: Mischa POSLAWSKY Date: Mon, 27 Mar 2017 22:10:05 +0200 Subject: [PATCH] charset: common cell formatting for unicode planes Like unicode include, but support different nibble size. --- charset-ucplanes.inc.pl | 182 +++++++++++++++++++--------------------- charset.plp | 9 +- 2 files changed, 90 insertions(+), 101 deletions(-) diff --git a/charset-ucplanes.inc.pl b/charset-ucplanes.inc.pl index fd01acb..fce05ff 100644 --- a/charset-ucplanes.inc.pl +++ b/charset-ucplanes.inc.pl @@ -1,98 +1,86 @@ use utf8; -my %uniblock = ( - 0x0000, 'ascii', - 0x0008, 'latin', -# 0x0028, 'spacing modifier', - 0x0028, 'comb', - 0x0038, 'grk', - 0x0040, 'cyr', - 0x0050, 'arm', - 0x0058, 'heb', - 0x0060, 'arabic', - 0x0070, 'aram', - 0x0080, 'aramaic', - 0x0090, 'brahmic', - 0x0100, 'mm', - 0x0108, 'geor', - 0x0110, 'jamo', - 0x0120, 'ethiopic', - 0x0138, 'aboriginal', - 0x0168, 'ger', - 0x0170, 'brahm', - 0x0180, 'mon', - 0x0188, 'can', - 0x0190, 'brahmic', - 0x01D0, 'extensions', - 0x01F0, 'greek', - 0x0200, '·…', - 0x0208, 'symbols', - 0x0220, 'maths', - 0x0230, 'technical', - 0x0248, '()', - 0x0250, 'draw', - 0x0260, 'symbols', - 0x0280, 'braille', - 0x0290, 'arr', - 0x0298, 'maths', - 0x02B0, 'misc', - 0x02C0, 'ancient', - 0x02D0, 'ext', - 0x02E0, '·+', - 0x02E8, 'radicals', - 0x0300, 'japanese', - 0x0310, 'cjk+', - 0x0330, 'compat', - 0x0340, '', - 0x0380, 'cjk ideographs A', #+2 - 0x04E0, 'cjk unified ideographs', - 0x0A00, 'yi', - 0x0A48, 'lisu', - 0x0A50, 'vai', - 0x0A60, 'cyr', - 0x0A68, 'bam', - 0x0A70, 'lat-D', - 0x0A80, 'brahmic', - 0x0AB0, 'ext', - 0x0AC0, '', - 0x0B00, 'hangeul syllables', - 0x0D80, 'surrogates', - 0x0E00, 'private use', - 0x0F80, '', - 0x0F90, 'cjk compat', - 0x0FB0, 'presentation', - 0x0FF0, 'width', - - 0x1000, 'linear B', - 0x1010, 'a num', - 0x1020, 'ltr', - 0x1060, 'linear A', - 0x1078, 'ltr', - 0x1080, 'rtl', - 0x1100, 'brahmic', - 0x1200, 'cuneiform', - 0x1300, 'egyptian hieroglyphs', - 0x1400, 'other large scripts', - 0x1600, 'recent', - 0x1700, 'east asian', - 0x1B40, 'res', - 0x1B50, 'proto-elamite', - 0x1BC0, 'shorthands', - 0x1BE0, '', - 0x1C00, 'other large scripts', - 0x1D00, 'notational systems', - 0x1D40, 'mathematical', # Sm - 0x1D80, 'sutton signs', - 0x1DC0, 'notational', - 0x1E00, 'ltr', - 0x1E80, 'rtl', - 0x1F00, 'game', - 0x1F10, 'enclosed', - 0x1F30, 'pictographic', - 0x1F80, 'arrows', - 0x1F90, 'unassigned', -); - -sub { - return defined $uniblock{$_[0]} ? $uniblock{$_[0]} : (); -} - ++{ + 0x0000 => [0x008, 'X Po', 'ascii'], + 0x0008 => [0x020, 'X L Latin', 'latin'], + 0x0028 => [0x010, 'X Mn', 'comb'], # also spacing Sk + 0x0038 => [0x008, 'X L Greek', 'grk'], + 0x0040 => [0x010, 'X L Cyrillic', 'cyr'], + 0x0050 => [0x008, 'X L Armenian', 'arm'], + 0x0058 => [0x008, 'X L Aramaic', 'heb'], + 0x0060 => [0x010, 'X L Arabic', 'arabic'], + 0x0070 => [0x010, 'X L Aramaic', 'aram'], + 0x0080 => [0x010, 'X L Aramaic', 'aramaic'], + 0x0090 => [0x070, 'X L Brahmic', 'brahmic'], + 0x0100 => [0x008, 'X L Brahmic', 'mm'], + 0x0108 => [0x008, 'X L Aramaic', 'geor'], + 0x0110 => [0x010, 'X L Hangul', 'jamo'], + 0x0120 => [0x018, 'X L African', 'ethiopic'], + 0x0138 => [0x030, 'X L Syllabic', 'aboriginal'], + 0x0168 => [0x008, 'X L Alpha', 'ger'], + 0x0170 => [0x010, 'X L Brahmic', 'brahm'], + 0x0180 => [0x008, 'X L Aramaic', 'mon'], + 0x0188 => [0x008, 'X L Syllabic', 'can'], + 0x0190 => [0x040, 'X L Brahmic', 'brahmic'], + 0x01D0 => [0x020, 'X L Latin', 'extensions'], + 0x01F0 => [0x010, 'X L Greek', 'greek'], + 0x0200 => [0x008, 'X Po', '·…'], + 0x0208 => [0x018, 'X So', 'symbols'], + 0x0220 => [0x010, 'X Sm', 'maths'], + 0x0230 => [0x018, 'X So', 'technical'], + 0x0248 => [0x008, 'X Latin', '()'], + 0x0250 => [0x010, 'X So', 'draw'], + 0x0260 => [0x020, 'X So', 'symbols'], + 0x0280 => [0x010, 'X L Alpha', 'braille'], + 0x0290 => [0x008, 'X So', 'arr'], + 0x0298 => [0x018, 'X Sm', 'maths'], + 0x02B0 => [0x010, 'X So', 'misc'], + 0x02C0 => [0x010, 'X L Greek', 'ancient'], + 0x02D0 => [0x010, 'X L Alpha', 'ext'], + 0x02E0 => [0x008, 'X Po', '·+'], + 0x02E8 => [0x018, 'X L Han', 'radicals'], + 0x0300 => [0x010, 'X L Katakana', 'japanese'], + 0x0310 => [0x020, 'X L Han', 'cjk+'], + 0x0330 => [0x010, 'X Xd L Han', 'compat'], + 0x0340 => [0x1A0, 'X L Han', 'cjk ideographs A'], + 0x04E0 => [0x520, 'X L Han', 'cjk unified ideographs'], + 0x0A00 => [0x048, 'X L Syllabic', 'yi'], + 0x0A48 => [0x008, 'X L Latin', 'lisu'], + 0x0A50 => [0x010, 'X L Syllabic', 'vai'], + 0x0A60 => [0x008, 'X L Cyrillic', 'cyr'], + 0x0A68 => [0x008, 'X L Syllabic', 'bam'], + 0x0A70 => [0x010, 'X L Latin', 'lat-D'], + 0x0A80 => [0x030, 'X L Brahmic', 'brahmic'], + 0x0AB0 => [0x010, 'X L Alpha', 'ext'], + 0x0AC0 => [0x2C0, 'X L Hangul', 'hangeul syllables'], + 0x0D80 => [0x080, 'X Cs', 'surrogates'], + 0x0E00 => [0x190, 'X Co', 'private use'], + 0x0F90 => [0x020, 'X L Han', 'cjk compat'], + 0x0FB0 => [0x040, 'X L Arabic', 'presentation'], + 0x0FF0 => [0x010, 'X L Latin', 'width'], + 0x1000 => [0x010, 'X L Syllabic', 'linear B'], + 0x1010 => [0x010, 'X No', 'a num'], + 0x1020 => [0x040, 'X L Alpha', 'ltr'], + 0x1060 => [0x018, 'X L Syllabic', 'linear A'], + 0x1078 => [0x008, 'X L Alpha', 'ltr'], + 0x1080 => [0x080, 'X L Aramaic', 'rtl'], + 0x1100 => [0x100, 'X L Brahmic', 'brahmic'], + 0x1200 => [0x100, 'X L Syllabic', 'cuneiform'], + 0x1300 => [0x100, 'X L Syllabic', 'egyptian hieroglyphs'], + 0x1400 => [0x200, 'X L Syllabic', 'other large scripts'], + 0x1600 => [0x100, 'X L Alpha', 'recent'], + 0x1700 => [0x450, 'X L Han', 'east asian'], + 0x1B50 => [0x070, 'X L Syllabic', 'proto-elamite'], + 0x1BC0 => [0x040, 'X L Alpha', 'shorthands'], + 0x1C00 => [0x100, '', 'other large scripts'], + 0x1D00 => [0x040, 'X So', 'notational systems'], + 0x1D40 => [0x040, 'X L Latin', 'mathematical'], # Sm + 0x1D80 => [0x040, 'X L Alpha', 'sutton signs'], + 0x1DC0 => [0x040, '', 'notational'], + 0x1E00 => [0x080, 'X L Alpha', 'ltr'], + 0x1E80 => [0x080, 'X L Alpha', 'rtl'], + 0x1F00 => [0x010, 'X So', 'game'], + 0x1F10 => [0x020, 'X L So', 'enclosed'], + 0x1F30 => [0x050, 'X So', 'pictographic'], + 0x1F80 => [0x010, 'X So', 'arrows'], + 0x1F90 => [0x070, '', 'unassigned'], +}; diff --git a/charset.plp b/charset.plp index e55e771..15183cf 100644 --- a/charset.plp +++ b/charset.plp @@ -107,15 +107,16 @@ sub range_cell { my ($len, $class, $name, $title) = @{$def}; my $attr = ''; + $len /= $nibsize; $name //= $len <= 2 ? 'res' : 'reserved'; - if (my $part = $offset % 16) { + if (my $part = $offset/$nibsize % 16) { # continued row my $cols = 16 - $part; # remaining $cols = $len if $len < $cols; #TODO: optimise if ($len -= $cols) { # continued on new row - $table->{$offset + $cols} = [$len, "$class joinu", $name, $title]; + $table->{$offset + $nibsize*$cols} = [$len*$nibsize, "$class joinu", $name, $title]; $name = ''; $class .= ' joind'; } @@ -125,7 +126,7 @@ sub range_cell { # multiple full rows if ($len -= $rows << 4) { # partial row remains - $table->{$offset + $rows * 16} = [$len, "$class joinu", '', $title]; + $table->{$offset + $nibsize*$rows * 16} = [$len*$nibsize, "$class joinu", '', $title]; $class .= ' joind'; } $attr .= sprintf ' rowspan=%d', $rows; @@ -149,7 +150,7 @@ for my $row (@request) { } print ''; for my $msb (0 .. (length($row->{table}) || 256) - 1 >> 4) { - printf '%X', $msb + ($row->{offset} >> 4); + printf '%X', ($msb + ($row->{offset} >> 4)) * $nibsize; for my $lsb (0 .. $#nibble) { my $val = ( ($msb<<4) + $lsb ) * $nibsize; if ($row->{cell}) { -- 2.30.0