From cc3aea5b110f944ce10a22cdea96a0b3f6da15c9 Mon Sep 17 00:00:00 2001
From: Mischa POSLAWSKY <perl@shiar.org>
Date: Mon, 27 Mar 2017 22:10:05 +0200
Subject: [PATCH] charset: common cell formatting for unicode planes

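Define the plane ranges in the same table format as the unicode include
(start => [length, class, name]) so that range_cell can format them,
and make range_cell scale lengths and offsets by $nibsize to support a
different nibble size.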
---
 charset-ucplanes.inc.pl | 182 +++++++++++++++++++---------------------
 charset.plp             |   9 +-
 2 files changed, 90 insertions(+), 101 deletions(-)
diff --git a/charset-ucplanes.inc.pl b/charset-ucplanes.inc.pl
index fd01acb..fce05ff 100644
--- a/charset-ucplanes.inc.pl
+++ b/charset-ucplanes.inc.pl
@@ -1,98 +1,86 @@
 use utf8;
-my %uniblock = (
-	0x0000, '<td colspan="1" class="X Po">ascii',
-	0x0008, '<td colspan="4" class="X L Latin">latin',
-#	0x0028, '<td colspan="5" class="X Sk">spacing modifier',
-	0x0028, '<td colspan="2" class="X Mn">comb',
-	0x0038, '<td colspan="1" class="X L Greek">grk',
-	0x0040, '<td colspan="2" class="X L Cyrillic">cyr',
-	0x0050, '<td colspan="1" class="X L Armenian">arm',
-	0x0058, '<td colspan="1" class="X L Aramaic">heb',
-	0x0060, '<td colspan="2" class="X L Arabic">arabic',
-	0x0070, '<td colspan="3" class="X L Aramaic">aram',
-	0x0080, '<td colspan="2" class="X L Aramaic">aramaic',
-	0x0090, '<td colspan="14" class="X L Brahmic">brahmic',
-	0x0100, '<td colspan="1" class="X L Brahmic">mm',
-	0x0108, '<td colspan="1" class="X L Aramaic">geor',
-	0x0110, '<td colspan="2" class="X L Hangul">jamo',
-	0x0120, '<td colspan="3" class="X L African">ethiopic',
-	0x0138, '<td colspan="6" class="X L Syllabic">aboriginal',
-	0x0168, '<td colspan="1" class="X L Alpha">ger',
-	0x0170, '<td colspan="2" class="X L Brahmic">brahm',
-	0x0180, '<td colspan="1" class="X L Aramaic">mon',
-	0x0188, '<td colspan="1" class="X L Syllabic">can',
-	0x0190, '<td colspan="8" class="X L Brahmic">brahmic',
-	0x01D0, '<td colspan="4" class="X L Latin">extensions',
-	0x01F0, '<td colspan="2" class="X L Greek">greek',
-	0x0200, '<td colspan="1" class="X Po">·…',
-	0x0208, '<td colspan="3" class="X So">symbols',
-	0x0220, '<td colspan="2" class="X Sm">maths',
-	0x0230, '<td colspan="3" class="X So">technical',
-	0x0248, '<td colspan="1" class="X Latin">()',
-	0x0250, '<td colspan="2" class="X So">draw',
-	0x0260, '<td colspan="4" class="X So">symbols',
-	0x0280, '<td colspan="2" class="X L Alpha">braille',
-	0x0290, '<td colspan="1" class="X So">arr',
-	0x0298, '<td colspan="3" class="X Sm">maths',
-	0x02B0, '<td colspan="2" class="X So">misc',
-	0x02C0, '<td colspan="2" class="X L Greek">ancient',
-	0x02D0, '<td colspan="2" class="X L Alpha">ext',
-	0x02E0, '<td colspan="1" class="X Po">·+',
-	0x02E8, '<td colspan="3" class="X L Han">radicals',
-	0x0300, '<td colspan="2" class="X L Katakana">japanese',
-	0x0310, '<td colspan="4" class="X L Han">cjk+',
-	0x0330, '<td colspan="2" class="X Xd L Han">compat',
-	0x0340, '<td colspan="8" class="X L Han" style="border-bottom:none">',
-	0x0380, '<td colspan="16" rowspan="2" class="X L Han" style="border-top:none">cjk ideographs A', #+2
-	0x04E0, '<td colspan="16" rowspan="11" class="X L Han">cjk unified ideographs',
-	0x0A00, '<td colspan="9" class="X L Syllabic">yi',
-	0x0A48, '<td colspan="1" class="X L Latin">lisu',
-	0x0A50, '<td colspan="2" class="X L Syllabic">vai',
-	0x0A60, '<td colspan="1" class="X L Cyrillic">cyr',
-	0x0A68, '<td colspan="1" class="X L Syllabic">bam',
-	0x0A70, '<td colspan="2" class="X L Latin">lat-D',
-	0x0A80, '<td colspan="6" class="X L Brahmic">brahmic',
-	0x0AB0, '<td colspan="2" class="X L Alpha">ext',
-	0x0AC0, '<td colspan="8" class="X L Hangul" style="border-bottom:none">',
-	0x0B00, '<td colspan="16" rowspan="5" class="X L Hangul" style="border-top:none">hangeul syllables',
-	0x0D80, '<td colspan="16" class="X Cs">surrogates',
-	0x0E00, '<td colspan="16" rowspan="3" class="X Co" style="border-bottom:none">private use',
-	0x0F80, '<td colspan="2" class="X Co" style="border-top:none">',
-	0x0F90, '<td colspan="4" class="X L Han">cjk compat',
-	0x0FB0, '<td colspan="8" class="X L Arabic">presentation',
-	0x0FF0, '<td colspan="2" class="X L Latin">width',
-
-	0x1000, '<td colspan="2" class="X L Syllabic">linear B',
-	0x1010, '<td colspan="2" class="X No">a num',
-	0x1020, '<td colspan="8" class="X L Alpha">ltr',
-	0x1060, '<td colspan="3" class="X L Syllabic">linear A',
-	0x1078, '<td colspan="1" class="X L Alpha">ltr',
-	0x1080, '<td colspan="16" class="X L Aramaic">rtl',
-	0x1100, '<td colspan="16" rowspan="2" class="X L Brahmic">brahmic',
-	0x1200, '<td colspan="16" rowspan="2" class="X L Syllabic">cuneiform',
-	0x1300, '<td colspan="16" rowspan="2" class="X L Syllabic">egyptian hieroglyphs',
-	0x1400, '<td colspan="16" rowspan="4" class="X L Syllabic">other large scripts',
-	0x1600, '<td colspan="16" rowspan="2" class="X L Alpha">recent',
-	0x1700, '<td colspan="16" rowspan="8" class="X L Han">east asian',
-	0x1B40, '<td colspan="2" class="">res',
-	0x1B50, '<td colspan="14" class="X L Syllabic">proto-elamite',
-	0x1BC0, '<td colspan="8" class="X L Alpha">shorthands',
-	0x1BE0, '<td colspan="8" class="" style="border-bottom:none">',
-	0x1C00, '<td colspan="16" rowspan="2" class="" style="border-top:none">other large scripts',
-	0x1D00, '<td colspan="8" class="X So">notational systems',
-	0x1D40, '<td colspan="8" class="X L Latin">mathematical', # Sm
-	0x1D80, '<td colspan="8" class="X L Alpha">sutton signs',
-	0x1DC0, '<td colspan="8" class="">notational',
-	0x1E00, '<td colspan="16" class="X L Alpha">ltr',
-	0x1E80, '<td colspan="16" class="X L Alpha">rtl',
-	0x1F00, '<td colspan="2" class="X So">game',
-	0x1F10, '<td colspan="4" class="X L So">enclosed',
-	0x1F30, '<td colspan="12" class="X So">pictographic',
-	0x1F80, '<td colspan="2" class="X So">arrows',
-	0x1F90, '<td colspan="14" class="">unassigned',
-);
-
-sub {
-	return defined $uniblock{$_[0]} ? $uniblock{$_[0]} : ();
-}
-
++{
+	0x0000 => [0x008, 'X Po',               'ascii'],
+	0x0008 => [0x020, 'X L Latin',          'latin'],
+	0x0028 => [0x010, 'X Mn',               'comb'], # also spacing Sk
+	0x0038 => [0x008, 'X L Greek',          'grk'],
+	0x0040 => [0x010, 'X L Cyrillic',       'cyr'],
+	0x0050 => [0x008, 'X L Armenian',       'arm'],
+	0x0058 => [0x008, 'X L Aramaic',        'heb'],
+	0x0060 => [0x010, 'X L Arabic',         'arabic'],
+	0x0070 => [0x010, 'X L Aramaic',        'aram'],
+	0x0080 => [0x010, 'X L Aramaic',        'aramaic'],
+	0x0090 => [0x070, 'X L Brahmic',        'brahmic'],
+	0x0100 => [0x008, 'X L Brahmic',        'mm'],
+	0x0108 => [0x008, 'X L Aramaic',        'geor'],
+	0x0110 => [0x010, 'X L Hangul',         'jamo'],
+	0x0120 => [0x018, 'X L African',        'ethiopic'],
+	0x0138 => [0x030, 'X L Syllabic',       'aboriginal'],
+	0x0168 => [0x008, 'X L Alpha',          'ger'],
+	0x0170 => [0x010, 'X L Brahmic',        'brahm'],
+	0x0180 => [0x008, 'X L Aramaic',        'mon'],
+	0x0188 => [0x008, 'X L Syllabic',       'can'],
+	0x0190 => [0x040, 'X L Brahmic',        'brahmic'],
+	0x01D0 => [0x020, 'X L Latin',          'extensions'],
+	0x01F0 => [0x010, 'X L Greek',          'greek'],
+	0x0200 => [0x008, 'X Po',               '·…'],
+	0x0208 => [0x018, 'X So',               'symbols'],
+	0x0220 => [0x010, 'X Sm',               'maths'],
+	0x0230 => [0x018, 'X So',               'technical'],
+	0x0248 => [0x008, 'X Latin',            '()'],
+	0x0250 => [0x010, 'X So',               'draw'],
+	0x0260 => [0x020, 'X So',               'symbols'],
+	0x0280 => [0x010, 'X L Alpha',          'braille'],
+	0x0290 => [0x008, 'X So',               'arr'],
+	0x0298 => [0x018, 'X Sm',               'maths'],
+	0x02B0 => [0x010, 'X So',               'misc'],
+	0x02C0 => [0x010, 'X L Greek',          'ancient'],
+	0x02D0 => [0x010, 'X L Alpha',          'ext'],
+	0x02E0 => [0x008, 'X Po',               '·+'],
+	0x02E8 => [0x018, 'X L Han',            'radicals'],
+	0x0300 => [0x010, 'X L Katakana',       'japanese'],
+	0x0310 => [0x020, 'X L Han',            'cjk+'],
+	0x0330 => [0x010, 'X Xd L Han',         'compat'],
+	0x0340 => [0x1A0, 'X L Han',            'cjk ideographs A'],
+	0x04E0 => [0x520, 'X L Han',            'cjk unified ideographs'],
+	0x0A00 => [0x048, 'X L Syllabic',       'yi'],
+	0x0A48 => [0x008, 'X L Latin',          'lisu'],
+	0x0A50 => [0x010, 'X L Syllabic',       'vai'],
+	0x0A60 => [0x008, 'X L Cyrillic',       'cyr'],
+	0x0A68 => [0x008, 'X L Syllabic',       'bam'],
+	0x0A70 => [0x010, 'X L Latin',          'lat-D'],
+	0x0A80 => [0x030, 'X L Brahmic',        'brahmic'],
+	0x0AB0 => [0x010, 'X L Alpha',          'ext'],
+	0x0AC0 => [0x2C0, 'X L Hangul',         'hangeul syllables'],
+	0x0D80 => [0x080, 'X Cs',               'surrogates'],
+	0x0E00 => [0x190, 'X Co',               'private use'],
+	0x0F90 => [0x020, 'X L Han',            'cjk compat'],
+	0x0FB0 => [0x040, 'X L Arabic',         'presentation'],
+	0x0FF0 => [0x010, 'X L Latin',          'width'],
+	0x1000 => [0x010, 'X L Syllabic',       'linear B'],
+	0x1010 => [0x010, 'X No',               'a num'],
+	0x1020 => [0x040, 'X L Alpha',          'ltr'],
+	0x1060 => [0x018, 'X L Syllabic',       'linear A'],
+	0x1078 => [0x008, 'X L Alpha',          'ltr'],
+	0x1080 => [0x080, 'X L Aramaic',        'rtl'],
+	0x1100 => [0x100, 'X L Brahmic',        'brahmic'],
+	0x1200 => [0x100, 'X L Syllabic',       'cuneiform'],
+	0x1300 => [0x100, 'X L Syllabic',       'egyptian hieroglyphs'],
+	0x1400 => [0x200, 'X L Syllabic',       'other large scripts'],
+	0x1600 => [0x100, 'X L Alpha',          'recent'],
+	0x1700 => [0x450, 'X L Han',            'east asian'],
+	0x1B50 => [0x070, 'X L Syllabic',       'proto-elamite'],
+	0x1BC0 => [0x040, 'X L Alpha',          'shorthands'],
+	0x1C00 => [0x100, '',                   'other large scripts'],
+	0x1D00 => [0x040, 'X So',               'notational systems'],
+	0x1D40 => [0x040, 'X L Latin',          'mathematical'], # Sm
+	0x1D80 => [0x040, 'X L Alpha',          'sutton signs'],
+	0x1DC0 => [0x040, '',                   'notational'],
+	0x1E00 => [0x080, 'X L Alpha',          'ltr'],
+	0x1E80 => [0x080, 'X L Alpha',          'rtl'],
+	0x1F00 => [0x010, 'X So',               'game'],
+	0x1F10 => [0x020, 'X L So',             'enclosed'],
+	0x1F30 => [0x050, 'X So',               'pictographic'],
+	0x1F80 => [0x010, 'X So',               'arrows'],
+	0x1F90 => [0x070, '',                   'unassigned'],
+};
diff --git a/charset.plp b/charset.plp
index e55e771..15183cf 100644
--- a/charset.plp
+++ b/charset.plp
@@ -107,15 +107,16 @@ sub range_cell {
 	my ($len, $class, $name, $title) = @{$def};
 
 	my $attr = '';
+	$len /= $nibsize;
 	$name //= $len <= 2 ? 'res' : 'reserved';
 
-	if (my $part = $offset % 16) {
+	if (my $part = $offset/$nibsize % 16) {
 		# continued row
 		my $cols = 16 - $part;  # remaining
 		$cols = $len if $len < $cols; #TODO: optimise
 		if ($len -= $cols) {
 			# continued on new row
-			$table->{$offset + $cols} = [$len, "$class joinu", $name, $title];
+			$table->{$offset + $nibsize*$cols} = [$len*$nibsize, "$class joinu", $name, $title];
 			$name = '';
 			$class .= ' joind';
 		}
@@ -125,7 +126,7 @@ sub range_cell {
 		# multiple full rows
 		if ($len -= $rows << 4) {
 			# partial row remains
-			$table->{$offset + $rows * 16} = [$len, "$class joinu", '', $title];
+			$table->{$offset + $nibsize*$rows * 16} = [$len*$nibsize, "$class joinu", '', $title];
 			$class .= ' joind';
 		}
 		$attr .= sprintf ' rowspan=%d', $rows;
@@ -149,7 +150,7 @@ for my $row (@request) {
 	}
 	print '<tbody>';
 	for my $msb (0 .. (length($row->{table}) || 256) - 1 >> 4) {
-		printf '<tr><th>%X', $msb + ($row->{offset} >> 4);
+		printf '<tr><th>%X', ($msb + ($row->{offset} >> 4)) * $nibsize;
 		for my $lsb (0 .. $#nibble) {
 			my $val = ( ($msb<<4) + $lsb ) * $nibsize;
 			if ($row->{cell}) {
-- 
2.30.0