X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/7d1044e5e4df51459cafb742090ce62b52d9e087..d7c1207fb1cf7dcac45dd455a78d4c8c106ca4a3:/rfc1345convert

Patch to rfc1345convert, restored to one diff line per source line.

Hunk 1 adds the %trans table and a loop that replaces every %di value at or
above 0xE000 that has an entry in that table.  Hunk 2 adds a pass that sets
display strings for combining characters and names/strings for control
characters, flattens each %info entry into an array (name, category, script,
string; trailing undefined values removed), and changes the printed output
to start with "+{" followed by a map whose keys are the %di keys with their
first two characters swapped.

Review notes:
 1. The sprintf() format for control characters without a unicode10 name was
    an empty string, which discarded the code point argument.  It now reads
    '<control U+%04X>'; the exact original wording is unknown.  Because of
    this change the patched file no longer corresponds to blob 3157e04 named
    on the index line.
 2. Entries that %trans maps to 0 also satisfy the control-character test
    added in hunk 2 -- confirm that this is intended.
 3. Indentation was reconstructed (tabs assumed) from a whitespace-collapsed
    copy; check whitespace against the target file before applying.

diff --git a/rfc1345convert b/rfc1345convert
index 412bac1..3157e04 100644
--- a/rfc1345convert
+++ b/rfc1345convert
@@ -46,6 +46,52 @@ for (@t) {
 	$di{$mnem} = hex $char;
 }
 
+# XXX: replacement code points for values from 0xE000 up; 0 marks an entry as rejected
+my %trans = (
+	0xE001 => 0, # join lines: not accepted
+	0xE004 => 0, # umlaut is no different from diaeresis 0x0308
+	0xE005 => 0x0344, # discouraged
+	0xE006 => 0x0300,
+	0xE007 => 0x0301,
+	0xE008 => 0x0302,
+	0xE009 => 0x0303,
+	0xE00A => 0x0304,
+	0xE00B => 0x0306,
+	0xE00C => 0x0307,
+	0xE00D => 0x0308,
+	0xE00E => 0x030A,
+	0xE00F => 0x030B,
+	0xE010 => 0x030C,
+	0xE011 => 0x0327,
+	0xE012 => 0x0328,
+	0xE013 => 0x0332,
+	0xE014 => 0x0333,
+	0xE015 => 0x0338,
+	0xE016 => 0x0345,
+	0xE017 => 0x0314,
+	0xE018 => 0x0313,
+	0xE019 => 0x1FFE,
+	0xE01A => 0x1FBF,
+	0xE01B => 0x03D0, # middle beta = curled beta?
+	0xE01C => 0x25CB,
+	0xE01D => 0x0192,
+	0xE01E => 0x0292,
+	0xE01F => 0x33C2, # am, compatibility char
+	0xE020 => 0x33D8, # pm, compatibility char
+	0xE021 => 0x2121,
+	0xE022 => 0xFE8E,
+	0xE023 => 0, # dutch guilder 0192 is already encoded, and not very useful anyway
+	0xE024 => 0x0393,
+	0xE025 => 0x20D7, # also 20D1; non-spacing
+	0xE026 => 0x1FEF,
+	0xE027 => 0x1FC0,
+	0xE028 => 0x01F0, #but uppercase
+);
+for (values %di) {
+	$_ >= 0xE000 or next;
+	$_ = $trans{$_} if defined $trans{$_};
+}
+
 # personal addendums
 my @extra;
 if (-r 'shiar.inc.txt') {
@@ -77,13 +123,35 @@ for (values %info) {
 # mark unofficial extras as such
 $info{$_}->{category} .= ' Xz' for @extra;
 
+for (keys %di) {
+	$info{$_}->{string} = chr(9676) . chr($di{$_}) if $info{$_}->{combining};
+	# find control characters (first 32 chars from 0 and 128)
+	next unless ($di{$_} & ~0b1001_1111) == 0 or $di{$_} == 127;
+	# rename to something more descriptive
+	$info{$_}->{name} = $info{$_}->{unicode10}
+		? '<'.$info{$_}->{unicode10}.'>'  # the old name was much more useful
+		: sprintf('<control U+%04X>', $di{$_});  # at least identify by value
+	# show descriptive symbols instead of control chars themselves
+	$info{$_}->{string} = $di{$_} < 32 ? chr($di{$_} + 0x2400) : chr(0xFFFD);
+}
+
+# convert info hashes into arrays of strings to output in display order
+for my $row (values %info) {
+	$row = [ map { $row->{$_} } qw/name category script string/ ];
+	# strip off trailing missing values (especially string may be unknown)
+	defined $row->[-1] ? last : pop @$row for 1 .. @$row;
+}
+
 # output perl code of hash
 # (assume no backslashes or curlies, so we can just q{} w/o escaping)
-print "{\n";
+print "+{\n";
+printf '(map {$_=>0} qw{%s}),'."\n", join(' ',
+	map { substr($_, 1, 1).substr($_, 0, 1) } sort keys %di
+);
 printf "q{%s}=>[%s],\n", $_, join(',',
-	$di{$_}, # glyph code point
+	$di{$_}, # original code point
 	$info{$_} # optional additional arguments
-		? map {"'$_'"} @{ $info{$_} }{qw/name category script/}
+		? map {"'$_'"} @{ $info{$_} }
 		: ()
 ) for sort keys %di;
 print "}\n";