From 4268fe1b48ef3f5951c401477b20cd1300e07536 Mon Sep 17 00:00:00 2001
From: Mischa POSLAWSKY
Date: Sat, 21 Feb 2015 04:04:09 +0100
Subject: [PATCH] digraphs: clean up rfc parser code

---
 tools/mkdigraphs-rfc | 94 +++++++++++++++++++++-----------------------
 1 file changed, 44 insertions(+), 50 deletions(-)

diff --git a/tools/mkdigraphs-rfc b/tools/mkdigraphs-rfc
index ab03a42..d33503d 100755
--- a/tools/mkdigraphs-rfc
+++ b/tools/mkdigraphs-rfc
@@ -1,5 +1,5 @@
 #!/usr/bin/env perl
-
+use 5.010;
 use strict;
 use warnings;
 use utf8;
@@ -7,47 +7,8 @@ use open OUT => ':utf8', ':std';
 
 our $VERSION = '1.00';
 
-# expect input data source at command line
-@ARGV or die "Specify input source file or - for STDIN\n";
-
-# skip everything until a character indented by 1 space (table start)
-do {
-	$_ = readline;
-	defined or die "Premature input end";
-} until /^\s\S/;
-
-my @t = $_; # add first line (already read, assume it's ok)
-
-# read the rest of the character table
-while ($_ = readline) {
-	# check for table end (chapter 4)
-	last if /^\d/;
-
-	# parse table lines (ignore (unindented) page break)
-	next unless s/^ //;
-	chomp;
-
-	# add the line to @t
-	if (s/^ {15}/ /) {
-		# continuation line (add to last entry)
-		$t[-1] .= $_;
-	}
-	else {
-		# add a new entry
-		push @t, $_;
-	}
-}
-
-# create a hash of desired input
-my %di;
-for (@t) {
-	my ($mnem, $char, $name) = split / +/, $_, 3;
-	next if length $mnem != 2;
-	$di{$mnem} = hex $char;
-}
-
-# XXX
-my %trans = (
+# translation table for deprecated code points
+my %replace = (
 	0xE001 => 0, # join lines: not accepted
 	0xE004 => 0, # umlaut is no different from diaeresis 0x0308
 	0xE005 => 0x0344, # discouraged
@@ -87,18 +48,51 @@ my %trans = (
 	0xE027 => 0x1FC0,
 	0xE028 => 0x01F0, #but uppercase
 );
-for (values %di) {
-	$_ >= 0xE000 or next;
-	$_ = $trans{$_} if defined $trans{$_};
+
+# expect input data source at command line
+@ARGV or die "Specify input source file or - for STDIN\n";
+
+# skip everything until a character indented by 1 space (table start)
+do {
+	$_ = readline;
+	defined or die "Premature input end";
+} until /^\s\S/;
+
+my @line = $_; # add first line (already read, assume it's ok)
+
+# read the rest of the character table
+while ($_ = readline) {
+	# check for table end (chapter 4)
+	last if /^\d/;
+
+	# parse table lines (ignore (unindented) page break)
+	next unless s/^ //;
+	chomp;
+
+	# append line contents
+	if (s/^ {15}/ /) {
+		# continuation line (add to last entry)
+		$line[-1] .= $_;
+	}
+	else {
+		# add a new entry
+		push @line, $_;
+	}
 }
 
 # output perl code of hash
 # (assume no backslashes or curlies, so we can just q{} w/o escaping)
-print "# automatically generated by $0\n";
-print "use utf8;\n";
-print "+{\n";
-printf "q{%s}=>%s,\n", $_, $di{$_} for sort keys %di;
-print "}\n";
+say "# automatically generated by $0";
+say 'use utf8;';
+say '+{';
+for (@line) {
+	my ($mnem, $chrhex, $name) = split / +/, $_, 3;
+	next if length $mnem != 2;
+	my $chrnum = hex $chrhex;
+	$chrnum = $replace{$chrnum} or next if defined $replace{$chrnum};
+	say "q{$mnem}=>$chrnum,";
+}
+say '}';
 
 __END__
 
-- 
2.30.0