From 4268fe1b48ef3f5951c401477b20cd1300e07536 Mon Sep 17 00:00:00 2001
From: Mischa POSLAWSKY <perl@shiar.org>
Date: Sat, 21 Feb 2015 04:04:09 +0100
Subject: [PATCH] digraphs: clean up rfc parser code

---
 tools/mkdigraphs-rfc | 94 +++++++++++++++++++++-----------------------
 1 file changed, 44 insertions(+), 50 deletions(-)

diff --git a/tools/mkdigraphs-rfc b/tools/mkdigraphs-rfc
index ab03a42..d33503d 100755
--- a/tools/mkdigraphs-rfc
+++ b/tools/mkdigraphs-rfc
@@ -1,5 +1,5 @@
 #!/usr/bin/env perl
-
+use 5.010;
 use strict;
 use warnings;
 use utf8;
@@ -7,47 +7,8 @@ use open OUT => ':utf8', ':std';
 
 our $VERSION = '1.00';
 
-# expect input data source at command line
-@ARGV or die "Specify input source file or - for STDIN\n";
-
-# skip everything until a character indented by 1 space (table start)
-do {
-	$_ = readline;
-	defined or die "Premature input end";
-} until /^\s\S/;
-
-my @t = $_;  # add first line (already read, assume it's ok)
-
-# read the rest of the character table
-while ($_ = readline) {
-	# check for table end (chapter 4)
-	last if /^\d/;
-
-	# parse table lines (ignore (unindented) page break)
-	next unless s/^ //;
-	chomp;
-
-	# add the line to @t
-	if (s/^ {15}/ /) {
-		# continuation line (add to last entry)
-		$t[-1] .= $_;
-	}
-	else {
-		# add a new entry
-		push @t, $_;
-	}
-}
-
-# create a hash of desired input
-my %di;
-for (@t) {
-	my ($mnem, $char, $name) = split / +/, $_, 3;
-	next if length $mnem != 2;
-	$di{$mnem} = hex $char;
-}
-
-# XXX
-my %trans = (
+# translation table for deprecated code points (a 0 value drops the entry)
+my %replace = (
 	0xE001 => 0,  # join lines: not accepted
 	0xE004 => 0,  # umlaut is no different from diaeresis 0x0308
 	0xE005 => 0x0344, # discouraged
@@ -87,18 +48,51 @@ my %trans = (
 	0xE027 => 0x1FC0,
 	0xE028 => 0x01F0, #but uppercase
 );
-for (values %di) {
-	$_ >= 0xE000 or next;
-	$_ = $trans{$_} if defined $trans{$_};
+
+# expect input data source at command line
+@ARGV or die "Specify input source file or - for STDIN\n";
+
+# skip everything up to the first line indented by one whitespace (table start)
+do {
+	$_ = readline;  # no handle given, so this reads the input named in @ARGV
+	defined or die "Premature input end";
+} until /^\s\S/;
+
+my @line = /^\s(.*)/;  # first entry, minus its indent and newline like the rest
+
+# read the rest of the character table, one @line element per entry
+while ($_ = readline) {
+	# a line starting with a digit ends the table (chapter 4 heading)
+	last if /^\d/;
+
+	# table lines are indented by a space (ignore unindented page breaks)
+	next unless s/^ //;
+	chomp;
+
+	# 15 more spaces of indent mark wrapped text, joined by a single space
+	if (s/^ {15}/ /) {
+		# continuation line (add to last entry)
+		$line[-1] .= $_;
+	}
+	else {
+		# add a new entry
+		push @line, $_;
+	}
 }
 
 # output perl code of hash
 # (assume no backslashes or curlies, so we can just q{} w/o escaping)
-print "# automatically generated by $0\n";
-print "use utf8;\n";
-print "+{\n";
-printf "q{%s}=>%s,\n", $_, $di{$_} for sort keys %di;
-print "}\n";
+say for "# automatically generated by $0", 'use utf8;', '+{';
+for my $entry (@line) {
+	# fields are separated by runs of spaces: mnemonic, hex code point, name
+	my ($mnem, $chrhex) = split / +/, $entry, 3;
+	next if length $mnem != 2;
+	my $chrnum = hex $chrhex;
+	if (defined(my $subst = $replace{$chrnum})) {
+		$chrnum = $subst or next;  # a false (0) replacement skips the entry
+	}
+	say "q{$mnem}=>$chrnum,";
+}
 
 __END__
 
-- 
2.30.0