#!/usr/bin/env perl
-
+use 5.010;
use strict;
use warnings;
use utf8;
our $VERSION = '1.00';
-# expect input data source at command line
-@ARGV or die "Specify input source file or - for STDIN\n";
-
-# skip everything until a character indented by 1 space (table start)
-do {
- $_ = readline;
- defined or die "Premature input end";
-} until /^\s\S/;
-
-my @t = $_; # add first line (already read, assume it's ok)
-
-# read the rest of the character table
-while ($_ = readline) {
- # check for table end (chapter 4)
- last if /^\d/;
-
- # parse table lines (ignore (unindented) page break)
- next unless s/^ //;
- chomp;
-
- # add the line to @t
- if (s/^ {15}/ /) {
- # continuation line (add to last entry)
- $t[-1] .= $_;
- }
- else {
- # add a new entry
- push @t, $_;
- }
-}
-
-# create a hash of desired input
-my %di;
-for (@t) {
- my ($mnem, $char, $name) = split / +/, $_, 3;
- next if length $mnem != 2;
- $di{$mnem} = hex $char;
-}
-
-# XXX
-my %trans = (
+# Reads a character table from the input named on the command line and
+# prints Perl source for an anonymous hash mapping each two-character
+# mnemonic to its numeric code point.
+
+# translation table for deprecated code points:
+# key is the code point found in the table, value is the code point to
+# output instead; a value of 0 means the entry is left out of the output
+my %replace = (
0xE001 => 0, # join lines: not accepted
0xE004 => 0, # umlaut is no different from diaeresis 0x0308
0xE005 => 0x0344, # discouraged
0xE027 => 0x1FC0,
0xE028 => 0x01F0, #but uppercase
);
-for (values %di) {
- $_ >= 0xE000 or next;
- $_ = $trans{$_} if defined $trans{$_};
+
+# expect input data source at command line
+@ARGV or die "Specify input source file or - for STDIN\n";
+
+# skip everything until a character indented by 1 space (table start)
+do {
+ $_ = readline;
+ defined or die "Premature input end";
+} until /^\s\S/;
+
+# normalise the first table line exactly like the later ones: drop the
+# one-character indent and the newline, otherwise split yields an empty
+# mnemonic for it and the first table entry is silently lost
+s/^\s//;
+chomp;
+my @line = $_;
+
+# read the rest of the character table
+while ($_ = readline) {
+ # check for table end (chapter 4)
+ last if /^\d/;
+
+ # parse table lines (ignore (unindented) page break)
+ next unless s/^ //;
+ chomp;
+
+ # append line contents
+ if (s/^ {15}/ /) {
+ # continuation line (add to last entry)
+ $line[-1] .= $_;
+ }
+ else {
+ # add a new entry
+ push @line, $_;
+ }
}
# output perl code of hash
# (assume no backslashes or curlies, so we can just q{} w/o escaping)
-print "# automatically generated by $0\n";
-print "use utf8;\n";
-print "+{\n";
-printf "q{%s}=>%s,\n", $_, $di{$_} for sort keys %di;
-print "}\n";
+say "# automatically generated by $0";
+say 'use utf8;';
+say '+{';
+for (@line) {
+ # fields are separated by runs of spaces: mnemonic, hex code point, name
+ my ($mnem, $chrhex) = split / +/, $_, 3;
+ # skip blank or incomplete lines and anything but two-character mnemonics
+ next if !defined $chrhex || length $mnem != 2;
+ my $chrnum = hex $chrhex;
+ # substitute deprecated code points; a replacement of 0 drops the entry
+ $chrnum = $replace{$chrnum} or next if defined $replace{$chrnum};
+ say "q{$mnem}=>$chrnum,";
+}
+say '}';
__END__