6 use open OUT => ':utf8', ':std';
10 # expect input data source at command line
11 @ARGV or die "Specify input source file or - for STDIN\n";
13 # skip everything until a character indented by 1 space (table start)
16 defined or die "Premature input end";
19 my @t = $_; # add first line (already read, assume it's ok)
21 # read the rest of the character table
22 while ($_ = readline) {
23 # check for table end (chapter 4)
26 # parse table lines (ignore (unindented) page break)
32 # continuation line (add to last entry)
41 # create a hash of desired input
44 my ($mnem, $char, $name) = split / +/, $_, 3;
45 next if length $mnem != 2;
46 $di{$mnem} = hex $char;
51 0xE001 => 0, # join lines: not accepted
52 0xE004 => 0, # umlaut is no different from diaeresis 0x0308
53 0xE005 => 0x0344, # discouraged
75 0xE01B => 0x03D0, # middle beta = curled beta?
79 0xE01F => 0x33C2, # am, compatibility char
80 0xE020 => 0x33D8, # pm, compatibility char
83 0xE023 => 0, # dutch guilder 0192 is already encoded, and not very useful anyway
85 0xE025 => 0x20D7, # also 20D1; non-spacing
88 0xE028 => 0x01F0, #but uppercase
92 $_ = $trans{$_} if defined $trans{$_};
95 # output perl code of hash
96 # (assume no backslashes or curlies, so we can just q{} w/o escaping)
97 print "# automatically generated by $0\n";
100 printf "q{%s}=>%s,\n", $_, $di{$_} for sort keys %di;
107 mkdigraphs-rfc - Output digraph data from RFC-1345
111 Extract digraphs from text specifications as a perl hash:
113 mkdigraphs-rfc rfc1345.txt >digraphs-rfc.inc.pl
115 Input can be the literal RFC (or similar) document:
117 curl http://www.ietf.org/rfc/rfc1345.txt | mkdigraphs-rfc -
119 Test by printing the character for DO (should be a dollar sign):
121 perl -e'$di = do "./digraphs-rfc.inc.pl"; print chr $di->{DO}'
125 Parses the official RFC-1345 document, searching the
126 'character mnemonic table' for all digraph definitions.
127 If successful, Perl code is output resulting in a hash
128 with Unicode code points keyed by digraph.
129 Obsolete values (references to private use area)
130 are converted to modern alternatives.
131 Any errors and warnings are given at STDERR.
135 Mischa POSLAWSKY <perl@shiar.org>
139 Licensed under the GNU Affero General Public License version 3.