11 # TODO: download the RFC automatically when no input is supplied on stdin.
13 LWP::Simple::get('http://www.ietf.org/rfc/rfc1345.txt'); # NOTE(review): the fetched text is not used on this line; presumably a placeholder for the TODO above -- confirm
16 # Skip the preamble: keep reading lines until one starts with a single whitespace character followed by a non-blank (taken as the table start).
17 do {$_ = <>} until /^\s\S/; # NOTE(review): no end-of-input check; <> yields undef at EOF, so the pattern can never match and the loop does not end on its own
19 my @t = $_; # seed the table with the line just read (assumed to be a valid entry)
21 # read the rest of the character table
23 # check for table end (chapter 4)
26 # parse table lines (ignore (unindented) page break)
32 # continuation line (add to last entry)
41 # Build %di, mapping each two-character mnemonic to its numeric code point.
44 my ($mnem, $char, $name) = split / +/, $_, 3; # at most three fields, split on runs of spaces; assumes the leading indent was already stripped from $_ (otherwise the first field is empty) -- TODO confirm
45 next if length $mnem != 2; # only mnemonics of exactly two characters are kept
46 $di{$mnem} = hex $char; # second field is read as hexadecimal; $name is not used in the visible lines
49 # Optionally look up Unicode character information for every collected code point.
52 map { $_ => Unicode::UCD::charinfo($di{$_}) } keys %di; # yields mnemonic => charinfo pairs; the receiver of this list (presumably %info) is not visible here -- confirm
55 # Append a custom category tag to records from certain Unicode blocks ($_ is presumably one charinfo record per iteration; the enclosing loop is not visible here).
57 $_->{category} .= ' Xa' if $_->{block} eq 'Basic Latin'; # tag records whose block is Basic Latin
58 $_->{category} .= ' Xl' if $_->{block} eq 'Latin-1 Supplement'; # tag records whose block is Latin-1 Supplement
61 # output perl code of hash
62 # (assume no backslashes or curlies, so we can just q{} w/o escaping)
64 printf "q{%s}=>[%s],\n", $_, join(',',
65 $di{$_}, # glyph code point
66 $info{$_} # optional additional arguments
67 ? map {"'$_'"} @{ $info{$_} }{qw/name category script/}