#!/usr/bin/env perl

use strict;
use warnings;

use Data::Dumper;

our $VERSION = '1.00';

# Disabled placeholder for fetching the RFC text automatically.  The
# condition is a constant false value, so nothing in this block ever runs;
# input is always taken from the files/stdin read further down.
if (0) {
	#TODO: automatic download if not specified on stdin
	require LWP::Simple;
	my $rfc_url = 'http://www.ietf.org/rfc/rfc1345.txt';
	# NOTE(review): the fetched document is discarded; a real implementation
	# would have to feed it into the table parser.
	LWP::Simple::get($rfc_url);
}

# Read the character table from the input (files named on the command
# line, or stdin) into @t, one element per table entry with continuation
# lines folded into the entry they belong to.

# skip everything until a character indented by 1 space (table start)
my $first;
while (defined($first = <>)) {
	last if $first =~ /^\s\S/;
}

# Without this check, input lacking a table would be read forever.
die "no character table found in input\n" unless defined $first;

# Normalise the first entry the same way as every later one (drop the
# indent and the newline); otherwise its mnemonic is preceded by a space
# and the entry can never be recognised when the fields are split.
$first =~ s/^\s//;
chomp $first;

my @t = ($first);

# read the rest of the character table
while (defined(my $line = <>)) {
	# check for table end (chapter 4)
	last if $line =~ /^4/;

	# parse table lines (ignore (unindented) page break)
	next unless $line =~ s/^ //;
	chomp $line;

	if ($line =~ s/^ {15}/ /) {
		# continuation line: 15 further spaces of indent are collapsed
		# to one separator and the text is added to the last entry
		$t[-1] .= $line;
	}
	else {
		# add a new entry
		push @t, $line;
	}
}

# create a hash of desired input: two-character mnemonic => code point
# (the number obtained by reading the entry's second field as hexadecimal)
my %di;
for my $entry (@t) {
	# the third field (rest of the line) is not needed here
	my ($mnem, $char) = split / +/, $entry, 3;

	# only two-character mnemonics are wanted; an empty entry has no
	# fields at all
	next unless defined $mnem and length($mnem) == 2;

	# hex() turns a missing or non-hexadecimal field into 0 (with a
	# warning), which would silently map the mnemonic to code point 0;
	# report and skip such entries instead
	unless (defined $char and $char =~ /\A[0-9A-Fa-f]+\z/) {
		warn "skipping '$mnem': no hexadecimal code point\n";
		next;
	}

	$di{$mnem} = hex $char;
}

# optionally get unicode character information
# (%charinfo stays empty when Unicode::UCD cannot be loaded or the lookup
# dies, because eval then yields an empty list)
my %charinfo = eval {
	require Unicode::UCD;
	# scalar() is essential: in list context a lookup that finds nothing
	# contributes no element at all, which would shift every following
	# key/value pair of the hash by one position
	map { ( $_ => scalar Unicode::UCD::charinfo($di{$_}) ) } keys %di;
};

# output perl code of hash, one "q{mnemonic}=>[...]," line per entry in
# sorted mnemonic order
# (assume no backslashes or curlies, so we can just q{} w/o escaping)
print "{\n";
for my $mnem (sort keys %di) {
	# glyph code point always comes first
	my @fields = ($di{$mnem});

	# optional additional arguments, each wrapped in single quotes
	if (my $info = $charinfo{$mnem}) {
		push @fields, map { "'$_'" } @{$info}{qw/name category script/};
	}

	printf "q{%s}=>[%s],\n", $mnem, join(',', @fields);
}
print "}\n";