use strict;
use warnings;
+use utf8;
use open OUT => ':utf8', ':std';
use Data::Dumper;
-our $VERSION = '1.01';
+our $VERSION = '1.02';
-# determine input data source
-my $input;
-if (@ARGV) {
- # manual contents specified (either piped or filename(s) given)
- $input = \*ARGV;
-}
-else {
- # automatic download from official website
- require LWP::Simple;
- my $contents = LWP::Simple::get('http://www.ietf.org/rfc/rfc1345.txt')
- or die "Couldn't download RFC-1345 from ietf.org";
- open $input, '<', \$contents; # emulate file handle
-}
+# expect input data source at command line
+@ARGV or die "Specify input source file or - for STDIN\n";
# skip everything until a character indented by 1 space (table start)
do {
- $_ = readline $input;
+ $_ = readline;
defined or die "Premature input end";
} until /^\s\S/;
my @t = $_; # add first line (already read, assume it's ok)
# read the rest of the character table
-while ($_ = readline $input) {
+while ($_ = readline) {
# check for table end (chapter 4)
last if /^\d/;
if (-r 'shiar.inc.txt') {
open my $include, '<:utf8', 'shiar.inc.txt';
for (readline $include) {
- m{^([!"%'-Z_a-z]{2}) (.)} or next;
+ m{^(\$?[!"%'-Z_a-z]{2}) (.)} or next;
warn("$1 already defined"), next if defined $di{$1};
$di{$1} = ord $2;
push @extra, $1;
}
warn $@ if $@;
+$di{chr $_} = $_ for 32 .. 126;
+$di{'\\'.$_} = delete $di{$_} for '{', '}', '\\';
+
# optionally get unicode character information
my %info = eval {
require Unicode::UCD;
# show descriptive symbols instead of control chars themselves
$info{$_}->{string} = $di{$_} < 32 ? chr($di{$_} + 0x2400) : chr(0xFFFD);
}
+# presentational strings for format characters (soft hyphen, LRM, RLM)
+$info{$_}->{string} = '-' for grep { $di{$_} == 0x00AD } keys %di;
+$info{$_}->{string} = '→' for grep { $di{$_} == 0x200E } keys %di;
+$info{$_}->{string} = '←' for grep { $di{$_} == 0x200F } keys %di;
# convert info hashes into arrays of strings to output in display order
for my $row (values %info) {
=head1 SYNOPSIS
-Download and convert the digraph specification from ietf.org:
+Extract digraphs from text specifications as a Perl hash:
- rfc1345convert > digraphs.inc.pl
+ rfc1345convert rfc1345.txt custom.txt > digraphs.inc.pl
-Test by printing the character for DO (should be a dollar sign):
+Input can be the literal RFC (or similar) document:
- perl -e'$di = do "digraphs.inc.pl"; print chr $di->{DO}->[0]'
+ curl http://www.ietf.org/rfc/rfc1345.txt | rfc1345convert -
-Manual specification of source retrieval:
+Test by printing the character for DO (should be a dollar sign):
- rfc1345convert rfc1345.txt
- curl $url | rfc1345convert -
+ perl -e'$di = do "digraphs.inc.pl"; print chr $di->{DO}->[0]'
=head1 DESCRIPTION