X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/92240b0d1e4434e0981b3326ca4d7ad8673dbf71..0d71f602a3268f8059dc48042280fb25c1abb4ba:/rfc1345convert diff --git a/rfc1345convert b/rfc1345convert index f932753..33b6ee9 100755 --- a/rfc1345convert +++ b/rfc1345convert @@ -2,36 +2,26 @@ use strict; use warnings; +use utf8; use open OUT => ':utf8', ':std'; use Data::Dumper; -our $VERSION = '1.01'; +our $VERSION = '1.02'; -# determine input data source -my $input; -if (@ARGV) { - # manual contents specified (either piped or filename(s) given) - $input = \*ARGV; -} -else { - # automatic download from official website - require LWP::Simple; - my $contents = LWP::Simple::get('http://www.ietf.org/rfc/rfc1345.txt') - or die "Couldn't download RFC-1345 from ietf.org"; - open $input, '<', \$contents; # emulate file handle -} +# expect input data source at command line +@ARGV or die "Specify input source file or - for STDIN\n"; # skip everything until a character indented by 1 space (table start) do { - $_ = readline $input; + $_ = readline; defined or die "Premature input end"; } until /^\s\S/; my @t = $_; # add first line (already read, assume it's ok) # read the rest of the character table -while ($_ = readline $input) { +while ($_ = readline) { # check for table end (chapter 4) last if /^\d/; @@ -109,7 +99,7 @@ my @extra; if (-r 'shiar.inc.txt') { open my $include, '<:utf8', 'shiar.inc.txt'; for (readline $include) { - m{^([!"%'-Z_a-z]{2}) (.)} or next; + m{^(\$?[!"%'-Z_a-z]{2}) (.)} or next; warn("$1 already defined"), next if defined $di{$1}; $di{$1} = ord $2; push @extra, $1; @@ -149,6 +139,10 @@ for (keys %di) { # show descriptive symbols instead of control chars themselves $info{$_}->{string} = $di{$_} < 32 ? chr($di{$_} + 0x2400) : chr(0xFFFD); } +# presentational string for some control(lish) entries +$info{$_}->{string} = '-' for grep { $di{$_} == 0x00AD } keys %di; +$info{$_}->{string} = '→' for grep { $di{$_} == 0x200E } keys %di; +$info{$_}->{string} = '←' for grep { $di{$_} == 0x200F } keys %di; # convert info hashes into arrays of strings to output in display order for my $row (values %info) { @@ -179,18 +173,17 @@ rfc1345convert - Output digraph data from RFC-1345 =head1 SYNOPSIS -Download and convert the digraph specification from ietf.org: +Extract digraphs from text specifications as a perl hash: - rfc1345convert > digraphs.inc.pl + rfc1345convert rfc1345.txt custom.txt > digraphs.inc.pl -Test by printing the character for DO (should be a dollar sign): +Input can be the literal RFC (or similar) document: - perl -e'$di = do "digraphs.inc.pl"; print chr $di->{DO}->[0]' + curl http://www.ietf.org/rfc/rfc1345.txt | rfc1345convert - -Manual specification of source retrieval: +Test by printing the character for DO (should be a dollar sign): - rfc1345convert rfc1345.txt - curl $url | rfc1345convert - + perl -e'$di = do "digraphs.inc.pl"; print chr $di->{DO}->[0]' =head1 DESCRIPTION