From: Mischa POSLAWSKY Date: Wed, 7 Jan 2009 03:14:11 +0000 (+0000) Subject: rfc1345convert: documentation and automatic download X-Git-Tag: v1.3~205 X-Git-Url: http://git.shiar.nl/sheet.git/commitdiff_plain/82c2fbd0e3157016f580c0ef823996ed26d8bb26 rfc1345convert: documentation and automatic download If no source is specified on the command-line, the document is downloaded from ietf.org (the official RFC body). With the addition of some perldoc describing the script (including license and examples) it could potentially even be usable by others (though I admit its scope is limited, but who knows). --- diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..43426c2 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/rfc1345.txt diff --git a/rfc1345convert b/rfc1345convert old mode 100644 new mode 100755 index 3157e04..f6f0ab8 --- a/rfc1345convert +++ b/rfc1345convert @@ -5,23 +5,34 @@ use warnings; use Data::Dumper; -our $VERSION = '1.00'; +our $VERSION = '1.01'; -if (0) { - #TODO: automatic download if not specified on stdin +# determine input data source +my $input; +if (@ARGV) { + # manual contents specified (either piped or filename(s) given) + $input = \*ARGV; +} +else { + # automatic download from official website require LWP::Simple; - LWP::Simple::get('http://www.ietf.org/rfc/rfc1345.txt'); + my $contents = LWP::Simple::get('http://www.ietf.org/rfc/rfc1345.txt') + or die "Couldn't download RFC-1345 from ietf.org"; + open $input, '<', \$contents; # emulate file handle } # skip everything until a character indented by 1 space (table start) -do {$_ = <>} until /^\s\S/; +do { + $_ = readline $input; + defined or die "Premature input end"; +} until /^\s\S/; my @t = $_; # add first line (already read, assume it's ok) # read the rest of the character table -while ($_ = <>) { +while ($_ = readline $input) { # check for table end (chapter 4) - last if /^4/; + last if /^\d/; # parse table lines (ignore (unindented) page break) next unless s/^ //; @@ -156,3 
+167,52 @@ printf "q{%s}=>[%s],\n", $_, join(',', ) for sort keys %di; print "}\n"; +__END__ + +=head1 NAME + +rfc1345convert - Output digraph data from RFC-1345 + +=head1 SYNOPSIS + +Download and convert the digraph specification from ietf.org: + + rfc1345convert > digraphs.inc.pl + +Test by printing the character for DO (should be a dollar sign): + + perl -e'$di = do "digraphs.inc.pl"; print chr $di->{DO}->[0]' + +Manual specification of source retrieval: + + rfc1345convert rfc1345.txt + curl $url | rfc1345convert - + +=head1 DESCRIPTION + +Parses the official RFC-1345 document, searching the +'character mnemonic table' for all digraph definitions. +If successful, Perl code is output resulting in a hash +with character data keyed by digraph. +Any errors and warnings are given at STDERR. + +The value can either be a scalar string containing another +digraph which can be considered identical (usually inverted), +or an array ref containing at least the resulting character's +Unicode code point value. If available, the following UCD data +is appended: character name, category, script, and output string. +For example: + + +{ + AE => [198, 'LATIN CAPITAL LETTER AE', 'Lu Xl', 'Latin'], + EA => 'AE', + } + +=head1 AUTHOR + +Mischa POSLAWSKY + +=head1 LICENSE + +Licensed under the GNU Affero General Public License version 3. +