X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/0b579580d620becc924c4e540ce41f5126e88e8d..133e96335267b12bb7e97d9bbe551a3a043bd32a:/tools/mkdigraphlist?ds=inline diff --git a/tools/mkdigraphlist b/tools/mkdigraphlist index 7a201a1..5117b76 100755 --- a/tools/mkdigraphlist +++ b/tools/mkdigraphlist @@ -4,45 +4,71 @@ use strict; use warnings; use utf8; -use open OUT => ':utf8', ':std'; -use Data::Dumper; +use open OUT => ':encoding(utf-8)', ':std'; +use JSON (); -our $VERSION = '1.03'; +our $VERSION = '1.07'; -# create a hash of desired input -my $di = do 'data/digraphs-rfc.inc.pl' +# import and combine various digraph data +push @INC, 'data'; +my $rfc = do 'digraphs-rfc.inc.pl' or die "error reading digraphs include: ", $@ // $!; - -# personal addendums -my $extra = do 'data/digraphs-shiar.inc.pl' +my $extra = do 'digraphs-shiar.inc.pl' or warn "could not include shiar proposals: ", $@ // $!; -$di = { %{$di}, %{$extra // {}} }; +my $vim = do 'digraphs-vim.inc.pl' + or warn "could not include vim extensions ", $@ // $!; +my $vimold = do 'digraphs-vim-74.inc.pl' + or warn "could not include vim compatibility ", $@ // $!; +my $di = { %{$vim // {}}, %{$rfc}, %{$extra // {}} }; # optionally get unicode character information my $uninfo = do 'unicode-char.inc.pl' or warn "could not include unicode details: ", $@ // $!; -# convert info hashes into arrays of strings to output in display order -for my $row (values %{$uninfo}) { - my ($class, $name, $di, $html, $string) = @{$row}; - $row = [$name, $class]; - push @{$row}, '', $string if defined $string; -} - -# output perl code of hash -# (assume no backslashes or curlies, so we can just q{} w/o escaping) -print "# automatically generated by $0\n"; -print "use utf8;\n"; -print "+{\n"; -printf '(map {$_=>0} qw{%s}),'."\n", join(' ', +# output json map of character info +my %table; +$table{$_} = 0 for ( grep { !defined $di->{$_} } map { substr($_, 1, 1).substr($_, 0, 1) } sort keys %{$di} ); -printf "q{%s}=>[%s],\n", $_, 
join(',', - $di->{$_}, # original code point - (map {"'$_'"} @{ $uninfo->{ chr $di->{$_} } // [] }), # optional additional arguments -) for sort keys %{$di}; -print "}\n"; +$table{$_} = [ + ord $di->{$_}, # original code point + $uninfo->{ $di->{$_} }->[1] // '', # name + ( + $rfc->{$_} + ? $vim->{$_} ? 'l5' : 'l1' # vim+rfc or rfc only + : $vimold && $vimold->{$_} ? 'l4' # compat vim if known + : $vim->{$_} ? 'l3' : 'l2', # vim only or neither + ), + ($uninfo->{ $di->{$_} }->[0] // '') =~ s/ u-di| u-prop//gr, # class + $uninfo->{ $di->{$_} }->[4] // (), # string +] for sort keys %{$di}; + +print JSON->new->ascii->canonical->encode({ + title => 'RFC-1345', + key => \%table, + intro => join("\n", + 'Character mnemonics following compose key ⎄:', + 'i^k in Vim,', + '^u^\ in Emacs,', + '^a^v in Screen.', + 'Similar but different from X.Org.', + 'Also see common Unicode.
', + 'Unofficial proposals',
+ 'are available as ex commands.',
+ ),
+ flag => {
+ l5 => 'full support',
+ l4 => 'vim extension',
+ l3 => 'vim v8.0',
+ l2 => 'proposal',
+ l1 => 'not in vim',
+ },
+ flagclass => {
+ l5 => '', # common
+ l3 => 'u-l5', # rare
+ },
+});
__END__
@@ -52,27 +78,26 @@ mkdigraphlist - Output character list of combined digraph data
=head1 SYNOPSIS
- mkdigraphlist >digraphs.inc.pl
- perl -e'$di = do "digraphs.inc.pl"; print chr $di->{DO}->[0]'
+ mkdigraphlist | jq -r '.key."DO"[0]' | perl -nE 'say chr' # $
=head1 DESCRIPTION
-Parses the official RFC-1345 document, searching the
-'character mnemonic table' for all digraph definitions.
-If successful, Perl code is output resulting in a hash
-with character data keyed by digraph.
-Any errors and warnings are given at STDERR.
+Combines precompiled digraph includes of rfc (1345), vim, and shiar
+and outputs a complete map including character details and usage classes.
-The value can either be a scalar string containing another
+The C