use warnings;
use utf8;
-use open OUT => ':utf8', ':std';
+use open OUT => ':encoding(utf-8)', ':std';
-our $VERSION = '1.03';
+our $VERSION = '1.06';
-# create a hash of desired input
-my $di = do 'data/digraphs-rfc.inc.pl'
+# import and combine various digraph data
+# (failures report $@ || $!: $@ is an empty but defined string unless the
+# include failed to compile, so // would never fall back to the I/O error)
+push @INC, 'data';
+my $rfc = do 'digraphs-rfc.inc.pl'
or die "error reading digraphs include: ", $@ || $!;
-
-# personal addendums
-my $extra = do 'data/digraphs-shiar.inc.pl'
+my $extra = do 'digraphs-shiar.inc.pl'
or warn "could not include shiar proposals: ", $@ || $!;
-my $vim = do 'data/digraphs-vim.inc.pl'
+my $vim = do 'digraphs-vim.inc.pl'
or warn "could not include vim extensions ", $@ || $!;
-$di = { %{$vim // {}}, %{$di}, %{$extra // {}} };
+my $vimold = do 'digraphs-vim-74.inc.pl'
+ or warn "could not include vim compatibility ", $@ || $!;
+my $di = { %{$vim // {}}, %{$rfc}, %{$extra // {}} };
# optionally get unicode character information
my $uninfo = do 'unicode-char.inc.pl'
or warn "could not include unicode details: ", $@ || $!;
-# convert info hashes into arrays of strings to output in display order
-for my $row (values %{$uninfo}) {
- my ($class, $name, $di, $html, $string) = @{$row};
- $row = [$name, $class];
- push @{$row}, '', $string if defined $string;
-}
-
# output perl code of hash
# (keys are wrapped in q{} with any backslash or closing curly escaped)
print "# automatically generated by $0\n";
grep { !defined $di->{$_} }
map { substr($_, 1, 1).substr($_, 0, 1) } sort keys %{$di}
);
-printf "q{%s}=>[%s],\n", $_, join(',',
- $di->{$_}, # original code point
- (map {"'$_'"} @{ $uninfo->{ chr $di->{$_} } // [] }), # optional additional arguments
+# emit one line per digraph: q{digraph}=>[codepoint,'name','usage','class'],
+# followed by a fifth 'string' element only if the unicode info provides one;
+# backslashes and quoting delimiters in the data are escaped so that the
+# generated include always remains valid perl
+printf "q{%s}=>[%s],\n", s/(?=[\\{}])/\\/gr, join(',',
+ ord $di->{$_}, # original code point
+ map { my $field = s/(?=[\\'])/\\/gr; "'$field'" } # single-quoted, escaped
+ $uninfo->{ $di->{$_} }->[1] // '', # name
+ join(' ',
+ $rfc->{$_}
+ ? $vim->{$_} ? 'l4' : 'l1' # vim+rfc or rfc only
+ : $vim->{$_} ? 'l3' : 'l2', # vim only or neither
+ $vimold && $vim->{$_} && !$vimold->{$_} ? 'ex' : (), # new vim feature
+ ),
+ ($uninfo->{ $di->{$_} }->[0] // '') =~ s/ u-di| u-prop| ex//gr, # class
+ $uninfo->{ $di->{$_} }->[4] // (), # string
) for sort keys %{$di};
print "}\n";
digraph which can be considered identical (usually inverted),
or an array ref containing at least the resulting character's
Unicode code point value. If available, the following UCD data
-is appended: character name, category, script, and output string.
+is appended: character name, usage classes, Unicode classes,
+and replacement output string.
For example:
+{
- AE => [198, 'LATIN CAPITAL LETTER AE', 'Lu Xl', 'Latin'],
+ AE => [198, 'LATIN CAPITAL LETTER AE', 'l4', 'Latin Lu Xl u-v11'],
EA => 'AE',
}