use open OUT => ':utf8', ':std';
-our $VERSION = '1.03';
+our $VERSION = '1.06';
-# create a hash of desired input
-my $di = do 'data/digraphs-rfc.inc.pl'
+# import digraph data: the rfc table is required (die), all other tables are optional (warn)
+my $rfc = do 'data/digraphs-rfc.inc.pl'
or die "error reading digraphs include: ", $@ // $!; # NOTE(review): $@ is normally '' rather than undef, so // likely never falls back to $! - confirm
-
-# personal addendums
my $extra = do 'data/digraphs-shiar.inc.pl'
or warn "could not include shiar proposals: ", $@ // $!;
my $vim = do 'data/digraphs-vim.inc.pl'
or warn "could not include vim extensions ", $@ // $!;
-$di = { %{$vim // {}}, %{$di}, %{$extra // {}} };
+my $vimold = do 'data/digraphs-vim-74.inc.pl'
+ or warn "could not include vim compatibility ", $@ // $!;
+my $di = { %{$vim // {}}, %{$rfc}, %{$extra // {}} }; # on duplicate keys the later list wins: extra over rfc over vim
# optionally get unicode character information (a failed include only warns)
-my $uninfo = do 'unicode-char.inc.pl'
+my $uninfo = do 'data/unicode-char.inc.pl'
or warn "could not include unicode details: ", $@ // $!;
# output perl code of hash
grep { !defined $di->{$_} }
map { substr($_, 1, 1).substr($_, 0, 1) } sort keys %{$di}
);
-printf "q{%s}=>[%s],\n", $_, join(',',
- $di->{$_}, # original code point
+# emit one q{digraph}=>[code point, 'name', 'usage', 'class', optional 'string'] line per key of $di;
+# backslash and both braces are escaped so that any key stays a valid q{} literal
+# (an unescaped { would open a nesting level that an escaped } never closes)
+printf "q{%s}=>[%s],\n", s/(?=[\\{}])/\\/gr, join(',',
+ ord $di->{$_}, # code point of the resulting character
map {"'$_'"}
- $uninfo->{ chr $di->{$_} }->[1], # name
- $uninfo->{ chr $di->{$_} }->[0], # class
- (map {('', $_)} $uninfo->{ chr $di->{$_} }->[4] // ()), # string
+ $uninfo->{ $di->{$_} }->[1] // '', # character name, empty if unknown
+ join(' ',
+ $rfc->{$_}
+ ? $vim->{$_} ? 'l4' : 'l1' # l4: in vim and rfc, l1: rfc only
+ : $vim->{$_} ? 'l3' : 'l2', # l3: vim only, l2: in neither
+ $vimold && $vim->{$_} && !$vimold->{$_} ? 'ex' : (), # ex: in vim but absent from the $vimold table
+ ),
+ ($uninfo->{ $di->{$_} }->[0] // '') =~ s/ u-di| u-prop| ex//gr, # unicode classes without the u-di, u-prop and ex markers
+ $uninfo->{ $di->{$_} }->[4] // (), # replacement string, omitted when undefined
) for sort keys %{$di};
print "}\n";
digraph which can be considered identical (usually inverted),
or an array ref containing at least the resulting character's
Unicode code point value. If available, the following UCD data
-is appended: character name, category, script, and output string.
+is appended: character name, usage classes (l1 to l4 by digraph source,
+ex if new in vim), unicode classes, and replacement output string.
For example:
+{
- AE => [198, 'LATIN CAPITAL LETTER AE', 'Lu Xl', 'Latin'],
+ AE => [198, 'LATIN CAPITAL LETTER AE', 'l4', 'Latin Lu Xl u-v11'],
EA => 'AE',
}