} or warn "Failed importing html entities: $@";
# Digraph include files, keyed by path, each mapped to the class string that
# is recorded for every character the file defines (see the loop over
# keys %diinc, which uses the value as a key under $info{...}->{class}).
# NOTE(review): 'u-prop Xz' is used verbatim as ONE hash key (space included),
# whereas other code counts classes such as Xa/Xl under separate keys --
# confirm the consumer splits or prints this value as intended.
my %diinc = (
- 'digraphs.inc.pl' => 'u-di',
+ 'data/digraphs-rfc.inc.pl' => 'u-di',
+ 'data/digraphs-shiar.inc.pl' => 'u-prop Xz',
);
# Register digraph mnemonics in %info: for every include file listed in
# %diinc, each two-character mnemonic is stored as the character's {di}
# (first one seen wins) and the file's class counter is incremented.
# NOTE(review): keys %diinc is visited in Perl's unspecified hash order, so
# if two files define the same character, which mnemonic is kept by //= can
# differ between runs -- confirm whether a fixed file order is wanted.
for (keys %diinc) {
# -e is the file-exists test: include files that are not present are skipped.
-e $_ or next;
# NOTE(review): $di is not assigned in the lines visible here -- presumably
# a hash reference loaded from the file named by $_; confirm.
while (my ($mnem, $cp) = each %$di) {
length $mnem == 2 or next; # limit to digraphs
# class string configured for the current include file ($_ is its path)
my $class = $diinc{$_};
- if (ref $cp) {
- # old style array
- $class = 'u-prop' if $cp->[2] and $cp->[2] =~ m/\bXz\b/;
- $cp = chr $cp->[0];
- }
- $info{$cp}->{di} //= $mnem;
- $info{$cp}->{class}->{$class}++;
# $cp is passed through chr, i.e. treated as a numeric code point, and
# %info is keyed by the resulting character.
# //= leaves an already-defined {di} untouched.
+ $info{chr $cp}->{di} //= $mnem;
+ $info{chr $cp}->{class}->{$class}++;
}
}
$info->{class}->{Xa}++ if $info->{block} eq 'Basic Latin';
$info->{class}->{Xl}++ if $info->{block} eq 'Latin-1 Supplement';
- given ($cp) {
- when ($info->{string}) {
+ {
+ if ($info->{string}) {
# keep predefined presentational string
}
- when ($info->{combining}) {
+ elsif ($info->{combining}) {
# overlay combining accents
$info->{string} = chr(9676) . $chr;
}
- when (($cp & ~0b1001_1111) == 0 or $cp == 127) {
+ elsif (($cp & ~0b1001_1111) == 0 or $cp == 127) {
# control characters (first 32 chars from 0 and 128)
# rename to something more descriptive
$info->{name} = $info->{unicode10}
}
# output perl code of hash
# Header of the generated source: a provenance comment naming this script
# ($0), the utf8 pragma, and the opening of an anonymous hash.
+say "# automatically generated by $0";
# presumably needed because the emitted data contains literal non-ASCII
# characters -- confirm against the entry-printing loop.
say 'use utf8;';
# The leading unary plus makes Perl parse the brace as an anonymous hash
# constructor rather than a block when the generated file is evaluated.
say '+{';
for my $cp (sort keys %info) {