}
}
+eval { # best effort: any die inside is caught and reported by the trailing "or warn"
+ # Tag every known character with the Unicode version that introduced it, using the map returned by unicode-age.inc.pl.
+ my $agemap = do 'unicode-age.inc.pl' or die $@ || $!; # used below as a hash ref keyed by code point; a false result dies with $@ or $!
+ for my $chr (keys %info) {
+ my $version = $agemap->{ord $chr} or next; # skip characters with no (or a false) entry in the map
+ $info{$chr}->{class}->{'u-v'.$version}++ # count a class named "u-v" followed by the version
+ }
+ 1; # true result on success, so the "or warn" below only fires when something died
+} or warn "Failed including unicode version data $@";
+
for my $chr (keys %info) {
my $cp = ord $chr;
# attempt to get unicode character information
|| { block => '?', category => 'Xn', name => '', script => '' }
} or next;
- $info->{$_} = $info{$chr}->{$_} for qw(di html class string);
+ $info->{$_} = $info{$chr}->{$_} for keys %{ $info{$chr} };
# categorise by unicode types and writing script
$info->{class}->{$_}++ for $info->{category};
? '<'.$info->{unicode10}.'>' # the old name was much more useful
: sprintf('<control U+%04X>', $cp); # at least identify by value
# show descriptive symbols instead of control chars themselves
- $info->{string} = $cp < 32 ? chr($cp + 0x2400) : chr(0xFFFD);
+ $info->{string} = $cp < 32 ? chr($cp + 0x2400) :
+ $cp == 127 ? chr(0x2421) :
+ chr(0xFFFD);
}
}