$di{$mnem} = hex $char;
}
+# personal addendums
+my @extra;
+if (-r 'shiar.inc.txt') {
+ open my $include, '<:utf8', 'shiar.inc.txt';
+ for (readline $include) {
+ m{^([!"%'-Z_a-z]{2}) (.)} or next;
+ warn("$1 already defined"), next if defined $di{$1};
+ $di{$1} = ord $2;
+ push @extra, $1;
+ }
+}
+warn $@ if $@;
+
# optionally get unicode character information
-my %charinfo = eval {
+my %info = eval {
require Unicode::UCD;
- map { $_ => Unicode::UCD::charinfo($di{$_}) } keys %di;
+ map {
+ $_ => Unicode::UCD::charinfo($di{$_})
+ || { block => '?', category => 'Xn', name => '', script => '' }
+ } keys %di;
};
+# add custom categories for certain blocks
+for (values %info) {
+ $_->{category} .= ' Xa' if $_->{block} eq 'Basic Latin';
+ $_->{category} .= ' Xl' if $_->{block} eq 'Latin-1 Supplement';
+}
+
+# mark unofficial extras as such
+$info{$_}->{category} .= ' Xz' for @extra;
+
+for (keys %di) {
+ # find control characters (first 32 chars from 0 and 128)
+ next if $di{$_} & ~0b1001_1111;
+ # rename to something more descriptive
+ $info{$_}->{name} = $info{$_}->{unicode10}
+ ? '<'.$info{$_}->{unicode10}.'>' # the old name was much more useful
+ : sprintf('<control U+%04X>', $di{$_}); # at least identify by value
+ # show descriptive symbols instead of control chars themselves
+ $di{$_} += 0x2400 if $di{$_} < 32;
+}
+
# output perl code of hash
# (assume no backslashes or curlies, so we can just q{} w/o escaping)
-print "{\n";
+print "+{\n";
+printf '(map {$_=>0} qw{%s}),'."\n", join(' ',
+ map { substr($_, 1, 1).substr($_, 0, 1) } sort keys %di
+);
printf "q{%s}=>[%s],\n", $_, join(',',
$di{$_}, # glyph code point
- $charinfo{$_} # optional additional arguments
- ? map {"'$_'"} @{ $charinfo{$_} }{qw/name category script/}
+ $info{$_} # optional additional arguments
+ ? map {"'$_'"} @{ $info{$_} }{qw/name category script/}
: ()
) for sort keys %di;
print "}\n";