X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/f2e965b21a4be2aea69d887f619481e04f3ca58a..4e71d82512759ed7da92d213ac66d006dae456a1:/tools/convert-unicode.pl

diff --git a/tools/convert-unicode.pl b/tools/convert-unicode.pl
index 731210b..19fde5d 100755
--- a/tools/convert-unicode.pl
+++ b/tools/convert-unicode.pl
@@ -16,6 +16,29 @@ my %info = (
 );
 $info{chr $_} //= {} for 32 .. 126;
 
+eval {
+	my $tables = do 'unicode-table.inc.pl' or die $@ || $!;
+	for (values %$tables) {
+		for (values %$_) {
+			for (@$_) {
+				length $_ == 1 or next;  # ignore meta values
+				s/\\//;  # unescape
+				$info{$_} //= {};
+			}
+		}
+	}
+	1;
+} or warn "Failed reading unicode tables: $@";
+
+eval {
+	require HTML::Entities;
+	while (my ($char, $entity) = each %HTML::Entities::char2entity) {
+		$entity =~ /[a-zA-Z]/ or next;  # only actual aliases
+		$info{$char}->{html} = substr($entity, 1, -1);
+	}
+	1;
+} or warn "Failed importing html entities: $@";
+
 my %diinc = (
 	'digraphs.inc.pl' => 'u-di',
 );
@@ -39,7 +62,7 @@ for my $chr (keys %info) {
 			|| { block => '?', category => 'Xn', name => '', script => '' }
 	} or next;
 
-	$info->{$_} = $info{$chr}->{$_} for qw(di class string);
+	$info->{$_} = $info{$chr}->{$_} for qw(di html class string);
 
 	# categorise by unicode types and writing script
 	$info->{class}->{$_}++ for $info->{category};
@@ -76,7 +99,7 @@ say '+{';
 for my $cp (sort keys %info) {
 	$info{$cp}->{classstr} = join(' ', sort keys %{ $info{$cp}->{class} });
 	# convert info hashes into arrays of strings to output in display order
-	my $row = [ map { $info{$cp}->{$_} } qw/classstr name di string/ ];
+	my $row = [ map { $info{$cp}->{$_} } qw/classstr name di html string/ ];
 	# strip off trailing missing values (especially string may be unknown)
 	defined $row->[-1] ? last : pop @$row for 1 .. @$row;
 	# final line (assume safe within single quotes)