From: Mischa POSLAWSKY Date: Thu, 6 Jan 2011 23:52:44 +0000 (+0100) Subject: unicode: predetermine html entities in include X-Git-Tag: v1.5~80 X-Git-Url: http://git.shiar.nl/sheet.git/commitdiff_plain/b4e3ab4c99479faa0b03df7b45e8c69d301e09b8 unicode: predetermine html entities in include --- diff --git a/Shiar_Sheet/FormatChar.pm b/Shiar_Sheet/FormatChar.pm index f22f42b..0e59678 100644 --- a/Shiar_Sheet/FormatChar.pm +++ b/Shiar_Sheet/FormatChar.pm @@ -29,7 +29,7 @@ sub glyph_html { my ($self, $char) = @_; my $codepoint = ord $char; my $info = $self->glyph_info($codepoint); - my ($class, $name, $mnem, $string) = @$info; + my ($class, $name, $mnem, $html, $string) = @$info; my $cell = EscapeHTML($string || $char); my $title = sprintf 'U+%04X%s', $codepoint, $name && " ($name)"; @@ -37,7 +37,7 @@ sub glyph_html { $cell = "$cell" if $class =~ /\bZs\b/; $cell = ' ' if $cell eq ''; - return ($cell, EscapeHTML($title), "X $class", $mnem); + return ($cell, EscapeHTML($title), "X $class", $mnem, $html); } sub glyphs_html { @@ -61,7 +61,7 @@ sub glyph_cell { sub cell { my ($self, $input, $html) = @_; - my (@class, $title, $cell, $mnem); + my (@class, $title, $cell, $mnem, $entity); if ($input eq '-') { $cell = ''; @@ -78,7 +78,7 @@ sub cell { } $input =~ s/^\\//; # escaped char - ($cell, $title, my $class, $mnem) = $self->glyphs_html($input); + ($cell, $title, my $class, $mnem, $entity) = $self->glyphs_html($input); if ($self->{style} = 'di') { if ($class =~ /\bu-di\b/) { @@ -110,9 +110,8 @@ sub cell { if ($cell ne '') { for (@{ $self->{anno} }) { if (/html$/) { - require HTML::Entities; - if (my $entity = $HTML::Entities::char2entity{$cell}) { - $entity = substr($entity, 1, -1) unless /^&/; + if (defined $entity) { + $entity = "&$entity;" if /^&/; $anno = sprintf(' %s', EscapeHTML($entity)); last; } diff --git a/tools/convert-unicode.pl b/tools/convert-unicode.pl index 348878c..19fde5d 100755 --- a/tools/convert-unicode.pl +++ b/tools/convert-unicode.pl @@ -30,6 +30,15 @@ eval { 1; } or warn "Failed reading unicode tables: $@"; +eval { + require HTML::Entities; + while (my ($char, $entity) = each %HTML::Entities::char2entity) { + $entity =~ /[a-zA-Z]/ or next; # only actual aliases + $info{$char}->{html} = substr($entity, 1, -1); + } + 1; +} or warn "Failed importing html entities: $@"; + my %diinc = ( 'digraphs.inc.pl' => 'u-di', ); @@ -53,7 +62,7 @@ for my $chr (keys %info) { || { block => '?', category => 'Xn', name => '', script => '' } } or next; - $info->{$_} = $info{$chr}->{$_} for qw(di class string); + $info->{$_} = $info{$chr}->{$_} for qw(di html class string); # categorise by unicode types and writing script $info->{class}->{$_}++ for $info->{category}; @@ -90,7 +99,7 @@ say '+{'; for my $cp (sort keys %info) { $info{$cp}->{classstr} = join(' ', sort keys %{ $info{$cp}->{class} }); # convert info hashes into arrays of strings to output in display order - my $row = [ map { $info{$cp}->{$_} } qw/classstr name di string/ ]; + my $row = [ map { $info{$cp}->{$_} } qw/classstr name di html string/ ]; # strip off trailing missing values (especially string may be unknown) defined $row->[-1] ? last : pop @$row for 1 .. @$row; # final line (assume safe within single quotes)