From: Mischa POSLAWSKY <perl@shiar.org>
Date: Thu, 6 Jan 2011 23:52:44 +0000 (+0100)
Subject: unicode: predetermine html entities in include
X-Git-Tag: v1.5~80
X-Git-Url: http://git.shiar.nl/sheet.git/commitdiff_plain/b4e3ab4c99479faa0b03df7b45e8c69d301e09b8

unicode: predetermine html entities in include
---

diff --git a/Shiar_Sheet/FormatChar.pm b/Shiar_Sheet/FormatChar.pm
index f22f42b..0e59678 100644
--- a/Shiar_Sheet/FormatChar.pm
+++ b/Shiar_Sheet/FormatChar.pm
@@ -29,7 +29,7 @@ sub glyph_html {
 	my ($self, $char) = @_;
 	my $codepoint = ord $char;
 	my $info = $self->glyph_info($codepoint);
-	my ($class, $name, $mnem, $string) = @$info;
+	my ($class, $name, $mnem, $html, $string) = @$info;
 
 	my $cell = EscapeHTML($string || $char);
 	my $title = sprintf 'U+%04X%s', $codepoint, $name && " ($name)";
@@ -37,7 +37,7 @@ sub glyph_html {
 	$cell = "<span>$cell</span>" if $class =~ /\bZs\b/;
 	$cell = '&nbsp;' if $cell eq '';
 
-	return ($cell, EscapeHTML($title), "X $class", $mnem);
+	return ($cell, EscapeHTML($title), "X $class", $mnem, $html);
 }
 
 sub glyphs_html {
@@ -61,7 +61,7 @@ sub glyph_cell {
 
 sub cell {
 	my ($self, $input, $html) = @_;
-	my (@class, $title, $cell, $mnem);
+	my (@class, $title, $cell, $mnem, $entity);
 
 	if ($input eq '-') {
 		$cell = '';
@@ -78,7 +78,7 @@ sub cell {
 		}
 
 		$input =~ s/^\\//;  # escaped char
-		($cell, $title, my $class, $mnem) = $self->glyphs_html($input);
+		($cell, $title, my $class, $mnem, $entity) = $self->glyphs_html($input);
 
 		if ($self->{style} = 'di') {
 			if ($class =~ /\bu-di\b/) {
@@ -110,9 +110,8 @@ sub cell {
 	if ($cell ne '') {
 		for (@{ $self->{anno} }) {
 			if (/html$/) {
-				require HTML::Entities;
-				if (my $entity = $HTML::Entities::char2entity{$cell}) {
-					$entity = substr($entity, 1, -1) unless /^&/;
+				if (defined $entity) {
+					$entity = "&$entity;" if /^&/;
 					$anno = sprintf(' <small class="digraph">%s</small>', EscapeHTML($entity));
 					last;
 				}
diff --git a/tools/convert-unicode.pl b/tools/convert-unicode.pl
index 348878c..19fde5d 100755
--- a/tools/convert-unicode.pl
+++ b/tools/convert-unicode.pl
@@ -30,6 +30,15 @@ eval {
 	1;
 } or warn "Failed reading unicode tables: $@";
 
+eval {
+	require HTML::Entities;
+	while (my ($char, $entity) = each %HTML::Entities::char2entity) {
+		$entity =~ /[a-zA-Z]/ or next;  # only named aliases; entities without any letter (plain numeric references) are skipped
+		$info{$char}->{html} = substr($entity, 1, -1);  # drop first and last character (presumably the & and ; delimiters)
+	}
+	1;  # true result so a successful run does not trigger the warn below
+} or warn "Failed importing html entities: $@";
+
 my %diinc = (
 	'digraphs.inc.pl' => 'u-di',
 );
@@ -53,7 +62,7 @@ for my $chr (keys %info) {
 			|| { block => '?', category => 'Xn', name => '', script => '' }
 	} or next;
 
-	$info->{$_} = $info{$chr}->{$_} for qw(di class string);
+	$info->{$_} = $info{$chr}->{$_} for qw(di html class string);
 
 	# categorise by unicode types and writing script
 	$info->{class}->{$_}++ for $info->{category};
@@ -90,7 +99,7 @@ say '+{';
 for my $cp (sort keys %info) {
 	$info{$cp}->{classstr} = join(' ', sort keys %{ $info{$cp}->{class} });
 	# convert info hashes into arrays of strings to output in display order
-	my $row = [ map { $info{$cp}->{$_} } qw/classstr name di string/ ];
+	my $row = [ map { $info{$cp}->{$_} } qw/classstr name di html string/ ];
 	# strip off trailing missing values (especially string may be unknown)
 	defined $row->[-1] ? last : pop @$row for 1 .. @$row;
 	# final line (assume safe within single quotes)