formatchar: common &; optional for html entity annotation
[sheet.git] / Shiar_Sheet / FormatChar.pm
index a501b945ca1ecb1a784f27ebe477e80ef4e46ec8..62f6791ba9f1e1d72332c87b30cc56b498d7dfe0 100644 (file)
@@ -6,7 +6,7 @@ use warnings;
 use Data::Dump 'pp';
 use PLP::Functions 'EscapeHTML';
 
-our $VERSION = '1.00';
+our $VERSION = '1.04';
 
 our $diinfo = do 'digraphs.inc.pl';
 our %di = map { $diinfo->{$_}->[0] => $_ } grep { ref $diinfo->{$_} }
@@ -14,7 +14,7 @@ our %di = map { $diinfo->{$_}->[0] => $_ } grep { ref $diinfo->{$_} }
 
 sub new {
        my ($class) = @_;
-       bless {}, $class;
+       bless { anno => ['di', 0], style => 'di' }, $class;  # anno: annotation types cell() tries in order; style: class scheme cell() checks
 }
 
 sub glyph_info {
@@ -38,7 +38,7 @@ sub glyph_html {
        my $title = sprintf 'U+%04X%s', $codepoint, $name && " ($name)";
        my @class = ('X', grep {$_} $prop, $script);
 
-       $cell = "<span>$cell</span>" if $prop eq 'Zs';
+       $cell = "<span>$cell</span>" if $prop and $prop eq 'Zs';
        $cell = '&nbsp;' if $cell eq '';
 
        return ($cell, EscapeHTML($title), join(' ', @class), $mnem);
@@ -71,28 +71,75 @@ sub cell {
                $cell = '';
        }
        elsif ($input eq '=') {
-               push @class, 'di-invalid';
+               push @class, 'u-invalid';
                $cell = '';
        }
        else {
                push @class, 'X';
 
                if ($input =~ s/^-//) {
-                       push @class, 'di-rare'; # discouraged
+                       push @class, 'ex'; # discouraged
                }
 
                ($cell, $title, my $class, $mnem) = $self->glyphs_html($input);
 
-               if (defined $mnem) {
-                       push @class, 'di-d'; # digraph
-                       push @class, 'di-prop' if $class =~ /\bXz\b/; # unofficial
+               if ($self->{style} eq 'di') {  # string comparison; '=' would assign and always be true
+                       if (defined $mnem) {
+                               push @class, $class =~ /\bXz\b/ ? ('l2', 'u-prop') # unofficial
+                                       : ('l3', 'u-di'); # standard digraph
+                       }
+               }
+               else {
+                       my $codepoint = ord(substr $input, 0, 1);  # classify by the first character only
+                       if ($codepoint <= 0xFF) {
+                               push @class, 'l3', 'u-lat1';  # latin1
+                       }
+                       elsif ($codepoint <= 0xD7FF) {
+                               push @class, 'l2', 'u-bmp';  # bmp, up to U+D7FF
+                       }
                }
 
                if ($input =~ /[ -~]/) {
-                       push @class, 'di-a'; # ascii
+                       push @class, 'l4', 'u-ascii'; # ascii
                }
                else {
-                       push @class, 'di-b'; # basic unicode
+                       push @class, 'l1'; # basic unicode
+               }
+       }
+
+       my $anno = '';
+       for (@{ $self->{anno} }) {
+               if (/html$/) {
+                       require HTML::Entities;
+                       if (my $entity = $HTML::Entities::char2entity{$cell}) {
+                               $entity = substr($entity, 1, -1) unless /^&/;
+                               $anno = sprintf(' <small class="digraph">%s</small>', EscapeHTML($entity));
+                               last;
+                       }
+               }
+               elsif ($_ eq 'xml') {
+                       $anno = sprintf(' <small class="digraph">%s</small>',
+                               sprintf '#%d', ord($cell)
+                       );
+                       last;
+               }
+               elsif ($_ eq '&xml') {
+                       $anno = sprintf(' <small class="digraph">%s</small>',
+                               sprintf '&amp;#%d;', ord($cell)
+                       );
+                       last;
+               }
+               elsif ($_ eq 'di') {
+                       if (defined $mnem and length $mnem) {
+                               $anno = sprintf(' <small class="digraph">%s</small>', EscapeHTML($mnem));
+                               last;
+                       }
+               }
+               else {
+                       if ($_ eq 'hex' or $cell =~ /^[^a-zA-Z]$/) {
+                               $anno = sprintf(' <small class="%s">%04X</small>', 'value', ord $cell);
+                               last;
+                       }
                }
        }
 
@@ -101,11 +148,7 @@ sub cell {
                @class ? sprintf(' class="%s"', join ' ', @class) : '',
                $html || '',
                $cell eq '' ? '&nbsp;' : $cell,
-               defined $mnem && length $mnem
-                       ? sprintf(' <small class="digraph">%s</small>', EscapeHTML($mnem))
-                       : $cell =~ /^[^a-zA-Z]$/
-                               ? sprintf(' <small class="%s">%04X</small>', 'value', ord $cell)
-                               : '',
+               $anno,
        );
 }
 
@@ -130,7 +173,7 @@ sub table {
        for my $cell (@$digraphs) {
                if ($cell =~ s/^\.//) {
                        # dot indicates start of a new row
-                       push @rows, '';
+                       push @rows, '<tr>';
                        if ($cell =~ s/^>//) {
                                # header cell text follows
                                $cell =~ s/_/ /g;  # underscores may be used instead of whitespace (for qw//ability)
@@ -151,8 +194,20 @@ sub table {
                $colspan = 1;
        }
 
-       return sprintf qq{<table class="glyphs dilabel">\n%s</table>\n},
-               join '', map {"<tr>$_\n"} @rows;
+       return sprintf qq{<table class="glyphs%s">\n%s</table>\n},
+               @{ $self->{anno} } ? ' dilabel' : '',
+               join '', map {"$_\n"} @rows;
+}
+
+sub print {  # args: a heading, then any number of table data refs, repeated; prints one section per heading
+       my $self = shift;
+       while (@_) {
+               printf '<div class="section"><h2>%s</h2>'."\n\n", shift;  # next argument is the section heading
+               while (ref $_[0]) {  # consume the refs that follow, leaving the caller's $_ untouched
+                       print $self->table(shift);
+               }
+               print '</div>';
+       }
 }
 
 1;