unicode: mark missing digraphs in multiple characters
author Mischa POSLAWSKY <perl@shiar.org>
Sat, 21 Feb 2015 04:33:12 +0000 (05:33 +0100)
committer Mischa POSLAWSKY <perl@shiar.org>
Tue, 9 Jun 2015 03:43:41 +0000 (05:43 +0200)
Shiar_Sheet/FormatChar.pm

index 8b00318a43850ee2a5ad5567de66f317a71589f2..ee0c31065f9c7e716dafb92b38960d8af0a8b911 100644 (file)
@@ -3,6 +3,7 @@ package Shiar_Sheet::FormatChar;
 use 5.010;
 use strict;
 use warnings;
+use utf8;
 
 use Data::Dump 'pp';
 use PLP::Functions 'EscapeHTML';
@@ -51,7 +52,7 @@ sub glyphs_html {
                EscapeHTML($_[0]), # cell
                join(' | ', map { $_->[1] } @chars), # title
                $chars[0][2], # class
-               join(' ',  grep { defined } map { $_->[3] } @chars), # digraph
+               join(' ',  map { $_->[3] // '…' } @chars), # digraph
        );
 }
 
@@ -80,7 +81,7 @@ sub cell {
 
                $input =~ s/^\\//;  # escaped char
                ($cell, $title, my $class, $mnem, $entity) = $self->glyphs_html($input);
-               my $codepoint = ord(substr $input, 0, 1);
+               my $codepoint = ord $input;
 
                if ($self->{style} eq 'univer') {
                        if ($input =~ /\p{age=unassigned}/) {
@@ -108,7 +109,10 @@ sub cell {
                }
 
                if ($self->{style} eq 'di') {
-                       if ($class =~ /\bu-di\b/) {
+                       if ($mnem =~ /…/) {
+                               # incomplete representation, usually partial
+                       }
+                       elsif ($class =~ /\bu-di\b/) {
                                push @class, ('l4', 'u-di'); # standard digraph
                        }
                        elsif ($class =~ /\bu-prop\b/) {