From 2876b76b7b654f43dd1668c4a963a049450ed66c Mon Sep 17 00:00:00 2001 From: Mischa POSLAWSKY Date: Sat, 21 Feb 2015 05:33:12 +0100 Subject: [PATCH] unicode: mark missing digraphs in multiple characters --- Shiar_Sheet/FormatChar.pm | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Shiar_Sheet/FormatChar.pm b/Shiar_Sheet/FormatChar.pm index 8b00318..ee0c310 100644 --- a/Shiar_Sheet/FormatChar.pm +++ b/Shiar_Sheet/FormatChar.pm @@ -3,6 +3,7 @@ package Shiar_Sheet::FormatChar; use 5.010; use strict; use warnings; +use utf8; use Data::Dump 'pp'; use PLP::Functions 'EscapeHTML'; @@ -51,7 +52,7 @@ sub glyphs_html { EscapeHTML($_[0]), # cell join(' | ', map { $_->[1] } @chars), # title $chars[0][2], # class - join(' ', grep { defined } map { $_->[3] } @chars), # digraph + join(' ', map { $_->[3] // '…' } @chars), # digraph ); } @@ -80,7 +81,7 @@ sub cell { $input =~ s/^\\//; # escaped char ($cell, $title, my $class, $mnem, $entity) = $self->glyphs_html($input); - my $codepoint = ord(substr $input, 0, 1); + my $codepoint = ord $input; if ($self->{style} eq 'univer') { if ($input =~ /\p{age=unassigned}/) { @@ -108,7 +109,10 @@ sub cell { } if ($self->{style} eq 'di') { - if ($class =~ /\bu-di\b/) { + if ($mnem =~ /…/) { + # incomplete representation, usually partial + } + elsif ($class =~ /\bu-di\b/) { push @class, ('l4', 'u-di'); # standard digraph } elsif ($class =~ /\bu-prop\b/) { -- 2.30.0