X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/16499a59e8c124cedc7cc499a3ebfaeb714e1f44..133e96335267b12bb7e97d9bbe551a3a043bd32a:/Shiar_Sheet/FormatChar.pm?ds=sidebyside
diff --git a/Shiar_Sheet/FormatChar.pm b/Shiar_Sheet/FormatChar.pm
index 6772dfc..f471497 100644
--- a/Shiar_Sheet/FormatChar.pm
+++ b/Shiar_Sheet/FormatChar.pm
@@ -8,23 +8,34 @@ use utf8;
use Data::Dump 'pp';
use PLP::Functions 'EscapeHTML';
-our $VERSION = '1.08';
+our $VERSION = '1.10';
-our $uc = do 'unicode-char.inc.pl';
+our $uc = do 'data/unicode-char.inc.pl';
sub new {
my ($class) = @_;
bless { anno => ['di', 0], style => 'di' }, $class;
}
-sub glyph_info {
+sub glyph_mkinfo {
my ($self, $codepoint) = @_;
- return $uc->{chr $codepoint} || eval {
+ # attempt to get unicode character information
+ my $info = eval {
require Unicode::UCD;
- if (my $fullinfo = Unicode::UCD::charinfo($codepoint)) {
- return [@$fullinfo{qw/category name - string/}];
- }
- } || [];
+ Unicode::UCD::charinfo($codepoint)
+ || { category => 'Xn', name => '' };
+ } or return;
+ my $string;
+ if ($info->{combining}) {
+ # overlay combining diacritics
+ $string = chr(9676) . chr($codepoint);
+ }
+ return [@$info{qw( category name )}, undef, $string];
+}
+
+sub glyph_info {
+ my ($self, $codepoint) = @_;
+ return $uc->{chr $codepoint} || $self->glyph_mkinfo($codepoint) || [];
}
sub glyph_html {
@@ -34,7 +45,7 @@ sub glyph_html {
my ($class, $name, $mnem, $entity, $string) = @$info;
my $cell = EscapeHTML($string || $char);
- my $title = sprintf 'U+%04X%s', $codepoint, !!$name && " ($name)";
+ my $title = sprintf 'U+%04X%s', $codepoint, !!$name && " $name";
$cell = "$cell" if $class and $class =~ /\bZs\b/;
$cell = ' ' if $cell eq '';
@@ -61,6 +72,31 @@ sub glyph_cell {
return sprintf('
%s', $self->glyph_html($char));
}
+sub glyph_level_univer {
+ my ($self, $input) = @_;
+ if ($input =~ /\p{age=unassigned}/) {
+ # check include for assignments after unicode 6.0 (perl v5.14)
+ state $agemap = do 'data/unicode-age.inc.pl';
+ my $version = $agemap->{ord $input};
+ return $version ? 'l2' : 'l1';
+ }
+ elsif ($input =~ /^\p{in=1.1}*$/) {
+ return 'l5'; # first release 1993
+ }
+ elsif ($input =~ /^\p{in=3.0}*$/) {
+ return 'l4'; # 20th century
+ }
+ elsif ($input =~ /^\p{in=4.1}*$/) {
+ return 'l4'; # over 10 years ago
+ }
+ elsif ($input =~ /^\p{in=6.0}*$/) {
+ return 'l3'; # before 2012
+ }
+ else {
+ return 'l2'; # more recent
+ }
+}
+
sub cell {
my ($self, $input, $html) = @_;
my (@class, $title, $cell, $mnem, $entity);
@@ -81,33 +117,13 @@ sub cell {
$input =~ s/^\\//; # escaped char
($cell, $title, my $class, $mnem, $entity) = $self->glyphs_html($input);
- my $codepoint = ord $input;
if ($self->{style} eq 'univer') {
- if ($input =~ /\p{age=unassigned}/) {
- # check include for assignments after unicode 6.0 (perl v5.14)
- state $agemap = do 'unicode-age.inc.pl';
- my $version = $agemap->{$codepoint};
- push @class, $version ? 'l2' : 'l1';
- }
- elsif ($input =~ /^\p{in=1.1}*$/) {
- push @class, 'l5'; # first release 1993
- }
- elsif ($input =~ /^\p{in=3.0}*$/) {
- push @class, 'l4'; # 20th century
- }
- elsif ($input =~ /^\p{in=4.1}*$/) {
- push @class, 'l4'; # over 10 years ago
- }
- elsif ($input =~ /^\p{in=6.0}*$/) {
- push @class, 'l3'; # before 2012
- }
- else {
- push @class, 'l2'; # more recent
- }
+ push @class, $self->glyph_level_univer($input);
next;
}
+ my $codepoint = ord $input;
if ($self->{style} eq 'di') {
if ($mnem and $mnem =~ /…/) {
# incomplete representation, usually partial
@@ -136,7 +152,7 @@ sub cell {
if ($input =~ /[ -~]/) {
push @class, 'l5', 'u-ascii'; # ascii
}
- elsif ($input =~ /^\p{in=6.0}+$/) {
+ elsif ($input =~ /^\p{in=6.0}+$/ and $input !~ /\p{Co}/) {
push @class, 'l2'; # in unicode 6.0
}
else {
@@ -173,8 +189,8 @@ sub cell {
}
}
else {
- if ($_ eq 'hex' or $cell =~ /^[^a-zA-Z]$/) {
- $anno = sprintf(' %04X', 'value', ord $cell);
+ if ($_ eq 'hex' or $input =~ /^[^a-zA-Z]$/) {
+ $anno = sprintf(' %04X', 'value', ord $input);
last;
}
}
@@ -248,10 +264,16 @@ sub table {
my @colheads;
while ($digraphs->[0] !~ /^\./) {
my $cell = shift @$digraphs or last;
- push @colheads, sprintf(
- '<%s%s>%s',
+ if ($cell eq '>') {
+ push @colheads, ' | ';
+ next;
+ }
+ push @colheads, join('',
+ '<',
$cell =~ s/^-// ? 'td' : 'th',
- $cell =~ s/:(.*)// ? qq{ title="$1"} : '',
+ $cell =~ s/:(.*)// && qq{ title="$1"},
+ $cell =~ s/^(>+)// && ' colspan='.(length($1) + 1),
+ '>',
$cell eq '_' ? ' ' : $cell
);
}
@@ -275,18 +297,26 @@ sub print {
}
sub legend {
- my @classes = (
- ["X l5" => 'unicode 1.1'],
- ["X l4" => '20th century'],
- ["X l3" => 'in 6.0 (2010)'],
- ["X l2" => 'recent assignments'],
- ["X l1" => 'proposed'],
- ["ex" => 'irregular'],
+ my $self = shift;
+ my @classes = $self->{style} eq 'univer' ? (
+ [l5 => 'unicode 1.1'],
+ [l4 => '20th century'],
+ [l3 => 'in 6.0 (2010)'],
+ [l2 => 'recent assignments'],
+ [l1 => 'proposed'],
+ [ex => 'irregular'],
+ ) : (
+ [l5 => 'ascii'],
+ [l4 => $self->{style} eq 'di' ? 'digraph' : 'latin1'],
+ [l3 => $self->{style} eq 'di' ? 'proposed' : 'HTML4'],
+ [l2 => 'unicode ≤6.0'],
+ [l1 => 'other unicode'],
+ [ex => 'discouraged'],
);
return (
'',
- (map { sprintf '%s', @{$_} } @classes),
+ (map { sprintf ' | %s', @{$_} } @classes),
' |
',
);
}