From: Mischa POSLAWSKY Date: Mon, 9 Apr 2012 21:19:13 +0000 (+0200) Subject: font: coverage groups X-Git-Tag: v1.5~3 X-Git-Url: http://git.shiar.nl/sheet.git/commitdiff_plain/6721f1111bc49b8ee5efc0b39d74321c1393cdfb?hp=4b13af2d4d53762c4cb1aed7e4d0de3b5666c1bc font: coverage groups --- diff --git a/chars.plp b/chars.plp index b081ac2..c36615d 100644 --- a/chars.plp +++ b/chars.plp @@ -7,7 +7,7 @@ Html({ unicode glyph char character reference common ipa symbol sign mark table digraph '], stylesheet => [qw'light dark mono circus red'], - data => [qw'unicode-table.inc.pl unicode-char.inc.pl'], + data => [qw( unicode-cover.inc.pl ttfsupport unicode-char.inc.pl )], }); :> @@ -33,7 +33,6 @@ my %oslist = ( ); my @ossel = qw( win95 oss android ); -my $tables = do 'unicode-table.inc.pl' or die $@ || $!; my (%font, @fontlist); for my $os (@ossel) { my $osfonts = $oslist{$os}; @@ -54,47 +53,29 @@ for my $os (@ossel) { my @chars; my @querydesc; -my $query = $ENV{PATH_INFO} || $get{q} || 'ipa'; -for ($query) { - s{^/}{}; - when (qr{^[a-z]+(?:/|\z)}) { - for (split / /) { - push @querydesc, "preset group $_"; - my ($tablegroup, $tablename) = split m{/}, $_, 2; - my @tables = $tablename ? $tables->{$tablegroup}->{$tablename} - : sort values %{ $tables->{$tablegroup} }; - for (@tables) { - my $includerows; # ignore rows before body row - for (@{$_}) { - $includerows ||= m/^[.]/ or next; - next if /^[.-]/; - next if $_ eq '>' or $_ eq '='; - push @chars, $_; - } +if (my $query = $ENV{PATH_INFO} || $get{q} || 'ipa') { + my $groupinfo = do 'unicode-cover.inc.pl' or die $@ || $!; + for (split /[\s+]/, $query) { + s{^/}{}; + when (qr{^[\d,;\s+-]+$}) { + push @querydesc, "character codepoints $_"; + for (map { split /[^\d-]/ } $_) { + my ($charnum, $range) = split /-/, $_; + push @chars, chr $_ for $charnum .. ($range // $charnum); } } - when ('ipa') { - @chars = grep { !m/[a-zA-Z]/ } @chars; + when ($_) { + my $row = $groupinfo->{$_} or do { + warn "group $_ not found"; + next; + }; + push @querydesc, $row->{-name} // $_; + push @chars, map { chr } @{ $row->{-chars} }; } - } - when (qr{[\d,;\s+-]+}) { - push @querydesc, "character codepoints $_"; - for (map { split /[^\d-]/ } $_) { - my ($charnum, $range) = split /-/, $_; - push @chars, chr $_ for $charnum .. ($range // $charnum); + default { + die "unknown parameter: $_\n"; } } - when (qr{[A-Z]}) { - push @querydesc, "unicode match $_"; - eval { - my $match = qr/\A\p{$_}\z/; - push @chars, grep { m/$match/ } map { chr $_ } - 0..0xD7FF, 0xE000..0xFDCF, 0xFDF0..0xFFFD; - } or die "invalid unicode match: $_\n"; - } - default { - die "unknown parameter: $_\n"; - } } @chars <= 1500 or die sprintf( diff --git a/font.plp b/font.plp index 79ac28d..50d8dcb 100644 --- a/font.plp +++ b/font.plp @@ -52,8 +52,10 @@ print ''; printf '%s', $_ for @fontlist; say ''; -for my $name (sort keys %{$cover}) { - my $row = $cover->{$name}; +for my $group (sort keys %{$cover}) { + say ''; +for my $name (sort keys %{ $cover->{$group} }) { + my $row = $cover->{$group}->{$name}; print ''; $name = qq{$name} if $row->{-count} and $row->{-count} < 1280; @@ -76,6 +78,8 @@ for my $name (sort keys %{$cover}) { } say ''; } + say ''; +} say "\n"; diff --git a/tools/mkfontinfo b/tools/mkfontinfo index fe7059f..0f0857d 100755 --- a/tools/mkfontinfo +++ b/tools/mkfontinfo @@ -23,34 +23,54 @@ for my $fontfile (glob 'ttfsupport/*'.$incsuffix) { }; } -my @chargroups = qw( - N Z Math - Assigned - Latin Greek Cyrillic Georgian Arabic Thai Hangul Han -); + when (qr{^[a-z]+(?:/|\z)}) { + } + +my %charlist; + +my $chartables = do 'unicode-table.inc.pl' or warn $@ || $!; +if ($chartables) { + while (my ($tablegroup, $grouprow) = each %{$chartables}) { + while (my ($tablename, $chars) = each %{$grouprow}) { + next if $tablename =~ /^-/; + my $includerows; # ignore rows before body row + for (@{$chars}) { + $includerows ||= m/^[.]/ or next; + next if /^[.-]/; + next if $_ eq '>' or $_ eq '='; + push @{ $charlist{table}->{"$tablegroup/$tablename"} }, $_; + push @{ $charlist{table}->{$tablegroup} }, $_; + } + } +# if ($tablegroup eq 'ipa') { +# @chars = grep { !m/[a-zA-Z]/ } @chars; +# } + } +} -say 'use utf8;'; -say '+{'; -for my $name (@chargroups) { - my $match = qr/\A\p{$name}\z/; - my @chars = eval { - grep { m/$match/ } map { chr $_ } - 0..0xD7FF, 0xE000..0xFDCF, 0xFDF0..0xFFFD, - } or do { - warn $@; - next; - }; +use Unicode::UCD 'charinfo'; +for my $code (0 .. 256**2) { + my $charinfo = charinfo($code) or next; + next if $charinfo->{category} =~ /^[MC]/; # ignore Marks and "other" Control chars + push @{ $charlist{$_}->{ $charinfo->{$_} } }, chr $code + for qw( script category block ); +} - my %cover = map { +for (values %charlist) { +for my $chars (values %{$_}) { + my %row = map { my $fontcover = $font{$_}; - ($_ => scalar grep { $fontcover->{$_} } @chars); + ($_ => scalar grep { $fontcover->{$_} } @{$chars}); } keys %font; - $cover{-count} = scalar @chars; - $cover{-chars} = [ map { ord } sort @chars ]; + $row{-count} = scalar @{$chars}; +# $row{-chars} = [ map { ord } sort @{$chars} ]; - say $name.' => '.pp(\%cover).','; + $chars = \%row; } -say '}'; +} + +say 'use utf8;'; +say '+'.pp(\%charlist); __END__