use utf8;
use open OUT => ':utf8', ':std';
-use List::Util 'reduce';
use File::Basename 'basename';
use Data::Dump 'pp';
-our $VERSION = '1.00';
+our $VERSION = '1.01';
-my %font;
+my @fontlist;
+
+my %cover;
my $incsuffix = '.inc.pl';
for my $fontfile (glob 'ttfsupport/*'.$incsuffix) {
my ($fontid) = basename($fontfile, $incsuffix);
my ($fontmeta, @fontrange) = do $fontfile or next;
- $font{$fontid} = {
- -id => $fontmeta->{id} || $fontid,
- -name => $fontmeta->{name},
- map { (chr $_ => 1) } @fontrange
- };
+ $fontmeta->{file} = $fontid;
+ push @fontlist, $fontmeta;
+ $cover{$fontid} = { map { (chr $_ => 1) } @fontrange };
}
-my @chargroups = qw(
- N Z Math
- Assigned
- Latin Greek Cyrillic Georgian Arabic Thai Hangul Han
-);
+my %charlist;
+
+my $chartables = do 'unicode-table.inc.pl' or warn $@ || $!;
+if ($chartables) {
+ while (my ($tablegroup, $grouprow) = each %{$chartables}) {
+ while (my ($tablename, $chars) = each %{$grouprow}) {
+ next if $tablename =~ /^-/;
+ my $includerows; # ignore rows before body row
+ for (@{$chars}) {
+ $includerows ||= m/^[.]/ or next;
+ next if /^[.-]/;
+ next if $_ eq '>' or $_ eq '=';
+ s/^\\//; # escape
+ length $_ == 1 or next; # multiple characters lost in query
+ push @{ $charlist{table}->{"$tablegroup/$tablename"} }, $_;
+ push @{ $charlist{table}->{$tablegroup} }, $_;
+ }
+ }
+# if ($tablegroup eq 'ipa') {
+# @chars = grep { !m/[a-zA-Z]/ } @chars;
+# }
+ }
+}
-say 'use utf8;';
-say '+{';
-for my $name (@chargroups) {
- my $match = qr/\A\p{$name}\z/;
- my @chars = eval {
- grep { m/$match/ } map { chr $_ }
- 0..0xD7FF, 0xE000..0xFDCF, 0xFDF0..0xFFFD,
- } or do {
- warn $@;
- next;
+eval {
+ require HTML::Entities;
+ our %char2entity;
+ HTML::Entities->import('%char2entity');
+ while (my ($char, $entity) = each %char2entity) {
+ $entity =~ /[a-zA-Z]/ or next; # only actual aliases
+ push @{ $charlist{table}->{html} }, $char;
+ }
+ 1;
+} or warn "Could not include count for html entities: $@";
+
+eval {
+ use Unicode::UCD 'charinfo';
+ for my $code (0 .. 256**2) {
+ my $charinfo = charinfo($code) or next;
+ next if $charinfo->{category} =~ /^[MC]/; # ignore Marks and "other" Control chars
+ push @{ $charlist{$_}->{ $charinfo->{$_} } }, chr $code
+ for qw( script category block );
+ }
+ 1;
+} or warn "Could not include unicode groups: $@";
+
+for (values %charlist) {
+for my $chars (values %{$_}) {
+ my %row;
+ $row{support} = [
+ map { scalar grep { defined } @{ $cover{$_->{file}} }{ @{$chars} } }
+ @fontlist
+ ];
+ $row{count} = scalar @{$chars};
+
+ $row{query} = eval {
+ my @query = map { ord } sort @{$chars};
+ my $i = 0;
+ while ($i < @query) {
+ my $j = $i + 1;
+ my $v = $query[$i];
+ while ($j < @query) {
+ $v++;
+ last if $query[$j] != $v;
+ $j++;
+ }
+ if ($j - $i > 2) {
+ splice(@query, $i, $j - $i, "$query[$i]-$query[$j-1]");
+ }
+ $i++;
+ }
+ return join '+', @query;
};
- my %cover = map {
- my $fontcover = $font{$_};
- ($_ => scalar grep { $fontcover->{$_} } @chars);
- } keys %font;
- $cover{-count} = scalar @chars;
- $cover{-chars} = [ map { ord } sort @chars ];
+ $chars = \%row;
+}
+}
+
+$charlist{fonts} = \@fontlist;
- say $name.' => '.pp(\%cover).',';
+my %osfonts = (
+ win95 => [qw( arial arialuni lucidau verdana timesnew couriernew )], # microsoft
+ mac10 => [qw( helvetica lucida times garamond palatino )], # apple
+ android => [qw( roboto noto )], # google
+ oss => [qw( dvsans c2k unifont opensans )],
+);
+my %fontnum = map { ($fontlist[$_]->{file} => $_) } 0 .. $#fontlist;
+while (my ($os, $fontids) = each %osfonts) {
+ $charlist{os}->{$os} = [ map { $fontnum{$_} // () } @{$fontids} ];
}
-say '}';
+$charlist{osdefault} = [qw( win95 mac10 oss android )];
+
+say "# automatically generated by $0";
+say 'use utf8;';
+say '+'.pp(\%charlist);
__END__