6 use open OUT => ':encoding(utf-8)', ':std';
7 use File::Basename 'basename';
10 our $VERSION = '1.02';
15 my $incsuffix = '.inc.pl';
16 for my $fontfile (glob 'data/font/*'.$incsuffix) {
17 my ($fontid) = basename($fontfile, $incsuffix);
18 my ($fontmeta, @fontrange) = do "./$fontfile";
23 $fontmeta->{file} = $fontid;
24 my $year = substr $fontmeta->{date}, 0, 4;
25 $fontmeta->{description} = join(' ',
26 (map { "version $_" } $fontmeta->{version} || ()),
27 $fontmeta->{version} && $fontmeta->{version} =~ /\Q$year/ ? () :
28 (map { "($_)" } $year || ()),
30 push @fontlist, $fontmeta;
31 my $fontrange = $fontmeta->{cover};
32 $cover{$fontid} = { map { (chr $_ => 1) } $fontmeta->{cover}->@* };
37 $charlist{table}->{abc} = ['A'..'Z', 'a'..'z'];
39 my $chartables = do './unicode-table.inc.pl' or warn $@ || $!;
41 while (my ($tablegroup, $grouprow) = each %{$chartables}) {
42 while (my ($tablename, $chars) = each %{$grouprow}) {
43 next if $tablename =~ /^-/;
44 my $includerows; # ignore rows before body row
46 $includerows ||= m/^[.]/ or next;
48 next if $_ eq '>' or $_ eq '=';
50 length $_ == 1 or next; # multiple characters lost in query
51 push @{ $charlist{table}->{"$tablegroup/$tablename"} }, $_;
52 push @{ $charlist{table}->{$tablegroup} }, $_;
55 # if ($tablegroup eq 'ipa') {
56 # @chars = grep { !m/[a-zA-Z]/ } @chars;
62 require HTML::Entities;
64 HTML::Entities->import('%char2entity');
65 while (my ($char, $entity) = each %char2entity) {
66 $entity =~ /[a-zA-Z]/ or next; # only actual aliases
67 push @{ $charlist{table}->{html} }, $char;
70 } or warn "Could not include count for html entities: $@";
73 my $agemap = do './data/unicode-age.inc.pl'
74 or warn "Could not include unicode version data: $!";
76 use Unicode::UCD 'charinfo';
77 for my $code (0 .. 256**2*2) {
78 my $charinfo = charinfo($code) or next;
79 next if $charinfo->{category} =~ /^[C]/; # ignore "other" Control chars
80 push @{ $charlist{$_}->{ $charinfo->{$_} } }, chr $code
81 for qw( script category block );
82 push @{ $charlist{version}->{$_} }, (chr $code) x ($agemap->{$code} <= $_)
86 } or warn "Could not include unicode groups: $@";
88 for (values %charlist) {
89 for my $chars (values %{$_}) {
92 map { scalar grep { defined } @{ $cover{$_->{file}} }{ @{$chars} } }
95 $row{count} = scalar @{$chars};
98 my @query = map { ord } sort @{$chars};
100 while ($i < @query) {
103 while ($j < @query) {
105 last if $query[$j] != $v;
109 splice(@query, $i, $j - $i, "$query[$i]-$query[$j-1]");
113 return join '+', @query;
120 $charlist{fonts} = \@fontlist;
123 win2k => [qw( arial.win2k arialuni lucidau verdana.win2k times.win2k cour.win2k )], # microsoft
124 win8 => [map {"$_.win8"} qw( arial verdana times georgia pala cour )],
125 mac109 => [map {"$_.mac109"} qw( helv lucida times pala )], # apple
126 android => [qw( roboto droidmono notosans )], # google
127 oss => [qw( dvsans freesans code2000 unifont )],
130 # copy rows to derive older os versions (same list with different trailing number)
131 s/8$/7/ for @{ $osfonts{ win7} = [@{ $osfonts{ win8} }] };
132 s/9$/7/ for @{ $osfonts{mac107} = [@{ $osfonts{mac109} }] };
135 my %fontnum = map { ($fontlist[$_]->{file} => $_) } 0 .. $#fontlist;
136 while (my ($os, $fontids) = each %osfonts) {
137 $charlist{os}->{$os} = [ map { $fontnum{$_} // () } @{$fontids} ];
139 $charlist{osdefault} = [qw( win2k win8 mac109 android oss )];
141 say "# automatically generated by $0";
143 say '+', pp(\%charlist) =~ s{
144 ( \[ \s* \d [^]]* ) ,\s* (?= \] ) # arrays of numbers, excluding trailing comma
145 }{ $1 =~ s/\s+//gr }msxgre; # strip whitespace
151 mkfontinfo - Prepare font coverage of various character groups
155 mkfontinfo > unicode-cover.inc.pl
157 Test by finding the number of cyrillic characters in DejaVu Sans:
159 perl -E'$f = do "unicode-cover.inc.pl"; say $f->{Cyrillic}->{dvsans}'
163 Mischa POSLAWSKY <perl@shiar.org>
167 Licensed under the GNU Affero General Public License version 3.