#!/usr/bin/env perl use 5.014; use warnings; use utf8; use open OUT => ':encoding(utf-8)', ':std'; use File::Basename 'basename'; use Data::Dump 'pp'; our $VERSION = '1.02'; my @fontlist; my %cover; my $incsuffix = '.inc.pl'; for my $fontfile (glob 'data/font/*'.$incsuffix) { my ($fontid) = basename($fontfile, $incsuffix); my ($fontmeta, @fontrange) = do "./$fontfile"; if (!$fontmeta) { warn "$fontfile: $!"; next; } $fontmeta->{file} = $fontid; my $year = substr $fontmeta->{date}, 0, 4; $fontmeta->{description} = join(' ', (map { "version $_" } $fontmeta->{version} || ()), $fontmeta->{version} && $fontmeta->{version} =~ /\Q$year/ ? () : (map { "($_)" } $year || ()), ); push @fontlist, $fontmeta; my $fontrange = $fontmeta->{cover}; $cover{$fontid} = { map { (chr $_ => 1) } $fontmeta->{cover}->@* }; } my %charlist; $charlist{table}->{abc} = ['A'..'Z', 'a'..'z']; my $chartables = do './unicode-table.inc.pl' or warn $@ || $!; if ($chartables) { while (my ($tablegroup, $grouprow) = each %{$chartables}) { while (my ($tablename, $chars) = each %{$grouprow}) { next if $tablename =~ /^-/; my $includerows; # ignore rows before body row for (@{$chars}) { $includerows ||= m/^[.]/ or next; next if /^[.-]/; next if $_ eq '>' or $_ eq '='; s/^\\//; # escape length $_ == 1 or next; # multiple characters lost in query push @{ $charlist{table}->{"$tablegroup/$tablename"} }, $_; push @{ $charlist{table}->{$tablegroup} }, $_; } } # if ($tablegroup eq 'ipa') { # @chars = grep { !m/[a-zA-Z]/ } @chars; # } } } eval { require HTML::Entities; our %char2entity; HTML::Entities->import('%char2entity'); while (my ($char, $entity) = each %char2entity) { $entity =~ /[a-zA-Z]/ or next; # only actual aliases push @{ $charlist{table}->{html} }, $char; } 1; } or warn "Could not include count for html entities: $@"; eval { my $agemap = do './data/unicode-age.inc.pl' or warn "Could not include unicode version data: $!"; use Unicode::UCD 'charinfo'; for my $code (0 .. 256**2*2) { my $charinfo = charinfo($code) or next; next if $charinfo->{category} =~ /^[C]/; # ignore "other" Control chars push @{ $charlist{$_}->{ $charinfo->{$_} } }, chr $code for qw( script category block ); push @{ $charlist{version}->{$_} }, (chr $code) x ($agemap->{$code} <= $_) for 11, 30, 63; } 1; } or warn "Could not include unicode groups: $@"; for (values %charlist) { for my $chars (values %{$_}) { my %row; $row{support} = [ map { scalar grep { defined } @{ $cover{$_->{file}} }{ @{$chars} } } @fontlist ]; $row{count} = scalar @{$chars}; $row{query} = eval { my @query = map { ord } sort @{$chars}; my $i = 0; while ($i < @query) { my $j = $i + 1; my $v = $query[$i]; while ($j < @query) { $v++; last if $query[$j] != $v; $j++; } if ($j - $i > 2) { splice(@query, $i, $j - $i, "$query[$i]-$query[$j-1]"); } $i++; } return join '+', @query; }; $chars = \%row; } } $charlist{fonts} = \@fontlist; my %osfonts = ( win2k => [qw( arial.win2k arialuni lucidau verdana.win2k times.win2k cour.win2k )], # microsoft win8 => [map {"$_.win8"} qw( arial verdana times georgia pala cour )], mac109 => [map {"$_.mac109"} qw( helv lucida times pala )], # apple android => [qw( roboto droidmono notosans )], # google oss => [qw( dvsans freesans code2000 unifont )], ); if (0) { # copy rows to derive older os versions (same list with different trailing number) s/8$/7/ for @{ $osfonts{ win7} = [@{ $osfonts{ win8} }] }; s/9$/7/ for @{ $osfonts{mac107} = [@{ $osfonts{mac109} }] }; } my %fontnum = map { ($fontlist[$_]->{file} => $_) } 0 .. $#fontlist; while (my ($os, $fontids) = each %osfonts) { $charlist{os}->{$os} = [ map { $fontnum{$_} // () } @{$fontids} ]; } $charlist{osdefault} = [qw( win2k win8 mac109 android oss )]; say "# automatically generated by $0"; say 'use utf8;'; say '+', pp(\%charlist) =~ s{ ( \[ \s* \d [^]]* ) ,\s* (?= \] ) # arrays of numbers, excluding trailing comma }{ $1 =~ s/\s+//gr }msxgre; # strip whitespace __END__ =head1 NAME mkfontinfo - Prepare font coverage of various character groups =head1 SYNOPSIS mkfontinfo > unicode-cover.inc.pl Test by finding the number of cyrillic characters in DejaVu Sans: perl -E'$f = do "unicode-cover.inc.pl"; say $f->{Cyrillic}->{dvsans}' =head1 AUTHOR Mischa POSLAWSKY =head1 LICENSE Licensed under the GNU Affero General Public License version 3.