-say 'use utf8;';
-say '+{';
-for my $name (@chargroups) {
- my $match = qr/\A\p{$name}\z/;
- my @chars = eval {
- grep { m/$match/ } map { chr $_ }
- 0..0xD7FF, 0xE000..0xFDCF, 0xFDF0..0xFFFD,
- } or do {
- warn $@;
- next;
+# Optional group: add to $charlist{table}{html} every key of HTML::Entities'
+# %char2entity whose entity text contains an ASCII letter.  If loading or
+# importing the module dies, the eval turns that into a warning.
+eval {
+    require HTML::Entities;
+    our %char2entity;    # package variable that the import below binds
+    HTML::Entities->import('%char2entity');
+    for my $glyph (keys %char2entity) {
+        # only actual aliases: skip entity text without any letter
+        next unless $char2entity{$glyph} =~ /[a-zA-Z]/;
+        push @{ $charlist{table}->{html} }, $glyph;
+    }
+    1;    # true result so the "or" below fires only after a die
+} or warn "Could not include count for html entities: $@";
+
+# Group characters into %charlist by Unicode script, category and block, and
+# by the age thresholds 11, 30 and 63 looked up in the age data file.
+# An exception inside the block is reported as a warning; groups pushed
+# before it are kept.
+eval {
+    # "do FILE" leaves a compile error in $@ and a read error in $!, so
+    # report whichever is set.  Without age data only the version groups
+    # are skipped below; script/category/block are still collected.
+    my $agemap = do 'data/unicode-age.inc.pl'
+        or warn 'Could not include unicode version data: ' . ($@ || $!);
+
+    use Unicode::UCD 'charinfo';    # runs at compile time, so not guarded by this eval
+    # 256**2*2 code points, i.e. U+0000 up to and including U+1FFFF.
+    for my $code (0 .. 256**2*2 - 1) {
+        my $charinfo = charinfo($code) or next;
+        next if $charinfo->{category} =~ /^[MC]/; # ignore Marks and "other" Control chars
+        my $char = chr $code;
+        push @{ $charlist{$_}->{ $charinfo->{$_} } }, $char
+            for qw( script category block );
+
+        # Comparing against a missing age map would treat every age as 0
+        # and put each character into every version bucket.
+        $agemap or next;
+        # List repetition: one copy of $char when the comparison holds, none
+        # otherwise.  NOTE(review): 11/30/63 presumably mean Unicode 1.1,
+        # 3.0 and 6.3 -- confirm against the encoding used in the data file.
+        push @{ $charlist{version}->{$_} }, ($char) x ($agemap->{$code} <= $_)
+            for 11, 30, 63;
+    }
+    1;    # true result so the "or" below fires only after a die
+} or warn "Could not include unicode groups: $@";
+
+for (values %charlist) {
+for my $chars (values %{$_}) {
+ my %row;
+ $row{support} = [
+ map { scalar grep { defined } @{ $cover{$_->{file}} }{ @{$chars} } }
+ @fontlist
+ ];
+ $row{count} = scalar @{$chars};
+
+ $row{query} = eval {
+ my @query = map { ord } sort @{$chars};
+ my $i = 0;
+ while ($i < @query) {
+ my $j = $i + 1;
+ my $v = $query[$i];
+ while ($j < @query) {
+ $v++;
+ last if $query[$j] != $v;
+ $j++;
+ }
+ if ($j - $i > 2) {
+ splice(@query, $i, $j - $i, "$query[$i]-$query[$j-1]");
+ }
+ $i++;
+ }
+ return join '+', @query;