unicode glyph char character reference common ipa symbol sign mark table digraph
'],
stylesheet => [qw'light dark mono circus red'],
- data => [qw'unicode-table.inc.pl unicode-char.inc.pl'],
+ data => [qw( unicode-cover.inc.pl ttfsupport unicode-char.inc.pl )],
});
:>
);
my @ossel = qw( win95 oss android );
-my $tables = do 'unicode-table.inc.pl' or die $@ || $!;
my (%font, @fontlist);
for my $os (@ossel) {
my $osfonts = $oslist{$os};
my @chars;
my @querydesc;
-my $query = $ENV{PATH_INFO} || $get{q} || 'ipa';
-for ($query) {
- s{^/}{};
- when (qr{^[a-z]+(?:/|\z)}) {
- for (split / /) {
- push @querydesc, "preset group $_";
- my ($tablegroup, $tablename) = split m{/}, $_, 2;
- my @tables = $tablename ? $tables->{$tablegroup}->{$tablename}
- : sort values %{ $tables->{$tablegroup} };
- for (@tables) {
- my $includerows; # ignore rows before body row
- for (@{$_}) {
- $includerows ||= m/^[.]/ or next;
- next if /^[.-]/;
- next if $_ eq '>' or $_ eq '=';
- push @chars, $_;
- }
+if (my $query = $ENV{PATH_INFO} || $get{q} || 'ipa') {
+ my $groupinfo = do 'unicode-cover.inc.pl' or die $@ || $!;
+ for (split /[\s+]/, $query) {
+ s{^/}{};
+ when (qr{^[\d,;\s+-]+$}) {
+ push @querydesc, "character codepoints $_";
+ for (map { split /[^\d-]/ } $_) {
+ my ($charnum, $range) = split /-/, $_;
+ push @chars, chr $_ for $charnum .. ($range // $charnum);
}
}
- when ('ipa') {
- @chars = grep { !m/[a-zA-Z]/ } @chars;
+ when ($_) {
+ my $row = $groupinfo->{$_} or do {
+ warn "group $_ not found";
+ next;
+ };
+ push @querydesc, $row->{-name} // $_;
+ push @chars, map { chr } @{ $row->{-chars} };
}
- }
- when (qr{[\d,;\s+-]+}) {
- push @querydesc, "character codepoints $_";
- for (map { split /[^\d-]/ } $_) {
- my ($charnum, $range) = split /-/, $_;
- push @chars, chr $_ for $charnum .. ($range // $charnum);
+ default {
+ die "unknown parameter: $_\n";
}
}
- when (qr{[A-Z]}) {
- push @querydesc, "unicode match $_";
- eval {
- my $match = qr/\A\p{$_}\z/;
- push @chars, grep { m/$match/ } map { chr $_ }
- 0..0xD7FF, 0xE000..0xFDCF, 0xFDF0..0xFFFD;
- } or die "invalid unicode match: $_\n";
- }
- default {
- die "unknown parameter: $_\n";
- }
}
@chars <= 1500 or die sprintf(
};
}
-my @chargroups = qw(
- N Z Math
- Assigned
- Latin Greek Cyrillic Georgian Arabic Thai Hangul Han
-);
+ when (qr{^[a-z]+(?:/|\z)}) {
+ }
+
+my %charlist;
+
+my $chartables = do 'unicode-table.inc.pl' or warn $@ || $!;
+if ($chartables) {
+ while (my ($tablegroup, $grouprow) = each %{$chartables}) {
+ while (my ($tablename, $chars) = each %{$grouprow}) {
+ next if $tablename =~ /^-/;
+ my $includerows; # ignore rows before body row
+ for (@{$chars}) {
+ $includerows ||= m/^[.]/ or next;
+ next if /^[.-]/;
+ next if $_ eq '>' or $_ eq '=';
+ push @{ $charlist{table}->{"$tablegroup/$tablename"} }, $_;
+ push @{ $charlist{table}->{$tablegroup} }, $_;
+ }
+ }
+# if ($tablegroup eq 'ipa') {
+# @chars = grep { !m/[a-zA-Z]/ } @chars;
+# }
+ }
+}
-say 'use utf8;';
-say '+{';
-for my $name (@chargroups) {
- my $match = qr/\A\p{$name}\z/;
- my @chars = eval {
- grep { m/$match/ } map { chr $_ }
- 0..0xD7FF, 0xE000..0xFDCF, 0xFDF0..0xFFFD,
- } or do {
- warn $@;
- next;
- };
+use Unicode::UCD 'charinfo';
+for my $code (0 .. 256**2) {
+ my $charinfo = charinfo($code) or next;
+ next if $charinfo->{category} =~ /^[MC]/; # ignore Marks and "other" Control chars
+ push @{ $charlist{$_}->{ $charinfo->{$_} } }, chr $code
+ for qw( script category block );
+}
- my %cover = map {
+for (values %charlist) {
+for my $chars (values %{$_}) {
+ my %row = map {
my $fontcover = $font{$_};
- ($_ => scalar grep { $fontcover->{$_} } @chars);
+ ($_ => scalar grep { $fontcover->{$_} } @{$chars});
} keys %font;
- $cover{-count} = scalar @chars;
- $cover{-chars} = [ map { ord } sort @chars ];
+ $row{-count} = scalar @{$chars};
+# $row{-chars} = [ map { ord } sort @{$chars} ];
- say $name.' => '.pp(\%cover).',';
+ $chars = \%row;
}
-say '}';
+}
+
+say 'use utf8;';
+say '+'.pp(\%charlist);
__END__