-use utf8;
use Unicode::Normalize qw( NFKD );
use Text::Unidecode qw( unidecode );
# Previous layout table (every line of this definition carries the '-' marker).
# Row 0 holds the plain keys 'a'..'z'; the later rows list symbols for those
# keys (presumably one row per modifier level — TODO confirm).
-my @usintrows = (
- [ 'a' .. 'z'],
- [qw(Á B ¢ Ð É F G H Í J Œ Ø µ Ñ Ó Ö Ä ® § Þ Ú V Å X Ü Æ)],
- [qw(á b © ð é f g h í j œ ø µ ñ ó ö ä ® ß þ ú v å x ü æ)],
- [qw(Å ı Ç ð ´ ̉ ˝ ̣ ˆ ½ Þ ¾ ˜ Ø ∏ Œ ‰ / ˇ ¨ ◊ „ ˛ ¼ ¸)],
- [qw(å ∫ ç ∂ ́ ƒ © ˙ ̂ ∆ ° ¬ µ ̃ ø π œ ® ß † ̈ √ ∑ ≈ ¥ Ω)],
-);
# Column-wise view of @usintrows: for every column index of row 0, collect the
# entries of rows 0..2 into one array ref. Rows 3 and 4 are not read here.
-my @usint = (
- map {
- my $c = $_;
- [ map { $usintrows[$_]->[$c] } 0 .. 2 ]
- } 0 .. $#{ $usintrows[0] }
-);
-
# NOTE(review): lines marked '-' are the previous implementation and lines
# marked '+' its replacement; unmarked lines are common to both.
# %g is filled while 'def' is built and read again when 'flag' is built, so it
# relies on 'def' preceding 'flag' in the constructor below.
+my %g; # present group classes
{
+ rows => [0, 1],
def => {
'' => {
# Builds a key => "g<class>" mapping; the class number is chosen by the first
# test in the chain that matches.
map {
- my @row = @{$_};
# NOTE(review): %rows is not declared in the visible lines — confirm where it
# is defined.
+ my $v = $rows{$_};
my $class = (
# Previous chain: compares the plain key $row[0] with the third column $row[2].
# The NFKD test uses the key as an unquoted pattern.
- !defined $row[2] || $row[0] eq $row[2] ? 1 # identical
- : $row[2] =~ /\A\p{Mn}+\z/ ? 8 # combining accent
- : NFKD($row[2]) =~ $row[0] ? 2 # decomposed equivalent
- : $row[2] =~ /^\p{Latin}/ ? 4 # latin script
- : unidecode($row[2]) =~ /^\W*\Q$row[0]/ ? 5 # transliterated
- : 7
# Replacement chain: compares the hash key $_ with its value $v. The key is
# quoted with \Q in both pattern tests, and the transliteration test is
# case-insensitive and unanchored. Class numbers differ from the previous chain.
+ !defined $v || $_ eq $v ? 1 # identical
+ : $v =~ /\A\p{Mn}+\z/ ? 9 # combining accent
+ : $v =~ /\A[\p{Sk}\p{Lm}]+\z/ ? 8 # modifier symbol
+ : $v =~ /\A[\pM\pP]+\z/ ? 7 # mark
+ : NFKD($v) =~ /\Q$_/ ? 2 # decomposed equivalent
+ : unidecode($v) =~ /\Q$_\E+/i ? 4 # transliterated
+ : $v =~ /^\p{Latin}/ ? 5 # latin script
+ : 6
);
- $row[0] => "g$class"
- } @usint
# Record that this class was assigned; 'flag' below consults $g{9}.
+ $g{$class} = 1;
+ $_ => "g$class"
+ } keys %rows
},
},
# Previous 'key' entry: plain key => second and third column joined by "<br>".
- key => {
- map {
- $_->[0] => "$_->[1]<br>$_->[2]"
- } @usint
- },
+ key => \%rows,
# Legend per group name: a two-element list that appears to be
# [short label, description].
flag => {
g1 => ['unaltered', "same results as without modifier"],
g2 => ['accented', "decomposes to the original letter with a combining accent"],
- g4 => ['latin', "a different (accented) latin letter"],
- g5 => ['similar', "transliterates (mostly) into the unmodified letter"],
- g7 => ['other', "symbol not directly deducible from key"],
- g8 => ['combining', "diacritical mark to be combined with a following character"],
+ g4 => ['similar', "transliterates (mostly) into the unmodified letter"],
+ g5 => ['latin', "a different (accented) latin letter"],
+ g6 => ['symbol', "other character not directly deducible from key"],
+ g7 => ['punctuation', "(punctuation) mark"],
# g8 and g9 are listed only when class 9 was assigned above.
# NOTE(review): g8 is therefore omitted when class 8 occurs without class 9 —
# confirm this is intended. The chain's inline comments for classes 7 and 8
# ("mark", "modifier symbol") also differ from the labels used here.
+ !$g{9} ? () : (
+ g8 => ['mark', "modifier letter or mark (spacing diacritic)"],
+ g9 => ['combining', "diacritical mark to be combined with a following character"],
+ ),
},
}