baltic => [qw( iso-8859-4 iso-8859-13 cp1257 cp775 )],
nordic => [qw( iso-8859-10 cp865 cp861 MacIcelandic MacSami )],
cyrillic => [qw( koi8-r koi8-u koi8-f iso-8859-5 cp1251 MacCyrillic cp866 cp855
- U4 U2DE-2DF UA64-A69 U50-52 )], # MacUkrainian is broken
+ +400 +2DE0 +A640-A69F +500-52F )], # MacUkrainian is broken
arabic => [qw( iso-8859-6 cp1256 MacArabic cp864 cp1006 MacFarsi
- U6 U8A-8F+0-1F+40 U75-77 )],
- greek => [qw( iso-8859-7 cp1253 MacGreek cp737 cp869 U37-3F U1F )],
- hebrew => [qw( iso-8859-8 cp1255 MacHebrew cp862 U59-5F )],
+ +600 +8A0-8BF+8E0 +750-77F )],
+ greek => [qw( iso-8859-7 cp1253 MacGreek cp737 cp869 +370 +1F00 )],
+ hebrew => [qw( iso-8859-8 cp1255 MacHebrew cp862 +590 )],
thai => [qw( iso-8859-11 cp874 MacThai )],
vietnamese => [qw( viscii cp1258 MacVietnamese )],
symbols => [qw( symbol dingbats MacDingbats wingdings wingdings2 wingdings3 webdings )],
eval "require Encode::\u$input";
}
- if ($input =~ m{ \A u ([0-9a-f]+) (?:-([0-9a-f]+))? \z }ix) {
- my $start = hex($1) << ($2 ? 4 : 8);
- my $end = $2 ? (hex($2) << 4) + $row{cols} - 1 : $start + 255;
- $row{table} = join '', map { chr } $start .. $end;
+ if ($input eq '') { # build the table from a code point range; NOTE(review): presumably startpoint/endpoint were parsed from a "+HEX[-HEX]" spec before this point - confirm
+ $row{offset} = delete $row{startpoint}; # first code point; the startpoint key is removed from %row
+ $row{set} = 'Unicode characters'; # generic title, replaced below when the range stays within one block
+ my $block = $row{offset} >> 8; # index of the 256-code-point block holding the first character
+ $row{endpoint} ||= ($block + 1 << 8) - 1; # endpoint unset (or false): run to the last code point of that block; '+' binds tighter than '<<'
+ $block == ($row{endpoint} >> 8) or undef $block; # range ends in a different block, so there is no single block to name
+
+ $row{table} = join '', map { chr } $row{offset} .. $row{endpoint}; # one character per code point, both ends inclusive
utf8::upgrade($row{table}); # prevent latin1 output
- $row{endpoint} = $end - $start;
- $row{set} = sprintf 'Unicode block U+%02Xxx', $start >> 8;
- $row{offset} = $start % 256;
- $row{startpoint} = 0;
+
+ $row{endpoint} -= $row{offset}; # from here on endpoint is relative to the first character of the table
+
+ if (defined $block) { # the whole range lies within a single block
+ $row{set} = sprintf 'Unicode block U+%02Xxx', $block;
+ $row{offset} %= 0x100; # keep only the position within the block
+ }
}
elsif (lc $input eq 'uu') {
- $row{set} = 'Unicode planes';
$row{cell} = do 'charset-ucplanes.inc.pl'
or Alert('Table data could not be read', $@ || $!);
$row{endpoint} ||= 1023;
+ $row{set} = 'Unicode planes'; # title is now assigned after the cell data load and the endpoint default above
}
elsif (lc $input eq 'u') {
$row{cell} = do 'charset-unicode.inc.pl'