X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/a0ba9298856b2426c5c66b6d2f2b284d98cee594..bd4970d57dbf38e4c2b32d91c1395b87925b200c:/charset.plp diff --git a/charset.plp b/charset.plp index c9ef2b6..e9c4fcc 100644 --- a/charset.plp +++ b/charset.plp @@ -26,12 +26,24 @@ my $diinfo = do 'digraphs.inc.pl'; my %di = map { $diinfo->{$_}->[0] => $_ } grep { ref $diinfo->{$_} } keys %$diinfo; -use Encode qw(decode); +use Encode qw(decode resolve_alias); # generate character table(s) # (~16x faster than decoding in loop; # substr strings is twice as fast as splitting to an array) -my @request = ('iso-8859-1', 'cp437'); -my @tables = map { decode($_, pack 'C*', 0..255) } @request; +my %ALIAS = ( + default => [qw(utf-8 iso-8859-1 cp437)], + 0 => [qw(cp437 cp863)], + 1 => [qw(iso-8859-1 cp1252 MacRoman cp850)], + 2 => [qw(iso-8859-2 cp1250 cp852 MacCentralEurRoman MacCroatian MacRumanian)], + 5 => [qw(koi8-f iso-8859-5 cp1251 MacCyrillic cp855 cp866)], + 7 => [qw(iso-8859-7 cp1253 MacGreek cp737 cp869)], + 8 => [qw(iso-8859-8 cp1255 MacHebrew cp862)], +); +my @request = grep { defined } map { + $_ ? (resolve_alias($_) or print("Encoding $_ unknown") && ()) : (); +} map { defined $ALIAS{$_} ? @{ $ALIAS{$_} } : $_ } + $ENV{PATH_INFO} =~ /\w/ ? split(m{[/+\s]}, $ENV{PATH_INFO}) : 'default'; +my @tables = map { $_ eq 'utf-8-strict' ? undef : decode($_, pack 'C*', 0..255) } @request; my $NOCHAR = chr 0xFFFD; for my $cp437 (grep {$request[$_] eq 'cp437'} 0 .. $#request) { @@ -50,12 +62,71 @@ sub quote { return $_; } +sub printcell_utf8 { + my ($value) = @_; + if ($value <= 0x7F) { + print 'Single byte ASCII' + if $value == 0; + } + elsif ($value <= 0xBF) { + print 'Multi-byte continuation' + if $value == 0x80; + } + elsif ($value <= 0xC1) { + print '(Overl.)' + if $value == 0xC0; + } + elsif ($value <= 0xDF) { + print '2-byte sequence start' + if $value == 0xC2; + print '' + if $value == 0xD0; + } + elsif ($value <= 0xEF) { + print '3-byte sequence start' + if $value == 0xE0; + } + elsif ($value <= 0xF4) { + print '4-byte sequence' + if $value == 0xF0; + } + elsif ($value <= 0xF7) { + print '(Overflow)' + if $value == 0xF5; + } + elsif ($value <= 0xFB) { + print '5-byte' + if $value == 0xF8; + } + elsif ($value <= 0xFD) { + print '6-byte' + if $value == 0xFC; + } + elsif ($value <= 0xFF) { + print 'Invalid' + if $value == 0xFE; + } + else { + print "\n".'?'; + } +} + print "\n"; :>