From a0ba9298856b2426c5c66b6d2f2b284d98cee594 Mon Sep 17 00:00:00 2001 From: Mischa POSLAWSKY Date: Sun, 19 Apr 2009 14:31:06 +0000 Subject: [PATCH] charset: utf-8 byte function table --- charset.plp | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/charset.plp b/charset.plp index c83508b..c9ef2b6 100644 --- a/charset.plp +++ b/charset.plp @@ -91,6 +91,80 @@ for my $tablenum (0 .. $#tables) { print "\n"; } +{ + print '
  • '; + for my $section (qw{thead}) { + print "<$section>'; + print ''; + for my $msb (0 .. $#nibble) { + print '
    UTF-8
    ↱"; + print '', $_ for @nibble; + print "\n"; + } + print '
    ', $nibble[$msb]; + for my $lsb (0 .. $#nibble) { + my $value = ($msb<<4) + $lsb; + if ($value <= 0x7F) { + print 'Single byte ASCII' + if $value == 0; + } + elsif ($value <= 0xBF) { + print 'Multi-byte continuation' + if $value == 0x80; + } + elsif ($value <= 0xC1) { + print '(Overl.)' + if $value == 0xC0; + } + elsif ($value <= 0xDF) { + print '2-byte sequence start' + if $value == 0xC2; + print '' + if $value == 0xD0; + } + elsif ($value <= 0xEF) { + print '3-byte sequence start' + if $value == 0xE0; + } + elsif ($value <= 0xF4) { + print '4-byte sequence' + if $value == 0xF0; + } + elsif ($value <= 0xF7) { + print '(Overflow)' + if $value == 0xF5; + } + elsif ($value <= 0xFB) { + print '5-byte' + if $value == 0xF8; + } + elsif ($value <= 0xFD) { + print '6-byte' + if $value == 0xFC; + } + elsif ($value <= 0xFF) { + print 'Invalid' + if $value == 0xFE; + } + else { + print "\n".'?'; + } + } + print "\n"; + } + print "
    \n"; +} + print "\n"; :> -- 2.30.0