From: Mischa POSLAWSKY Date: Sun, 19 Apr 2009 00:06:42 +0000 (+0000) Subject: charset: page with latin1 character table X-Git-Tag: v1.3~180 X-Git-Url: http://git.shiar.nl/sheet.git/commitdiff_plain/92240b0d1e4434e0981b3326ca4d7ad8673dbf71 charset: page with latin1 character table A 16x16 table with each iso-8859-1 byte in order. Rather than simply using chr(code point), it converts the byte range using Encode::decode, so it can just as easily display any other charset known by Perl. Uses the digraphs include for character details. While this may lack some characters, it is faster and easier. Plug the most glaring gaps by adding the entire ASCII range as single-character "digraphs". Linked from vi i^v (as code points can be entered there), but mostly useful as reference (not necessarily limited to vim). --- diff --git a/charset.plp b/charset.plp new file mode 100644 index 0000000..d3ddea7 --- /dev/null +++ b/charset.plp @@ -0,0 +1,80 @@ +<: +use utf8; +use strict; +use warnings; +use open IO => ':utf8'; + +our $VERSION = '1.0'; + +$header{content_type} = 'text/html; charset=utf-8'; + +:> + + + +charset cheat sheet + + + + + +

Character encoding

+ +<: +my $diinfo = do 'digraphs.inc.pl'; +my %di = map { $diinfo->{$_}->[0] => $_ } grep { ref $diinfo->{$_} } + keys %$diinfo; + +use Encode qw(decode); +# generate character table(s) +# (~16x faster than decoding in loop; +# substr strings is twice as fast as splitting to an array) +my @tables = map { decode($_, pack 'C*', 0..255) } 'iso-8859-1'; +my $NOCHAR = chr 0xFFFD; + +sub quote { + local $_ = shift; + s/"/"/g; + s//>/g; + return $_; +} + +my @nibble = (0..9, 'A'..'F'); +for my $table (@tables) { + print ''; + for my $section (qw{thead tfoot}) { + print "<$section>'; + for my $msb (0 .. $#nibble) { + print '
↳"; + print '', $_ for @nibble; + print " \n"; + } + print '
', $nibble[$msb]; + for my $lsb (0 .. $#nibble) { + my $glyph = substr $table, ($msb<<4) + $lsb, 1; + if ($glyph eq $NOCHAR) { + print ''; + next; + } + my $info = [ord $glyph]; + if (defined (my $mnem = $di{ord $glyph})) { + $info = $diinfo->{$mnem}; + } + my ($codepoint, $name, $prop, $script, $string) = @$info; + + $glyph = quote($string || $glyph); + my $desc = sprintf 'U+%04X%s', $codepoint, $name && " ($name)"; + my @class = ('X', grep {$_} $prop, $script); + + $glyph = "$glyph" if $prop eq 'Zs'; + + printf "\n".'%s', + join(' ', @class), quote($desc), $glyph; + } + print "\n", $nibble[$msb], "\n"; + } + print "
\n"; +} + diff --git a/rfc1345convert b/rfc1345convert index cae6d47..f932753 100755 --- a/rfc1345convert +++ b/rfc1345convert @@ -117,6 +117,9 @@ if (-r 'shiar.inc.txt') { } warn $@ if $@; +$di{chr $_} = $_ for 32 .. 126; +$di{'\\'.$_} = delete $di{$_} for '{', '}', '\\'; + # optionally get unicode character information my %info = eval { require Unicode::UCD; diff --git a/vim.inc.pl b/vim.inc.pl index 98abbc6..b73b254 100644 --- a/vim.inc.pl +++ b/vim.inc.pl @@ -504,7 +504,7 @@ i => { '^h' => ["back$sign{_}space", "co"], '^i' => ["tab", "co"], '^j' => ["enter", "co"], - '^k' => ["di$sign{_}graph", "co arg arg vim linkdigraphs.plp", "Key code"], + '^k' => ["di$sign{_}graph", 'co arg arg vim linkdigraphs', "Key code"], '^l' => ["im leave ins mode", "mo mode vim"], # insertmode only '^m' => ["enter", "co"], '^n' => ["find next keyword", "pm vim"], @@ -515,7 +515,7 @@ i => { '^s' => "^s", '^t' => ["indent", "co"], '^u' => ["delete line", "co"], - '^v' => ["literal or ch$sign{_}ar$sign{_}cod$sign{_}e", 'co', "Verbatim or by Value"], + '^v' => ["literal or ch$sign{_}ar$sign{_}cod$sign{_}e", 'co linkcharset', "Verbatim or by Value"], '^w' => ["delete word back", "co"], '^x' => ["ex$sign{_}pand mode", 'me arg modei^x vim'], '^y' => ["ins char above", "co vim"],