digraphs: parse xorg compositions for alternative mnemonics
author Mischa POSLAWSKY <perl@shiar.org>
Sun, 22 Feb 2015 17:07:06 +0000 (18:07 +0100)
committer Mischa POSLAWSKY <perl@shiar.org>
Tue, 9 Jun 2015 03:43:42 +0000 (05:43 +0200)
Find multi-key compose sequences in X11 source files, for comparison to
unrelated RFC-1345 mnemonics.

Makefile
tools/mkdigraphs-xorg [new file with mode: 0755]

index d0ea050afd9fee03c9c2c5cacb077108c70e2c43..f674f3f98974057a65425ad218f2760921a5848b 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,7 @@
 all: digraphs.inc.pl unicode-cover.inc.pl countries.inc.pl data/browser/support.inc.pl
+more: all data/digraphs-xorg.inc.pl
 
-download: data/DerivedAge.txt data/rfc1345.txt data/countryInfo.txt data/caniuse.json
+download: data/DerivedAge.txt data/rfc1345.txt data/xorg-compose data/countryInfo.txt data/caniuse.json
 .PHONY: download
 
 data/DerivedAge.txt:
@@ -18,6 +19,12 @@ data/digraphs-rfc.inc.pl: tools/mkdigraphs-rfc data/rfc1345.txt
 data/digraphs-shiar.inc.pl: tools/mkdigraphs-shiar shiar.inc.txt
        $< $(word 2,$^) >$@
 
+data/xorg-compose:
+       tools/wget-ifmodified http://cgit.freedesktop.org/xorg/lib/libX11/plain/nls/en_US.UTF-8/Compose.pre $@
+
+data/digraphs-xorg.inc.pl: tools/mkdigraphs-xorg data/xorg-compose
+       $< $(word 2,$^) >$@
+
 data/digraphs-vim.inc.pl: tools/mkdigraphs-vim
        $< >$@
 
diff --git a/tools/mkdigraphs-xorg b/tools/mkdigraphs-xorg
new file mode 100755 (executable)
index 0000000..32a5044
--- /dev/null
@@ -0,0 +1,78 @@
+#!/usr/bin/env perl
+# Convert X11 <Multi_key> compose sequences into a Perl hash literal mapping
+# ASCII mnemonics to the string they compose.  See the POD after __END__.
+use 5.014;
+use warnings;
+use utf8;
+use open IO => ':utf8', ':std';
+use Data::Dump 'pp';
+
+our $VERSION = '1.00';
+
+# Compose sequences refer to keys by keysym name; those names are resolved
+# through the U+XXXX comments found in the X11 keysym header.
+open my $keysymh, '<', '/usr/include/X11/keysymdef.h'
+       or die "Could not find keysym definitions: $!\n";
+
+# keysym name (without the XK_ prefix) => character it stands for
+my %keysym;
+while (readline $keysymh) {
+       # only definitions followed by a U+XXXX comment are usable;
+       # every line not matching that form is skipped
+       m{
+               \A  [#]define[ ]XK_ (?<name>[a-zA-Z_0-9]+)
+               \h+ 0x(?<value>[0-9a-f]+)
+               \h* [/][*] [\h(] U[+] (?<unicode>[0-9A-F]{4,6})
+       }msx or next;
+       $keysym{ $+{name} } = chr hex $+{unicode};
+}
+close $keysymh;
+
+say "# automatically generated by $0";
+say '+{';
+
+# compose definitions are read from the files named as arguments (or STDIN)
+while ($_ = readline) {
+       # The result is a double-quoted string which may contain backslash
+       # escapes, so each escape pair has to be consumed as a unit: stopping
+       # at the first quote would truncate "\"" to a lone backslash.
+       my ($mnem, $chr) = /^<Multi_key>\h(.*?)\h+:\h"((?:[^"\\]|\\.)+)"/
+               or next;
+       $chr =~ s/\\(.)/$1/g;  # unescape
+       $mnem !~ /<dead|<KP_|<U[0-9A-Fa-f]{4}/ or next;  # skip non-standard keys
+       # replace each <keysym> by its character, dropping the separating space
+       $mnem =~ s{<([^>]+)> ?}{$keysym{$1} // die "reference to unknown keysym $1\n"}eg;
+       $mnem !~ /[^ -\x7F]/ or next;  # skip unicode
+#      (state $seen = {})->{$chr}++ and next;
+       printf "%s => %s,\n", pp($mnem), pp($chr);
+}
+
+say '}';
+
+__END__
+
+=head1 NAME
+
+mkdigraphs-xorg - Output Xorg compose sequences
+
+=head1 SYNOPSIS
+
+
+    mkdigraphs-xorg /usr/share/X11/locale/en_US.UTF-8/Compose >digraphs-xorg.inc.pl
+    perl -e'$di = do "digraphs-xorg.inc.pl"; print $di->{AT}'
+
+=head1 DESCRIPTION
+
+Extracts Multi_key definitions from an X11/Xorg Compose (or Compose.pre) file.
+If successful, Perl code is output resulting in a hash
+with the composed characters keyed by their ASCII mnemonics.
+Any errors and warnings are given at STDERR.
+
+=head1 AUTHOR
+
+Mischa POSLAWSKY <perl@shiar.org>
+
+=head1 LICENSE
+
+Licensed under the GNU Affero General Public License version 3.
+