#!/usr/bin/env perl use 5.014; use warnings; use utf8; use open IO => ':encoding(utf-8)', ':std'; use re '/msx'; use JSON 'decode_json'; use Data::Dump 'pp'; use Shiar_Sheet::FormatChar; our $VERSION = '1.01'; my $matchvim; # enable to prefer best compatibility my $symname = eval { open my $keysymh, '<', 'data/keysymdef.json' or die $!; local $/; return decode_json(readline $keysymh); } or die "Could not read keysym definitions: $@\n"; my $vidi = eval { open my $jsfh, '<', 'data/digraphs.json' or die $!; local $/; return JSON->new->decode(readline $jsfh); } or warn "Could not read comparison digraphs: $@\n"; my %table; while ($_ = readline) { my ($mnem, $chr, $trail) = m/\A \h (.*?) \h+ : \h "([^"]+)" \h* (.*)/ or next; $chr =~ s/\\(.)/$1/g; $mnem !~ m/]+)> \h?}{$symname->{$1} // die "reference to unknown keysym $1\n"}eg; 1; } or warn($@), next; $mnem =~ m/\A [\x20-\x7F]{2} \z/ or next; # only interested in two ascii my $alias = \(state $seen = {})->{$chr}; # assume first is preferred my $cp = ord $chr; my ($class, $name, undef, undef, $string) = @{ Shiar_Sheet::FormatChar->glyph_info($cp) }; my $comparison = ( !$vidi->{key}->{$mnem} ? 'l3' : # free $vidi->{key}->{$mnem}->[0] != $cp ? 'l1' : # conflict $vidi->{key}->{$mnem}->[2] eq 'l5' ? 'l5' : # rfc 'l4' # any ); if (${$alias}) { # aliases an earlier occurrence if ($matchvim and ${$alias}->[2] lt $comparison) { # replace lower compatibility level ${$alias}->[3] = 'l0'; ${$alias}->[2] .= ' u-' . ${$alias}->[2]; ${$alias} = undef; } else { $class = 'l0'; my $menm = substr($mnem, 1, 1).substr($mnem, 0, 1); if ($table{$menm} && $table{$menm}[0] == $cp) { # unannotated if identical to reversed input $cp = 0; } else { $class .= ' ex'; } } } $table{$mnem} = [ $cp, $name, $comparison, $class, $string // () ]; ${$alias} //= $table{$mnem}; } print JSON->new->canonical->indent->encode({ title => 'X.Org', key => \%table, intro => join("\n", 'Character mnemonics following compose key ⎄:', 'in the X Window System (Shift+AltGr by default).', 'Differences from RFC-1345 are indicated.', 'Also see common Unicode.', ), keywords => [qw( xorg x11 x )], flag => { 'l5' => "matching RFC-1345", 'l4' => "matching Vim extension", 'l3' => "unique to Xorg", 'l1' => "conflict", ('l0' => "Xorg preference") x !!$matchvim, 'l0 ex' => "alias", }, flagclass => { l5 => 'u-l4', l4 => 'u-l5', }, }); __END__ =head1 NAME mkdigraphs-xorg - Output Xorg compose sequences =head1 SYNOPSIS mkdigraphs-xorg /usr/share/X11/locale/en_US.UTF-8/Compose | jq -r '.key."AT"[0]' | perl -nE 'say chr' # @ =head1 DESCRIPTION Extracts Multi_key definitions from X11/Xorg Compose.pre include file. If successful, a JSON object is output containing a digraphs list in C with Unicode code points keyed by mnemonics. Any errors and warnings are given at STDERR. =head1 AUTHOR Mischa POSLAWSKY =head1 LICENSE Licensed under the GNU Affero General Public License version 3.