From 0e122d62ab55a64c9db4c856b67ac34f14acd988 Mon Sep 17 00:00:00 2001 From: Mischa POSLAWSKY Date: Tue, 8 Feb 2011 19:54:59 +0100 Subject: [PATCH] unicode: full repository of common accented latin All letter/diacritic combinations for major European languages, included for ?html for more complete entity coverage. --- unicode-table.inc.pl | 19 +++++++++++++++++++ unicode.plp | 6 ++++++ 2 files changed, 25 insertions(+) diff --git a/unicode-table.inc.pl b/unicode-table.inc.pl index 3cebceb..7ab3dd3 100644 --- a/unicode-table.inc.pl +++ b/unicode-table.inc.pl @@ -106,6 +106,25 @@ latin => { . ă ę œ ů š ð . ı ė º ư ș đ }], + vowels => [qw{ + . á é í ó ú ý + . â ê î ô û ŷ + . à è ì ò ù ỳ + . ã ẽ ĩ õ ũ ỹ + . ä ë ï ö ü ÿ + . æ & ij œ ᵫ - + . å - ı ⱺ ů ẙ + . ă ĕ ĭ ŏ ŭ - + . ǎ ě ǐ ǒ ǔ - + . ą ę į ǫ ų - + }], + westeuro => [qw{ + . ç ð ñ ß þ + }], + easteuro => [qw{ + . č ď ľ ň ř š ť ž + . ć đ ł ń ŕ ś ŧ ź ż + }], uncommon => [qw{ .>Englisc æ ð ē ȝ œ þ ƿ .>Sámi á č đ ŋ š ŧ ž diff --git a/unicode.plp b/unicode.plp index 7100aad..76c31ec 100644 --- a/unicode.plp +++ b/unicode.plp @@ -117,6 +117,12 @@ my @config = qw( ?kataderiv ); +splice @config, 4, 2, qw( + latin/vowels=-5?latin/vowels + westeuro + ?easteuro +) if exists $get{html}; + $_ and m{/*+(.+)} and @config = split /[ ]/, $1 for $ENV{PATH_INFO}, $get{q}; my $tables = do 'unicode-table.inc.pl' or die $@ || $!; -- 2.30.0