From e1497cc49fa909c193ec5b165fc7445305be09b5 Mon Sep 17 00:00:00 2001 From: Mischa POSLAWSKY Date: Sun, 14 Sep 2008 02:56:27 +0000 Subject: [PATCH] digraphs: alternate glyph string in include Allow the digraph include to specify string overrides in cases where a glyph should not be shown literally. These are: - Combining characters: prepend a placeholder. In fact, dead chars are invalid on their own. - ASCII control characters: substitute display symbols at U+24xx. Though browsers usually show a character placeholder, it's not very nice to send control chars directly. - Other control characters: show the replacement character U+FFFD. Actually with some (Linux) fonts, the anonymous code point fallback is more descriptive, but better to be on the safe side (they still have semantic value after all). --- digraphs.inc.pl | 196 ++++++++++++++++++++++++------------------------ digraphs.plp | 4 +- rfc1345convert | 10 ++- 3 files changed, 106 insertions(+), 104 deletions(-) diff --git a/digraphs.inc.pl b/digraphs.inc.pl index 4b4fcae..bca5855 100644 --- a/digraphs.inc.pl +++ b/digraphs.inc.pl @@ -14,31 +14,31 @@ q{!b}=>[664,'LATIN LETTER BILABIAL CLICK','Ll Xz','Latin'], q{!c}=>[448,'LATIN LETTER DENTAL CLICK','Lo Xz','Latin'], q{!q}=>[451,'LATIN LETTER RETROFLEX CLICK','Lo Xz','Latin'], q{!x}=>[449,'LATIN LETTER LATERAL CLICK','Lo Xz','Latin'], -q{"!}=>[768,'COMBINING GRAVE ACCENT','Mn','Inherited'], -q{""}=>[779,'COMBINING DOUBLE ACUTE ACCENT','Mn','Inherited'], -q{"'}=>[769,'COMBINING ACUTE ACCENT','Mn','Inherited'], -q{"(}=>[774,'COMBINING BREVE','Mn','Inherited'], -q{"+}=>[1612,'ARABIC DAMMATAN','Mn','Inherited'], -q{",}=>[807,'COMBINING CEDILLA','Mn','Inherited'], -q{"-}=>[772,'COMBINING MACRON','Mn','Inherited'], -q{".}=>[775,'COMBINING DOT ABOVE','Mn','Inherited'], -q{"/}=>[824,'COMBINING LONG SOLIDUS OVERLAY','Mn','Inherited'], -q{"0}=>[778,'COMBINING RING ABOVE','Mn','Inherited'], -q{"1}=>[836,'COMBINING GREEK DIALYTIKA TONOS','Mn','Inherited'], 
-q{"3}=>[9216,'','Cc Xa','Common'], +q{"!}=>[768,'COMBINING GRAVE ACCENT','Mn','Inherited','◌̀'], +q{""}=>[779,'COMBINING DOUBLE ACUTE ACCENT','Mn','Inherited','◌̋'], +q{"'}=>[769,'COMBINING ACUTE ACCENT','Mn','Inherited','◌́'], +q{"(}=>[774,'COMBINING BREVE','Mn','Inherited','◌̆'], +q{"+}=>[1612,'ARABIC DAMMATAN','Mn','Inherited','◌ٌ'], +q{",}=>[807,'COMBINING CEDILLA','Mn','Inherited','◌̧'], +q{"-}=>[772,'COMBINING MACRON','Mn','Inherited','◌̄'], +q{".}=>[775,'COMBINING DOT ABOVE','Mn','Inherited','◌̇'], +q{"/}=>[824,'COMBINING LONG SOLIDUS OVERLAY','Mn','Inherited','◌̸'], +q{"0}=>[778,'COMBINING RING ABOVE','Mn','Inherited','◌̊'], +q{"1}=>[836,'COMBINING GREEK DIALYTIKA TONOS','Mn','Inherited','◌̈́'], +q{"3}=>[0,'','Cc Xa','Common','␀'], q{"5}=>[12443,'KATAKANA-HIRAGANA VOICED SOUND MARK','Sk','Common'], q{"6}=>[8220,'LEFT DOUBLE QUOTATION MARK','Pi','Common'], q{"9}=>[8221,'RIGHT DOUBLE QUOTATION MARK','Pf','Common'], -q{":}=>[776,'COMBINING DIAERESIS','Mn','Inherited'], -q{";}=>[808,'COMBINING OGONEK','Mn','Inherited'], -q{"<}=>[780,'COMBINING CARON','Mn','Inherited'], -q{"=}=>[819,'COMBINING DOUBLE LOW LINE','Mn','Inherited'], -q{">}=>[770,'COMBINING CIRCUMFLEX ACCENT','Mn','Inherited'], -q{"?}=>[771,'COMBINING TILDE','Mn','Inherited'], -q{"_}=>[818,'COMBINING LOW LINE','Mn','Inherited'], -q{"d}=>[788,'COMBINING REVERSED COMMA ABOVE','Mn','Inherited'], -q{"i}=>[837,'COMBINING GREEK YPOGEGRAMMENI','Mn','Inherited'], -q{"p}=>[787,'COMBINING COMMA ABOVE','Mn','Inherited'], +q{":}=>[776,'COMBINING DIAERESIS','Mn','Inherited','◌̈'], +q{";}=>[808,'COMBINING OGONEK','Mn','Inherited','◌̨'], +q{"<}=>[780,'COMBINING CARON','Mn','Inherited','◌̌'], +q{"=}=>[819,'COMBINING DOUBLE LOW LINE','Mn','Inherited','◌̳'], +q{">}=>[770,'COMBINING CIRCUMFLEX ACCENT','Mn','Inherited','◌̂'], +q{"?}=>[771,'COMBINING TILDE','Mn','Inherited','◌̃'], +q{"_}=>[818,'COMBINING LOW LINE','Mn','Inherited','◌̲'], +q{"d}=>[788,'COMBINING REVERSED COMMA ABOVE','Mn','Inherited','◌̔'], 
+q{"i}=>[837,'COMBINING GREEK YPOGEGRAMMENI','Mn','Inherited','◌ͅ'], +q{"p}=>[787,'COMBINING COMMA ABOVE','Mn','Inherited','◌̓'], q{%"}=>[1068,'CYRILLIC CAPITAL LETTER SOFT SIGN','Lu','Cyrillic'], q{%'}=>[1100,'CYRILLIC SMALL LETTER SOFT SIGN','Ll','Cyrillic'], q{%0}=>[8240,'PER MILLE SIGN','Po','Common'], @@ -50,7 +50,7 @@ q{'"}=>[733,'DOUBLE ACUTE ACCENT','Sk','Common'], q{'%}=>[1012,'GREEK CAPITAL THETA SYMBOL','Lu','Greek'], q{''}=>[180,'ACUTE ACCENT','Sk Xl','Common'], q{'(}=>[728,'BREVE','Sk','Common'], -q{'+}=>[1615,'ARABIC DAMMA','Mn','Inherited'], +q{'+}=>[1615,'ARABIC DAMMA','Mn','Inherited','◌ُ'], q{',}=>[184,'CEDILLA','Sk Xl','Common'], q{'-}=>[8254,'OVERLINE','Po','Common'], q{'.}=>[729,'DOT ABOVE','Sk','Common'], @@ -144,16 +144,16 @@ q{.M}=>[183,'MIDDLE DOT','Po Xl','Common'], q{.P}=>[8901,'DOT OPERATOR','Sm','Common'], q{.S}=>[9617,'LIGHT SHADE','So','Common'], q{._}=>[12290,'IDEOGRAPHIC FULL STOP','Po','Common'], -q{/+}=>[1614,'ARABIC FATHA','Mn','Inherited'], +q{/+}=>[1614,'ARABIC FATHA','Mn','Inherited','◌َ'], q{/-}=>[8224,'DAGGER','Po','Common'], q{//}=>[92,'REVERSE SOLIDUS','Po Xa','Common'], q{/0}=>[8709,'EMPTY SET','Sm','Common'], q{/=}=>[8225,'DOUBLE DAGGER','Po','Common'], q{/>}=>[9002,'RIGHT-POINTING ANGLE BRACKET','Pe','Common'], -q{/c}=>[9216,'','Cc Xa','Common'], +q{/c}=>[0,'','Cc Xa','Common','␀'], q{/f}=>[8260,'FRACTION SLASH','Sm','Common'], q{0(}=>[8733,'PROPORTIONAL TO','Sm','Common'], -q{0+}=>[1618,'ARABIC SUKUN','Mn','Inherited'], +q{0+}=>[1618,'ARABIC SUKUN','Mn','Inherited','◌ْ'], q{0.}=>[8857,'CIRCLED DOT OPERATOR','Sm','Common'], q{00}=>[8734,'INFINITY','Sm','Common'], q{02}=>[8858,'CIRCLED RING OPERATOR','Sm','Common'], @@ -172,7 +172,7 @@ q{0s}=>[8320,'SUBSCRIPT ZERO','No','Common'], q{0u}=>[9786,'WHITE SMILING FACE','So','Common'], q{1"}=>[8245,'REVERSED PRIME','Po','Common'], q{1'}=>[8242,'PRIME','Po','Common'], -q{1+}=>[1616,'ARABIC KASRA','Mn','Inherited'], +q{1+}=>[1616,'ARABIC KASRA','Mn','Inherited','◌ِ'], 
q{1.}=>[9352,'DIGIT ONE FULL STOP','No','Common'], q{12}=>[189,'VULGAR FRACTION ONE HALF','No Xl','Common'], q{13}=>[8531,'VULGAR FRACTION ONE THIRD','No','Common'], @@ -211,7 +211,7 @@ q{3!}=>[9478,'BOX DRAWINGS LIGHT TRIPLE DASH VERTICAL','So','Common'], q{3"}=>[8247,'REVERSED TRIPLE PRIME','Po','Common'], q{3'}=>[8244,'TRIPLE PRIME','Po','Common'], q{3*}=>[8258,'ASTERISM','Po Xz','Common'], -q{3+}=>[1617,'ARABIC SHADDA','Mn','Inherited'], +q{3+}=>[1617,'ARABIC SHADDA','Mn','Inherited','◌ّ'], q{3-}=>[9476,'BOX DRAWINGS LIGHT TRIPLE DASH HORIZONTAL','So','Common'], q{3.}=>[9354,'DIGIT THREE FULL STOP','No','Common'], q{3/}=>[9479,'BOX DRAWINGS HEAVY TRIPLE DASH VERTICAL','So','Common'], @@ -290,7 +290,7 @@ q{9r}=>[8568,'SMALL ROMAN NUMERAL NINE','Nl','Common'], q{9s}=>[8329,'SUBSCRIPT NINE','No','Common'], q{:(}=>[9785,'WHITE FROWNING FACE','So Xz','Common'], q{:)}=>[9786,'WHITE SMILING FACE','So Xz','Common'], -q{:+}=>[1611,'ARABIC FATHATAN','Mn','Inherited'], +q{:+}=>[1611,'ARABIC FATHATAN','Mn','Inherited','◌ً'], q{:.}=>[8757,'BECAUSE','Sm','Common'], q{:3}=>[8942,'VERTICAL ELLIPSIS','Sm','Common'], q{:9}=>[8222,'DOUBLE LOW-9 QUOTATION MARK','Ps','Common'], @@ -320,7 +320,7 @@ q{<>}=>[8596,'LEFT RIGHT ARROW','Sm','Common'], q{[9756,'WHITE LEFT POINTING INDEX','So','Common'], q{="}=>[1066,'CYRILLIC CAPITAL LETTER HARD SIGN','Lu','Cyrillic'], q{='}=>[1098,'CYRILLIC SMALL LETTER HARD SIGN','Ll','Cyrillic'], -q{=+}=>[1613,'ARABIC KASRATAN','Mn','Inherited'], +q{=+}=>[1613,'ARABIC KASRATAN','Mn','Inherited','◌ٍ'], q{=2}=>[8215,'DOUBLE LOW LINE','Po','Common'], q{=3}=>[8801,'IDENTICAL TO','Sm','Common'], q{=<}=>[8804,'LESS-THAN OR EQUAL TO','Sm','Common'], @@ -340,7 +340,7 @@ q{>7}=>[8969,'RIGHT CEILING','Sm','Common'], q{>=}=>[8805,'GREATER-THAN OR EQUAL TO','Sm','Common'], q{>>}=>[187,'RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK','Pf Xl','Common'], q{>H}=>[9758,'WHITE RIGHT POINTING INDEX','So','Common'], -q{>V}=>[8407,'COMBINING RIGHT ARROW 
ABOVE','Mn','Inherited'], +q{>V}=>[8407,'COMBINING RIGHT ARROW ABOVE','Mn','Inherited','◌⃗'], q{?!}=>[8253,'INTERROBANG','Po Xz','Common'], q{?*}=>[8128,'GREEK PERISPOMENI','Sk','Greek'], q{?+}=>[1567,'ARABIC QUESTION MARK','Po','Common'], @@ -379,10 +379,10 @@ q{A=}=>[1040,'CYRILLIC CAPITAL LETTER A','Lu','Cyrillic'], q{A>}=>[194,'LATIN CAPITAL LETTER A WITH CIRCUMFLEX','Lu Xl','Latin'], q{A?}=>[195,'LATIN CAPITAL LETTER A WITH TILDE','Lu Xl','Latin'], q{AA}=>[197,'LATIN CAPITAL LETTER A WITH RING ABOVE','Lu Xl','Latin'], -q{AC}=>[159,'','Cc Xl','Common'], +q{AC}=>[159,'','Cc Xl','Common','�'], q{AE}=>[198,'LATIN CAPITAL LETTER AE','Lu Xl','Latin'], q{AJ}=>[12624,'HANGUL LETTER AE','Lo Xz','Hangul'], -q{AK}=>[9222,'','Cc Xa','Common'], +q{AK}=>[6,'','Cc Xa','Common','␆'], q{AN}=>[8743,'LOGICAL AND','Sm','Common'], q{AO}=>[8491,'ANGSTROM SIGN','Lu','Latin'], q{At}=>[64,'COMMERCIAL AT','Po Xa','Common'], @@ -395,10 +395,10 @@ q{B2}=>[385,'LATIN CAPITAL LETTER B WITH HOOK','Lu Xz','Latin'], q{B=}=>[1041,'CYRILLIC CAPITAL LETTER BE','Lu','Cyrillic'], q{BB}=>[166,'BROKEN BAR','So Xl','Common'], q{BD}=>[9586,'BOX DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT','So','Common'], -q{BH}=>[130,'','Cc Xl','Common'], +q{BH}=>[130,'','Cc Xl','Common','�'], q{BJ}=>[12611,'HANGUL LETTER SSANGPIEUP','Lo Xz','Hangul'], -q{BL}=>[9223,'','Cc Xa','Common'], -q{BS}=>[9224,'','Cc Xa','Common'], +q{BL}=>[7,'','Cc Xa','Common','␇'], +q{BS}=>[8,'','Cc Xa','Common','␈'], q{B_}=>[7686,'LATIN CAPITAL LETTER B WITH LINE BELOW','Lu','Latin'], q{Ba}=>[12496,'KATAKANA LETTER BA','Lo','Katakana'], q{Bd}=>[9699,'BLACK LOWER LEFT TRIANGLE','So','Common'], @@ -416,11 +416,11 @@ q{C3}=>[1152,'CYRILLIC CAPITAL LETTER KOPPA','Lu','Cyrillic'], q{C<}=>[268,'LATIN CAPITAL LETTER C WITH CARON','Lu','Latin'], q{C=}=>[1062,'CYRILLIC CAPITAL LETTER TSE','Lu','Cyrillic'], q{C>}=>[264,'LATIN CAPITAL LETTER C WITH CIRCUMFLEX','Lu','Latin'], -q{CC}=>[148,'','Cc Xl','Common'], +q{CC}=>[148,'','Cc 
Xl','Common','�'], q{CG}=>[8766,'INVERTED LAZY S','Sm','Common'], -q{CI}=>[155,'','Cc Xl','Common'], -q{CN}=>[9240,'','Cc Xa','Common'], -q{CR}=>[9229,'','Cc Xa','Common'], +q{CI}=>[155,'','Cc Xl','Common','�'], +q{CN}=>[24,'','Cc Xa','Common','␘'], +q{CR}=>[13,'','Cc Xa','Common','␍'], q{Ca}=>[8248,'CARET','Po','Common'], q{Ci}=>[9675,'WHITE CIRCLE','So','Common'], q{Co}=>[169,'COPYRIGHT SIGN','So Xl','Common'], @@ -433,24 +433,24 @@ q{D,}=>[7696,'LATIN CAPITAL LETTER D WITH CEDILLA','Lu','Latin'], q{D-}=>[208,'LATIN CAPITAL LETTER ETH','Lu Xl','Latin'], q{D.}=>[7690,'LATIN CAPITAL LETTER D WITH DOT ABOVE','Lu','Latin'], q{D/}=>[272,'LATIN CAPITAL LETTER D WITH STROKE','Lu','Latin'], -q{D1}=>[9233,'','Cc Xa','Common'], -q{D2}=>[9234,'','Cc Xa','Common'], -q{D3}=>[9235,'','Cc Xa','Common'], -q{D4}=>[9236,'','Cc Xa','Common'], +q{D1}=>[17,'','Cc Xa','Common','␑'], +q{D2}=>[18,'','Cc Xa','Common','␒'], +q{D3}=>[19,'','Cc Xa','Common','␓'], +q{D4}=>[20,'','Cc Xa','Common','␔'], q{D;}=>[393,'LATIN CAPITAL LETTER AFRICAN D','Lu Xz','Latin'], q{D<}=>[270,'LATIN CAPITAL LETTER D WITH CARON','Lu','Latin'], q{D=}=>[1044,'CYRILLIC CAPITAL LETTER DE','Lu','Cyrillic'], -q{DC}=>[144,'','Cc Xl','Common'], +q{DC}=>[144,'','Cc Xl','Common','�'], q{DE}=>[8710,'INCREMENT','Sm','Common'], q{DG}=>[176,'DEGREE SIGN','So Xl','Common'], q{DH}=>[9523,'BOX DRAWINGS HEAVY DOWN AND HORIZONTAL','So','Common'], q{DI}=>[8748,'DOUBLE INTEGRAL','Sm','Common'], q{DJ}=>[12600,'HANGUL LETTER SSANGTIKEUT','Lo Xz','Hangul'], -q{DL}=>[9232,'','Cc Xa','Common'], +q{DL}=>[16,'','Cc Xa','Common','␐'], q{DO}=>[36,'DOLLAR SIGN','Sc Xa','Common'], q{DR}=>[9487,'BOX DRAWINGS HEAVY DOWN AND RIGHT','So','Common'], q{DS}=>[1029,'CYRILLIC CAPITAL LETTER DZE','Lu','Cyrillic'], -q{DT}=>[127,'','Cc Xa','Common'], +q{DT}=>[127,'','Cc Xa','Common','�'], q{DZ}=>[1039,'CYRILLIC CAPITAL LETTER DZHE','Lu','Cyrillic'], q{D_}=>[7694,'LATIN CAPITAL LETTER D WITH LINE BELOW','Lu','Latin'], q{Da}=>[12480,'KATAKANA LETTER 
DA','Lo','Katakana'], @@ -485,17 +485,17 @@ q{E<}=>[282,'LATIN CAPITAL LETTER E WITH CARON','Lu','Latin'], q{E=}=>[1045,'CYRILLIC CAPITAL LETTER IE','Lu','Cyrillic'], q{E>}=>[202,'LATIN CAPITAL LETTER E WITH CIRCUMFLEX','Lu Xl','Latin'], q{E?}=>[7868,'LATIN CAPITAL LETTER E WITH TILDE','Lu','Latin'], -q{EB}=>[9239,'','Cc Xa','Common'], -q{EC}=>[9243,'','Cc Xa','Common'], +q{EB}=>[23,'','Cc Xa','Common','␗'], +q{EC}=>[27,'','Cc Xa','Common','␛'], q{ED}=>[439,'LATIN CAPITAL LETTER EZH','Lu','Latin'], -q{EG}=>[151,'','Cc Xl','Common'], +q{EG}=>[151,'','Cc Xl','Common','�'], q{EJ}=>[12628,'HANGUL LETTER E','Lo Xz','Hangul'], q{EK}=>[12630,'HANGUL LETTER YE','Lo Xz','Hangul'], -q{EM}=>[9241,'','Cc Xa','Common'], -q{EQ}=>[9221,'','Cc Xa','Common'], -q{ES}=>[135,'','Cc Xl','Common'], -q{ET}=>[9220,'','Cc Xa','Common'], -q{EX}=>[9219,'','Cc Xa','Common'], +q{EM}=>[25,'','Cc Xa','Common','␙'], +q{EQ}=>[5,'','Cc Xa','Common','␅'], +q{ES}=>[135,'','Cc Xl','Common','�'], +q{ET}=>[4,'','Cc Xa','Common','␄'], +q{EX}=>[3,'','Cc Xa','Common','␃'], q{EZ}=>[494,'LATIN CAPITAL LETTER EZH WITH CARON','Lu','Latin'], q{Eh}=>[8962,'HOUSE','So','Common'], q{Eu}=>[8364,'EURO SIGN','Sc Xz','Common'], @@ -507,11 +507,11 @@ q{F=}=>[1060,'CYRILLIC CAPITAL LETTER EF','Lu','Cyrillic'], q{FA}=>[8704,'FOR ALL','Sm','Common'], q{FB}=>[9608,'FULL BLOCK','So','Common'], q{FD}=>[9585,'BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT','So','Common'], -q{FF}=>[9228,'
','Cc Xa','Common'], +q{FF}=>[12,'','Cc Xa','Common','␌'], q{FI}=>[8498,'TURNED CAPITAL F','So Xz','Common'], -q{FS}=>[9244,'','Cc Xa','Common'], +q{FS}=>[28,'','Cc Xa','Common','␜'], q{Fd}=>[9698,'BLACK LOWER RIGHT TRIANGLE','So','Common'], -q{Fl}=>[9216,'','Cc Xa','Common'], +q{Fl}=>[0,'','Cc Xa','Common','␀'], q{Fm}=>[9792,'FEMALE SIGN','So','Common'], q{G%}=>[1027,'CYRILLIC CAPITAL LETTER GJE','Lu','Cyrillic'], q{G'}=>[500,'LATIN CAPITAL LETTER G WITH ACUTE','Lu','Latin'], @@ -527,10 +527,10 @@ q{G3}=>[1168,'CYRILLIC CAPITAL LETTER GHE WITH UPTURN','Lu','Cyrillic'], q{G<}=>[486,'LATIN CAPITAL LETTER G WITH CARON','Lu','Latin'], q{G=}=>[1043,'CYRILLIC CAPITAL LETTER GHE','Lu','Cyrillic'], q{G>}=>[284,'LATIN CAPITAL LETTER G WITH CIRCUMFLEX','Lu','Latin'], -q{GC}=>[153,'','Cc Xl','Common'], +q{GC}=>[153,'','Cc Xl','Common','�'], q{GF}=>[915,'GREEK CAPITAL LETTER GAMMA','Lu','Greek'], q{GJ}=>[12594,'HANGUL LETTER SSANGKIYEOK','Lo Xz','Hangul'], -q{GS}=>[9245,'','Cc Xa','Common'], +q{GS}=>[29,'','Cc Xa','Common','␝'], q{Ga}=>[12460,'KATAKANA LETTER GA','Lo','Katakana'], q{Ge}=>[12466,'KATAKANA LETTER GE','Lo','Katakana'], q{Gi}=>[12462,'KATAKANA LETTER GI','Lo','Katakana'], @@ -547,10 +547,10 @@ q{H=}=>[1061,'CYRILLIC CAPITAL LETTER HA','Lu','Cyrillic'], q{H>}=>[292,'LATIN CAPITAL LETTER H WITH CIRCUMFLEX','Lu','Latin'], q{HH}=>[9473,'BOX DRAWINGS HEAVY HORIZONTAL','So','Common'], q{HI}=>[8787,'IMAGE OF OR APPROXIMATELY EQUAL TO','Sm','Common'], -q{HJ}=>[137,'','Cc Xl','Common'], -q{HO}=>[129,'','Cc Xl','Common'], -q{HS}=>[136,'','Cc Xl','Common'], -q{HT}=>[9225,'','Cc Xa','Common'], +q{HJ}=>[137,'','Cc Xl','Common','�'], +q{HO}=>[129,'','Cc Xl','Common','�'], +q{HS}=>[136,'','Cc Xl','Common','�'], +q{HT}=>[9,'','Cc Xa','Common','␉'], q{HW}=>[502,'LATIN CAPITAL LETTER HWAIR','Lu Xz','Latin'], q{Ha}=>[12495,'KATAKANA LETTER HA','Lo','Katakana'], q{He}=>[12504,'KATAKANA LETTER HE','Lo','Katakana'], @@ -579,7 +579,7 @@ q{I?}=>[296,'LATIN CAPITAL LETTER I WITH 
TILDE','Lu','Latin'], q{IE}=>[1028,'CYRILLIC CAPITAL LETTER UKRAINIAN IE','Lu','Cyrillic'], q{II}=>[1030,'CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I','Lu','Cyrillic'], q{IJ}=>[306,'LATIN CAPITAL LIGATURE IJ','Lu','Latin'], -q{IN}=>[132,'','Cc Xl','Common'], +q{IN}=>[132,'','Cc Xl','Common','�'], q{IO}=>[1025,'CYRILLIC CAPITAL LETTER IO','Lu','Cyrillic'], q{IS}=>[12288,'IDEOGRAPHIC SPACE','Zs','Common'], q{Ic}=>[9689,'INVERSE WHITE CIRCLE','So','Common'], @@ -627,7 +627,7 @@ q{L=}=>[1051,'CYRILLIC CAPITAL LETTER EL','Lu','Cyrillic'], q{L?}=>[11362,'','Xn Xz',''], q{LB}=>[9604,'LOWER HALF BLOCK','So','Common'], q{LD}=>[9491,'BOX DRAWINGS HEAVY DOWN AND LEFT','So','Common'], -q{LF}=>[9226,'','Cc Xa','Common'], +q{LF}=>[10,'','Cc Xa','Common','␊'], q{LJ}=>[1033,'CYRILLIC CAPITAL LETTER LJE','Lu','Cyrillic'], q{LZ}=>[9674,'LOZENGE','So','Common'], q{L_}=>[7738,'LATIN CAPITAL LETTER L WITH LINE BELOW','Lu','Latin'], @@ -642,7 +642,7 @@ q{M3}=>[988,'GREEK LETTER DIGAMMA','Lu','Greek'], q{M8}=>[9834,'EIGHTH NOTE','So','Common'], q{M=}=>[1052,'CYRILLIC CAPITAL LETTER EM','Lu','Cyrillic'], q{MI}=>[412,'LATIN CAPITAL LETTER TURNED M','Lu Xz','Latin'], -q{MW}=>[149,'','Cc Xl','Common'], +q{MW}=>[149,'','Cc Xl','Common','�'], q{MX}=>[9839,'MUSIC SHARP SIGN','Sm','Common'], q{Ma}=>[12510,'KATAKANA LETTER MA','Lo','Katakana'], q{Mb}=>[9837,'MUSIC FLAT SIGN','So','Common'], @@ -669,14 +669,14 @@ q{N=}=>[1053,'CYRILLIC CAPITAL LETTER EN','Lu','Cyrillic'], q{N?}=>[209,'LATIN CAPITAL LETTER N WITH TILDE','Lu Xl','Latin'], q{NB}=>[8711,'NABLA','Sm','Common'], q{NG}=>[330,'LATIN CAPITAL LETTER ENG','Lu','Latin'], -q{NH}=>[131,'','Cc Xl','Common'], +q{NH}=>[131,'','Cc Xl','Common','�'], q{NI}=>[8976,'REVERSED NOT SIGN','So','Common'], q{NJ}=>[1034,'CYRILLIC CAPITAL LETTER NJE','Lu','Cyrillic'], -q{NK}=>[9237,'','Cc Xa','Common'], -q{NL}=>[133,'','Cc Xl','Common'], +q{NK}=>[21,'','Cc Xa','Common','␕'], +q{NL}=>[133,'','Cc Xl','Common','�'], q{NO}=>[172,'NOT SIGN','Sm Xl','Common'], 
q{NS}=>[160,'NO-BREAK SPACE','Zs Xl','Common'], -q{NU}=>[9216,'','Cc Xa','Common'], +q{NU}=>[0,'','Cc Xa','Common','␀'], q{N_}=>[7752,'LATIN CAPITAL LETTER N WITH LINE BELOW','Lu','Latin'], q{Na}=>[12490,'KATAKANA LETTER NA','Lo','Katakana'], q{Nb}=>[35,'NUMBER SIGN','Po Xa','Common'], @@ -707,7 +707,7 @@ q{O<}=>[465,'LATIN CAPITAL LETTER O WITH CARON','Lu','Latin'], q{O=}=>[1054,'CYRILLIC CAPITAL LETTER O','Lu','Cyrillic'], q{O>}=>[212,'LATIN CAPITAL LETTER O WITH CIRCUMFLEX','Lu Xl','Latin'], q{O?}=>[213,'LATIN CAPITAL LETTER O WITH TILDE','Lu Xl','Latin'], -q{OC}=>[157,'','Cc Xl','Common'], +q{OC}=>[157,'','Cc Xl','Common','�'], q{OE}=>[338,'LATIN CAPITAL LIGATURE OE','Lu','Latin'], q{OI}=>[418,'LATIN CAPITAL LETTER OI','Lu','Latin'], q{OJ}=>[12634,'HANGUL LETTER OE','Lo Xz','Hangul'], @@ -723,19 +723,19 @@ q{P*}=>[928,'GREEK CAPITAL LETTER PI','Lu','Greek'], q{P+}=>[1508,'HEBREW LETTER PE','Lo','Hebrew'], q{P.}=>[7766,'LATIN CAPITAL LETTER P WITH DOT ABOVE','Lu','Latin'], q{P/}=>[11363,'','Xn Xz',''], -q{P1}=>[145,'','Cc Xl','Common'], -q{P2}=>[146,'','Cc Xl','Common'], +q{P1}=>[145,'','Cc Xl','Common','�'], +q{P2}=>[146,'','Cc Xl','Common','�'], q{P3}=>[992,'GREEK LETTER SAMPI','Lu','Greek'], q{P=}=>[1055,'CYRILLIC CAPITAL LETTER PE','Lu','Cyrillic'], -q{PA}=>[128,'','Cc Xl','Common'], -q{PD}=>[139,'','Cc Xl','Common'], +q{PA}=>[128,'','Cc Xl','Common','�'], +q{PD}=>[139,'','Cc Xl','Common','�'], q{PI}=>[182,'PILCROW SIGN','So Xl','Common'], q{PL}=>[9664,'BLACK LEFT-POINTING TRIANGLE','So','Common'], -q{PM}=>[158,'','Cc Xl','Common'], +q{PM}=>[158,'','Cc Xl','Common','�'], q{PO}=>[8471,'SOUND RECORDING COPYRIGHT','So','Common'], q{PP}=>[8741,'PARALLEL TO','Sm','Common'], q{PR}=>[9654,'BLACK RIGHT-POINTING TRIANGLE','So','Common'], -q{PU}=>[140,'','Cc Xl','Common'], +q{PU}=>[140,'','Cc Xl','Common','�'], q{Pa}=>[12497,'KATAKANA LETTER PA','Lo','Katakana'], q{Pd}=>[163,'POUND SIGN','Sc Xl','Common'], q{Pe}=>[12506,'KATAKANA LETTER PE','Lo','Katakana'], @@ 
-760,10 +760,10 @@ q{R=}=>[1056,'CYRILLIC CAPITAL LETTER ER','Lu','Cyrillic'], q{RB}=>[9616,'RIGHT HALF BLOCK','So','Common'], q{RF}=>[9636,'SQUARE WITH HORIZONTAL FILL','So','Common'], q{RH}=>[9638,'SQUARE WITH ORTHOGONAL CROSSHATCH FILL','So','Common'], -q{RI}=>[141,'','Cc Xl','Common'], +q{RI}=>[141,'','Cc Xl','Common','�'], q{RK}=>[9640,'SQUARE WITH UPPER RIGHT TO LOWER LEFT FILL','So','Common'], q{RO}=>[9634,'WHITE SQUARE WITH ROUNDED CORNERS','So','Common'], -q{RS}=>[9246,'','Cc Xa','Common'], +q{RS}=>[30,'','Cc Xa','Common','␞'], q{RT}=>[8730,'SQUARE ROOT','Sm','Common'], q{RX}=>[9641,'SQUARE WITH DIAGONAL CROSSHATCH FILL','So','Common'], q{RY}=>[9637,'SQUARE WITH VERTICAL FILL','So','Common'], @@ -783,27 +783,27 @@ q{S*}=>[931,'GREEK CAPITAL LETTER SIGMA','Lu','Greek'], q{S+}=>[1505,'HEBREW LETTER SAMEKH','Lo','Hebrew'], q{S,}=>[350,'LATIN CAPITAL LETTER S WITH CEDILLA','Lu','Latin'], q{S.}=>[7776,'LATIN CAPITAL LETTER S WITH DOT ABOVE','Lu','Latin'], -q{S2}=>[142,'','Cc Xl','Common'], -q{S3}=>[143,'','Cc Xl','Common'], +q{S2}=>[142,'','Cc Xl','Common','�'], +q{S3}=>[143,'','Cc Xl','Common','�'], q{S<}=>[352,'LATIN CAPITAL LETTER S WITH CARON','Lu','Latin'], q{S=}=>[1057,'CYRILLIC CAPITAL LETTER ES','Lu','Cyrillic'], q{S>}=>[348,'LATIN CAPITAL LETTER S WITH CIRCUMFLEX','Lu','Latin'], -q{SA}=>[134,'','Cc Xl','Common'], -q{SB}=>[9242,'','Cc Xa','Common'], -q{SC}=>[154,'','Cc Xl','Common'], +q{SA}=>[134,'','Cc Xl','Common','�'], +q{SB}=>[26,'','Cc Xa','Common','␚'], +q{SC}=>[154,'','Cc Xl','Common','�'], q{SE}=>[167,'SECTION SIGN','So Xl','Common'], -q{SG}=>[150,'','Cc Xl','Common'], -q{SH}=>[9217,'','Cc Xa','Common'], -q{SI}=>[9231,'','Cc Xa','Common'], +q{SG}=>[150,'','Cc Xl','Common','�'], +q{SH}=>[1,'','Cc Xa','Common','␁'], +q{SI}=>[15,'','Cc Xa','Common','␏'], q{SJ}=>[12614,'HANGUL LETTER SSANGSIOS','Lo Xz','Hangul'], q{SM}=>[8480,'SERVICE MARK','So','Common'], -q{SO}=>[9230,'','Cc Xa','Common'], +q{SO}=>[14,'','Cc Xa','Common','␎'], q{SR}=>[9644,'BLACK 
RECTANGLE','So','Common'], -q{SS}=>[152,'','Cc Xl','Common'], -q{ST}=>[156,'','Cc Xl','Common'], +q{SS}=>[152,'','Cc Xl','Common','�'], +q{ST}=>[156,'','Cc Xl','Common','�'], q{SU}=>[9788,'WHITE SUN WITH RAYS','So','Common'], -q{SX}=>[9218,'','Cc Xa','Common'], -q{SY}=>[9238,'','Cc Xa','Common'], +q{SX}=>[2,'','Cc Xa','Common','␂'], +q{SY}=>[22,'','Cc Xa','Common','␖'], q{Sa}=>[12469,'KATAKANA LETTER SA','Lo','Katakana'], q{Sb}=>[8729,'BULLET OPERATOR','Sm','Common'], q{Sc}=>[1065,'CYRILLIC CAPITAL LETTER SHCHA','Lu','Cyrillic'], @@ -828,7 +828,7 @@ q{TE}=>[8707,'THERE EXISTS','Sm','Common'], q{TH}=>[222,'LATIN CAPITAL LETTER THORN','Lu Xl','Latin'], q{TM}=>[8482,'TRADE MARK SIGN','So','Common'], q{TR}=>[8981,'TELEPHONE RECORDER','So','Common'], -q{TS}=>[147,'','Cc Xl','Common'], +q{TS}=>[147,'','Cc Xl','Common','�'], q{TU}=>[12483,'KATAKANA LETTER SMALL TU','Lo','Katakana'], q{T_}=>[7790,'LATIN CAPITAL LETTER T WITH LINE BELOW','Lu','Latin'], q{Ta}=>[12479,'KATAKANA LETTER TA','Lo','Katakana'], @@ -869,7 +869,7 @@ q{UJ}=>[12637,'HANGUL LETTER WEO','Lo Xz','Hangul'], q{UK}=>[12684,'HANGUL LETTER YU-I','Lo Xz','Hangul'], q{UL}=>[9499,'BOX DRAWINGS HEAVY UP AND LEFT','So','Common'], q{UR}=>[9495,'BOX DRAWINGS HEAVY UP AND RIGHT','So','Common'], -q{US}=>[9247,'','Cc Xa','Common'], +q{US}=>[31,'','Cc Xa','Common','␟'], q{UT}=>[9650,'BLACK UP-POINTING TRIANGLE','So','Common'], q{Uh}=>[9528,'BOX DRAWINGS UP HEAVY AND HORIZONTAL LIGHT','So','Common'], q{Ul}=>[9498,'BOX DRAWINGS UP HEAVY AND LEFT LIGHT','So','Common'], @@ -885,8 +885,8 @@ q{VI}=>[581,'','Xn Xz',''], q{VJ}=>[12638,'HANGUL LETTER WE','Lo Xz','Hangul'], q{VL}=>[9515,'BOX DRAWINGS HEAVY VERTICAL AND LEFT','So','Common'], q{VR}=>[9507,'BOX DRAWINGS HEAVY VERTICAL AND RIGHT','So','Common'], -q{VS}=>[138,'','Cc Xl','Common'], -q{VT}=>[9227,'','Cc Xa','Common'], +q{VS}=>[138,'','Cc Xl','Common','�'], +q{VT}=>[11,'','Cc Xa','Common','␋'], q{VV}=>[9475,'BOX DRAWINGS HEAVY VERTICAL','So','Common'], 
q{Va}=>[12535,'KATAKANA LETTER VA','Lo','Katakana'], q{Ve}=>[12537,'KATAKANA LETTER VE','Lo','Katakana'], @@ -990,7 +990,7 @@ q{aJ}=>[12623,'HANGUL LETTER A','Lo Xz','Hangul'], q{aM}=>[1570,'ARABIC LETTER ALEF WITH MADDA ABOVE','Lo','Arabic'], q{aN}=>[12580,'BOPOMOFO LETTER ANG','Lo','Bopomofo'], q{aR}=>[8553,'ROMAN NUMERAL TEN','Nl','Common'], -q{aS}=>[1648,'ARABIC LETTER SUPERSCRIPT ALEF','Mn','Inherited'], +q{aS}=>[1648,'ARABIC LETTER SUPERSCRIPT ALEF','Mn','Inherited','◌ٰ'], q{aa}=>[229,'LATIN SMALL LETTER A WITH RING ABOVE','Ll Xl','Latin'], q{ac}=>[8448,'ACCOUNT OF','So Xz','Common'], q{ae}=>[230,'LATIN SMALL LETTER AE','Ll Xl','Latin'], diff --git a/digraphs.plp b/digraphs.plp index a8f681d..ab04efd 100644 --- a/digraphs.plp +++ b/digraphs.plp @@ -61,9 +61,9 @@ for my $c1group (@chars) { printf '', quote($mnem); next; } - my ($codepoint, $name, $prop, $script) = @{ $di->{$mnem} }; + my ($codepoint, $name, $prop, $script, $string) = @{ $di->{$mnem} }; - my $glyph = chr $codepoint; + my $glyph = $string || chr $codepoint; utf8::upgrade($glyph); # prevent latin1 output my $desc = $mnem . ($name && " ($name)"); my @class = ('X', grep {$_} $prop, $script); diff --git a/rfc1345convert b/rfc1345convert index c6131ad..cf2d937 100644 --- a/rfc1345convert +++ b/rfc1345convert @@ -124,14 +124,15 @@ for (values %info) { $info{$_}->{category} .= ' Xz' for @extra; for (keys %di) { + $info{$_}->{string} = chr(9676) . chr($di{$_}) if $info{$_}->{combining}; # find control characters (first 32 chars from 0 and 128) - next if $di{$_} & ~0b1001_1111; + next unless ($di{$_} & ~0b1001_1111) == 0 or $di{$_} == 127; # rename to something more descriptive $info{$_}->{name} = $info{$_}->{unicode10} ? '<'.$info{$_}->{unicode10}.'>' # the old name was much more useful : sprintf('', $di{$_}); # at least identify by value # show descriptive symbols instead of control chars themselves - $di{$_} += 0x2400 if $di{$_} < 32; + $info{$_}->{string} = $di{$_} < 32 ? 
chr($di{$_} + 0x2400) : chr(0xFFFD); } # output perl code of hash @@ -141,9 +142,10 @@ printf '(map {$_=>0} qw{%s}),'."\n", join(' ', map { substr($_, 1, 1).substr($_, 0, 1) } sort keys %di ); printf "q{%s}=>[%s],\n", $_, join(',', - $di{$_}, # glyph code point + $di{$_}, # original code point $info{$_} # optional additional arguments - ? map {"'$_'"} @{ $info{$_} }{qw/name category script/} + ? map {"'$_'"} @{ $info{$_} }{qw/name category script/}, + $info{$_}->{string} || () : () ) for sort keys %di; print "}\n"; -- 2.30.0