From dbddc268edc610c50c7ec27de30e13bdd585b377 Mon Sep 17 00:00:00 2001 From: Mischa POSLAWSKY Date: Sat, 13 Sep 2008 21:42:24 +0000 Subject: [PATCH] digraphs: map private use characters to modern equivalents RFC-1345 contains several characters in the private use block (for various unofficial proposals at that time) which by now mostly have official Unicode designations. Using the character value instead of intended meaning is imho stupid (as the digraphs don't make any kind of sense for most modern usage) even though Vim and other adopters do so, probably unknowingly. Try to convert these to suitable standard equivalents (going by character names, Google, context, and some guesswork). --- digraphs.css | 6 ++- digraphs.inc.pl | 116 +++++++++++++++++++++++++++++++++--------------- digraphs.plp | 1 + rfc1345convert | 46 +++++++++++++++++++ shiar.inc.txt | 48 ++++++++++++++++++++ 5 files changed, 178 insertions(+), 39 deletions(-) diff --git a/digraphs.css b/digraphs.css index dc62529..52fbc21 100644 --- a/digraphs.css +++ b/digraphs.css @@ -86,8 +86,9 @@ td.Cyrillic {background: #FFDDA8} td.Latin {background: #FFB} td.Hebrew {background: #FFD} td.Arabic {background: #EFE} -td.Hiragana {background: #DFC} +td.Hangul {background: #DEA} td.Katakana {background: #DFA} +td.Hiragana {background: #DFC} td.Bopomofo {background: #BFC} /* other categories */ @@ -110,8 +111,9 @@ td.Cyrillic:hover {background: #FB7} td.Latin:hover {background: #FF6} td.Hebrew:hover {background: #FFA} td.Arabic:hover {background: #CFD} -td.Hiragana:hover {background: #AF8} +td.Hangul:hover {background: #CE6} td.Katakana:hover {background: #BF7} +td.Hiragana:hover {background: #AF8} td.Bopomofo:hover {background: #8FA} td.Nd:hover, td.Nl:hover, td.No:hover {background: #F88} /* number */ td.Sc:hover {background: #F8C} /* currency */ diff --git a/digraphs.inc.pl b/digraphs.inc.pl index 79d09ec..4b4fcae 100644 --- a/digraphs.inc.pl +++ b/digraphs.inc.pl @@ -1,8 +1,8 @@ +{ -(map {$_=>0} qw{!! )! *! -! 2! :! ! I! b! c! q! x! !" "" '" (" +" ," -" ." /" 0" 1" 3" 5" 6" 9" :" ;" <" =" >" ?" _" d" i" p" "% '% 0% 2% 5% 6% !' "' %' '' (' +' ,' -' .' 0' 6' 9' :' ;' <' >' ?' G' m' n' !( "( '( -( A( C( I( S( U( _( s( ") ') >) C) I) S) U) _) s) ** -* 1* 2* 5* 6* >* P* X* _* s* "+ ++ -+ 5+ 6+ S+ Z+ _+ s+ !, ', +, ,, G, _, !- )- +- -- 1- 2- 3- 6- :- >- ?- L- M- N- S- T- V- X- a- o- s- v- .. 3. 6. 9. :. M. P. S. _. +/ -/ // 0/ =/ >/ c/ f/ (0 +0 .0 00 20 50 L0 M0 R0 S0 U0 _0 a0 m0 o0 s0 u0 "1 '1 +1 .1 21 31 41 51 61 81 H1 M1 N1 R1 S1 T1 a1 c1 h1 j1 r1 s1 "2 %2 '2 .2 /2 32 52 R2 S2 a2 c2 h2 j2 r2 s2 !3 "3 '3 *3 +3 -3 .3 /3 43 53 83 M3 R3 S3 _3 a3 c3 h3 j3 r3 s3 !4 -4 .4 /4 54 M4 R4 S4 _4 a4 c4 h4 j4 r4 s4 %5 .5 65 85 R5 S5 a5 c5 r5 s5 %6 .6 M6 R6 S6 a6 c6 r6 s6 .7 87 <7 >7 R7 S7 a7 c7 r7 s7 .8 R8 S8 a8 c8 r8 s8 "9 '9 .9 R9 S9 a9 c9 r9 s9 (: ): +: .: 3: 9: :: R: S: X: !; '; +; ;; S; _; "< '< (< *< +< -< /< 1< 3< 7< << =< >< H< "= '= += 2= 3= <= == >= ?= S= T= _= e= s= "> '> +> 1> 7> => >> H> V> !? *? +? ,? -? .? /? 1? 2? :? ;? =? I? S? !A %A 'A (A )A *A +A -A .A 0A 1A 2A 3A 5A 6A 7A 8A :A ;A A ?A AA CA EA KA NA OA tA *B +B -B .B /B 2B =B BB DB HB LB SB _B aB dB eB iB oB uB %C 'C *C ,C .C 2C 3C C CC GC IC NC RC aC iC oC tC uC %D *D +D ,D -D .D /D 1D 2D 3D 4D ;D E ?E BE CE DE GE ME QE SE TE XE ZE hE uE *F .F 2F 3F =F AF BF DF FF IF SF dF lF mF %G 'G (G *G +G ,G -G .G /G 2G 3G G CG FG SG aG eG iG oG uG 'H *H +H ,H .H /H :H =H >H HH IH JH OH SH TH WH aH eH iH oH uH !I %I 'I (I )I *I -I .I /I 2I 5I 6I 8I :I ;I I ?I EI II JI NI OI SI cI lI nI oI uI %J *J +J /J J AJ EJ UJ %K 'K *K +K ,K 2K 3K O ?O CO EO IO KO RO SO bO mO rO %P 'P *P +P .P /P 1P 2P 3P =P AP DP IP LP MP OP PP RP UP aP dP eP iP oP tP uP *Q +Q ;Q 'R )R *R +R ,R .R /R 8R ;R S AS BS CS ES GS HS IS MS OS RS SS TS US XS YS aS bS cS eS hS iS nS oS uS *T +T ,T .T /T 2T 3T ;T U ?U AU BU DU HU LU RU SU TU hU lU rU %V *V 2V 3V =V ?V HV IV LV RV SV TV VV aV eV hV iV lV oV rV sV uV !W %W 'W *W +W .W :W =W >W AW WW aW eW iW oW *X +X .X :X XX !Y %Y 'Y *Y -Y .Y /Y 2Y 3Y :Y ;Y =Y >Y ?Y AY IY OY UY YY aY eY oY uY %Z 'Z *Z +Z .Z /Z 2Z Z JZ _Z aZ eZ iZ jZ oZ uZ !a %a 'a (a )a *a +a -a .a 0a 1a 2a 3a 4a 5a 6a 7a 8a :a ;a a ?a Ha Ia Ma Na Ra Sa aa ca ea ha ia ma na ra sa ua *b +b -b .b /b 2b 3b 4b =b ?b Rb _b ab eb ib ob rb ub %c 'c *c +c ,c .c 2c 3c 4c 9c c Cc Dc Hc Rc Sc hc oc rc uc %d *d +d ,d -d .d /d 2d 4d 9d ;d e ?e Ie Ne de ie ne re ze (f *f +f .f 2f 3f 4f =f ?f If Sf ff if lf tf %g 'g (g *g +g ,g -g .g /g 2g 3g 4g g Ig ag eg fg ig ng og ug *h +h ,h .h /h 2h 4h :h =h >h Ih _h ah eh hh ih kh oh uh wh !i %i 'i (i )i *i +i -i .i /i 2i 3i 4i 5i 6i 8i :i ;i i ?i Ii ei ii ji oi ui %j *j +j .j /j 3j 4j j aj ej uj 'k *k +k ,k 2k 3k 4k o ?o Co Fo Io eo io uo 'p *p +p .p /p 2p 3p 4p =p ?p ap ep ip mp op up *q +q 2q 4q ;q pq 'r )r *r +r ,r .r /r 4r 8r ;r s ?s Bs as cs es hs is ns os ss ts us *t +t ,t .t /t 2t 3t 4t 9t :t ;t u ?u Hu Lu Ru Tu hu lu ru %v *v +v 2v 3v 4v 9v =v ?v Hv Iv Lv Rv hv lv rv uv vv !w %w 'w *w +w .w 0w :w >w Aw Hw Iw aw ew iw ow ww *x +x .x 4x :x !y %y 'y *y +y -y .y /y 0y 2y 3y :y ;y =y >y ?y Ay Hy Iy Oy Uy ay iy oy ry uy yy %z 'z *z +z .z /z 2z 4z 9z ;z z ?z Hz _z az ez hz iz oz uz}), +(map {$_=>0} qw{!! )! *! -! 2! :! ! I! b! c! q! x! !" "" '" (" +" ," -" ." /" 0" 1" 3" 5" 6" 9" :" ;" <" =" >" ?" _" d" i" p" "% '% 0% 2% 5% 6% !' "' %' '' (' +' ,' -' .' 0' 6' 9' :' ;' <' >' ?' G' m' n' !( "( '( -( A( C( I( S( U( _( s( ") ') >) C) I) S) U) _) s) ** -* 1* 2* 5* 6* >* P* X* _* s* "+ ++ -+ 5+ 6+ J+ S+ Z+ _+ s+ !, ', +, ,, G, _, !- )- +- -- 1- 2- 3- 6- :- >- ?- J- L- M- N- S- T- V- X- a- o- s- v- .. 3. 6. 9. :. M. P. S. _. +/ -/ // 0/ =/ >/ c/ f/ (0 +0 .0 00 20 50 J0 L0 M0 R0 S0 U0 _0 a0 m0 o0 s0 u0 "1 '1 +1 .1 21 31 41 51 61 81 H1 M1 N1 R1 S1 T1 a1 c1 h1 j1 r1 s1 "2 %2 '2 .2 /2 32 52 R2 S2 a2 c2 h2 j2 r2 s2 !3 "3 '3 *3 +3 -3 .3 /3 43 53 83 M3 R3 S3 _3 a3 c3 h3 j3 r3 s3 !4 -4 .4 /4 54 M4 R4 S4 _4 a4 c4 h4 j4 r4 s4 %5 .5 65 85 R5 S5 a5 c5 r5 s5 %6 .6 M6 R6 S6 a6 c6 r6 s6 .7 87 <7 >7 R7 S7 a7 c7 r7 s7 .8 R8 S8 a8 c8 r8 s8 "9 '9 .9 R9 S9 a9 c9 r9 s9 (: ): +: .: 3: 9: :: R: S: X: !; '; +; ;; S; _; "< '< (< *< +< -< /< 1< 3< 7< << =< >< H< "= '= += 2= 3= <= == >= ?= S= T= _= e= s= "> '> +> 1> 7> => >> H> V> !? *? +? ,? -? .? /? 1? 2? :? ;? =? I? S? !A %A 'A (A )A *A +A -A .A 0A 1A 2A 3A 5A 6A 7A 8A :A ;A A ?A AA CA EA JA KA NA OA tA *B +B -B .B /B 2B =B BB DB HB JB LB SB _B aB dB eB iB oB uB %C 'C *C ,C .C 2C 3C C CC GC IC NC RC aC iC oC tC uC %D *D +D ,D -D .D /D 1D 2D 3D 4D ;D E ?E BE CE DE GE JE KE ME QE SE TE XE ZE hE uE *F .F 2F 3F =F AF BF DF FF IF SF dF lF mF %G 'G (G *G +G ,G -G .G /G 2G 3G G CG FG JG SG aG eG iG oG uG 'H *H +H ,H .H /H :H =H >H HH IH JH OH SH TH WH aH eH iH oH uH !I %I 'I (I )I *I -I .I /I 2I 5I 6I 8I :I ;I I ?I EI II JI NI OI SI cI lI nI oI uI %J *J +J /J J AJ EJ JJ UJ %K 'K *K +K ,K 2K 3K O ?O CO EO IO JO KO RO SO bO mO rO %P 'P *P +P .P /P 1P 2P 3P =P AP DP IP LP MP OP PP RP UP aP dP eP iP oP tP uP *Q +Q ;Q 'R )R *R +R ,R .R /R 8R ;R S AS BS CS ES GS HS IS JS MS OS RS SS TS US XS YS aS bS cS eS hS iS nS oS uS *T +T ,T .T /T 2T 3T ;T U ?U AU BU DU HU JU KU LU RU SU TU hU lU rU %V *V 2V 3V =V ?V HV IV JV LV RV SV TV VV aV eV hV iV lV oV rV sV uV !W %W 'W *W +W .W :W =W >W AW JW WW aW eW iW oW *X +X .X :X XX !Y %Y 'Y *Y -Y .Y /Y 2Y 3Y :Y ;Y =Y >Y ?Y AY IY JY OY UY YY aY eY oY uY %Z 'Z *Z +Z .Z /Z 2Z Z JZ _Z aZ eZ iZ jZ oZ uZ !a %a 'a (a )a *a +a -a .a 0a 1a 2a 3a 4a 5a 6a 7a 8a :a ;a a ?a Ha Ia Ja Ma Na Ra Sa aa ca ea ha ia ma na ra sa ua *b +b -b .b /b 2b 3b 4b =b ?b Jb Rb _b ab eb ib ob rb ub %c 'c *c +c ,c .c 2c 3c 4c 9c c Cc Dc Hc Jc Rc Sc hc oc rc uc %d *d +d ,d -d .d /d 2d 4d 9d ;d e ?e Ie Je Ke Ne de ie ne re ze (f *f +f .f 2f 3f 4f =f ?f If Sf ff if lf tf %g 'g (g *g +g ,g -g .g /g 2g 3g 4g g Ig Jg ag eg fg ig ng og ug *h +h ,h .h /h 2h 4h :h =h >h Ih Jh _h ah eh hh ih kh oh uh wh !i %i 'i (i )i *i +i -i .i /i 2i 3i 4i 5i 6i 8i :i ;i i ?i Ii Ji ei ii ji oi ui %j *j +j .j /j 3j 4j j Jj aj ej uj 'k *k +k ,k 2k 3k 4k o ?o Co Fo Io Jo Ko eo io uo 'p *p +p .p /p 2p 3p 4p =p ?p Jp ap ep ip mp op up *q +q 2q 4q ;q pq 'r )r *r +r ,r .r /r 4r 8r ;r s ?s Bs Js as cs es hs is ns os ss ts us *t +t ,t .t /t 2t 3t 4t 9t :t ;t u ?u Hu Ju Ku Lu Ru Tu hu lu ru %v *v +v 2v 3v 4v 9v =v ?v Hv Iv Jv Lv Rv hv lv rv uv vv !w %w 'w *w +w .w 0w :w >w Aw Hw Iw Jw aw ew iw ow ww *x +x .x 4x :x Jx !y %y 'y *y +y -y .y /y 0y 2y 3y :y ;y =y >y ?y Ay Hy Iy Jy Oy Uy ay iy oy ry uy yy %z 'z *z +z .z /z 2z 4z 9z ;z z ?z Hz _z az ez hz iz oz uz}), q{!!}=>[124,'VERTICAL LINE','Sm Xa','Common'], q{!)}=>[125,'RIGHT CURLY BRACKET','Pe Xa','Common'], -q{!*}=>[57382,'','Co','Common'], +q{!*}=>[8175,'GREEK VARIA','Sk','Greek'], q{!-}=>[450,'LATIN LETTER ALVEOLAR CLICK','Lo Xz','Latin'], q{!2}=>[8214,'DOUBLE VERTICAL LINE','Po','Common'], q{!:}=>[7942,'GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI','Ll','Greek'], @@ -14,31 +14,31 @@ q{!b}=>[664,'LATIN LETTER BILABIAL CLICK','Ll Xz','Latin'], q{!c}=>[448,'LATIN LETTER DENTAL CLICK','Lo Xz','Latin'], q{!q}=>[451,'LATIN LETTER RETROFLEX CLICK','Lo Xz','Latin'], q{!x}=>[449,'LATIN LETTER LATERAL CLICK','Lo Xz','Latin'], -q{"!}=>[57350,'','Co','Common'], -q{""}=>[57359,'','Co','Common'], -q{"'}=>[57351,'','Co','Common'], -q{"(}=>[57355,'','Co','Common'], +q{"!}=>[768,'COMBINING GRAVE ACCENT','Mn','Inherited'], +q{""}=>[779,'COMBINING DOUBLE ACUTE ACCENT','Mn','Inherited'], +q{"'}=>[769,'COMBINING ACUTE ACCENT','Mn','Inherited'], +q{"(}=>[774,'COMBINING BREVE','Mn','Inherited'], q{"+}=>[1612,'ARABIC DAMMATAN','Mn','Inherited'], -q{",}=>[57361,'','Co','Common'], -q{"-}=>[57354,'','Co','Common'], -q{".}=>[57356,'','Co','Common'], -q{"/}=>[57365,'','Co','Common'], -q{"0}=>[57358,'','Co','Common'], -q{"1}=>[57349,'','Co','Common'], -q{"3}=>[57348,'','Co','Common'], +q{",}=>[807,'COMBINING CEDILLA','Mn','Inherited'], +q{"-}=>[772,'COMBINING MACRON','Mn','Inherited'], +q{".}=>[775,'COMBINING DOT ABOVE','Mn','Inherited'], +q{"/}=>[824,'COMBINING LONG SOLIDUS OVERLAY','Mn','Inherited'], +q{"0}=>[778,'COMBINING RING ABOVE','Mn','Inherited'], +q{"1}=>[836,'COMBINING GREEK DIALYTIKA TONOS','Mn','Inherited'], +q{"3}=>[9216,'','Cc Xa','Common'], q{"5}=>[12443,'KATAKANA-HIRAGANA VOICED SOUND MARK','Sk','Common'], q{"6}=>[8220,'LEFT DOUBLE QUOTATION MARK','Pi','Common'], q{"9}=>[8221,'RIGHT DOUBLE QUOTATION MARK','Pf','Common'], -q{":}=>[57357,'','Co','Common'], -q{";}=>[57362,'','Co','Common'], -q{"<}=>[57360,'','Co','Common'], -q{"=}=>[57364,'','Co','Common'], -q{">}=>[57352,'','Co','Common'], -q{"?}=>[57353,'','Co','Common'], -q{"_}=>[57363,'','Co','Common'], -q{"d}=>[57367,'','Co','Common'], -q{"i}=>[57366,'','Co','Common'], -q{"p}=>[57368,'','Co','Common'], +q{":}=>[776,'COMBINING DIAERESIS','Mn','Inherited'], +q{";}=>[808,'COMBINING OGONEK','Mn','Inherited'], +q{"<}=>[780,'COMBINING CARON','Mn','Inherited'], +q{"=}=>[819,'COMBINING DOUBLE LOW LINE','Mn','Inherited'], +q{">}=>[770,'COMBINING CIRCUMFLEX ACCENT','Mn','Inherited'], +q{"?}=>[771,'COMBINING TILDE','Mn','Inherited'], +q{"_}=>[818,'COMBINING LOW LINE','Mn','Inherited'], +q{"d}=>[788,'COMBINING REVERSED COMMA ABOVE','Mn','Inherited'], +q{"i}=>[837,'COMBINING GREEK YPOGEGRAMMENI','Mn','Inherited'], +q{"p}=>[787,'COMBINING COMMA ABOVE','Mn','Inherited'], q{%"}=>[1068,'CYRILLIC CAPITAL LETTER SOFT SIGN','Lu','Cyrillic'], q{%'}=>[1100,'CYRILLIC SMALL LETTER SOFT SIGN','Ll','Cyrillic'], q{%0}=>[8240,'PER MILLE SIGN','Po','Common'], @@ -101,6 +101,7 @@ q{++}=>[1600,'ARABIC TATWEEL','Lm','Common'], q{+-}=>[177,'PLUS-MINUS SIGN','Sm Xl','Common'], q{+5}=>[12446,'HIRAGANA VOICED ITERATION MARK','Lm','Hiragana'], q{+6}=>[12542,'KATAKANA VOICED ITERATION MARK','Lm','Katakana'], +q{+J}=>[12642,'HANGUL LETTER YI','Lo Xz','Hangul'], q{+S}=>[8314,'SUPERSCRIPT PLUS SIGN','Sm','Common'], q{+Z}=>[8721,'N-ARY SUMMATION','Sm','Common'], q{+_}=>[12292,'JAPANESE INDUSTRIAL STANDARD SYMBOL','So','Common'], @@ -108,7 +109,7 @@ q{+s}=>[8330,'SUBSCRIPT PLUS SIGN','Sm','Common'], q{,!}=>[7939,'GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA','Ll','Greek'], q{,'}=>[7937,'GREEK SMALL LETTER ALPHA WITH DASIA','Ll','Greek'], q{,+}=>[1548,'ARABIC COMMA','Po','Common'], -q{,,}=>[57370,'','Co','Common'], +q{,,}=>[8127,'GREEK PSILI','Sk','Greek'], q{,G}=>[985,'GREEK SMALL LETTER ARCHAIC KOPPA','Ll','Greek'], q{,_}=>[12289,'IDEOGRAPHIC COMMA','Po','Common'], q{-!}=>[8593,'UPWARDS ARROW','Sm','Common'], @@ -122,6 +123,7 @@ q{-6}=>[12540,'KATAKANA-HIRAGANA PROLONGED SOUND MARK','Lm','Common'], q{-:}=>[247,'DIVISION SIGN','Sm Xl','Common'], q{->}=>[8594,'RIGHTWARDS ARROW','Sm','Common'], q{-?}=>[12316,'WAVE DASH','Pd','Common'], +q{-J}=>[12641,'HANGUL LETTER EU','Lo Xz','Hangul'], q{-L}=>[8735,'RIGHT ANGLE','Sm','Common'], q{-M}=>[8212,'EM DASH','Pd','Common'], q{-N}=>[8211,'EN DASH','Pd','Common'], @@ -148,7 +150,7 @@ q{//}=>[92,'REVERSE SOLIDUS','Po Xa','Common'], q{/0}=>[8709,'EMPTY SET','Sm','Common'], q{/=}=>[8225,'DOUBLE DAGGER','Po','Common'], q{/>}=>[9002,'RIGHT-POINTING ANGLE BRACKET','Pe','Common'], -q{/c}=>[57345,'','Co','Common'], +q{/c}=>[9216,'','Cc Xa','Common'], q{/f}=>[8260,'FRACTION SLASH','Sm','Common'], q{0(}=>[8733,'PROPORTIONAL TO','Sm','Common'], q{0+}=>[1618,'ARABIC SUKUN','Mn','Inherited'], @@ -156,6 +158,7 @@ q{0.}=>[8857,'CIRCLED DOT OPERATOR','Sm','Common'], q{00}=>[8734,'INFINITY','Sm','Common'], q{02}=>[8858,'CIRCLED RING OPERATOR','Sm','Common'], q{05}=>[12444,'KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK','Sk','Common'], +q{0J}=>[12615,'HANGUL LETTER IEUNG','Lo Xz','Hangul'], q{0L}=>[9680,'CIRCLE WITH LEFT HALF BLACK','So','Common'], q{0M}=>[9679,'BLACK CIRCLE','So','Common'], q{0R}=>[9681,'CIRCLE WITH RIGHT HALF BLACK','So','Common'], @@ -298,7 +301,7 @@ q{:X}=>[8251,'REFERENCE MARK','Po','Common'], q{;!}=>[7938,'GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA','Ll','Greek'], q{;'}=>[7936,'GREEK SMALL LETTER ALPHA WITH PSILI','Ll','Greek'], q{;+}=>[1563,'ARABIC SEMICOLON','Po','Common'], -q{;;}=>[57369,'','Co','Common'], +q{;;}=>[8190,'GREEK DASIA','Sk','Greek'], q{;S}=>[703,'MODIFIER LETTER LEFT HALF RING','Lm','Common'], q{;_}=>[12294,'IDEOGRAPHIC CLOSING MARK','Lo','Common'], q{<"}=>[12302,'LEFT WHITE CORNER BRACKET','Ps','Common'], @@ -337,9 +340,9 @@ q{>7}=>[8969,'RIGHT CEILING','Sm','Common'], q{>=}=>[8805,'GREATER-THAN OR EQUAL TO','Sm','Common'], q{>>}=>[187,'RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK','Pf Xl','Common'], q{>H}=>[9758,'WHITE RIGHT POINTING INDEX','So','Common'], -q{>V}=>[57381,'','Co','Common'], +q{>V}=>[8407,'COMBINING RIGHT ARROW ABOVE','Mn','Inherited'], q{?!}=>[8253,'INTERROBANG','Po Xz','Common'], -q{?*}=>[57383,'','Co','Common'], +q{?*}=>[8128,'GREEK PERISPOMENI','Sk','Greek'], q{?+}=>[1567,'ARABIC QUESTION MARK','Po','Common'], q{?,}=>[7941,'GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA','Ll','Greek'], q{?-}=>[8771,'ASYMPTOTICALLY EQUAL TO','Sm','Common'], @@ -378,6 +381,7 @@ q{A?}=>[195,'LATIN CAPITAL LETTER A WITH TILDE','Lu Xl','Latin'], q{AA}=>[197,'LATIN CAPITAL LETTER A WITH RING ABOVE','Lu Xl','Latin'], q{AC}=>[159,'','Cc Xl','Common'], q{AE}=>[198,'LATIN CAPITAL LETTER AE','Lu Xl','Latin'], +q{AJ}=>[12624,'HANGUL LETTER AE','Lo Xz','Hangul'], q{AK}=>[9222,'','Cc Xa','Common'], q{AN}=>[8743,'LOGICAL AND','Sm','Common'], q{AO}=>[8491,'ANGSTROM SIGN','Lu','Latin'], @@ -392,6 +396,7 @@ q{B=}=>[1041,'CYRILLIC CAPITAL LETTER BE','Lu','Cyrillic'], q{BB}=>[166,'BROKEN BAR','So Xl','Common'], q{BD}=>[9586,'BOX DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT','So','Common'], q{BH}=>[130,'','Cc Xl','Common'], +q{BJ}=>[12611,'HANGUL LETTER SSANGPIEUP','Lo Xz','Hangul'], q{BL}=>[9223,'','Cc Xa','Common'], q{BS}=>[9224,'','Cc Xa','Common'], q{B_}=>[7686,'LATIN CAPITAL LETTER B WITH LINE BELOW','Lu','Latin'], @@ -417,7 +422,7 @@ q{CI}=>[155,'','Cc Xl','Common'], q{CN}=>[9240,'','Cc Xa','Common'], q{CR}=>[9229,'','Cc Xa','Common'], q{Ca}=>[8248,'CARET','Po','Common'], -q{Ci}=>[57372,'','Co','Common'], +q{Ci}=>[9675,'WHITE CIRCLE','So','Common'], q{Co}=>[169,'COPYRIGHT SIGN','So Xl','Common'], q{Ct}=>[162,'CENT SIGN','Sc Xl','Common'], q{Cu}=>[164,'CURRENCY SIGN','Sc Xl','Common'], @@ -440,6 +445,7 @@ q{DE}=>[8710,'INCREMENT','Sm','Common'], q{DG}=>[176,'DEGREE SIGN','So Xl','Common'], q{DH}=>[9523,'BOX DRAWINGS HEAVY DOWN AND HORIZONTAL','So','Common'], q{DI}=>[8748,'DOUBLE INTEGRAL','Sm','Common'], +q{DJ}=>[12600,'HANGUL LETTER SSANGTIKEUT','Lo Xz','Hangul'], q{DL}=>[9232,'','Cc Xa','Common'], q{DO}=>[36,'DOLLAR SIGN','Sc Xa','Common'], q{DR}=>[9487,'BOX DRAWINGS HEAVY DOWN AND RIGHT','So','Common'], @@ -483,6 +489,8 @@ q{EB}=>[9239,'','Cc Xa','Common'], q{EC}=>[9243,'','Cc Xa','Common'], q{ED}=>[439,'LATIN CAPITAL LETTER EZH','Lu','Latin'], q{EG}=>[151,'','Cc Xl','Common'], +q{EJ}=>[12628,'HANGUL LETTER E','Lo Xz','Hangul'], +q{EK}=>[12630,'HANGUL LETTER YE','Lo Xz','Hangul'], q{EM}=>[9241,'','Cc Xa','Common'], q{EQ}=>[9221,'','Cc Xa','Common'], q{ES}=>[135,'','Cc Xl','Common'], @@ -503,7 +511,7 @@ q{FF}=>[9228,'
','Cc Xa','Common'], q{FI}=>[8498,'TURNED CAPITAL F','So Xz','Common'], q{FS}=>[9244,'','Cc Xa','Common'], q{Fd}=>[9698,'BLACK LOWER RIGHT TRIANGLE','So','Common'], -q{Fl}=>[57379,'','Co','Common'], +q{Fl}=>[9216,'','Cc Xa','Common'], q{Fm}=>[9792,'FEMALE SIGN','So','Common'], q{G%}=>[1027,'CYRILLIC CAPITAL LETTER GJE','Lu','Cyrillic'], q{G'}=>[500,'LATIN CAPITAL LETTER G WITH ACUTE','Lu','Latin'], @@ -520,7 +528,8 @@ q{G<}=>[486,'LATIN CAPITAL LETTER G WITH CARON','Lu','Latin'], q{G=}=>[1043,'CYRILLIC CAPITAL LETTER GHE','Lu','Cyrillic'], q{G>}=>[284,'LATIN CAPITAL LETTER G WITH CIRCUMFLEX','Lu','Latin'], q{GC}=>[153,'','Cc Xl','Common'], -q{GF}=>[57380,'','Co','Common'], +q{GF}=>[915,'GREEK CAPITAL LETTER GAMMA','Lu','Greek'], +q{GJ}=>[12594,'HANGUL LETTER SSANGKIYEOK','Lo Xz','Hangul'], q{GS}=>[9245,'','Cc Xa','Common'], q{Ga}=>[12460,'KATAKANA LETTER GA','Lo','Katakana'], q{Ge}=>[12466,'KATAKANA LETTER GE','Lo','Katakana'], @@ -582,11 +591,12 @@ q{J%}=>[1032,'CYRILLIC CAPITAL LETTER JE','Lu','Cyrillic'], q{J*}=>[938,'GREEK CAPITAL LETTER IOTA WITH DIALYTIKA','Lu','Greek'], q{J+}=>[1497,'HEBREW LETTER YOD','Lo','Hebrew'], q{J/}=>[584,'','Xn Xz',''], -q{J<}=>[57384,'','Co','Common'], +q{J<}=>[496,'LATIN SMALL LETTER J WITH CARON','Ll','Latin'], q{J=}=>[1049,'CYRILLIC CAPITAL LETTER SHORT I','Lu','Cyrillic'], q{J>}=>[308,'LATIN CAPITAL LETTER J WITH CIRCUMFLEX','Lu','Latin'], q{JA}=>[1071,'CYRILLIC CAPITAL LETTER YA','Lu','Cyrillic'], q{JE}=>[1069,'CYRILLIC CAPITAL LETTER E','Lu','Cyrillic'], +q{JJ}=>[12617,'HANGUL LETTER SSANGCIEUC','Lo Xz','Hangul'], q{JU}=>[1070,'CYRILLIC CAPITAL LETTER YU','Lu','Cyrillic'], q{K%}=>[1498,'HEBREW LETTER FINAL KAF','Lo','Hebrew'], q{K'}=>[7728,'LATIN CAPITAL LETTER K WITH ACUTE','Lu','Latin'], @@ -700,6 +710,7 @@ q{O?}=>[213,'LATIN CAPITAL LETTER O WITH TILDE','Lu Xl','Latin'], q{OC}=>[157,'','Cc Xl','Common'], q{OE}=>[338,'LATIN CAPITAL LIGATURE OE','Lu','Latin'], q{OI}=>[418,'LATIN CAPITAL LETTER OI','Lu','Latin'], +q{OJ}=>[12634,'HANGUL LETTER OE','Lo Xz','Hangul'], q{OK}=>[10003,'CHECK MARK','So','Common'], q{OR}=>[8744,'LOGICAL OR','Sm','Common'], q{OS}=>[9633,'WHITE SQUARE','So','Common'], @@ -784,6 +795,7 @@ q{SE}=>[167,'SECTION SIGN','So Xl','Common'], q{SG}=>[150,'','Cc Xl','Common'], q{SH}=>[9217,'','Cc Xa','Common'], q{SI}=>[9231,'','Cc Xa','Common'], +q{SJ}=>[12614,'HANGUL LETTER SSANGSIOS','Lo Xz','Hangul'], q{SM}=>[8480,'SERVICE MARK','So','Common'], q{SO}=>[9230,'','Cc Xa','Common'], q{SR}=>[9644,'BLACK RECTANGLE','So','Common'], @@ -853,6 +865,8 @@ q{UA}=>[57346,'','Co','Common'], q{UB}=>[57347,'','Co','Common'], q{UD}=>[8597,'UP DOWN ARROW','So','Common'], q{UH}=>[9531,'BOX DRAWINGS HEAVY UP AND HORIZONTAL','So','Common'], +q{UJ}=>[12637,'HANGUL LETTER WEO','Lo Xz','Hangul'], +q{UK}=>[12684,'HANGUL LETTER YU-I','Lo Xz','Hangul'], q{UL}=>[9499,'BOX DRAWINGS HEAVY UP AND LEFT','So','Common'], q{UR}=>[9495,'BOX DRAWINGS HEAVY UP AND RIGHT','So','Common'], q{US}=>[9247,'','Cc Xa','Common'], @@ -868,6 +882,7 @@ q{V=}=>[1042,'CYRILLIC CAPITAL LETTER VE','Lu','Cyrillic'], q{V?}=>[7804,'LATIN CAPITAL LETTER V WITH TILDE','Lu','Latin'], q{VH}=>[9547,'BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL','So','Common'], q{VI}=>[581,'','Xn Xz',''], +q{VJ}=>[12638,'HANGUL LETTER WE','Lo Xz','Hangul'], q{VL}=>[9515,'BOX DRAWINGS HEAVY VERTICAL AND LEFT','So','Common'], q{VR}=>[9507,'BOX DRAWINGS HEAVY VERTICAL AND RIGHT','So','Common'], q{VS}=>[138,'','Cc Xl','Common'], @@ -892,6 +907,7 @@ q{W:}=>[7812,'LATIN CAPITAL LETTER W WITH DIAERESIS','Lu','Latin'], q{W=}=>[8361,'WON SIGN','Sc','Common'], q{W>}=>[372,'LATIN CAPITAL LETTER W WITH CIRCUMFLEX','Lu','Latin'], q{WA}=>[12526,'KATAKANA LETTER SMALL WA','Lo','Katakana'], +q{WJ}=>[12633,'HANGUL LETTER WAE','Lo Xz','Hangul'], q{WW}=>[503,'LATIN CAPITAL LETTER WYNN','Lu Xz','Latin'], q{Wa}=>[12527,'KATAKANA LETTER WA','Lo','Katakana'], q{We}=>[12529,'KATAKANA LETTER WE','Lo','Katakana'], @@ -918,6 +934,7 @@ q{Y>}=>[374,'LATIN CAPITAL LETTER Y WITH CIRCUMFLEX','Lu','Latin'], q{Y?}=>[7928,'LATIN CAPITAL LETTER Y WITH TILDE','Lu','Latin'], q{YA}=>[12515,'KATAKANA LETTER SMALL YA','Lo','Katakana'], q{YI}=>[1031,'CYRILLIC CAPITAL LETTER YI','Lu','Cyrillic'], +q{YJ}=>[12626,'HANGUL LETTER YAE','Lo Xz','Hangul'], q{YO}=>[12519,'KATAKANA LETTER SMALL YO','Lo','Katakana'], q{YU}=>[12517,'KATAKANA LETTER SMALL YU','Lo','Katakana'], q{YY}=>[540,'LATIN CAPITAL LETTER YOGH','Lu Xz','Latin'], @@ -969,6 +986,7 @@ q{a>}=>[226,'LATIN SMALL LETTER A WITH CIRCUMFLEX','Ll Xl','Latin'], q{a?}=>[227,'LATIN SMALL LETTER A WITH TILDE','Ll Xl','Latin'], q{aH}=>[1571,'ARABIC LETTER ALEF WITH HAMZA ABOVE','Lo','Arabic'], q{aI}=>[592,'LATIN SMALL LETTER TURNED A','Ll Xz','Latin'], +q{aJ}=>[12623,'HANGUL LETTER A','Lo Xz','Hangul'], q{aM}=>[1570,'ARABIC LETTER ALEF WITH MADDA ABOVE','Lo','Arabic'], q{aN}=>[12580,'BOPOMOFO LETTER ANG','Lo','Bopomofo'], q{aR}=>[8553,'ROMAN NUMERAL TEN','Nl','Common'], @@ -978,7 +996,7 @@ q{ac}=>[8448,'ACCOUNT OF','So Xz','Common'], q{ae}=>[230,'LATIN SMALL LETTER AE','Ll Xl','Latin'], q{ah}=>[1573,'ARABIC LETTER ALEF WITH HAMZA BELOW','Lo','Arabic'], q{ai}=>[12574,'BOPOMOFO LETTER AI','Lo','Bopomofo'], -q{am}=>[57375,'','Co','Common'], +q{am}=>[13250,'SQUARE AM','So','Common'], q{an}=>[12578,'BOPOMOFO LETTER AN','Lo','Bopomofo'], q{ar}=>[8569,'SMALL ROMAN NUMERAL TEN','Nl','Common'], q{as}=>[8449,'ADDRESSED TO THE SUBJECT','So Xz','Common'], @@ -989,10 +1007,11 @@ q{b-}=>[387,'LATIN SMALL LETTER B WITH TOPBAR','Ll Xz','Latin'], q{b.}=>[7683,'LATIN SMALL LETTER B WITH DOT ABOVE','Ll','Latin'], q{b/}=>[384,'LATIN SMALL LETTER B WITH STROKE','Ll Xz','Latin'], q{b2}=>[595,'LATIN SMALL LETTER B WITH HOOK','Ll Xz','Latin'], -q{b3}=>[57371,'','Co','Common'], +q{b3}=>[976,'GREEK BETA SYMBOL','Ll','Greek'], q{b4}=>[12549,'BOPOMOFO LETTER B','Lo','Bopomofo'], q{b=}=>[1073,'CYRILLIC SMALL LETTER BE','Ll','Cyrillic'], q{b?}=>[7532,'LATIN SMALL LETTER B WITH MIDDLE TILDE','Ll Xz','Latin'], +q{bJ}=>[12610,'HANGUL LETTER PIEUP','Lo Xz','Hangul'], q{bR}=>[8554,'ROMAN NUMERAL ELEVEN','Nl','Common'], q{b_}=>[7687,'LATIN SMALL LETTER B WITH LINE BELOW','Ll','Latin'], q{ba}=>[12400,'HIRAGANA LETTER BA','Lo','Hiragana'], @@ -1017,6 +1036,7 @@ q{c>}=>[265,'LATIN SMALL LETTER C WITH CIRCUMFLEX','Ll','Latin'], q{cC}=>[9827,'BLACK CLUB SUIT','So','Common'], q{cD}=>[9826,'WHITE DIAMOND SUIT','So','Common'], q{cH}=>[9825,'WHITE HEART SUIT','So','Common'], +q{cJ}=>[12618,'HANGUL LETTER CHIEUCH','Lo Xz','Hangul'], q{cR}=>[8555,'ROMAN NUMERAL TWELVE','Nl','Common'], q{cS}=>[9824,'BLACK SPADE SUIT','So','Common'], q{ch}=>[12564,'BOPOMOFO LETTER CH','Lo','Bopomofo'], @@ -1038,6 +1058,7 @@ q{d<}=>[271,'LATIN SMALL LETTER D WITH CARON','Ll','Latin'], q{d=}=>[1076,'CYRILLIC SMALL LETTER DE','Ll','Cyrillic'], q{d?}=>[7533,'LATIN SMALL LETTER D WITH MIDDLE TILDE','Ll Xz','Latin'], q{dH}=>[9519,'BOX DRAWINGS DOWN LIGHT AND HORIZONTAL HEAVY','So','Common'], +q{dJ}=>[12599,'HANGUL LETTER TIKEUT','Lo Xz','Hangul'], q{dL}=>[9489,'BOX DRAWINGS DOWN LIGHT AND LEFT HEAVY','So','Common'], q{dP}=>[8706,'PARTIAL DIFFERENTIAL','Sm','Common'], q{dR}=>[9485,'BOX DRAWINGS DOWN LIGHT AND RIGHT HEAVY','So','Common'], @@ -1079,13 +1100,15 @@ q{e=}=>[1077,'CYRILLIC SMALL LETTER IE','Ll','Cyrillic'], q{e>}=>[234,'LATIN SMALL LETTER E WITH CIRCUMFLEX','Ll Xl','Latin'], q{e?}=>[7869,'LATIN SMALL LETTER E WITH TILDE','Ll','Latin'], q{eI}=>[601,'LATIN SMALL LETTER SCHWA','Ll Xz','Latin'], +q{eJ}=>[12627,'HANGUL LETTER EO','Lo Xz','Hangul'], +q{eK}=>[12629,'HANGUL LETTER YEO','Lo Xz','Hangul'], q{eN}=>[12581,'BOPOMOFO LETTER ENG','Lo','Bopomofo'], -q{ed}=>[57374,'','Co','Common'], +q{ed}=>[658,'LATIN SMALL LETTER EZH','Ll','Latin'], q{ei}=>[12575,'BOPOMOFO LETTER EI','Lo','Bopomofo'], q{en}=>[12579,'BOPOMOFO LETTER EN','Lo','Bopomofo'], q{er}=>[12582,'BOPOMOFO LETTER ER','Lo','Bopomofo'], q{ez}=>[495,'LATIN SMALL LETTER EZH WITH CARON','Ll','Latin'], -q{f(}=>[57373,'','Co','Common'], +q{f(}=>[402,'LATIN SMALL LETTER F WITH HOOK','Ll','Latin'], q{f*}=>[966,'GREEK SMALL LETTER PHI','Ll','Greek'], q{f+}=>[1601,'ARABIC LETTER FEH','Lo','Arabic'], q{f.}=>[7711,'LATIN SMALL LETTER F WITH DOT ABOVE','Ll','Latin'], @@ -1116,6 +1139,7 @@ q{g<}=>[487,'LATIN SMALL LETTER G WITH CARON','Ll','Latin'], q{g=}=>[1075,'CYRILLIC SMALL LETTER GHE','Ll','Cyrillic'], q{g>}=>[285,'LATIN SMALL LETTER G WITH CIRCUMFLEX','Ll','Latin'], q{gI}=>[7543,'LATIN SMALL LETTER TURNED G','Ll Xz','Latin'], +q{gJ}=>[12593,'HANGUL LETTER KIYEOK','Lo Xz','Hangul'], q{ga}=>[12364,'HIRAGANA LETTER GA','Lo','Hiragana'], q{ge}=>[12370,'HIRAGANA LETTER GE','Lo','Hiragana'], q{gf}=>[1711,'ARABIC LETTER GAF','Lo','Arabic'], @@ -1134,6 +1158,7 @@ q{h:}=>[7719,'LATIN SMALL LETTER H WITH DIAERESIS','Ll','Latin'], q{h=}=>[1093,'CYRILLIC SMALL LETTER HA','Ll','Cyrillic'], q{h>}=>[293,'LATIN SMALL LETTER H WITH CIRCUMFLEX','Ll','Latin'], q{hI}=>[613,'LATIN SMALL LETTER TURNED H','Ll Xz','Latin'], +q{hJ}=>[12622,'HANGUL LETTER HIEUH','Lo Xz','Hangul'], q{h_}=>[7830,'LATIN SMALL LETTER H WITH LINE BELOW','Ll','Latin'], q{ha}=>[12399,'HIRAGANA LETTER HA','Lo','Hiragana'], q{he}=>[12408,'HIRAGANA LETTER HE','Lo','Hiragana'], @@ -1166,6 +1191,7 @@ q{i=}=>[1080,'CYRILLIC SMALL LETTER I','Ll','Cyrillic'], q{i>}=>[238,'LATIN SMALL LETTER I WITH CIRCUMFLEX','Ll Xl','Latin'], q{i?}=>[297,'LATIN SMALL LETTER I WITH TILDE','Ll','Latin'], q{iI}=>[7433,'LATIN SMALL LETTER TURNED I','Ll Xz','Latin'], +q{iJ}=>[12643,'HANGUL LETTER I','Lo Xz','Hangul'], q{ie}=>[1108,'CYRILLIC SMALL LETTER UKRAINIAN IE','Ll','Cyrillic'], q{ii}=>[1110,'CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I','Ll','Cyrillic'], q{ij}=>[307,'LATIN SMALL LIGATURE IJ','Ll','Latin'], @@ -1181,6 +1207,7 @@ q{j4}=>[12560,'BOPOMOFO LETTER J','Lo','Bopomofo'], q{j<}=>[496,'LATIN SMALL LETTER J WITH CARON','Ll','Latin'], q{j=}=>[1081,'CYRILLIC SMALL LETTER SHORT I','Ll','Cyrillic'], q{j>}=>[309,'LATIN SMALL LETTER J WITH CIRCUMFLEX','Ll','Latin'], +q{jJ}=>[12616,'HANGUL LETTER CIEUC','Lo Xz','Hangul'], q{ja}=>[1103,'CYRILLIC SMALL LETTER YA','Ll','Cyrillic'], q{je}=>[1101,'CYRILLIC SMALL LETTER E','Ll','Cyrillic'], q{ju}=>[1102,'CYRILLIC SMALL LETTER YU','Ll','Cyrillic'], @@ -1194,6 +1221,7 @@ q{k4}=>[12558,'BOPOMOFO LETTER K','Lo','Bopomofo'], q{k<}=>[489,'LATIN SMALL LETTER K WITH CARON','Ll','Latin'], q{k=}=>[1082,'CYRILLIC SMALL LETTER KA','Ll','Cyrillic'], q{kI}=>[670,'LATIN SMALL LETTER TURNED K','Ll Xz','Latin'], +q{kJ}=>[12619,'HANGUL LETTER KHIEUKH','Lo Xz','Hangul'], q{k_}=>[7733,'LATIN SMALL LETTER K WITH LINE BELOW','Ll','Latin'], q{ka}=>[12363,'HIRAGANA LETTER KA','Lo','Hiragana'], q{ke}=>[12369,'HIRAGANA LETTER KE','Lo','Hiragana'], @@ -1228,6 +1256,7 @@ q{m;}=>[625,'LATIN SMALL LETTER M WITH HOOK','Ll Xz','Latin'], q{m=}=>[1084,'CYRILLIC SMALL LETTER EM','Ll','Cyrillic'], q{m?}=>[7535,'LATIN SMALL LETTER M WITH MIDDLE TILDE','Ll Xz','Latin'], q{mI}=>[623,'LATIN SMALL LETTER TURNED M','Ll Xz','Latin'], +q{mJ}=>[12609,'HANGUL LETTER MIEUM','Lo Xz','Hangul'], q{ma}=>[12414,'HIRAGANA LETTER MA','Lo','Hiragana'], q{me}=>[12417,'HIRAGANA LETTER ME','Lo','Hiragana'], q{mi}=>[12415,'HIRAGANA LETTER MI','Lo','Hiragana'], @@ -1248,6 +1277,7 @@ q{n<}=>[328,'LATIN SMALL LETTER N WITH CARON','Ll','Latin'], q{n=}=>[1085,'CYRILLIC SMALL LETTER EN','Ll','Cyrillic'], q{n?}=>[241,'LATIN SMALL LETTER N WITH TILDE','Ll Xl','Latin'], q{nG}=>[12587,'BOPOMOFO LETTER NG','Lo','Bopomofo'], +q{nJ}=>[12596,'HANGUL LETTER NIEUN','Lo Xz','Hangul'], q{nS}=>[8319,'SUPERSCRIPT LATIN SMALL LETTER N','Ll','Latin'], q{n_}=>[7753,'LATIN SMALL LETTER N WITH LINE BELOW','Ll','Latin'], q{na}=>[12394,'HIRAGANA LETTER NA','Lo','Hiragana'], @@ -1284,6 +1314,8 @@ q{o?}=>[245,'LATIN SMALL LETTER O WITH TILDE','Ll Xl','Latin'], q{oC}=>[8451,'DEGREE CELSIUS','So','Common'], q{oF}=>[8457,'DEGREE FAHRENHEIT','So','Common'], q{oI}=>[596,'LATIN SMALL LETTER OPEN O','Ll Xz','Latin'], +q{oJ}=>[12631,'HANGUL LETTER O','Lo Xz','Hangul'], +q{oK}=>[12635,'HANGUL LETTER YO','Lo Xz','Hangul'], q{oe}=>[339,'LATIN SMALL LIGATURE OE','Ll','Latin'], q{oi}=>[419,'LATIN SMALL LETTER OI','Ll','Latin'], q{ou}=>[12577,'BOPOMOFO LETTER OU','Lo','Bopomofo'], @@ -1297,10 +1329,11 @@ q{p3}=>[993,'GREEK SMALL LETTER SAMPI','Ll','Greek'], q{p4}=>[12550,'BOPOMOFO LETTER P','Lo','Bopomofo'], q{p=}=>[1087,'CYRILLIC SMALL LETTER PE','Ll','Cyrillic'], q{p?}=>[7537,'LATIN SMALL LETTER P WITH MIDDLE TILDE','Ll Xz','Latin'], +q{pJ}=>[12621,'HANGUL LETTER PHIEUPH','Lo Xz','Hangul'], q{pa}=>[12401,'HIRAGANA LETTER PA','Lo','Hiragana'], q{pe}=>[12410,'HIRAGANA LETTER PE','Lo','Hiragana'], q{pi}=>[12404,'HIRAGANA LETTER PI','Lo','Hiragana'], -q{pm}=>[57376,'','Co','Common'], +q{pm}=>[13272,'SQUARE PM','So','Common'], q{po}=>[12413,'HIRAGANA LETTER PO','Lo','Hiragana'], q{pu}=>[12407,'HIRAGANA LETTER PU','Lo','Hiragana'], q{q*}=>[968,'GREEK SMALL LETTER PSI','Ll','Greek'], @@ -1323,6 +1356,7 @@ q{r<}=>[345,'LATIN SMALL LETTER R WITH CARON','Ll','Latin'], q{r=}=>[1088,'CYRILLIC SMALL LETTER ER','Ll','Cyrillic'], q{r?}=>[7538,'LATIN SMALL LETTER R WITH MIDDLE TILDE','Ll Xz','Latin'], q{rI}=>[633,'LATIN SMALL LETTER TURNED R','Ll Xz','Latin'], +q{rJ}=>[12601,'HANGUL LETTER RIEUL','Lo Xz','Hangul'], q{r_}=>[7775,'LATIN SMALL LETTER R WITH LINE BELOW','Ll','Latin'], q{ra}=>[12425,'HIRAGANA LETTER RA','Lo','Hiragana'], q{re}=>[12428,'HIRAGANA LETTER RE','Lo','Hiragana'], @@ -1342,6 +1376,7 @@ q{s=}=>[1089,'CYRILLIC SMALL LETTER ES','Ll','Cyrillic'], q{s>}=>[349,'LATIN SMALL LETTER S WITH CIRCUMFLEX','Ll','Latin'], q{s?}=>[7540,'LATIN SMALL LETTER S WITH MIDDLE TILDE','Ll Xz','Latin'], q{sB}=>[9642,'BLACK SMALL SQUARE','So','Common'], +q{sJ}=>[12613,'HANGUL LETTER SIOS','Lo Xz','Hangul'], q{sa}=>[12373,'HIRAGANA LETTER SA','Lo','Hiragana'], q{sc}=>[1097,'CYRILLIC SMALL LETTER SHCHA','Ll','Cyrillic'], q{se}=>[12379,'HIRAGANA LETTER SE','Lo','Hiragana'], @@ -1367,6 +1402,7 @@ q{t<}=>[357,'LATIN SMALL LETTER T WITH CARON','Ll','Latin'], q{t=}=>[1090,'CYRILLIC SMALL LETTER TE','Ll','Cyrillic'], q{t?}=>[7541,'LATIN SMALL LETTER T WITH MIDDLE TILDE','Ll Xz','Latin'], q{tI}=>[647,'LATIN SMALL LETTER TURNED T','Ll Xz','Latin'], +q{tJ}=>[12620,'HANGUL LETTER THIEUTH','Lo Xz','Hangul'], q{tU}=>[12387,'HIRAGANA LETTER SMALL TU','Lo','Hiragana'], q{t_}=>[7791,'LATIN SMALL LETTER T WITH LINE BELOW','Ll','Latin'], q{ta}=>[12383,'HIRAGANA LETTER TA','Lo','Hiragana'], @@ -1403,6 +1439,8 @@ q{u=}=>[1091,'CYRILLIC SMALL LETTER U','Ll','Cyrillic'], q{u>}=>[251,'LATIN SMALL LETTER U WITH CIRCUMFLEX','Ll Xl','Latin'], q{u?}=>[361,'LATIN SMALL LETTER U WITH TILDE','Ll','Latin'], q{uH}=>[9527,'BOX DRAWINGS UP LIGHT AND HORIZONTAL HEAVY','So','Common'], +q{uJ}=>[12636,'HANGUL LETTER U','Lo Xz','Hangul'], +q{uK}=>[12640,'HANGUL LETTER YU','Lo Xz','Hangul'], q{uL}=>[9497,'BOX DRAWINGS UP LIGHT AND LEFT HEAVY','So','Common'], q{uR}=>[9493,'BOX DRAWINGS UP LIGHT AND RIGHT HEAVY','So','Common'], q{uT}=>[9651,'WHITE UP-POINTING TRIANGLE','So','Common'], @@ -1420,6 +1458,7 @@ q{v=}=>[1074,'CYRILLIC SMALL LETTER VE','Ll','Cyrillic'], q{v?}=>[7805,'LATIN SMALL LETTER V WITH TILDE','Ll','Latin'], q{vH}=>[9535,'BOX DRAWINGS VERTICAL LIGHT AND HORIZONTAL HEAVY','So','Common'], q{vI}=>[652,'LATIN SMALL LETTER TURNED V','Ll Xz','Latin'], +q{vJ}=>[12637,'HANGUL LETTER WEO','Lo Xz','Hangul'], q{vL}=>[9509,'BOX DRAWINGS VERTICAL LIGHT AND LEFT HEAVY','So','Common'], q{vR}=>[9501,'BOX DRAWINGS VERTICAL LIGHT AND RIGHT HEAVY','So','Common'], q{vh}=>[9532,'BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL','So','Common'], @@ -1439,6 +1478,7 @@ q{w>}=>[373,'LATIN SMALL LETTER W WITH CIRCUMFLEX','Ll','Latin'], q{wA}=>[12430,'HIRAGANA LETTER SMALL WA','Lo','Hiragana'], q{wH}=>[1572,'ARABIC LETTER WAW WITH HAMZA ABOVE','Lo','Arabic'], q{wI}=>[653,'LATIN SMALL LETTER TURNED W','Ll Xz','Latin'], +q{wJ}=>[12632,'HANGUL LETTER WA','Lo Xz','Hangul'], q{wa}=>[12431,'HIRAGANA LETTER WA','Lo','Hiragana'], q{we}=>[12433,'HIRAGANA LETTER WE','Lo','Hiragana'], q{wi}=>[12432,'HIRAGANA LETTER WI','Lo','Hiragana'], @@ -1449,6 +1489,7 @@ q{x+}=>[1582,'ARABIC LETTER KHAH','Lo','Arabic'], q{x.}=>[7819,'LATIN SMALL LETTER X WITH DOT ABOVE','Ll','Latin'], q{x4}=>[12562,'BOPOMOFO LETTER X','Lo','Bopomofo'], q{x:}=>[7821,'LATIN SMALL LETTER X WITH DIAERESIS','Ll','Latin'], +q{xJ}=>[12595,'HANGUL LETTER KIYEOK-SIOS','Lo Xz','Hangul'], q{y!}=>[7923,'LATIN SMALL LETTER Y WITH GRAVE','Ll','Latin'], q{y%}=>[942,'GREEK SMALL LETTER ETA WITH TONOS','Ll','Greek'], q{y'}=>[253,'LATIN SMALL LETTER Y WITH ACUTE','Ll Xl','Latin'], @@ -1468,6 +1509,7 @@ q{y?}=>[7929,'LATIN SMALL LETTER Y WITH TILDE','Ll','Latin'], q{yA}=>[12419,'HIRAGANA LETTER SMALL YA','Lo','Hiragana'], q{yH}=>[1574,'ARABIC LETTER YEH WITH HAMZA ABOVE','Lo','Arabic'], q{yI}=>[654,'LATIN SMALL LETTER TURNED Y','Ll Xz','Latin'], +q{yJ}=>[12625,'HANGUL LETTER YA','Lo Xz','Hangul'], q{yO}=>[12423,'HIRAGANA LETTER SMALL YO','Lo','Hiragana'], q{yU}=>[12421,'HIRAGANA LETTER SMALL YU','Lo','Hiragana'], q{ya}=>[12420,'HIRAGANA LETTER YA','Lo','Hiragana'], diff --git a/digraphs.plp b/digraphs.plp index 075a398..a8f681d 100644 --- a/digraphs.plp +++ b/digraphs.plp @@ -96,6 +96,7 @@ print "\n"; latin hebrew arabic + korean japanese chinese diff --git a/rfc1345convert b/rfc1345convert index c5a3e80..c6131ad 100644 --- a/rfc1345convert +++ b/rfc1345convert @@ -46,6 +46,52 @@ for (@t) { $di{$mnem} = hex $char; } +# XXX +my %trans = ( + 0xE001 => 0, # join lines: not accepted + 0xE004 => 0, # umlaut is no different from diaeresis 0x0308 + 0xE005 => 0x0344, # discouraged + 0xE006 => 0x0300, + 0xE007 => 0x0301, + 0xE008 => 0x0302, + 0xE009 => 0x0303, + 0xE00A => 0x0304, + 0xE00B => 0x0306, + 0xE00C => 0x0307, + 0xE00D => 0x0308, + 0xE00E => 0x030A, + 0xE00F => 0x030B, + 0xE010 => 0x030C, + 0xE011 => 0x0327, + 0xE012 => 0x0328, + 0xE013 => 0x0332, + 0xE014 => 0x0333, + 0xE015 => 0x0338, + 0xE016 => 0x0345, + 0xE017 => 0x0314, + 0xE018 => 0x0313, + 0xE019 => 0x1FFE, + 0xE01A => 0x1FBF, + 0xE01B => 0x03D0, # middle beta = curled beta? + 0xE01C => 0x25CB, + 0xE01D => 0x0192, + 0xE01E => 0x0292, + 0xE01F => 0x33C2, # am, compatibility char + 0xE020 => 0x33D8, # pm, compatibility char + 0xE021 => 0x2121, + 0xE022 => 0xFE8E, + 0xE023 => 0, # dutch guilder 0192 is already encoded, and not very useful anyway + 0xE024 => 0x0393, + 0xE025 => 0x20D7, # also 20D1; non-spacing + 0xE026 => 0x1FEF, + 0xE027 => 0x1FC0, + 0xE028 => 0x01F0, #but uppercase +); +for (values %di) { + $_ >= 0xE000 or next; + $_ = $trans{$_} if defined $trans{$_}; +} + # personal addendums my @extra; if (-r 'shiar.inc.txt') { diff --git a/shiar.inc.txt b/shiar.inc.txt index 661dc67..ac4671a 100644 --- a/shiar.inc.txt +++ b/shiar.inc.txt @@ -226,3 +226,51 @@ yI ʎ 5 Hiragana 6 Katakana +# hangeul consonants at *J +gJ ㄱ +GJ ㄲ +xJ ㄳ +nJ ㄴ +dJ ㄷ +DJ ㄸ +rJ ㄹ +mJ ㅁ +bJ ㅂ +BJ ㅃ +sJ ㅅ +SJ ㅆ +0J ㅇ +jJ ㅈ +JJ ㅉ +cJ ㅊ +kJ ㅋ +tJ ㅌ +pJ ㅍ +hJ ㅎ + +# hangeul vowels at *J +aJ ㅏ +AJ ㅐ +eJ ㅓ +EJ ㅔ +oJ ㅗ +OJ ㅚ +uJ ㅜ +UJ ㅝ +iJ ㅣ +vJ ㅝ +VJ ㅞ +wJ ㅘ +WJ ㅙ +yJ ㅑ +YJ ㅒ +-J ㅡ # qJ? ++J ㅢ # QJ? + +# hangeul iotized vowels at *K +uK ㅠ +UK ㆌ +oK ㅛ +eK ㅕ +EK ㅖ + -- 2.30.0