3 # Copyright (C) 1998, 2013 Jungshik Shin, Paul Hardy
6 # This script (working as a filter) converts Hangul "Johab encoded fonts"
7 # with an unofficial XLFD name "-johab" in BDF format
8 # to a UCS-2 encoded font in the format defined by
9 # Roman Czyborra <roman@czyborra.com> at
10 # http://czyborra.com/unifont/
14 # This program is free software: you can redistribute it and/or modify
15 # it under the terms of the GNU General Public License as published by
16 # the Free Software Foundation, either version 2 of the License, or
17 # (at your option) any later version.
19 # This program is distributed in the hope that it will be useful,
20 # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 # GNU General Public License for more details.
24 # You should have received a copy of the GNU General Public License
25 # along with this program. If not, see <http://www.gnu.org/licenses/>.
28 # 'hanterm304font.tar.gz' contains about a dozen
29 # "Johab-encoded" fonts. The package is available at
30 # ftp://ftp.kaist.ac.kr/hangul/terminal/hanterm/hanterm304beta/fonts
31 # Please, note that this script only works with fonts whose
34 # --16-160-75-75-c-160-johab-1
35 # (and whose file name in the package doesn't include 's' or 'sh' preceding
38 # There are four of them :
39 # johabg16.bdf,johabm16.bdf,johabp16.bdf,iyagi16.bdf.
41 # Fonts in the package with other XLFD names
42 # (johabs and johabsh) contain glyphs for about 5000 Hanjas and special symbols
43 # defined in KS C 5601-1987.
46 # Jungshik Shin <jshin@pantheon.yale.edu>
48 # A more complete routine which not only covers
49 # *modern* pre-composed Hangul syllables in UAC00-UD7A3
50 # but also supports dynamic rendering of
51 # Hangul syllables(medieval as well as modern)
52 # using Hangul combining Jamos at [U1100-U11FF]
53 # was made by Deog-tae Kim <dtkim@calab.kaist.ac.kr>
54 # to be used in Java font-properties file.
55 # It's available at http://calab.kaist.ac.kr/~dtkim/java/
57 # 2 May 2008: changes by Paul Hardy (unifoundry <at> unifoundry.com):
59 # - In tconBase, "459" index corrected to "449".
60 # - Modified subroutine get_ind to always return 0 for final
61 # if no final consonant is in the composite syllable.
62 # Previously it always added $tconMap[$m] to the final
63 # consonant location even if there was no final consonant.
64 # - Index arrays were extended to cover all of Johab encoded
65 # Hangul, even though not all glyphs are used to generate
66 # the Unicode Hangul Syllables range.
67 # - Added comments on the letters in the letter arrays
70 # Conversion routine from Hangul Jamo index to glyph index
71 # of Hangul "Johab-encoded" fonts as used by
72 # Hangul xterm, hanterm.
73 # The following routine is based on Hanterm by Song, Jaekyung
74 # available at ftp://ftp.kaist.ac.kr/hangul/terminal/hanterm
76 # Leading Consonant index values:
78 # Modern Letters: Archaic Letters (no Romanization):
80 # 0 G (choseong kiyeok) 19 (choseong kapyeounpieup)
81 # 1 GG (choseong ssangkiyeok) 20 (choseong pieup-kiyeok)
82 # 2 N (choseong nieun) 21 (choseong sios-kiyeok)
83 # 3 D (choseong tikeut) 22 (choseong pieup-tikeut)
84 # 4 DD (choseong ssangtikeut) 23 (choseong sios-tikeut)
85 # 5 R (choseong rieul) 24 (choseong sios-pieup)
86 # 6 M (choseong mieum) 25 (choseong pieup-sios)
87 # 7 B (choseong pieup) 26 (choseong pansios)
88 # 8 BB (choseong ssangpieup) 27 (choseong yesieung)
89 # 9 S (choseong sios) 28 (choseong pieup-cieuc)
90 # 10 SS (choseong ssangsios) 29 (choseong sios-cieuc)
91 # 11 ieung (choseong ieung) 30 (choseong yeorinhieuh)
92 # 12 J (choseong cieuc)
93 # 13 JJ (choseong ssangcieuc)
94 # 14 C (choseong chieuch)
95 # 15 K (choseong khieukh)
96 # 16 T (choseong thieuth)
97 # 17 P (choseong phieuph)
98 # 18 H (choseong hieuh)
101 # Middle Letter index values:
103 # Modern Letters: Archaic Letters (no Romanization):
105 # 0 Filler (blank) 22 YO-YA (jungseong yo-ya)
106 # 1 A (jungseong a) 23 YO-YAE (jungseong yo-yae)
107 # 2 AE (jungseong ae) 24 YO-I (jungseong yo-i)
108 # 3 YA (jungseong ya) 25 YU-YEO (jungseong yu-yeo)
109 # 4 YAE (jungseong yae) 26 YU-YE (jungseong yu-ye)
110 # 5 EO (jungseong eo) 27 YU-I (jungseong yu-i)
111 # 6 E (jungseong e) 28 araea (jungseong araea)
112 # 7 YEO (jungseong yeo) 29 araea-i (jungseong araea-i)
113 # 8 YE (jungseong ye)
115 # 10 WA (jungseong wa)
116 # 11 WAE (jungseong wae)
117 # 12 OE (jungseong oe)
118 # 13 YO (jungseong yo)
120 # 15 WEO (jungseong weo)
121 # 16 WE (jungseong we)
122 # 17 WI (jungseong wi)
123 # 18 YU (jungseong yu)
124 # 19 EU (jungseong eu)
125 # 20 YI (jungseong yi)
129 # Terminal (Final) Letter index values:
131 # Modern Letters: Archaic Letters (no Romanization):
133 # 0 Filler (blank) 28 (jongseong rieul-hieuh)
134 # 1 G (jongseong kiyeok) 29 (jongseong mieum-kiyeok)
135 # 2 GG (jongseong ssangkiyeok) 30 (jongseong yeorinhieuh)
136 # 3 GS (jongseong kiyeok-sios) 31 (jongseong yesieung)
137 # 4 N (jongseong nieun)
138 # 5 NJ (jongseong nieun-cieuc)
139 # 6 NH (jongseong nieun-hieuh)
140 # 7 D (jongseong tikeut)
141 # 8 L (jongseong rieul)
142 # 9 LG (jongseong rieul-kiyeok)
143 # 10 LM (jongseong rieul-mieum)
144 # 11 LB (jongseong rieul-pieup)
145 # 12 LS (jongseong rieul-sios)
146 # 13 LT (jongseong rieul-thieuth)
147 # 14 LP (jongseong rieul-phieuph)
148 # 15 LH (jongseong rieul-hieuh)
149 # 16 M (jongseong mieum)
150 # 17 B (jongseong pieup)
151 # 18 BS (jongseong pieup-sios)
152 # 19 S (jongseong sios)
153 # 20 SS (jongseong ssangsios)
154 # 21 NG (jongseong ieung)
155 # 22 J (jongseong cieuc)
156 # 23 C (jongseong chieuch)
157 # 24 K (jongseong khieukh)
158 # 25 T (jongseong thieuth)
159 # 26 P (jongseong phieuph)
160 # 27 H (jongseong hieuh)
164 # The base font index for leading consonants
166 1, 11, 21, 31, 41, 51, # G, GG, N, D, DD, R
167 61, 71, 81, 91, 101, 111, # M, B, BB, S, SS, ieung
168 121, 131, 141, 151, 161, 171, # J, JJ, C, K, T, P
169 181, # H -- end of modern set
170 191, 201, 211, 221, 231, 241, #
171 251, 261, 271, 281, 291, 301
174 # The base index for vowels
177 0,311,314,317,320,323, # (Fill), A, AE, YA, YAE, EO
178 326,329,332,335,339,343, # E, YEO, YE, O, WA, WAE
179 347,351,355,358,361,364, # OI, YO, U, WEO, WE, WI
180 367,370,374,378, # YU, EU, UI, I -- end of modern set
181 381, 384, 387, # YO-YA, YO-YAE, YO-I
182 390, 393, 396, # YU-YEO, YU-YE, YU-I
183 399, 402 # araea, araea-i
186 # The base font index for trailing consonants
189 # modern trailing consonants (filler + 27)
191 405, 409, 413, 417, 421, # G, GG, GS, N, NJ
192 425, 429, 433, 437, 441, # NH, D, L, LG, LM
193 445, 449, 453, 457, 461, # LB, LS, LT, LP, LH
194 465, 469, 473, 477, 481, # M, B, BS, S, SS
195 485, 489, 493, 497, 501, # NG, J, C, K, T
196 505, 509, # P, H -- end of modern set
200 # The mapping from vowels to leading consonant type
201 # in absence of trailing consonant
204 0,0,0,0,0,0, # (Fill), A, AE, YA, YAE, EO
205 0,0,0,1,3,3, # E, YEO, YE, O, WA, WAE
206 3,1,2,4,4,4, # OI, YO, U, WEO, WE, WI
207 2,1,3,0, # YU, EU, UI, I -- end of modern set
208 3,3,3,4,4,4, # YO-YA, YO-YAE, YO-I, YU-YEO, YU-YE, YU-I
212 # The mapping from vowels to leading consonant type
213 # in presence of trailing consonant
216 5,5,5,5,5,5, # (Fill), A, AE, YA, YAE, EO
217 5,5,5,6,8,8, # E, YEO, YE, O, WA, WAE
218 8,6,7,9,9,9, # OI, YO, U, WEO, WE, WI
219 7,6,8,5, # YU, EU, UI, I -- end of modern set
220 8,8,8,9,9,9, # YO-YA, YO-YAE, YO-I, YU-YEO, YU-YE, YU-I
224 # Vowel type: 1 = 'o' and similar vowels, 0 = others
230 0,1,1,0, # end of modern set
234 # The mapping from trailing consonants to vowel type
241 1, 1, 1, 1, # end of modern set
245 # The mapping from vowels to trailing consonant type
248 0, 0, 2, 0, 2, 1, # (Fill), A, AE, YA, YAE, EO
249 2, 1, 2, 3, 0, 0, # E, YEO, YE, O, WA, WAE
250 0, 3, 3, 1, 1, 1, # OI, YO, U, WEO, WE, WI
251 3, 3, 0, 1, # YU, EU, UI, I -- end of modern set
252 0, 0, 0, 1, 1, 1, # YO-YA, YO-YAE, YO-I, YU-YEO, YU-YE, YU-I
253 3, 0 # araea, araea-i
258 # read in BITMAP patterns for Jamos from JOHAB-encoded BDF font file
263 if (/^ENCODING\s+(\d+)/) { $i = $1; $jamo[$i]=""; }
264 elsif (/^BITMAP/) { $BITMAP=1; }
265 elsif (/^ENDCHAR/) { $BITMAP=0;
270 $jamo[$i] = $jamo[$i] . $_;
274 for ( $j=0 ; $j < 11172 ; $j++ ) {
276 $init = int( $j / 21 / 28) ;
277 $medial = int($j / 28 ) % 21+1 ;
280 printf ("%04X:%64s\n", $j+0xAC00, &compose_hangul($init,$medial,$final));
286 local($l,$m,$f) = @_;
288 @l_bit = unpack("a2" x 32, $jamo[&get_ind($l,$m,$f,1)]);
289 @m_bit = unpack("a2" x 32, $jamo[&get_ind($l,$m,$f,2)]);
290 @f_bit = unpack("a2" x 32, $jamo[&get_ind($l,$m,$f,3)]);
293 for ( $i = 0; $i < 32; $i++) {
294 $bit[$i]=sprintf("%02X",
295 hex($l_bit[$i]) | hex($m_bit[$i]) | hex($f_bit[$i]) );
298 return pack("a2" x 32, @bit );
304 local($l,$m,$f,$wh) = @_;
306 # ($l >= 0 && $l < 19 && $m >= 0 && $m < 21 && $f >= 0 && $f < 28) or
307 # die ("$0: get_ind() : invalid Jamo index\n");
309 if ( $wh == 1 ) { # leading consonant index; no final consonant if $f==0
310 $ind = $lconBase[$l] +
311 ($f > 0 ? $lconMap2[$m] : $lconMap1[$m] ) ;
313 elsif ( $wh == 2 ) { # medial vowel index
316 if ( $vowType[$m] == 1 ) {
317 # For vowels 'o' and alikes,
318 # Giyeok and Kieuk get special treatment
319 $ind += ( ($l==0 || $l == 15) ? 0 : 1)
323 $ind+= $tconType[$f];
331 $ind = $tconBase[$f] + $tconMap[$m];