2 unibmp2hex - program to turn a .bmp or .wbmp glyph matrix into a
3 GNU Unifont hex glyph set of 256 characters
5 Synopsis: unibmp2hex [-iin_file.bmp] [-oout_file.hex] [-phex_page_num] [-w]
8 Author: Paul Hardy, unifoundry <at> unifoundry.com, December 2007
11 Copyright (C) 2007, 2008, 2013 Paul Hardy
15 This program is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 2 of the License, or
18 (at your option) any later version.
20 This program is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program. If not, see <http://www.gnu.org/licenses/>.
36 unsigned hexdigit[16][4]; /* reference bitmaps of hex digits 0..F: 16 rows x 8 bits each, four rows packed per 32-bit word */
38 unsigned uniplane=0; /* Unicode "plane" number, 0..0xffffff: the nnnnnn in U+nnnnnn00..U+nnnnnnFF (code point >> 8) */
39 unsigned planeset=0; /* =1: use plane specified with -p parameter; =0: read it from the digits drawn in the bitmap */
40 unsigned flip=0; /* =1 if the glyph matrix is transposed, so row and column indices are swapped when reading glyphs */
41 unsigned forcewide=0; /* =1 to set each glyph to 16 pixels wide (-w option) */
43 /* The six Unicode plane digits as read from the bitmap, from left-most (0) to right-most (5); same four-word packing as hexdigit[] so the two can be compared directly */
44 unsigned unidigit[6][4];
48 int main(int argc, char *argv[]) {
50 int i, j, k; /* loop variables */
51 unsigned char inchar; /* temporary input character */
52 char header[MAXBUF]; /* input buffer for bitmap file header */
53 int wbmp=0; /* =0 for Windows Bitmap (.bmp); 1 for Wireless Bitmap (.wbmp) */
54 int fatal; /* =1 if a fatal error occurred */
55 int match; /* =1 if we're still matching a pattern, 0 if no match */
56 int empty1, empty2; /* =1 if bytes tested are all zeroes */
57 unsigned char thischar1[16], thischar2[16]; /* bytes of hex char */
58 int thisrow; /* index to point into thischar1[] and thischar2[] */
59 int tmpsum; /* temporary sum to see if a character is blank */
61 unsigned char bitmap[17*32][18*32/8]; /* final bitmap */
62 char wide[65536]={65536 * 0}; /* 1 = force double width code point */
64 char *infile="", *outfile=""; /* names of input and output files */
65 FILE *infp, *outfp; /* file pointers of input and output files */
68 for (i = 1; i < argc; i++) {
69 if (argv[i][0] == '-') { /* this is an option argument */
71 case 'i': /* name of input file */
74 case 'o': /* name of output file */
75 outfile = &argv[i][2];
77 case 'p': /* specify a Unicode plane */
78 sscanf(&argv[i][2], "%x", &uniplane); /* Get Unicode plane */
79 planeset = 1; /* Use specified range, not what's in bitmap */
81 case 'w': /* force wide (16 pixels) for each glyph */
84 default: /* if unrecognized option, print list and exit */
85 fprintf(stderr, "\nSyntax:\n\n");
86 fprintf(stderr, " %s -p<Unicode_Page> ", argv[0]);
87 fprintf(stderr, "-i<Input_File> -o<Output_File> -w\n\n");
88 fprintf(stderr, " -w specifies .wbmp output instead of ");
89 fprintf(stderr, "default Windows .bmp output.\n\n");
90 fprintf(stderr, " -p is followed by 1 to 6 ");
91 fprintf(stderr, "Unicode plane hex digits ");
92 fprintf(stderr, "(default is Page 0).\n\n");
93 fprintf(stderr, "\nExample:\n\n");
94 fprintf(stderr, " %s -p83 -iunifont.hex -ou83.bmp\n\n\n",
102 Make sure we can open any I/O files that were specified before
105 if (strlen(infile) > 0) {
106 if ((infp = fopen(infile, "r")) == NULL) {
107 fprintf(stderr, "Error: can't open %s for input.\n", infile);
114 if (strlen(outfile) > 0) {
115 if ((outfp = fopen(outfile, "w")) == NULL) {
116 fprintf(stderr, "Error: can't open %s for output.\n", outfile);
124 Initialize selected code points for double width (16x16).
125 Double-width is forced in cases where a glyph (usually a combining
126 glyph) only occupies the left-hand side of a 16x16 grid, but must
127 be rendered as double-width to appear properly with other glyphs
128 in a given script. If additions were made to a script after
129 Unicode 5.0, the Unicode version is given in parentheses after
132 for (i = 0x0700; i <= 0x074F; i++) wide[i] = 1; /* Syriac */
133 for (i = 0x0800; i <= 0x083F; i++) wide[i] = 1; /* Samaritan (5.2) */
134 for (i = 0x0900; i <= 0x0DFF; i++) wide[i] = 1; /* Indic */
135 for (i = 0x0F00; i <= 0x0FFF; i++) wide[i] = 1; /* Tibetan */
136 for (i = 0x1100; i <= 0x11FF; i++) wide[i] = 1; /* Hangul Jamo */
137 for (i = 0x1800; i <= 0x18AF; i++) wide[i] = 1; /* Mongolian */
138 for (i = 0x1900; i <= 0x194F; i++) wide[i] = 1; /* Limbu */
139 for (i = 0x1980; i <= 0x19DF; i++) wide[i] = 1; /* New Tai Lue */
140 for (i = 0x1A00; i <= 0x1A1F; i++) wide[i] = 1; /* Buginese */
141 for (i = 0x1B00; i <= 0x1B7F; i++) wide[i] = 1; /* Balinese */
142 for (i = 0x1B80; i <= 0x1BBF; i++) wide[i] = 1; /* Sundanese (5.1) */
143 for (i = 0x1BC0; i <= 0x1BFF; i++) wide[i] = 1; /* Batak (6.0) */
144 for (i = 0x1C00; i <= 0x1C4F; i++) wide[i] = 1; /* Lepcha (5.1) */
145 for (i = 0x1CD0; i <= 0x1CFF; i++) wide[i] = 1; /* Vedic Extensions (5.2) */
146 for (i = 0x2E80; i <= 0xA4CF; i++) wide[i] = 1; /* CJK */
147 for (i = 0x1A20; i <= 0x1AAF; i++) wide[i] = 1; /* Tai Tham (5.2) */
148 for (i = 0xA930; i <= 0xA95F; i++) wide[i] = 1; /* Rejang (5.1) */
149 for (i = 0xA980; i <= 0xA9DF; i++) wide[i] = 1; /* Javanese (5.2) */
150 for (i = 0xAA00; i <= 0xAA5F; i++) wide[i] = 1; /* Cham (5.1) */
151 for (i = 0xAAE0; i <= 0xAAFF; i++) wide[i] = 1; /* Meetei Mayek Ext (6.0) */
152 for (i = 0xABC0; i <= 0xABFF; i++) wide[i] = 1; /* Meetei Mayek (5.2) */
154 wide[0x303F] = 0; /* CJK half-space fill */
157 Determine whether or not the file is a Microsoft Windows Bitmap file.
158 If it starts with 'B', 'M', assume it's a Windows Bitmap file.
159 Otherwise, assume it's a Wireless Bitmap file.
161 WARNING: There isn't much in the way of error checking here --
162 if you give it a file that wasn't first created by hex2bmp.c,
165 fatal = 0; /* assume everything is okay with reading input file */
166 if ((header[0] = fgetc(infp)) != EOF) {
167 if ((header[1] = fgetc(infp)) != EOF) {
168 if (header[0] == 'B' && header[1] == 'M') {
169 wbmp = 0; /* Not a Wireless Bitmap -- it's a Windows Bitmap */
172 wbmp = 1; /* Assume it's a Wireless Bitmap */
182 fprintf(stderr, "Fatal error; end of input file.\n\n");
186 If this is a Wireless Bitmap (.wbmp) format file,
187 skip the header and point to the start of the bitmap itself.
191 header[i] = fgetc(infp);
195 for (i=0; i < 32*17; i++) {
196 for (j=0; j < 32*18/8; j++) {
197 inchar = fgetc(infp);
198 bitmap[i][j] = ~inchar; /* invert bits for proper color */
203 Otherwise, this must be a Windows Bitmap file, because we check
204 for that first. Skip past the header, but save it for possible
208 for (i=2; i<0x3e; i++)
209 header[i] = fgetc(infp);
213 for (i = 32*17-1; i >= 0; i--) {
214 for (j=0; j < 32*18/8; j++) {
215 inchar = fgetc(infp);
216 bitmap[i][j] = ~inchar; /* invert bits for proper color */
221 We've read the entire file. Now close the input file pointer.
225 We now have the header portion in the header[] array,
226 and have the bitmap portion from top-to-bottom in the bitmap[] array.
229 If no Unicode range (U+nnnnnn00 through U+nnnnnnFF) was specified
230 with a -p parameter, determine the range from the digits in the
233 Store bitmaps for the hex digit patterns that this file uses.
235 if (!planeset) { /* If Unicode range not specified with -p parameter */
236 for (i = 0x0; i <= 0xF; i++) { /* hex digit pattern we're storing */
237 for (j = 0; j < 4; j++) {
239 ((unsigned)bitmap[32 * (i+1) + 4 * j + 8 ][6] << 24 ) |
240 ((unsigned)bitmap[32 * (i+1) + 4 * j + 8 + 1][6] << 16 ) |
241 ((unsigned)bitmap[32 * (i+1) + 4 * j + 8 + 2][6] << 8 ) |
242 ((unsigned)bitmap[32 * (i+1) + 4 * j + 8 + 3][6] );
246 Read the Unicode plane digits into arrays for comparison, to
247 determine the upper four hex digits of the glyph addresses.
249 for (i = 0; i < 4; i++) {
250 for (j = 0; j < 4; j++) {
252 ((unsigned)bitmap[32 * 0 + 4 * j + 8 + 1][i + 3] << 24 ) |
253 ((unsigned)bitmap[32 * 0 + 4 * j + 8 + 2][i + 3] << 16 ) |
254 ((unsigned)bitmap[32 * 0 + 4 * j + 8 + 3][i + 3] << 8 ) |
255 ((unsigned)bitmap[32 * 0 + 4 * j + 8 + 4][i + 3] );
260 for (i = 4; i < 6; i++) {
261 for (j = 0; j < 4; j++) {
263 ((unsigned)bitmap[32 * 1 + 4 * j + 8 ][i] << 24 ) |
264 ((unsigned)bitmap[32 * 1 + 4 * j + 8 + 1][i] << 16 ) |
265 ((unsigned)bitmap[32 * 1 + 4 * j + 8 + 2][i] << 8 ) |
266 ((unsigned)bitmap[32 * 1 + 4 * j + 8 + 3][i] );
267 tmpsum |= unidigit[i][j];
270 if (tmpsum == 0) { /* the glyph matrix is transposed */
271 flip = 1; /* note transposed order for processing glyphs in matrix */
273 Get 5th and 6th hex digits by shifting first column header left by
274 1.5 columns, thereby shifting the hex digit right after the leading
275 "U+nnnn" page number.
277 for (i = 0x08; i < 0x18; i++) {
278 bitmap[i][7] = (bitmap[i][8] << 4) | ((bitmap[i][ 9] >> 4) & 0xf);
279 bitmap[i][8] = (bitmap[i][9] << 4) | ((bitmap[i][10] >> 4) & 0xf);
281 for (i = 4; i < 6; i++) {
282 for (j = 0; j < 4; j++) {
284 ((unsigned)bitmap[4 * j + 8 + 1][i + 3] << 24 ) |
285 ((unsigned)bitmap[4 * j + 8 + 2][i + 3] << 16 ) |
286 ((unsigned)bitmap[4 * j + 8 + 3][i + 3] << 8 ) |
287 ((unsigned)bitmap[4 * j + 8 + 4][i + 3] );
293 Now determine the Unicode plane by comparing unidigit[0..5] to
294 the hexdigit[0x0..0xF] array.
297 for (i=0; i<6; i++) { /* go through one bitmap digit at a time */
298 match = 0; /* haven't found pattern yet */
299 for (j = 0x0; !match && j <= 0xF; j++) {
300 if (unidigit[i][0] == hexdigit[j][0] &&
301 unidigit[i][1] == hexdigit[j][1] &&
302 unidigit[i][2] == hexdigit[j][2] &&
303 unidigit[i][3] == hexdigit[j][3]) { /* we found the digit */
313 Now read each glyph and print it as hex.
315 for (i = 0x0; i <= 0xf; i++) {
316 for (j = 0x0; j <= 0xf; j++) {
317 for (k = 0; k < 16; k++) {
318 if (flip) { /* transpose glyph matrix */
319 thischar1[k] = bitmap[32*(j+1) + k + 7][4 * (i+2) + 1];
320 thischar2[k] = bitmap[32*(j+1) + k + 7][4 * (i+2) + 2];
323 thischar1[k] = bitmap[32*(i+1) + k + 7][4 * (j+2) + 1];
324 thischar2[k] = bitmap[32*(i+1) + k + 7][4 * (j+2) + 2];
328 If the second half of the 16*16 character is all zeroes, this
329 character is only 8 bits wide, so print a half-width character.
332 for (k=0; (empty1 || empty2) && k < 16; k++) {
333 if (thischar1[k] != 0) empty1 = 0;
334 if (thischar2[k] != 0) empty2 = 0;
337 Only print this glyph if it isn't blank.
339 if (!empty1 || !empty2) {
341 If the second half is empty, this is a half-width character.
342 Only print the first half.
345 Original GNU Unifont format is four hexadecimal digit character
346 code followed by a colon followed by a hex string. Add support
347 for codes beyond the Basic Multilingual Plane.
349 Unicode ranges from U+0000 to U+10FFFF, so print either a
350 4-digit or a 6-digit code point. Note that this software
351 should support up to an 8-digit code point, extending beyond
352 the normal Unicode range, but this has not been fully tested.
355 fprintf(outfp, "%04X%X%X:", uniplane, i, j); // 6 digit code pt.
357 fprintf(outfp, "%02X%X%X:", uniplane, i, j); // 4 digit code pt.
358 for (thisrow=0; thisrow<16; thisrow++) {
360 If second half is empty and we're not forcing this
361 code point to double width, print as single width
364 empty2 && !wide[(uniplane << 8) | (i << 4) | j])
371 thischar1[thisrow], thischar2[thisrow]);
373 fprintf(outfp, "\n");