Commit | Line | Data |
---|---|---|
58352bb0 LM |
1 | /* depend: */ |
2 | /* cflags: */ | |
c9987f3b | 3 | /* linker: code.o debug.o */ |
58352bb0 LM |
4 | |
5 | #include <assert.h> | |
6 | #include <getopt.h> | |
7 | #include <malloc.h> | |
8 | #include <stdio.h> | |
9 | #include <stdlib.h> | |
10 | #include <string.h> | |
c9987f3b LM |
11 | #include "code.h" |
12 | #include "debug.h" | |
58352bb0 LM |
13 | |
14 | /* constants */ | |
15 | ||
58352bb0 LM |
16 | #define BUFFER_SIZE 4096 |
17 | ||
58352bb0 LM |
18 | /* macros */ |
19 | ||
58352bb0 LM |
20 | /* gobal variables */ |
21 | ||
22 | char *progname = NULL; | |
58352bb0 LM |
23 | |
24 | /* help function */ | |
25 | ||
26 | void usage (int ret) | |
27 | { | |
28 | FILE *fd = ret ? stderr : stdout; | |
29 | fprintf (fd, "usage: %s\n", progname); | |
30 | fprintf (fd, " -h : help message\n"); | |
31 | fprintf (fd, " -i <file>: input file\n"); | |
32 | fprintf (fd, " -o <file>: output file\n"); | |
33 | fprintf (fd, " -v : verbose level (%d)\n", verbose); | |
34 | ||
35 | exit (ret); | |
36 | } | |
37 | ||
38 | /* create occurence table */ | |
39 | ||
40 | int *create_table (char *filename) | |
41 | { | |
c9987f3b | 42 | byte_t buffer[BUFFER_SIZE] = {0}; |
58352bb0 LM |
43 | int nbread; |
44 | int *table = NULL; | |
45 | FILE *fid = NULL; | |
46 | ||
37062814 | 47 | VERBOSE (DEBUG, PRINTF ("start creating occurence table\n")); |
58352bb0 LM |
48 | |
49 | /* memory allocation */ | |
c9987f3b | 50 | table = (int *) calloc (NB_BYTES, sizeof (int)); |
58352bb0 LM |
51 | if (table == NULL) { |
52 | VERBOSE (ERROR, printf ("can't allocate memory\n")); | |
53 | return NULL; | |
54 | } | |
55 | VERBOSE (INFO, printf ("memory allocated\n")); | |
56 | ||
57 | /* open file */ | |
58 | fid = fopen (filename, "rb"); | |
59 | if (fid == NULL) { | |
60 | VERBOSE (ERROR, printf ("can't open file '%s'\n", filename)); | |
61 | free (table); | |
62 | return NULL; | |
63 | } | |
64 | VERBOSE (INFO, printf ("file '%s' opened\n", filename)); | |
65 | ||
66 | /* read file */ | |
67 | while (!feof (fid)) { | |
68 | nbread = fread (buffer, 1, BUFFER_SIZE, fid); | |
69 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread)); | |
70 | while (nbread--) { | |
71 | table[(int)buffer[nbread]]++; | |
72 | } | |
73 | } | |
74 | ||
75 | /* close file */ | |
76 | fclose (fid); | |
77 | ||
37062814 | 78 | VERBOSE (DEBUG, PRINTF ("end creating occurence table\n")); |
58352bb0 LM |
79 | |
80 | return table; | |
81 | } | |
82 | ||
83 | /* print occurence table */ | |
84 | ||
85 | void print_occ_table (int *table) | |
86 | { | |
87 | int i; | |
88 | ||
89 | printf ("Occurence table\n"); | |
c9987f3b | 90 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 LM |
91 | if (table[i]) { |
92 | printf ("0x%02x '%c': %d\n", i, ((i < 32) || (i > 127)) ? '.' : i, table[i]); | |
93 | } | |
94 | } | |
95 | } | |
96 | ||
97 | /* leaf structure */ | |
98 | ||
99 | typedef struct _leaf_t | |
100 | { | |
101 | struct _leaf_t *left; | |
102 | struct _leaf_t *right; | |
103 | int occ; | |
c9987f3b | 104 | byte_t c; |
58352bb0 LM |
105 | } leaf_t; |
106 | ||
107 | /* initialize forest */ | |
108 | ||
109 | leaf_t **init_forest (int *table) | |
110 | { | |
111 | leaf_t **leafs = NULL; | |
112 | int nb_leafs = 0; | |
113 | int i, l; | |
114 | ||
115 | VERBOSE (DEBUG, PRINTF ("start initiliazing forest\n")); | |
116 | ||
117 | /* count number of leafs */ | |
c9987f3b | 118 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 LM |
119 | if (table[i] > 0) { |
120 | nb_leafs++; | |
121 | } | |
122 | } | |
123 | ||
124 | /* allocate memory */ | |
125 | leafs = (leaf_t **) calloc (nb_leafs + 1, sizeof (leaf_t *)); | |
126 | if (leafs == NULL) { | |
127 | VERBOSE (ERROR, printf ("can't allocate memory\n")); | |
128 | return NULL; | |
129 | } | |
130 | ||
131 | /* initialize leafs */ | |
c9987f3b | 132 | for (i = 0, l = 0; i < NB_BYTES; i++) { |
58352bb0 LM |
133 | if (table[i] > 0) { |
134 | leafs[l] = (leaf_t *) calloc (1, sizeof (leaf_t)); | |
135 | if (leafs[l] == NULL) { | |
136 | VERBOSE (ERROR, printf ("can't allocate memory\n")); | |
137 | return NULL; | |
138 | } | |
139 | leafs[l]->occ = table[i]; | |
140 | leafs[l]->c = i; | |
141 | l++; | |
142 | } | |
143 | } | |
144 | ||
145 | VERBOSE (DEBUG, PRINTF ("end initiliazing forest\n")); | |
146 | ||
147 | return leafs; | |
148 | } | |
149 | ||
150 | /* create tree */ | |
151 | ||
152 | leaf_t *create_tree (leaf_t **leafs) | |
153 | { | |
154 | leaf_t *branch = NULL; | |
155 | int nb_leafs = 0; | |
37062814 LM |
156 | int last = -1; |
157 | int ante; | |
58352bb0 LM |
158 | int i, j; |
159 | ||
160 | VERBOSE (DEBUG, PRINTF ("start creating tree\n")); | |
161 | ||
162 | /* count number of leafs */ | |
163 | while (leafs[nb_leafs] != NULL) { | |
164 | nb_leafs++; | |
165 | } | |
166 | ||
167 | /* create tree */ | |
168 | for (j = 0; j < nb_leafs - 1; j++) { | |
169 | ||
170 | /* look for leatest occurence */ | |
171 | last = -1; | |
172 | for (i = 0; i < nb_leafs; i++) { | |
173 | if (leafs[i] == NULL) { | |
174 | continue; | |
175 | } | |
176 | if ((last == -1) || (leafs[i]->occ < leafs[last]->occ)) { | |
177 | last = i; | |
178 | } | |
179 | } | |
180 | ||
181 | /* look for ante leatest occurence */ | |
182 | ante = -1; | |
183 | for (i = 0; i < nb_leafs; i++) { | |
184 | if ((i == last) || (leafs[i] == NULL)) { | |
185 | continue; | |
186 | } | |
187 | if ((ante == -1) || (leafs[i]->occ < leafs[ante]->occ)) { | |
188 | ante = i; | |
189 | } | |
190 | } | |
191 | ||
192 | /* create branch */ | |
193 | if ((last == -1) || (ante == -1)) { | |
194 | VERBOSE (ERROR, printf ("error during tree building\n")); | |
195 | return NULL; | |
196 | } | |
197 | branch = (leaf_t *) calloc (1, sizeof (leaf_t)); | |
198 | if (branch == NULL) { | |
199 | VERBOSE (ERROR, printf ("can't allocate memory\n")); | |
200 | return NULL; | |
201 | } | |
202 | branch->left = leafs[last]; | |
203 | branch->right = leafs[ante]; | |
204 | branch->occ = branch->left->occ + branch->right->occ; | |
205 | leafs[last] = branch; | |
206 | leafs[ante] = NULL; | |
207 | } | |
208 | ||
209 | VERBOSE (DEBUG, PRINTF ("end creating tree\n")); | |
210 | ||
37062814 | 211 | return (last != -1) ? leafs[last] : NULL; |
58352bb0 LM |
212 | } |
213 | ||
214 | /* free tree */ | |
215 | ||
216 | void free_tree (leaf_t *root) { | |
217 | if (root) { | |
218 | if (root->left) { | |
219 | free_tree (root->left); | |
220 | } | |
221 | if (root->right) { | |
222 | free_tree (root->right); | |
223 | } | |
224 | free (root); | |
225 | } | |
226 | } | |
227 | ||
58352bb0 LM |
228 | /* explore tree */ |
229 | ||
230 | void explore_tree (code_t *table, leaf_t *root, char *code, int index) | |
231 | { | |
232 | if ((root->left == NULL) && (root->right == NULL)) { | |
c9987f3b | 233 | codcpy ((char *)(table + (int)(root->c)), sizeof (code_t), code); |
58352bb0 LM |
234 | } |
235 | else { | |
c9987f3b | 236 | codcpy (code + index, sizeof (code_t), "1"); |
58352bb0 | 237 | explore_tree (table, root->left, code, index + 1); |
c9987f3b | 238 | codcpy (code + index, sizeof (code_t), "0"); |
58352bb0 LM |
239 | explore_tree (table, root->right, code, index + 1); |
240 | } | |
241 | } | |
242 | ||
243 | /* create code table */ | |
244 | ||
245 | code_t *create_code (leaf_t *root) | |
246 | { | |
247 | code_t *table = NULL; | |
248 | code_t code = {0}; | |
249 | ||
250 | VERBOSE (DEBUG, PRINTF ("start creating code table\n")); | |
251 | ||
252 | /* allocate table */ | |
c9987f3b | 253 | table = (code_t *) calloc (NB_BYTES, sizeof (code_t)); |
58352bb0 LM |
254 | if (table == NULL) { |
255 | VERBOSE (ERROR, printf ("can't allocate memory\n")); | |
256 | return NULL; | |
257 | } | |
258 | ||
259 | explore_tree (table, root, (char *)&code, 0); | |
260 | ||
261 | VERBOSE (DEBUG, PRINTF ("end creating code table\n")); | |
262 | ||
263 | return table; | |
264 | } | |
265 | ||
266 | /* print code table */ | |
267 | ||
268 | void print_code_table (code_t *codes) | |
269 | { | |
270 | char *code; | |
271 | int i; | |
272 | ||
273 | printf ("Code table\n"); | |
c9987f3b | 274 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 275 | code = (char *)(codes + i); |
c9987f3b | 276 | if (codlen (code) == 0) { |
58352bb0 LM |
277 | continue; |
278 | } | |
279 | printf ("0x%02x '%c': %s\n", i, ((i < 32) || (i > 127)) ? '.' : i, code); | |
280 | } | |
281 | } | |
282 | ||
283 | /* encode header and code table */ | |
284 | ||
c9987f3b | 285 | byte_t *encode_header_table (code_t *codes, int *occ) |
58352bb0 | 286 | { |
c9987f3b LM |
287 | byte_t buffer[NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6] = {0}; |
288 | char bits[(NB_BYTES - 1) + 8 + 1] = {0}; | |
58352bb0 | 289 | char *code; |
c9987f3b | 290 | byte_t *header = buffer; |
58352bb0 LM |
291 | int i, j, length, mode; |
292 | int nb = 0; | |
293 | int size = 0; | |
294 | ||
295 | VERBOSE (DEBUG, PRINTF ("start encoding header and code table\n")); | |
296 | ||
297 | /* mode 1 or 2 */ | |
c9987f3b | 298 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 299 | code = (char *)(codes + i); |
c9987f3b | 300 | if (codlen (code) > 0) { |
58352bb0 | 301 | nb++; |
c9987f3b | 302 | size += codlen (code) * occ[i]; |
58352bb0 LM |
303 | } |
304 | } | |
c9987f3b | 305 | mode = (NB_BYTES < 2 * nb + 1) ? 1 : 2; |
58352bb0 LM |
306 | VERBOSE (DEBUG, PRINTF ("nb chars: %d\n", nb)); |
307 | VERBOSE (DEBUG, PRINTF ("mode: %d\n", mode)); | |
308 | VERBOSE (DEBUG, PRINTF ("size: %d\n", size)); | |
37062814 | 309 | VERBOSE (DEBUG, PRINTF ("rem: %d\n", size % 256)); |
58352bb0 LM |
310 | |
311 | /* header */ | |
c9987f3b | 312 | codcpy ((char *)header, sizeof (buffer), (mode == 1) ? "MZ1 " : "MZ2 "); |
58352bb0 LM |
313 | header += 6; |
314 | ||
315 | /* size */ | |
316 | switch (mode) { | |
317 | case 1: | |
c9987f3b | 318 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 319 | code = (char *)(codes + i); |
c9987f3b | 320 | *(header++) = (byte_t) codlen (code); |
58352bb0 LM |
321 | } |
322 | break; | |
323 | case 2: | |
c9987f3b LM |
324 | *(header++) = (byte_t)(nb - 1); |
325 | for (i = 0; i < NB_BYTES; i++) { | |
58352bb0 | 326 | code = (char *)(codes + i); |
c9987f3b LM |
327 | if (codlen (code) > 0) { |
328 | *(header++) = (byte_t) i; | |
329 | *(header++) = (byte_t) codlen (code); | |
58352bb0 LM |
330 | } |
331 | } | |
332 | break; | |
333 | } | |
334 | ||
335 | /* bits */ | |
c9987f3b | 336 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 337 | code = (char *)(codes + i); |
c9987f3b LM |
338 | if (codlen (code) > 0) { |
339 | codcat (bits, sizeof (code_t), code); | |
340 | while (codlen (bits) > (8 - 1)) { | |
58352bb0 LM |
341 | for (j = 0; j < 8; j++) { |
342 | *header <<= 1; | |
343 | if (bits[j] == '1') { | |
344 | (*header)++; | |
345 | } | |
346 | } | |
c9987f3b | 347 | codcpy (bits, sizeof (code_t), bits + 8); |
58352bb0 LM |
348 | header++; |
349 | } | |
350 | } | |
351 | } | |
c9987f3b LM |
352 | if (codlen (bits) > 0) { |
353 | for (j = 0; j < (int)codlen (bits); j++) { | |
58352bb0 LM |
354 | *header <<= 1; |
355 | if (bits[j] == '1') { | |
356 | (*header)++; | |
357 | } | |
358 | } | |
c9987f3b | 359 | for (j = (int)codlen (bits); j < 8; j++) { |
37062814 LM |
360 | *header <<= 1; |
361 | } | |
58352bb0 LM |
362 | header++; |
363 | } | |
364 | ||
365 | /* length */ | |
366 | length = (int)(header - buffer - 6); | |
367 | VERBOSE (DEBUG, PRINTF ("lengh: %d %02x %02x\n", length, length >> 8, length & 0xff)); | |
c9987f3b LM |
368 | buffer[3] = (byte_t)(length >> 8); |
369 | buffer[4] = (byte_t)(length & 0xff); | |
370 | buffer[5] = (byte_t)(size % 256); | |
58352bb0 LM |
371 | |
372 | /* allocation */ | |
c9987f3b | 373 | header = (byte_t *) calloc (length + 6, 1); |
58352bb0 LM |
374 | memcpy (header, buffer, length + 6); |
375 | ||
376 | VERBOSE (DEBUG, PRINTF ("end encoding header and code table\n")); | |
377 | ||
c9987f3b | 378 | return header; |
58352bb0 LM |
379 | } |
380 | ||
381 | /* print header */ | |
382 | ||
c9987f3b | 383 | void print_header (byte_t *header) |
58352bb0 LM |
384 | { |
385 | int length, i; | |
386 | ||
c9987f3b | 387 | length = (header[3] << 8) + header[4]; |
58352bb0 LM |
388 | VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length)); |
389 | for (i = 0; i < length + 6; i++) { | |
c9987f3b | 390 | printf ("%02x", header[i]); |
58352bb0 LM |
391 | } |
392 | printf ("\n"); | |
393 | } | |
394 | ||
395 | /* write crompressed file */ | |
396 | ||
c9987f3b | 397 | int write_compress (char *output, char *input, code_t *codes, byte_t *header) |
58352bb0 | 398 | { |
c9987f3b LM |
399 | byte_t bufin[BUFFER_SIZE] = {0}; |
400 | byte_t bufout[BUFFER_SIZE] = {0}; | |
401 | char bits[(NB_BYTES - 1) + 8 + 1] = {0}; | |
58352bb0 LM |
402 | FILE *fin, *fout; |
403 | int length = 0; | |
404 | int i, j, nbread; | |
c9987f3b | 405 | byte_t *pt; |
58352bb0 LM |
406 | |
407 | VERBOSE (DEBUG, PRINTF ("start writting compressed file\n")); | |
408 | ||
409 | /* open input file */ | |
410 | fin = fopen (input, "rb"); | |
411 | if (fin == NULL) { | |
37062814 | 412 | VERBOSE (ERROR, printf ("can't open file '%s' for reading\n", input)); |
58352bb0 LM |
413 | return 1; |
414 | } | |
415 | VERBOSE (INFO, printf ("file '%s' opened\n", input)); | |
416 | ||
417 | /* open output file */ | |
418 | fout = fopen (output, "wb"); | |
419 | if (fin == NULL) { | |
37062814 | 420 | VERBOSE (ERROR, printf ("can't open file '%s' for writing\n", output)); |
58352bb0 LM |
421 | return 1; |
422 | } | |
423 | VERBOSE (INFO, printf ("file '%s' opened\n", output)); | |
424 | ||
425 | /* write header */ | |
c9987f3b | 426 | length = (header[3] << 8) + header[4]; |
58352bb0 LM |
427 | VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length)); |
428 | fwrite (header, 1, length + 6, fout); | |
429 | ||
430 | /* write file */ | |
431 | pt = bufout; | |
432 | while (!feof (fin)) { | |
433 | nbread = fread (bufin, 1, BUFFER_SIZE, fin); | |
434 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread)); | |
435 | for (i = 0; i < nbread; i++) { | |
c9987f3b LM |
436 | codcat (bits, sizeof (code_t), (char *)(codes + bufin[i])); |
437 | while (codlen (bits) > (8 - 1)) { | |
58352bb0 LM |
438 | for (j = 0; j < 8; j++) { |
439 | *pt <<= 1; | |
440 | if (bits[j] == '1') { | |
441 | (*pt)++; | |
442 | } | |
443 | } | |
c9987f3b | 444 | codcpy (bits, sizeof (code_t), bits + 8); |
37062814 | 445 | if (pt - bufout == BUFFER_SIZE - 1) { |
58352bb0 LM |
446 | fwrite (bufout, 1, BUFFER_SIZE, fout); |
447 | pt = bufout; | |
37062814 LM |
448 | } else { |
449 | pt++; | |
58352bb0 LM |
450 | } |
451 | } | |
452 | } | |
453 | } | |
c9987f3b LM |
454 | VERBOSE (DEBUG, PRINTF ("lastest bits : %d\n", codlen (bits))); |
455 | if (codlen (bits) > 0) { | |
456 | for (j = 0; j < (int)codlen (bits); j++) { | |
58352bb0 LM |
457 | *pt <<= 1; |
458 | if (bits[j] == '1') { | |
459 | (*pt)++; | |
460 | } | |
461 | } | |
c9987f3b | 462 | for (j = (int)codlen (bits); j < 8; j++) { |
37062814 | 463 | *pt <<= 1; |
58352bb0 LM |
464 | } |
465 | pt++; | |
466 | } | |
467 | if (pt != bufout) { | |
37062814 | 468 | VERBOSE (DEBUG, PRINTF ("last partial buffer written: %u\n", pt - bufout)); |
58352bb0 LM |
469 | fwrite (bufout, 1, pt - bufout, fout); |
470 | } | |
471 | ||
472 | /* closing */ | |
473 | fclose (fin); | |
474 | fclose (fout); | |
475 | ||
476 | VERBOSE (DEBUG, PRINTF ("end writting compressed file\n")); | |
477 | ||
478 | return 0; | |
479 | } | |
480 | ||
37062814 LM |
481 | /* read header */ |
482 | ||
483 | code_t *read_header (char *filename) { | |
c9987f3b | 484 | byte_t buffer[NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6] = {0}; |
37062814 | 485 | code_t *table = NULL; |
c9987f3b LM |
486 | byte_t *codes = NULL; |
487 | byte_t cur; | |
488 | int lengths[NB_BYTES] = {0}; | |
37062814 LM |
489 | FILE *fid = NULL; |
490 | int mode = 0; | |
491 | size_t i, j, l, nb, size; | |
492 | ||
493 | VERBOSE (DEBUG, PRINTF ("start reading header\n")); | |
494 | ||
495 | /* open file */ | |
496 | fid = fopen (filename, "rb"); | |
497 | if (fid == NULL) { | |
498 | VERBOSE (ERROR, printf ("can't open file '%s'\n", filename)); | |
499 | return NULL; | |
500 | } | |
501 | VERBOSE (INFO, printf ("file '%s' opened\n", filename)); | |
502 | ||
503 | /* read magic number */ | |
504 | nb = fread (buffer, 1, 6, fid); | |
505 | VERBOSE (DEBUG, PRINTF ("nb, buffer: %d 0x%02x 0x%02x\n", nb, buffer[0], buffer[1])); | |
506 | if ((nb == 6) && (buffer[0] == 'M') && (buffer[1] == 'Z')) { | |
507 | mode = (buffer[2] == '1') ? 1 : (buffer[2] == '2') ? 2 : 0; | |
c9987f3b | 508 | size = (buffer[3] << 8) + buffer[4]; |
37062814 | 509 | VERBOSE (DEBUG, PRINTF ("mode, size: %d %d\n", mode, size)); |
c9987f3b | 510 | if (size > NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES) { |
37062814 LM |
511 | mode = 0; |
512 | } else { | |
513 | nb = fread (buffer, 1, size, fid); | |
514 | VERBOSE (DEBUG, PRINTF ("nb read: %d\n", nb)); | |
515 | if (nb != size) { | |
516 | mode = 0; | |
517 | } | |
518 | } | |
519 | } | |
520 | fclose (fid); | |
521 | if (mode == 0) { | |
522 | VERBOSE (ERROR, printf ("incorrect file\n")); | |
523 | return NULL; | |
524 | } | |
525 | ||
526 | /* analyse header */ | |
c9987f3b | 527 | codes = buffer; |
37062814 LM |
528 | switch (mode) { |
529 | case 1: | |
c9987f3b | 530 | for (i = 0; i < NB_BYTES; i++) { |
37062814 LM |
531 | lengths[i] = *(codes++); |
532 | } | |
533 | break; | |
534 | case 2: | |
535 | nb = *(codes++) + 1; | |
536 | VERBOSE (DEBUG, PRINTF ("nb codes: %d\n", nb)); | |
537 | for (i = 0; i < nb; i++) { | |
538 | j = *(codes++); | |
539 | lengths[j] = *(codes++); | |
540 | } | |
541 | break; | |
542 | } | |
c9987f3b | 543 | VERBOSE (DEBUG, for (i = 0; i < NB_BYTES; i++) if (lengths[i]) PRINTF ("%d: %d\n", i, lengths[i])); |
37062814 LM |
544 | |
545 | /* check lengths */ | |
c9987f3b | 546 | for (i = 0, l = 0; i < NB_BYTES; i++) { |
37062814 LM |
547 | l += lengths[i]; |
548 | } | |
549 | if (((mode == 1) && (size - 256 != (l + 7) / 8)) || | |
550 | ((mode == 2) && (size - 2 * nb - 1 != (l + 7) / 8))) { | |
551 | VERBOSE (ERROR, printf ("incorrect code table length\n")); | |
552 | return NULL; | |
553 | } | |
554 | ||
555 | /* allocate table */ | |
c9987f3b | 556 | table = (code_t *) calloc (NB_BYTES, sizeof (code_t)); |
37062814 LM |
557 | if (table == NULL) { |
558 | VERBOSE (ERROR, printf ("can't allocate memory\n")); | |
559 | return NULL; | |
560 | } | |
561 | ||
562 | /* decode code */ | |
563 | cur = *(codes++); | |
564 | l = 8; | |
c9987f3b | 565 | for (i = 0; i < NB_BYTES; i++) { |
37062814 LM |
566 | if (lengths[i] == 0) { |
567 | continue; | |
568 | } | |
569 | while (lengths[i]--) { | |
c9987f3b | 570 | codcat ((char *)(table + i), sizeof (code_t), ((cur & 0x80) == 0) ? "0" : "1"); |
37062814 LM |
571 | l--; |
572 | cur <<= 1; | |
573 | if (l == 0) { | |
574 | cur = *(codes++); | |
575 | l = 8; | |
576 | } | |
577 | } | |
578 | } | |
579 | ||
580 | VERBOSE (DEBUG, PRINTF ("end reading header\n")); | |
581 | ||
582 | return table; | |
583 | } | |
584 | ||
585 | /* write decompressed file */ | |
586 | ||
587 | int write_decompress (char *output, char *input, code_t *codes) | |
588 | { | |
c9987f3b LM |
589 | byte_t bufin[BUFFER_SIZE] = {0}; |
590 | byte_t bufout[BUFFER_SIZE] = {0}; | |
591 | byte_t bufhea[MAX(NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6, BUFFER_SIZE)] = {0}; | |
592 | char bits[(NB_BYTES - 1) + 1] = {0}; | |
37062814 LM |
593 | FILE *fin, *fout; |
594 | int i, j, k, nb, size, rem; | |
595 | int is_found; | |
596 | int l = 0; | |
c9987f3b | 597 | byte_t *pt; |
37062814 LM |
598 | |
599 | VERBOSE (DEBUG, PRINTF ("start writing decompressed file\n")); | |
600 | ||
601 | /* open file for reading */ | |
602 | fin = fopen (input, "rb"); | |
603 | if (fin == NULL) { | |
604 | VERBOSE (ERROR, printf ("can't open file '%s' for reading\n", input)); | |
605 | return 1; | |
606 | } | |
607 | VERBOSE (INFO, printf ("file '%s' opened\n", input)); | |
608 | ||
609 | /* read magic number */ | |
c9987f3b | 610 | nb = fread (bufhea, 1, 6, fin); |
37062814 LM |
611 | if (nb != 6) { |
612 | VERBOSE (ERROR, printf ("can't read file\n")); | |
613 | fclose (fin); | |
614 | return 1; | |
615 | } | |
c9987f3b | 616 | size = (bufhea[3] << 8) + bufhea[4]; |
37062814 | 617 | VERBOSE (DEBUG, printf ("table size: %d\n", size)); |
c9987f3b | 618 | rem = bufhea[5]; |
37062814 | 619 | VERBOSE (DEBUG, printf ("remainder: %d\n", rem)); |
c9987f3b | 620 | nb = fread (bufhea, 1, size, fin); |
37062814 LM |
621 | if (nb != size) { |
622 | VERBOSE (ERROR, printf ("can't read file\n")); | |
623 | fclose (fin); | |
624 | return 1; | |
625 | } | |
626 | ||
627 | /* open file for writing */ | |
628 | fout = fopen (output, "wb"); | |
629 | if (fout == NULL) { | |
630 | VERBOSE (ERROR, printf ("can't open file '%s' for writing\n", output)); | |
631 | return 2; | |
632 | } | |
633 | VERBOSE (INFO, printf ("file '%s' opened\n", output)); | |
634 | ||
635 | /* write file */ | |
636 | pt = bufout; | |
637 | while (!feof (fin)) { | |
638 | nb = fread (bufin, 1, BUFFER_SIZE, fin); | |
639 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nb)); | |
640 | for (i = 0; i < nb; i++) { | |
641 | for (j = 0; j < 8; j++) { | |
c9987f3b | 642 | codcat (bits, sizeof (bits), ((bufin[i] & 0x80) == 0) ? "0" : "1"); |
37062814 LM |
643 | bufin[i] <<= 1; |
644 | l++; | |
c9987f3b | 645 | VERBOSE (DEBUG, PRINTF ("bits: %d - %s\n", codlen (bits), bits)); |
37062814 LM |
646 | |
647 | /* look for correct code */ | |
648 | is_found = 0; | |
c9987f3b LM |
649 | for (k = 0; (k < NB_BYTES) && (!is_found); k++) { |
650 | if (codcmp ((char *)(codes + k), bits) == 0) { | |
37062814 LM |
651 | is_found = 1; |
652 | VERBOSE (DEBUG, PRINTF ("found: %d\n", k)); | |
653 | *pt= k; | |
654 | bits[0] = 0; | |
655 | if (pt - bufout == BUFFER_SIZE - 1) { | |
656 | VERBOSE (DEBUG, PRINTF ("nb buffer out: %u\n", (pt - bufout))); | |
657 | fwrite (bufout, 1, BUFFER_SIZE, fout); | |
658 | pt = bufout; | |
659 | } else { | |
660 | pt++; | |
661 | } | |
662 | } | |
663 | } | |
664 | if ((i == nb - 1) && (l % 256 == rem) && (feof (fin))) { | |
665 | VERBOSE (DEBUG, PRINTF ("break\n")); | |
666 | break; | |
667 | } | |
668 | } | |
669 | } | |
670 | } | |
671 | if (pt != bufout) { | |
672 | VERBOSE (DEBUG, PRINTF ("nb buffer out: %u\n", (pt - bufout))); | |
673 | fwrite (bufout, 1, pt - bufout, fout); | |
674 | } | |
675 | ||
676 | /* close files */ | |
677 | fclose (fin); | |
678 | fclose (fout); | |
679 | ||
680 | VERBOSE (DEBUG, PRINTF ("end writing decompressed file\n")); | |
681 | ||
682 | return 0; | |
683 | } | |
684 | ||
58352bb0 LM |
685 | /* main function */ |
686 | ||
687 | int main (int argc, char *argv[]) | |
688 | { | |
689 | char *input = NULL; | |
690 | char *output = NULL; | |
691 | int *table = NULL; | |
692 | leaf_t **leafs = NULL; | |
693 | leaf_t *root = NULL; | |
694 | code_t *codes = NULL; | |
c9987f3b | 695 | byte_t *header = NULL; |
58352bb0 | 696 | int mode = COMPRESS; |
37062814 | 697 | int rc = 1; |
58352bb0 LM |
698 | |
699 | progname = argv[0]; | |
700 | ||
701 | int c; | |
37062814 | 702 | VERBOSE (DEBUG, PRINTF ("start processing arguments\n")); |
58352bb0 | 703 | while ((c = getopt(argc, argv, "cdhi:o:v:")) != EOF) { |
37062814 | 704 | VERBOSE (DEBUG, PRINTF ("option: %c\n", c)); |
58352bb0 LM |
705 | switch (c) { |
706 | case 'c': | |
707 | mode = COMPRESS; | |
708 | break; | |
709 | case 'd': | |
710 | mode = DECOMPRESS; | |
711 | break; | |
712 | case 'i': | |
58352bb0 | 713 | input = optarg; |
37062814 | 714 | VERBOSE (DEBUG, PRINTF ("input: %s\n", input)); |
58352bb0 LM |
715 | break; |
716 | case 'o': | |
58352bb0 | 717 | output = optarg; |
37062814 | 718 | VERBOSE (DEBUG, PRINTF ("output: %s\n", output)); |
58352bb0 LM |
719 | break; |
720 | case 'v': | |
58352bb0 LM |
721 | verbose = atoi (optarg); |
722 | VERBOSE (INFO, printf ("verbose: %d\n", verbose)); | |
723 | break; | |
724 | case 'h': | |
725 | default: | |
726 | usage (c != 'h'); | |
727 | } | |
728 | } | |
729 | if (argc - optind != 0) { | |
730 | fprintf (stderr, "%s: invalid option -- %s\n", progname, argv[optind]); | |
731 | usage (1); | |
732 | } | |
37062814 | 733 | VERBOSE (DEBUG, PRINTF ("end processing arguments\n")); |
58352bb0 LM |
734 | |
735 | switch (mode) { | |
736 | case COMPRESS: | |
737 | table = create_table (input); | |
738 | if (table == NULL) break; | |
739 | VERBOSE (INFO, print_occ_table (table)); | |
740 | ||
741 | leafs = init_forest (table); | |
742 | if (leafs == NULL) break; | |
743 | root = create_tree (leafs); | |
744 | if (root == NULL) break; | |
745 | codes = create_code (root); | |
746 | if (codes == NULL) break; | |
747 | VERBOSE (INFO, print_code_table (codes)); | |
748 | header = encode_header_table (codes, table); | |
749 | if (header == NULL) break; | |
750 | VERBOSE (INFO, print_header (header)); | |
751 | rc = write_compress (output, input, codes, header); | |
752 | break; | |
753 | case DECOMPRESS: | |
37062814 LM |
754 | codes = read_header (input); |
755 | if (codes == NULL) break; | |
756 | VERBOSE (INFO, print_code_table (codes)); | |
757 | rc = write_decompress (output, input, codes); | |
58352bb0 LM |
758 | break; |
759 | } | |
760 | ||
761 | /* clean everything */ | |
c9987f3b | 762 | fflush (stdout); |
58352bb0 LM |
763 | if (header) free (header); |
764 | if (codes) free (codes); | |
765 | if (root) free_tree (root); | |
766 | if (leafs) free (leafs); | |
767 | if (table) free (table); | |
768 | ||
769 | return rc; | |
770 | } | |
771 | ||
772 | // test: compress.exe -h | |
773 | // test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }' | |
774 | // test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }' | |
775 | // test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }' | |
37062814 LM |
776 | // test: compress.exe -c -i compress.c -o compress.mz |
777 | // test: ls -sS1 compress.c compress.mz | tail -1 | grep compress.mz | |
778 | // test: compress.exe -d -i compress.mz -o tmp.c | |
779 | // test: cmp compress.c tmp.c | |
780 | // test: rm compress.mz tmp.c | |
58352bb0 LM |
781 | |
782 | // vim: ts=4 sw=4 et |