Commit | Line | Data |
---|---|---|
58352bb0 LM |
1 | /* depend: */ |
2 | /* cflags: */ | |
c9987f3b | 3 | /* linker: code.o debug.o */ |
58352bb0 | 4 | |
58352bb0 LM |
5 | #include <malloc.h> |
6 | #include <stdio.h> | |
c9987f3b LM |
7 | #include "code.h" |
8 | #include "debug.h" | |
58352bb0 LM |
9 | |
10 | /* constants */ | |
11 | ||
58352bb0 LM |
12 | #define BUFFER_SIZE 4096 |
13 | ||
58352bb0 LM |
14 | /* macros */ |
15 | ||
58352bb0 LM |
16 | /* gobal variables */ |
17 | ||
18 | char *progname = NULL; | |
58352bb0 LM |
19 | |
20 | /* help function */ | |
21 | ||
22 | void usage (int ret) | |
23 | { | |
24 | FILE *fd = ret ? stderr : stdout; | |
25 | fprintf (fd, "usage: %s\n", progname); | |
26 | fprintf (fd, " -h : help message\n"); | |
27 | fprintf (fd, " -i <file>: input file\n"); | |
28 | fprintf (fd, " -o <file>: output file\n"); | |
29 | fprintf (fd, " -v : verbose level (%d)\n", verbose); | |
30 | ||
31 | exit (ret); | |
32 | } | |
33 | ||
d3dbaf98 LM |
34 | void blkcpy (void *dst, const void *src, int len) |
35 | { | |
36 | while (len--) { | |
37 | *((char *)dst++) = *((char *)src++); | |
38 | } | |
39 | } | |
40 | ||
58352bb0 LM |
41 | /* create occurence table */ |
42 | ||
43 | int *create_table (char *filename) | |
44 | { | |
c9987f3b | 45 | byte_t buffer[BUFFER_SIZE] = {0}; |
58352bb0 LM |
46 | int nbread; |
47 | int *table = NULL; | |
48 | FILE *fid = NULL; | |
49 | ||
37062814 | 50 | VERBOSE (DEBUG, PRINTF ("start creating occurence table\n")); |
58352bb0 LM |
51 | |
52 | /* memory allocation */ | |
c9987f3b | 53 | table = (int *) calloc (NB_BYTES, sizeof (int)); |
58352bb0 LM |
54 | if (table == NULL) { |
55 | VERBOSE (ERROR, printf ("can't allocate memory\n")); | |
56 | return NULL; | |
57 | } | |
58 | VERBOSE (INFO, printf ("memory allocated\n")); | |
59 | ||
60 | /* open file */ | |
61 | fid = fopen (filename, "rb"); | |
62 | if (fid == NULL) { | |
63 | VERBOSE (ERROR, printf ("can't open file '%s'\n", filename)); | |
64 | free (table); | |
65 | return NULL; | |
66 | } | |
67 | VERBOSE (INFO, printf ("file '%s' opened\n", filename)); | |
68 | ||
69 | /* read file */ | |
70 | while (!feof (fid)) { | |
71 | nbread = fread (buffer, 1, BUFFER_SIZE, fid); | |
72 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread)); | |
73 | while (nbread--) { | |
74 | table[(int)buffer[nbread]]++; | |
75 | } | |
76 | } | |
77 | ||
78 | /* close file */ | |
79 | fclose (fid); | |
80 | ||
37062814 | 81 | VERBOSE (DEBUG, PRINTF ("end creating occurence table\n")); |
58352bb0 LM |
82 | |
83 | return table; | |
84 | } | |
85 | ||
86 | /* print occurence table */ | |
87 | ||
88 | void print_occ_table (int *table) | |
89 | { | |
90 | int i; | |
91 | ||
92 | printf ("Occurence table\n"); | |
c9987f3b | 93 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 LM |
94 | if (table[i]) { |
95 | printf ("0x%02x '%c': %d\n", i, ((i < 32) || (i > 127)) ? '.' : i, table[i]); | |
96 | } | |
97 | } | |
98 | } | |
99 | ||
100 | /* leaf structure */ | |
101 | ||
102 | typedef struct _leaf_t | |
103 | { | |
104 | struct _leaf_t *left; | |
105 | struct _leaf_t *right; | |
106 | int occ; | |
c9987f3b | 107 | byte_t c; |
58352bb0 LM |
108 | } leaf_t; |
109 | ||
110 | /* initialize forest */ | |
111 | ||
112 | leaf_t **init_forest (int *table) | |
113 | { | |
114 | leaf_t **leafs = NULL; | |
115 | int nb_leafs = 0; | |
116 | int i, l; | |
117 | ||
118 | VERBOSE (DEBUG, PRINTF ("start initiliazing forest\n")); | |
119 | ||
120 | /* count number of leafs */ | |
c9987f3b | 121 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 LM |
122 | if (table[i] > 0) { |
123 | nb_leafs++; | |
124 | } | |
125 | } | |
126 | ||
127 | /* allocate memory */ | |
128 | leafs = (leaf_t **) calloc (nb_leafs + 1, sizeof (leaf_t *)); | |
129 | if (leafs == NULL) { | |
130 | VERBOSE (ERROR, printf ("can't allocate memory\n")); | |
131 | return NULL; | |
132 | } | |
133 | ||
134 | /* initialize leafs */ | |
c9987f3b | 135 | for (i = 0, l = 0; i < NB_BYTES; i++) { |
58352bb0 LM |
136 | if (table[i] > 0) { |
137 | leafs[l] = (leaf_t *) calloc (1, sizeof (leaf_t)); | |
138 | if (leafs[l] == NULL) { | |
139 | VERBOSE (ERROR, printf ("can't allocate memory\n")); | |
140 | return NULL; | |
141 | } | |
142 | leafs[l]->occ = table[i]; | |
143 | leafs[l]->c = i; | |
144 | l++; | |
145 | } | |
146 | } | |
147 | ||
148 | VERBOSE (DEBUG, PRINTF ("end initiliazing forest\n")); | |
149 | ||
150 | return leafs; | |
151 | } | |
152 | ||
153 | /* create tree */ | |
154 | ||
155 | leaf_t *create_tree (leaf_t **leafs) | |
156 | { | |
157 | leaf_t *branch = NULL; | |
158 | int nb_leafs = 0; | |
37062814 LM |
159 | int last = -1; |
160 | int ante; | |
58352bb0 LM |
161 | int i, j; |
162 | ||
163 | VERBOSE (DEBUG, PRINTF ("start creating tree\n")); | |
164 | ||
165 | /* count number of leafs */ | |
166 | while (leafs[nb_leafs] != NULL) { | |
167 | nb_leafs++; | |
168 | } | |
169 | ||
170 | /* create tree */ | |
171 | for (j = 0; j < nb_leafs - 1; j++) { | |
172 | ||
173 | /* look for leatest occurence */ | |
174 | last = -1; | |
175 | for (i = 0; i < nb_leafs; i++) { | |
176 | if (leafs[i] == NULL) { | |
177 | continue; | |
178 | } | |
179 | if ((last == -1) || (leafs[i]->occ < leafs[last]->occ)) { | |
180 | last = i; | |
181 | } | |
182 | } | |
183 | ||
184 | /* look for ante leatest occurence */ | |
185 | ante = -1; | |
186 | for (i = 0; i < nb_leafs; i++) { | |
187 | if ((i == last) || (leafs[i] == NULL)) { | |
188 | continue; | |
189 | } | |
190 | if ((ante == -1) || (leafs[i]->occ < leafs[ante]->occ)) { | |
191 | ante = i; | |
192 | } | |
193 | } | |
194 | ||
195 | /* create branch */ | |
196 | if ((last == -1) || (ante == -1)) { | |
197 | VERBOSE (ERROR, printf ("error during tree building\n")); | |
198 | return NULL; | |
199 | } | |
200 | branch = (leaf_t *) calloc (1, sizeof (leaf_t)); | |
201 | if (branch == NULL) { | |
202 | VERBOSE (ERROR, printf ("can't allocate memory\n")); | |
203 | return NULL; | |
204 | } | |
205 | branch->left = leafs[last]; | |
206 | branch->right = leafs[ante]; | |
207 | branch->occ = branch->left->occ + branch->right->occ; | |
208 | leafs[last] = branch; | |
209 | leafs[ante] = NULL; | |
210 | } | |
211 | ||
212 | VERBOSE (DEBUG, PRINTF ("end creating tree\n")); | |
213 | ||
37062814 | 214 | return (last != -1) ? leafs[last] : NULL; |
58352bb0 LM |
215 | } |
216 | ||
217 | /* free tree */ | |
218 | ||
219 | void free_tree (leaf_t *root) { | |
220 | if (root) { | |
221 | if (root->left) { | |
222 | free_tree (root->left); | |
223 | } | |
224 | if (root->right) { | |
225 | free_tree (root->right); | |
226 | } | |
227 | free (root); | |
228 | } | |
229 | } | |
230 | ||
58352bb0 LM |
231 | /* explore tree */ |
232 | ||
233 | void explore_tree (code_t *table, leaf_t *root, char *code, int index) | |
234 | { | |
235 | if ((root->left == NULL) && (root->right == NULL)) { | |
c9987f3b | 236 | codcpy ((char *)(table + (int)(root->c)), sizeof (code_t), code); |
58352bb0 LM |
237 | } |
238 | else { | |
c9987f3b | 239 | codcpy (code + index, sizeof (code_t), "1"); |
58352bb0 | 240 | explore_tree (table, root->left, code, index + 1); |
c9987f3b | 241 | codcpy (code + index, sizeof (code_t), "0"); |
58352bb0 LM |
242 | explore_tree (table, root->right, code, index + 1); |
243 | } | |
244 | } | |
245 | ||
246 | /* create code table */ | |
247 | ||
248 | code_t *create_code (leaf_t *root) | |
249 | { | |
250 | code_t *table = NULL; | |
251 | code_t code = {0}; | |
252 | ||
253 | VERBOSE (DEBUG, PRINTF ("start creating code table\n")); | |
254 | ||
255 | /* allocate table */ | |
c9987f3b | 256 | table = (code_t *) calloc (NB_BYTES, sizeof (code_t)); |
58352bb0 LM |
257 | if (table == NULL) { |
258 | VERBOSE (ERROR, printf ("can't allocate memory\n")); | |
259 | return NULL; | |
260 | } | |
261 | ||
262 | explore_tree (table, root, (char *)&code, 0); | |
263 | ||
264 | VERBOSE (DEBUG, PRINTF ("end creating code table\n")); | |
265 | ||
266 | return table; | |
267 | } | |
268 | ||
269 | /* print code table */ | |
270 | ||
271 | void print_code_table (code_t *codes) | |
272 | { | |
273 | char *code; | |
274 | int i; | |
275 | ||
276 | printf ("Code table\n"); | |
c9987f3b | 277 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 278 | code = (char *)(codes + i); |
c9987f3b | 279 | if (codlen (code) == 0) { |
58352bb0 LM |
280 | continue; |
281 | } | |
282 | printf ("0x%02x '%c': %s\n", i, ((i < 32) || (i > 127)) ? '.' : i, code); | |
283 | } | |
284 | } | |
285 | ||
286 | /* encode header and code table */ | |
287 | ||
c9987f3b | 288 | byte_t *encode_header_table (code_t *codes, int *occ) |
58352bb0 | 289 | { |
c9987f3b LM |
290 | byte_t buffer[NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6] = {0}; |
291 | char bits[(NB_BYTES - 1) + 8 + 1] = {0}; | |
58352bb0 | 292 | char *code; |
c9987f3b | 293 | byte_t *header = buffer; |
58352bb0 LM |
294 | int i, j, length, mode; |
295 | int nb = 0; | |
296 | int size = 0; | |
297 | ||
298 | VERBOSE (DEBUG, PRINTF ("start encoding header and code table\n")); | |
299 | ||
300 | /* mode 1 or 2 */ | |
c9987f3b | 301 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 302 | code = (char *)(codes + i); |
c9987f3b | 303 | if (codlen (code) > 0) { |
58352bb0 | 304 | nb++; |
c9987f3b | 305 | size += codlen (code) * occ[i]; |
58352bb0 LM |
306 | } |
307 | } | |
c9987f3b | 308 | mode = (NB_BYTES < 2 * nb + 1) ? 1 : 2; |
58352bb0 LM |
309 | VERBOSE (DEBUG, PRINTF ("nb chars: %d\n", nb)); |
310 | VERBOSE (DEBUG, PRINTF ("mode: %d\n", mode)); | |
311 | VERBOSE (DEBUG, PRINTF ("size: %d\n", size)); | |
37062814 | 312 | VERBOSE (DEBUG, PRINTF ("rem: %d\n", size % 256)); |
58352bb0 LM |
313 | |
314 | /* header */ | |
c9987f3b | 315 | codcpy ((char *)header, sizeof (buffer), (mode == 1) ? "MZ1 " : "MZ2 "); |
58352bb0 LM |
316 | header += 6; |
317 | ||
318 | /* size */ | |
319 | switch (mode) { | |
320 | case 1: | |
c9987f3b | 321 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 322 | code = (char *)(codes + i); |
c9987f3b | 323 | *(header++) = (byte_t) codlen (code); |
58352bb0 LM |
324 | } |
325 | break; | |
326 | case 2: | |
c9987f3b LM |
327 | *(header++) = (byte_t)(nb - 1); |
328 | for (i = 0; i < NB_BYTES; i++) { | |
58352bb0 | 329 | code = (char *)(codes + i); |
c9987f3b LM |
330 | if (codlen (code) > 0) { |
331 | *(header++) = (byte_t) i; | |
332 | *(header++) = (byte_t) codlen (code); | |
58352bb0 LM |
333 | } |
334 | } | |
335 | break; | |
336 | } | |
337 | ||
338 | /* bits */ | |
c9987f3b | 339 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 340 | code = (char *)(codes + i); |
c9987f3b LM |
341 | if (codlen (code) > 0) { |
342 | codcat (bits, sizeof (code_t), code); | |
343 | while (codlen (bits) > (8 - 1)) { | |
58352bb0 LM |
344 | for (j = 0; j < 8; j++) { |
345 | *header <<= 1; | |
346 | if (bits[j] == '1') { | |
347 | (*header)++; | |
348 | } | |
349 | } | |
c9987f3b | 350 | codcpy (bits, sizeof (code_t), bits + 8); |
58352bb0 LM |
351 | header++; |
352 | } | |
353 | } | |
354 | } | |
c9987f3b LM |
355 | if (codlen (bits) > 0) { |
356 | for (j = 0; j < (int)codlen (bits); j++) { | |
58352bb0 LM |
357 | *header <<= 1; |
358 | if (bits[j] == '1') { | |
359 | (*header)++; | |
360 | } | |
361 | } | |
c9987f3b | 362 | for (j = (int)codlen (bits); j < 8; j++) { |
37062814 LM |
363 | *header <<= 1; |
364 | } | |
58352bb0 LM |
365 | header++; |
366 | } | |
367 | ||
368 | /* length */ | |
369 | length = (int)(header - buffer - 6); | |
370 | VERBOSE (DEBUG, PRINTF ("lengh: %d %02x %02x\n", length, length >> 8, length & 0xff)); | |
c9987f3b LM |
371 | buffer[3] = (byte_t)(length >> 8); |
372 | buffer[4] = (byte_t)(length & 0xff); | |
373 | buffer[5] = (byte_t)(size % 256); | |
58352bb0 LM |
374 | |
375 | /* allocation */ | |
c9987f3b | 376 | header = (byte_t *) calloc (length + 6, 1); |
d3dbaf98 | 377 | blkcpy (header, buffer, length + 6); |
58352bb0 LM |
378 | |
379 | VERBOSE (DEBUG, PRINTF ("end encoding header and code table\n")); | |
380 | ||
c9987f3b | 381 | return header; |
58352bb0 LM |
382 | } |
383 | ||
384 | /* print header */ | |
385 | ||
c9987f3b | 386 | void print_header (byte_t *header) |
58352bb0 LM |
387 | { |
388 | int length, i; | |
389 | ||
c9987f3b | 390 | length = (header[3] << 8) + header[4]; |
58352bb0 LM |
391 | VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length)); |
392 | for (i = 0; i < length + 6; i++) { | |
c9987f3b | 393 | printf ("%02x", header[i]); |
58352bb0 LM |
394 | } |
395 | printf ("\n"); | |
396 | } | |
397 | ||
398 | /* write crompressed file */ | |
399 | ||
c9987f3b | 400 | int write_compress (char *output, char *input, code_t *codes, byte_t *header) |
58352bb0 | 401 | { |
c9987f3b LM |
402 | byte_t bufin[BUFFER_SIZE] = {0}; |
403 | byte_t bufout[BUFFER_SIZE] = {0}; | |
404 | char bits[(NB_BYTES - 1) + 8 + 1] = {0}; | |
58352bb0 LM |
405 | FILE *fin, *fout; |
406 | int length = 0; | |
407 | int i, j, nbread; | |
c9987f3b | 408 | byte_t *pt; |
58352bb0 LM |
409 | |
410 | VERBOSE (DEBUG, PRINTF ("start writting compressed file\n")); | |
411 | ||
412 | /* open input file */ | |
413 | fin = fopen (input, "rb"); | |
414 | if (fin == NULL) { | |
37062814 | 415 | VERBOSE (ERROR, printf ("can't open file '%s' for reading\n", input)); |
58352bb0 LM |
416 | return 1; |
417 | } | |
418 | VERBOSE (INFO, printf ("file '%s' opened\n", input)); | |
419 | ||
420 | /* open output file */ | |
421 | fout = fopen (output, "wb"); | |
422 | if (fin == NULL) { | |
37062814 | 423 | VERBOSE (ERROR, printf ("can't open file '%s' for writing\n", output)); |
58352bb0 LM |
424 | return 1; |
425 | } | |
426 | VERBOSE (INFO, printf ("file '%s' opened\n", output)); | |
427 | ||
428 | /* write header */ | |
c9987f3b | 429 | length = (header[3] << 8) + header[4]; |
58352bb0 LM |
430 | VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length)); |
431 | fwrite (header, 1, length + 6, fout); | |
432 | ||
433 | /* write file */ | |
434 | pt = bufout; | |
435 | while (!feof (fin)) { | |
436 | nbread = fread (bufin, 1, BUFFER_SIZE, fin); | |
437 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread)); | |
438 | for (i = 0; i < nbread; i++) { | |
c9987f3b LM |
439 | codcat (bits, sizeof (code_t), (char *)(codes + bufin[i])); |
440 | while (codlen (bits) > (8 - 1)) { | |
58352bb0 LM |
441 | for (j = 0; j < 8; j++) { |
442 | *pt <<= 1; | |
443 | if (bits[j] == '1') { | |
444 | (*pt)++; | |
445 | } | |
446 | } | |
c9987f3b | 447 | codcpy (bits, sizeof (code_t), bits + 8); |
37062814 | 448 | if (pt - bufout == BUFFER_SIZE - 1) { |
58352bb0 LM |
449 | fwrite (bufout, 1, BUFFER_SIZE, fout); |
450 | pt = bufout; | |
37062814 LM |
451 | } else { |
452 | pt++; | |
58352bb0 LM |
453 | } |
454 | } | |
455 | } | |
456 | } | |
c9987f3b LM |
457 | VERBOSE (DEBUG, PRINTF ("lastest bits : %d\n", codlen (bits))); |
458 | if (codlen (bits) > 0) { | |
459 | for (j = 0; j < (int)codlen (bits); j++) { | |
58352bb0 LM |
460 | *pt <<= 1; |
461 | if (bits[j] == '1') { | |
462 | (*pt)++; | |
463 | } | |
464 | } | |
c9987f3b | 465 | for (j = (int)codlen (bits); j < 8; j++) { |
37062814 | 466 | *pt <<= 1; |
58352bb0 LM |
467 | } |
468 | pt++; | |
469 | } | |
470 | if (pt != bufout) { | |
37062814 | 471 | VERBOSE (DEBUG, PRINTF ("last partial buffer written: %u\n", pt - bufout)); |
58352bb0 LM |
472 | fwrite (bufout, 1, pt - bufout, fout); |
473 | } | |
474 | ||
475 | /* closing */ | |
476 | fclose (fin); | |
477 | fclose (fout); | |
478 | ||
479 | VERBOSE (DEBUG, PRINTF ("end writting compressed file\n")); | |
480 | ||
481 | return 0; | |
482 | } | |
483 | ||
37062814 LM |
484 | /* read header */ |
485 | ||
486 | code_t *read_header (char *filename) { | |
c9987f3b | 487 | byte_t buffer[NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6] = {0}; |
37062814 | 488 | code_t *table = NULL; |
c9987f3b LM |
489 | byte_t *codes = NULL; |
490 | byte_t cur; | |
491 | int lengths[NB_BYTES] = {0}; | |
37062814 LM |
492 | FILE *fid = NULL; |
493 | int mode = 0; | |
494 | size_t i, j, l, nb, size; | |
495 | ||
496 | VERBOSE (DEBUG, PRINTF ("start reading header\n")); | |
497 | ||
498 | /* open file */ | |
499 | fid = fopen (filename, "rb"); | |
500 | if (fid == NULL) { | |
501 | VERBOSE (ERROR, printf ("can't open file '%s'\n", filename)); | |
502 | return NULL; | |
503 | } | |
504 | VERBOSE (INFO, printf ("file '%s' opened\n", filename)); | |
505 | ||
506 | /* read magic number */ | |
507 | nb = fread (buffer, 1, 6, fid); | |
508 | VERBOSE (DEBUG, PRINTF ("nb, buffer: %d 0x%02x 0x%02x\n", nb, buffer[0], buffer[1])); | |
509 | if ((nb == 6) && (buffer[0] == 'M') && (buffer[1] == 'Z')) { | |
510 | mode = (buffer[2] == '1') ? 1 : (buffer[2] == '2') ? 2 : 0; | |
c9987f3b | 511 | size = (buffer[3] << 8) + buffer[4]; |
37062814 | 512 | VERBOSE (DEBUG, PRINTF ("mode, size: %d %d\n", mode, size)); |
c9987f3b | 513 | if (size > NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES) { |
37062814 LM |
514 | mode = 0; |
515 | } else { | |
516 | nb = fread (buffer, 1, size, fid); | |
517 | VERBOSE (DEBUG, PRINTF ("nb read: %d\n", nb)); | |
518 | if (nb != size) { | |
519 | mode = 0; | |
520 | } | |
521 | } | |
522 | } | |
523 | fclose (fid); | |
524 | if (mode == 0) { | |
525 | VERBOSE (ERROR, printf ("incorrect file\n")); | |
526 | return NULL; | |
527 | } | |
528 | ||
529 | /* analyse header */ | |
c9987f3b | 530 | codes = buffer; |
37062814 LM |
531 | switch (mode) { |
532 | case 1: | |
c9987f3b | 533 | for (i = 0; i < NB_BYTES; i++) { |
37062814 LM |
534 | lengths[i] = *(codes++); |
535 | } | |
536 | break; | |
537 | case 2: | |
538 | nb = *(codes++) + 1; | |
539 | VERBOSE (DEBUG, PRINTF ("nb codes: %d\n", nb)); | |
540 | for (i = 0; i < nb; i++) { | |
541 | j = *(codes++); | |
542 | lengths[j] = *(codes++); | |
543 | } | |
544 | break; | |
545 | } | |
c9987f3b | 546 | VERBOSE (DEBUG, for (i = 0; i < NB_BYTES; i++) if (lengths[i]) PRINTF ("%d: %d\n", i, lengths[i])); |
37062814 LM |
547 | |
548 | /* check lengths */ | |
c9987f3b | 549 | for (i = 0, l = 0; i < NB_BYTES; i++) { |
37062814 LM |
550 | l += lengths[i]; |
551 | } | |
552 | if (((mode == 1) && (size - 256 != (l + 7) / 8)) || | |
553 | ((mode == 2) && (size - 2 * nb - 1 != (l + 7) / 8))) { | |
554 | VERBOSE (ERROR, printf ("incorrect code table length\n")); | |
555 | return NULL; | |
556 | } | |
557 | ||
558 | /* allocate table */ | |
c9987f3b | 559 | table = (code_t *) calloc (NB_BYTES, sizeof (code_t)); |
37062814 LM |
560 | if (table == NULL) { |
561 | VERBOSE (ERROR, printf ("can't allocate memory\n")); | |
562 | return NULL; | |
563 | } | |
564 | ||
565 | /* decode code */ | |
566 | cur = *(codes++); | |
567 | l = 8; | |
c9987f3b | 568 | for (i = 0; i < NB_BYTES; i++) { |
37062814 LM |
569 | if (lengths[i] == 0) { |
570 | continue; | |
571 | } | |
572 | while (lengths[i]--) { | |
c9987f3b | 573 | codcat ((char *)(table + i), sizeof (code_t), ((cur & 0x80) == 0) ? "0" : "1"); |
37062814 LM |
574 | l--; |
575 | cur <<= 1; | |
576 | if (l == 0) { | |
577 | cur = *(codes++); | |
578 | l = 8; | |
579 | } | |
580 | } | |
581 | } | |
582 | ||
583 | VERBOSE (DEBUG, PRINTF ("end reading header\n")); | |
584 | ||
585 | return table; | |
586 | } | |
587 | ||
588 | /* write decompressed file */ | |
589 | ||
590 | int write_decompress (char *output, char *input, code_t *codes) | |
591 | { | |
c9987f3b LM |
592 | byte_t bufin[BUFFER_SIZE] = {0}; |
593 | byte_t bufout[BUFFER_SIZE] = {0}; | |
594 | byte_t bufhea[MAX(NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6, BUFFER_SIZE)] = {0}; | |
595 | char bits[(NB_BYTES - 1) + 1] = {0}; | |
37062814 LM |
596 | FILE *fin, *fout; |
597 | int i, j, k, nb, size, rem; | |
598 | int is_found; | |
599 | int l = 0; | |
c9987f3b | 600 | byte_t *pt; |
37062814 LM |
601 | |
602 | VERBOSE (DEBUG, PRINTF ("start writing decompressed file\n")); | |
603 | ||
604 | /* open file for reading */ | |
605 | fin = fopen (input, "rb"); | |
606 | if (fin == NULL) { | |
607 | VERBOSE (ERROR, printf ("can't open file '%s' for reading\n", input)); | |
608 | return 1; | |
609 | } | |
610 | VERBOSE (INFO, printf ("file '%s' opened\n", input)); | |
611 | ||
612 | /* read magic number */ | |
c9987f3b | 613 | nb = fread (bufhea, 1, 6, fin); |
37062814 LM |
614 | if (nb != 6) { |
615 | VERBOSE (ERROR, printf ("can't read file\n")); | |
616 | fclose (fin); | |
617 | return 1; | |
618 | } | |
c9987f3b | 619 | size = (bufhea[3] << 8) + bufhea[4]; |
37062814 | 620 | VERBOSE (DEBUG, printf ("table size: %d\n", size)); |
c9987f3b | 621 | rem = bufhea[5]; |
37062814 | 622 | VERBOSE (DEBUG, printf ("remainder: %d\n", rem)); |
c9987f3b | 623 | nb = fread (bufhea, 1, size, fin); |
37062814 LM |
624 | if (nb != size) { |
625 | VERBOSE (ERROR, printf ("can't read file\n")); | |
626 | fclose (fin); | |
627 | return 1; | |
628 | } | |
629 | ||
630 | /* open file for writing */ | |
631 | fout = fopen (output, "wb"); | |
632 | if (fout == NULL) { | |
633 | VERBOSE (ERROR, printf ("can't open file '%s' for writing\n", output)); | |
634 | return 2; | |
635 | } | |
636 | VERBOSE (INFO, printf ("file '%s' opened\n", output)); | |
637 | ||
638 | /* write file */ | |
639 | pt = bufout; | |
640 | while (!feof (fin)) { | |
641 | nb = fread (bufin, 1, BUFFER_SIZE, fin); | |
642 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nb)); | |
643 | for (i = 0; i < nb; i++) { | |
644 | for (j = 0; j < 8; j++) { | |
c9987f3b | 645 | codcat (bits, sizeof (bits), ((bufin[i] & 0x80) == 0) ? "0" : "1"); |
37062814 LM |
646 | bufin[i] <<= 1; |
647 | l++; | |
c9987f3b | 648 | VERBOSE (DEBUG, PRINTF ("bits: %d - %s\n", codlen (bits), bits)); |
37062814 LM |
649 | |
650 | /* look for correct code */ | |
651 | is_found = 0; | |
c9987f3b LM |
652 | for (k = 0; (k < NB_BYTES) && (!is_found); k++) { |
653 | if (codcmp ((char *)(codes + k), bits) == 0) { | |
37062814 LM |
654 | is_found = 1; |
655 | VERBOSE (DEBUG, PRINTF ("found: %d\n", k)); | |
656 | *pt= k; | |
657 | bits[0] = 0; | |
658 | if (pt - bufout == BUFFER_SIZE - 1) { | |
659 | VERBOSE (DEBUG, PRINTF ("nb buffer out: %u\n", (pt - bufout))); | |
660 | fwrite (bufout, 1, BUFFER_SIZE, fout); | |
661 | pt = bufout; | |
662 | } else { | |
663 | pt++; | |
664 | } | |
665 | } | |
666 | } | |
667 | if ((i == nb - 1) && (l % 256 == rem) && (feof (fin))) { | |
668 | VERBOSE (DEBUG, PRINTF ("break\n")); | |
669 | break; | |
670 | } | |
671 | } | |
672 | } | |
673 | } | |
674 | if (pt != bufout) { | |
675 | VERBOSE (DEBUG, PRINTF ("nb buffer out: %u\n", (pt - bufout))); | |
676 | fwrite (bufout, 1, pt - bufout, fout); | |
677 | } | |
678 | ||
679 | /* close files */ | |
680 | fclose (fin); | |
681 | fclose (fout); | |
682 | ||
683 | VERBOSE (DEBUG, PRINTF ("end writing decompressed file\n")); | |
684 | ||
685 | return 0; | |
686 | } | |
687 | ||
58352bb0 LM |
688 | /* main function */ |
689 | ||
690 | int main (int argc, char *argv[]) | |
691 | { | |
692 | char *input = NULL; | |
693 | char *output = NULL; | |
694 | int *table = NULL; | |
695 | leaf_t **leafs = NULL; | |
696 | leaf_t *root = NULL; | |
697 | code_t *codes = NULL; | |
c9987f3b | 698 | byte_t *header = NULL; |
58352bb0 | 699 | int mode = COMPRESS; |
37062814 | 700 | int rc = 1; |
58352bb0 LM |
701 | |
702 | progname = argv[0]; | |
703 | ||
704 | int c; | |
d3dbaf98 | 705 | char * arg; |
37062814 | 706 | VERBOSE (DEBUG, PRINTF ("start processing arguments\n")); |
d3dbaf98 LM |
707 | while (argc-- > 1) { |
708 | arg = *(++argv); | |
709 | if (arg[0] != '-') { | |
710 | fprintf (stderr, "%s: invalid option -- %s\n", progname, arg); | |
711 | usage (1); | |
712 | } | |
713 | c = arg[1]; | |
37062814 | 714 | VERBOSE (DEBUG, PRINTF ("option: %c\n", c)); |
58352bb0 LM |
715 | switch (c) { |
716 | case 'c': | |
717 | mode = COMPRESS; | |
718 | break; | |
719 | case 'd': | |
720 | mode = DECOMPRESS; | |
721 | break; | |
722 | case 'i': | |
d3dbaf98 | 723 | input = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
37062814 | 724 | VERBOSE (DEBUG, PRINTF ("input: %s\n", input)); |
58352bb0 LM |
725 | break; |
726 | case 'o': | |
d3dbaf98 | 727 | output = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
37062814 | 728 | VERBOSE (DEBUG, PRINTF ("output: %s\n", output)); |
58352bb0 LM |
729 | break; |
730 | case 'v': | |
d3dbaf98 LM |
731 | arg = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
732 | if (arg == NULL) { | |
733 | fprintf (stderr, "%s: missing verbose level\n", progname); | |
734 | usage (1); | |
735 | } | |
736 | verbose = atoi (arg); | |
58352bb0 LM |
737 | VERBOSE (INFO, printf ("verbose: %d\n", verbose)); |
738 | break; | |
739 | case 'h': | |
740 | default: | |
741 | usage (c != 'h'); | |
742 | } | |
743 | } | |
d3dbaf98 LM |
744 | if ((input == NULL) || (output == NULL)) { |
745 | fprintf (stderr, "%s: missing file\n", progname); | |
58352bb0 LM |
746 | usage (1); |
747 | } | |
37062814 | 748 | VERBOSE (DEBUG, PRINTF ("end processing arguments\n")); |
58352bb0 LM |
749 | |
750 | switch (mode) { | |
751 | case COMPRESS: | |
752 | table = create_table (input); | |
753 | if (table == NULL) break; | |
754 | VERBOSE (INFO, print_occ_table (table)); | |
755 | ||
756 | leafs = init_forest (table); | |
757 | if (leafs == NULL) break; | |
758 | root = create_tree (leafs); | |
759 | if (root == NULL) break; | |
760 | codes = create_code (root); | |
761 | if (codes == NULL) break; | |
762 | VERBOSE (INFO, print_code_table (codes)); | |
763 | header = encode_header_table (codes, table); | |
764 | if (header == NULL) break; | |
765 | VERBOSE (INFO, print_header (header)); | |
766 | rc = write_compress (output, input, codes, header); | |
767 | break; | |
768 | case DECOMPRESS: | |
37062814 LM |
769 | codes = read_header (input); |
770 | if (codes == NULL) break; | |
771 | VERBOSE (INFO, print_code_table (codes)); | |
772 | rc = write_decompress (output, input, codes); | |
58352bb0 LM |
773 | break; |
774 | } | |
775 | ||
776 | /* clean everything */ | |
c9987f3b | 777 | fflush (stdout); |
58352bb0 LM |
778 | if (header) free (header); |
779 | if (codes) free (codes); | |
780 | if (root) free_tree (root); | |
781 | if (leafs) free (leafs); | |
782 | if (table) free (table); | |
783 | ||
784 | return rc; | |
785 | } | |
786 | ||
787 | // test: compress.exe -h | |
788 | // test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }' | |
789 | // test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }' | |
790 | // test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }' | |
37062814 LM |
791 | // test: compress.exe -c -i compress.c -o compress.mz |
792 | // test: ls -sS1 compress.c compress.mz | tail -1 | grep compress.mz | |
793 | // test: compress.exe -d -i compress.mz -o tmp.c | |
794 | // test: cmp compress.c tmp.c | |
795 | // test: rm compress.mz tmp.c | |
58352bb0 LM |
796 | |
797 | // vim: ts=4 sw=4 et |