Commit | Line | Data |
---|---|---|
58352bb0 LM |
1 | /* depend: */ |
2 | /* cflags: */ | |
3 | /* linker: */ | |
4 | ||
5 | #include <assert.h> | |
6 | #include <getopt.h> | |
7 | #include <malloc.h> | |
8 | #include <stdio.h> | |
9 | #include <stdlib.h> | |
10 | #include <string.h> | |
11 | ||
12 | /* constants */ | |
13 | ||
14 | #define COMPRESS 1 | |
15 | #define DECOMPRESS 2 | |
16 | ||
17 | #define NB_CHARS 256 | |
18 | #define BUFFER_SIZE 4096 | |
19 | ||
20 | #define DEBUG 4 | |
21 | #define INFO 3 | |
22 | #define WARNING 2 | |
23 | #define ERROR 1 | |
24 | ||
25 | /* macros */ | |
26 | ||
27 | #define CEIL(x, y) (((x) + (y) - 1) / (y)) | |
28 | #define MIN(x, y) (((x) < (y)) ? (x) : (y)) | |
29 | #define MAX(x, y) (((x) > (y)) ? (x) : (y)) | |
30 | #define VERBOSE(level, statement...) do { if (level <= verbose) { statement; } } while(0) | |
31 | #define PRINTF(args...) do { fprintf (stdout, args); fflush (stdout); } while (0) | |
32 | ||
33 | /* gobal variables */ | |
34 | ||
35 | char *progname = NULL; | |
36 | int verbose = 2; | |
37 | ||
38 | /* help function */ | |
39 | ||
40 | void usage (int ret) | |
41 | { | |
42 | FILE *fd = ret ? stderr : stdout; | |
43 | fprintf (fd, "usage: %s\n", progname); | |
44 | fprintf (fd, " -h : help message\n"); | |
45 | fprintf (fd, " -i <file>: input file\n"); | |
46 | fprintf (fd, " -o <file>: output file\n"); | |
47 | fprintf (fd, " -v : verbose level (%d)\n", verbose); | |
48 | ||
49 | exit (ret); | |
50 | } | |
51 | ||
52 | /* create occurence table */ | |
53 | ||
54 | int *create_table (char *filename) | |
55 | { | |
56 | char buffer[BUFFER_SIZE] = {0}; | |
57 | int nbread; | |
58 | int *table = NULL; | |
59 | FILE *fid = NULL; | |
60 | ||
61 | VERBOSE (DEBUG, PRINTF ("start create occurence table\n")); | |
62 | ||
63 | /* memory allocation */ | |
64 | table = (int *) calloc (NB_CHARS, sizeof (int)); | |
65 | if (table == NULL) { | |
66 | VERBOSE (ERROR, printf ("can't allocate memory\n")); | |
67 | return NULL; | |
68 | } | |
69 | VERBOSE (INFO, printf ("memory allocated\n")); | |
70 | ||
71 | /* open file */ | |
72 | fid = fopen (filename, "rb"); | |
73 | if (fid == NULL) { | |
74 | VERBOSE (ERROR, printf ("can't open file '%s'\n", filename)); | |
75 | free (table); | |
76 | return NULL; | |
77 | } | |
78 | VERBOSE (INFO, printf ("file '%s' opened\n", filename)); | |
79 | ||
80 | /* read file */ | |
81 | while (!feof (fid)) { | |
82 | nbread = fread (buffer, 1, BUFFER_SIZE, fid); | |
83 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread)); | |
84 | while (nbread--) { | |
85 | table[(int)buffer[nbread]]++; | |
86 | } | |
87 | } | |
88 | ||
89 | /* close file */ | |
90 | fclose (fid); | |
91 | ||
92 | VERBOSE (DEBUG, PRINTF ("end create occurence table\n")); | |
93 | ||
94 | return table; | |
95 | } | |
96 | ||
97 | /* print occurence table */ | |
98 | ||
99 | void print_occ_table (int *table) | |
100 | { | |
101 | int i; | |
102 | ||
103 | printf ("Occurence table\n"); | |
104 | for (i = 0; i < NB_CHARS; i++) { | |
105 | if (table[i]) { | |
106 | printf ("0x%02x '%c': %d\n", i, ((i < 32) || (i > 127)) ? '.' : i, table[i]); | |
107 | } | |
108 | } | |
109 | } | |
110 | ||
111 | /* leaf structure */ | |
112 | ||
113 | typedef struct _leaf_t | |
114 | { | |
115 | struct _leaf_t *left; | |
116 | struct _leaf_t *right; | |
117 | int occ; | |
118 | char c; | |
119 | } leaf_t; | |
120 | ||
121 | /* initialize forest */ | |
122 | ||
123 | leaf_t **init_forest (int *table) | |
124 | { | |
125 | leaf_t **leafs = NULL; | |
126 | int nb_leafs = 0; | |
127 | int i, l; | |
128 | ||
129 | VERBOSE (DEBUG, PRINTF ("start initiliazing forest\n")); | |
130 | ||
131 | /* count number of leafs */ | |
132 | for (i = 0; i < NB_CHARS; i++) { | |
133 | if (table[i] > 0) { | |
134 | nb_leafs++; | |
135 | } | |
136 | } | |
137 | ||
138 | /* allocate memory */ | |
139 | leafs = (leaf_t **) calloc (nb_leafs + 1, sizeof (leaf_t *)); | |
140 | if (leafs == NULL) { | |
141 | VERBOSE (ERROR, printf ("can't allocate memory\n")); | |
142 | return NULL; | |
143 | } | |
144 | ||
145 | /* initialize leafs */ | |
146 | for (i = 0, l = 0; i < NB_CHARS; i++) { | |
147 | if (table[i] > 0) { | |
148 | leafs[l] = (leaf_t *) calloc (1, sizeof (leaf_t)); | |
149 | if (leafs[l] == NULL) { | |
150 | VERBOSE (ERROR, printf ("can't allocate memory\n")); | |
151 | return NULL; | |
152 | } | |
153 | leafs[l]->occ = table[i]; | |
154 | leafs[l]->c = i; | |
155 | l++; | |
156 | } | |
157 | } | |
158 | ||
159 | VERBOSE (DEBUG, PRINTF ("end initiliazing forest\n")); | |
160 | ||
161 | return leafs; | |
162 | } | |
163 | ||
164 | /* create tree */ | |
165 | ||
166 | leaf_t *create_tree (leaf_t **leafs) | |
167 | { | |
168 | leaf_t *branch = NULL; | |
169 | int nb_leafs = 0; | |
170 | int last, ante; | |
171 | int i, j; | |
172 | ||
173 | VERBOSE (DEBUG, PRINTF ("start creating tree\n")); | |
174 | ||
175 | /* count number of leafs */ | |
176 | while (leafs[nb_leafs] != NULL) { | |
177 | nb_leafs++; | |
178 | } | |
179 | ||
180 | /* create tree */ | |
181 | for (j = 0; j < nb_leafs - 1; j++) { | |
182 | ||
183 | /* look for leatest occurence */ | |
184 | last = -1; | |
185 | for (i = 0; i < nb_leafs; i++) { | |
186 | if (leafs[i] == NULL) { | |
187 | continue; | |
188 | } | |
189 | if ((last == -1) || (leafs[i]->occ < leafs[last]->occ)) { | |
190 | last = i; | |
191 | } | |
192 | } | |
193 | ||
194 | /* look for ante leatest occurence */ | |
195 | ante = -1; | |
196 | for (i = 0; i < nb_leafs; i++) { | |
197 | if ((i == last) || (leafs[i] == NULL)) { | |
198 | continue; | |
199 | } | |
200 | if ((ante == -1) || (leafs[i]->occ < leafs[ante]->occ)) { | |
201 | ante = i; | |
202 | } | |
203 | } | |
204 | ||
205 | /* create branch */ | |
206 | if ((last == -1) || (ante == -1)) { | |
207 | VERBOSE (ERROR, printf ("error during tree building\n")); | |
208 | return NULL; | |
209 | } | |
210 | branch = (leaf_t *) calloc (1, sizeof (leaf_t)); | |
211 | if (branch == NULL) { | |
212 | VERBOSE (ERROR, printf ("can't allocate memory\n")); | |
213 | return NULL; | |
214 | } | |
215 | branch->left = leafs[last]; | |
216 | branch->right = leafs[ante]; | |
217 | branch->occ = branch->left->occ + branch->right->occ; | |
218 | leafs[last] = branch; | |
219 | leafs[ante] = NULL; | |
220 | } | |
221 | ||
222 | VERBOSE (DEBUG, PRINTF ("end creating tree\n")); | |
223 | ||
224 | return leafs[last]; | |
225 | } | |
226 | ||
227 | /* free tree */ | |
228 | ||
229 | void free_tree (leaf_t *root) { | |
230 | if (root) { | |
231 | if (root->left) { | |
232 | free_tree (root->left); | |
233 | } | |
234 | if (root->right) { | |
235 | free_tree (root->right); | |
236 | } | |
237 | free (root); | |
238 | } | |
239 | } | |
240 | ||
241 | /* code structure */ | |
242 | ||
243 | typedef struct { | |
244 | char code[NB_CHARS - 1 + 1]; | |
245 | } code_t; | |
246 | ||
247 | /* explore tree */ | |
248 | ||
249 | void explore_tree (code_t *table, leaf_t *root, char *code, int index) | |
250 | { | |
251 | if ((root->left == NULL) && (root->right == NULL)) { | |
252 | strcpy ((char *)(table + (int)(root->c)), code); | |
253 | } | |
254 | else { | |
255 | strcpy (code + index, "1"); | |
256 | explore_tree (table, root->left, code, index + 1); | |
257 | strcpy (code + index, "0"); | |
258 | explore_tree (table, root->right, code, index + 1); | |
259 | } | |
260 | } | |
261 | ||
262 | /* create code table */ | |
263 | ||
264 | code_t *create_code (leaf_t *root) | |
265 | { | |
266 | code_t *table = NULL; | |
267 | code_t code = {0}; | |
268 | ||
269 | VERBOSE (DEBUG, PRINTF ("start creating code table\n")); | |
270 | ||
271 | /* allocate table */ | |
272 | table = (code_t *) calloc (NB_CHARS, sizeof (code_t)); | |
273 | if (table == NULL) { | |
274 | VERBOSE (ERROR, printf ("can't allocate memory\n")); | |
275 | return NULL; | |
276 | } | |
277 | ||
278 | explore_tree (table, root, (char *)&code, 0); | |
279 | ||
280 | VERBOSE (DEBUG, PRINTF ("end creating code table\n")); | |
281 | ||
282 | return table; | |
283 | } | |
284 | ||
285 | /* print code table */ | |
286 | ||
287 | void print_code_table (code_t *codes) | |
288 | { | |
289 | char *code; | |
290 | int i; | |
291 | ||
292 | printf ("Code table\n"); | |
293 | for (i = 0; i < NB_CHARS; i++) { | |
294 | code = (char *)(codes + i); | |
295 | if (strlen (code) == 0) { | |
296 | continue; | |
297 | } | |
298 | printf ("0x%02x '%c': %s\n", i, ((i < 32) || (i > 127)) ? '.' : i, code); | |
299 | } | |
300 | } | |
301 | ||
302 | /* encode header and code table */ | |
303 | ||
304 | char *encode_header_table (code_t *codes, int *occ) | |
305 | { | |
306 | unsigned char buffer[NB_CHARS * (NB_CHARS - 1) / 2 / 8 + NB_CHARS + 2] = {0}; | |
307 | char bits[(NB_CHARS - 1) + 8 + 1] = {0}; | |
308 | char *code; | |
309 | unsigned char *header = buffer; | |
310 | int i, j, length, mode; | |
311 | int nb = 0; | |
312 | int size = 0; | |
313 | ||
314 | VERBOSE (DEBUG, PRINTF ("start encoding header and code table\n")); | |
315 | ||
316 | /* mode 1 or 2 */ | |
317 | for (i = 0; i < NB_CHARS; i++) { | |
318 | code = (char *)(codes + i); | |
319 | if (strlen (code) > 0) { | |
320 | nb++; | |
321 | size += strlen (code) * occ[i]; | |
322 | } | |
323 | } | |
324 | mode = (NB_CHARS < 2 * nb + 1) ? 1 : 2; | |
325 | VERBOSE (DEBUG, PRINTF ("nb chars: %d\n", nb)); | |
326 | VERBOSE (DEBUG, PRINTF ("mode: %d\n", mode)); | |
327 | VERBOSE (DEBUG, PRINTF ("size: %d\n", size)); | |
328 | ||
329 | /* header */ | |
330 | strcpy ((char *)header, (mode == 1) ? "MZ1 " : "MZ2 "); | |
331 | header += 6; | |
332 | ||
333 | /* size */ | |
334 | switch (mode) { | |
335 | case 1: | |
336 | for (i = 0; i < NB_CHARS; i++) { | |
337 | code = (char *)(codes + i); | |
338 | *(header++) = (unsigned char) strlen (code); | |
339 | } | |
340 | break; | |
341 | case 2: | |
342 | *(header++) = (unsigned char)(nb - 1); | |
343 | for (i = 0; i < NB_CHARS; i++) { | |
344 | code = (char *)(codes + i); | |
345 | if (strlen (code) > 0) { | |
346 | *(header++) = (unsigned char)i; | |
347 | *(header++) = (unsigned char) strlen (code); | |
348 | } | |
349 | } | |
350 | break; | |
351 | } | |
352 | ||
353 | /* bits */ | |
354 | for (i = 0; i < NB_CHARS; i++) { | |
355 | code = (char *)(codes + i); | |
356 | if (strlen (code) > 0) { | |
357 | strcat (bits, code); | |
358 | while (strlen (bits) > (8 - 1)) { | |
359 | for (j = 0; j < 8; j++) { | |
360 | *header <<= 1; | |
361 | if (bits[j] == '1') { | |
362 | (*header)++; | |
363 | } | |
364 | } | |
365 | strcpy (bits, bits + 8); | |
366 | header++; | |
367 | } | |
368 | } | |
369 | } | |
370 | if (strlen (bits) > 0) { | |
371 | for (j = 0; j < (int)strlen (bits); j++) { | |
372 | *header <<= 1; | |
373 | if (bits[j] == '1') { | |
374 | (*header)++; | |
375 | } | |
376 | } | |
377 | header++; | |
378 | } | |
379 | ||
380 | /* length */ | |
381 | length = (int)(header - buffer - 6); | |
382 | VERBOSE (DEBUG, PRINTF ("lengh: %d %02x %02x\n", length, length >> 8, length & 0xff)); | |
383 | buffer[3] = (unsigned char)(length >> 8); | |
384 | buffer[4] = (unsigned char)(length & 0xff); | |
385 | buffer[5] = (unsigned char)(size % 8); | |
386 | ||
387 | /* allocation */ | |
388 | header = (unsigned char *) calloc (length + 6, 1); | |
389 | memcpy (header, buffer, length + 6); | |
390 | ||
391 | VERBOSE (DEBUG, PRINTF ("end encoding header and code table\n")); | |
392 | ||
393 | return (char *)header; | |
394 | } | |
395 | ||
396 | /* print header */ | |
397 | ||
398 | void print_header (char *header) | |
399 | { | |
400 | int length, i; | |
401 | ||
402 | length = ((unsigned char)(header[3]) << 8) + (unsigned char)(header[4]); | |
403 | VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length)); | |
404 | for (i = 0; i < length + 6; i++) { | |
405 | printf ("%02x", (unsigned char)header[i]); | |
406 | } | |
407 | printf ("\n"); | |
408 | } | |
409 | ||
410 | /* write crompressed file */ | |
411 | ||
412 | int write_compress (char *output, char *input, code_t *codes, char *header) | |
413 | { | |
414 | char bufin[BUFFER_SIZE] = {0}; | |
415 | char bufout[BUFFER_SIZE] = {0}; | |
416 | char bits[(NB_CHARS - 1) + 8 + 1] = {0}; | |
417 | FILE *fin, *fout; | |
418 | int length = 0; | |
419 | int i, j, nbread; | |
420 | char *pt; | |
421 | ||
422 | VERBOSE (DEBUG, PRINTF ("start writting compressed file\n")); | |
423 | ||
424 | /* open input file */ | |
425 | fin = fopen (input, "rb"); | |
426 | if (fin == NULL) { | |
427 | VERBOSE (ERROR, printf ("can't open file '%s'\n", input)); | |
428 | return 1; | |
429 | } | |
430 | VERBOSE (INFO, printf ("file '%s' opened\n", input)); | |
431 | ||
432 | /* open output file */ | |
433 | fout = fopen (output, "wb"); | |
434 | if (fin == NULL) { | |
435 | VERBOSE (ERROR, printf ("can't open file '%s'\n", output)); | |
436 | return 1; | |
437 | } | |
438 | VERBOSE (INFO, printf ("file '%s' opened\n", output)); | |
439 | ||
440 | /* write header */ | |
441 | length = ((unsigned char)(header[3]) << 8) + (unsigned char)(header[4]); | |
442 | VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length)); | |
443 | fwrite (header, 1, length + 6, fout); | |
444 | ||
445 | /* write file */ | |
446 | pt = bufout; | |
447 | while (!feof (fin)) { | |
448 | nbread = fread (bufin, 1, BUFFER_SIZE, fin); | |
449 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread)); | |
450 | for (i = 0; i < nbread; i++) { | |
451 | strcat (bits, (char *)(codes + bufin[i])); | |
452 | while (strlen (bits) > (8 - 1)) { | |
453 | for (j = 0; j < 8; j++) { | |
454 | *pt <<= 1; | |
455 | if (bits[j] == '1') { | |
456 | (*pt)++; | |
457 | } | |
458 | } | |
459 | strcpy (bits, bits + 8); | |
460 | if (pt - bufout < BUFFER_SIZE) { | |
461 | pt++; | |
462 | } else { | |
463 | fwrite (bufout, 1, BUFFER_SIZE, fout); | |
464 | pt = bufout; | |
465 | } | |
466 | } | |
467 | } | |
468 | } | |
469 | if (strlen (bits) > 0) { | |
470 | for (j = 0; j < (int)strlen (bits); j++) { | |
471 | *pt <<= 1; | |
472 | if (bits[j] == '1') { | |
473 | (*pt)++; | |
474 | } | |
475 | } | |
476 | if (pt - bufout < BUFFER_SIZE) { | |
477 | pt++; | |
478 | } else { | |
479 | fwrite (bufout, 1, BUFFER_SIZE, fout); | |
480 | pt = bufout; | |
481 | } | |
482 | pt++; | |
483 | } | |
484 | if (pt != bufout) { | |
485 | fwrite (bufout, 1, pt - bufout, fout); | |
486 | } | |
487 | ||
488 | /* closing */ | |
489 | fclose (fin); | |
490 | fclose (fout); | |
491 | ||
492 | VERBOSE (DEBUG, PRINTF ("end writting compressed file\n")); | |
493 | ||
494 | return 0; | |
495 | } | |
496 | ||
497 | /* main function */ | |
498 | ||
499 | int main (int argc, char *argv[]) | |
500 | { | |
501 | char *input = NULL; | |
502 | char *output = NULL; | |
503 | int *table = NULL; | |
504 | leaf_t **leafs = NULL; | |
505 | leaf_t *root = NULL; | |
506 | code_t *codes = NULL; | |
507 | char *header = NULL; | |
508 | int mode = COMPRESS; | |
509 | int rc = 0; | |
510 | ||
511 | progname = argv[0]; | |
512 | ||
513 | int c; | |
514 | VERBOSE (DEBUG, PRINTF ("start argument processing\n")); | |
515 | while ((c = getopt(argc, argv, "cdhi:o:v:")) != EOF) { | |
516 | switch (c) { | |
517 | case 'c': | |
518 | mode = COMPRESS; | |
519 | break; | |
520 | case 'd': | |
521 | mode = DECOMPRESS; | |
522 | break; | |
523 | case 'i': | |
524 | VERBOSE (DEBUG, PRINTF ("-i\n")); | |
525 | VERBOSE (DEBUG, PRINTF ("optarg: %s\n", optarg)); | |
526 | input = optarg; | |
527 | break; | |
528 | case 'o': | |
529 | VERBOSE (DEBUG, PRINTF ("-o\n")); | |
530 | output = optarg; | |
531 | break; | |
532 | case 'v': | |
533 | VERBOSE (DEBUG, PRINTF ("-v\n")); | |
534 | verbose = atoi (optarg); | |
535 | VERBOSE (INFO, printf ("verbose: %d\n", verbose)); | |
536 | break; | |
537 | case 'h': | |
538 | default: | |
539 | usage (c != 'h'); | |
540 | } | |
541 | } | |
542 | if (argc - optind != 0) { | |
543 | fprintf (stderr, "%s: invalid option -- %s\n", progname, argv[optind]); | |
544 | usage (1); | |
545 | } | |
546 | VERBOSE (DEBUG, PRINTF ("end argument processing\n")); | |
547 | ||
548 | switch (mode) { | |
549 | case COMPRESS: | |
550 | table = create_table (input); | |
551 | if (table == NULL) break; | |
552 | VERBOSE (INFO, print_occ_table (table)); | |
553 | ||
554 | leafs = init_forest (table); | |
555 | if (leafs == NULL) break; | |
556 | root = create_tree (leafs); | |
557 | if (root == NULL) break; | |
558 | codes = create_code (root); | |
559 | if (codes == NULL) break; | |
560 | VERBOSE (INFO, print_code_table (codes)); | |
561 | header = encode_header_table (codes, table); | |
562 | if (header == NULL) break; | |
563 | VERBOSE (INFO, print_header (header)); | |
564 | rc = write_compress (output, input, codes, header); | |
565 | break; | |
566 | case DECOMPRESS: | |
567 | rc = 1; | |
568 | break; | |
569 | } | |
570 | ||
571 | /* clean everything */ | |
572 | if (header) free (header); | |
573 | if (codes) free (codes); | |
574 | if (root) free_tree (root); | |
575 | if (leafs) free (leafs); | |
576 | if (table) free (table); | |
577 | ||
578 | return rc; | |
579 | } | |
580 | ||
581 | // test: compress.exe -h | |
582 | // test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }' | |
583 | // test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }' | |
584 | // test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }' | |
585 | ||
586 | // vim: ts=4 sw=4 et |