compression is ready
[compress.git] / compress.c
CommitLineData
58352bb0
LM
1/* depend: */
2/* cflags: */
3/* linker: */
4
5#include <assert.h>
6#include <getopt.h>
7#include <malloc.h>
8#include <stdio.h>
9#include <stdlib.h>
10#include <string.h>
11
12/* constants */
13
14#define COMPRESS 1
15#define DECOMPRESS 2
16
17#define NB_CHARS 256
18#define BUFFER_SIZE 4096
19
20#define DEBUG 4
21#define INFO 3
22#define WARNING 2
23#define ERROR 1
24
25/* macros */
26
27#define CEIL(x, y) (((x) + (y) - 1) / (y))
28#define MIN(x, y) (((x) < (y)) ? (x) : (y))
29#define MAX(x, y) (((x) > (y)) ? (x) : (y))
30#define VERBOSE(level, statement...) do { if (level <= verbose) { statement; } } while(0)
31#define PRINTF(args...) do { fprintf (stdout, args); fflush (stdout); } while (0)
32
33/* gobal variables */
34
35char *progname = NULL;
36int verbose = 2;
37
38/* help function */
39
40void usage (int ret)
41{
42 FILE *fd = ret ? stderr : stdout;
43 fprintf (fd, "usage: %s\n", progname);
44 fprintf (fd, " -h : help message\n");
45 fprintf (fd, " -i <file>: input file\n");
46 fprintf (fd, " -o <file>: output file\n");
47 fprintf (fd, " -v : verbose level (%d)\n", verbose);
48
49 exit (ret);
50}
51
52/* create occurence table */
53
54int *create_table (char *filename)
55{
56 char buffer[BUFFER_SIZE] = {0};
57 int nbread;
58 int *table = NULL;
59 FILE *fid = NULL;
60
61 VERBOSE (DEBUG, PRINTF ("start create occurence table\n"));
62
63 /* memory allocation */
64 table = (int *) calloc (NB_CHARS, sizeof (int));
65 if (table == NULL) {
66 VERBOSE (ERROR, printf ("can't allocate memory\n"));
67 return NULL;
68 }
69 VERBOSE (INFO, printf ("memory allocated\n"));
70
71 /* open file */
72 fid = fopen (filename, "rb");
73 if (fid == NULL) {
74 VERBOSE (ERROR, printf ("can't open file '%s'\n", filename));
75 free (table);
76 return NULL;
77 }
78 VERBOSE (INFO, printf ("file '%s' opened\n", filename));
79
80 /* read file */
81 while (!feof (fid)) {
82 nbread = fread (buffer, 1, BUFFER_SIZE, fid);
83 VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread));
84 while (nbread--) {
85 table[(int)buffer[nbread]]++;
86 }
87 }
88
89 /* close file */
90 fclose (fid);
91
92 VERBOSE (DEBUG, PRINTF ("end create occurence table\n"));
93
94 return table;
95}
96
97/* print occurence table */
98
99void print_occ_table (int *table)
100{
101 int i;
102
103 printf ("Occurence table\n");
104 for (i = 0; i < NB_CHARS; i++) {
105 if (table[i]) {
106 printf ("0x%02x '%c': %d\n", i, ((i < 32) || (i > 127)) ? '.' : i, table[i]);
107 }
108 }
109}
110
111/* leaf structure */
112
113typedef struct _leaf_t
114{
115 struct _leaf_t *left;
116 struct _leaf_t *right;
117 int occ;
118 char c;
119} leaf_t;
120
121/* initialize forest */
122
123leaf_t **init_forest (int *table)
124{
125 leaf_t **leafs = NULL;
126 int nb_leafs = 0;
127 int i, l;
128
129 VERBOSE (DEBUG, PRINTF ("start initiliazing forest\n"));
130
131 /* count number of leafs */
132 for (i = 0; i < NB_CHARS; i++) {
133 if (table[i] > 0) {
134 nb_leafs++;
135 }
136 }
137
138 /* allocate memory */
139 leafs = (leaf_t **) calloc (nb_leafs + 1, sizeof (leaf_t *));
140 if (leafs == NULL) {
141 VERBOSE (ERROR, printf ("can't allocate memory\n"));
142 return NULL;
143 }
144
145 /* initialize leafs */
146 for (i = 0, l = 0; i < NB_CHARS; i++) {
147 if (table[i] > 0) {
148 leafs[l] = (leaf_t *) calloc (1, sizeof (leaf_t));
149 if (leafs[l] == NULL) {
150 VERBOSE (ERROR, printf ("can't allocate memory\n"));
151 return NULL;
152 }
153 leafs[l]->occ = table[i];
154 leafs[l]->c = i;
155 l++;
156 }
157 }
158
159 VERBOSE (DEBUG, PRINTF ("end initiliazing forest\n"));
160
161 return leafs;
162}
163
164/* create tree */
165
166leaf_t *create_tree (leaf_t **leafs)
167{
168 leaf_t *branch = NULL;
169 int nb_leafs = 0;
170 int last, ante;
171 int i, j;
172
173 VERBOSE (DEBUG, PRINTF ("start creating tree\n"));
174
175 /* count number of leafs */
176 while (leafs[nb_leafs] != NULL) {
177 nb_leafs++;
178 }
179
180 /* create tree */
181 for (j = 0; j < nb_leafs - 1; j++) {
182
183 /* look for leatest occurence */
184 last = -1;
185 for (i = 0; i < nb_leafs; i++) {
186 if (leafs[i] == NULL) {
187 continue;
188 }
189 if ((last == -1) || (leafs[i]->occ < leafs[last]->occ)) {
190 last = i;
191 }
192 }
193
194 /* look for ante leatest occurence */
195 ante = -1;
196 for (i = 0; i < nb_leafs; i++) {
197 if ((i == last) || (leafs[i] == NULL)) {
198 continue;
199 }
200 if ((ante == -1) || (leafs[i]->occ < leafs[ante]->occ)) {
201 ante = i;
202 }
203 }
204
205 /* create branch */
206 if ((last == -1) || (ante == -1)) {
207 VERBOSE (ERROR, printf ("error during tree building\n"));
208 return NULL;
209 }
210 branch = (leaf_t *) calloc (1, sizeof (leaf_t));
211 if (branch == NULL) {
212 VERBOSE (ERROR, printf ("can't allocate memory\n"));
213 return NULL;
214 }
215 branch->left = leafs[last];
216 branch->right = leafs[ante];
217 branch->occ = branch->left->occ + branch->right->occ;
218 leafs[last] = branch;
219 leafs[ante] = NULL;
220 }
221
222 VERBOSE (DEBUG, PRINTF ("end creating tree\n"));
223
224 return leafs[last];
225}
226
227/* free tree */
228
229void free_tree (leaf_t *root) {
230 if (root) {
231 if (root->left) {
232 free_tree (root->left);
233 }
234 if (root->right) {
235 free_tree (root->right);
236 }
237 free (root);
238 }
239}
240
241/* code structure */
242
243typedef struct {
244 char code[NB_CHARS - 1 + 1];
245} code_t;
246
247/* explore tree */
248
249void explore_tree (code_t *table, leaf_t *root, char *code, int index)
250{
251 if ((root->left == NULL) && (root->right == NULL)) {
252 strcpy ((char *)(table + (int)(root->c)), code);
253 }
254 else {
255 strcpy (code + index, "1");
256 explore_tree (table, root->left, code, index + 1);
257 strcpy (code + index, "0");
258 explore_tree (table, root->right, code, index + 1);
259 }
260}
261
262/* create code table */
263
264code_t *create_code (leaf_t *root)
265{
266 code_t *table = NULL;
267 code_t code = {0};
268
269 VERBOSE (DEBUG, PRINTF ("start creating code table\n"));
270
271 /* allocate table */
272 table = (code_t *) calloc (NB_CHARS, sizeof (code_t));
273 if (table == NULL) {
274 VERBOSE (ERROR, printf ("can't allocate memory\n"));
275 return NULL;
276 }
277
278 explore_tree (table, root, (char *)&code, 0);
279
280 VERBOSE (DEBUG, PRINTF ("end creating code table\n"));
281
282 return table;
283}
284
285/* print code table */
286
287void print_code_table (code_t *codes)
288{
289 char *code;
290 int i;
291
292 printf ("Code table\n");
293 for (i = 0; i < NB_CHARS; i++) {
294 code = (char *)(codes + i);
295 if (strlen (code) == 0) {
296 continue;
297 }
298 printf ("0x%02x '%c': %s\n", i, ((i < 32) || (i > 127)) ? '.' : i, code);
299 }
300}
301
302/* encode header and code table */
303
304char *encode_header_table (code_t *codes, int *occ)
305{
306 unsigned char buffer[NB_CHARS * (NB_CHARS - 1) / 2 / 8 + NB_CHARS + 2] = {0};
307 char bits[(NB_CHARS - 1) + 8 + 1] = {0};
308 char *code;
309 unsigned char *header = buffer;
310 int i, j, length, mode;
311 int nb = 0;
312 int size = 0;
313
314 VERBOSE (DEBUG, PRINTF ("start encoding header and code table\n"));
315
316 /* mode 1 or 2 */
317 for (i = 0; i < NB_CHARS; i++) {
318 code = (char *)(codes + i);
319 if (strlen (code) > 0) {
320 nb++;
321 size += strlen (code) * occ[i];
322 }
323 }
324 mode = (NB_CHARS < 2 * nb + 1) ? 1 : 2;
325 VERBOSE (DEBUG, PRINTF ("nb chars: %d\n", nb));
326 VERBOSE (DEBUG, PRINTF ("mode: %d\n", mode));
327 VERBOSE (DEBUG, PRINTF ("size: %d\n", size));
328
329 /* header */
330 strcpy ((char *)header, (mode == 1) ? "MZ1 " : "MZ2 ");
331 header += 6;
332
333 /* size */
334 switch (mode) {
335 case 1:
336 for (i = 0; i < NB_CHARS; i++) {
337 code = (char *)(codes + i);
338 *(header++) = (unsigned char) strlen (code);
339 }
340 break;
341 case 2:
342 *(header++) = (unsigned char)(nb - 1);
343 for (i = 0; i < NB_CHARS; i++) {
344 code = (char *)(codes + i);
345 if (strlen (code) > 0) {
346 *(header++) = (unsigned char)i;
347 *(header++) = (unsigned char) strlen (code);
348 }
349 }
350 break;
351 }
352
353 /* bits */
354 for (i = 0; i < NB_CHARS; i++) {
355 code = (char *)(codes + i);
356 if (strlen (code) > 0) {
357 strcat (bits, code);
358 while (strlen (bits) > (8 - 1)) {
359 for (j = 0; j < 8; j++) {
360 *header <<= 1;
361 if (bits[j] == '1') {
362 (*header)++;
363 }
364 }
365 strcpy (bits, bits + 8);
366 header++;
367 }
368 }
369 }
370 if (strlen (bits) > 0) {
371 for (j = 0; j < (int)strlen (bits); j++) {
372 *header <<= 1;
373 if (bits[j] == '1') {
374 (*header)++;
375 }
376 }
377 header++;
378 }
379
380 /* length */
381 length = (int)(header - buffer - 6);
382 VERBOSE (DEBUG, PRINTF ("lengh: %d %02x %02x\n", length, length >> 8, length & 0xff));
383 buffer[3] = (unsigned char)(length >> 8);
384 buffer[4] = (unsigned char)(length & 0xff);
385 buffer[5] = (unsigned char)(size % 8);
386
387 /* allocation */
388 header = (unsigned char *) calloc (length + 6, 1);
389 memcpy (header, buffer, length + 6);
390
391 VERBOSE (DEBUG, PRINTF ("end encoding header and code table\n"));
392
393 return (char *)header;
394}
395
396/* print header */
397
398void print_header (char *header)
399{
400 int length, i;
401
402 length = ((unsigned char)(header[3]) << 8) + (unsigned char)(header[4]);
403 VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length));
404 for (i = 0; i < length + 6; i++) {
405 printf ("%02x", (unsigned char)header[i]);
406 }
407 printf ("\n");
408}
409
410/* write crompressed file */
411
412int write_compress (char *output, char *input, code_t *codes, char *header)
413{
414 char bufin[BUFFER_SIZE] = {0};
415 char bufout[BUFFER_SIZE] = {0};
416 char bits[(NB_CHARS - 1) + 8 + 1] = {0};
417 FILE *fin, *fout;
418 int length = 0;
419 int i, j, nbread;
420 char *pt;
421
422 VERBOSE (DEBUG, PRINTF ("start writting compressed file\n"));
423
424 /* open input file */
425 fin = fopen (input, "rb");
426 if (fin == NULL) {
427 VERBOSE (ERROR, printf ("can't open file '%s'\n", input));
428 return 1;
429 }
430 VERBOSE (INFO, printf ("file '%s' opened\n", input));
431
432 /* open output file */
433 fout = fopen (output, "wb");
434 if (fin == NULL) {
435 VERBOSE (ERROR, printf ("can't open file '%s'\n", output));
436 return 1;
437 }
438 VERBOSE (INFO, printf ("file '%s' opened\n", output));
439
440 /* write header */
441 length = ((unsigned char)(header[3]) << 8) + (unsigned char)(header[4]);
442 VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length));
443 fwrite (header, 1, length + 6, fout);
444
445 /* write file */
446 pt = bufout;
447 while (!feof (fin)) {
448 nbread = fread (bufin, 1, BUFFER_SIZE, fin);
449 VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread));
450 for (i = 0; i < nbread; i++) {
451 strcat (bits, (char *)(codes + bufin[i]));
452 while (strlen (bits) > (8 - 1)) {
453 for (j = 0; j < 8; j++) {
454 *pt <<= 1;
455 if (bits[j] == '1') {
456 (*pt)++;
457 }
458 }
459 strcpy (bits, bits + 8);
460 if (pt - bufout < BUFFER_SIZE) {
461 pt++;
462 } else {
463 fwrite (bufout, 1, BUFFER_SIZE, fout);
464 pt = bufout;
465 }
466 }
467 }
468 }
469 if (strlen (bits) > 0) {
470 for (j = 0; j < (int)strlen (bits); j++) {
471 *pt <<= 1;
472 if (bits[j] == '1') {
473 (*pt)++;
474 }
475 }
476 if (pt - bufout < BUFFER_SIZE) {
477 pt++;
478 } else {
479 fwrite (bufout, 1, BUFFER_SIZE, fout);
480 pt = bufout;
481 }
482 pt++;
483 }
484 if (pt != bufout) {
485 fwrite (bufout, 1, pt - bufout, fout);
486 }
487
488 /* closing */
489 fclose (fin);
490 fclose (fout);
491
492 VERBOSE (DEBUG, PRINTF ("end writting compressed file\n"));
493
494 return 0;
495}
496
497/* main function */
498
499int main (int argc, char *argv[])
500{
501 char *input = NULL;
502 char *output = NULL;
503 int *table = NULL;
504 leaf_t **leafs = NULL;
505 leaf_t *root = NULL;
506 code_t *codes = NULL;
507 char *header = NULL;
508 int mode = COMPRESS;
509 int rc = 0;
510
511 progname = argv[0];
512
513 int c;
514 VERBOSE (DEBUG, PRINTF ("start argument processing\n"));
515 while ((c = getopt(argc, argv, "cdhi:o:v:")) != EOF) {
516 switch (c) {
517 case 'c':
518 mode = COMPRESS;
519 break;
520 case 'd':
521 mode = DECOMPRESS;
522 break;
523 case 'i':
524 VERBOSE (DEBUG, PRINTF ("-i\n"));
525 VERBOSE (DEBUG, PRINTF ("optarg: %s\n", optarg));
526 input = optarg;
527 break;
528 case 'o':
529 VERBOSE (DEBUG, PRINTF ("-o\n"));
530 output = optarg;
531 break;
532 case 'v':
533 VERBOSE (DEBUG, PRINTF ("-v\n"));
534 verbose = atoi (optarg);
535 VERBOSE (INFO, printf ("verbose: %d\n", verbose));
536 break;
537 case 'h':
538 default:
539 usage (c != 'h');
540 }
541 }
542 if (argc - optind != 0) {
543 fprintf (stderr, "%s: invalid option -- %s\n", progname, argv[optind]);
544 usage (1);
545 }
546 VERBOSE (DEBUG, PRINTF ("end argument processing\n"));
547
548 switch (mode) {
549 case COMPRESS:
550 table = create_table (input);
551 if (table == NULL) break;
552 VERBOSE (INFO, print_occ_table (table));
553
554 leafs = init_forest (table);
555 if (leafs == NULL) break;
556 root = create_tree (leafs);
557 if (root == NULL) break;
558 codes = create_code (root);
559 if (codes == NULL) break;
560 VERBOSE (INFO, print_code_table (codes));
561 header = encode_header_table (codes, table);
562 if (header == NULL) break;
563 VERBOSE (INFO, print_header (header));
564 rc = write_compress (output, input, codes, header);
565 break;
566 case DECOMPRESS:
567 rc = 1;
568 break;
569 }
570
571 /* clean everything */
572 if (header) free (header);
573 if (codes) free (codes);
574 if (root) free_tree (root);
575 if (leafs) free (leafs);
576 if (table) free (table);
577
578 return rc;
579}
580
581// test: compress.exe -h
582// test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
583// test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }'
584// test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
585
586// vim: ts=4 sw=4 et