010ef80f94edc3867b6537b09881049b91cbf142
18 #define BUFFER_SIZE 4096
27 #define CEIL(x, y) (((x) + (y) - 1) / (y))
28 #define MIN(x, y) (((x) < (y)) ? (x) : (y))
29 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
30 #define VERBOSE(level, statement...) do { if (level <= verbose) { statement; } } while(0)
31 #define PRINTF(args...) do { fprintf (stdout, args); fflush (stdout); } while (0)
35 char *progname
= NULL
;
42 FILE *fd
= ret
? stderr
: stdout
;
43 fprintf (fd
, "usage: %s\n", progname
);
44 fprintf (fd
, " -h : help message\n");
45 fprintf (fd
, " -i <file>: input file\n");
46 fprintf (fd
, " -o <file>: output file\n");
47 fprintf (fd
, " -v : verbose level (%d)\n", verbose
);
52 /* create occurence table */
54 int *create_table (char *filename
)
56 char buffer
[BUFFER_SIZE
] = {0};
61 VERBOSE (DEBUG
, PRINTF ("start creating occurence table\n"));
63 /* memory allocation */
64 table
= (int *) calloc (NB_CHARS
, sizeof (int));
66 VERBOSE (ERROR
, printf ("can't allocate memory\n"));
69 VERBOSE (INFO
, printf ("memory allocated\n"));
72 fid
= fopen (filename
, "rb");
74 VERBOSE (ERROR
, printf ("can't open file '%s'\n", filename
));
78 VERBOSE (INFO
, printf ("file '%s' opened\n", filename
));
82 nbread
= fread (buffer
, 1, BUFFER_SIZE
, fid
);
83 VERBOSE (DEBUG
, PRINTF ("nbread: %d\n", nbread
));
85 table
[(int)buffer
[nbread
]]++;
92 VERBOSE (DEBUG
, PRINTF ("end creating occurence table\n"));
97 /* print occurence table */
99 void print_occ_table (int *table
)
103 printf ("Occurence table\n");
104 for (i
= 0; i
< NB_CHARS
; i
++) {
106 printf ("0x%02x '%c': %d\n", i
, ((i
< 32) || (i
> 127)) ? '.' : i
, table
[i
]);
113 typedef struct _leaf_t
115 struct _leaf_t
*left
;
116 struct _leaf_t
*right
;
121 /* initialize forest */
123 leaf_t
**init_forest (int *table
)
125 leaf_t
**leafs
= NULL
;
129 VERBOSE (DEBUG
, PRINTF ("start initiliazing forest\n"));
131 /* count number of leafs */
132 for (i
= 0; i
< NB_CHARS
; i
++) {
138 /* allocate memory */
139 leafs
= (leaf_t
**) calloc (nb_leafs
+ 1, sizeof (leaf_t
*));
141 VERBOSE (ERROR
, printf ("can't allocate memory\n"));
145 /* initialize leafs */
146 for (i
= 0, l
= 0; i
< NB_CHARS
; i
++) {
148 leafs
[l
] = (leaf_t
*) calloc (1, sizeof (leaf_t
));
149 if (leafs
[l
] == NULL
) {
150 VERBOSE (ERROR
, printf ("can't allocate memory\n"));
153 leafs
[l
]->occ
= table
[i
];
159 VERBOSE (DEBUG
, PRINTF ("end initiliazing forest\n"));
166 leaf_t
*create_tree (leaf_t
**leafs
)
168 leaf_t
*branch
= NULL
;
174 VERBOSE (DEBUG
, PRINTF ("start creating tree\n"));
176 /* count number of leafs */
177 while (leafs
[nb_leafs
] != NULL
) {
182 for (j
= 0; j
< nb_leafs
- 1; j
++) {
184 /* look for leatest occurence */
186 for (i
= 0; i
< nb_leafs
; i
++) {
187 if (leafs
[i
] == NULL
) {
190 if ((last
== -1) || (leafs
[i
]->occ
< leafs
[last
]->occ
)) {
195 /* look for ante leatest occurence */
197 for (i
= 0; i
< nb_leafs
; i
++) {
198 if ((i
== last
) || (leafs
[i
] == NULL
)) {
201 if ((ante
== -1) || (leafs
[i
]->occ
< leafs
[ante
]->occ
)) {
207 if ((last
== -1) || (ante
== -1)) {
208 VERBOSE (ERROR
, printf ("error during tree building\n"));
211 branch
= (leaf_t
*) calloc (1, sizeof (leaf_t
));
212 if (branch
== NULL
) {
213 VERBOSE (ERROR
, printf ("can't allocate memory\n"));
216 branch
->left
= leafs
[last
];
217 branch
->right
= leafs
[ante
];
218 branch
->occ
= branch
->left
->occ
+ branch
->right
->occ
;
219 leafs
[last
] = branch
;
223 VERBOSE (DEBUG
, PRINTF ("end creating tree\n"));
225 return (last
!= -1) ? leafs
[last
] : NULL
;
230 void free_tree (leaf_t
*root
) {
233 free_tree (root
->left
);
236 free_tree (root
->right
);
245 char code
[NB_CHARS
- 1 + 1];
250 void explore_tree (code_t
*table
, leaf_t
*root
, char *code
, int index
)
252 if ((root
->left
== NULL
) && (root
->right
== NULL
)) {
253 strcpy ((char *)(table
+ (int)(root
->c
)), code
);
256 strcpy (code
+ index
, "1");
257 explore_tree (table
, root
->left
, code
, index
+ 1);
258 strcpy (code
+ index
, "0");
259 explore_tree (table
, root
->right
, code
, index
+ 1);
263 /* create code table */
265 code_t
*create_code (leaf_t
*root
)
267 code_t
*table
= NULL
;
270 VERBOSE (DEBUG
, PRINTF ("start creating code table\n"));
273 table
= (code_t
*) calloc (NB_CHARS
, sizeof (code_t
));
275 VERBOSE (ERROR
, printf ("can't allocate memory\n"));
279 explore_tree (table
, root
, (char *)&code
, 0);
281 VERBOSE (DEBUG
, PRINTF ("end creating code table\n"));
286 /* print code table */
288 void print_code_table (code_t
*codes
)
293 printf ("Code table\n");
294 for (i
= 0; i
< NB_CHARS
; i
++) {
295 code
= (char *)(codes
+ i
);
296 if (strlen (code
) == 0) {
299 printf ("0x%02x '%c': %s\n", i
, ((i
< 32) || (i
> 127)) ? '.' : i
, code
);
303 /* encode header and code table */
305 char *encode_header_table (code_t
*codes
, int *occ
)
307 unsigned char buffer
[NB_CHARS
* (NB_CHARS
- 1) / 2 / 8 + NB_CHARS
+ 6] = {0};
308 char bits
[(NB_CHARS
- 1) + 8 + 1] = {0};
310 unsigned char *header
= buffer
;
311 int i
, j
, length
, mode
;
315 VERBOSE (DEBUG
, PRINTF ("start encoding header and code table\n"));
318 for (i
= 0; i
< NB_CHARS
; i
++) {
319 code
= (char *)(codes
+ i
);
320 if (strlen (code
) > 0) {
322 size
+= strlen (code
) * occ
[i
];
325 mode
= (NB_CHARS
< 2 * nb
+ 1) ? 1 : 2;
326 VERBOSE (DEBUG
, PRINTF ("nb chars: %d\n", nb
));
327 VERBOSE (DEBUG
, PRINTF ("mode: %d\n", mode
));
328 VERBOSE (DEBUG
, PRINTF ("size: %d\n", size
));
329 VERBOSE (DEBUG
, PRINTF ("rem: %d\n", size
% 256));
332 strcpy ((char *)header
, (mode
== 1) ? "MZ1 " : "MZ2 ");
338 for (i
= 0; i
< NB_CHARS
; i
++) {
339 code
= (char *)(codes
+ i
);
340 *(header
++) = (unsigned char) strlen (code
);
344 *(header
++) = (unsigned char)(nb
- 1);
345 for (i
= 0; i
< NB_CHARS
; i
++) {
346 code
= (char *)(codes
+ i
);
347 if (strlen (code
) > 0) {
348 *(header
++) = (unsigned char)i
;
349 *(header
++) = (unsigned char) strlen (code
);
356 for (i
= 0; i
< NB_CHARS
; i
++) {
357 code
= (char *)(codes
+ i
);
358 if (strlen (code
) > 0) {
360 while (strlen (bits
) > (8 - 1)) {
361 for (j
= 0; j
< 8; j
++) {
363 if (bits
[j
] == '1') {
367 strcpy (bits
, bits
+ 8);
372 if (strlen (bits
) > 0) {
373 for (j
= 0; j
< (int)strlen (bits
); j
++) {
375 if (bits
[j
] == '1') {
379 for (j
= (int)strlen (bits
); j
< 8; j
++) {
386 length
= (int)(header
- buffer
- 6);
387 VERBOSE (DEBUG
, PRINTF ("lengh: %d %02x %02x\n", length
, length
>> 8, length
& 0xff));
388 buffer
[3] = (unsigned char)(length
>> 8);
389 buffer
[4] = (unsigned char)(length
& 0xff);
390 buffer
[5] = (unsigned char)(size
% 256);
393 header
= (unsigned char *) calloc (length
+ 6, 1);
394 memcpy (header
, buffer
, length
+ 6);
396 VERBOSE (DEBUG
, PRINTF ("end encoding header and code table\n"));
398 return (char *)header
;
403 void print_header (char *header
)
407 length
= ((unsigned char)(header
[3]) << 8) + (unsigned char)(header
[4]);
408 VERBOSE (DEBUG
, PRINTF ("lengh: %d\n", length
));
409 for (i
= 0; i
< length
+ 6; i
++) {
410 printf ("%02x", (unsigned char)header
[i
]);
415 /* write crompressed file */
417 int write_compress (char *output
, char *input
, code_t
*codes
, char *header
)
419 char bufin
[BUFFER_SIZE
] = {0};
420 char bufout
[BUFFER_SIZE
] = {0};
421 char bits
[(NB_CHARS
- 1) + 8 + 1] = {0};
427 VERBOSE (DEBUG
, PRINTF ("start writting compressed file\n"));
429 /* open input file */
430 fin
= fopen (input
, "rb");
432 VERBOSE (ERROR
, printf ("can't open file '%s' for reading\n", input
));
435 VERBOSE (INFO
, printf ("file '%s' opened\n", input
));
437 /* open output file */
438 fout
= fopen (output
, "wb");
440 VERBOSE (ERROR
, printf ("can't open file '%s' for writing\n", output
));
443 VERBOSE (INFO
, printf ("file '%s' opened\n", output
));
446 length
= ((unsigned char)(header
[3]) << 8) + (unsigned char)(header
[4]);
447 VERBOSE (DEBUG
, PRINTF ("lengh: %d\n", length
));
448 fwrite (header
, 1, length
+ 6, fout
);
452 while (!feof (fin
)) {
453 nbread
= fread (bufin
, 1, BUFFER_SIZE
, fin
);
454 VERBOSE (DEBUG
, PRINTF ("nbread: %d\n", nbread
));
455 for (i
= 0; i
< nbread
; i
++) {
456 strcat (bits
, (char *)(codes
+ bufin
[i
]));
457 while (strlen (bits
) > (8 - 1)) {
458 for (j
= 0; j
< 8; j
++) {
460 if (bits
[j
] == '1') {
464 strcpy (bits
, bits
+ 8);
465 if (pt
- bufout
== BUFFER_SIZE
- 1) {
466 fwrite (bufout
, 1, BUFFER_SIZE
, fout
);
474 VERBOSE (DEBUG
, PRINTF ("lastest bits : %d\n", strlen (bits
)));
475 if (strlen (bits
) > 0) {
476 for (j
= 0; j
< (int)strlen (bits
); j
++) {
478 if (bits
[j
] == '1') {
482 for (j
= (int)strlen (bits
); j
< 8; j
++) {
488 VERBOSE (DEBUG
, PRINTF ("last partial buffer written: %u\n", pt
- bufout
));
489 fwrite (bufout
, 1, pt
- bufout
, fout
);
496 VERBOSE (DEBUG
, PRINTF ("end writting compressed file\n"));
503 code_t
*read_header (char *filename
) {
504 unsigned char buffer
[NB_CHARS
* (NB_CHARS
- 1) / 2 / 8 + NB_CHARS
+ 6] = {0};
505 code_t
*table
= NULL
;
506 unsigned char *codes
= NULL
;
508 int lengths
[NB_CHARS
] = {0};
511 size_t i
, j
, l
, nb
, size
;
513 VERBOSE (DEBUG
, PRINTF ("start reading header\n"));
516 fid
= fopen (filename
, "rb");
518 VERBOSE (ERROR
, printf ("can't open file '%s'\n", filename
));
521 VERBOSE (INFO
, printf ("file '%s' opened\n", filename
));
523 /* read magic number */
524 nb
= fread (buffer
, 1, 6, fid
);
525 VERBOSE (DEBUG
, PRINTF ("nb, buffer: %d 0x%02x 0x%02x\n", nb
, buffer
[0], buffer
[1]));
526 if ((nb
== 6) && (buffer
[0] == 'M') && (buffer
[1] == 'Z')) {
527 mode
= (buffer
[2] == '1') ? 1 : (buffer
[2] == '2') ? 2 : 0;
528 size
= ((unsigned char)(buffer
[3]) << 8) + (unsigned char)(buffer
[4]);
529 VERBOSE (DEBUG
, PRINTF ("mode, size: %d %d\n", mode
, size
));
530 if (size
> NB_CHARS
* (NB_CHARS
- 1) / 2 / 8 + NB_CHARS
) {
533 nb
= fread (buffer
, 1, size
, fid
);
534 VERBOSE (DEBUG
, PRINTF ("nb read: %d\n", nb
));
542 VERBOSE (ERROR
, printf ("incorrect file\n"));
547 codes
= (unsigned char *)buffer
;
550 for (i
= 0; i
< NB_CHARS
; i
++) {
551 lengths
[i
] = *(codes
++);
556 VERBOSE (DEBUG
, PRINTF ("nb codes: %d\n", nb
));
557 for (i
= 0; i
< nb
; i
++) {
559 lengths
[j
] = *(codes
++);
563 VERBOSE (DEBUG
, for (i
= 0; i
< NB_CHARS
; i
++) if (lengths
[i
]) PRINTF ("%d: %d\n", i
, lengths
[i
]));
566 for (i
= 0, l
= 0; i
< NB_CHARS
; i
++) {
569 if (((mode
== 1) && (size
- 256 != (l
+ 7) / 8)) ||
570 ((mode
== 2) && (size
- 2 * nb
- 1 != (l
+ 7) / 8))) {
571 VERBOSE (ERROR
, printf ("incorrect code table length\n"));
576 table
= (code_t
*) calloc (NB_CHARS
, sizeof (code_t
));
578 VERBOSE (ERROR
, printf ("can't allocate memory\n"));
585 for (i
= 0; i
< NB_CHARS
; i
++) {
586 if (lengths
[i
] == 0) {
589 while (lengths
[i
]--) {
590 strcat ((char *)(table
+ i
), ((cur
& 0x80) == 0) ? "0" : "1");
600 VERBOSE (DEBUG
, PRINTF ("end reading header\n"));
605 /* write decompressed file */
607 int write_decompress (char *output
, char *input
, code_t
*codes
)
609 char bufin
[BUFFER_SIZE
] = {0};
610 char bufout
[BUFFER_SIZE
] = {0};
611 unsigned char buffer
[MAX(NB_CHARS
* (NB_CHARS
- 1) / 2 / 8 + NB_CHARS
+ 6, BUFFER_SIZE
)] = {0};
612 char bits
[(NB_CHARS
- 1) + 1] = {0};
614 int i
, j
, k
, nb
, size
, rem
;
619 VERBOSE (DEBUG
, PRINTF ("start writing decompressed file\n"));
621 /* open file for reading */
622 fin
= fopen (input
, "rb");
624 VERBOSE (ERROR
, printf ("can't open file '%s' for reading\n", input
));
627 VERBOSE (INFO
, printf ("file '%s' opened\n", input
));
629 /* read magic number */
630 nb
= fread (buffer
, 1, 6, fin
);
632 VERBOSE (ERROR
, printf ("can't read file\n"));
636 size
= ((unsigned char)(buffer
[3]) << 8) + (unsigned char)(buffer
[4]);
637 VERBOSE (DEBUG
, printf ("table size: %d\n", size
));
639 VERBOSE (DEBUG
, printf ("remainder: %d\n", rem
));
640 nb
= fread (buffer
, 1, size
, fin
);
642 VERBOSE (ERROR
, printf ("can't read file\n"));
647 /* open file for writing */
648 fout
= fopen (output
, "wb");
650 VERBOSE (ERROR
, printf ("can't open file '%s' for writing\n", output
));
653 VERBOSE (INFO
, printf ("file '%s' opened\n", output
));
657 while (!feof (fin
)) {
658 nb
= fread (bufin
, 1, BUFFER_SIZE
, fin
);
659 VERBOSE (DEBUG
, PRINTF ("nbread: %d\n", nb
));
660 for (i
= 0; i
< nb
; i
++) {
661 for (j
= 0; j
< 8; j
++) {
662 strcat (bits
, ((bufin
[i
] & 0x80) == 0) ? "0" : "1");
665 VERBOSE (DEBUG
, PRINTF ("bits: %d - %s\n", strlen (bits
), bits
));
667 /* look for correct code */
669 for (k
= 0; (k
< NB_CHARS
) && (!is_found
); k
++) {
670 if (strcmp ((char *)(codes
+ k
), bits
) == 0) {
672 VERBOSE (DEBUG
, PRINTF ("found: %d\n", k
));
675 if (pt
- bufout
== BUFFER_SIZE
- 1) {
676 VERBOSE (DEBUG
, PRINTF ("nb buffer out: %u\n", (pt
- bufout
)));
677 fwrite (bufout
, 1, BUFFER_SIZE
, fout
);
684 if ((i
== nb
- 1) && (l
% 256 == rem
) && (feof (fin
))) {
685 VERBOSE (DEBUG
, PRINTF ("break\n"));
692 VERBOSE (DEBUG
, PRINTF ("nb buffer out: %u\n", (pt
- bufout
)));
693 fwrite (bufout
, 1, pt
- bufout
, fout
);
700 VERBOSE (DEBUG
, PRINTF ("end writing decompressed file\n"));
707 int main (int argc
, char *argv
[])
712 leaf_t
**leafs
= NULL
;
714 code_t
*codes
= NULL
;
722 VERBOSE (DEBUG
, PRINTF ("start processing arguments\n"));
723 while ((c
= getopt(argc
, argv
, "cdhi:o:v:")) != EOF
) {
724 VERBOSE (DEBUG
, PRINTF ("option: %c\n", c
));
734 VERBOSE (DEBUG
, PRINTF ("input: %s\n", input
));
738 VERBOSE (DEBUG
, PRINTF ("output: %s\n", output
));
741 verbose
= atoi (optarg
);
742 VERBOSE (INFO
, printf ("verbose: %d\n", verbose
));
749 if (argc
- optind
!= 0) {
750 fprintf (stderr
, "%s: invalid option -- %s\n", progname
, argv
[optind
]);
753 VERBOSE (DEBUG
, PRINTF ("end processing arguments\n"));
757 table
= create_table (input
);
758 if (table
== NULL
) break;
759 VERBOSE (INFO
, print_occ_table (table
));
761 leafs
= init_forest (table
);
762 if (leafs
== NULL
) break;
763 root
= create_tree (leafs
);
764 if (root
== NULL
) break;
765 codes
= create_code (root
);
766 if (codes
== NULL
) break;
767 VERBOSE (INFO
, print_code_table (codes
));
768 header
= encode_header_table (codes
, table
);
769 if (header
== NULL
) break;
770 VERBOSE (INFO
, print_header (header
));
771 rc
= write_compress (output
, input
, codes
, header
);
774 codes
= read_header (input
);
775 if (codes
== NULL
) break;
776 VERBOSE (INFO
, print_code_table (codes
));
777 rc
= write_decompress (output
, input
, codes
);
781 /* clean everything */
782 if (header
) free (header
);
783 if (codes
) free (codes
);
784 if (root
) free_tree (root
);
785 if (leafs
) free (leafs
);
786 if (table
) free (table
);
791 // test: compress.exe -h
792 // test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
793 // test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }'
794 // test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
795 // test: compress.exe -c -i compress.c -o compress.mz
796 // test: ls -sS1 compress.c compress.mz | tail -1 | grep compress.mz
797 // test: compress.exe -d -i compress.mz -o tmp.c
798 // test: cmp compress.c tmp.c
799 // test: rm compress.mz tmp.c