3 /* linker: atoi.o code.o debug.o fprintf.o */
14 #define BUFFER_SIZE 4096
20 char *progname
= NULL
;
26 FILE *fd
= ret
? stderr
: stdout
;
27 myfprintf (fd
, "usage: %s\n", progname
);
28 myfprintf (fd
, " -h : help message\n");
29 myfprintf (fd
, " -i <file>: input file\n");
30 myfprintf (fd
, " -o <file>: output file\n");
31 myfprintf (fd
, " -v : verbose level (%d)\n", verbose
);
36 void blkcpy (void *dst
, const void *src
, int len
)
39 *((char *)dst
++) = *((char *)src
++);
43 /* create occurence table */
45 int *create_table (char *filename
)
47 byte_t buffer
[BUFFER_SIZE
] = {0};
52 VERBOSE (DEBUG
, PRINTF ("start creating occurence table\n"));
54 /* memory allocation */
55 table
= (int *) calloc (NB_BYTES
, sizeof (int));
57 VERBOSE (ERROR
, myfprintf (stdout
, "can't allocate memory\n"));
60 VERBOSE (INFO
, myfprintf (stdout
, "memory allocated\n"));
63 fid
= fopen (filename
, "rb");
65 VERBOSE (ERROR
, myfprintf (stdout
, "can't open file '%s'\n", filename
));
69 VERBOSE (INFO
, myfprintf (stdout
, "file '%s' opened\n", filename
));
73 nbread
= fread (buffer
, 1, BUFFER_SIZE
, fid
);
74 VERBOSE (DEBUG
, PRINTF ("nbread: %d\n", nbread
));
76 table
[(int)buffer
[nbread
]]++;
83 VERBOSE (DEBUG
, PRINTF ("end creating occurence table\n"));
88 /* print occurence table */
90 void print_occ_table (int *table
)
94 myfprintf (stdout
, "Occurence table\n");
95 for (i
= 0; i
< NB_BYTES
; i
++) {
97 myfprintf (stdout
, "0x%02x '%c': %d\n", i
, ((i
< 32) || (i
> 127)) ? '.' : i
, table
[i
]);
104 typedef struct _leaf_t
106 struct _leaf_t
*left
;
107 struct _leaf_t
*right
;
112 /* initialize forest */
114 leaf_t
**init_forest (int *table
)
116 leaf_t
**leafs
= NULL
;
120 VERBOSE (DEBUG
, PRINTF ("start initiliazing forest\n"));
122 /* count number of leafs */
123 for (i
= 0; i
< NB_BYTES
; i
++) {
129 /* allocate memory */
130 leafs
= (leaf_t
**) calloc (nb_leafs
+ 1, sizeof (leaf_t
*));
132 VERBOSE (ERROR
, myfprintf (stdout
, "can't allocate memory\n"));
136 /* initialize leafs */
137 for (i
= 0, l
= 0; i
< NB_BYTES
; i
++) {
139 leafs
[l
] = (leaf_t
*) calloc (1, sizeof (leaf_t
));
140 if (leafs
[l
] == NULL
) {
141 VERBOSE (ERROR
, myfprintf (stdout
, "can't allocate memory\n"));
144 leafs
[l
]->occ
= table
[i
];
150 VERBOSE (DEBUG
, PRINTF ("end initiliazing forest\n"));
157 leaf_t
*create_tree (leaf_t
**leafs
)
159 leaf_t
*branch
= NULL
;
165 VERBOSE (DEBUG
, PRINTF ("start creating tree\n"));
167 /* count number of leafs */
168 while (leafs
[nb_leafs
] != NULL
) {
173 for (j
= 0; j
< nb_leafs
- 1; j
++) {
175 /* look for leatest occurence */
177 for (i
= 0; i
< nb_leafs
; i
++) {
178 if (leafs
[i
] == NULL
) {
181 if ((last
== -1) || (leafs
[i
]->occ
< leafs
[last
]->occ
)) {
186 /* look for ante leatest occurence */
188 for (i
= 0; i
< nb_leafs
; i
++) {
189 if ((i
== last
) || (leafs
[i
] == NULL
)) {
192 if ((ante
== -1) || (leafs
[i
]->occ
< leafs
[ante
]->occ
)) {
198 if ((last
== -1) || (ante
== -1)) {
199 VERBOSE (ERROR
, myfprintf (stdout
, "error during tree building\n"));
202 branch
= (leaf_t
*) calloc (1, sizeof (leaf_t
));
203 if (branch
== NULL
) {
204 VERBOSE (ERROR
, myfprintf (stdout
, "can't allocate memory\n"));
207 branch
->left
= leafs
[last
];
208 branch
->right
= leafs
[ante
];
209 branch
->occ
= branch
->left
->occ
+ branch
->right
->occ
;
210 leafs
[last
] = branch
;
214 VERBOSE (DEBUG
, PRINTF ("end creating tree\n"));
216 return (last
!= -1) ? leafs
[last
] : NULL
;
221 void free_tree (leaf_t
*root
) {
224 free_tree (root
->left
);
227 free_tree (root
->right
);
235 void explore_tree (code_t
*table
, leaf_t
*root
, char *code
, int index
)
237 if ((root
->left
== NULL
) && (root
->right
== NULL
)) {
238 codcpy ((char *)(table
+ (int)(root
->c
)), sizeof (code_t
), code
);
241 codcpy (code
+ index
, sizeof (code_t
), "1");
242 explore_tree (table
, root
->left
, code
, index
+ 1);
243 codcpy (code
+ index
, sizeof (code_t
), "0");
244 explore_tree (table
, root
->right
, code
, index
+ 1);
248 /* create code table */
250 code_t
*create_code (leaf_t
*root
)
252 code_t
*table
= NULL
;
255 VERBOSE (DEBUG
, PRINTF ("start creating code table\n"));
258 table
= (code_t
*) calloc (NB_BYTES
, sizeof (code_t
));
260 VERBOSE (ERROR
, myfprintf (stdout
, "can't allocate memory\n"));
264 explore_tree (table
, root
, (char *)&code
, 0);
266 VERBOSE (DEBUG
, PRINTF ("end creating code table\n"));
271 /* print code table */
273 void print_code_table (code_t
*codes
)
278 myfprintf (stdout
, "Code table\n");
279 for (i
= 0; i
< NB_BYTES
; i
++) {
280 code
= (char *)(codes
+ i
);
281 if (codlen (code
) == 0) {
284 myfprintf (stdout
, "0x%02x '%c': %s\n", i
, ((i
< 32) || (i
> 127)) ? '.' : i
, code
);
288 /* encode header and code table */
290 byte_t
*encode_header_table (code_t
*codes
, int *occ
)
292 byte_t buffer
[NB_BYTES
* (NB_BYTES
- 1) / 2 / 8 + NB_BYTES
+ 6] = {0};
293 char bits
[(NB_BYTES
- 1) + 8 + 1] = {0};
295 byte_t
*header
= buffer
;
296 int i
, j
, length
, mode
;
300 VERBOSE (DEBUG
, PRINTF ("start encoding header and code table\n"));
303 for (i
= 0; i
< NB_BYTES
; i
++) {
304 code
= (char *)(codes
+ i
);
305 if (codlen (code
) > 0) {
307 size
+= codlen (code
) * occ
[i
];
310 mode
= (NB_BYTES
< 2 * nb
+ 1) ? 1 : 2;
311 VERBOSE (DEBUG
, PRINTF ("nb chars: %d\n", nb
));
312 VERBOSE (DEBUG
, PRINTF ("mode: %d\n", mode
));
313 VERBOSE (DEBUG
, PRINTF ("size: %d\n", size
));
314 VERBOSE (DEBUG
, PRINTF ("rem: %d\n", size
% 256));
317 codcpy ((char *)header
, sizeof (buffer
), (mode
== 1) ? "MZ1 " : "MZ2 ");
323 for (i
= 0; i
< NB_BYTES
; i
++) {
324 code
= (char *)(codes
+ i
);
325 *(header
++) = (byte_t
) codlen (code
);
329 *(header
++) = (byte_t
)(nb
- 1);
330 for (i
= 0; i
< NB_BYTES
; i
++) {
331 code
= (char *)(codes
+ i
);
332 if (codlen (code
) > 0) {
333 *(header
++) = (byte_t
) i
;
334 *(header
++) = (byte_t
) codlen (code
);
341 for (i
= 0; i
< NB_BYTES
; i
++) {
342 code
= (char *)(codes
+ i
);
343 if (codlen (code
) > 0) {
344 codcat (bits
, sizeof (code_t
), code
);
345 while (codlen (bits
) > (8 - 1)) {
346 for (j
= 0; j
< 8; j
++) {
348 if (bits
[j
] == '1') {
352 codcpy (bits
, sizeof (code_t
), bits
+ 8);
357 if (codlen (bits
) > 0) {
358 for (j
= 0; j
< (int)codlen (bits
); j
++) {
360 if (bits
[j
] == '1') {
364 for (j
= (int)codlen (bits
); j
< 8; j
++) {
371 length
= (int)(header
- buffer
- 6);
372 VERBOSE (DEBUG
, PRINTF ("lengh: %d %02x %02x\n", length
, length
>> 8, length
& 0xff));
373 buffer
[3] = (byte_t
)(length
>> 8);
374 buffer
[4] = (byte_t
)(length
& 0xff);
375 buffer
[5] = (byte_t
)(size
% 256);
378 header
= (byte_t
*) calloc (length
+ 6, 1);
379 blkcpy (header
, buffer
, length
+ 6);
381 VERBOSE (DEBUG
, PRINTF ("end encoding header and code table\n"));
388 void print_header (byte_t
*header
)
392 length
= (header
[3] << 8) + header
[4];
393 VERBOSE (DEBUG
, PRINTF ("lengh: %d\n", length
));
394 for (i
= 0; i
< length
+ 6; i
++) {
395 myfprintf (stdout
, "%02x", header
[i
]);
397 myfprintf (stdout
, "\n");
400 /* write crompressed file */
402 int write_compress (char *output
, char *input
, code_t
*codes
, byte_t
*header
)
404 byte_t bufin
[BUFFER_SIZE
] = {0};
405 byte_t bufout
[BUFFER_SIZE
] = {0};
406 char bits
[(NB_BYTES
- 1) + 8 + 1] = {0};
412 VERBOSE (DEBUG
, PRINTF ("start writting compressed file\n"));
414 /* open input file */
415 fin
= fopen (input
, "rb");
417 VERBOSE (ERROR
, myfprintf (stdout
, "can't open file '%s' for reading\n", input
));
420 VERBOSE (INFO
, myfprintf (stdout
, "file '%s' opened\n", input
));
422 /* open output file */
423 fout
= fopen (output
, "wb");
425 VERBOSE (ERROR
, myfprintf (stdout
, "can't open file '%s' for writing\n", output
));
428 VERBOSE (INFO
, myfprintf (stdout
, "file '%s' opened\n", output
));
431 length
= (header
[3] << 8) + header
[4];
432 VERBOSE (DEBUG
, PRINTF ("lengh: %d\n", length
));
433 fwrite (header
, 1, length
+ 6, fout
);
437 while (!feof (fin
)) {
438 nbread
= fread (bufin
, 1, BUFFER_SIZE
, fin
);
439 VERBOSE (DEBUG
, PRINTF ("nbread: %d\n", nbread
));
440 for (i
= 0; i
< nbread
; i
++) {
441 codcat (bits
, sizeof (code_t
), (char *)(codes
+ bufin
[i
]));
442 while (codlen (bits
) > (8 - 1)) {
443 for (j
= 0; j
< 8; j
++) {
445 if (bits
[j
] == '1') {
449 codcpy (bits
, sizeof (code_t
), bits
+ 8);
450 if (pt
- bufout
== BUFFER_SIZE
- 1) {
451 fwrite (bufout
, 1, BUFFER_SIZE
, fout
);
459 VERBOSE (DEBUG
, PRINTF ("lastest bits : %d\n", codlen (bits
)));
460 if (codlen (bits
) > 0) {
461 for (j
= 0; j
< (int)codlen (bits
); j
++) {
463 if (bits
[j
] == '1') {
467 for (j
= (int)codlen (bits
); j
< 8; j
++) {
473 VERBOSE (DEBUG
, PRINTF ("last partial buffer written: %u\n", pt
- bufout
));
474 fwrite (bufout
, 1, pt
- bufout
, fout
);
481 VERBOSE (DEBUG
, PRINTF ("end writting compressed file\n"));
488 code_t
*read_header (char *filename
) {
489 byte_t buffer
[NB_BYTES
* (NB_BYTES
- 1) / 2 / 8 + NB_BYTES
+ 6] = {0};
490 code_t
*table
= NULL
;
491 byte_t
*codes
= NULL
;
493 int lengths
[NB_BYTES
] = {0};
496 size_t i
, j
, l
, nb
, size
;
498 VERBOSE (DEBUG
, PRINTF ("start reading header\n"));
501 fid
= fopen (filename
, "rb");
503 VERBOSE (ERROR
, myfprintf (stdout
, "can't open file '%s'\n", filename
));
506 VERBOSE (INFO
, myfprintf (stdout
, "file '%s' opened\n", filename
));
508 /* read magic number */
509 nb
= fread (buffer
, 1, 6, fid
);
510 VERBOSE (DEBUG
, PRINTF ("nb, buffer: %d 0x%02x 0x%02x\n", nb
, buffer
[0], buffer
[1]));
511 if ((nb
== 6) && (buffer
[0] == 'M') && (buffer
[1] == 'Z')) {
512 mode
= (buffer
[2] == '1') ? 1 : (buffer
[2] == '2') ? 2 : 0;
513 size
= (buffer
[3] << 8) + buffer
[4];
514 VERBOSE (DEBUG
, PRINTF ("mode, size: %d %d\n", mode
, size
));
515 if (size
> NB_BYTES
* (NB_BYTES
- 1) / 2 / 8 + NB_BYTES
) {
518 nb
= fread (buffer
, 1, size
, fid
);
519 VERBOSE (DEBUG
, PRINTF ("nb read: %d\n", nb
));
527 VERBOSE (ERROR
, myfprintf (stdout
, "incorrect file\n"));
535 for (i
= 0; i
< NB_BYTES
; i
++) {
536 lengths
[i
] = *(codes
++);
541 VERBOSE (DEBUG
, PRINTF ("nb codes: %d\n", nb
));
542 for (i
= 0; i
< nb
; i
++) {
544 lengths
[j
] = *(codes
++);
548 VERBOSE (DEBUG
, for (i
= 0; i
< NB_BYTES
; i
++) if (lengths
[i
]) PRINTF ("%d: %d\n", i
, lengths
[i
]));
551 for (i
= 0, l
= 0; i
< NB_BYTES
; i
++) {
554 if (((mode
== 1) && (size
- 256 != (l
+ 7) / 8)) ||
555 ((mode
== 2) && (size
- 2 * nb
- 1 != (l
+ 7) / 8))) {
556 VERBOSE (ERROR
, myfprintf (stdout
, "incorrect code table length\n"));
561 table
= (code_t
*) calloc (NB_BYTES
, sizeof (code_t
));
563 VERBOSE (ERROR
, myfprintf (stdout
, "can't allocate memory\n"));
570 for (i
= 0; i
< NB_BYTES
; i
++) {
571 if (lengths
[i
] == 0) {
574 while (lengths
[i
]--) {
575 codcat ((char *)(table
+ i
), sizeof (code_t
), ((cur
& 0x80) == 0) ? "0" : "1");
585 VERBOSE (DEBUG
, PRINTF ("end reading header\n"));
590 /* write decompressed file */
592 int write_decompress (char *output
, char *input
, code_t
*codes
)
594 byte_t bufin
[BUFFER_SIZE
] = {0};
595 byte_t bufout
[BUFFER_SIZE
] = {0};
596 byte_t bufhea
[MAX(NB_BYTES
* (NB_BYTES
- 1) / 2 / 8 + NB_BYTES
+ 6, BUFFER_SIZE
)] = {0};
597 char bits
[(NB_BYTES
- 1) + 1] = {0};
599 int i
, j
, k
, nb
, size
, rem
;
604 VERBOSE (DEBUG
, PRINTF ("start writing decompressed file\n"));
606 /* open file for reading */
607 fin
= fopen (input
, "rb");
609 VERBOSE (ERROR
, myfprintf (stdout
, "can't open file '%s' for reading\n", input
));
612 VERBOSE (INFO
, myfprintf (stdout
, "file '%s' opened\n", input
));
614 /* read magic number */
615 nb
= fread (bufhea
, 1, 6, fin
);
617 VERBOSE (ERROR
, myfprintf (stdout
, "can't read file\n"));
621 size
= (bufhea
[3] << 8) + bufhea
[4];
622 VERBOSE (DEBUG
, myfprintf (stdout
, "table size: %d\n", size
));
624 VERBOSE (DEBUG
, myfprintf (stdout
, "remainder: %d\n", rem
));
625 nb
= fread (bufhea
, 1, size
, fin
);
627 VERBOSE (ERROR
, myfprintf (stdout
, "can't read file\n"));
632 /* open file for writing */
633 fout
= fopen (output
, "wb");
635 VERBOSE (ERROR
, myfprintf (stdout
, "can't open file '%s' for writing\n", output
));
638 VERBOSE (INFO
, myfprintf (stdout
, "file '%s' opened\n", output
));
642 while (!feof (fin
)) {
643 nb
= fread (bufin
, 1, BUFFER_SIZE
, fin
);
644 VERBOSE (DEBUG
, PRINTF ("nbread: %d\n", nb
));
645 for (i
= 0; i
< nb
; i
++) {
646 for (j
= 0; j
< 8; j
++) {
647 codcat (bits
, sizeof (bits
), ((bufin
[i
] & 0x80) == 0) ? "0" : "1");
650 VERBOSE (DEBUG
, PRINTF ("bits: %d - %s\n", codlen (bits
), bits
));
652 /* look for correct code */
654 for (k
= 0; (k
< NB_BYTES
) && (!is_found
); k
++) {
655 if (codcmp ((char *)(codes
+ k
), bits
) == 0) {
657 VERBOSE (DEBUG
, PRINTF ("found: %d\n", k
));
660 if (pt
- bufout
== BUFFER_SIZE
- 1) {
661 VERBOSE (DEBUG
, PRINTF ("nb buffer out: %u\n", (pt
- bufout
)));
662 fwrite (bufout
, 1, BUFFER_SIZE
, fout
);
669 if ((i
== nb
- 1) && (l
% 256 == rem
) && (feof (fin
))) {
670 VERBOSE (DEBUG
, PRINTF ("break\n"));
677 VERBOSE (DEBUG
, PRINTF ("nb buffer out: %u\n", (pt
- bufout
)));
678 fwrite (bufout
, 1, pt
- bufout
, fout
);
685 VERBOSE (DEBUG
, PRINTF ("end writing decompressed file\n"));
692 int main (int argc
, char *argv
[])
697 leaf_t
**leafs
= NULL
;
699 code_t
*codes
= NULL
;
700 byte_t
*header
= NULL
;
708 VERBOSE (DEBUG
, PRINTF ("start processing arguments\n"));
712 myfprintf (stderr
, "%s: invalid option -- %s\n", progname
, arg
);
716 VERBOSE (DEBUG
, PRINTF ("option: %c\n", c
));
725 input
= (arg
[2]) ? arg
+ 2 : (--argc
> 0 ) ? *(++argv
) : NULL
;
726 VERBOSE (DEBUG
, PRINTF ("input: %s\n", input
));
729 output
= (arg
[2]) ? arg
+ 2 : (--argc
> 0 ) ? *(++argv
) : NULL
;
730 VERBOSE (DEBUG
, PRINTF ("output: %s\n", output
));
733 arg
= (arg
[2]) ? arg
+ 2 : (--argc
> 0 ) ? *(++argv
) : NULL
;
735 myfprintf (stderr
, "%s: missing verbose level\n", progname
);
738 verbose
= myatoi (arg
);
739 VERBOSE (INFO
, myfprintf (stdout
, "verbose: %d\n", verbose
));
746 if ((input
== NULL
) || (output
== NULL
)) {
747 myfprintf (stderr
, "%s: missing file\n", progname
);
750 VERBOSE (DEBUG
, PRINTF ("end processing arguments\n"));
754 table
= create_table (input
);
755 if (table
== NULL
) break;
756 VERBOSE (INFO
, print_occ_table (table
));
758 leafs
= init_forest (table
);
759 if (leafs
== NULL
) break;
760 root
= create_tree (leafs
);
761 if (root
== NULL
) break;
762 codes
= create_code (root
);
763 if (codes
== NULL
) break;
764 VERBOSE (INFO
, print_code_table (codes
));
765 header
= encode_header_table (codes
, table
);
766 if (header
== NULL
) break;
767 VERBOSE (INFO
, print_header (header
));
768 rc
= write_compress (output
, input
, codes
, header
);
771 codes
= read_header (input
);
772 if (codes
== NULL
) break;
773 VERBOSE (INFO
, print_code_table (codes
));
774 rc
= write_decompress (output
, input
, codes
);
778 /* clean everything */
780 if (header
) free (header
);
781 if (codes
) free (codes
);
782 if (root
) free_tree (root
);
783 if (leafs
) free (leafs
);
784 if (table
) free (table
);
789 // test: compress.exe -h
790 // test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
791 // test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }'
792 // test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
793 // test: compress.exe -c -i compress.c -o compress.mz
794 // test: ls -sS1 compress.c compress.mz | tail -1 | grep compress.mz
795 // test: compress.exe -d -i compress.mz -o tmp.c
796 // test: cmp compress.c tmp.c
797 // test: rm compress.mz tmp.c
799 /* vim: set ts=4 sw=4 et */