3 /* linker: atoi.o code.o debug.o fdprintf.o */
15 #define BUFFER_SIZE 4096
28 char *progname
= NULL
;
34 int fd
= ret
? stdfderr
: stdfdout
;
35 fdprintf (fd
, "usage: %s\n", progname
);
36 fdprintf (fd
, " -c : mode compress\n");
37 fdprintf (fd
, " -d : mode decompress\n");
38 fdprintf (fd
, " -h : help message\n");
39 fdprintf (fd
, " -i <file>: input file\n");
40 fdprintf (fd
, " -o <file>: output file\n");
41 fdprintf (fd
, " -v : verbose level (%d)\n", verbose
);
46 /* create occurence table */
48 int *create_table (char *filename
)
50 byte_t buffer
[BUFFER_SIZE
] = {0};
52 static int table
[NB_BYTES
] = {0};
55 VERBOSE (DEBUG
, PRINTOUT ("start creating occurence table\n"));
58 fid
= open (filename
, O_RDONLY
|O_RAW
);
60 VERBOSE (ERROR
, PRINTERR ("can't open file '%s'\n", filename
));
63 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", filename
));
66 while ((nbread
= read (fid
, buffer
, BUFFER_SIZE
)) > 0) {
67 VERBOSE (DEBUG
, PRINTOUT ("nbread: %d\n", nbread
));
69 table
[(int)buffer
[nbread
]]++;
76 VERBOSE (DEBUG
, PRINTOUT ("end creating occurence table\n"));
81 /* print occurence table */
83 void print_occ_table (int *table
)
87 PRINTOUT ("Occurence table\n");
88 for (i
= 0; i
< NB_BYTES
; i
++) {
90 PRINTOUT ("0x%02x '%c': %d\n", i
, ((i
< 32) || (i
> 127)) ? '.' : i
, table
[i
]);
95 /* initialize forest */
97 leaf_t
**init_forest (int *table
)
99 static leaf_t
*leafs
[NB_BYTES
+ 1] = {0};
103 VERBOSE (DEBUG
, PRINTOUT ("start initiliazing forest\n"));
105 /* count number of leafs */
106 for (i
= 0; i
< NB_BYTES
; i
++) {
112 /* initialize leafs */
113 for (i
= 0, l
= 0; i
< NB_BYTES
; i
++) {
115 leafs
[l
] = getleaf (1);
116 if (leafs
[l
] == NULL
) {
117 VERBOSE (ERROR
, PRINTERR ("can't allocate memory\n"));
120 leafs
[l
]->occ
= table
[i
];
126 VERBOSE (DEBUG
, PRINTOUT ("end initiliazing forest\n"));
133 leaf_t
*create_tree (leaf_t
**leafs
)
135 leaf_t
*branch
= NULL
;
141 VERBOSE (DEBUG
, PRINTOUT ("start creating tree\n"));
143 /* count number of leafs */
144 while (leafs
[nb_leafs
] != NULL
) {
149 for (j
= 0; j
< nb_leafs
- 1; j
++) {
151 /* look for leatest occurence */
153 for (i
= 0; i
< nb_leafs
; i
++) {
154 if (leafs
[i
] == NULL
) {
157 if ((last
== -1) || (leafs
[i
]->occ
< leafs
[last
]->occ
)) {
162 /* look for ante leatest occurence */
164 for (i
= 0; i
< nb_leafs
; i
++) {
165 if ((i
== last
) || (leafs
[i
] == NULL
)) {
168 if ((ante
== -1) || (leafs
[i
]->occ
< leafs
[ante
]->occ
)) {
174 if ((last
== -1) || (ante
== -1)) {
175 VERBOSE (ERROR
, PRINTERR ("error during tree building\n"));
178 branch
= getleaf (1);
179 if (branch
== NULL
) {
180 VERBOSE (ERROR
, PRINTERR ("can't allocate memory\n"));
183 branch
->left
= leafs
[last
];
184 branch
->right
= leafs
[ante
];
185 branch
->occ
= branch
->left
->occ
+ branch
->right
->occ
;
186 leafs
[last
] = branch
;
190 VERBOSE (DEBUG
, PRINTOUT ("end creating tree\n"));
192 return (last
!= -1) ? leafs
[last
] : NULL
;
197 void explore_tree (code_t
*table
, leaf_t
*root
, char *code
, int index
)
200 VERBOSE (DEBUG
, PRINTOUT ("start exploring code tree\n"));
202 if ((root
->left
== NULL
) && (root
->right
== NULL
)) {
203 codcpy ((char *)(table
+ (int)(root
->c
)), sizeof (code_t
), code
);
206 codcpy (code
+ index
, sizeof (code_t
), "1");
207 explore_tree (table
, root
->left
, code
, index
+ 1);
208 codcpy (code
+ index
, sizeof (code_t
), "0");
209 explore_tree (table
, root
->right
, code
, index
+ 1);
212 VERBOSE (DEBUG
, PRINTOUT ("end exploring code tree\n"));
215 /* create code table */
216 code_t
*create_code (leaf_t
*root
)
218 static code_t table
[NB_BYTES
] = {0};
221 VERBOSE (DEBUG
, PRINTOUT ("start creating code table\n"));
223 explore_tree (table
, root
, (char *)&code
, 0);
225 VERBOSE (DEBUG
, PRINTOUT ("end creating code table\n"));
230 /* print code table */
232 void print_code_table (code_t
*codes
)
237 PRINTOUT ("Code table\n");
238 for (i
= 0; i
< NB_BYTES
; i
++) {
239 code
= (char *)(codes
+ i
);
240 if (codlen (code
) == 0) {
243 PRINTOUT ("0x%02x '%c': %s\n", i
, ((i
< 32) || (i
> 127)) ? '.' : i
, code
);
247 /* encode header and code table */
249 byte_t
*encode_header_table (code_t
*codes
, int *occ
)
251 static byte_t buffer
[NB_BYTES
* (NB_BYTES
- 1) / 2 / 8 + NB_BYTES
+ 6] = {0};
252 char bits
[(NB_BYTES
- 1) + 8 + 1] = {0};
254 byte_t
*header
= buffer
;
255 int i
, j
, length
, mode
;
259 VERBOSE (DEBUG
, PRINTOUT ("start encoding header and code table\n"));
262 for (i
= 0; i
< NB_BYTES
; i
++) {
263 code
= (char *)(codes
+ i
);
264 if (codlen (code
) > 0) {
266 size
+= codlen (code
) * occ
[i
];
269 mode
= (NB_BYTES
< 2 * nb
+ 1) ? 1 : 2;
270 VERBOSE (DEBUG
, PRINTOUT ("nb chars: %d\n", nb
));
271 VERBOSE (DEBUG
, PRINTOUT ("mode: %d\n", mode
));
272 VERBOSE (DEBUG
, PRINTOUT ("size: %d\n", size
));
273 VERBOSE (DEBUG
, PRINTOUT ("rem: %d\n", size
% 256));
276 codcpy ((char *)header
, sizeof (buffer
), (mode
== 1) ? "M1Z " : "M2Z ");
282 for (i
= 0; i
< NB_BYTES
; i
++) {
283 code
= (char *)(codes
+ i
);
284 *(header
++) = (byte_t
) codlen (code
);
288 *(header
++) = (byte_t
)(nb
- 1);
289 for (i
= 0; i
< NB_BYTES
; i
++) {
290 code
= (char *)(codes
+ i
);
291 if (codlen (code
) > 0) {
292 *(header
++) = (byte_t
) i
;
293 *(header
++) = (byte_t
) codlen (code
);
300 for (i
= 0; i
< NB_BYTES
; i
++) {
301 code
= (char *)(codes
+ i
);
302 if (codlen (code
) > 0) {
303 codcat (bits
, sizeof (code_t
), code
);
304 while (codlen (bits
) > (8 - 1)) {
305 for (j
= 0; j
< 8; j
++) {
307 if (bits
[j
] == '1') {
311 codcpy (bits
, sizeof (code_t
), bits
+ 8);
316 if (codlen (bits
) > 0) {
317 for (j
= 0; j
< (int)codlen (bits
); j
++) {
319 if (bits
[j
] == '1') {
323 for (j
= (int)codlen (bits
); j
< 8; j
++) {
330 length
= (int)(header
- buffer
- 6);
331 VERBOSE (DEBUG
, PRINTOUT ("lengh: %d %02x %02x\n", length
, length
>> 8, length
& 0xff));
332 buffer
[3] = (byte_t
)(length
>> 8);
333 buffer
[4] = (byte_t
)(length
& 0xff);
334 buffer
[5] = (byte_t
)(size
% 256);
337 VERBOSE (DEBUG
, PRINTOUT ("end encoding header and code table\n"));
344 void print_header (byte_t
*header
)
348 length
= (header
[3] << 8) + header
[4];
349 VERBOSE (DEBUG
, PRINTOUT ("lengh: %d\n", length
));
350 for (i
= 0; i
< length
+ 6; i
++) {
351 PRINTOUT ("%02x", header
[i
]);
356 /* write crompressed file */
358 int write_compress (char *output
, char *input
, code_t
*codes
, byte_t
*header
)
360 byte_t bufin
[BUFFER_SIZE
] = {0};
361 byte_t bufout
[BUFFER_SIZE
] = {0};
362 char bits
[(NB_BYTES
- 1) + 8 + 1] = {0};
365 int i
, j
, nbread
, nbwrite
;
368 VERBOSE (DEBUG
, PRINTOUT ("start writting compressed file\n"));
370 /* open input file */
371 fin
= open (input
, O_RDONLY
|O_RAW
);
373 VERBOSE (ERROR
, PRINTERR ("can't open file '%s' for reading\n", input
));
376 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", input
));
378 /* open output file */
379 fout
= open (output
, O_WRONLY
|O_CREAT
|O_RAW
, 0700);
381 VERBOSE (ERROR
, PRINTERR ("can't open file '%s' for writing\n", output
));
385 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", output
));
388 length
= (header
[3] << 8) + header
[4];
389 VERBOSE (DEBUG
, PRINTOUT ("lengh: %d\n", length
));
390 nbwrite
= write (fout
, header
, length
+ 6);
391 if (nbwrite
!= length
+ 6) {
392 VERBOSE (ERROR
, PRINTERR ("can't write %d bytes in file '%s'\n", length
+ 6 - nbwrite
, output
));
401 while ((nbread
= read (fin
, bufin
, BUFFER_SIZE
)) > 0) {
402 VERBOSE (DEBUG
, PRINTOUT ("nbread: %d\n", nbread
));
403 for (i
= 0; i
< nbread
; i
++) {
404 codcat (bits
, sizeof (code_t
), (char *)(codes
+ bufin
[i
]));
405 while (codlen (bits
) > (8 - 1)) {
406 for (j
= 0; j
< 8; j
++) {
408 if (bits
[j
] == '1') {
412 codcpy (bits
, sizeof (code_t
), bits
+ 8);
413 if (pt
- bufout
== BUFFER_SIZE
- 1) {
414 nbwrite
= write (fout
, bufout
, BUFFER_SIZE
);
415 if (nbwrite
!= BUFFER_SIZE
) {
416 VERBOSE (ERROR
, PRINTERR ("can't write %d bytes in file '%s'\n", BUFFER_SIZE
- nbwrite
, output
));
428 VERBOSE (DEBUG
, PRINTOUT ("lastest bits : %d\n", codlen (bits
)));
429 if (codlen (bits
) > 0) {
430 for (j
= 0; j
< (int)codlen (bits
); j
++) {
432 if (bits
[j
] == '1') {
436 for (j
= (int)codlen (bits
); j
< 8; j
++) {
442 VERBOSE (DEBUG
, PRINTOUT ("last partial buffer written: %u\n", pt
- bufout
));
443 nbwrite
= write (fout
, bufout
, pt
- bufout
);
444 if (nbwrite
!= pt
- bufout
) {
445 VERBOSE (ERROR
, PRINTERR ("can't write %d bytes in file '%s'\n", pt
- bufout
- nbwrite
, output
));
456 VERBOSE (DEBUG
, PRINTOUT ("end writting compressed file\n"));
463 code_t
*read_header (char *filename
) {
464 static code_t table
[NB_BYTES
] = {0};
465 byte_t buffer
[NB_BYTES
* (NB_BYTES
- 1) / 2 / 8 + NB_BYTES
+ 6] = {0};
466 byte_t
*codes
= NULL
;
468 int lengths
[NB_BYTES
] = {0};
471 int i
, j
, l
, nb
, size
;
473 VERBOSE (DEBUG
, PRINTOUT ("start reading header\n"));
476 fid
= open (filename
, O_RDONLY
|O_RAW
);
478 VERBOSE (ERROR
, PRINTERR ("can't open file '%s'\n", filename
));
481 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", filename
));
483 /* read magic number */
484 nb
= read (fid
, buffer
, 6);
485 VERBOSE (DEBUG
, PRINTOUT ("nb, buffer: %d 0x%02x 0x%02x\n", nb
, buffer
[0], buffer
[1]));
486 if ((nb
== 6) && (buffer
[0] == 'M') && (buffer
[2] == 'Z')) {
487 mode
= (buffer
[1] == '1') ? 1 : (buffer
[1] == '2') ? 2 : 0;
488 size
= (buffer
[3] << 8) + buffer
[4];
489 VERBOSE (DEBUG
, PRINTOUT ("mode, size: %d %d\n", mode
, size
));
490 if (size
> NB_BYTES
* (NB_BYTES
- 1) / 2 / 8 + NB_BYTES
) {
493 nb
= read (fid
, buffer
, size
);
494 VERBOSE (DEBUG
, PRINTOUT ("nb read: %d/%d\n", nb
, size
));
502 VERBOSE (ERROR
, PRINTERR ("incorrect file\n"));
510 for (i
= 0; i
< NB_BYTES
; i
++) {
511 lengths
[i
] = *(codes
++);
516 VERBOSE (DEBUG
, PRINTOUT ("nb codes: %d\n", nb
));
517 for (i
= 0; i
< nb
; i
++) {
519 lengths
[j
] = *(codes
++);
523 VERBOSE (DEBUG
, for (i
= 0; i
< NB_BYTES
; i
++) if (lengths
[i
]) PRINTOUT ("%d: %d\n", i
, lengths
[i
]));
526 for (i
= 0, l
= 0; i
< NB_BYTES
; i
++) {
529 if (((mode
== 1) && (size
- 256 != (l
+ 7) / 8)) ||
530 ((mode
== 2) && (size
- 2 * nb
- 1 != (l
+ 7) / 8))) {
531 VERBOSE (ERROR
, PRINTERR ("incorrect code table length: %d %d %d\n", size
, nb
, l
));
538 for (i
= 0; i
< NB_BYTES
; i
++) {
539 if (lengths
[i
] == 0) {
542 while (lengths
[i
]--) {
543 codcat ((char *)(table
+ i
), sizeof (code_t
), ((cur
& 0x80) == 0) ? "0" : "1");
553 VERBOSE (DEBUG
, PRINTOUT ("end reading header\n"));
558 /* write decompressed file */
560 int write_decompress (char *output
, char *input
, code_t
*codes
)
562 byte_t bufin
[BUFFER_SIZE
] = {0};
563 byte_t bufout
[BUFFER_SIZE
] = {0};
564 byte_t bufhea
[MAX(NB_BYTES
* (NB_BYTES
- 1) / 2 / 8 + NB_BYTES
+ 6, BUFFER_SIZE
)] = {0};
565 char bits
[(NB_BYTES
- 1) + 1] = {0};
567 int i
, j
, k
, nb
, size
, nbwrite
, rem
;
572 VERBOSE (DEBUG
, PRINTOUT ("start writing decompressed file\n"));
574 /* open file for reading */
575 fin
= open (input
, O_RDONLY
|O_RAW
);
577 VERBOSE (ERROR
, PRINTERR ("can't open file '%s' for reading\n", input
));
580 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", input
));
582 /* read magic number */
583 nb
= read (fin
, bufhea
, 6);
585 VERBOSE (ERROR
, PRINTERR ("can't read file\n"));
589 size
= (bufhea
[3] << 8) + bufhea
[4];
590 VERBOSE (DEBUG
, PRINTOUT ("table size: %d\n", size
));
592 VERBOSE (DEBUG
, PRINTOUT ("remainder: %d\n", rem
));
593 nb
= read (fin
, bufhea
, size
);
595 VERBOSE (ERROR
, PRINTERR ("can't read file\n"));
600 /* open file for writing */
601 fout
= open (output
, O_WRONLY
|O_CREAT
|O_RAW
, 0700);
603 VERBOSE (ERROR
, PRINTERR ("can't open file '%s' for writing\n", output
));
607 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", output
));
611 while ((nb
= read (fin
, bufin
, BUFFER_SIZE
)) > 0) {
612 VERBOSE (DEBUG
, PRINTOUT ("nbread: %d\n", nb
));
613 for (i
= 0; i
< nb
; i
++) {
614 for (j
= 0; j
< 8; j
++) {
615 codcat (bits
, sizeof (bits
), ((bufin
[i
] & 0x80) == 0) ? "0" : "1");
618 VERBOSE (DEBUG
, PRINTOUT ("bits: %d - %s\n", codlen (bits
), bits
));
620 /* look for correct code */
622 for (k
= 0; (k
< NB_BYTES
) && (!is_found
); k
++) {
623 if (codcmp ((char *)(codes
+ k
), bits
) == 0) {
625 VERBOSE (DEBUG
, PRINTOUT ("found: %d\n", k
));
628 if (pt
- bufout
== BUFFER_SIZE
- 1) {
629 VERBOSE (DEBUG
, PRINTOUT ("nb buffer out: %u\n", (pt
- bufout
)));
630 nbwrite
= write (fout
, bufout
, BUFFER_SIZE
);
631 if (nbwrite
!= BUFFER_SIZE
) {
632 VERBOSE (ERROR
, PRINTERR ("can't write %d bytes in file '%s'\n'", BUFFER_SIZE
- nbwrite
, output
));
643 if ((i
== nb
- 1) && (l
% 256 == rem
) && (nb
!= BUFFER_SIZE
)) {
644 VERBOSE (DEBUG
, PRINTOUT ("break\n"));
651 VERBOSE (DEBUG
, PRINTOUT ("nb buffer out: %u\n", (pt
- bufout
)));
652 nbwrite
= write (fout
, bufout
, pt
- bufout
);
653 if (nbwrite
!= pt
- bufout
) {
654 VERBOSE (ERROR
, PRINTERR ("can't write %d bytes in file '%s'\n'", pt
- bufout
- nbwrite
, output
));
665 VERBOSE (DEBUG
, PRINTOUT ("end writing decompressed file\n"));
672 int main (int argc
, char *argv
[])
677 leaf_t
**leafs
= NULL
;
679 code_t
*codes
= NULL
;
680 byte_t
*header
= NULL
;
688 VERBOSE (DEBUG
, PRINTOUT ("start processing arguments\n"));
692 PRINTERR ("%s: invalid option -- %s\n", progname
, arg
);
696 VERBOSE (DEBUG
, PRINTOUT ("option: %c\n", c
));
705 input
= (arg
[2]) ? arg
+ 2 : (--argc
> 0 ) ? *(++argv
) : NULL
;
706 VERBOSE (DEBUG
, PRINTOUT ("input: %s\n", input
));
709 output
= (arg
[2]) ? arg
+ 2 : (--argc
> 0 ) ? *(++argv
) : NULL
;
710 VERBOSE (DEBUG
, PRINTOUT ("output: %s\n", output
));
713 arg
= (arg
[2]) ? arg
+ 2 : (--argc
> 0 ) ? *(++argv
) : NULL
;
715 PRINTERR ("%s: missing verbose level\n", progname
);
718 verbose
= atoi (arg
);
719 VERBOSE (INFO
, PRINTOUT ("verbose: %d\n", verbose
));
723 return usage (c
!= 'h');
726 if ((input
== NULL
) || (output
== NULL
)) {
727 PRINTERR ("%s: missing file\n", progname
);
730 VERBOSE (DEBUG
, PRINTOUT ("end processing arguments\n"));
734 table
= create_table (input
);
735 if (table
== NULL
) break;
736 VERBOSE (INFO
, print_occ_table (table
));
738 leafs
= init_forest (table
);
739 if (leafs
== NULL
) break;
740 root
= create_tree (leafs
);
741 if (root
== NULL
) break;
742 codes
= create_code (root
);
743 if (codes
== NULL
) break;
744 VERBOSE (INFO
, print_code_table (codes
));
745 header
= encode_header_table (codes
, table
);
746 if (header
== NULL
) break;
747 VERBOSE (INFO
, print_header (header
));
748 rc
= write_compress (output
, input
, codes
, header
);
751 codes
= read_header (input
);
752 if (codes
== NULL
) break;
753 VERBOSE (INFO
, print_code_table (codes
));
754 rc
= write_decompress (output
, input
, codes
);
761 // test: compress.exe -h
762 // test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
763 // test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }'
764 // test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
765 // test: compress.exe -v 2>&1 | grep -q 'missing verbose level'
766 // test: compress.exe -c -i compress.c 2>&1 | grep -q 'missing file'
767 // test: compress.exe -c -v 4 -i compress.c -o compress.mz | grep -q "Occurence table"
768 // test: compress.exe -c -i compress.c -o compress.mz
769 // test: ls -sS1 compress.c compress.mz | tail -1 | grep -q compress.mz
770 // test: compress.exe -d -i compress.mz -o tmp.c
771 // test: cmp compress.c tmp.c; x=$?; rm compress.mz tmp.c; test x$x = x0
772 // test: compress.exe -c -i test/compress.c -o compress.mz 2>&1 | grep -q "can't open file"
773 // test: compress.exe -c -i compress.c -o test/compress.mz 2>&1 | grep -q "can't open file"
775 /* vim: set ts=4 sw=4 et: */