3 /* linker: atoi.o code.o debug.o fdprintf.o */
15 #define BUFFER_SIZE 4096
28 char *progname
= NULL
;
34 int fd
= ret
? stdfderr
: stdfdout
;
35 fdprintf (fd
, "usage: %s\n", progname
);
36 fdprintf (fd
, " -h : help message\n");
37 fdprintf (fd
, " -i <file>: input file\n");
38 fdprintf (fd
, " -o <file>: output file\n");
39 fdprintf (fd
, " -v : verbose level (%d)\n", verbose
);
44 /* create occurence table */
46 int *create_table (char *filename
)
48 byte_t buffer
[BUFFER_SIZE
] = {0};
50 static int table
[NB_BYTES
] = {0};
53 VERBOSE (DEBUG
, PRINTOUT ("start creating occurence table\n"));
56 fid
= open (filename
, O_RDONLY
|O_RAW
);
58 VERBOSE (ERROR
, PRINTERR ("can't open file '%s'\n", filename
));
61 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", filename
));
64 while ((nbread
= read (fid
, buffer
, BUFFER_SIZE
)) > 0) {
65 VERBOSE (DEBUG
, PRINTOUT ("nbread: %d\n", nbread
));
67 table
[(int)buffer
[nbread
]]++;
74 VERBOSE (DEBUG
, PRINTOUT ("end creating occurence table\n"));
79 /* print occurence table */
81 void print_occ_table (int *table
)
85 PRINTOUT ("Occurence table\n");
86 for (i
= 0; i
< NB_BYTES
; i
++) {
88 PRINTOUT ("0x%02x '%c': %d\n", i
, ((i
< 32) || (i
> 127)) ? '.' : i
, table
[i
]);
93 /* initialize forest */
95 leaf_t
**init_forest (int *table
)
97 static leaf_t
*leafs
[NB_BYTES
] = {0};
101 VERBOSE (DEBUG
, PRINTOUT ("start initiliazing forest\n"));
103 /* count number of leafs */
104 for (i
= 0; i
< NB_BYTES
; i
++) {
110 /* initialize leafs */
111 for (i
= 0, l
= 0; i
< NB_BYTES
; i
++) {
113 leafs
[l
] = getleaf (1);
114 if (leafs
[l
] == NULL
) {
115 VERBOSE (ERROR
, PRINTERR ("can't allocate memory\n"));
118 leafs
[l
]->occ
= table
[i
];
124 VERBOSE (DEBUG
, PRINTOUT ("end initiliazing forest\n"));
131 leaf_t
*create_tree (leaf_t
**leafs
)
133 leaf_t
*branch
= NULL
;
139 VERBOSE (DEBUG
, PRINTOUT ("start creating tree\n"));
141 /* count number of leafs */
142 while (leafs
[nb_leafs
] != NULL
) {
147 for (j
= 0; j
< nb_leafs
- 1; j
++) {
149 /* look for leatest occurence */
151 for (i
= 0; i
< nb_leafs
; i
++) {
152 if (leafs
[i
] == NULL
) {
155 if ((last
== -1) || (leafs
[i
]->occ
< leafs
[last
]->occ
)) {
160 /* look for ante leatest occurence */
162 for (i
= 0; i
< nb_leafs
; i
++) {
163 if ((i
== last
) || (leafs
[i
] == NULL
)) {
166 if ((ante
== -1) || (leafs
[i
]->occ
< leafs
[ante
]->occ
)) {
172 if ((last
== -1) || (ante
== -1)) {
173 VERBOSE (ERROR
, PRINTERR ("error during tree building\n"));
176 branch
= getleaf (1);
177 if (branch
== NULL
) {
178 VERBOSE (ERROR
, PRINTERR ("can't allocate memory\n"));
181 branch
->left
= leafs
[last
];
182 branch
->right
= leafs
[ante
];
183 branch
->occ
= branch
->left
->occ
+ branch
->right
->occ
;
184 leafs
[last
] = branch
;
188 VERBOSE (DEBUG
, PRINTOUT ("end creating tree\n"));
190 return (last
!= -1) ? leafs
[last
] : NULL
;
195 void explore_tree (code_t
*table
, leaf_t
*root
, char *code
, int index
)
198 VERBOSE (DEBUG
, PRINTOUT ("start exploring code tree\n"));
200 if ((root
->left
== NULL
) && (root
->right
== NULL
)) {
201 codcpy ((char *)(table
+ (int)(root
->c
)), sizeof (code_t
), code
);
204 codcpy (code
+ index
, sizeof (code_t
), "1");
205 explore_tree (table
, root
->left
, code
, index
+ 1);
206 codcpy (code
+ index
, sizeof (code_t
), "0");
207 explore_tree (table
, root
->right
, code
, index
+ 1);
210 VERBOSE (DEBUG
, PRINTOUT ("end exploring code tree\n"));
213 /* create code table */
214 code_t
*create_code (leaf_t
*root
)
216 static code_t table
[NB_BYTES
] = {0};
219 VERBOSE (DEBUG
, PRINTOUT ("start creating code table\n"));
221 explore_tree (table
, root
, (char *)&code
, 0);
223 VERBOSE (DEBUG
, PRINTOUT ("end creating code table\n"));
228 /* print code table */
230 void print_code_table (code_t
*codes
)
235 PRINTOUT ("Code table\n");
236 for (i
= 0; i
< NB_BYTES
; i
++) {
237 code
= (char *)(codes
+ i
);
238 if (codlen (code
) == 0) {
241 PRINTOUT ("0x%02x '%c': %s\n", i
, ((i
< 32) || (i
> 127)) ? '.' : i
, code
);
245 /* encode header and code table */
247 byte_t
*encode_header_table (code_t
*codes
, int *occ
)
249 static byte_t buffer
[NB_BYTES
* (NB_BYTES
- 1) / 2 / 8 + NB_BYTES
+ 6] = {0};
250 char bits
[(NB_BYTES
- 1) + 8 + 1] = {0};
252 byte_t
*header
= buffer
;
253 int i
, j
, length
, mode
;
257 VERBOSE (DEBUG
, PRINTOUT ("start encoding header and code table\n"));
260 for (i
= 0; i
< NB_BYTES
; i
++) {
261 code
= (char *)(codes
+ i
);
262 if (codlen (code
) > 0) {
264 size
+= codlen (code
) * occ
[i
];
267 mode
= (NB_BYTES
< 2 * nb
+ 1) ? 1 : 2;
268 VERBOSE (DEBUG
, PRINTOUT ("nb chars: %d\n", nb
));
269 VERBOSE (DEBUG
, PRINTOUT ("mode: %d\n", mode
));
270 VERBOSE (DEBUG
, PRINTOUT ("size: %d\n", size
));
271 VERBOSE (DEBUG
, PRINTOUT ("rem: %d\n", size
% 256));
274 codcpy ((char *)header
, sizeof (buffer
), (mode
== 1) ? "MZ1 " : "MZ2 ");
280 for (i
= 0; i
< NB_BYTES
; i
++) {
281 code
= (char *)(codes
+ i
);
282 *(header
++) = (byte_t
) codlen (code
);
286 *(header
++) = (byte_t
)(nb
- 1);
287 for (i
= 0; i
< NB_BYTES
; i
++) {
288 code
= (char *)(codes
+ i
);
289 if (codlen (code
) > 0) {
290 *(header
++) = (byte_t
) i
;
291 *(header
++) = (byte_t
) codlen (code
);
298 for (i
= 0; i
< NB_BYTES
; i
++) {
299 code
= (char *)(codes
+ i
);
300 if (codlen (code
) > 0) {
301 codcat (bits
, sizeof (code_t
), code
);
302 while (codlen (bits
) > (8 - 1)) {
303 for (j
= 0; j
< 8; j
++) {
305 if (bits
[j
] == '1') {
309 codcpy (bits
, sizeof (code_t
), bits
+ 8);
314 if (codlen (bits
) > 0) {
315 for (j
= 0; j
< (int)codlen (bits
); j
++) {
317 if (bits
[j
] == '1') {
321 for (j
= (int)codlen (bits
); j
< 8; j
++) {
328 length
= (int)(header
- buffer
- 6);
329 VERBOSE (DEBUG
, PRINTOUT ("lengh: %d %02x %02x\n", length
, length
>> 8, length
& 0xff));
330 buffer
[3] = (byte_t
)(length
>> 8);
331 buffer
[4] = (byte_t
)(length
& 0xff);
332 buffer
[5] = (byte_t
)(size
% 256);
335 VERBOSE (DEBUG
, PRINTOUT ("end encoding header and code table\n"));
342 void print_header (byte_t
*header
)
346 length
= (header
[3] << 8) + header
[4];
347 VERBOSE (DEBUG
, PRINTOUT ("lengh: %d\n", length
));
348 for (i
= 0; i
< length
+ 6; i
++) {
349 PRINTOUT ("%02x", header
[i
]);
354 /* write crompressed file */
356 int write_compress (char *output
, char *input
, code_t
*codes
, byte_t
*header
)
358 byte_t bufin
[BUFFER_SIZE
] = {0};
359 byte_t bufout
[BUFFER_SIZE
] = {0};
360 char bits
[(NB_BYTES
- 1) + 8 + 1] = {0};
363 int i
, j
, nbread
, nbwrite
;
366 VERBOSE (DEBUG
, PRINTOUT ("start writting compressed file\n"));
368 /* open input file */
369 fin
= open (input
, O_RDONLY
|O_RAW
);
371 VERBOSE (ERROR
, PRINTERR ("can't open file '%s' for reading\n", input
));
374 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", input
));
376 /* open output file */
377 fout
= open (output
, O_WRONLY
|O_CREAT
|O_RAW
, 0700);
379 VERBOSE (ERROR
, PRINTERR ("can't open file '%s' for writing\n", output
));
383 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", output
));
386 length
= (header
[3] << 8) + header
[4];
387 VERBOSE (DEBUG
, PRINTOUT ("lengh: %d\n", length
));
388 nbwrite
= write (fout
, header
, length
+ 6);
389 if (nbwrite
!= length
+ 6) {
390 VERBOSE (ERROR
, PRINTERR ("can't write %d bytes in file '%s'\n", length
+ 6 - nbwrite
, output
));
399 while ((nbread
= read (fin
, bufin
, BUFFER_SIZE
)) > 0) {
400 VERBOSE (DEBUG
, PRINTOUT ("nbread: %d\n", nbread
));
401 for (i
= 0; i
< nbread
; i
++) {
402 codcat (bits
, sizeof (code_t
), (char *)(codes
+ bufin
[i
]));
403 while (codlen (bits
) > (8 - 1)) {
404 for (j
= 0; j
< 8; j
++) {
406 if (bits
[j
] == '1') {
410 codcpy (bits
, sizeof (code_t
), bits
+ 8);
411 if (pt
- bufout
== BUFFER_SIZE
- 1) {
412 nbwrite
= write (fout
, bufout
, BUFFER_SIZE
);
413 if (nbwrite
!= BUFFER_SIZE
) {
414 VERBOSE (ERROR
, PRINTERR ("can't write %d bytes in file '%s'\n", BUFFER_SIZE
- nbwrite
, output
));
426 VERBOSE (DEBUG
, PRINTOUT ("lastest bits : %d\n", codlen (bits
)));
427 if (codlen (bits
) > 0) {
428 for (j
= 0; j
< (int)codlen (bits
); j
++) {
430 if (bits
[j
] == '1') {
434 for (j
= (int)codlen (bits
); j
< 8; j
++) {
440 VERBOSE (DEBUG
, PRINTOUT ("last partial buffer written: %u\n", pt
- bufout
));
441 nbwrite
= write (fout
, bufout
, pt
- bufout
);
442 if (nbwrite
!= pt
- bufout
) {
443 VERBOSE (ERROR
, PRINTERR ("can't write %d bytes in file '%s'\n", pt
- bufout
- nbwrite
, output
));
454 VERBOSE (DEBUG
, PRINTOUT ("end writting compressed file\n"));
461 code_t
*read_header (char *filename
) {
462 static code_t table
[NB_BYTES
] = {0};
463 byte_t buffer
[NB_BYTES
* (NB_BYTES
- 1) / 2 / 8 + NB_BYTES
+ 6] = {0};
464 byte_t
*codes
= NULL
;
466 int lengths
[NB_BYTES
] = {0};
469 int i
, j
, l
, nb
, size
;
471 VERBOSE (DEBUG
, PRINTOUT ("start reading header\n"));
474 fid
= open (filename
, O_RDONLY
|O_RAW
);
476 VERBOSE (ERROR
, PRINTERR ("can't open file '%s'\n", filename
));
479 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", filename
));
481 /* read magic number */
482 nb
= read (fid
, buffer
, 6);
483 VERBOSE (DEBUG
, PRINTOUT ("nb, buffer: %d 0x%02x 0x%02x\n", nb
, buffer
[0], buffer
[1]));
484 if ((nb
== 6) && (buffer
[0] == 'M') && (buffer
[1] == 'Z')) {
485 mode
= (buffer
[2] == '1') ? 1 : (buffer
[2] == '2') ? 2 : 0;
486 size
= (buffer
[3] << 8) + buffer
[4];
487 VERBOSE (DEBUG
, PRINTOUT ("mode, size: %d %d\n", mode
, size
));
488 if (size
> NB_BYTES
* (NB_BYTES
- 1) / 2 / 8 + NB_BYTES
) {
491 nb
= read (fid
, buffer
, size
);
492 VERBOSE (DEBUG
, PRINTOUT ("nb read: %d/%d\n", nb
, size
));
500 VERBOSE (ERROR
, PRINTERR ("incorrect file\n"));
508 for (i
= 0; i
< NB_BYTES
; i
++) {
509 lengths
[i
] = *(codes
++);
514 VERBOSE (DEBUG
, PRINTOUT ("nb codes: %d\n", nb
));
515 for (i
= 0; i
< nb
; i
++) {
517 lengths
[j
] = *(codes
++);
521 VERBOSE (DEBUG
, for (i
= 0; i
< NB_BYTES
; i
++) if (lengths
[i
]) PRINTOUT ("%d: %d\n", i
, lengths
[i
]));
524 for (i
= 0, l
= 0; i
< NB_BYTES
; i
++) {
527 if (((mode
== 1) && (size
- 256 != (l
+ 7) / 8)) ||
528 ((mode
== 2) && (size
- 2 * nb
- 1 != (l
+ 7) / 8))) {
529 VERBOSE (ERROR
, PRINTERR ("incorrect code table length: %d %d %d\n", size
, nb
, l
));
536 for (i
= 0; i
< NB_BYTES
; i
++) {
537 if (lengths
[i
] == 0) {
540 while (lengths
[i
]--) {
541 codcat ((char *)(table
+ i
), sizeof (code_t
), ((cur
& 0x80) == 0) ? "0" : "1");
551 VERBOSE (DEBUG
, PRINTOUT ("end reading header\n"));
556 /* write decompressed file */
558 int write_decompress (char *output
, char *input
, code_t
*codes
)
560 byte_t bufin
[BUFFER_SIZE
] = {0};
561 byte_t bufout
[BUFFER_SIZE
] = {0};
562 byte_t bufhea
[MAX(NB_BYTES
* (NB_BYTES
- 1) / 2 / 8 + NB_BYTES
+ 6, BUFFER_SIZE
)] = {0};
563 char bits
[(NB_BYTES
- 1) + 1] = {0};
565 int i
, j
, k
, nb
, size
, nbwrite
, rem
;
570 VERBOSE (DEBUG
, PRINTOUT ("start writing decompressed file\n"));
572 /* open file for reading */
573 fin
= open (input
, O_RDONLY
|O_RAW
);
575 VERBOSE (ERROR
, PRINTERR ("can't open file '%s' for reading\n", input
));
578 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", input
));
580 /* read magic number */
581 nb
= read (fin
, bufhea
, 6);
583 VERBOSE (ERROR
, PRINTERR ("can't read file\n"));
587 size
= (bufhea
[3] << 8) + bufhea
[4];
588 VERBOSE (DEBUG
, PRINTOUT ("table size: %d\n", size
));
590 VERBOSE (DEBUG
, PRINTOUT ("remainder: %d\n", rem
));
591 nb
= read (fin
, bufhea
, size
);
593 VERBOSE (ERROR
, PRINTERR ("can't read file\n"));
598 /* open file for writing */
599 fout
= open (output
, O_WRONLY
|O_CREAT
|O_RAW
, 0700);
601 VERBOSE (ERROR
, PRINTERR ("can't open file '%s' for writing\n", output
));
605 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", output
));
609 while ((nb
= read (fin
, bufin
, BUFFER_SIZE
)) > 0) {
610 VERBOSE (DEBUG
, PRINTOUT ("nbread: %d\n", nb
));
611 for (i
= 0; i
< nb
; i
++) {
612 for (j
= 0; j
< 8; j
++) {
613 codcat (bits
, sizeof (bits
), ((bufin
[i
] & 0x80) == 0) ? "0" : "1");
616 VERBOSE (DEBUG
, PRINTOUT ("bits: %d - %s\n", codlen (bits
), bits
));
618 /* look for correct code */
620 for (k
= 0; (k
< NB_BYTES
) && (!is_found
); k
++) {
621 if (codcmp ((char *)(codes
+ k
), bits
) == 0) {
623 VERBOSE (DEBUG
, PRINTOUT ("found: %d\n", k
));
626 if (pt
- bufout
== BUFFER_SIZE
- 1) {
627 VERBOSE (DEBUG
, PRINTOUT ("nb buffer out: %u\n", (pt
- bufout
)));
628 nbwrite
= write (fout
, bufout
, BUFFER_SIZE
);
629 if (nbwrite
!= BUFFER_SIZE
) {
630 VERBOSE (ERROR
, PRINTERR ("can't write %d bytes in file '%s'\n'", BUFFER_SIZE
- nbwrite
, output
));
641 if ((i
== nb
- 1) && (l
% 256 == rem
) && (nb
!= BUFFER_SIZE
)) {
642 VERBOSE (DEBUG
, PRINTOUT ("break\n"));
649 VERBOSE (DEBUG
, PRINTOUT ("nb buffer out: %u\n", (pt
- bufout
)));
650 nbwrite
= write (fout
, bufout
, pt
- bufout
);
651 if (nbwrite
!= pt
- bufout
) {
652 VERBOSE (ERROR
, PRINTERR ("can't write %d bytes in file '%s'\n'", pt
- bufout
- nbwrite
, output
));
663 VERBOSE (DEBUG
, PRINTOUT ("end writing decompressed file\n"));
670 int main (int argc
, char *argv
[])
675 leaf_t
**leafs
= NULL
;
677 code_t
*codes
= NULL
;
678 byte_t
*header
= NULL
;
686 VERBOSE (DEBUG
, PRINTOUT ("start processing arguments\n"));
690 PRINTERR ("%s: invalid option -- %s\n", progname
, arg
);
694 VERBOSE (DEBUG
, PRINTOUT ("option: %c\n", c
));
703 input
= (arg
[2]) ? arg
+ 2 : (--argc
> 0 ) ? *(++argv
) : NULL
;
704 VERBOSE (DEBUG
, PRINTOUT ("input: %s\n", input
));
707 output
= (arg
[2]) ? arg
+ 2 : (--argc
> 0 ) ? *(++argv
) : NULL
;
708 VERBOSE (DEBUG
, PRINTOUT ("output: %s\n", output
));
711 arg
= (arg
[2]) ? arg
+ 2 : (--argc
> 0 ) ? *(++argv
) : NULL
;
713 PRINTERR ("%s: missing verbose level\n", progname
);
716 verbose
= atoi (arg
);
717 VERBOSE (INFO
, PRINTOUT ("verbose: %d\n", verbose
));
721 return usage (c
!= 'h');
724 if ((input
== NULL
) || (output
== NULL
)) {
725 PRINTERR ("%s: missing file\n", progname
);
728 VERBOSE (DEBUG
, PRINTOUT ("end processing arguments\n"));
732 table
= create_table (input
);
733 if (table
== NULL
) break;
734 VERBOSE (INFO
, print_occ_table (table
));
736 leafs
= init_forest (table
);
737 if (leafs
== NULL
) break;
738 root
= create_tree (leafs
);
739 if (root
== NULL
) break;
740 codes
= create_code (root
);
741 if (codes
== NULL
) break;
742 VERBOSE (INFO
, print_code_table (codes
));
743 header
= encode_header_table (codes
, table
);
744 if (header
== NULL
) break;
745 VERBOSE (INFO
, print_header (header
));
746 rc
= write_compress (output
, input
, codes
, header
);
749 codes
= read_header (input
);
750 if (codes
== NULL
) break;
751 VERBOSE (INFO
, print_code_table (codes
));
752 rc
= write_decompress (output
, input
, codes
);
759 // test: compress.exe -h
760 // test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
761 // test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }'
762 // test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
763 // test: compress.exe -v 2>&1 | grep -q 'missing verbose level'
764 // test: compress.exe -c -i compress.c 2>&1 | grep -q 'missing file'
765 // test: compress.exe -c -v 4 -i compress.c -o compress.mz | grep -q "Occurence table"
766 // test: compress.exe -c -i compress.c -o compress.mz
767 // test: ls -sS1 compress.c compress.mz | tail -1 | grep compress.mz
768 // test: compress.exe -d -i compress.mz -o tmp.c
769 // test: cmp compress.c tmp.c; x=$?; rm compress.mz tmp.c; test x$x = x0
770 // test: compress.exe -c -i test/compress.c -o compress.mz 2>&1 | grep "can't open file"
771 // test: compress.exe -c -i compress.c -o test/compress.mz 2>&1 | grep "can't open file"
773 /* vim: set ts=4 sw=4 et: */