713fc5b0b94b415e6cfaa74db3a918da74d370e1
3 /* linker: atoi.o code.o debug.o fprintf.o */
15 #define BUFFER_SIZE 4096
24 char *progname
= NULL
;
30 int fd
= ret
? _fderr
: _fdout
;
31 fdprintf (fd
, "usage: %s\n", progname
);
32 fdprintf (fd
, " -h : help message\n");
33 fdprintf (fd
, " -i <file>: input file\n");
34 fdprintf (fd
, " -o <file>: output file\n");
35 fdprintf (fd
, " -v : verbose level (%d)\n", verbose
);
40 /* create occurence table */
42 int *create_table (char *filename
)
44 byte_t buffer
[BUFFER_SIZE
] = {0};
46 static int table
[NB_BYTES
] = {0};
49 VERBOSE (DEBUG
, PRINTOUT ("start creating occurence table\n"));
52 fid
= open (filename
, O_RDONLY
|O_RAW
);
54 VERBOSE (ERROR
, PRINTERR ("can't open file '%s'\n", filename
));
57 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", filename
));
60 while ((nbread
= read (fid
, buffer
, BUFFER_SIZE
)) > 0) {
61 VERBOSE (DEBUG
, PRINTOUT ("nbread: %d\n", nbread
));
63 table
[(int)buffer
[nbread
]]++;
70 VERBOSE (DEBUG
, PRINTOUT ("end creating occurence table\n"));
75 /* print occurence table */
77 void print_occ_table (int *table
)
81 PRINTOUT ("Occurence table\n");
82 for (i
= 0; i
< NB_BYTES
; i
++) {
84 PRINTOUT ("0x%02x '%c': %d\n", i
, ((i
< 32) || (i
> 127)) ? '.' : i
, table
[i
]);
89 /* initialize forest */
91 leaf_t
**init_forest (int *table
)
93 static leaf_t
*leafs
[NB_BYTES
] = {0};
97 VERBOSE (DEBUG
, PRINTOUT ("start initiliazing forest\n"));
99 /* count number of leafs */
100 for (i
= 0; i
< NB_BYTES
; i
++) {
106 /* initialize leafs */
107 for (i
= 0, l
= 0; i
< NB_BYTES
; i
++) {
109 leafs
[l
] = getleaf (1);
110 if (leafs
[l
] == NULL
) {
111 VERBOSE (ERROR
, PRINTERR ("can't allocate memory\n"));
114 leafs
[l
]->occ
= table
[i
];
120 VERBOSE (DEBUG
, PRINTOUT ("end initiliazing forest\n"));
127 leaf_t
*create_tree (leaf_t
**leafs
)
129 leaf_t
*branch
= NULL
;
135 VERBOSE (DEBUG
, PRINTOUT ("start creating tree\n"));
137 /* count number of leafs */
138 while (leafs
[nb_leafs
] != NULL
) {
143 for (j
= 0; j
< nb_leafs
- 1; j
++) {
145 /* look for leatest occurence */
147 for (i
= 0; i
< nb_leafs
; i
++) {
148 if (leafs
[i
] == NULL
) {
151 if ((last
== -1) || (leafs
[i
]->occ
< leafs
[last
]->occ
)) {
156 /* look for ante leatest occurence */
158 for (i
= 0; i
< nb_leafs
; i
++) {
159 if ((i
== last
) || (leafs
[i
] == NULL
)) {
162 if ((ante
== -1) || (leafs
[i
]->occ
< leafs
[ante
]->occ
)) {
168 if ((last
== -1) || (ante
== -1)) {
169 VERBOSE (ERROR
, PRINTERR ("error during tree building\n"));
172 branch
= getleaf (1);
173 if (branch
== NULL
) {
174 VERBOSE (ERROR
, PRINTERR ("can't allocate memory\n"));
177 branch
->left
= leafs
[last
];
178 branch
->right
= leafs
[ante
];
179 branch
->occ
= branch
->left
->occ
+ branch
->right
->occ
;
180 leafs
[last
] = branch
;
184 VERBOSE (DEBUG
, PRINTOUT ("end creating tree\n"));
186 return (last
!= -1) ? leafs
[last
] : NULL
;
191 void explore_tree (code_t
*table
, leaf_t
*root
, char *code
, int index
)
194 VERBOSE (DEBUG
, PRINTOUT ("start exploring code tree\n"));
196 if ((root
->left
== NULL
) && (root
->right
== NULL
)) {
197 codcpy ((char *)(table
+ (int)(root
->c
)), sizeof (code_t
), code
);
200 codcpy (code
+ index
, sizeof (code_t
), "1");
201 explore_tree (table
, root
->left
, code
, index
+ 1);
202 codcpy (code
+ index
, sizeof (code_t
), "0");
203 explore_tree (table
, root
->right
, code
, index
+ 1);
206 VERBOSE (DEBUG
, PRINTOUT ("end exploring code tree\n"));
209 /* create code table */
210 code_t
*create_code (leaf_t
*root
)
212 static code_t table
[NB_BYTES
] = {0};
215 VERBOSE (DEBUG
, PRINTOUT ("start creating code table\n"));
217 explore_tree (table
, root
, (char *)&code
, 0);
219 VERBOSE (DEBUG
, PRINTOUT ("end creating code table\n"));
224 /* print code table */
226 void print_code_table (code_t
*codes
)
231 PRINTOUT ("Code table\n");
232 for (i
= 0; i
< NB_BYTES
; i
++) {
233 code
= (char *)(codes
+ i
);
234 if (codlen (code
) == 0) {
237 PRINTOUT ("0x%02x '%c': %s\n", i
, ((i
< 32) || (i
> 127)) ? '.' : i
, code
);
241 /* encode header and code table */
243 byte_t
*encode_header_table (code_t
*codes
, int *occ
)
245 static byte_t buffer
[NB_BYTES
* (NB_BYTES
- 1) / 2 / 8 + NB_BYTES
+ 6] = {0};
246 char bits
[(NB_BYTES
- 1) + 8 + 1] = {0};
248 byte_t
*header
= buffer
;
249 int i
, j
, length
, mode
;
253 VERBOSE (DEBUG
, PRINTOUT ("start encoding header and code table\n"));
256 for (i
= 0; i
< NB_BYTES
; i
++) {
257 code
= (char *)(codes
+ i
);
258 if (codlen (code
) > 0) {
260 size
+= codlen (code
) * occ
[i
];
263 mode
= (NB_BYTES
< 2 * nb
+ 1) ? 1 : 2;
264 VERBOSE (DEBUG
, PRINTOUT ("nb chars: %d\n", nb
));
265 VERBOSE (DEBUG
, PRINTOUT ("mode: %d\n", mode
));
266 VERBOSE (DEBUG
, PRINTOUT ("size: %d\n", size
));
267 VERBOSE (DEBUG
, PRINTOUT ("rem: %d\n", size
% 256));
270 codcpy ((char *)header
, sizeof (buffer
), (mode
== 1) ? "MZ1 " : "MZ2 ");
276 for (i
= 0; i
< NB_BYTES
; i
++) {
277 code
= (char *)(codes
+ i
);
278 *(header
++) = (byte_t
) codlen (code
);
282 *(header
++) = (byte_t
)(nb
- 1);
283 for (i
= 0; i
< NB_BYTES
; i
++) {
284 code
= (char *)(codes
+ i
);
285 if (codlen (code
) > 0) {
286 *(header
++) = (byte_t
) i
;
287 *(header
++) = (byte_t
) codlen (code
);
294 for (i
= 0; i
< NB_BYTES
; i
++) {
295 code
= (char *)(codes
+ i
);
296 if (codlen (code
) > 0) {
297 codcat (bits
, sizeof (code_t
), code
);
298 while (codlen (bits
) > (8 - 1)) {
299 for (j
= 0; j
< 8; j
++) {
301 if (bits
[j
] == '1') {
305 codcpy (bits
, sizeof (code_t
), bits
+ 8);
310 if (codlen (bits
) > 0) {
311 for (j
= 0; j
< (int)codlen (bits
); j
++) {
313 if (bits
[j
] == '1') {
317 for (j
= (int)codlen (bits
); j
< 8; j
++) {
324 length
= (int)(header
- buffer
- 6);
325 VERBOSE (DEBUG
, PRINTOUT ("lengh: %d %02x %02x\n", length
, length
>> 8, length
& 0xff));
326 buffer
[3] = (byte_t
)(length
>> 8);
327 buffer
[4] = (byte_t
)(length
& 0xff);
328 buffer
[5] = (byte_t
)(size
% 256);
331 VERBOSE (DEBUG
, PRINTOUT ("end encoding header and code table\n"));
338 void print_header (byte_t
*header
)
342 length
= (header
[3] << 8) + header
[4];
343 VERBOSE (DEBUG
, PRINTOUT ("lengh: %d\n", length
));
344 for (i
= 0; i
< length
+ 6; i
++) {
345 PRINTOUT ("%02x", header
[i
]);
350 /* write crompressed file */
352 int write_compress (char *output
, char *input
, code_t
*codes
, byte_t
*header
)
354 byte_t bufin
[BUFFER_SIZE
] = {0};
355 byte_t bufout
[BUFFER_SIZE
] = {0};
356 char bits
[(NB_BYTES
- 1) + 8 + 1] = {0};
362 VERBOSE (DEBUG
, PRINTOUT ("start writting compressed file\n"));
364 /* open input file */
365 fin
= open (input
, O_RDONLY
|O_RAW
);
367 VERBOSE (ERROR
, PRINTERR ("can't open file '%s' for reading\n", input
));
370 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", input
));
372 /* open output file */
373 fout
= open (output
, O_WRONLY
|O_CREAT
|O_RAW
, 0700);
375 VERBOSE (ERROR
, PRINTERR ("can't open file '%s' for writing\n", output
));
379 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", output
));
382 length
= (header
[3] << 8) + header
[4];
383 VERBOSE (DEBUG
, PRINTOUT ("lengh: %d\n", length
));
384 write (fout
, header
, length
+ 6);
388 while ((nbread
= read (fin
, bufin
, BUFFER_SIZE
)) > 0) {
389 VERBOSE (DEBUG
, PRINTOUT ("nbread: %d\n", nbread
));
390 for (i
= 0; i
< nbread
; i
++) {
391 codcat (bits
, sizeof (code_t
), (char *)(codes
+ bufin
[i
]));
392 while (codlen (bits
) > (8 - 1)) {
393 for (j
= 0; j
< 8; j
++) {
395 if (bits
[j
] == '1') {
399 codcpy (bits
, sizeof (code_t
), bits
+ 8);
400 if (pt
- bufout
== BUFFER_SIZE
- 1) {
401 write (fout
, bufout
, BUFFER_SIZE
);
409 VERBOSE (DEBUG
, PRINTOUT ("lastest bits : %d\n", codlen (bits
)));
410 if (codlen (bits
) > 0) {
411 for (j
= 0; j
< (int)codlen (bits
); j
++) {
413 if (bits
[j
] == '1') {
417 for (j
= (int)codlen (bits
); j
< 8; j
++) {
423 VERBOSE (DEBUG
, PRINTOUT ("last partial buffer written: %u\n", pt
- bufout
));
424 write (fout
, bufout
, pt
- bufout
);
431 VERBOSE (DEBUG
, PRINTOUT ("end writting compressed file\n"));
438 code_t
*read_header (char *filename
) {
439 static code_t table
[NB_BYTES
] = {0};
440 byte_t buffer
[NB_BYTES
* (NB_BYTES
- 1) / 2 / 8 + NB_BYTES
+ 6] = {0};
441 byte_t
*codes
= NULL
;
443 int lengths
[NB_BYTES
] = {0};
446 int i
, j
, l
, nb
, size
;
448 VERBOSE (DEBUG
, PRINTOUT ("start reading header\n"));
451 fid
= open (filename
, O_RDONLY
|O_RAW
);
453 VERBOSE (ERROR
, PRINTERR ("can't open file '%s'\n", filename
));
456 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", filename
));
458 /* read magic number */
459 nb
= read (fid
, buffer
, 6);
460 VERBOSE (DEBUG
, PRINTOUT ("nb, buffer: %d 0x%02x 0x%02x\n", nb
, buffer
[0], buffer
[1]));
461 if ((nb
== 6) && (buffer
[0] == 'M') && (buffer
[1] == 'Z')) {
462 mode
= (buffer
[2] == '1') ? 1 : (buffer
[2] == '2') ? 2 : 0;
463 size
= (buffer
[3] << 8) + buffer
[4];
464 VERBOSE (DEBUG
, PRINTOUT ("mode, size: %d %d\n", mode
, size
));
465 if (size
> NB_BYTES
* (NB_BYTES
- 1) / 2 / 8 + NB_BYTES
) {
468 nb
= read (fid
, buffer
, size
);
469 VERBOSE (DEBUG
, PRINTOUT ("nb read: %d/%d\n", nb
, size
));
477 VERBOSE (ERROR
, PRINTERR ("incorrect file\n"));
485 for (i
= 0; i
< NB_BYTES
; i
++) {
486 lengths
[i
] = *(codes
++);
491 VERBOSE (DEBUG
, PRINTOUT ("nb codes: %d\n", nb
));
492 for (i
= 0; i
< nb
; i
++) {
494 lengths
[j
] = *(codes
++);
498 VERBOSE (DEBUG
, for (i
= 0; i
< NB_BYTES
; i
++) if (lengths
[i
]) PRINTOUT ("%d: %d\n", i
, lengths
[i
]));
501 for (i
= 0, l
= 0; i
< NB_BYTES
; i
++) {
504 if (((mode
== 1) && (size
- 256 != (l
+ 7) / 8)) ||
505 ((mode
== 2) && (size
- 2 * nb
- 1 != (l
+ 7) / 8))) {
506 VERBOSE (ERROR
, PRINTERR ("incorrect code table length: %d %d %d\n", size
, nb
, l
));
513 for (i
= 0; i
< NB_BYTES
; i
++) {
514 if (lengths
[i
] == 0) {
517 while (lengths
[i
]--) {
518 codcat ((char *)(table
+ i
), sizeof (code_t
), ((cur
& 0x80) == 0) ? "0" : "1");
528 VERBOSE (DEBUG
, PRINTOUT ("end reading header\n"));
533 /* write decompressed file */
535 int write_decompress (char *output
, char *input
, code_t
*codes
)
537 byte_t bufin
[BUFFER_SIZE
] = {0};
538 byte_t bufout
[BUFFER_SIZE
] = {0};
539 byte_t bufhea
[MAX(NB_BYTES
* (NB_BYTES
- 1) / 2 / 8 + NB_BYTES
+ 6, BUFFER_SIZE
)] = {0};
540 char bits
[(NB_BYTES
- 1) + 1] = {0};
542 int i
, j
, k
, nb
, size
, rem
;
547 VERBOSE (DEBUG
, PRINTOUT ("start writing decompressed file\n"));
549 /* open file for reading */
550 fin
= open (input
, O_RDONLY
|O_RAW
);
552 VERBOSE (ERROR
, PRINTERR ("can't open file '%s' for reading\n", input
));
555 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", input
));
557 /* read magic number */
558 nb
= read (fin
, bufhea
, 6);
560 VERBOSE (ERROR
, PRINTERR ("can't read file\n"));
564 size
= (bufhea
[3] << 8) + bufhea
[4];
565 VERBOSE (DEBUG
, PRINTOUT ("table size: %d\n", size
));
567 VERBOSE (DEBUG
, PRINTOUT ("remainder: %d\n", rem
));
568 nb
= read (fin
, bufhea
, size
);
570 VERBOSE (ERROR
, PRINTERR ("can't read file\n"));
575 /* open file for writing */
576 fout
= open (output
, O_WRONLY
|O_CREAT
|O_RAW
, 0700);
578 VERBOSE (ERROR
, PRINTERR ("can't open file '%s' for writing\n", output
));
582 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", output
));
586 while ((nb
= read (fin
, bufin
, BUFFER_SIZE
)) > 0) {
587 VERBOSE (DEBUG
, PRINTOUT ("nbread: %d\n", nb
));
588 for (i
= 0; i
< nb
; i
++) {
589 for (j
= 0; j
< 8; j
++) {
590 codcat (bits
, sizeof (bits
), ((bufin
[i
] & 0x80) == 0) ? "0" : "1");
593 VERBOSE (DEBUG
, PRINTOUT ("bits: %d - %s\n", codlen (bits
), bits
));
595 /* look for correct code */
597 for (k
= 0; (k
< NB_BYTES
) && (!is_found
); k
++) {
598 if (codcmp ((char *)(codes
+ k
), bits
) == 0) {
600 VERBOSE (DEBUG
, PRINTOUT ("found: %d\n", k
));
603 if (pt
- bufout
== BUFFER_SIZE
- 1) {
604 VERBOSE (DEBUG
, PRINTOUT ("nb buffer out: %u\n", (pt
- bufout
)));
605 write (fout
, bufout
, BUFFER_SIZE
);
612 if ((i
== nb
- 1) && (l
% 256 == rem
) && (nb
!= BUFFER_SIZE
)) {
613 VERBOSE (DEBUG
, PRINTOUT ("break\n"));
620 VERBOSE (DEBUG
, PRINTOUT ("nb buffer out: %u\n", (pt
- bufout
)));
621 write (fout
, bufout
, pt
- bufout
);
628 VERBOSE (DEBUG
, PRINTOUT ("end writing decompressed file\n"));
635 int main (int argc
, char *argv
[])
640 leaf_t
**leafs
= NULL
;
642 code_t
*codes
= NULL
;
643 byte_t
*header
= NULL
;
651 VERBOSE (DEBUG
, PRINTOUT ("start processing arguments\n"));
655 PRINTERR ("%s: invalid option -- %s\n", progname
, arg
);
659 VERBOSE (DEBUG
, PRINTOUT ("option: %c\n", c
));
668 input
= (arg
[2]) ? arg
+ 2 : (--argc
> 0 ) ? *(++argv
) : NULL
;
669 VERBOSE (DEBUG
, PRINTOUT ("input: %s\n", input
));
672 output
= (arg
[2]) ? arg
+ 2 : (--argc
> 0 ) ? *(++argv
) : NULL
;
673 VERBOSE (DEBUG
, PRINTOUT ("output: %s\n", output
));
676 arg
= (arg
[2]) ? arg
+ 2 : (--argc
> 0 ) ? *(++argv
) : NULL
;
678 PRINTERR ("%s: missing verbose level\n", progname
);
681 verbose
= myatoi (arg
);
682 VERBOSE (INFO
, PRINTOUT ("verbose: %d\n", verbose
));
686 return usage (c
!= 'h');
689 if ((input
== NULL
) || (output
== NULL
)) {
690 PRINTERR ("%s: missing file\n", progname
);
693 VERBOSE (DEBUG
, PRINTOUT ("end processing arguments\n"));
697 table
= create_table (input
);
698 if (table
== NULL
) break;
699 VERBOSE (INFO
, print_occ_table (table
));
701 leafs
= init_forest (table
);
702 if (leafs
== NULL
) break;
703 root
= create_tree (leafs
);
704 if (root
== NULL
) break;
705 codes
= create_code (root
);
706 if (codes
== NULL
) break;
707 VERBOSE (INFO
, print_code_table (codes
));
708 header
= encode_header_table (codes
, table
);
709 if (header
== NULL
) break;
710 VERBOSE (INFO
, print_header (header
));
711 rc
= write_compress (output
, input
, codes
, header
);
714 codes
= read_header (input
);
715 if (codes
== NULL
) break;
716 VERBOSE (INFO
, print_code_table (codes
));
717 rc
= write_decompress (output
, input
, codes
);
724 // test: compress.exe -h
725 // test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
726 // test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }'
727 // test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
728 // test: compress.exe -c -i compress.c -o compress.mz
729 // test: ls -sS1 compress.c compress.mz | tail -1 | grep compress.mz
730 // test: compress.exe -d -i compress.mz -o tmp.c
731 // test: cmp compress.c tmp.c
732 // test: rm compress.mz tmp.c
734 /* vim: set ts=4 sw=4 et: */