ea65038032707bf22d3ea6396221d19cba6ab0ef
3 /* linker: atoi.o code.o debug.o fprintf.o */
15 #define BUFFER_SIZE 4096
24 char *progname
= NULL
;
30 //int fd = ret ? STDERR_FILENO : STDOUT_FILENO;
31 int fd
= ret
? _fderr
: _fdout
;
32 fdprintf (fd
, "usage: %s\n", progname
);
33 fdprintf (fd
, " -h : help message\n");
34 fdprintf (fd
, " -i <file>: input file\n");
35 fdprintf (fd
, " -o <file>: output file\n");
36 fdprintf (fd
, " -v : verbose level (%d)\n", verbose
);
41 void blkcpy (void *dst
, const void *src
, int len
)
44 *((char *)dst
++) = *((char *)src
++);
48 /* create occurence table */
49 int *create_table (char *filename
)
51 byte_t buffer
[BUFFER_SIZE
] = {0};
53 static int table
[NB_BYTES
] = {0};
56 VERBOSE (DEBUG
, PRINTOUT ("start creating occurence table\n"));
59 fid
= open (filename
, O_RDONLY
|O_RAW
);
61 VERBOSE (ERROR
, PRINTERR ("can't open file '%s'\n", filename
/);
64 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", filename
));
67 while ((nbread
= read (fid
, buffer
, BUFFER_SIZE
)) > 0) {
68 VERBOSE (DEBUG
, PRINTOUT ("nbread: %d\n", nbread
));
70 table
[(int)buffer
[nbread
]]++;
77 VERBOSE (DEBUG
, PRINTOUT ("end creating occurence table\n"));
82 /* print occurence table */
84 void print_occ_table (int *table
)
88 PRINTOUT ("Occurence table\n");
89 for (i
= 0; i
< NB_BYTES
; i
++) {
91 PRINTOUT ("0x%02x '%c': %d\n", i
, ((i
< 32) || (i
> 127)) ? '.' : i
, table
[i
]);
96 /* initialize forest */
98 leaf_t
**init_forest (int *table
)
100 static leaf_t
*leafs
[NB_BYTES
] = {0};
104 VERBOSE (DEBUG
, PRINTOUT ("start initiliazing forest\n"));
106 /* count number of leafs */
107 for (i
= 0; i
< NB_BYTES
; i
++) {
113 /* initialize leafs */
114 for (i
= 0, l
= 0; i
< NB_BYTES
; i
++) {
116 leafs
[l
] = getleaf (1);
117 if (leafs
[l
] == NULL
) {
118 VERBOSE (ERROR
, PRINTERR ("can't allocate memory\n"));
121 leafs
[l
]->occ
= table
[i
];
127 VERBOSE (DEBUG
, PRINTOUT ("end initiliazing forest\n"));
134 leaf_t
*create_tree (leaf_t
**leafs
)
136 leaf_t
*branch
= NULL
;
142 VERBOSE (DEBUG
, PRINTOUT ("start creating tree\n"));
144 /* count number of leafs */
145 while (leafs
[nb_leafs
] != NULL
) {
150 for (j
= 0; j
< nb_leafs
- 1; j
++) {
152 /* look for leatest occurence */
154 for (i
= 0; i
< nb_leafs
; i
++) {
155 if (leafs
[i
] == NULL
) {
158 if ((last
== -1) || (leafs
[i
]->occ
< leafs
[last
]->occ
)) {
163 /* look for ante leatest occurence */
165 for (i
= 0; i
< nb_leafs
; i
++) {
166 if ((i
== last
) || (leafs
[i
] == NULL
)) {
169 if ((ante
== -1) || (leafs
[i
]->occ
< leafs
[ante
]->occ
)) {
175 if ((last
== -1) || (ante
== -1)) {
176 VERBOSE (ERROR
, PRINTERR ("error during tree building\n"));
179 branch
= getleaf (1);
180 if (branch
== NULL
) {
181 VERBOSE (ERROR
, PRINTERR ("can't allocate memory\n"));
184 branch
->left
= leafs
[last
];
185 branch
->right
= leafs
[ante
];
186 branch
->occ
= branch
->left
->occ
+ branch
->right
->occ
;
187 leafs
[last
] = branch
;
191 VERBOSE (DEBUG
, PRINTOUT ("end creating tree\n"));
193 return (last
!= -1) ? leafs
[last
] : NULL
;
198 void explore_tree (code_t
*table
, leaf_t
*root
, char *code
, int index
)
201 VERBOSE (DEBUG
, PRINTOUT ("start exploring code tree\n"));
203 if ((root
->left
== NULL
) && (root
->right
== NULL
)) {
204 codcpy ((char *)(table
+ (int)(root
->c
)), sizeof (code_t
), code
);
207 codcpy (code
+ index
, sizeof (code_t
), "1");
208 explore_tree (table
, root
->left
, code
, index
+ 1);
209 codcpy (code
+ index
, sizeof (code_t
), "0");
210 explore_tree (table
, root
->right
, code
, index
+ 1);
213 VERBOSE (DEBUG
, PRINTOUT ("end exploring code tree\n"));
216 /* create code table */
217 code_t
*create_code (leaf_t
*root
)
219 static code_t table
[NB_BYTES
] = {0};
222 VERBOSE (DEBUG
, PRINTOUT ("start creating code table\n"));
224 explore_tree (table
, root
, (char *)&code
, 0);
226 VERBOSE (DEBUG
, PRINTOUT ("end creating code table\n"));
231 /* print code table */
233 void print_code_table (code_t
*codes
)
238 PRINTOUT ("Code table\n");
239 for (i
= 0; i
< NB_BYTES
; i
++) {
240 code
= (char *)(codes
+ i
);
241 if (codlen (code
) == 0) {
244 PRINTOUT ("0x%02x '%c': %s\n", i
, ((i
< 32) || (i
> 127)) ? '.' : i
, code
);
248 /* encode header and code table */
250 byte_t
*encode_header_table (code_t
*codes
, int *occ
)
252 static byte_t buffer
[NB_BYTES
* (NB_BYTES
- 1) / 2 / 8 + NB_BYTES
+ 6] = {0};
253 char bits
[(NB_BYTES
- 1) + 8 + 1] = {0};
255 byte_t
*header
= buffer
;
256 int i
, j
, length
, mode
;
260 VERBOSE (DEBUG
, PRINTOUT ("start encoding header and code table\n"));
263 for (i
= 0; i
< NB_BYTES
; i
++) {
264 code
= (char *)(codes
+ i
);
265 if (codlen (code
) > 0) {
267 size
+= codlen (code
) * occ
[i
];
270 mode
= (NB_BYTES
< 2 * nb
+ 1) ? 1 : 2;
271 VERBOSE (DEBUG
, PRINTOUT ("nb chars: %d\n", nb
));
272 VERBOSE (DEBUG
, PRINTOUT ("mode: %d\n", mode
));
273 VERBOSE (DEBUG
, PRINTOUT ("size: %d\n", size
));
274 VERBOSE (DEBUG
, PRINTOUT ("rem: %d\n", size
% 256));
277 codcpy ((char *)header
, sizeof (buffer
), (mode
== 1) ? "MZ1 " : "MZ2 ");
283 for (i
= 0; i
< NB_BYTES
; i
++) {
284 code
= (char *)(codes
+ i
);
285 *(header
++) = (byte_t
) codlen (code
);
289 *(header
++) = (byte_t
)(nb
- 1);
290 for (i
= 0; i
< NB_BYTES
; i
++) {
291 code
= (char *)(codes
+ i
);
292 if (codlen (code
) > 0) {
293 *(header
++) = (byte_t
) i
;
294 *(header
++) = (byte_t
) codlen (code
);
301 for (i
= 0; i
< NB_BYTES
; i
++) {
302 code
= (char *)(codes
+ i
);
303 if (codlen (code
) > 0) {
304 codcat (bits
, sizeof (code_t
), code
);
305 while (codlen (bits
) > (8 - 1)) {
306 for (j
= 0; j
< 8; j
++) {
308 if (bits
[j
] == '1') {
312 codcpy (bits
, sizeof (code_t
), bits
+ 8);
317 if (codlen (bits
) > 0) {
318 for (j
= 0; j
< (int)codlen (bits
); j
++) {
320 if (bits
[j
] == '1') {
324 for (j
= (int)codlen (bits
); j
< 8; j
++) {
331 length
= (int)(header
- buffer
- 6);
332 VERBOSE (DEBUG
, PRINTOUT ("lengh: %d %02x %02x\n", length
, length
>> 8, length
& 0xff));
333 buffer
[3] = (byte_t
)(length
>> 8);
334 buffer
[4] = (byte_t
)(length
& 0xff);
335 buffer
[5] = (byte_t
)(size
% 256);
338 VERBOSE (DEBUG
, PRINTOUT ("end encoding header and code table\n"));
345 void print_header (byte_t
*header
)
349 length
= (header
[3] << 8) + header
[4];
350 VERBOSE (DEBUG
, PRINTOUT ("lengh: %d\n", length
));
351 for (i
= 0; i
< length
+ 6; i
++) {
352 PRINTOUT ("%02x", header
[i
]);
357 /* write crompressed file */
359 int write_compress (char *output
, char *input
, code_t
*codes
, byte_t
*header
)
361 byte_t bufin
[BUFFER_SIZE
] = {0};
362 byte_t bufout
[BUFFER_SIZE
] = {0};
363 char bits
[(NB_BYTES
- 1) + 8 + 1] = {0};
369 VERBOSE (DEBUG
, PRINTOUT ("start writting compressed file\n"));
371 /* open input file */
372 fin
= open (input
, O_RDONLY
|O_RAW
);
374 VERBOSE (ERROR
, PRINTERR ("can't open file '%s' for reading\n", input
));
377 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", input
));
379 /* open output file */
380 fout
= open (output
, O_WRONLY
|O_CREAT
|O_RAW
, 0700);
382 VERBOSE (ERROR
, PRINTERR ("can't open file '%s' for writing\n", output
));
386 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", output
));
389 length
= (header
[3] << 8) + header
[4];
390 VERBOSE (DEBUG
, PRINTOUT ("lengh: %d\n", length
));
391 write (fout
, header
, length
+ 6);
395 while ((nbread
= read (fin
, bufin
, BUFFER_SIZE
)) > 0) {
396 VERBOSE (DEBUG
, PRINTOUT ("nbread: %d\n", nbread
));
397 for (i
= 0; i
< nbread
; i
++) {
398 codcat (bits
, sizeof (code_t
), (char *)(codes
+ bufin
[i
]));
399 while (codlen (bits
) > (8 - 1)) {
400 for (j
= 0; j
< 8; j
++) {
402 if (bits
[j
] == '1') {
406 codcpy (bits
, sizeof (code_t
), bits
+ 8);
407 if (pt
- bufout
== BUFFER_SIZE
- 1) {
408 write (fout
, bufout
, BUFFER_SIZE
);
416 VERBOSE (DEBUG
, PRINTOUT ("lastest bits : %d\n", codlen (bits
)));
417 if (codlen (bits
) > 0) {
418 for (j
= 0; j
< (int)codlen (bits
); j
++) {
420 if (bits
[j
] == '1') {
424 for (j
= (int)codlen (bits
); j
< 8; j
++) {
430 VERBOSE (DEBUG
, PRINTOUT ("last partial buffer written: %u\n", pt
- bufout
));
431 write (fout
, bufout
, pt
- bufout
);
438 VERBOSE (DEBUG
, PRINTOUT ("end writting compressed file\n"));
445 code_t
*read_header (char *filename
) {
446 static code_t table
[NB_BYTES
] = {0};
447 byte_t buffer
[NB_BYTES
* (NB_BYTES
- 1) / 2 / 8 + NB_BYTES
+ 6] = {0};
448 byte_t
*codes
= NULL
;
450 int lengths
[NB_BYTES
] = {0};
453 int i
, j
, l
, nb
, size
;
455 VERBOSE (DEBUG
, PRINTOUT ("start reading header\n"));
458 fid
= open (filename
, O_RDONLY
|O_RAW
);
460 VERBOSE (ERROR
, PRINTERR ("can't open file '%s'\n", filename
));
463 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", filename
));
465 /* read magic number */
466 nb
= read (fid
, buffer
, 6);
467 VERBOSE (DEBUG
, PRINTOUT ("nb, buffer: %d 0x%02x 0x%02x\n", nb
, buffer
[0], buffer
[1]));
468 if ((nb
== 6) && (buffer
[0] == 'M') && (buffer
[1] == 'Z')) {
469 mode
= (buffer
[2] == '1') ? 1 : (buffer
[2] == '2') ? 2 : 0;
470 size
= (buffer
[3] << 8) + buffer
[4];
471 VERBOSE (DEBUG
, PRINTOUT ("mode, size: %d %d\n", mode
, size
));
472 if (size
> NB_BYTES
* (NB_BYTES
- 1) / 2 / 8 + NB_BYTES
) {
475 nb
= read (fid
, buffer
, size
);
476 VERBOSE (DEBUG
, PRINTOUT ("nb read: %d/%d\n", nb
, size
));
484 VERBOSE (ERROR
, PRINTERR ("incorrect file\n"));
492 for (i
= 0; i
< NB_BYTES
; i
++) {
493 lengths
[i
] = *(codes
++);
498 VERBOSE (DEBUG
, PRINTOUT ("nb codes: %d\n", nb
));
499 for (i
= 0; i
< nb
; i
++) {
501 lengths
[j
] = *(codes
++);
505 VERBOSE (DEBUG
, for (i
= 0; i
< NB_BYTES
; i
++) if (lengths
[i
]) PRINTOUT ("%d: %d\n", i
, lengths
[i
]));
508 for (i
= 0, l
= 0; i
< NB_BYTES
; i
++) {
511 if (((mode
== 1) && (size
- 256 != (l
+ 7) / 8)) ||
512 ((mode
== 2) && (size
- 2 * nb
- 1 != (l
+ 7) / 8))) {
513 VERBOSE (ERROR
, PRINTERR ("incorrect code table length: %d %d %d\n", size
, nb
, l
));
520 for (i
= 0; i
< NB_BYTES
; i
++) {
521 if (lengths
[i
] == 0) {
524 while (lengths
[i
]--) {
525 codcat ((char *)(table
+ i
), sizeof (code_t
), ((cur
& 0x80) == 0) ? "0" : "1");
535 VERBOSE (DEBUG
, PRINTOUT ("end reading header\n"));
540 /* write decompressed file */
542 int write_decompress (char *output
, char *input
, code_t
*codes
)
544 byte_t bufin
[BUFFER_SIZE
] = {0};
545 byte_t bufout
[BUFFER_SIZE
] = {0};
546 byte_t bufhea
[MAX(NB_BYTES
* (NB_BYTES
- 1) / 2 / 8 + NB_BYTES
+ 6, BUFFER_SIZE
)] = {0};
547 char bits
[(NB_BYTES
- 1) + 1] = {0};
549 int i
, j
, k
, nb
, size
, rem
;
554 VERBOSE (DEBUG
, PRINTOUT ("start writing decompressed file\n"));
556 /* open file for reading */
557 fin
= open (input
, O_RDONLY
|O_RAW
);
559 VERBOSE (ERROR
, PRINTERR ("can't open file '%s' for reading\n", input
));
562 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", input
));
564 /* read magic number */
565 nb
= read (fin
, bufhea
, 6);
567 VERBOSE (ERROR
, PRINTERR ("can't read file\n"));
571 size
= (bufhea
[3] << 8) + bufhea
[4];
572 VERBOSE (DEBUG
, PRINTOUT ("table size: %d\n", size
));
574 VERBOSE (DEBUG
, PRINTOUT ("remainder: %d\n", rem
));
575 nb
= read (fin
, bufhea
, size
);
577 VERBOSE (ERROR
, PRINTERR ("can't read file\n"));
582 /* open file for writing */
583 fout
= open (output
, O_WRONLY
|O_CREAT
|O_RAW
, 0700);
585 VERBOSE (ERROR
, PRINTERR ("can't open file '%s' for writing\n", output
));
589 VERBOSE (INFO
, PRINTOUT ("file '%s' opened\n", output
));
593 while ((nb
= read (fin
, bufin
, BUFFER_SIZE
)) > 0) {
594 VERBOSE (DEBUG
, PRINTOUT ("nbread: %d\n", nb
));
595 for (i
= 0; i
< nb
; i
++) {
596 for (j
= 0; j
< 8; j
++) {
597 codcat (bits
, sizeof (bits
), ((bufin
[i
] & 0x80) == 0) ? "0" : "1");
600 VERBOSE (DEBUG
, PRINTOUT ("bits: %d - %s\n", codlen (bits
), bits
));
602 /* look for correct code */
604 for (k
= 0; (k
< NB_BYTES
) && (!is_found
); k
++) {
605 if (codcmp ((char *)(codes
+ k
), bits
) == 0) {
607 VERBOSE (DEBUG
, PRINTOUT ("found: %d\n", k
));
610 if (pt
- bufout
== BUFFER_SIZE
- 1) {
611 VERBOSE (DEBUG
, PRINTOUT ("nb buffer out: %u\n", (pt
- bufout
)));
612 write (fout
, bufout
, BUFFER_SIZE
);
619 if ((i
== nb
- 1) && (l
% 256 == rem
) && (nb
!= BUFFER_SIZE
)) {
620 VERBOSE (DEBUG
, PRINTOUT ("break\n"));
627 VERBOSE (DEBUG
, PRINTOUT ("nb buffer out: %u\n", (pt
- bufout
)));
628 write (fout
, bufout
, pt
- bufout
);
635 VERBOSE (DEBUG
, PRINTOUT ("end writing decompressed file\n"));
642 int main (int argc
, char *argv
[])
647 leaf_t
**leafs
= NULL
;
649 code_t
*codes
= NULL
;
650 byte_t
*header
= NULL
;
658 VERBOSE (DEBUG
, PRINTOUT ("start processing arguments\n"));
662 PRINTERR ("%s: invalid option -- %s\n", progname
, arg
);
666 VERBOSE (DEBUG
, PRINTOUT ("option: %c\n", c
));
675 input
= (arg
[2]) ? arg
+ 2 : (--argc
> 0 ) ? *(++argv
) : NULL
;
676 VERBOSE (DEBUG
, PRINTOUT ("input: %s\n", input
));
679 output
= (arg
[2]) ? arg
+ 2 : (--argc
> 0 ) ? *(++argv
) : NULL
;
680 VERBOSE (DEBUG
, PRINTOUT ("output: %s\n", output
));
683 arg
= (arg
[2]) ? arg
+ 2 : (--argc
> 0 ) ? *(++argv
) : NULL
;
685 PRINTERR ("%s: missing verbose level\n", progname
);
688 verbose
= myatoi (arg
);
689 VERBOSE (INFO
, PRINTOUT ("verbose: %d\n", verbose
));
693 return usage (c
!= 'h');
696 if ((input
== NULL
) || (output
== NULL
)) {
697 PRINTERR ("%s: missing file\n", progname
);
700 VERBOSE (DEBUG
, PRINTOUT ("end processing arguments\n"));
704 table
= create_table (input
);
705 if (table
== NULL
) break;
706 VERBOSE (INFO
, print_occ_table (table
));
708 leafs
= init_forest (table
);
709 if (leafs
== NULL
) break;
710 root
= create_tree (leafs
);
711 if (root
== NULL
) break;
712 codes
= create_code (root
);
713 if (codes
== NULL
) break;
714 VERBOSE (INFO
, print_code_table (codes
));
715 header
= encode_header_table (codes
, table
);
716 if (header
== NULL
) break;
717 VERBOSE (INFO
, print_header (header
));
718 rc
= write_compress (output
, input
, codes
, header
);
721 codes
= read_header (input
);
722 if (codes
== NULL
) break;
723 VERBOSE (INFO
, print_code_table (codes
));
724 rc
= write_decompress (output
, input
, codes
);
731 // test: compress.exe -h
732 // test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
733 // test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }'
734 // test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
735 // test: compress.exe -c -i compress.c -o compress.mz
736 // test: ls -sS1 compress.c compress.mz | tail -1 | grep compress.mz
737 // test: compress.exe -d -i compress.mz -o tmp.c
738 // test: cmp compress.c tmp.c
739 // test: rm compress.mz tmp.c
741 /* vim: set ts=4 sw=4 et: */