Commit | Line | Data |
---|---|---|
58352bb0 LM |
1 | /* depend: */ |
2 | /* cflags: */ | |
5f83300c | 3 | /* linker: code.o debug.o fprintf.o */ |
58352bb0 | 4 | |
58352bb0 LM |
5 | #include <malloc.h> |
6 | #include <stdio.h> | |
c9987f3b LM |
7 | #include "code.h" |
8 | #include "debug.h" | |
5f83300c | 9 | #include "fprintf.h" |
58352bb0 LM |
10 | |
11 | /* constants */ | |
12 | ||
58352bb0 LM |
13 | #define BUFFER_SIZE 4096 |
14 | ||
58352bb0 LM |
15 | /* macros */ |
16 | ||
58352bb0 LM |
17 | /* gobal variables */ |
18 | ||
19 | char *progname = NULL; | |
58352bb0 LM |
20 | |
21 | /* help function */ | |
22 | ||
23 | void usage (int ret) | |
24 | { | |
25 | FILE *fd = ret ? stderr : stdout; | |
5f83300c LM |
26 | _fprintf (fd, "usage: %s\n", progname); |
27 | _fprintf (fd, " -h : help message\n"); | |
28 | _fprintf (fd, " -i <file>: input file\n"); | |
29 | _fprintf (fd, " -o <file>: output file\n"); | |
30 | _fprintf (fd, " -v : verbose level (%d)\n", verbose); | |
58352bb0 LM |
31 | |
32 | exit (ret); | |
33 | } | |
34 | ||
d3dbaf98 LM |
35 | void blkcpy (void *dst, const void *src, int len) |
36 | { | |
37 | while (len--) { | |
38 | *((char *)dst++) = *((char *)src++); | |
39 | } | |
40 | } | |
41 | ||
58352bb0 LM |
42 | /* create occurence table */ |
43 | ||
44 | int *create_table (char *filename) | |
45 | { | |
c9987f3b | 46 | byte_t buffer[BUFFER_SIZE] = {0}; |
58352bb0 LM |
47 | int nbread; |
48 | int *table = NULL; | |
49 | FILE *fid = NULL; | |
50 | ||
37062814 | 51 | VERBOSE (DEBUG, PRINTF ("start creating occurence table\n")); |
58352bb0 LM |
52 | |
53 | /* memory allocation */ | |
c9987f3b | 54 | table = (int *) calloc (NB_BYTES, sizeof (int)); |
58352bb0 | 55 | if (table == NULL) { |
5f83300c | 56 | VERBOSE (ERROR, _fprintf (stdout, "can't allocate memory\n")); |
58352bb0 LM |
57 | return NULL; |
58 | } | |
5f83300c | 59 | VERBOSE (INFO, _fprintf (stdout, "memory allocated\n")); |
58352bb0 LM |
60 | |
61 | /* open file */ | |
62 | fid = fopen (filename, "rb"); | |
63 | if (fid == NULL) { | |
5f83300c | 64 | VERBOSE (ERROR, _fprintf (stdout, "can't open file '%s'\n", filename)); |
58352bb0 LM |
65 | free (table); |
66 | return NULL; | |
67 | } | |
5f83300c | 68 | VERBOSE (INFO, _fprintf (stdout, "file '%s' opened\n", filename)); |
58352bb0 LM |
69 | |
70 | /* read file */ | |
71 | while (!feof (fid)) { | |
72 | nbread = fread (buffer, 1, BUFFER_SIZE, fid); | |
73 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread)); | |
74 | while (nbread--) { | |
75 | table[(int)buffer[nbread]]++; | |
76 | } | |
77 | } | |
78 | ||
79 | /* close file */ | |
80 | fclose (fid); | |
81 | ||
37062814 | 82 | VERBOSE (DEBUG, PRINTF ("end creating occurence table\n")); |
58352bb0 LM |
83 | |
84 | return table; | |
85 | } | |
86 | ||
87 | /* print occurence table */ | |
88 | ||
89 | void print_occ_table (int *table) | |
90 | { | |
91 | int i; | |
92 | ||
5f83300c | 93 | _fprintf (stdout, "Occurence table\n"); |
c9987f3b | 94 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 95 | if (table[i]) { |
5f83300c | 96 | _fprintf (stdout, "0x%02x '%c': %d\n", i, ((i < 32) || (i > 127)) ? '.' : i, table[i]); |
58352bb0 LM |
97 | } |
98 | } | |
99 | } | |
100 | ||
101 | /* leaf structure */ | |
102 | ||
103 | typedef struct _leaf_t | |
104 | { | |
105 | struct _leaf_t *left; | |
106 | struct _leaf_t *right; | |
107 | int occ; | |
c9987f3b | 108 | byte_t c; |
58352bb0 LM |
109 | } leaf_t; |
110 | ||
111 | /* initialize forest */ | |
112 | ||
113 | leaf_t **init_forest (int *table) | |
114 | { | |
115 | leaf_t **leafs = NULL; | |
116 | int nb_leafs = 0; | |
117 | int i, l; | |
118 | ||
119 | VERBOSE (DEBUG, PRINTF ("start initiliazing forest\n")); | |
120 | ||
121 | /* count number of leafs */ | |
c9987f3b | 122 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 LM |
123 | if (table[i] > 0) { |
124 | nb_leafs++; | |
125 | } | |
126 | } | |
127 | ||
128 | /* allocate memory */ | |
129 | leafs = (leaf_t **) calloc (nb_leafs + 1, sizeof (leaf_t *)); | |
130 | if (leafs == NULL) { | |
5f83300c | 131 | VERBOSE (ERROR, _fprintf (stdout, "can't allocate memory\n")); |
58352bb0 LM |
132 | return NULL; |
133 | } | |
134 | ||
135 | /* initialize leafs */ | |
c9987f3b | 136 | for (i = 0, l = 0; i < NB_BYTES; i++) { |
58352bb0 LM |
137 | if (table[i] > 0) { |
138 | leafs[l] = (leaf_t *) calloc (1, sizeof (leaf_t)); | |
139 | if (leafs[l] == NULL) { | |
5f83300c | 140 | VERBOSE (ERROR, _fprintf (stdout, "can't allocate memory\n")); |
58352bb0 LM |
141 | return NULL; |
142 | } | |
143 | leafs[l]->occ = table[i]; | |
144 | leafs[l]->c = i; | |
145 | l++; | |
146 | } | |
147 | } | |
148 | ||
149 | VERBOSE (DEBUG, PRINTF ("end initiliazing forest\n")); | |
150 | ||
151 | return leafs; | |
152 | } | |
153 | ||
154 | /* create tree */ | |
155 | ||
156 | leaf_t *create_tree (leaf_t **leafs) | |
157 | { | |
158 | leaf_t *branch = NULL; | |
159 | int nb_leafs = 0; | |
37062814 LM |
160 | int last = -1; |
161 | int ante; | |
58352bb0 LM |
162 | int i, j; |
163 | ||
164 | VERBOSE (DEBUG, PRINTF ("start creating tree\n")); | |
165 | ||
166 | /* count number of leafs */ | |
167 | while (leafs[nb_leafs] != NULL) { | |
168 | nb_leafs++; | |
169 | } | |
170 | ||
171 | /* create tree */ | |
172 | for (j = 0; j < nb_leafs - 1; j++) { | |
173 | ||
174 | /* look for leatest occurence */ | |
175 | last = -1; | |
176 | for (i = 0; i < nb_leafs; i++) { | |
177 | if (leafs[i] == NULL) { | |
178 | continue; | |
179 | } | |
180 | if ((last == -1) || (leafs[i]->occ < leafs[last]->occ)) { | |
181 | last = i; | |
182 | } | |
183 | } | |
184 | ||
185 | /* look for ante leatest occurence */ | |
186 | ante = -1; | |
187 | for (i = 0; i < nb_leafs; i++) { | |
188 | if ((i == last) || (leafs[i] == NULL)) { | |
189 | continue; | |
190 | } | |
191 | if ((ante == -1) || (leafs[i]->occ < leafs[ante]->occ)) { | |
192 | ante = i; | |
193 | } | |
194 | } | |
195 | ||
196 | /* create branch */ | |
197 | if ((last == -1) || (ante == -1)) { | |
5f83300c | 198 | VERBOSE (ERROR, _fprintf (stdout, "error during tree building\n")); |
58352bb0 LM |
199 | return NULL; |
200 | } | |
201 | branch = (leaf_t *) calloc (1, sizeof (leaf_t)); | |
202 | if (branch == NULL) { | |
5f83300c | 203 | VERBOSE (ERROR, _fprintf (stdout, "can't allocate memory\n")); |
58352bb0 LM |
204 | return NULL; |
205 | } | |
206 | branch->left = leafs[last]; | |
207 | branch->right = leafs[ante]; | |
208 | branch->occ = branch->left->occ + branch->right->occ; | |
209 | leafs[last] = branch; | |
210 | leafs[ante] = NULL; | |
211 | } | |
212 | ||
213 | VERBOSE (DEBUG, PRINTF ("end creating tree\n")); | |
214 | ||
37062814 | 215 | return (last != -1) ? leafs[last] : NULL; |
58352bb0 LM |
216 | } |
217 | ||
218 | /* free tree */ | |
219 | ||
220 | void free_tree (leaf_t *root) { | |
221 | if (root) { | |
222 | if (root->left) { | |
223 | free_tree (root->left); | |
224 | } | |
225 | if (root->right) { | |
226 | free_tree (root->right); | |
227 | } | |
228 | free (root); | |
229 | } | |
230 | } | |
231 | ||
58352bb0 LM |
232 | /* explore tree */ |
233 | ||
234 | void explore_tree (code_t *table, leaf_t *root, char *code, int index) | |
235 | { | |
236 | if ((root->left == NULL) && (root->right == NULL)) { | |
c9987f3b | 237 | codcpy ((char *)(table + (int)(root->c)), sizeof (code_t), code); |
58352bb0 LM |
238 | } |
239 | else { | |
c9987f3b | 240 | codcpy (code + index, sizeof (code_t), "1"); |
58352bb0 | 241 | explore_tree (table, root->left, code, index + 1); |
c9987f3b | 242 | codcpy (code + index, sizeof (code_t), "0"); |
58352bb0 LM |
243 | explore_tree (table, root->right, code, index + 1); |
244 | } | |
245 | } | |
246 | ||
247 | /* create code table */ | |
248 | ||
249 | code_t *create_code (leaf_t *root) | |
250 | { | |
251 | code_t *table = NULL; | |
252 | code_t code = {0}; | |
253 | ||
254 | VERBOSE (DEBUG, PRINTF ("start creating code table\n")); | |
255 | ||
256 | /* allocate table */ | |
c9987f3b | 257 | table = (code_t *) calloc (NB_BYTES, sizeof (code_t)); |
58352bb0 | 258 | if (table == NULL) { |
5f83300c | 259 | VERBOSE (ERROR, _fprintf (stdout, "can't allocate memory\n")); |
58352bb0 LM |
260 | return NULL; |
261 | } | |
262 | ||
263 | explore_tree (table, root, (char *)&code, 0); | |
264 | ||
265 | VERBOSE (DEBUG, PRINTF ("end creating code table\n")); | |
266 | ||
267 | return table; | |
268 | } | |
269 | ||
270 | /* print code table */ | |
271 | ||
272 | void print_code_table (code_t *codes) | |
273 | { | |
274 | char *code; | |
275 | int i; | |
276 | ||
5f83300c | 277 | _fprintf (stdout, "Code table\n"); |
c9987f3b | 278 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 279 | code = (char *)(codes + i); |
c9987f3b | 280 | if (codlen (code) == 0) { |
58352bb0 LM |
281 | continue; |
282 | } | |
5f83300c | 283 | _fprintf (stdout, "0x%02x '%c': %s\n", i, ((i < 32) || (i > 127)) ? '.' : i, code); |
58352bb0 LM |
284 | } |
285 | } | |
286 | ||
287 | /* encode header and code table */ | |
288 | ||
c9987f3b | 289 | byte_t *encode_header_table (code_t *codes, int *occ) |
58352bb0 | 290 | { |
c9987f3b LM |
291 | byte_t buffer[NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6] = {0}; |
292 | char bits[(NB_BYTES - 1) + 8 + 1] = {0}; | |
58352bb0 | 293 | char *code; |
c9987f3b | 294 | byte_t *header = buffer; |
58352bb0 LM |
295 | int i, j, length, mode; |
296 | int nb = 0; | |
297 | int size = 0; | |
298 | ||
299 | VERBOSE (DEBUG, PRINTF ("start encoding header and code table\n")); | |
300 | ||
301 | /* mode 1 or 2 */ | |
c9987f3b | 302 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 303 | code = (char *)(codes + i); |
c9987f3b | 304 | if (codlen (code) > 0) { |
58352bb0 | 305 | nb++; |
c9987f3b | 306 | size += codlen (code) * occ[i]; |
58352bb0 LM |
307 | } |
308 | } | |
c9987f3b | 309 | mode = (NB_BYTES < 2 * nb + 1) ? 1 : 2; |
58352bb0 LM |
310 | VERBOSE (DEBUG, PRINTF ("nb chars: %d\n", nb)); |
311 | VERBOSE (DEBUG, PRINTF ("mode: %d\n", mode)); | |
312 | VERBOSE (DEBUG, PRINTF ("size: %d\n", size)); | |
37062814 | 313 | VERBOSE (DEBUG, PRINTF ("rem: %d\n", size % 256)); |
58352bb0 LM |
314 | |
315 | /* header */ | |
c9987f3b | 316 | codcpy ((char *)header, sizeof (buffer), (mode == 1) ? "MZ1 " : "MZ2 "); |
58352bb0 LM |
317 | header += 6; |
318 | ||
319 | /* size */ | |
320 | switch (mode) { | |
321 | case 1: | |
c9987f3b | 322 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 323 | code = (char *)(codes + i); |
c9987f3b | 324 | *(header++) = (byte_t) codlen (code); |
58352bb0 LM |
325 | } |
326 | break; | |
327 | case 2: | |
c9987f3b LM |
328 | *(header++) = (byte_t)(nb - 1); |
329 | for (i = 0; i < NB_BYTES; i++) { | |
58352bb0 | 330 | code = (char *)(codes + i); |
c9987f3b LM |
331 | if (codlen (code) > 0) { |
332 | *(header++) = (byte_t) i; | |
333 | *(header++) = (byte_t) codlen (code); | |
58352bb0 LM |
334 | } |
335 | } | |
336 | break; | |
337 | } | |
338 | ||
339 | /* bits */ | |
c9987f3b | 340 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 341 | code = (char *)(codes + i); |
c9987f3b LM |
342 | if (codlen (code) > 0) { |
343 | codcat (bits, sizeof (code_t), code); | |
344 | while (codlen (bits) > (8 - 1)) { | |
58352bb0 LM |
345 | for (j = 0; j < 8; j++) { |
346 | *header <<= 1; | |
347 | if (bits[j] == '1') { | |
348 | (*header)++; | |
349 | } | |
350 | } | |
c9987f3b | 351 | codcpy (bits, sizeof (code_t), bits + 8); |
58352bb0 LM |
352 | header++; |
353 | } | |
354 | } | |
355 | } | |
c9987f3b LM |
356 | if (codlen (bits) > 0) { |
357 | for (j = 0; j < (int)codlen (bits); j++) { | |
58352bb0 LM |
358 | *header <<= 1; |
359 | if (bits[j] == '1') { | |
360 | (*header)++; | |
361 | } | |
362 | } | |
c9987f3b | 363 | for (j = (int)codlen (bits); j < 8; j++) { |
37062814 LM |
364 | *header <<= 1; |
365 | } | |
58352bb0 LM |
366 | header++; |
367 | } | |
368 | ||
369 | /* length */ | |
370 | length = (int)(header - buffer - 6); | |
371 | VERBOSE (DEBUG, PRINTF ("lengh: %d %02x %02x\n", length, length >> 8, length & 0xff)); | |
c9987f3b LM |
372 | buffer[3] = (byte_t)(length >> 8); |
373 | buffer[4] = (byte_t)(length & 0xff); | |
374 | buffer[5] = (byte_t)(size % 256); | |
58352bb0 LM |
375 | |
376 | /* allocation */ | |
c9987f3b | 377 | header = (byte_t *) calloc (length + 6, 1); |
d3dbaf98 | 378 | blkcpy (header, buffer, length + 6); |
58352bb0 LM |
379 | |
380 | VERBOSE (DEBUG, PRINTF ("end encoding header and code table\n")); | |
381 | ||
c9987f3b | 382 | return header; |
58352bb0 LM |
383 | } |
384 | ||
385 | /* print header */ | |
386 | ||
c9987f3b | 387 | void print_header (byte_t *header) |
58352bb0 LM |
388 | { |
389 | int length, i; | |
390 | ||
c9987f3b | 391 | length = (header[3] << 8) + header[4]; |
58352bb0 LM |
392 | VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length)); |
393 | for (i = 0; i < length + 6; i++) { | |
5f83300c | 394 | _fprintf (stdout, "%02x", header[i]); |
58352bb0 | 395 | } |
5f83300c | 396 | _fprintf (stdout, "\n"); |
58352bb0 LM |
397 | } |
398 | ||
399 | /* write crompressed file */ | |
400 | ||
c9987f3b | 401 | int write_compress (char *output, char *input, code_t *codes, byte_t *header) |
58352bb0 | 402 | { |
c9987f3b LM |
403 | byte_t bufin[BUFFER_SIZE] = {0}; |
404 | byte_t bufout[BUFFER_SIZE] = {0}; | |
405 | char bits[(NB_BYTES - 1) + 8 + 1] = {0}; | |
58352bb0 LM |
406 | FILE *fin, *fout; |
407 | int length = 0; | |
408 | int i, j, nbread; | |
c9987f3b | 409 | byte_t *pt; |
58352bb0 LM |
410 | |
411 | VERBOSE (DEBUG, PRINTF ("start writting compressed file\n")); | |
412 | ||
413 | /* open input file */ | |
414 | fin = fopen (input, "rb"); | |
415 | if (fin == NULL) { | |
5f83300c | 416 | VERBOSE (ERROR, _fprintf (stdout, "can't open file '%s' for reading\n", input)); |
58352bb0 LM |
417 | return 1; |
418 | } | |
5f83300c | 419 | VERBOSE (INFO, _fprintf (stdout, "file '%s' opened\n", input)); |
58352bb0 LM |
420 | |
421 | /* open output file */ | |
422 | fout = fopen (output, "wb"); | |
423 | if (fin == NULL) { | |
5f83300c | 424 | VERBOSE (ERROR, _fprintf (stdout, "can't open file '%s' for writing\n", output)); |
58352bb0 LM |
425 | return 1; |
426 | } | |
5f83300c | 427 | VERBOSE (INFO, _fprintf (stdout, "file '%s' opened\n", output)); |
58352bb0 LM |
428 | |
429 | /* write header */ | |
c9987f3b | 430 | length = (header[3] << 8) + header[4]; |
58352bb0 LM |
431 | VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length)); |
432 | fwrite (header, 1, length + 6, fout); | |
433 | ||
434 | /* write file */ | |
435 | pt = bufout; | |
436 | while (!feof (fin)) { | |
437 | nbread = fread (bufin, 1, BUFFER_SIZE, fin); | |
438 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread)); | |
439 | for (i = 0; i < nbread; i++) { | |
c9987f3b LM |
440 | codcat (bits, sizeof (code_t), (char *)(codes + bufin[i])); |
441 | while (codlen (bits) > (8 - 1)) { | |
58352bb0 LM |
442 | for (j = 0; j < 8; j++) { |
443 | *pt <<= 1; | |
444 | if (bits[j] == '1') { | |
445 | (*pt)++; | |
446 | } | |
447 | } | |
c9987f3b | 448 | codcpy (bits, sizeof (code_t), bits + 8); |
37062814 | 449 | if (pt - bufout == BUFFER_SIZE - 1) { |
58352bb0 LM |
450 | fwrite (bufout, 1, BUFFER_SIZE, fout); |
451 | pt = bufout; | |
37062814 LM |
452 | } else { |
453 | pt++; | |
58352bb0 LM |
454 | } |
455 | } | |
456 | } | |
457 | } | |
c9987f3b LM |
458 | VERBOSE (DEBUG, PRINTF ("lastest bits : %d\n", codlen (bits))); |
459 | if (codlen (bits) > 0) { | |
460 | for (j = 0; j < (int)codlen (bits); j++) { | |
58352bb0 LM |
461 | *pt <<= 1; |
462 | if (bits[j] == '1') { | |
463 | (*pt)++; | |
464 | } | |
465 | } | |
c9987f3b | 466 | for (j = (int)codlen (bits); j < 8; j++) { |
37062814 | 467 | *pt <<= 1; |
58352bb0 LM |
468 | } |
469 | pt++; | |
470 | } | |
471 | if (pt != bufout) { | |
37062814 | 472 | VERBOSE (DEBUG, PRINTF ("last partial buffer written: %u\n", pt - bufout)); |
58352bb0 LM |
473 | fwrite (bufout, 1, pt - bufout, fout); |
474 | } | |
475 | ||
476 | /* closing */ | |
477 | fclose (fin); | |
478 | fclose (fout); | |
479 | ||
480 | VERBOSE (DEBUG, PRINTF ("end writting compressed file\n")); | |
481 | ||
482 | return 0; | |
483 | } | |
484 | ||
37062814 LM |
485 | /* read header */ |
486 | ||
487 | code_t *read_header (char *filename) { | |
c9987f3b | 488 | byte_t buffer[NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6] = {0}; |
37062814 | 489 | code_t *table = NULL; |
c9987f3b LM |
490 | byte_t *codes = NULL; |
491 | byte_t cur; | |
492 | int lengths[NB_BYTES] = {0}; | |
37062814 LM |
493 | FILE *fid = NULL; |
494 | int mode = 0; | |
495 | size_t i, j, l, nb, size; | |
496 | ||
497 | VERBOSE (DEBUG, PRINTF ("start reading header\n")); | |
498 | ||
499 | /* open file */ | |
500 | fid = fopen (filename, "rb"); | |
501 | if (fid == NULL) { | |
5f83300c | 502 | VERBOSE (ERROR, _fprintf (stdout, "can't open file '%s'\n", filename)); |
37062814 LM |
503 | return NULL; |
504 | } | |
5f83300c | 505 | VERBOSE (INFO, _fprintf (stdout, "file '%s' opened\n", filename)); |
37062814 LM |
506 | |
507 | /* read magic number */ | |
508 | nb = fread (buffer, 1, 6, fid); | |
509 | VERBOSE (DEBUG, PRINTF ("nb, buffer: %d 0x%02x 0x%02x\n", nb, buffer[0], buffer[1])); | |
510 | if ((nb == 6) && (buffer[0] == 'M') && (buffer[1] == 'Z')) { | |
511 | mode = (buffer[2] == '1') ? 1 : (buffer[2] == '2') ? 2 : 0; | |
c9987f3b | 512 | size = (buffer[3] << 8) + buffer[4]; |
37062814 | 513 | VERBOSE (DEBUG, PRINTF ("mode, size: %d %d\n", mode, size)); |
c9987f3b | 514 | if (size > NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES) { |
37062814 LM |
515 | mode = 0; |
516 | } else { | |
517 | nb = fread (buffer, 1, size, fid); | |
518 | VERBOSE (DEBUG, PRINTF ("nb read: %d\n", nb)); | |
519 | if (nb != size) { | |
520 | mode = 0; | |
521 | } | |
522 | } | |
523 | } | |
524 | fclose (fid); | |
525 | if (mode == 0) { | |
5f83300c | 526 | VERBOSE (ERROR, _fprintf (stdout, "incorrect file\n")); |
37062814 LM |
527 | return NULL; |
528 | } | |
529 | ||
530 | /* analyse header */ | |
c9987f3b | 531 | codes = buffer; |
37062814 LM |
532 | switch (mode) { |
533 | case 1: | |
c9987f3b | 534 | for (i = 0; i < NB_BYTES; i++) { |
37062814 LM |
535 | lengths[i] = *(codes++); |
536 | } | |
537 | break; | |
538 | case 2: | |
539 | nb = *(codes++) + 1; | |
540 | VERBOSE (DEBUG, PRINTF ("nb codes: %d\n", nb)); | |
541 | for (i = 0; i < nb; i++) { | |
542 | j = *(codes++); | |
543 | lengths[j] = *(codes++); | |
544 | } | |
545 | break; | |
546 | } | |
c9987f3b | 547 | VERBOSE (DEBUG, for (i = 0; i < NB_BYTES; i++) if (lengths[i]) PRINTF ("%d: %d\n", i, lengths[i])); |
37062814 LM |
548 | |
549 | /* check lengths */ | |
c9987f3b | 550 | for (i = 0, l = 0; i < NB_BYTES; i++) { |
37062814 LM |
551 | l += lengths[i]; |
552 | } | |
553 | if (((mode == 1) && (size - 256 != (l + 7) / 8)) || | |
554 | ((mode == 2) && (size - 2 * nb - 1 != (l + 7) / 8))) { | |
5f83300c | 555 | VERBOSE (ERROR, _fprintf (stdout, "incorrect code table length\n")); |
37062814 LM |
556 | return NULL; |
557 | } | |
558 | ||
559 | /* allocate table */ | |
c9987f3b | 560 | table = (code_t *) calloc (NB_BYTES, sizeof (code_t)); |
37062814 | 561 | if (table == NULL) { |
5f83300c | 562 | VERBOSE (ERROR, _fprintf (stdout, "can't allocate memory\n")); |
37062814 LM |
563 | return NULL; |
564 | } | |
565 | ||
566 | /* decode code */ | |
567 | cur = *(codes++); | |
568 | l = 8; | |
c9987f3b | 569 | for (i = 0; i < NB_BYTES; i++) { |
37062814 LM |
570 | if (lengths[i] == 0) { |
571 | continue; | |
572 | } | |
573 | while (lengths[i]--) { | |
c9987f3b | 574 | codcat ((char *)(table + i), sizeof (code_t), ((cur & 0x80) == 0) ? "0" : "1"); |
37062814 LM |
575 | l--; |
576 | cur <<= 1; | |
577 | if (l == 0) { | |
578 | cur = *(codes++); | |
579 | l = 8; | |
580 | } | |
581 | } | |
582 | } | |
583 | ||
584 | VERBOSE (DEBUG, PRINTF ("end reading header\n")); | |
585 | ||
586 | return table; | |
587 | } | |
588 | ||
589 | /* write decompressed file */ | |
590 | ||
591 | int write_decompress (char *output, char *input, code_t *codes) | |
592 | { | |
c9987f3b LM |
593 | byte_t bufin[BUFFER_SIZE] = {0}; |
594 | byte_t bufout[BUFFER_SIZE] = {0}; | |
595 | byte_t bufhea[MAX(NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6, BUFFER_SIZE)] = {0}; | |
596 | char bits[(NB_BYTES - 1) + 1] = {0}; | |
37062814 LM |
597 | FILE *fin, *fout; |
598 | int i, j, k, nb, size, rem; | |
599 | int is_found; | |
600 | int l = 0; | |
c9987f3b | 601 | byte_t *pt; |
37062814 LM |
602 | |
603 | VERBOSE (DEBUG, PRINTF ("start writing decompressed file\n")); | |
604 | ||
605 | /* open file for reading */ | |
606 | fin = fopen (input, "rb"); | |
607 | if (fin == NULL) { | |
5f83300c | 608 | VERBOSE (ERROR, _fprintf (stdout, "can't open file '%s' for reading\n", input)); |
37062814 LM |
609 | return 1; |
610 | } | |
5f83300c | 611 | VERBOSE (INFO, _fprintf (stdout, "file '%s' opened\n", input)); |
37062814 LM |
612 | |
613 | /* read magic number */ | |
c9987f3b | 614 | nb = fread (bufhea, 1, 6, fin); |
37062814 | 615 | if (nb != 6) { |
5f83300c | 616 | VERBOSE (ERROR, _fprintf (stdout, "can't read file\n")); |
37062814 LM |
617 | fclose (fin); |
618 | return 1; | |
619 | } | |
c9987f3b | 620 | size = (bufhea[3] << 8) + bufhea[4]; |
5f83300c | 621 | VERBOSE (DEBUG, _fprintf (stdout, "table size: %d\n", size)); |
c9987f3b | 622 | rem = bufhea[5]; |
5f83300c | 623 | VERBOSE (DEBUG, _fprintf (stdout, "remainder: %d\n", rem)); |
c9987f3b | 624 | nb = fread (bufhea, 1, size, fin); |
37062814 | 625 | if (nb != size) { |
5f83300c | 626 | VERBOSE (ERROR, _fprintf (stdout, "can't read file\n")); |
37062814 LM |
627 | fclose (fin); |
628 | return 1; | |
629 | } | |
630 | ||
631 | /* open file for writing */ | |
632 | fout = fopen (output, "wb"); | |
633 | if (fout == NULL) { | |
5f83300c | 634 | VERBOSE (ERROR, _fprintf (stdout, "can't open file '%s' for writing\n", output)); |
37062814 LM |
635 | return 2; |
636 | } | |
5f83300c | 637 | VERBOSE (INFO, _fprintf (stdout, "file '%s' opened\n", output)); |
37062814 LM |
638 | |
639 | /* write file */ | |
640 | pt = bufout; | |
641 | while (!feof (fin)) { | |
642 | nb = fread (bufin, 1, BUFFER_SIZE, fin); | |
643 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nb)); | |
644 | for (i = 0; i < nb; i++) { | |
645 | for (j = 0; j < 8; j++) { | |
c9987f3b | 646 | codcat (bits, sizeof (bits), ((bufin[i] & 0x80) == 0) ? "0" : "1"); |
37062814 LM |
647 | bufin[i] <<= 1; |
648 | l++; | |
c9987f3b | 649 | VERBOSE (DEBUG, PRINTF ("bits: %d - %s\n", codlen (bits), bits)); |
37062814 LM |
650 | |
651 | /* look for correct code */ | |
652 | is_found = 0; | |
c9987f3b LM |
653 | for (k = 0; (k < NB_BYTES) && (!is_found); k++) { |
654 | if (codcmp ((char *)(codes + k), bits) == 0) { | |
37062814 LM |
655 | is_found = 1; |
656 | VERBOSE (DEBUG, PRINTF ("found: %d\n", k)); | |
657 | *pt= k; | |
658 | bits[0] = 0; | |
659 | if (pt - bufout == BUFFER_SIZE - 1) { | |
660 | VERBOSE (DEBUG, PRINTF ("nb buffer out: %u\n", (pt - bufout))); | |
661 | fwrite (bufout, 1, BUFFER_SIZE, fout); | |
662 | pt = bufout; | |
663 | } else { | |
664 | pt++; | |
665 | } | |
666 | } | |
667 | } | |
668 | if ((i == nb - 1) && (l % 256 == rem) && (feof (fin))) { | |
669 | VERBOSE (DEBUG, PRINTF ("break\n")); | |
670 | break; | |
671 | } | |
672 | } | |
673 | } | |
674 | } | |
675 | if (pt != bufout) { | |
676 | VERBOSE (DEBUG, PRINTF ("nb buffer out: %u\n", (pt - bufout))); | |
677 | fwrite (bufout, 1, pt - bufout, fout); | |
678 | } | |
679 | ||
680 | /* close files */ | |
681 | fclose (fin); | |
682 | fclose (fout); | |
683 | ||
684 | VERBOSE (DEBUG, PRINTF ("end writing decompressed file\n")); | |
685 | ||
686 | return 0; | |
687 | } | |
688 | ||
58352bb0 LM |
689 | /* main function */ |
690 | ||
691 | int main (int argc, char *argv[]) | |
692 | { | |
693 | char *input = NULL; | |
694 | char *output = NULL; | |
695 | int *table = NULL; | |
696 | leaf_t **leafs = NULL; | |
697 | leaf_t *root = NULL; | |
698 | code_t *codes = NULL; | |
c9987f3b | 699 | byte_t *header = NULL; |
58352bb0 | 700 | int mode = COMPRESS; |
37062814 | 701 | int rc = 1; |
58352bb0 LM |
702 | |
703 | progname = argv[0]; | |
704 | ||
705 | int c; | |
d3dbaf98 | 706 | char * arg; |
37062814 | 707 | VERBOSE (DEBUG, PRINTF ("start processing arguments\n")); |
d3dbaf98 LM |
708 | while (argc-- > 1) { |
709 | arg = *(++argv); | |
710 | if (arg[0] != '-') { | |
5f83300c | 711 | _fprintf (stderr, "%s: invalid option -- %s\n", progname, arg); |
d3dbaf98 LM |
712 | usage (1); |
713 | } | |
714 | c = arg[1]; | |
37062814 | 715 | VERBOSE (DEBUG, PRINTF ("option: %c\n", c)); |
58352bb0 LM |
716 | switch (c) { |
717 | case 'c': | |
718 | mode = COMPRESS; | |
719 | break; | |
720 | case 'd': | |
721 | mode = DECOMPRESS; | |
722 | break; | |
723 | case 'i': | |
d3dbaf98 | 724 | input = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
37062814 | 725 | VERBOSE (DEBUG, PRINTF ("input: %s\n", input)); |
58352bb0 LM |
726 | break; |
727 | case 'o': | |
d3dbaf98 | 728 | output = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
37062814 | 729 | VERBOSE (DEBUG, PRINTF ("output: %s\n", output)); |
58352bb0 LM |
730 | break; |
731 | case 'v': | |
d3dbaf98 LM |
732 | arg = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
733 | if (arg == NULL) { | |
5f83300c | 734 | _fprintf (stderr, "%s: missing verbose level\n", progname); |
d3dbaf98 LM |
735 | usage (1); |
736 | } | |
737 | verbose = atoi (arg); | |
5f83300c | 738 | VERBOSE (INFO, _fprintf (stdout, "verbose: %d\n", verbose)); |
58352bb0 LM |
739 | break; |
740 | case 'h': | |
741 | default: | |
742 | usage (c != 'h'); | |
743 | } | |
744 | } | |
d3dbaf98 | 745 | if ((input == NULL) || (output == NULL)) { |
5f83300c | 746 | _fprintf (stderr, "%s: missing file\n", progname); |
58352bb0 LM |
747 | usage (1); |
748 | } | |
37062814 | 749 | VERBOSE (DEBUG, PRINTF ("end processing arguments\n")); |
58352bb0 LM |
750 | |
751 | switch (mode) { | |
752 | case COMPRESS: | |
753 | table = create_table (input); | |
754 | if (table == NULL) break; | |
755 | VERBOSE (INFO, print_occ_table (table)); | |
756 | ||
757 | leafs = init_forest (table); | |
758 | if (leafs == NULL) break; | |
759 | root = create_tree (leafs); | |
760 | if (root == NULL) break; | |
761 | codes = create_code (root); | |
762 | if (codes == NULL) break; | |
763 | VERBOSE (INFO, print_code_table (codes)); | |
764 | header = encode_header_table (codes, table); | |
765 | if (header == NULL) break; | |
766 | VERBOSE (INFO, print_header (header)); | |
767 | rc = write_compress (output, input, codes, header); | |
768 | break; | |
769 | case DECOMPRESS: | |
37062814 LM |
770 | codes = read_header (input); |
771 | if (codes == NULL) break; | |
772 | VERBOSE (INFO, print_code_table (codes)); | |
773 | rc = write_decompress (output, input, codes); | |
58352bb0 LM |
774 | break; |
775 | } | |
776 | ||
777 | /* clean everything */ | |
c9987f3b | 778 | fflush (stdout); |
58352bb0 LM |
779 | if (header) free (header); |
780 | if (codes) free (codes); | |
781 | if (root) free_tree (root); | |
782 | if (leafs) free (leafs); | |
783 | if (table) free (table); | |
784 | ||
785 | return rc; | |
786 | } | |
787 | ||
788 | // test: compress.exe -h | |
789 | // test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }' | |
790 | // test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }' | |
791 | // test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }' | |
37062814 LM |
792 | // test: compress.exe -c -i compress.c -o compress.mz |
793 | // test: ls -sS1 compress.c compress.mz | tail -1 | grep compress.mz | |
794 | // test: compress.exe -d -i compress.mz -o tmp.c | |
795 | // test: cmp compress.c tmp.c | |
796 | // test: rm compress.mz tmp.c | |
58352bb0 | 797 | |
5f83300c | 798 | /* vim: set ts=4 sw=4 et */ |