Commit | Line | Data |
---|---|---|
58352bb0 LM |
1 | /* depend: */ |
2 | /* cflags: */ | |
5e0c5bc8 | 3 | /* linker: atoi.o code.o debug.o fprintf.o */ |
58352bb0 | 4 | |
bf1d9554 | 5 | #include <fcntl.h> |
58352bb0 | 6 | #include <malloc.h> |
bf1d9554 | 7 | #include <unistd.h> |
5e0c5bc8 | 8 | #include "atoi.h" |
c9987f3b LM |
9 | #include "code.h" |
10 | #include "debug.h" | |
5f83300c | 11 | #include "fprintf.h" |
58352bb0 LM |
12 | |
13 | /* constants */ | |
14 | ||
58352bb0 LM |
15 | #define BUFFER_SIZE 4096 |
16 | ||
58352bb0 LM |
17 | /* macros */ |
18 | ||
58352bb0 LM |
19 | /* gobal variables */ |
20 | ||
21 | char *progname = NULL; | |
58352bb0 LM |
22 | |
23 | /* help function */ | |
24 | ||
25 | void usage (int ret) | |
26 | { | |
27 | FILE *fd = ret ? stderr : stdout; | |
5e0c5bc8 LM |
28 | myfprintf (fd, "usage: %s\n", progname); |
29 | myfprintf (fd, " -h : help message\n"); | |
30 | myfprintf (fd, " -i <file>: input file\n"); | |
31 | myfprintf (fd, " -o <file>: output file\n"); | |
32 | myfprintf (fd, " -v : verbose level (%d)\n", verbose); | |
58352bb0 LM |
33 | |
34 | exit (ret); | |
35 | } | |
36 | ||
d3dbaf98 LM |
37 | void blkcpy (void *dst, const void *src, int len) |
38 | { | |
39 | while (len--) { | |
40 | *((char *)dst++) = *((char *)src++); | |
41 | } | |
42 | } | |
43 | ||
58352bb0 LM |
44 | /* create occurence table */ |
45 | ||
46 | int *create_table (char *filename) | |
47 | { | |
c9987f3b | 48 | byte_t buffer[BUFFER_SIZE] = {0}; |
58352bb0 LM |
49 | int nbread; |
50 | int *table = NULL; | |
bf1d9554 | 51 | int fid = 0; |
58352bb0 | 52 | |
37062814 | 53 | VERBOSE (DEBUG, PRINTF ("start creating occurence table\n")); |
58352bb0 LM |
54 | |
55 | /* memory allocation */ | |
c9987f3b | 56 | table = (int *) calloc (NB_BYTES, sizeof (int)); |
58352bb0 | 57 | if (table == NULL) { |
5e0c5bc8 | 58 | VERBOSE (ERROR, myfprintf (stdout, "can't allocate memory\n")); |
58352bb0 LM |
59 | return NULL; |
60 | } | |
5e0c5bc8 | 61 | VERBOSE (INFO, myfprintf (stdout, "memory allocated\n")); |
58352bb0 LM |
62 | |
63 | /* open file */ | |
bf1d9554 LM |
64 | fid = open (filename, O_RDONLY|O_RAW); |
65 | if (fid == -1) { | |
5e0c5bc8 | 66 | VERBOSE (ERROR, myfprintf (stdout, "can't open file '%s'\n", filename)); |
58352bb0 LM |
67 | free (table); |
68 | return NULL; | |
69 | } | |
5e0c5bc8 | 70 | VERBOSE (INFO, myfprintf (stdout, "file '%s' opened\n", filename)); |
58352bb0 LM |
71 | |
72 | /* read file */ | |
bf1d9554 | 73 | while ((nbread = read (fid, buffer, BUFFER_SIZE)) > 0) { |
58352bb0 LM |
74 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread)); |
75 | while (nbread--) { | |
76 | table[(int)buffer[nbread]]++; | |
77 | } | |
78 | } | |
79 | ||
80 | /* close file */ | |
bf1d9554 | 81 | close (fid); |
58352bb0 | 82 | |
37062814 | 83 | VERBOSE (DEBUG, PRINTF ("end creating occurence table\n")); |
58352bb0 LM |
84 | |
85 | return table; | |
86 | } | |
87 | ||
88 | /* print occurence table */ | |
89 | ||
90 | void print_occ_table (int *table) | |
91 | { | |
92 | int i; | |
93 | ||
5e0c5bc8 | 94 | myfprintf (stdout, "Occurence table\n"); |
c9987f3b | 95 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 96 | if (table[i]) { |
5e0c5bc8 | 97 | myfprintf (stdout, "0x%02x '%c': %d\n", i, ((i < 32) || (i > 127)) ? '.' : i, table[i]); |
58352bb0 LM |
98 | } |
99 | } | |
100 | } | |
101 | ||
102 | /* leaf structure */ | |
103 | ||
104 | typedef struct _leaf_t | |
105 | { | |
106 | struct _leaf_t *left; | |
107 | struct _leaf_t *right; | |
108 | int occ; | |
c9987f3b | 109 | byte_t c; |
58352bb0 LM |
110 | } leaf_t; |
111 | ||
112 | /* initialize forest */ | |
113 | ||
114 | leaf_t **init_forest (int *table) | |
115 | { | |
116 | leaf_t **leafs = NULL; | |
117 | int nb_leafs = 0; | |
118 | int i, l; | |
119 | ||
120 | VERBOSE (DEBUG, PRINTF ("start initiliazing forest\n")); | |
121 | ||
122 | /* count number of leafs */ | |
c9987f3b | 123 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 LM |
124 | if (table[i] > 0) { |
125 | nb_leafs++; | |
126 | } | |
127 | } | |
128 | ||
129 | /* allocate memory */ | |
130 | leafs = (leaf_t **) calloc (nb_leafs + 1, sizeof (leaf_t *)); | |
131 | if (leafs == NULL) { | |
5e0c5bc8 | 132 | VERBOSE (ERROR, myfprintf (stdout, "can't allocate memory\n")); |
58352bb0 LM |
133 | return NULL; |
134 | } | |
135 | ||
136 | /* initialize leafs */ | |
c9987f3b | 137 | for (i = 0, l = 0; i < NB_BYTES; i++) { |
58352bb0 LM |
138 | if (table[i] > 0) { |
139 | leafs[l] = (leaf_t *) calloc (1, sizeof (leaf_t)); | |
140 | if (leafs[l] == NULL) { | |
5e0c5bc8 | 141 | VERBOSE (ERROR, myfprintf (stdout, "can't allocate memory\n")); |
58352bb0 LM |
142 | return NULL; |
143 | } | |
144 | leafs[l]->occ = table[i]; | |
145 | leafs[l]->c = i; | |
146 | l++; | |
147 | } | |
148 | } | |
149 | ||
150 | VERBOSE (DEBUG, PRINTF ("end initiliazing forest\n")); | |
151 | ||
152 | return leafs; | |
153 | } | |
154 | ||
155 | /* create tree */ | |
156 | ||
157 | leaf_t *create_tree (leaf_t **leafs) | |
158 | { | |
159 | leaf_t *branch = NULL; | |
160 | int nb_leafs = 0; | |
37062814 LM |
161 | int last = -1; |
162 | int ante; | |
58352bb0 LM |
163 | int i, j; |
164 | ||
165 | VERBOSE (DEBUG, PRINTF ("start creating tree\n")); | |
166 | ||
167 | /* count number of leafs */ | |
168 | while (leafs[nb_leafs] != NULL) { | |
169 | nb_leafs++; | |
170 | } | |
171 | ||
172 | /* create tree */ | |
173 | for (j = 0; j < nb_leafs - 1; j++) { | |
174 | ||
175 | /* look for leatest occurence */ | |
176 | last = -1; | |
177 | for (i = 0; i < nb_leafs; i++) { | |
178 | if (leafs[i] == NULL) { | |
179 | continue; | |
180 | } | |
181 | if ((last == -1) || (leafs[i]->occ < leafs[last]->occ)) { | |
182 | last = i; | |
183 | } | |
184 | } | |
185 | ||
186 | /* look for ante leatest occurence */ | |
187 | ante = -1; | |
188 | for (i = 0; i < nb_leafs; i++) { | |
189 | if ((i == last) || (leafs[i] == NULL)) { | |
190 | continue; | |
191 | } | |
192 | if ((ante == -1) || (leafs[i]->occ < leafs[ante]->occ)) { | |
193 | ante = i; | |
194 | } | |
195 | } | |
196 | ||
197 | /* create branch */ | |
198 | if ((last == -1) || (ante == -1)) { | |
5e0c5bc8 | 199 | VERBOSE (ERROR, myfprintf (stdout, "error during tree building\n")); |
58352bb0 LM |
200 | return NULL; |
201 | } | |
202 | branch = (leaf_t *) calloc (1, sizeof (leaf_t)); | |
203 | if (branch == NULL) { | |
5e0c5bc8 | 204 | VERBOSE (ERROR, myfprintf (stdout, "can't allocate memory\n")); |
58352bb0 LM |
205 | return NULL; |
206 | } | |
207 | branch->left = leafs[last]; | |
208 | branch->right = leafs[ante]; | |
209 | branch->occ = branch->left->occ + branch->right->occ; | |
210 | leafs[last] = branch; | |
211 | leafs[ante] = NULL; | |
212 | } | |
213 | ||
214 | VERBOSE (DEBUG, PRINTF ("end creating tree\n")); | |
215 | ||
37062814 | 216 | return (last != -1) ? leafs[last] : NULL; |
58352bb0 LM |
217 | } |
218 | ||
219 | /* free tree */ | |
220 | ||
221 | void free_tree (leaf_t *root) { | |
222 | if (root) { | |
223 | if (root->left) { | |
224 | free_tree (root->left); | |
225 | } | |
226 | if (root->right) { | |
227 | free_tree (root->right); | |
228 | } | |
229 | free (root); | |
230 | } | |
231 | } | |
232 | ||
58352bb0 LM |
233 | /* explore tree */ |
234 | ||
235 | void explore_tree (code_t *table, leaf_t *root, char *code, int index) | |
236 | { | |
237 | if ((root->left == NULL) && (root->right == NULL)) { | |
c9987f3b | 238 | codcpy ((char *)(table + (int)(root->c)), sizeof (code_t), code); |
58352bb0 LM |
239 | } |
240 | else { | |
c9987f3b | 241 | codcpy (code + index, sizeof (code_t), "1"); |
58352bb0 | 242 | explore_tree (table, root->left, code, index + 1); |
c9987f3b | 243 | codcpy (code + index, sizeof (code_t), "0"); |
58352bb0 LM |
244 | explore_tree (table, root->right, code, index + 1); |
245 | } | |
246 | } | |
247 | ||
248 | /* create code table */ | |
249 | ||
250 | code_t *create_code (leaf_t *root) | |
251 | { | |
252 | code_t *table = NULL; | |
253 | code_t code = {0}; | |
254 | ||
255 | VERBOSE (DEBUG, PRINTF ("start creating code table\n")); | |
256 | ||
257 | /* allocate table */ | |
c9987f3b | 258 | table = (code_t *) calloc (NB_BYTES, sizeof (code_t)); |
58352bb0 | 259 | if (table == NULL) { |
5e0c5bc8 | 260 | VERBOSE (ERROR, myfprintf (stdout, "can't allocate memory\n")); |
58352bb0 LM |
261 | return NULL; |
262 | } | |
263 | ||
264 | explore_tree (table, root, (char *)&code, 0); | |
265 | ||
266 | VERBOSE (DEBUG, PRINTF ("end creating code table\n")); | |
267 | ||
268 | return table; | |
269 | } | |
270 | ||
271 | /* print code table */ | |
272 | ||
273 | void print_code_table (code_t *codes) | |
274 | { | |
275 | char *code; | |
276 | int i; | |
277 | ||
5e0c5bc8 | 278 | myfprintf (stdout, "Code table\n"); |
c9987f3b | 279 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 280 | code = (char *)(codes + i); |
c9987f3b | 281 | if (codlen (code) == 0) { |
58352bb0 LM |
282 | continue; |
283 | } | |
5e0c5bc8 | 284 | myfprintf (stdout, "0x%02x '%c': %s\n", i, ((i < 32) || (i > 127)) ? '.' : i, code); |
58352bb0 LM |
285 | } |
286 | } | |
287 | ||
288 | /* encode header and code table */ | |
289 | ||
c9987f3b | 290 | byte_t *encode_header_table (code_t *codes, int *occ) |
58352bb0 | 291 | { |
c9987f3b LM |
292 | byte_t buffer[NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6] = {0}; |
293 | char bits[(NB_BYTES - 1) + 8 + 1] = {0}; | |
58352bb0 | 294 | char *code; |
c9987f3b | 295 | byte_t *header = buffer; |
58352bb0 LM |
296 | int i, j, length, mode; |
297 | int nb = 0; | |
298 | int size = 0; | |
299 | ||
300 | VERBOSE (DEBUG, PRINTF ("start encoding header and code table\n")); | |
301 | ||
302 | /* mode 1 or 2 */ | |
c9987f3b | 303 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 304 | code = (char *)(codes + i); |
c9987f3b | 305 | if (codlen (code) > 0) { |
58352bb0 | 306 | nb++; |
c9987f3b | 307 | size += codlen (code) * occ[i]; |
58352bb0 LM |
308 | } |
309 | } | |
c9987f3b | 310 | mode = (NB_BYTES < 2 * nb + 1) ? 1 : 2; |
58352bb0 LM |
311 | VERBOSE (DEBUG, PRINTF ("nb chars: %d\n", nb)); |
312 | VERBOSE (DEBUG, PRINTF ("mode: %d\n", mode)); | |
313 | VERBOSE (DEBUG, PRINTF ("size: %d\n", size)); | |
37062814 | 314 | VERBOSE (DEBUG, PRINTF ("rem: %d\n", size % 256)); |
58352bb0 LM |
315 | |
316 | /* header */ | |
c9987f3b | 317 | codcpy ((char *)header, sizeof (buffer), (mode == 1) ? "MZ1 " : "MZ2 "); |
58352bb0 LM |
318 | header += 6; |
319 | ||
320 | /* size */ | |
321 | switch (mode) { | |
322 | case 1: | |
c9987f3b | 323 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 324 | code = (char *)(codes + i); |
c9987f3b | 325 | *(header++) = (byte_t) codlen (code); |
58352bb0 LM |
326 | } |
327 | break; | |
328 | case 2: | |
c9987f3b LM |
329 | *(header++) = (byte_t)(nb - 1); |
330 | for (i = 0; i < NB_BYTES; i++) { | |
58352bb0 | 331 | code = (char *)(codes + i); |
c9987f3b LM |
332 | if (codlen (code) > 0) { |
333 | *(header++) = (byte_t) i; | |
334 | *(header++) = (byte_t) codlen (code); | |
58352bb0 LM |
335 | } |
336 | } | |
337 | break; | |
338 | } | |
339 | ||
340 | /* bits */ | |
c9987f3b | 341 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 342 | code = (char *)(codes + i); |
c9987f3b LM |
343 | if (codlen (code) > 0) { |
344 | codcat (bits, sizeof (code_t), code); | |
345 | while (codlen (bits) > (8 - 1)) { | |
58352bb0 LM |
346 | for (j = 0; j < 8; j++) { |
347 | *header <<= 1; | |
348 | if (bits[j] == '1') { | |
349 | (*header)++; | |
350 | } | |
351 | } | |
c9987f3b | 352 | codcpy (bits, sizeof (code_t), bits + 8); |
58352bb0 LM |
353 | header++; |
354 | } | |
355 | } | |
356 | } | |
c9987f3b LM |
357 | if (codlen (bits) > 0) { |
358 | for (j = 0; j < (int)codlen (bits); j++) { | |
58352bb0 LM |
359 | *header <<= 1; |
360 | if (bits[j] == '1') { | |
361 | (*header)++; | |
362 | } | |
363 | } | |
c9987f3b | 364 | for (j = (int)codlen (bits); j < 8; j++) { |
37062814 LM |
365 | *header <<= 1; |
366 | } | |
58352bb0 LM |
367 | header++; |
368 | } | |
369 | ||
370 | /* length */ | |
371 | length = (int)(header - buffer - 6); | |
372 | VERBOSE (DEBUG, PRINTF ("lengh: %d %02x %02x\n", length, length >> 8, length & 0xff)); | |
c9987f3b LM |
373 | buffer[3] = (byte_t)(length >> 8); |
374 | buffer[4] = (byte_t)(length & 0xff); | |
375 | buffer[5] = (byte_t)(size % 256); | |
58352bb0 LM |
376 | |
377 | /* allocation */ | |
c9987f3b | 378 | header = (byte_t *) calloc (length + 6, 1); |
d3dbaf98 | 379 | blkcpy (header, buffer, length + 6); |
58352bb0 LM |
380 | |
381 | VERBOSE (DEBUG, PRINTF ("end encoding header and code table\n")); | |
382 | ||
c9987f3b | 383 | return header; |
58352bb0 LM |
384 | } |
385 | ||
386 | /* print header */ | |
387 | ||
c9987f3b | 388 | void print_header (byte_t *header) |
58352bb0 LM |
389 | { |
390 | int length, i; | |
391 | ||
c9987f3b | 392 | length = (header[3] << 8) + header[4]; |
58352bb0 LM |
393 | VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length)); |
394 | for (i = 0; i < length + 6; i++) { | |
5e0c5bc8 | 395 | myfprintf (stdout, "%02x", header[i]); |
58352bb0 | 396 | } |
5e0c5bc8 | 397 | myfprintf (stdout, "\n"); |
58352bb0 LM |
398 | } |
399 | ||
400 | /* write crompressed file */ | |
401 | ||
c9987f3b | 402 | int write_compress (char *output, char *input, code_t *codes, byte_t *header) |
58352bb0 | 403 | { |
c9987f3b LM |
404 | byte_t bufin[BUFFER_SIZE] = {0}; |
405 | byte_t bufout[BUFFER_SIZE] = {0}; | |
406 | char bits[(NB_BYTES - 1) + 8 + 1] = {0}; | |
bf1d9554 | 407 | int fin, fout; |
58352bb0 LM |
408 | int length = 0; |
409 | int i, j, nbread; | |
c9987f3b | 410 | byte_t *pt; |
58352bb0 LM |
411 | |
412 | VERBOSE (DEBUG, PRINTF ("start writting compressed file\n")); | |
413 | ||
414 | /* open input file */ | |
bf1d9554 LM |
415 | fin = open (input, O_RDONLY|O_RAW); |
416 | if (fin == -1) { | |
5e0c5bc8 | 417 | VERBOSE (ERROR, myfprintf (stdout, "can't open file '%s' for reading\n", input)); |
58352bb0 LM |
418 | return 1; |
419 | } | |
5e0c5bc8 | 420 | VERBOSE (INFO, myfprintf (stdout, "file '%s' opened\n", input)); |
58352bb0 LM |
421 | |
422 | /* open output file */ | |
bf1d9554 LM |
423 | fout = open (output, O_WRONLY|O_CREAT|O_RAW, 0700); |
424 | if (fout == -1) { | |
5e0c5bc8 | 425 | VERBOSE (ERROR, myfprintf (stdout, "can't open file '%s' for writing\n", output)); |
bf1d9554 | 426 | close (fin); |
58352bb0 LM |
427 | return 1; |
428 | } | |
5e0c5bc8 | 429 | VERBOSE (INFO, myfprintf (stdout, "file '%s' opened\n", output)); |
58352bb0 LM |
430 | |
431 | /* write header */ | |
c9987f3b | 432 | length = (header[3] << 8) + header[4]; |
58352bb0 | 433 | VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length)); |
bf1d9554 | 434 | write (fout, header, length + 6); |
58352bb0 LM |
435 | |
436 | /* write file */ | |
437 | pt = bufout; | |
bf1d9554 | 438 | while ((nbread = read (fin, bufin, BUFFER_SIZE)) > 0) { |
58352bb0 LM |
439 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread)); |
440 | for (i = 0; i < nbread; i++) { | |
c9987f3b LM |
441 | codcat (bits, sizeof (code_t), (char *)(codes + bufin[i])); |
442 | while (codlen (bits) > (8 - 1)) { | |
58352bb0 LM |
443 | for (j = 0; j < 8; j++) { |
444 | *pt <<= 1; | |
445 | if (bits[j] == '1') { | |
446 | (*pt)++; | |
447 | } | |
448 | } | |
c9987f3b | 449 | codcpy (bits, sizeof (code_t), bits + 8); |
37062814 | 450 | if (pt - bufout == BUFFER_SIZE - 1) { |
bf1d9554 | 451 | write (fout, bufout, BUFFER_SIZE); |
58352bb0 | 452 | pt = bufout; |
37062814 LM |
453 | } else { |
454 | pt++; | |
58352bb0 LM |
455 | } |
456 | } | |
457 | } | |
458 | } | |
c9987f3b LM |
459 | VERBOSE (DEBUG, PRINTF ("lastest bits : %d\n", codlen (bits))); |
460 | if (codlen (bits) > 0) { | |
461 | for (j = 0; j < (int)codlen (bits); j++) { | |
58352bb0 LM |
462 | *pt <<= 1; |
463 | if (bits[j] == '1') { | |
464 | (*pt)++; | |
465 | } | |
466 | } | |
c9987f3b | 467 | for (j = (int)codlen (bits); j < 8; j++) { |
37062814 | 468 | *pt <<= 1; |
58352bb0 LM |
469 | } |
470 | pt++; | |
471 | } | |
472 | if (pt != bufout) { | |
37062814 | 473 | VERBOSE (DEBUG, PRINTF ("last partial buffer written: %u\n", pt - bufout)); |
bf1d9554 | 474 | write (fout, bufout, pt - bufout); |
58352bb0 LM |
475 | } |
476 | ||
477 | /* closing */ | |
bf1d9554 LM |
478 | close (fin); |
479 | close (fout); | |
58352bb0 LM |
480 | |
481 | VERBOSE (DEBUG, PRINTF ("end writting compressed file\n")); | |
482 | ||
483 | return 0; | |
484 | } | |
485 | ||
37062814 LM |
486 | /* read header */ |
487 | ||
488 | code_t *read_header (char *filename) { | |
c9987f3b | 489 | byte_t buffer[NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6] = {0}; |
37062814 | 490 | code_t *table = NULL; |
c9987f3b LM |
491 | byte_t *codes = NULL; |
492 | byte_t cur; | |
493 | int lengths[NB_BYTES] = {0}; | |
bf1d9554 | 494 | int fid; |
37062814 | 495 | int mode = 0; |
bf1d9554 | 496 | int i, j, l, nb, size; |
37062814 LM |
497 | |
498 | VERBOSE (DEBUG, PRINTF ("start reading header\n")); | |
499 | ||
500 | /* open file */ | |
bf1d9554 LM |
501 | fid = open (filename, O_RDONLY|O_RAW); |
502 | if (fid == -1) { | |
5e0c5bc8 | 503 | VERBOSE (ERROR, myfprintf (stdout, "can't open file '%s'\n", filename)); |
37062814 LM |
504 | return NULL; |
505 | } | |
5e0c5bc8 | 506 | VERBOSE (INFO, myfprintf (stdout, "file '%s' opened\n", filename)); |
37062814 LM |
507 | |
508 | /* read magic number */ | |
bf1d9554 | 509 | nb = read (fid, buffer, 6); |
37062814 LM |
510 | VERBOSE (DEBUG, PRINTF ("nb, buffer: %d 0x%02x 0x%02x\n", nb, buffer[0], buffer[1])); |
511 | if ((nb == 6) && (buffer[0] == 'M') && (buffer[1] == 'Z')) { | |
512 | mode = (buffer[2] == '1') ? 1 : (buffer[2] == '2') ? 2 : 0; | |
c9987f3b | 513 | size = (buffer[3] << 8) + buffer[4]; |
37062814 | 514 | VERBOSE (DEBUG, PRINTF ("mode, size: %d %d\n", mode, size)); |
c9987f3b | 515 | if (size > NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES) { |
37062814 LM |
516 | mode = 0; |
517 | } else { | |
bf1d9554 LM |
518 | nb = read (fid, buffer, size); |
519 | VERBOSE (DEBUG, PRINTF ("nb read: %d/%d\n", nb, size)); | |
37062814 LM |
520 | if (nb != size) { |
521 | mode = 0; | |
522 | } | |
523 | } | |
524 | } | |
bf1d9554 | 525 | close (fid); |
37062814 | 526 | if (mode == 0) { |
5e0c5bc8 | 527 | VERBOSE (ERROR, myfprintf (stdout, "incorrect file\n")); |
37062814 LM |
528 | return NULL; |
529 | } | |
530 | ||
531 | /* analyse header */ | |
c9987f3b | 532 | codes = buffer; |
37062814 LM |
533 | switch (mode) { |
534 | case 1: | |
c9987f3b | 535 | for (i = 0; i < NB_BYTES; i++) { |
37062814 LM |
536 | lengths[i] = *(codes++); |
537 | } | |
538 | break; | |
539 | case 2: | |
540 | nb = *(codes++) + 1; | |
541 | VERBOSE (DEBUG, PRINTF ("nb codes: %d\n", nb)); | |
542 | for (i = 0; i < nb; i++) { | |
543 | j = *(codes++); | |
544 | lengths[j] = *(codes++); | |
545 | } | |
546 | break; | |
547 | } | |
c9987f3b | 548 | VERBOSE (DEBUG, for (i = 0; i < NB_BYTES; i++) if (lengths[i]) PRINTF ("%d: %d\n", i, lengths[i])); |
37062814 LM |
549 | |
550 | /* check lengths */ | |
c9987f3b | 551 | for (i = 0, l = 0; i < NB_BYTES; i++) { |
37062814 LM |
552 | l += lengths[i]; |
553 | } | |
554 | if (((mode == 1) && (size - 256 != (l + 7) / 8)) || | |
555 | ((mode == 2) && (size - 2 * nb - 1 != (l + 7) / 8))) { | |
bf1d9554 | 556 | VERBOSE (ERROR, myfprintf (stdout, "incorrect code table length: %d %d %d\n", size, nb, l)); |
37062814 LM |
557 | return NULL; |
558 | } | |
559 | ||
560 | /* allocate table */ | |
c9987f3b | 561 | table = (code_t *) calloc (NB_BYTES, sizeof (code_t)); |
37062814 | 562 | if (table == NULL) { |
5e0c5bc8 | 563 | VERBOSE (ERROR, myfprintf (stdout, "can't allocate memory\n")); |
37062814 LM |
564 | return NULL; |
565 | } | |
566 | ||
567 | /* decode code */ | |
568 | cur = *(codes++); | |
569 | l = 8; | |
c9987f3b | 570 | for (i = 0; i < NB_BYTES; i++) { |
37062814 LM |
571 | if (lengths[i] == 0) { |
572 | continue; | |
573 | } | |
574 | while (lengths[i]--) { | |
c9987f3b | 575 | codcat ((char *)(table + i), sizeof (code_t), ((cur & 0x80) == 0) ? "0" : "1"); |
37062814 LM |
576 | l--; |
577 | cur <<= 1; | |
578 | if (l == 0) { | |
579 | cur = *(codes++); | |
580 | l = 8; | |
581 | } | |
582 | } | |
583 | } | |
584 | ||
585 | VERBOSE (DEBUG, PRINTF ("end reading header\n")); | |
586 | ||
587 | return table; | |
588 | } | |
589 | ||
590 | /* write decompressed file */ | |
591 | ||
592 | int write_decompress (char *output, char *input, code_t *codes) | |
593 | { | |
c9987f3b LM |
594 | byte_t bufin[BUFFER_SIZE] = {0}; |
595 | byte_t bufout[BUFFER_SIZE] = {0}; | |
596 | byte_t bufhea[MAX(NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6, BUFFER_SIZE)] = {0}; | |
597 | char bits[(NB_BYTES - 1) + 1] = {0}; | |
bf1d9554 | 598 | int fin, fout; |
37062814 LM |
599 | int i, j, k, nb, size, rem; |
600 | int is_found; | |
601 | int l = 0; | |
c9987f3b | 602 | byte_t *pt; |
37062814 LM |
603 | |
604 | VERBOSE (DEBUG, PRINTF ("start writing decompressed file\n")); | |
605 | ||
606 | /* open file for reading */ | |
bf1d9554 LM |
607 | fin = open (input, O_RDONLY|O_RAW); |
608 | if (fin == -1) { | |
5e0c5bc8 | 609 | VERBOSE (ERROR, myfprintf (stdout, "can't open file '%s' for reading\n", input)); |
37062814 LM |
610 | return 1; |
611 | } | |
5e0c5bc8 | 612 | VERBOSE (INFO, myfprintf (stdout, "file '%s' opened\n", input)); |
37062814 LM |
613 | |
614 | /* read magic number */ | |
bf1d9554 | 615 | nb = read (fin, bufhea, 6); |
37062814 | 616 | if (nb != 6) { |
5e0c5bc8 | 617 | VERBOSE (ERROR, myfprintf (stdout, "can't read file\n")); |
bf1d9554 | 618 | close (fin); |
37062814 LM |
619 | return 1; |
620 | } | |
c9987f3b | 621 | size = (bufhea[3] << 8) + bufhea[4]; |
5e0c5bc8 | 622 | VERBOSE (DEBUG, myfprintf (stdout, "table size: %d\n", size)); |
c9987f3b | 623 | rem = bufhea[5]; |
5e0c5bc8 | 624 | VERBOSE (DEBUG, myfprintf (stdout, "remainder: %d\n", rem)); |
bf1d9554 | 625 | nb = read (fin, bufhea, size); |
37062814 | 626 | if (nb != size) { |
5e0c5bc8 | 627 | VERBOSE (ERROR, myfprintf (stdout, "can't read file\n")); |
bf1d9554 | 628 | close (fin); |
37062814 LM |
629 | return 1; |
630 | } | |
631 | ||
632 | /* open file for writing */ | |
bf1d9554 LM |
633 | fout = open (output, O_WRONLY|O_CREAT|O_RAW, 0700); |
634 | if (fout == -1) { | |
5e0c5bc8 | 635 | VERBOSE (ERROR, myfprintf (stdout, "can't open file '%s' for writing\n", output)); |
bf1d9554 LM |
636 | close (fin); |
637 | return 2; | |
37062814 | 638 | } |
5e0c5bc8 | 639 | VERBOSE (INFO, myfprintf (stdout, "file '%s' opened\n", output)); |
37062814 LM |
640 | |
641 | /* write file */ | |
642 | pt = bufout; | |
bf1d9554 | 643 | while ((nb = read (fin, bufin, BUFFER_SIZE)) > 0) { |
37062814 LM |
644 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nb)); |
645 | for (i = 0; i < nb; i++) { | |
646 | for (j = 0; j < 8; j++) { | |
c9987f3b | 647 | codcat (bits, sizeof (bits), ((bufin[i] & 0x80) == 0) ? "0" : "1"); |
37062814 LM |
648 | bufin[i] <<= 1; |
649 | l++; | |
c9987f3b | 650 | VERBOSE (DEBUG, PRINTF ("bits: %d - %s\n", codlen (bits), bits)); |
37062814 LM |
651 | |
652 | /* look for correct code */ | |
653 | is_found = 0; | |
c9987f3b LM |
654 | for (k = 0; (k < NB_BYTES) && (!is_found); k++) { |
655 | if (codcmp ((char *)(codes + k), bits) == 0) { | |
37062814 LM |
656 | is_found = 1; |
657 | VERBOSE (DEBUG, PRINTF ("found: %d\n", k)); | |
658 | *pt= k; | |
659 | bits[0] = 0; | |
660 | if (pt - bufout == BUFFER_SIZE - 1) { | |
661 | VERBOSE (DEBUG, PRINTF ("nb buffer out: %u\n", (pt - bufout))); | |
bf1d9554 | 662 | write (fout, bufout, BUFFER_SIZE); |
37062814 LM |
663 | pt = bufout; |
664 | } else { | |
665 | pt++; | |
666 | } | |
667 | } | |
668 | } | |
bf1d9554 | 669 | if ((i == nb - 1) && (l % 256 == rem) && (nb != BUFFER_SIZE)) { |
37062814 LM |
670 | VERBOSE (DEBUG, PRINTF ("break\n")); |
671 | break; | |
672 | } | |
673 | } | |
674 | } | |
675 | } | |
676 | if (pt != bufout) { | |
677 | VERBOSE (DEBUG, PRINTF ("nb buffer out: %u\n", (pt - bufout))); | |
bf1d9554 | 678 | write (fout, bufout, pt - bufout); |
37062814 LM |
679 | } |
680 | ||
681 | /* close files */ | |
bf1d9554 LM |
682 | close (fin); |
683 | close (fout); | |
37062814 LM |
684 | |
685 | VERBOSE (DEBUG, PRINTF ("end writing decompressed file\n")); | |
686 | ||
687 | return 0; | |
688 | } | |
689 | ||
58352bb0 LM |
690 | /* main function */ |
691 | ||
692 | int main (int argc, char *argv[]) | |
693 | { | |
694 | char *input = NULL; | |
695 | char *output = NULL; | |
696 | int *table = NULL; | |
697 | leaf_t **leafs = NULL; | |
698 | leaf_t *root = NULL; | |
699 | code_t *codes = NULL; | |
c9987f3b | 700 | byte_t *header = NULL; |
58352bb0 | 701 | int mode = COMPRESS; |
37062814 | 702 | int rc = 1; |
58352bb0 LM |
703 | |
704 | progname = argv[0]; | |
705 | ||
706 | int c; | |
d3dbaf98 | 707 | char * arg; |
37062814 | 708 | VERBOSE (DEBUG, PRINTF ("start processing arguments\n")); |
d3dbaf98 LM |
709 | while (argc-- > 1) { |
710 | arg = *(++argv); | |
711 | if (arg[0] != '-') { | |
5e0c5bc8 | 712 | myfprintf (stderr, "%s: invalid option -- %s\n", progname, arg); |
d3dbaf98 LM |
713 | usage (1); |
714 | } | |
715 | c = arg[1]; | |
37062814 | 716 | VERBOSE (DEBUG, PRINTF ("option: %c\n", c)); |
58352bb0 LM |
717 | switch (c) { |
718 | case 'c': | |
719 | mode = COMPRESS; | |
720 | break; | |
721 | case 'd': | |
722 | mode = DECOMPRESS; | |
723 | break; | |
724 | case 'i': | |
d3dbaf98 | 725 | input = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
37062814 | 726 | VERBOSE (DEBUG, PRINTF ("input: %s\n", input)); |
58352bb0 LM |
727 | break; |
728 | case 'o': | |
d3dbaf98 | 729 | output = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
37062814 | 730 | VERBOSE (DEBUG, PRINTF ("output: %s\n", output)); |
58352bb0 LM |
731 | break; |
732 | case 'v': | |
d3dbaf98 LM |
733 | arg = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
734 | if (arg == NULL) { | |
5e0c5bc8 | 735 | myfprintf (stderr, "%s: missing verbose level\n", progname); |
d3dbaf98 LM |
736 | usage (1); |
737 | } | |
5e0c5bc8 LM |
738 | verbose = myatoi (arg); |
739 | VERBOSE (INFO, myfprintf (stdout, "verbose: %d\n", verbose)); | |
58352bb0 LM |
740 | break; |
741 | case 'h': | |
742 | default: | |
743 | usage (c != 'h'); | |
744 | } | |
745 | } | |
d3dbaf98 | 746 | if ((input == NULL) || (output == NULL)) { |
5e0c5bc8 | 747 | myfprintf (stderr, "%s: missing file\n", progname); |
58352bb0 LM |
748 | usage (1); |
749 | } | |
37062814 | 750 | VERBOSE (DEBUG, PRINTF ("end processing arguments\n")); |
58352bb0 LM |
751 | |
752 | switch (mode) { | |
753 | case COMPRESS: | |
754 | table = create_table (input); | |
755 | if (table == NULL) break; | |
756 | VERBOSE (INFO, print_occ_table (table)); | |
757 | ||
758 | leafs = init_forest (table); | |
759 | if (leafs == NULL) break; | |
760 | root = create_tree (leafs); | |
761 | if (root == NULL) break; | |
762 | codes = create_code (root); | |
763 | if (codes == NULL) break; | |
764 | VERBOSE (INFO, print_code_table (codes)); | |
765 | header = encode_header_table (codes, table); | |
766 | if (header == NULL) break; | |
767 | VERBOSE (INFO, print_header (header)); | |
768 | rc = write_compress (output, input, codes, header); | |
769 | break; | |
770 | case DECOMPRESS: | |
37062814 LM |
771 | codes = read_header (input); |
772 | if (codes == NULL) break; | |
773 | VERBOSE (INFO, print_code_table (codes)); | |
774 | rc = write_decompress (output, input, codes); | |
58352bb0 LM |
775 | break; |
776 | } | |
777 | ||
778 | /* clean everything */ | |
c9987f3b | 779 | fflush (stdout); |
58352bb0 LM |
780 | if (header) free (header); |
781 | if (codes) free (codes); | |
782 | if (root) free_tree (root); | |
783 | if (leafs) free (leafs); | |
784 | if (table) free (table); | |
785 | ||
786 | return rc; | |
787 | } | |
788 | ||
789 | // test: compress.exe -h | |
790 | // test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }' | |
791 | // test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }' | |
792 | // test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }' | |
37062814 LM |
793 | // test: compress.exe -c -i compress.c -o compress.mz |
794 | // test: ls -sS1 compress.c compress.mz | tail -1 | grep compress.mz | |
795 | // test: compress.exe -d -i compress.mz -o tmp.c | |
796 | // test: cmp compress.c tmp.c | |
797 | // test: rm compress.mz tmp.c | |
58352bb0 | 798 | |
bf1d9554 | 799 | /* vim: set ts=4 sw=4 et: */ |