Commit | Line | Data |
---|---|---|
58352bb0 LM |
1 | /* depend: */ |
2 | /* cflags: */ | |
5e0c5bc8 | 3 | /* linker: atoi.o code.o debug.o fprintf.o */ |
58352bb0 | 4 | |
bf1d9554 | 5 | #include <fcntl.h> |
bf1d9554 | 6 | #include <unistd.h> |
c84ea202 | 7 | #include <stddef.h> |
5e0c5bc8 | 8 | #include "atoi.h" |
c9987f3b LM |
9 | #include "code.h" |
10 | #include "debug.h" | |
5f83300c | 11 | #include "fprintf.h" |
58352bb0 LM |
12 | |
13 | /* constants */ | |
14 | ||
58352bb0 LM |
15 | #define BUFFER_SIZE 4096 |
16 | ||
c84ea202 LM |
17 | #define COMPRESS 1 |
18 | #define DECOMPRESS 2 | |
19 | ||
58352bb0 LM |
20 | /* macros */ |
21 | ||
58352bb0 LM |
22 | /* global variables */ |
23 | ||
24 | char *progname = NULL; | |
58352bb0 LM |
25 | |
26 | /* help function */ | |
27 | ||
c84ea202 | 28 | int usage (int ret) |
58352bb0 | 29 | { |
c84ea202 LM |
30 | //int fd = ret ? STDERR_FILENO : STDOUT_FILENO; |
31 | int fd = ret ? _fderr : _fdout; | |
92fc2c44 LM |
32 | fdprintf (fd, "usage: %s\n", progname); |
33 | fdprintf (fd, " -h : help message\n"); | |
34 | fdprintf (fd, " -i <file>: input file\n"); | |
35 | fdprintf (fd, " -o <file>: output file\n"); | |
36 | fdprintf (fd, " -v : verbose level (%d)\n", verbose); | |
58352bb0 | 37 | |
c84ea202 | 38 | return ret; |
58352bb0 LM |
39 | } |
40 | ||
d3dbaf98 LM |
/*
 * Copy len bytes from src to dst, lowest address first.
 *
 * dst: destination buffer, at least len bytes.
 * src: source buffer, at least len bytes.
 * len: number of bytes to copy; zero or negative copies nothing.
 *
 * The copy walks typed byte pointers: incrementing a `void *` is a GNU
 * extension, not ISO C, and the source stays const-qualified.
 */
void blkcpy (void *dst, const void *src, int len)
{
    unsigned char *out = dst;
    const unsigned char *in = src;

    while (len-- > 0) {
        *out++ = *in++;
    }
}
47 | ||
58352bb0 | 48 | /* create occurrence table */ |
58352bb0 LM |
49 | int *create_table (char *filename) |
50 | { | |
c9987f3b | 51 | byte_t buffer[BUFFER_SIZE] = {0}; |
58352bb0 | 52 | int nbread; |
c84ea202 | 53 | static int table[NB_BYTES] = {0}; |
bf1d9554 | 54 | int fid = 0; |
58352bb0 | 55 | |
c84ea202 | 56 | VERBOSE (DEBUG, PRINTOUT ("start creating occurence table\n")); |
58352bb0 LM |
57 | |
58 | /* open file */ | |
bf1d9554 LM |
59 | fid = open (filename, O_RDONLY|O_RAW); |
60 | if (fid == -1) { | |
e75046fb | 61 | VERBOSE (ERROR, PRINTERR ("can't open file '%s'\n", filename/); |
58352bb0 LM |
62 | return NULL; |
63 | } | |
c84ea202 | 64 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", filename)); |
58352bb0 LM |
65 | |
66 | /* read file */ | |
bf1d9554 | 67 | while ((nbread = read (fid, buffer, BUFFER_SIZE)) > 0) { |
c84ea202 | 68 | VERBOSE (DEBUG, PRINTOUT ("nbread: %d\n", nbread)); |
58352bb0 LM |
69 | while (nbread--) { |
70 | table[(int)buffer[nbread]]++; | |
71 | } | |
72 | } | |
73 | ||
74 | /* close file */ | |
bf1d9554 | 75 | close (fid); |
58352bb0 | 76 | |
c84ea202 | 77 | VERBOSE (DEBUG, PRINTOUT ("end creating occurence table\n")); |
58352bb0 LM |
78 | |
79 | return table; | |
80 | } | |
81 | ||
82 | /* print occurrence table */ |
83 | ||
84 | void print_occ_table (int *table) | |
85 | { | |
86 | int i; | |
87 | ||
c84ea202 | 88 | PRINTOUT ("Occurence table\n"); |
c9987f3b | 89 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 90 | if (table[i]) { |
c84ea202 | 91 | PRINTOUT ("0x%02x '%c': %d\n", i, ((i < 32) || (i > 127)) ? '.' : i, table[i]); |
58352bb0 LM |
92 | } |
93 | } | |
94 | } | |
95 | ||
58352bb0 LM |
96 | /* initialize forest */ |
97 | ||
98 | leaf_t **init_forest (int *table) | |
99 | { | |
c84ea202 | 100 | static leaf_t *leafs[NB_BYTES] = {0}; |
58352bb0 LM |
101 | int nb_leafs = 0; |
102 | int i, l; | |
103 | ||
c84ea202 | 104 | VERBOSE (DEBUG, PRINTOUT ("start initiliazing forest\n")); |
58352bb0 LM |
105 | |
106 | /* count number of leafs */ | |
c9987f3b | 107 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 LM |
108 | if (table[i] > 0) { |
109 | nb_leafs++; | |
110 | } | |
111 | } | |
112 | ||
58352bb0 | 113 | /* initialize leafs */ |
c9987f3b | 114 | for (i = 0, l = 0; i < NB_BYTES; i++) { |
58352bb0 | 115 | if (table[i] > 0) { |
c84ea202 | 116 | leafs[l] = getleaf (1); |
58352bb0 | 117 | if (leafs[l] == NULL) { |
e75046fb | 118 | VERBOSE (ERROR, PRINTERR ("can't allocate memory\n")); |
58352bb0 LM |
119 | return NULL; |
120 | } | |
121 | leafs[l]->occ = table[i]; | |
122 | leafs[l]->c = i; | |
123 | l++; | |
124 | } | |
125 | } | |
126 | ||
c84ea202 | 127 | VERBOSE (DEBUG, PRINTOUT ("end initiliazing forest\n")); |
58352bb0 LM |
128 | |
129 | return leafs; | |
130 | } | |
131 | ||
132 | /* create tree */ | |
133 | ||
134 | leaf_t *create_tree (leaf_t **leafs) | |
135 | { | |
136 | leaf_t *branch = NULL; | |
137 | int nb_leafs = 0; | |
37062814 LM |
138 | int last = -1; |
139 | int ante; | |
58352bb0 LM |
140 | int i, j; |
141 | ||
c84ea202 | 142 | VERBOSE (DEBUG, PRINTOUT ("start creating tree\n")); |
58352bb0 LM |
143 | |
144 | /* count number of leafs */ | |
145 | while (leafs[nb_leafs] != NULL) { | |
146 | nb_leafs++; | |
147 | } | |
148 | ||
149 | /* create tree */ | |
150 | for (j = 0; j < nb_leafs - 1; j++) { | |
151 | ||
152 | /* look for leatest occurence */ | |
153 | last = -1; | |
154 | for (i = 0; i < nb_leafs; i++) { | |
155 | if (leafs[i] == NULL) { | |
156 | continue; | |
157 | } | |
158 | if ((last == -1) || (leafs[i]->occ < leafs[last]->occ)) { | |
159 | last = i; | |
160 | } | |
161 | } | |
162 | ||
163 | /* look for ante leatest occurence */ | |
164 | ante = -1; | |
165 | for (i = 0; i < nb_leafs; i++) { | |
166 | if ((i == last) || (leafs[i] == NULL)) { | |
167 | continue; | |
168 | } | |
169 | if ((ante == -1) || (leafs[i]->occ < leafs[ante]->occ)) { | |
170 | ante = i; | |
171 | } | |
172 | } | |
173 | ||
174 | /* create branch */ | |
175 | if ((last == -1) || (ante == -1)) { | |
e75046fb | 176 | VERBOSE (ERROR, PRINTERR ("error during tree building\n")); |
58352bb0 LM |
177 | return NULL; |
178 | } | |
c84ea202 | 179 | branch = getleaf (1); |
58352bb0 | 180 | if (branch == NULL) { |
e75046fb | 181 | VERBOSE (ERROR, PRINTERR ("can't allocate memory\n")); |
58352bb0 LM |
182 | return NULL; |
183 | } | |
184 | branch->left = leafs[last]; | |
185 | branch->right = leafs[ante]; | |
186 | branch->occ = branch->left->occ + branch->right->occ; | |
187 | leafs[last] = branch; | |
188 | leafs[ante] = NULL; | |
189 | } | |
190 | ||
c84ea202 | 191 | VERBOSE (DEBUG, PRINTOUT ("end creating tree\n")); |
58352bb0 | 192 | |
37062814 | 193 | return (last != -1) ? leafs[last] : NULL; |
58352bb0 LM |
194 | } |
195 | ||
58352bb0 LM |
196 | /* explore tree */ |
197 | ||
198 | void explore_tree (code_t *table, leaf_t *root, char *code, int index) | |
199 | { | |
c84ea202 LM |
200 | |
201 | VERBOSE (DEBUG, PRINTOUT ("start exploring code tree\n")); | |
202 | ||
58352bb0 | 203 | if ((root->left == NULL) && (root->right == NULL)) { |
c9987f3b | 204 | codcpy ((char *)(table + (int)(root->c)), sizeof (code_t), code); |
58352bb0 LM |
205 | } |
206 | else { | |
c9987f3b | 207 | codcpy (code + index, sizeof (code_t), "1"); |
58352bb0 | 208 | explore_tree (table, root->left, code, index + 1); |
c9987f3b | 209 | codcpy (code + index, sizeof (code_t), "0"); |
58352bb0 LM |
210 | explore_tree (table, root->right, code, index + 1); |
211 | } | |
c84ea202 LM |
212 | |
213 | VERBOSE (DEBUG, PRINTOUT ("end exploring code tree\n")); | |
58352bb0 LM |
214 | } |
215 | ||
216 | /* create code table */ | |
58352bb0 LM |
217 | code_t *create_code (leaf_t *root) |
218 | { | |
c84ea202 | 219 | static code_t table[NB_BYTES] = {0}; |
58352bb0 LM |
220 | code_t code = {0}; |
221 | ||
c84ea202 | 222 | VERBOSE (DEBUG, PRINTOUT ("start creating code table\n")); |
58352bb0 LM |
223 | |
224 | explore_tree (table, root, (char *)&code, 0); | |
225 | ||
c84ea202 | 226 | VERBOSE (DEBUG, PRINTOUT ("end creating code table\n")); |
58352bb0 LM |
227 | |
228 | return table; | |
229 | } | |
230 | ||
231 | /* print code table */ | |
232 | ||
233 | void print_code_table (code_t *codes) | |
234 | { | |
235 | char *code; | |
236 | int i; | |
237 | ||
c84ea202 | 238 | PRINTOUT ("Code table\n"); |
c9987f3b | 239 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 240 | code = (char *)(codes + i); |
c9987f3b | 241 | if (codlen (code) == 0) { |
58352bb0 LM |
242 | continue; |
243 | } | |
c84ea202 | 244 | PRINTOUT ("0x%02x '%c': %s\n", i, ((i < 32) || (i > 127)) ? '.' : i, code); |
58352bb0 LM |
245 | } |
246 | } | |
247 | ||
248 | /* encode header and code table */ | |
249 | ||
c9987f3b | 250 | byte_t *encode_header_table (code_t *codes, int *occ) |
58352bb0 | 251 | { |
c84ea202 | 252 | static byte_t buffer[NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6] = {0}; |
c9987f3b | 253 | char bits[(NB_BYTES - 1) + 8 + 1] = {0}; |
58352bb0 | 254 | char *code; |
c9987f3b | 255 | byte_t *header = buffer; |
58352bb0 LM |
256 | int i, j, length, mode; |
257 | int nb = 0; | |
258 | int size = 0; | |
259 | ||
c84ea202 | 260 | VERBOSE (DEBUG, PRINTOUT ("start encoding header and code table\n")); |
58352bb0 LM |
261 | |
262 | /* mode 1 or 2 */ | |
c9987f3b | 263 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 264 | code = (char *)(codes + i); |
c9987f3b | 265 | if (codlen (code) > 0) { |
58352bb0 | 266 | nb++; |
c9987f3b | 267 | size += codlen (code) * occ[i]; |
58352bb0 LM |
268 | } |
269 | } | |
c9987f3b | 270 | mode = (NB_BYTES < 2 * nb + 1) ? 1 : 2; |
c84ea202 LM |
271 | VERBOSE (DEBUG, PRINTOUT ("nb chars: %d\n", nb)); |
272 | VERBOSE (DEBUG, PRINTOUT ("mode: %d\n", mode)); | |
273 | VERBOSE (DEBUG, PRINTOUT ("size: %d\n", size)); | |
274 | VERBOSE (DEBUG, PRINTOUT ("rem: %d\n", size % 256)); | |
58352bb0 LM |
275 | |
276 | /* header */ | |
c9987f3b | 277 | codcpy ((char *)header, sizeof (buffer), (mode == 1) ? "MZ1 " : "MZ2 "); |
58352bb0 LM |
278 | header += 6; |
279 | ||
280 | /* size */ | |
281 | switch (mode) { | |
282 | case 1: | |
c9987f3b | 283 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 284 | code = (char *)(codes + i); |
c9987f3b | 285 | *(header++) = (byte_t) codlen (code); |
58352bb0 LM |
286 | } |
287 | break; | |
288 | case 2: | |
c9987f3b LM |
289 | *(header++) = (byte_t)(nb - 1); |
290 | for (i = 0; i < NB_BYTES; i++) { | |
58352bb0 | 291 | code = (char *)(codes + i); |
c9987f3b LM |
292 | if (codlen (code) > 0) { |
293 | *(header++) = (byte_t) i; | |
294 | *(header++) = (byte_t) codlen (code); | |
58352bb0 LM |
295 | } |
296 | } | |
297 | break; | |
298 | } | |
299 | ||
300 | /* bits */ | |
c9987f3b | 301 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 302 | code = (char *)(codes + i); |
c9987f3b LM |
303 | if (codlen (code) > 0) { |
304 | codcat (bits, sizeof (code_t), code); | |
305 | while (codlen (bits) > (8 - 1)) { | |
58352bb0 LM |
306 | for (j = 0; j < 8; j++) { |
307 | *header <<= 1; | |
308 | if (bits[j] == '1') { | |
309 | (*header)++; | |
310 | } | |
311 | } | |
c9987f3b | 312 | codcpy (bits, sizeof (code_t), bits + 8); |
58352bb0 LM |
313 | header++; |
314 | } | |
315 | } | |
316 | } | |
c9987f3b LM |
317 | if (codlen (bits) > 0) { |
318 | for (j = 0; j < (int)codlen (bits); j++) { | |
58352bb0 LM |
319 | *header <<= 1; |
320 | if (bits[j] == '1') { | |
321 | (*header)++; | |
322 | } | |
323 | } | |
c9987f3b | 324 | for (j = (int)codlen (bits); j < 8; j++) { |
37062814 LM |
325 | *header <<= 1; |
326 | } | |
58352bb0 LM |
327 | header++; |
328 | } | |
329 | ||
330 | /* length */ | |
331 | length = (int)(header - buffer - 6); | |
c84ea202 | 332 | VERBOSE (DEBUG, PRINTOUT ("lengh: %d %02x %02x\n", length, length >> 8, length & 0xff)); |
c9987f3b LM |
333 | buffer[3] = (byte_t)(length >> 8); |
334 | buffer[4] = (byte_t)(length & 0xff); | |
335 | buffer[5] = (byte_t)(size % 256); | |
c84ea202 | 336 | header = buffer; |
58352bb0 | 337 | |
c84ea202 | 338 | VERBOSE (DEBUG, PRINTOUT ("end encoding header and code table\n")); |
58352bb0 | 339 | |
c9987f3b | 340 | return header; |
58352bb0 LM |
341 | } |
342 | ||
343 | /* print header */ | |
344 | ||
c9987f3b | 345 | void print_header (byte_t *header) |
58352bb0 LM |
346 | { |
347 | int length, i; | |
348 | ||
c9987f3b | 349 | length = (header[3] << 8) + header[4]; |
c84ea202 | 350 | VERBOSE (DEBUG, PRINTOUT ("lengh: %d\n", length)); |
58352bb0 | 351 | for (i = 0; i < length + 6; i++) { |
c84ea202 | 352 | PRINTOUT ("%02x", header[i]); |
58352bb0 | 353 | } |
c84ea202 | 354 | PRINTOUT ("\n"); |
58352bb0 LM |
355 | } |
356 | ||
357 | /* write compressed file */ |
358 | ||
c9987f3b | 359 | int write_compress (char *output, char *input, code_t *codes, byte_t *header) |
58352bb0 | 360 | { |
c9987f3b LM |
361 | byte_t bufin[BUFFER_SIZE] = {0}; |
362 | byte_t bufout[BUFFER_SIZE] = {0}; | |
363 | char bits[(NB_BYTES - 1) + 8 + 1] = {0}; | |
bf1d9554 | 364 | int fin, fout; |
58352bb0 LM |
365 | int length = 0; |
366 | int i, j, nbread; | |
c9987f3b | 367 | byte_t *pt; |
58352bb0 | 368 | |
c84ea202 | 369 | VERBOSE (DEBUG, PRINTOUT ("start writting compressed file\n")); |
58352bb0 LM |
370 | |
371 | /* open input file */ | |
bf1d9554 LM |
372 | fin = open (input, O_RDONLY|O_RAW); |
373 | if (fin == -1) { | |
e75046fb | 374 | VERBOSE (ERROR, PRINTERR ("can't open file '%s' for reading\n", input)); |
58352bb0 LM |
375 | return 1; |
376 | } | |
c84ea202 | 377 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", input)); |
58352bb0 LM |
378 | |
379 | /* open output file */ | |
bf1d9554 LM |
380 | fout = open (output, O_WRONLY|O_CREAT|O_RAW, 0700); |
381 | if (fout == -1) { | |
e75046fb | 382 | VERBOSE (ERROR, PRINTERR ("can't open file '%s' for writing\n", output)); |
bf1d9554 | 383 | close (fin); |
58352bb0 LM |
384 | return 1; |
385 | } | |
c84ea202 | 386 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", output)); |
58352bb0 LM |
387 | |
388 | /* write header */ | |
c9987f3b | 389 | length = (header[3] << 8) + header[4]; |
c84ea202 | 390 | VERBOSE (DEBUG, PRINTOUT ("lengh: %d\n", length)); |
bf1d9554 | 391 | write (fout, header, length + 6); |
58352bb0 LM |
392 | |
393 | /* write file */ | |
394 | pt = bufout; | |
bf1d9554 | 395 | while ((nbread = read (fin, bufin, BUFFER_SIZE)) > 0) { |
c84ea202 | 396 | VERBOSE (DEBUG, PRINTOUT ("nbread: %d\n", nbread)); |
58352bb0 | 397 | for (i = 0; i < nbread; i++) { |
c9987f3b LM |
398 | codcat (bits, sizeof (code_t), (char *)(codes + bufin[i])); |
399 | while (codlen (bits) > (8 - 1)) { | |
58352bb0 LM |
400 | for (j = 0; j < 8; j++) { |
401 | *pt <<= 1; | |
402 | if (bits[j] == '1') { | |
403 | (*pt)++; | |
404 | } | |
405 | } | |
c9987f3b | 406 | codcpy (bits, sizeof (code_t), bits + 8); |
37062814 | 407 | if (pt - bufout == BUFFER_SIZE - 1) { |
bf1d9554 | 408 | write (fout, bufout, BUFFER_SIZE); |
58352bb0 | 409 | pt = bufout; |
37062814 LM |
410 | } else { |
411 | pt++; | |
58352bb0 LM |
412 | } |
413 | } | |
414 | } | |
415 | } | |
c84ea202 | 416 | VERBOSE (DEBUG, PRINTOUT ("lastest bits : %d\n", codlen (bits))); |
c9987f3b LM |
417 | if (codlen (bits) > 0) { |
418 | for (j = 0; j < (int)codlen (bits); j++) { | |
58352bb0 LM |
419 | *pt <<= 1; |
420 | if (bits[j] == '1') { | |
421 | (*pt)++; | |
422 | } | |
423 | } | |
c9987f3b | 424 | for (j = (int)codlen (bits); j < 8; j++) { |
37062814 | 425 | *pt <<= 1; |
58352bb0 LM |
426 | } |
427 | pt++; | |
428 | } | |
429 | if (pt != bufout) { | |
c84ea202 | 430 | VERBOSE (DEBUG, PRINTOUT ("last partial buffer written: %u\n", pt - bufout)); |
bf1d9554 | 431 | write (fout, bufout, pt - bufout); |
58352bb0 LM |
432 | } |
433 | ||
434 | /* closing */ | |
bf1d9554 LM |
435 | close (fin); |
436 | close (fout); | |
58352bb0 | 437 | |
c84ea202 | 438 | VERBOSE (DEBUG, PRINTOUT ("end writting compressed file\n")); |
58352bb0 LM |
439 | |
440 | return 0; | |
441 | } | |
442 | ||
37062814 LM |
443 | /* read header */ |
444 | ||
445 | code_t *read_header (char *filename) { | |
c84ea202 | 446 | static code_t table[NB_BYTES] = {0}; |
c9987f3b | 447 | byte_t buffer[NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6] = {0}; |
c9987f3b LM |
448 | byte_t *codes = NULL; |
449 | byte_t cur; | |
450 | int lengths[NB_BYTES] = {0}; | |
bf1d9554 | 451 | int fid; |
37062814 | 452 | int mode = 0; |
bf1d9554 | 453 | int i, j, l, nb, size; |
37062814 | 454 | |
c84ea202 | 455 | VERBOSE (DEBUG, PRINTOUT ("start reading header\n")); |
37062814 LM |
456 | |
457 | /* open file */ | |
bf1d9554 LM |
458 | fid = open (filename, O_RDONLY|O_RAW); |
459 | if (fid == -1) { | |
e75046fb | 460 | VERBOSE (ERROR, PRINTERR ("can't open file '%s'\n", filename)); |
37062814 LM |
461 | return NULL; |
462 | } | |
c84ea202 | 463 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", filename)); |
37062814 LM |
464 | |
465 | /* read magic number */ | |
bf1d9554 | 466 | nb = read (fid, buffer, 6); |
c84ea202 | 467 | VERBOSE (DEBUG, PRINTOUT ("nb, buffer: %d 0x%02x 0x%02x\n", nb, buffer[0], buffer[1])); |
37062814 LM |
468 | if ((nb == 6) && (buffer[0] == 'M') && (buffer[1] == 'Z')) { |
469 | mode = (buffer[2] == '1') ? 1 : (buffer[2] == '2') ? 2 : 0; | |
c9987f3b | 470 | size = (buffer[3] << 8) + buffer[4]; |
c84ea202 | 471 | VERBOSE (DEBUG, PRINTOUT ("mode, size: %d %d\n", mode, size)); |
c9987f3b | 472 | if (size > NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES) { |
37062814 LM |
473 | mode = 0; |
474 | } else { | |
bf1d9554 | 475 | nb = read (fid, buffer, size); |
c84ea202 | 476 | VERBOSE (DEBUG, PRINTOUT ("nb read: %d/%d\n", nb, size)); |
37062814 LM |
477 | if (nb != size) { |
478 | mode = 0; | |
479 | } | |
480 | } | |
481 | } | |
bf1d9554 | 482 | close (fid); |
37062814 | 483 | if (mode == 0) { |
e75046fb | 484 | VERBOSE (ERROR, PRINTERR ("incorrect file\n")); |
37062814 LM |
485 | return NULL; |
486 | } | |
487 | ||
488 | /* analyse header */ | |
c9987f3b | 489 | codes = buffer; |
37062814 LM |
490 | switch (mode) { |
491 | case 1: | |
c9987f3b | 492 | for (i = 0; i < NB_BYTES; i++) { |
37062814 LM |
493 | lengths[i] = *(codes++); |
494 | } | |
495 | break; | |
496 | case 2: | |
497 | nb = *(codes++) + 1; | |
c84ea202 | 498 | VERBOSE (DEBUG, PRINTOUT ("nb codes: %d\n", nb)); |
37062814 LM |
499 | for (i = 0; i < nb; i++) { |
500 | j = *(codes++); | |
501 | lengths[j] = *(codes++); | |
502 | } | |
503 | break; | |
504 | } | |
c84ea202 | 505 | VERBOSE (DEBUG, for (i = 0; i < NB_BYTES; i++) if (lengths[i]) PRINTOUT ("%d: %d\n", i, lengths[i])); |
37062814 LM |
506 | |
507 | /* check lengths */ | |
c9987f3b | 508 | for (i = 0, l = 0; i < NB_BYTES; i++) { |
37062814 LM |
509 | l += lengths[i]; |
510 | } | |
511 | if (((mode == 1) && (size - 256 != (l + 7) / 8)) || | |
512 | ((mode == 2) && (size - 2 * nb - 1 != (l + 7) / 8))) { | |
e75046fb | 513 | VERBOSE (ERROR, PRINTERR ("incorrect code table length: %d %d %d\n", size, nb, l)); |
37062814 LM |
514 | return NULL; |
515 | } | |
516 | ||
517 | /* decode code */ | |
518 | cur = *(codes++); | |
519 | l = 8; | |
c9987f3b | 520 | for (i = 0; i < NB_BYTES; i++) { |
37062814 LM |
521 | if (lengths[i] == 0) { |
522 | continue; | |
523 | } | |
524 | while (lengths[i]--) { | |
c9987f3b | 525 | codcat ((char *)(table + i), sizeof (code_t), ((cur & 0x80) == 0) ? "0" : "1"); |
37062814 LM |
526 | l--; |
527 | cur <<= 1; | |
528 | if (l == 0) { | |
529 | cur = *(codes++); | |
530 | l = 8; | |
531 | } | |
532 | } | |
533 | } | |
534 | ||
c84ea202 | 535 | VERBOSE (DEBUG, PRINTOUT ("end reading header\n")); |
37062814 LM |
536 | |
537 | return table; | |
538 | } | |
539 | ||
540 | /* write decompressed file */ | |
541 | ||
542 | int write_decompress (char *output, char *input, code_t *codes) | |
543 | { | |
c9987f3b LM |
544 | byte_t bufin[BUFFER_SIZE] = {0}; |
545 | byte_t bufout[BUFFER_SIZE] = {0}; | |
546 | byte_t bufhea[MAX(NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6, BUFFER_SIZE)] = {0}; | |
547 | char bits[(NB_BYTES - 1) + 1] = {0}; | |
bf1d9554 | 548 | int fin, fout; |
37062814 LM |
549 | int i, j, k, nb, size, rem; |
550 | int is_found; | |
551 | int l = 0; | |
c9987f3b | 552 | byte_t *pt; |
37062814 | 553 | |
c84ea202 | 554 | VERBOSE (DEBUG, PRINTOUT ("start writing decompressed file\n")); |
37062814 LM |
555 | |
556 | /* open file for reading */ | |
bf1d9554 LM |
557 | fin = open (input, O_RDONLY|O_RAW); |
558 | if (fin == -1) { | |
e75046fb | 559 | VERBOSE (ERROR, PRINTERR ("can't open file '%s' for reading\n", input)); |
37062814 LM |
560 | return 1; |
561 | } | |
c84ea202 | 562 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", input)); |
37062814 LM |
563 | |
564 | /* read magic number */ | |
bf1d9554 | 565 | nb = read (fin, bufhea, 6); |
37062814 | 566 | if (nb != 6) { |
e75046fb | 567 | VERBOSE (ERROR, PRINTERR ("can't read file\n")); |
bf1d9554 | 568 | close (fin); |
37062814 LM |
569 | return 1; |
570 | } | |
c9987f3b | 571 | size = (bufhea[3] << 8) + bufhea[4]; |
c84ea202 | 572 | VERBOSE (DEBUG, PRINTOUT ("table size: %d\n", size)); |
c9987f3b | 573 | rem = bufhea[5]; |
c84ea202 | 574 | VERBOSE (DEBUG, PRINTOUT ("remainder: %d\n", rem)); |
bf1d9554 | 575 | nb = read (fin, bufhea, size); |
37062814 | 576 | if (nb != size) { |
e75046fb | 577 | VERBOSE (ERROR, PRINTERR ("can't read file\n")); |
bf1d9554 | 578 | close (fin); |
37062814 LM |
579 | return 1; |
580 | } | |
581 | ||
582 | /* open file for writing */ | |
bf1d9554 LM |
583 | fout = open (output, O_WRONLY|O_CREAT|O_RAW, 0700); |
584 | if (fout == -1) { | |
e75046fb | 585 | VERBOSE (ERROR, PRINTERR ("can't open file '%s' for writing\n", output)); |
bf1d9554 LM |
586 | close (fin); |
587 | return 2; | |
37062814 | 588 | } |
c84ea202 | 589 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", output)); |
37062814 LM |
590 | |
591 | /* write file */ | |
592 | pt = bufout; | |
bf1d9554 | 593 | while ((nb = read (fin, bufin, BUFFER_SIZE)) > 0) { |
c84ea202 | 594 | VERBOSE (DEBUG, PRINTOUT ("nbread: %d\n", nb)); |
37062814 LM |
595 | for (i = 0; i < nb; i++) { |
596 | for (j = 0; j < 8; j++) { | |
c9987f3b | 597 | codcat (bits, sizeof (bits), ((bufin[i] & 0x80) == 0) ? "0" : "1"); |
37062814 LM |
598 | bufin[i] <<= 1; |
599 | l++; | |
c84ea202 | 600 | VERBOSE (DEBUG, PRINTOUT ("bits: %d - %s\n", codlen (bits), bits)); |
37062814 LM |
601 | |
602 | /* look for correct code */ | |
603 | is_found = 0; | |
c9987f3b LM |
604 | for (k = 0; (k < NB_BYTES) && (!is_found); k++) { |
605 | if (codcmp ((char *)(codes + k), bits) == 0) { | |
37062814 | 606 | is_found = 1; |
c84ea202 | 607 | VERBOSE (DEBUG, PRINTOUT ("found: %d\n", k)); |
37062814 LM |
608 | *pt= k; |
609 | bits[0] = 0; | |
610 | if (pt - bufout == BUFFER_SIZE - 1) { | |
c84ea202 | 611 | VERBOSE (DEBUG, PRINTOUT ("nb buffer out: %u\n", (pt - bufout))); |
bf1d9554 | 612 | write (fout, bufout, BUFFER_SIZE); |
37062814 LM |
613 | pt = bufout; |
614 | } else { | |
615 | pt++; | |
616 | } | |
617 | } | |
618 | } | |
bf1d9554 | 619 | if ((i == nb - 1) && (l % 256 == rem) && (nb != BUFFER_SIZE)) { |
c84ea202 | 620 | VERBOSE (DEBUG, PRINTOUT ("break\n")); |
37062814 LM |
621 | break; |
622 | } | |
623 | } | |
624 | } | |
625 | } | |
626 | if (pt != bufout) { | |
c84ea202 | 627 | VERBOSE (DEBUG, PRINTOUT ("nb buffer out: %u\n", (pt - bufout))); |
bf1d9554 | 628 | write (fout, bufout, pt - bufout); |
37062814 LM |
629 | } |
630 | ||
631 | /* close files */ | |
bf1d9554 LM |
632 | close (fin); |
633 | close (fout); | |
37062814 | 634 | |
c84ea202 | 635 | VERBOSE (DEBUG, PRINTOUT ("end writing decompressed file\n")); |
37062814 LM |
636 | |
637 | return 0; | |
638 | } | |
639 | ||
58352bb0 LM |
640 | /* main function */ |
641 | ||
642 | int main (int argc, char *argv[]) | |
643 | { | |
644 | char *input = NULL; | |
645 | char *output = NULL; | |
646 | int *table = NULL; | |
647 | leaf_t **leafs = NULL; | |
648 | leaf_t *root = NULL; | |
649 | code_t *codes = NULL; | |
c9987f3b | 650 | byte_t *header = NULL; |
58352bb0 | 651 | int mode = COMPRESS; |
37062814 | 652 | int rc = 1; |
58352bb0 LM |
653 | |
654 | progname = argv[0]; | |
655 | ||
656 | int c; | |
d3dbaf98 | 657 | char * arg; |
c84ea202 | 658 | VERBOSE (DEBUG, PRINTOUT ("start processing arguments\n")); |
d3dbaf98 LM |
659 | while (argc-- > 1) { |
660 | arg = *(++argv); | |
661 | if (arg[0] != '-') { | |
c84ea202 LM |
662 | PRINTERR ("%s: invalid option -- %s\n", progname, arg); |
663 | return usage (1); | |
d3dbaf98 LM |
664 | } |
665 | c = arg[1]; | |
c84ea202 | 666 | VERBOSE (DEBUG, PRINTOUT ("option: %c\n", c)); |
58352bb0 LM |
667 | switch (c) { |
668 | case 'c': | |
669 | mode = COMPRESS; | |
670 | break; | |
671 | case 'd': | |
672 | mode = DECOMPRESS; | |
673 | break; | |
674 | case 'i': | |
d3dbaf98 | 675 | input = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
c84ea202 | 676 | VERBOSE (DEBUG, PRINTOUT ("input: %s\n", input)); |
58352bb0 LM |
677 | break; |
678 | case 'o': | |
d3dbaf98 | 679 | output = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
c84ea202 | 680 | VERBOSE (DEBUG, PRINTOUT ("output: %s\n", output)); |
58352bb0 LM |
681 | break; |
682 | case 'v': | |
d3dbaf98 LM |
683 | arg = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
684 | if (arg == NULL) { | |
c84ea202 LM |
685 | PRINTERR ("%s: missing verbose level\n", progname); |
686 | return usage (1); | |
d3dbaf98 | 687 | } |
5e0c5bc8 | 688 | verbose = myatoi (arg); |
c84ea202 | 689 | VERBOSE (INFO, PRINTOUT ("verbose: %d\n", verbose)); |
58352bb0 LM |
690 | break; |
691 | case 'h': | |
692 | default: | |
c84ea202 | 693 | return usage (c != 'h'); |
58352bb0 LM |
694 | } |
695 | } | |
d3dbaf98 | 696 | if ((input == NULL) || (output == NULL)) { |
c84ea202 LM |
697 | PRINTERR ("%s: missing file\n", progname); |
698 | return usage (1); | |
58352bb0 | 699 | } |
c84ea202 | 700 | VERBOSE (DEBUG, PRINTOUT ("end processing arguments\n")); |
58352bb0 LM |
701 | |
702 | switch (mode) { | |
703 | case COMPRESS: | |
704 | table = create_table (input); | |
705 | if (table == NULL) break; | |
706 | VERBOSE (INFO, print_occ_table (table)); | |
707 | ||
708 | leafs = init_forest (table); | |
709 | if (leafs == NULL) break; | |
710 | root = create_tree (leafs); | |
711 | if (root == NULL) break; | |
712 | codes = create_code (root); | |
713 | if (codes == NULL) break; | |
714 | VERBOSE (INFO, print_code_table (codes)); | |
715 | header = encode_header_table (codes, table); | |
716 | if (header == NULL) break; | |
717 | VERBOSE (INFO, print_header (header)); | |
718 | rc = write_compress (output, input, codes, header); | |
719 | break; | |
720 | case DECOMPRESS: | |
37062814 LM |
721 | codes = read_header (input); |
722 | if (codes == NULL) break; | |
723 | VERBOSE (INFO, print_code_table (codes)); | |
724 | rc = write_decompress (output, input, codes); | |
58352bb0 LM |
725 | break; |
726 | } | |
727 | ||
58352bb0 LM |
728 | return rc; |
729 | } | |
730 | ||
731 | // test: compress.exe -h | |
732 | // test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }' | |
733 | // test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }' | |
734 | // test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }' | |
37062814 LM |
735 | // test: compress.exe -c -i compress.c -o compress.mz |
736 | // test: ls -sS1 compress.c compress.mz | tail -1 | grep compress.mz | |
737 | // test: compress.exe -d -i compress.mz -o tmp.c | |
738 | // test: cmp compress.c tmp.c | |
739 | // test: rm compress.mz tmp.c | |
58352bb0 | 740 | |
bf1d9554 | 741 | /* vim: set ts=4 sw=4 et: */ |