Commit | Line | Data |
---|---|---|
58352bb0 LM |
1 | /* depend: */ |
2 | /* cflags: */ | |
5e0c5bc8 | 3 | /* linker: atoi.o code.o debug.o fprintf.o */ |
58352bb0 | 4 | |
bf1d9554 | 5 | #include <fcntl.h> |
bf1d9554 | 6 | #include <unistd.h> |
c84ea202 | 7 | #include <stddef.h> |
5e0c5bc8 | 8 | #include "atoi.h" |
c9987f3b LM |
9 | #include "code.h" |
10 | #include "debug.h" | |
5f83300c | 11 | #include "fprintf.h" |
58352bb0 LM |
12 | |
13 | /* constants */ | |
14 | ||
58352bb0 LM |
15 | #define BUFFER_SIZE 4096 |
16 | ||
c84ea202 LM |
17 | #define COMPRESS 1 |
18 | #define DECOMPRESS 2 | |
19 | ||
d7d2982c LM |
20 | #ifndef O_RAW |
21 | #define O_RAW 0 | |
22 | #endif /* O_RAW */ | |
23 | ||
58352bb0 LM |
24 | /* macros */ |
25 | ||
58352bb0 LM |
26 | /* gobal variables */ |
27 | ||
28 | char *progname = NULL; | |
58352bb0 LM |
29 | |
30 | /* help function */ | |
31 | ||
c84ea202 | 32 | int usage (int ret) |
58352bb0 | 33 | { |
c84ea202 | 34 | int fd = ret ? _fderr : _fdout; |
92fc2c44 LM |
35 | fdprintf (fd, "usage: %s\n", progname); |
36 | fdprintf (fd, " -h : help message\n"); | |
37 | fdprintf (fd, " -i <file>: input file\n"); | |
38 | fdprintf (fd, " -o <file>: output file\n"); | |
39 | fdprintf (fd, " -v : verbose level (%d)\n", verbose); | |
58352bb0 | 40 | |
c84ea202 | 41 | return ret; |
58352bb0 LM |
42 | } |
43 | ||
44 | /* create occurrence table */ |
125462cf | 45 | |
58352bb0 LM |
46 | int *create_table (char *filename) |
47 | { | |
c9987f3b | 48 | byte_t buffer[BUFFER_SIZE] = {0}; |
58352bb0 | 49 | int nbread; |
c84ea202 | 50 | static int table[NB_BYTES] = {0}; |
bf1d9554 | 51 | int fid = 0; |
58352bb0 | 52 | |
c84ea202 | 53 | VERBOSE (DEBUG, PRINTOUT ("start creating occurence table\n")); |
58352bb0 LM |
54 | |
55 | /* open file */ | |
bf1d9554 LM |
56 | fid = open (filename, O_RDONLY|O_RAW); |
57 | if (fid == -1) { | |
125462cf | 58 | VERBOSE (ERROR, PRINTERR ("can't open file '%s'\n", filename)); |
58352bb0 LM |
59 | return NULL; |
60 | } | |
c84ea202 | 61 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", filename)); |
58352bb0 LM |
62 | |
63 | /* read file */ | |
bf1d9554 | 64 | while ((nbread = read (fid, buffer, BUFFER_SIZE)) > 0) { |
c84ea202 | 65 | VERBOSE (DEBUG, PRINTOUT ("nbread: %d\n", nbread)); |
58352bb0 LM |
66 | while (nbread--) { |
67 | table[(int)buffer[nbread]]++; | |
68 | } | |
69 | } | |
70 | ||
71 | /* close file */ | |
bf1d9554 | 72 | close (fid); |
58352bb0 | 73 | |
c84ea202 | 74 | VERBOSE (DEBUG, PRINTOUT ("end creating occurence table\n")); |
58352bb0 LM |
75 | |
76 | return table; | |
77 | } | |
78 | ||
79 | /* print occurrence table */ | |
80 | ||
81 | void print_occ_table (int *table) | |
82 | { | |
83 | int i; | |
84 | ||
c84ea202 | 85 | PRINTOUT ("Occurence table\n"); |
c9987f3b | 86 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 87 | if (table[i]) { |
c84ea202 | 88 | PRINTOUT ("0x%02x '%c': %d\n", i, ((i < 32) || (i > 127)) ? '.' : i, table[i]); |
58352bb0 LM |
89 | } |
90 | } | |
91 | } | |
92 | ||
58352bb0 LM |
93 | /* initialize forest */ |
94 | ||
95 | leaf_t **init_forest (int *table) | |
96 | { | |
c84ea202 | 97 | static leaf_t *leafs[NB_BYTES] = {0}; |
58352bb0 LM |
98 | int nb_leafs = 0; |
99 | int i, l; | |
100 | ||
c84ea202 | 101 | VERBOSE (DEBUG, PRINTOUT ("start initiliazing forest\n")); |
58352bb0 LM |
102 | |
103 | /* count number of leafs */ | |
c9987f3b | 104 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 LM |
105 | if (table[i] > 0) { |
106 | nb_leafs++; | |
107 | } | |
108 | } | |
109 | ||
58352bb0 | 110 | /* initialize leafs */ |
c9987f3b | 111 | for (i = 0, l = 0; i < NB_BYTES; i++) { |
58352bb0 | 112 | if (table[i] > 0) { |
c84ea202 | 113 | leafs[l] = getleaf (1); |
58352bb0 | 114 | if (leafs[l] == NULL) { |
e75046fb | 115 | VERBOSE (ERROR, PRINTERR ("can't allocate memory\n")); |
58352bb0 LM |
116 | return NULL; |
117 | } | |
118 | leafs[l]->occ = table[i]; | |
119 | leafs[l]->c = i; | |
120 | l++; | |
121 | } | |
122 | } | |
123 | ||
c84ea202 | 124 | VERBOSE (DEBUG, PRINTOUT ("end initiliazing forest\n")); |
58352bb0 LM |
125 | |
126 | return leafs; | |
127 | } | |
128 | ||
129 | /* create tree */ | |
130 | ||
131 | leaf_t *create_tree (leaf_t **leafs) | |
132 | { | |
133 | leaf_t *branch = NULL; | |
134 | int nb_leafs = 0; | |
37062814 LM |
135 | int last = -1; |
136 | int ante; | |
58352bb0 LM |
137 | int i, j; |
138 | ||
c84ea202 | 139 | VERBOSE (DEBUG, PRINTOUT ("start creating tree\n")); |
58352bb0 LM |
140 | |
141 | /* count number of leafs */ | |
142 | while (leafs[nb_leafs] != NULL) { | |
143 | nb_leafs++; | |
144 | } | |
145 | ||
146 | /* create tree */ | |
147 | for (j = 0; j < nb_leafs - 1; j++) { | |
148 | ||
149 | /* look for leatest occurence */ | |
150 | last = -1; | |
151 | for (i = 0; i < nb_leafs; i++) { | |
152 | if (leafs[i] == NULL) { | |
153 | continue; | |
154 | } | |
155 | if ((last == -1) || (leafs[i]->occ < leafs[last]->occ)) { | |
156 | last = i; | |
157 | } | |
158 | } | |
159 | ||
160 | /* look for ante leatest occurence */ | |
161 | ante = -1; | |
162 | for (i = 0; i < nb_leafs; i++) { | |
163 | if ((i == last) || (leafs[i] == NULL)) { | |
164 | continue; | |
165 | } | |
166 | if ((ante == -1) || (leafs[i]->occ < leafs[ante]->occ)) { | |
167 | ante = i; | |
168 | } | |
169 | } | |
170 | ||
171 | /* create branch */ | |
172 | if ((last == -1) || (ante == -1)) { | |
e75046fb | 173 | VERBOSE (ERROR, PRINTERR ("error during tree building\n")); |
58352bb0 LM |
174 | return NULL; |
175 | } | |
c84ea202 | 176 | branch = getleaf (1); |
58352bb0 | 177 | if (branch == NULL) { |
e75046fb | 178 | VERBOSE (ERROR, PRINTERR ("can't allocate memory\n")); |
58352bb0 LM |
179 | return NULL; |
180 | } | |
181 | branch->left = leafs[last]; | |
182 | branch->right = leafs[ante]; | |
183 | branch->occ = branch->left->occ + branch->right->occ; | |
184 | leafs[last] = branch; | |
185 | leafs[ante] = NULL; | |
186 | } | |
187 | ||
c84ea202 | 188 | VERBOSE (DEBUG, PRINTOUT ("end creating tree\n")); |
58352bb0 | 189 | |
37062814 | 190 | return (last != -1) ? leafs[last] : NULL; |
58352bb0 LM |
191 | } |
192 | ||
58352bb0 LM |
193 | /* explore tree */ |
194 | ||
195 | void explore_tree (code_t *table, leaf_t *root, char *code, int index) | |
196 | { | |
c84ea202 LM |
197 | |
198 | VERBOSE (DEBUG, PRINTOUT ("start exploring code tree\n")); | |
199 | ||
58352bb0 | 200 | if ((root->left == NULL) && (root->right == NULL)) { |
c9987f3b | 201 | codcpy ((char *)(table + (int)(root->c)), sizeof (code_t), code); |
58352bb0 LM |
202 | } |
203 | else { | |
c9987f3b | 204 | codcpy (code + index, sizeof (code_t), "1"); |
58352bb0 | 205 | explore_tree (table, root->left, code, index + 1); |
c9987f3b | 206 | codcpy (code + index, sizeof (code_t), "0"); |
58352bb0 LM |
207 | explore_tree (table, root->right, code, index + 1); |
208 | } | |
c84ea202 LM |
209 | |
210 | VERBOSE (DEBUG, PRINTOUT ("end exploring code tree\n")); | |
58352bb0 LM |
211 | } |
212 | ||
213 | /* create code table */ | |
58352bb0 LM |
214 | code_t *create_code (leaf_t *root) |
215 | { | |
c84ea202 | 216 | static code_t table[NB_BYTES] = {0}; |
58352bb0 LM |
217 | code_t code = {0}; |
218 | ||
c84ea202 | 219 | VERBOSE (DEBUG, PRINTOUT ("start creating code table\n")); |
58352bb0 LM |
220 | |
221 | explore_tree (table, root, (char *)&code, 0); | |
222 | ||
c84ea202 | 223 | VERBOSE (DEBUG, PRINTOUT ("end creating code table\n")); |
58352bb0 LM |
224 | |
225 | return table; | |
226 | } | |
227 | ||
228 | /* print code table */ | |
229 | ||
230 | void print_code_table (code_t *codes) | |
231 | { | |
232 | char *code; | |
233 | int i; | |
234 | ||
c84ea202 | 235 | PRINTOUT ("Code table\n"); |
c9987f3b | 236 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 237 | code = (char *)(codes + i); |
c9987f3b | 238 | if (codlen (code) == 0) { |
58352bb0 LM |
239 | continue; |
240 | } | |
c84ea202 | 241 | PRINTOUT ("0x%02x '%c': %s\n", i, ((i < 32) || (i > 127)) ? '.' : i, code); |
58352bb0 LM |
242 | } |
243 | } | |
244 | ||
245 | /* encode header and code table */ | |
246 | ||
c9987f3b | 247 | byte_t *encode_header_table (code_t *codes, int *occ) |
58352bb0 | 248 | { |
c84ea202 | 249 | static byte_t buffer[NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6] = {0}; |
c9987f3b | 250 | char bits[(NB_BYTES - 1) + 8 + 1] = {0}; |
58352bb0 | 251 | char *code; |
c9987f3b | 252 | byte_t *header = buffer; |
58352bb0 LM |
253 | int i, j, length, mode; |
254 | int nb = 0; | |
255 | int size = 0; | |
256 | ||
c84ea202 | 257 | VERBOSE (DEBUG, PRINTOUT ("start encoding header and code table\n")); |
58352bb0 LM |
258 | |
259 | /* mode 1 or 2 */ | |
c9987f3b | 260 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 261 | code = (char *)(codes + i); |
c9987f3b | 262 | if (codlen (code) > 0) { |
58352bb0 | 263 | nb++; |
c9987f3b | 264 | size += codlen (code) * occ[i]; |
58352bb0 LM |
265 | } |
266 | } | |
c9987f3b | 267 | mode = (NB_BYTES < 2 * nb + 1) ? 1 : 2; |
c84ea202 LM |
268 | VERBOSE (DEBUG, PRINTOUT ("nb chars: %d\n", nb)); |
269 | VERBOSE (DEBUG, PRINTOUT ("mode: %d\n", mode)); | |
270 | VERBOSE (DEBUG, PRINTOUT ("size: %d\n", size)); | |
271 | VERBOSE (DEBUG, PRINTOUT ("rem: %d\n", size % 256)); | |
58352bb0 LM |
272 | |
273 | /* header */ | |
c9987f3b | 274 | codcpy ((char *)header, sizeof (buffer), (mode == 1) ? "MZ1 " : "MZ2 "); |
58352bb0 LM |
275 | header += 6; |
276 | ||
277 | /* size */ | |
278 | switch (mode) { | |
279 | case 1: | |
c9987f3b | 280 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 281 | code = (char *)(codes + i); |
c9987f3b | 282 | *(header++) = (byte_t) codlen (code); |
58352bb0 LM |
283 | } |
284 | break; | |
285 | case 2: | |
c9987f3b LM |
286 | *(header++) = (byte_t)(nb - 1); |
287 | for (i = 0; i < NB_BYTES; i++) { | |
58352bb0 | 288 | code = (char *)(codes + i); |
c9987f3b LM |
289 | if (codlen (code) > 0) { |
290 | *(header++) = (byte_t) i; | |
291 | *(header++) = (byte_t) codlen (code); | |
58352bb0 LM |
292 | } |
293 | } | |
294 | break; | |
295 | } | |
296 | ||
297 | /* bits */ | |
c9987f3b | 298 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 299 | code = (char *)(codes + i); |
c9987f3b LM |
300 | if (codlen (code) > 0) { |
301 | codcat (bits, sizeof (code_t), code); | |
302 | while (codlen (bits) > (8 - 1)) { | |
58352bb0 LM |
303 | for (j = 0; j < 8; j++) { |
304 | *header <<= 1; | |
305 | if (bits[j] == '1') { | |
306 | (*header)++; | |
307 | } | |
308 | } | |
c9987f3b | 309 | codcpy (bits, sizeof (code_t), bits + 8); |
58352bb0 LM |
310 | header++; |
311 | } | |
312 | } | |
313 | } | |
c9987f3b LM |
314 | if (codlen (bits) > 0) { |
315 | for (j = 0; j < (int)codlen (bits); j++) { | |
58352bb0 LM |
316 | *header <<= 1; |
317 | if (bits[j] == '1') { | |
318 | (*header)++; | |
319 | } | |
320 | } | |
c9987f3b | 321 | for (j = (int)codlen (bits); j < 8; j++) { |
37062814 LM |
322 | *header <<= 1; |
323 | } | |
58352bb0 LM |
324 | header++; |
325 | } | |
326 | ||
327 | /* length */ | |
328 | length = (int)(header - buffer - 6); | |
c84ea202 | 329 | VERBOSE (DEBUG, PRINTOUT ("lengh: %d %02x %02x\n", length, length >> 8, length & 0xff)); |
c9987f3b LM |
330 | buffer[3] = (byte_t)(length >> 8); |
331 | buffer[4] = (byte_t)(length & 0xff); | |
332 | buffer[5] = (byte_t)(size % 256); | |
c84ea202 | 333 | header = buffer; |
58352bb0 | 334 | |
c84ea202 | 335 | VERBOSE (DEBUG, PRINTOUT ("end encoding header and code table\n")); |
58352bb0 | 336 | |
c9987f3b | 337 | return header; |
58352bb0 LM |
338 | } |
339 | ||
340 | /* print header */ | |
341 | ||
c9987f3b | 342 | void print_header (byte_t *header) |
58352bb0 LM |
343 | { |
344 | int length, i; | |
345 | ||
c9987f3b | 346 | length = (header[3] << 8) + header[4]; |
c84ea202 | 347 | VERBOSE (DEBUG, PRINTOUT ("lengh: %d\n", length)); |
58352bb0 | 348 | for (i = 0; i < length + 6; i++) { |
c84ea202 | 349 | PRINTOUT ("%02x", header[i]); |
58352bb0 | 350 | } |
c84ea202 | 351 | PRINTOUT ("\n"); |
58352bb0 LM |
352 | } |
353 | ||
354 | /* write compressed file */ | |
355 | ||
c9987f3b | 356 | int write_compress (char *output, char *input, code_t *codes, byte_t *header) |
58352bb0 | 357 | { |
c9987f3b LM |
358 | byte_t bufin[BUFFER_SIZE] = {0}; |
359 | byte_t bufout[BUFFER_SIZE] = {0}; | |
360 | char bits[(NB_BYTES - 1) + 8 + 1] = {0}; | |
bf1d9554 | 361 | int fin, fout; |
58352bb0 | 362 | int length = 0; |
d7d2982c | 363 | int i, j, nbread, nbwrite; |
c9987f3b | 364 | byte_t *pt; |
58352bb0 | 365 | |
c84ea202 | 366 | VERBOSE (DEBUG, PRINTOUT ("start writting compressed file\n")); |
58352bb0 LM |
367 | |
368 | /* open input file */ | |
bf1d9554 LM |
369 | fin = open (input, O_RDONLY|O_RAW); |
370 | if (fin == -1) { | |
e75046fb | 371 | VERBOSE (ERROR, PRINTERR ("can't open file '%s' for reading\n", input)); |
58352bb0 LM |
372 | return 1; |
373 | } | |
c84ea202 | 374 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", input)); |
58352bb0 LM |
375 | |
376 | /* open output file */ | |
bf1d9554 LM |
377 | fout = open (output, O_WRONLY|O_CREAT|O_RAW, 0700); |
378 | if (fout == -1) { | |
e75046fb | 379 | VERBOSE (ERROR, PRINTERR ("can't open file '%s' for writing\n", output)); |
bf1d9554 | 380 | close (fin); |
58352bb0 LM |
381 | return 1; |
382 | } | |
c84ea202 | 383 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", output)); |
58352bb0 LM |
384 | |
385 | /* write header */ | |
c9987f3b | 386 | length = (header[3] << 8) + header[4]; |
c84ea202 | 387 | VERBOSE (DEBUG, PRINTOUT ("lengh: %d\n", length)); |
d7d2982c LM |
388 | nbwrite = write (fout, header, length + 6); |
389 | if (nbwrite != length + 6) { | |
390 | VERBOSE (ERROR, PRINTERR ("can't write %d bytes in file '%s'\n", length + 6 - nbwrite, output)); | |
391 | close (fout); | |
392 | close (fin); | |
393 | return 1; | |
394 | } | |
395 | ||
58352bb0 LM |
396 | |
397 | /* write file */ | |
398 | pt = bufout; | |
bf1d9554 | 399 | while ((nbread = read (fin, bufin, BUFFER_SIZE)) > 0) { |
c84ea202 | 400 | VERBOSE (DEBUG, PRINTOUT ("nbread: %d\n", nbread)); |
58352bb0 | 401 | for (i = 0; i < nbread; i++) { |
c9987f3b LM |
402 | codcat (bits, sizeof (code_t), (char *)(codes + bufin[i])); |
403 | while (codlen (bits) > (8 - 1)) { | |
58352bb0 LM |
404 | for (j = 0; j < 8; j++) { |
405 | *pt <<= 1; | |
406 | if (bits[j] == '1') { | |
407 | (*pt)++; | |
408 | } | |
409 | } | |
c9987f3b | 410 | codcpy (bits, sizeof (code_t), bits + 8); |
37062814 | 411 | if (pt - bufout == BUFFER_SIZE - 1) { |
d7d2982c LM |
412 | nbwrite = write (fout, bufout, BUFFER_SIZE); |
413 | if (nbwrite != BUFFER_SIZE) { | |
414 | VERBOSE (ERROR, PRINTERR ("can't write %d bytes in file '%s'\n", BUFFER_SIZE - nbwrite, output)); | |
415 | close (fout); | |
416 | close (fin); | |
417 | return 1; | |
418 | } | |
58352bb0 | 419 | pt = bufout; |
37062814 LM |
420 | } else { |
421 | pt++; | |
58352bb0 LM |
422 | } |
423 | } | |
424 | } | |
425 | } | |
c84ea202 | 426 | VERBOSE (DEBUG, PRINTOUT ("lastest bits : %d\n", codlen (bits))); |
c9987f3b LM |
427 | if (codlen (bits) > 0) { |
428 | for (j = 0; j < (int)codlen (bits); j++) { | |
58352bb0 LM |
429 | *pt <<= 1; |
430 | if (bits[j] == '1') { | |
431 | (*pt)++; | |
432 | } | |
433 | } | |
c9987f3b | 434 | for (j = (int)codlen (bits); j < 8; j++) { |
37062814 | 435 | *pt <<= 1; |
58352bb0 LM |
436 | } |
437 | pt++; | |
438 | } | |
439 | if (pt != bufout) { | |
c84ea202 | 440 | VERBOSE (DEBUG, PRINTOUT ("last partial buffer written: %u\n", pt - bufout)); |
d7d2982c LM |
441 | nbwrite = write (fout, bufout, pt - bufout); |
442 | if (nbwrite != pt - bufout) { | |
443 | VERBOSE (ERROR, PRINTERR ("can't write %d bytes in file '%s'\n", pt - bufout - nbwrite, output)); | |
444 | close (fout); | |
445 | close (fin); | |
446 | return 1; | |
447 | } | |
58352bb0 LM |
448 | } |
449 | ||
450 | /* closing */ | |
bf1d9554 LM |
451 | close (fin); |
452 | close (fout); | |
58352bb0 | 453 | |
c84ea202 | 454 | VERBOSE (DEBUG, PRINTOUT ("end writting compressed file\n")); |
58352bb0 LM |
455 | |
456 | return 0; | |
457 | } | |
458 | ||
37062814 LM |
459 | /* read header */ |
460 | ||
461 | code_t *read_header (char *filename) { | |
c84ea202 | 462 | static code_t table[NB_BYTES] = {0}; |
c9987f3b | 463 | byte_t buffer[NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6] = {0}; |
c9987f3b LM |
464 | byte_t *codes = NULL; |
465 | byte_t cur; | |
466 | int lengths[NB_BYTES] = {0}; | |
bf1d9554 | 467 | int fid; |
37062814 | 468 | int mode = 0; |
bf1d9554 | 469 | int i, j, l, nb, size; |
37062814 | 470 | |
c84ea202 | 471 | VERBOSE (DEBUG, PRINTOUT ("start reading header\n")); |
37062814 LM |
472 | |
473 | /* open file */ | |
bf1d9554 LM |
474 | fid = open (filename, O_RDONLY|O_RAW); |
475 | if (fid == -1) { | |
e75046fb | 476 | VERBOSE (ERROR, PRINTERR ("can't open file '%s'\n", filename)); |
37062814 LM |
477 | return NULL; |
478 | } | |
c84ea202 | 479 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", filename)); |
37062814 LM |
480 | |
481 | /* read magic number */ | |
bf1d9554 | 482 | nb = read (fid, buffer, 6); |
c84ea202 | 483 | VERBOSE (DEBUG, PRINTOUT ("nb, buffer: %d 0x%02x 0x%02x\n", nb, buffer[0], buffer[1])); |
37062814 LM |
484 | if ((nb == 6) && (buffer[0] == 'M') && (buffer[1] == 'Z')) { |
485 | mode = (buffer[2] == '1') ? 1 : (buffer[2] == '2') ? 2 : 0; | |
c9987f3b | 486 | size = (buffer[3] << 8) + buffer[4]; |
c84ea202 | 487 | VERBOSE (DEBUG, PRINTOUT ("mode, size: %d %d\n", mode, size)); |
c9987f3b | 488 | if (size > NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES) { |
37062814 LM |
489 | mode = 0; |
490 | } else { | |
bf1d9554 | 491 | nb = read (fid, buffer, size); |
c84ea202 | 492 | VERBOSE (DEBUG, PRINTOUT ("nb read: %d/%d\n", nb, size)); |
37062814 LM |
493 | if (nb != size) { |
494 | mode = 0; | |
495 | } | |
496 | } | |
497 | } | |
bf1d9554 | 498 | close (fid); |
37062814 | 499 | if (mode == 0) { |
e75046fb | 500 | VERBOSE (ERROR, PRINTERR ("incorrect file\n")); |
37062814 LM |
501 | return NULL; |
502 | } | |
503 | ||
504 | /* analyse header */ | |
c9987f3b | 505 | codes = buffer; |
37062814 LM |
506 | switch (mode) { |
507 | case 1: | |
c9987f3b | 508 | for (i = 0; i < NB_BYTES; i++) { |
37062814 LM |
509 | lengths[i] = *(codes++); |
510 | } | |
511 | break; | |
512 | case 2: | |
513 | nb = *(codes++) + 1; | |
c84ea202 | 514 | VERBOSE (DEBUG, PRINTOUT ("nb codes: %d\n", nb)); |
37062814 LM |
515 | for (i = 0; i < nb; i++) { |
516 | j = *(codes++); | |
517 | lengths[j] = *(codes++); | |
518 | } | |
519 | break; | |
520 | } | |
c84ea202 | 521 | VERBOSE (DEBUG, for (i = 0; i < NB_BYTES; i++) if (lengths[i]) PRINTOUT ("%d: %d\n", i, lengths[i])); |
37062814 LM |
522 | |
523 | /* check lengths */ | |
c9987f3b | 524 | for (i = 0, l = 0; i < NB_BYTES; i++) { |
37062814 LM |
525 | l += lengths[i]; |
526 | } | |
527 | if (((mode == 1) && (size - 256 != (l + 7) / 8)) || | |
528 | ((mode == 2) && (size - 2 * nb - 1 != (l + 7) / 8))) { | |
e75046fb | 529 | VERBOSE (ERROR, PRINTERR ("incorrect code table length: %d %d %d\n", size, nb, l)); |
37062814 LM |
530 | return NULL; |
531 | } | |
532 | ||
533 | /* decode code */ | |
534 | cur = *(codes++); | |
535 | l = 8; | |
c9987f3b | 536 | for (i = 0; i < NB_BYTES; i++) { |
37062814 LM |
537 | if (lengths[i] == 0) { |
538 | continue; | |
539 | } | |
540 | while (lengths[i]--) { | |
c9987f3b | 541 | codcat ((char *)(table + i), sizeof (code_t), ((cur & 0x80) == 0) ? "0" : "1"); |
37062814 LM |
542 | l--; |
543 | cur <<= 1; | |
544 | if (l == 0) { | |
545 | cur = *(codes++); | |
546 | l = 8; | |
547 | } | |
548 | } | |
549 | } | |
550 | ||
c84ea202 | 551 | VERBOSE (DEBUG, PRINTOUT ("end reading header\n")); |
37062814 LM |
552 | |
553 | return table; | |
554 | } | |
555 | ||
556 | /* write decompressed file */ | |
557 | ||
558 | int write_decompress (char *output, char *input, code_t *codes) | |
559 | { | |
c9987f3b LM |
560 | byte_t bufin[BUFFER_SIZE] = {0}; |
561 | byte_t bufout[BUFFER_SIZE] = {0}; | |
562 | byte_t bufhea[MAX(NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6, BUFFER_SIZE)] = {0}; | |
563 | char bits[(NB_BYTES - 1) + 1] = {0}; | |
bf1d9554 | 564 | int fin, fout; |
d7d2982c | 565 | int i, j, k, nb, size, nbwrite, rem; |
37062814 LM |
566 | int is_found; |
567 | int l = 0; | |
c9987f3b | 568 | byte_t *pt; |
37062814 | 569 | |
c84ea202 | 570 | VERBOSE (DEBUG, PRINTOUT ("start writing decompressed file\n")); |
37062814 LM |
571 | |
572 | /* open file for reading */ | |
bf1d9554 LM |
573 | fin = open (input, O_RDONLY|O_RAW); |
574 | if (fin == -1) { | |
e75046fb | 575 | VERBOSE (ERROR, PRINTERR ("can't open file '%s' for reading\n", input)); |
37062814 LM |
576 | return 1; |
577 | } | |
c84ea202 | 578 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", input)); |
37062814 LM |
579 | |
580 | /* read magic number */ | |
bf1d9554 | 581 | nb = read (fin, bufhea, 6); |
37062814 | 582 | if (nb != 6) { |
e75046fb | 583 | VERBOSE (ERROR, PRINTERR ("can't read file\n")); |
bf1d9554 | 584 | close (fin); |
37062814 LM |
585 | return 1; |
586 | } | |
c9987f3b | 587 | size = (bufhea[3] << 8) + bufhea[4]; |
c84ea202 | 588 | VERBOSE (DEBUG, PRINTOUT ("table size: %d\n", size)); |
c9987f3b | 589 | rem = bufhea[5]; |
c84ea202 | 590 | VERBOSE (DEBUG, PRINTOUT ("remainder: %d\n", rem)); |
bf1d9554 | 591 | nb = read (fin, bufhea, size); |
37062814 | 592 | if (nb != size) { |
e75046fb | 593 | VERBOSE (ERROR, PRINTERR ("can't read file\n")); |
bf1d9554 | 594 | close (fin); |
37062814 LM |
595 | return 1; |
596 | } | |
597 | ||
598 | /* open file for writing */ | |
bf1d9554 LM |
599 | fout = open (output, O_WRONLY|O_CREAT|O_RAW, 0700); |
600 | if (fout == -1) { | |
e75046fb | 601 | VERBOSE (ERROR, PRINTERR ("can't open file '%s' for writing\n", output)); |
bf1d9554 | 602 | close (fin); |
d7d2982c | 603 | return 1; |
37062814 | 604 | } |
c84ea202 | 605 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", output)); |
37062814 LM |
606 | |
607 | /* write file */ | |
608 | pt = bufout; | |
bf1d9554 | 609 | while ((nb = read (fin, bufin, BUFFER_SIZE)) > 0) { |
c84ea202 | 610 | VERBOSE (DEBUG, PRINTOUT ("nbread: %d\n", nb)); |
37062814 LM |
611 | for (i = 0; i < nb; i++) { |
612 | for (j = 0; j < 8; j++) { | |
c9987f3b | 613 | codcat (bits, sizeof (bits), ((bufin[i] & 0x80) == 0) ? "0" : "1"); |
37062814 LM |
614 | bufin[i] <<= 1; |
615 | l++; | |
c84ea202 | 616 | VERBOSE (DEBUG, PRINTOUT ("bits: %d - %s\n", codlen (bits), bits)); |
37062814 LM |
617 | |
618 | /* look for correct code */ | |
619 | is_found = 0; | |
c9987f3b LM |
620 | for (k = 0; (k < NB_BYTES) && (!is_found); k++) { |
621 | if (codcmp ((char *)(codes + k), bits) == 0) { | |
37062814 | 622 | is_found = 1; |
c84ea202 | 623 | VERBOSE (DEBUG, PRINTOUT ("found: %d\n", k)); |
37062814 LM |
624 | *pt= k; |
625 | bits[0] = 0; | |
626 | if (pt - bufout == BUFFER_SIZE - 1) { | |
c84ea202 | 627 | VERBOSE (DEBUG, PRINTOUT ("nb buffer out: %u\n", (pt - bufout))); |
d7d2982c LM |
628 | nbwrite = write (fout, bufout, BUFFER_SIZE); |
629 | if (nbwrite != BUFFER_SIZE) { | |
630 | VERBOSE (ERROR, PRINTERR ("can't write %d bytes in file '%s'\n'", BUFFER_SIZE - nbwrite, output)); | |
631 | close (fout); | |
632 | close (fin); | |
633 | return 1; | |
634 | } | |
37062814 LM |
635 | pt = bufout; |
636 | } else { | |
637 | pt++; | |
638 | } | |
639 | } | |
640 | } | |
bf1d9554 | 641 | if ((i == nb - 1) && (l % 256 == rem) && (nb != BUFFER_SIZE)) { |
c84ea202 | 642 | VERBOSE (DEBUG, PRINTOUT ("break\n")); |
37062814 LM |
643 | break; |
644 | } | |
645 | } | |
646 | } | |
647 | } | |
648 | if (pt != bufout) { | |
c84ea202 | 649 | VERBOSE (DEBUG, PRINTOUT ("nb buffer out: %u\n", (pt - bufout))); |
d7d2982c LM |
650 | nbwrite = write (fout, bufout, pt - bufout); |
651 | if (nbwrite != pt - bufout) { | |
652 | VERBOSE (ERROR, PRINTERR ("can't write %d bytes in file '%s'\n'", pt - bufout - nbwrite, output)); | |
653 | close (fout); | |
654 | close (fin); | |
655 | return 1; | |
656 | } | |
37062814 LM |
657 | } |
658 | ||
659 | /* close files */ | |
bf1d9554 LM |
660 | close (fin); |
661 | close (fout); | |
37062814 | 662 | |
c84ea202 | 663 | VERBOSE (DEBUG, PRINTOUT ("end writing decompressed file\n")); |
37062814 LM |
664 | |
665 | return 0; | |
666 | } | |
667 | ||
58352bb0 LM |
668 | /* main function */ |
669 | ||
670 | int main (int argc, char *argv[]) | |
671 | { | |
672 | char *input = NULL; | |
673 | char *output = NULL; | |
674 | int *table = NULL; | |
675 | leaf_t **leafs = NULL; | |
676 | leaf_t *root = NULL; | |
677 | code_t *codes = NULL; | |
c9987f3b | 678 | byte_t *header = NULL; |
58352bb0 | 679 | int mode = COMPRESS; |
37062814 | 680 | int rc = 1; |
58352bb0 LM |
681 | |
682 | progname = argv[0]; | |
683 | ||
684 | int c; | |
d3dbaf98 | 685 | char * arg; |
c84ea202 | 686 | VERBOSE (DEBUG, PRINTOUT ("start processing arguments\n")); |
d3dbaf98 LM |
687 | while (argc-- > 1) { |
688 | arg = *(++argv); | |
689 | if (arg[0] != '-') { | |
c84ea202 LM |
690 | PRINTERR ("%s: invalid option -- %s\n", progname, arg); |
691 | return usage (1); | |
d3dbaf98 LM |
692 | } |
693 | c = arg[1]; | |
c84ea202 | 694 | VERBOSE (DEBUG, PRINTOUT ("option: %c\n", c)); |
58352bb0 LM |
695 | switch (c) { |
696 | case 'c': | |
697 | mode = COMPRESS; | |
698 | break; | |
699 | case 'd': | |
700 | mode = DECOMPRESS; | |
701 | break; | |
702 | case 'i': | |
d3dbaf98 | 703 | input = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
c84ea202 | 704 | VERBOSE (DEBUG, PRINTOUT ("input: %s\n", input)); |
58352bb0 LM |
705 | break; |
706 | case 'o': | |
d3dbaf98 | 707 | output = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
c84ea202 | 708 | VERBOSE (DEBUG, PRINTOUT ("output: %s\n", output)); |
58352bb0 LM |
709 | break; |
710 | case 'v': | |
d3dbaf98 LM |
711 | arg = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
712 | if (arg == NULL) { | |
c84ea202 LM |
713 | PRINTERR ("%s: missing verbose level\n", progname); |
714 | return usage (1); | |
d3dbaf98 | 715 | } |
5e0c5bc8 | 716 | verbose = myatoi (arg); |
c84ea202 | 717 | VERBOSE (INFO, PRINTOUT ("verbose: %d\n", verbose)); |
58352bb0 LM |
718 | break; |
719 | case 'h': | |
720 | default: | |
c84ea202 | 721 | return usage (c != 'h'); |
58352bb0 LM |
722 | } |
723 | } | |
d3dbaf98 | 724 | if ((input == NULL) || (output == NULL)) { |
c84ea202 LM |
725 | PRINTERR ("%s: missing file\n", progname); |
726 | return usage (1); | |
58352bb0 | 727 | } |
c84ea202 | 728 | VERBOSE (DEBUG, PRINTOUT ("end processing arguments\n")); |
58352bb0 LM |
729 | |
730 | switch (mode) { | |
731 | case COMPRESS: | |
732 | table = create_table (input); | |
733 | if (table == NULL) break; | |
734 | VERBOSE (INFO, print_occ_table (table)); | |
735 | ||
736 | leafs = init_forest (table); | |
737 | if (leafs == NULL) break; | |
738 | root = create_tree (leafs); | |
739 | if (root == NULL) break; | |
740 | codes = create_code (root); | |
741 | if (codes == NULL) break; | |
742 | VERBOSE (INFO, print_code_table (codes)); | |
743 | header = encode_header_table (codes, table); | |
744 | if (header == NULL) break; | |
745 | VERBOSE (INFO, print_header (header)); | |
746 | rc = write_compress (output, input, codes, header); | |
747 | break; | |
748 | case DECOMPRESS: | |
37062814 LM |
749 | codes = read_header (input); |
750 | if (codes == NULL) break; | |
751 | VERBOSE (INFO, print_code_table (codes)); | |
752 | rc = write_decompress (output, input, codes); | |
58352bb0 LM |
753 | break; |
754 | } | |
755 | ||
58352bb0 LM |
756 | return rc; |
757 | } | |
758 | ||
759 | // test: compress.exe -h | |
760 | // test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }' | |
761 | // test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }' | |
762 | // test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }' | |
37062814 LM |
763 | // test: compress.exe -c -i compress.c -o compress.mz |
764 | // test: ls -sS1 compress.c compress.mz | tail -1 | grep compress.mz | |
765 | // test: compress.exe -d -i compress.mz -o tmp.c | |
766 | // test: cmp compress.c tmp.c | |
767 | // test: rm compress.mz tmp.c | |
58352bb0 | 768 | |
bf1d9554 | 769 | /* vim: set ts=4 sw=4 et: */ |