Commit | Line | Data |
---|---|---|
58352bb0 LM |
1 | /* depend: */ |
2 | /* cflags: */ | |
5e0c5bc8 | 3 | /* linker: atoi.o code.o debug.o fprintf.o */ |
58352bb0 | 4 | |
bf1d9554 | 5 | #include <fcntl.h> |
bf1d9554 | 6 | #include <unistd.h> |
c84ea202 | 7 | #include <stddef.h> |
5e0c5bc8 | 8 | #include "atoi.h" |
c9987f3b LM |
9 | #include "code.h" |
10 | #include "debug.h" | |
5f83300c | 11 | #include "fprintf.h" |
58352bb0 LM |
12 | |
13 | /* constants */ | |
14 | ||
58352bb0 LM |
/* size, in bytes, of the buffers used for file input and output */
#define BUFFER_SIZE 4096

/* processing modes, selected in main() with -c and -d */
enum {
    COMPRESS = 1,
    DECOMPRESS = 2
};

/* macros */

/* global variables */

/* program name (argv[0]); shown in the usage and error messages */
char *progname = NULL;
58352bb0 LM |
25 | |
26 | /* help function */ | |
27 | ||
c84ea202 | 28 | int usage (int ret) |
58352bb0 | 29 | { |
c84ea202 | 30 | int fd = ret ? _fderr : _fdout; |
92fc2c44 LM |
31 | fdprintf (fd, "usage: %s\n", progname); |
32 | fdprintf (fd, " -h : help message\n"); | |
33 | fdprintf (fd, " -i <file>: input file\n"); | |
34 | fdprintf (fd, " -o <file>: output file\n"); | |
35 | fdprintf (fd, " -v : verbose level (%d)\n", verbose); | |
58352bb0 | 36 | |
c84ea202 | 37 | return ret; |
58352bb0 LM |
38 | } |
39 | ||
40 | /* create occurrence table */ | |
125462cf | 41 | |
58352bb0 LM |
42 | int *create_table (char *filename) |
43 | { | |
c9987f3b | 44 | byte_t buffer[BUFFER_SIZE] = {0}; |
58352bb0 | 45 | int nbread; |
c84ea202 | 46 | static int table[NB_BYTES] = {0}; |
bf1d9554 | 47 | int fid = 0; |
58352bb0 | 48 | |
c84ea202 | 49 | VERBOSE (DEBUG, PRINTOUT ("start creating occurence table\n")); |
58352bb0 LM |
50 | |
51 | /* open file */ | |
bf1d9554 LM |
52 | fid = open (filename, O_RDONLY|O_RAW); |
53 | if (fid == -1) { | |
125462cf | 54 | VERBOSE (ERROR, PRINTERR ("can't open file '%s'\n", filename)); |
58352bb0 LM |
55 | return NULL; |
56 | } | |
c84ea202 | 57 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", filename)); |
58352bb0 LM |
58 | |
59 | /* read file */ | |
bf1d9554 | 60 | while ((nbread = read (fid, buffer, BUFFER_SIZE)) > 0) { |
c84ea202 | 61 | VERBOSE (DEBUG, PRINTOUT ("nbread: %d\n", nbread)); |
58352bb0 LM |
62 | while (nbread--) { |
63 | table[(int)buffer[nbread]]++; | |
64 | } | |
65 | } | |
66 | ||
67 | /* close file */ | |
bf1d9554 | 68 | close (fid); |
58352bb0 | 69 | |
c84ea202 | 70 | VERBOSE (DEBUG, PRINTOUT ("end creating occurence table\n")); |
58352bb0 LM |
71 | |
72 | return table; | |
73 | } | |
74 | ||
75 | /* print occurrence table */ | |
76 | ||
77 | void print_occ_table (int *table) | |
78 | { | |
79 | int i; | |
80 | ||
c84ea202 | 81 | PRINTOUT ("Occurence table\n"); |
c9987f3b | 82 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 83 | if (table[i]) { |
c84ea202 | 84 | PRINTOUT ("0x%02x '%c': %d\n", i, ((i < 32) || (i > 127)) ? '.' : i, table[i]); |
58352bb0 LM |
85 | } |
86 | } | |
87 | } | |
88 | ||
58352bb0 LM |
89 | /* initialize forest */ |
90 | ||
91 | leaf_t **init_forest (int *table) | |
92 | { | |
c84ea202 | 93 | static leaf_t *leafs[NB_BYTES] = {0}; |
58352bb0 LM |
94 | int nb_leafs = 0; |
95 | int i, l; | |
96 | ||
c84ea202 | 97 | VERBOSE (DEBUG, PRINTOUT ("start initiliazing forest\n")); |
58352bb0 LM |
98 | |
99 | /* count number of leafs */ | |
c9987f3b | 100 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 LM |
101 | if (table[i] > 0) { |
102 | nb_leafs++; | |
103 | } | |
104 | } | |
105 | ||
58352bb0 | 106 | /* initialize leafs */ |
c9987f3b | 107 | for (i = 0, l = 0; i < NB_BYTES; i++) { |
58352bb0 | 108 | if (table[i] > 0) { |
c84ea202 | 109 | leafs[l] = getleaf (1); |
58352bb0 | 110 | if (leafs[l] == NULL) { |
e75046fb | 111 | VERBOSE (ERROR, PRINTERR ("can't allocate memory\n")); |
58352bb0 LM |
112 | return NULL; |
113 | } | |
114 | leafs[l]->occ = table[i]; | |
115 | leafs[l]->c = i; | |
116 | l++; | |
117 | } | |
118 | } | |
119 | ||
c84ea202 | 120 | VERBOSE (DEBUG, PRINTOUT ("end initiliazing forest\n")); |
58352bb0 LM |
121 | |
122 | return leafs; | |
123 | } | |
124 | ||
125 | /* create tree */ | |
126 | ||
127 | leaf_t *create_tree (leaf_t **leafs) | |
128 | { | |
129 | leaf_t *branch = NULL; | |
130 | int nb_leafs = 0; | |
37062814 LM |
131 | int last = -1; |
132 | int ante; | |
58352bb0 LM |
133 | int i, j; |
134 | ||
c84ea202 | 135 | VERBOSE (DEBUG, PRINTOUT ("start creating tree\n")); |
58352bb0 LM |
136 | |
137 | /* count number of leafs */ | |
138 | while (leafs[nb_leafs] != NULL) { | |
139 | nb_leafs++; | |
140 | } | |
141 | ||
142 | /* create tree */ | |
143 | for (j = 0; j < nb_leafs - 1; j++) { | |
144 | ||
145 | /* look for leatest occurence */ | |
146 | last = -1; | |
147 | for (i = 0; i < nb_leafs; i++) { | |
148 | if (leafs[i] == NULL) { | |
149 | continue; | |
150 | } | |
151 | if ((last == -1) || (leafs[i]->occ < leafs[last]->occ)) { | |
152 | last = i; | |
153 | } | |
154 | } | |
155 | ||
156 | /* look for ante leatest occurence */ | |
157 | ante = -1; | |
158 | for (i = 0; i < nb_leafs; i++) { | |
159 | if ((i == last) || (leafs[i] == NULL)) { | |
160 | continue; | |
161 | } | |
162 | if ((ante == -1) || (leafs[i]->occ < leafs[ante]->occ)) { | |
163 | ante = i; | |
164 | } | |
165 | } | |
166 | ||
167 | /* create branch */ | |
168 | if ((last == -1) || (ante == -1)) { | |
e75046fb | 169 | VERBOSE (ERROR, PRINTERR ("error during tree building\n")); |
58352bb0 LM |
170 | return NULL; |
171 | } | |
c84ea202 | 172 | branch = getleaf (1); |
58352bb0 | 173 | if (branch == NULL) { |
e75046fb | 174 | VERBOSE (ERROR, PRINTERR ("can't allocate memory\n")); |
58352bb0 LM |
175 | return NULL; |
176 | } | |
177 | branch->left = leafs[last]; | |
178 | branch->right = leafs[ante]; | |
179 | branch->occ = branch->left->occ + branch->right->occ; | |
180 | leafs[last] = branch; | |
181 | leafs[ante] = NULL; | |
182 | } | |
183 | ||
c84ea202 | 184 | VERBOSE (DEBUG, PRINTOUT ("end creating tree\n")); |
58352bb0 | 185 | |
37062814 | 186 | return (last != -1) ? leafs[last] : NULL; |
58352bb0 LM |
187 | } |
188 | ||
58352bb0 LM |
189 | /* explore tree */ |
190 | ||
191 | void explore_tree (code_t *table, leaf_t *root, char *code, int index) | |
192 | { | |
c84ea202 LM |
193 | |
194 | VERBOSE (DEBUG, PRINTOUT ("start exploring code tree\n")); | |
195 | ||
58352bb0 | 196 | if ((root->left == NULL) && (root->right == NULL)) { |
c9987f3b | 197 | codcpy ((char *)(table + (int)(root->c)), sizeof (code_t), code); |
58352bb0 LM |
198 | } |
199 | else { | |
c9987f3b | 200 | codcpy (code + index, sizeof (code_t), "1"); |
58352bb0 | 201 | explore_tree (table, root->left, code, index + 1); |
c9987f3b | 202 | codcpy (code + index, sizeof (code_t), "0"); |
58352bb0 LM |
203 | explore_tree (table, root->right, code, index + 1); |
204 | } | |
c84ea202 LM |
205 | |
206 | VERBOSE (DEBUG, PRINTOUT ("end exploring code tree\n")); | |
58352bb0 LM |
207 | } |
208 | ||
209 | /* create code table */ | |
58352bb0 LM |
210 | code_t *create_code (leaf_t *root) |
211 | { | |
c84ea202 | 212 | static code_t table[NB_BYTES] = {0}; |
58352bb0 LM |
213 | code_t code = {0}; |
214 | ||
c84ea202 | 215 | VERBOSE (DEBUG, PRINTOUT ("start creating code table\n")); |
58352bb0 LM |
216 | |
217 | explore_tree (table, root, (char *)&code, 0); | |
218 | ||
c84ea202 | 219 | VERBOSE (DEBUG, PRINTOUT ("end creating code table\n")); |
58352bb0 LM |
220 | |
221 | return table; | |
222 | } | |
223 | ||
224 | /* print code table */ | |
225 | ||
226 | void print_code_table (code_t *codes) | |
227 | { | |
228 | char *code; | |
229 | int i; | |
230 | ||
c84ea202 | 231 | PRINTOUT ("Code table\n"); |
c9987f3b | 232 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 233 | code = (char *)(codes + i); |
c9987f3b | 234 | if (codlen (code) == 0) { |
58352bb0 LM |
235 | continue; |
236 | } | |
c84ea202 | 237 | PRINTOUT ("0x%02x '%c': %s\n", i, ((i < 32) || (i > 127)) ? '.' : i, code); |
58352bb0 LM |
238 | } |
239 | } | |
240 | ||
241 | /* encode header and code table */ | |
242 | ||
c9987f3b | 243 | byte_t *encode_header_table (code_t *codes, int *occ) |
58352bb0 | 244 | { |
c84ea202 | 245 | static byte_t buffer[NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6] = {0}; |
c9987f3b | 246 | char bits[(NB_BYTES - 1) + 8 + 1] = {0}; |
58352bb0 | 247 | char *code; |
c9987f3b | 248 | byte_t *header = buffer; |
58352bb0 LM |
249 | int i, j, length, mode; |
250 | int nb = 0; | |
251 | int size = 0; | |
252 | ||
c84ea202 | 253 | VERBOSE (DEBUG, PRINTOUT ("start encoding header and code table\n")); |
58352bb0 LM |
254 | |
255 | /* mode 1 or 2 */ | |
c9987f3b | 256 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 257 | code = (char *)(codes + i); |
c9987f3b | 258 | if (codlen (code) > 0) { |
58352bb0 | 259 | nb++; |
c9987f3b | 260 | size += codlen (code) * occ[i]; |
58352bb0 LM |
261 | } |
262 | } | |
c9987f3b | 263 | mode = (NB_BYTES < 2 * nb + 1) ? 1 : 2; |
c84ea202 LM |
264 | VERBOSE (DEBUG, PRINTOUT ("nb chars: %d\n", nb)); |
265 | VERBOSE (DEBUG, PRINTOUT ("mode: %d\n", mode)); | |
266 | VERBOSE (DEBUG, PRINTOUT ("size: %d\n", size)); | |
267 | VERBOSE (DEBUG, PRINTOUT ("rem: %d\n", size % 256)); | |
58352bb0 LM |
268 | |
269 | /* header */ | |
c9987f3b | 270 | codcpy ((char *)header, sizeof (buffer), (mode == 1) ? "MZ1 " : "MZ2 "); |
58352bb0 LM |
271 | header += 6; |
272 | ||
273 | /* size */ | |
274 | switch (mode) { | |
275 | case 1: | |
c9987f3b | 276 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 277 | code = (char *)(codes + i); |
c9987f3b | 278 | *(header++) = (byte_t) codlen (code); |
58352bb0 LM |
279 | } |
280 | break; | |
281 | case 2: | |
c9987f3b LM |
282 | *(header++) = (byte_t)(nb - 1); |
283 | for (i = 0; i < NB_BYTES; i++) { | |
58352bb0 | 284 | code = (char *)(codes + i); |
c9987f3b LM |
285 | if (codlen (code) > 0) { |
286 | *(header++) = (byte_t) i; | |
287 | *(header++) = (byte_t) codlen (code); | |
58352bb0 LM |
288 | } |
289 | } | |
290 | break; | |
291 | } | |
292 | ||
293 | /* bits */ | |
c9987f3b | 294 | for (i = 0; i < NB_BYTES; i++) { |
58352bb0 | 295 | code = (char *)(codes + i); |
c9987f3b LM |
296 | if (codlen (code) > 0) { |
297 | codcat (bits, sizeof (code_t), code); | |
298 | while (codlen (bits) > (8 - 1)) { | |
58352bb0 LM |
299 | for (j = 0; j < 8; j++) { |
300 | *header <<= 1; | |
301 | if (bits[j] == '1') { | |
302 | (*header)++; | |
303 | } | |
304 | } | |
c9987f3b | 305 | codcpy (bits, sizeof (code_t), bits + 8); |
58352bb0 LM |
306 | header++; |
307 | } | |
308 | } | |
309 | } | |
c9987f3b LM |
310 | if (codlen (bits) > 0) { |
311 | for (j = 0; j < (int)codlen (bits); j++) { | |
58352bb0 LM |
312 | *header <<= 1; |
313 | if (bits[j] == '1') { | |
314 | (*header)++; | |
315 | } | |
316 | } | |
c9987f3b | 317 | for (j = (int)codlen (bits); j < 8; j++) { |
37062814 LM |
318 | *header <<= 1; |
319 | } | |
58352bb0 LM |
320 | header++; |
321 | } | |
322 | ||
323 | /* length */ | |
324 | length = (int)(header - buffer - 6); | |
c84ea202 | 325 | VERBOSE (DEBUG, PRINTOUT ("lengh: %d %02x %02x\n", length, length >> 8, length & 0xff)); |
c9987f3b LM |
326 | buffer[3] = (byte_t)(length >> 8); |
327 | buffer[4] = (byte_t)(length & 0xff); | |
328 | buffer[5] = (byte_t)(size % 256); | |
c84ea202 | 329 | header = buffer; |
58352bb0 | 330 | |
c84ea202 | 331 | VERBOSE (DEBUG, PRINTOUT ("end encoding header and code table\n")); |
58352bb0 | 332 | |
c9987f3b | 333 | return header; |
58352bb0 LM |
334 | } |
335 | ||
336 | /* print header */ | |
337 | ||
c9987f3b | 338 | void print_header (byte_t *header) |
58352bb0 LM |
339 | { |
340 | int length, i; | |
341 | ||
c9987f3b | 342 | length = (header[3] << 8) + header[4]; |
c84ea202 | 343 | VERBOSE (DEBUG, PRINTOUT ("lengh: %d\n", length)); |
58352bb0 | 344 | for (i = 0; i < length + 6; i++) { |
c84ea202 | 345 | PRINTOUT ("%02x", header[i]); |
58352bb0 | 346 | } |
c84ea202 | 347 | PRINTOUT ("\n"); |
58352bb0 LM |
348 | } |
349 | ||
350 | /* write compressed file */ | |
351 | ||
c9987f3b | 352 | int write_compress (char *output, char *input, code_t *codes, byte_t *header) |
58352bb0 | 353 | { |
c9987f3b LM |
354 | byte_t bufin[BUFFER_SIZE] = {0}; |
355 | byte_t bufout[BUFFER_SIZE] = {0}; | |
356 | char bits[(NB_BYTES - 1) + 8 + 1] = {0}; | |
bf1d9554 | 357 | int fin, fout; |
58352bb0 LM |
358 | int length = 0; |
359 | int i, j, nbread; | |
c9987f3b | 360 | byte_t *pt; |
58352bb0 | 361 | |
c84ea202 | 362 | VERBOSE (DEBUG, PRINTOUT ("start writting compressed file\n")); |
58352bb0 LM |
363 | |
364 | /* open input file */ | |
bf1d9554 LM |
365 | fin = open (input, O_RDONLY|O_RAW); |
366 | if (fin == -1) { | |
e75046fb | 367 | VERBOSE (ERROR, PRINTERR ("can't open file '%s' for reading\n", input)); |
58352bb0 LM |
368 | return 1; |
369 | } | |
c84ea202 | 370 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", input)); |
58352bb0 LM |
371 | |
372 | /* open output file */ | |
bf1d9554 LM |
373 | fout = open (output, O_WRONLY|O_CREAT|O_RAW, 0700); |
374 | if (fout == -1) { | |
e75046fb | 375 | VERBOSE (ERROR, PRINTERR ("can't open file '%s' for writing\n", output)); |
bf1d9554 | 376 | close (fin); |
58352bb0 LM |
377 | return 1; |
378 | } | |
c84ea202 | 379 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", output)); |
58352bb0 LM |
380 | |
381 | /* write header */ | |
c9987f3b | 382 | length = (header[3] << 8) + header[4]; |
c84ea202 | 383 | VERBOSE (DEBUG, PRINTOUT ("lengh: %d\n", length)); |
bf1d9554 | 384 | write (fout, header, length + 6); |
58352bb0 LM |
385 | |
386 | /* write file */ | |
387 | pt = bufout; | |
bf1d9554 | 388 | while ((nbread = read (fin, bufin, BUFFER_SIZE)) > 0) { |
c84ea202 | 389 | VERBOSE (DEBUG, PRINTOUT ("nbread: %d\n", nbread)); |
58352bb0 | 390 | for (i = 0; i < nbread; i++) { |
c9987f3b LM |
391 | codcat (bits, sizeof (code_t), (char *)(codes + bufin[i])); |
392 | while (codlen (bits) > (8 - 1)) { | |
58352bb0 LM |
393 | for (j = 0; j < 8; j++) { |
394 | *pt <<= 1; | |
395 | if (bits[j] == '1') { | |
396 | (*pt)++; | |
397 | } | |
398 | } | |
c9987f3b | 399 | codcpy (bits, sizeof (code_t), bits + 8); |
37062814 | 400 | if (pt - bufout == BUFFER_SIZE - 1) { |
bf1d9554 | 401 | write (fout, bufout, BUFFER_SIZE); |
58352bb0 | 402 | pt = bufout; |
37062814 LM |
403 | } else { |
404 | pt++; | |
58352bb0 LM |
405 | } |
406 | } | |
407 | } | |
408 | } | |
c84ea202 | 409 | VERBOSE (DEBUG, PRINTOUT ("lastest bits : %d\n", codlen (bits))); |
c9987f3b LM |
410 | if (codlen (bits) > 0) { |
411 | for (j = 0; j < (int)codlen (bits); j++) { | |
58352bb0 LM |
412 | *pt <<= 1; |
413 | if (bits[j] == '1') { | |
414 | (*pt)++; | |
415 | } | |
416 | } | |
c9987f3b | 417 | for (j = (int)codlen (bits); j < 8; j++) { |
37062814 | 418 | *pt <<= 1; |
58352bb0 LM |
419 | } |
420 | pt++; | |
421 | } | |
422 | if (pt != bufout) { | |
c84ea202 | 423 | VERBOSE (DEBUG, PRINTOUT ("last partial buffer written: %u\n", pt - bufout)); |
bf1d9554 | 424 | write (fout, bufout, pt - bufout); |
58352bb0 LM |
425 | } |
426 | ||
427 | /* closing */ | |
bf1d9554 LM |
428 | close (fin); |
429 | close (fout); | |
58352bb0 | 430 | |
c84ea202 | 431 | VERBOSE (DEBUG, PRINTOUT ("end writting compressed file\n")); |
58352bb0 LM |
432 | |
433 | return 0; | |
434 | } | |
435 | ||
37062814 LM |
436 | /* read header */ |
437 | ||
438 | code_t *read_header (char *filename) { | |
c84ea202 | 439 | static code_t table[NB_BYTES] = {0}; |
c9987f3b | 440 | byte_t buffer[NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6] = {0}; |
c9987f3b LM |
441 | byte_t *codes = NULL; |
442 | byte_t cur; | |
443 | int lengths[NB_BYTES] = {0}; | |
bf1d9554 | 444 | int fid; |
37062814 | 445 | int mode = 0; |
bf1d9554 | 446 | int i, j, l, nb, size; |
37062814 | 447 | |
c84ea202 | 448 | VERBOSE (DEBUG, PRINTOUT ("start reading header\n")); |
37062814 LM |
449 | |
450 | /* open file */ | |
bf1d9554 LM |
451 | fid = open (filename, O_RDONLY|O_RAW); |
452 | if (fid == -1) { | |
e75046fb | 453 | VERBOSE (ERROR, PRINTERR ("can't open file '%s'\n", filename)); |
37062814 LM |
454 | return NULL; |
455 | } | |
c84ea202 | 456 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", filename)); |
37062814 LM |
457 | |
458 | /* read magic number */ | |
bf1d9554 | 459 | nb = read (fid, buffer, 6); |
c84ea202 | 460 | VERBOSE (DEBUG, PRINTOUT ("nb, buffer: %d 0x%02x 0x%02x\n", nb, buffer[0], buffer[1])); |
37062814 LM |
461 | if ((nb == 6) && (buffer[0] == 'M') && (buffer[1] == 'Z')) { |
462 | mode = (buffer[2] == '1') ? 1 : (buffer[2] == '2') ? 2 : 0; | |
c9987f3b | 463 | size = (buffer[3] << 8) + buffer[4]; |
c84ea202 | 464 | VERBOSE (DEBUG, PRINTOUT ("mode, size: %d %d\n", mode, size)); |
c9987f3b | 465 | if (size > NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES) { |
37062814 LM |
466 | mode = 0; |
467 | } else { | |
bf1d9554 | 468 | nb = read (fid, buffer, size); |
c84ea202 | 469 | VERBOSE (DEBUG, PRINTOUT ("nb read: %d/%d\n", nb, size)); |
37062814 LM |
470 | if (nb != size) { |
471 | mode = 0; | |
472 | } | |
473 | } | |
474 | } | |
bf1d9554 | 475 | close (fid); |
37062814 | 476 | if (mode == 0) { |
e75046fb | 477 | VERBOSE (ERROR, PRINTERR ("incorrect file\n")); |
37062814 LM |
478 | return NULL; |
479 | } | |
480 | ||
481 | /* analyse header */ | |
c9987f3b | 482 | codes = buffer; |
37062814 LM |
483 | switch (mode) { |
484 | case 1: | |
c9987f3b | 485 | for (i = 0; i < NB_BYTES; i++) { |
37062814 LM |
486 | lengths[i] = *(codes++); |
487 | } | |
488 | break; | |
489 | case 2: | |
490 | nb = *(codes++) + 1; | |
c84ea202 | 491 | VERBOSE (DEBUG, PRINTOUT ("nb codes: %d\n", nb)); |
37062814 LM |
492 | for (i = 0; i < nb; i++) { |
493 | j = *(codes++); | |
494 | lengths[j] = *(codes++); | |
495 | } | |
496 | break; | |
497 | } | |
c84ea202 | 498 | VERBOSE (DEBUG, for (i = 0; i < NB_BYTES; i++) if (lengths[i]) PRINTOUT ("%d: %d\n", i, lengths[i])); |
37062814 LM |
499 | |
500 | /* check lengths */ | |
c9987f3b | 501 | for (i = 0, l = 0; i < NB_BYTES; i++) { |
37062814 LM |
502 | l += lengths[i]; |
503 | } | |
504 | if (((mode == 1) && (size - 256 != (l + 7) / 8)) || | |
505 | ((mode == 2) && (size - 2 * nb - 1 != (l + 7) / 8))) { | |
e75046fb | 506 | VERBOSE (ERROR, PRINTERR ("incorrect code table length: %d %d %d\n", size, nb, l)); |
37062814 LM |
507 | return NULL; |
508 | } | |
509 | ||
510 | /* decode code */ | |
511 | cur = *(codes++); | |
512 | l = 8; | |
c9987f3b | 513 | for (i = 0; i < NB_BYTES; i++) { |
37062814 LM |
514 | if (lengths[i] == 0) { |
515 | continue; | |
516 | } | |
517 | while (lengths[i]--) { | |
c9987f3b | 518 | codcat ((char *)(table + i), sizeof (code_t), ((cur & 0x80) == 0) ? "0" : "1"); |
37062814 LM |
519 | l--; |
520 | cur <<= 1; | |
521 | if (l == 0) { | |
522 | cur = *(codes++); | |
523 | l = 8; | |
524 | } | |
525 | } | |
526 | } | |
527 | ||
c84ea202 | 528 | VERBOSE (DEBUG, PRINTOUT ("end reading header\n")); |
37062814 LM |
529 | |
530 | return table; | |
531 | } | |
532 | ||
533 | /* write decompressed file */ | |
534 | ||
535 | int write_decompress (char *output, char *input, code_t *codes) | |
536 | { | |
c9987f3b LM |
537 | byte_t bufin[BUFFER_SIZE] = {0}; |
538 | byte_t bufout[BUFFER_SIZE] = {0}; | |
539 | byte_t bufhea[MAX(NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6, BUFFER_SIZE)] = {0}; | |
540 | char bits[(NB_BYTES - 1) + 1] = {0}; | |
bf1d9554 | 541 | int fin, fout; |
37062814 LM |
542 | int i, j, k, nb, size, rem; |
543 | int is_found; | |
544 | int l = 0; | |
c9987f3b | 545 | byte_t *pt; |
37062814 | 546 | |
c84ea202 | 547 | VERBOSE (DEBUG, PRINTOUT ("start writing decompressed file\n")); |
37062814 LM |
548 | |
549 | /* open file for reading */ | |
bf1d9554 LM |
550 | fin = open (input, O_RDONLY|O_RAW); |
551 | if (fin == -1) { | |
e75046fb | 552 | VERBOSE (ERROR, PRINTERR ("can't open file '%s' for reading\n", input)); |
37062814 LM |
553 | return 1; |
554 | } | |
c84ea202 | 555 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", input)); |
37062814 LM |
556 | |
557 | /* read magic number */ | |
bf1d9554 | 558 | nb = read (fin, bufhea, 6); |
37062814 | 559 | if (nb != 6) { |
e75046fb | 560 | VERBOSE (ERROR, PRINTERR ("can't read file\n")); |
bf1d9554 | 561 | close (fin); |
37062814 LM |
562 | return 1; |
563 | } | |
c9987f3b | 564 | size = (bufhea[3] << 8) + bufhea[4]; |
c84ea202 | 565 | VERBOSE (DEBUG, PRINTOUT ("table size: %d\n", size)); |
c9987f3b | 566 | rem = bufhea[5]; |
c84ea202 | 567 | VERBOSE (DEBUG, PRINTOUT ("remainder: %d\n", rem)); |
bf1d9554 | 568 | nb = read (fin, bufhea, size); |
37062814 | 569 | if (nb != size) { |
e75046fb | 570 | VERBOSE (ERROR, PRINTERR ("can't read file\n")); |
bf1d9554 | 571 | close (fin); |
37062814 LM |
572 | return 1; |
573 | } | |
574 | ||
575 | /* open file for writing */ | |
bf1d9554 LM |
576 | fout = open (output, O_WRONLY|O_CREAT|O_RAW, 0700); |
577 | if (fout == -1) { | |
e75046fb | 578 | VERBOSE (ERROR, PRINTERR ("can't open file '%s' for writing\n", output)); |
bf1d9554 LM |
579 | close (fin); |
580 | return 2; | |
37062814 | 581 | } |
c84ea202 | 582 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", output)); |
37062814 LM |
583 | |
584 | /* write file */ | |
585 | pt = bufout; | |
bf1d9554 | 586 | while ((nb = read (fin, bufin, BUFFER_SIZE)) > 0) { |
c84ea202 | 587 | VERBOSE (DEBUG, PRINTOUT ("nbread: %d\n", nb)); |
37062814 LM |
588 | for (i = 0; i < nb; i++) { |
589 | for (j = 0; j < 8; j++) { | |
c9987f3b | 590 | codcat (bits, sizeof (bits), ((bufin[i] & 0x80) == 0) ? "0" : "1"); |
37062814 LM |
591 | bufin[i] <<= 1; |
592 | l++; | |
c84ea202 | 593 | VERBOSE (DEBUG, PRINTOUT ("bits: %d - %s\n", codlen (bits), bits)); |
37062814 LM |
594 | |
595 | /* look for correct code */ | |
596 | is_found = 0; | |
c9987f3b LM |
597 | for (k = 0; (k < NB_BYTES) && (!is_found); k++) { |
598 | if (codcmp ((char *)(codes + k), bits) == 0) { | |
37062814 | 599 | is_found = 1; |
c84ea202 | 600 | VERBOSE (DEBUG, PRINTOUT ("found: %d\n", k)); |
37062814 LM |
601 | *pt= k; |
602 | bits[0] = 0; | |
603 | if (pt - bufout == BUFFER_SIZE - 1) { | |
c84ea202 | 604 | VERBOSE (DEBUG, PRINTOUT ("nb buffer out: %u\n", (pt - bufout))); |
bf1d9554 | 605 | write (fout, bufout, BUFFER_SIZE); |
37062814 LM |
606 | pt = bufout; |
607 | } else { | |
608 | pt++; | |
609 | } | |
610 | } | |
611 | } | |
bf1d9554 | 612 | if ((i == nb - 1) && (l % 256 == rem) && (nb != BUFFER_SIZE)) { |
c84ea202 | 613 | VERBOSE (DEBUG, PRINTOUT ("break\n")); |
37062814 LM |
614 | break; |
615 | } | |
616 | } | |
617 | } | |
618 | } | |
619 | if (pt != bufout) { | |
c84ea202 | 620 | VERBOSE (DEBUG, PRINTOUT ("nb buffer out: %u\n", (pt - bufout))); |
bf1d9554 | 621 | write (fout, bufout, pt - bufout); |
37062814 LM |
622 | } |
623 | ||
624 | /* close files */ | |
bf1d9554 LM |
625 | close (fin); |
626 | close (fout); | |
37062814 | 627 | |
c84ea202 | 628 | VERBOSE (DEBUG, PRINTOUT ("end writing decompressed file\n")); |
37062814 LM |
629 | |
630 | return 0; | |
631 | } | |
632 | ||
58352bb0 LM |
633 | /* main function */ |
634 | ||
635 | int main (int argc, char *argv[]) | |
636 | { | |
637 | char *input = NULL; | |
638 | char *output = NULL; | |
639 | int *table = NULL; | |
640 | leaf_t **leafs = NULL; | |
641 | leaf_t *root = NULL; | |
642 | code_t *codes = NULL; | |
c9987f3b | 643 | byte_t *header = NULL; |
58352bb0 | 644 | int mode = COMPRESS; |
37062814 | 645 | int rc = 1; |
58352bb0 LM |
646 | |
647 | progname = argv[0]; | |
648 | ||
649 | int c; | |
d3dbaf98 | 650 | char * arg; |
c84ea202 | 651 | VERBOSE (DEBUG, PRINTOUT ("start processing arguments\n")); |
d3dbaf98 LM |
652 | while (argc-- > 1) { |
653 | arg = *(++argv); | |
654 | if (arg[0] != '-') { | |
c84ea202 LM |
655 | PRINTERR ("%s: invalid option -- %s\n", progname, arg); |
656 | return usage (1); | |
d3dbaf98 LM |
657 | } |
658 | c = arg[1]; | |
c84ea202 | 659 | VERBOSE (DEBUG, PRINTOUT ("option: %c\n", c)); |
58352bb0 LM |
660 | switch (c) { |
661 | case 'c': | |
662 | mode = COMPRESS; | |
663 | break; | |
664 | case 'd': | |
665 | mode = DECOMPRESS; | |
666 | break; | |
667 | case 'i': | |
d3dbaf98 | 668 | input = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
c84ea202 | 669 | VERBOSE (DEBUG, PRINTOUT ("input: %s\n", input)); |
58352bb0 LM |
670 | break; |
671 | case 'o': | |
d3dbaf98 | 672 | output = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
c84ea202 | 673 | VERBOSE (DEBUG, PRINTOUT ("output: %s\n", output)); |
58352bb0 LM |
674 | break; |
675 | case 'v': | |
d3dbaf98 LM |
676 | arg = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
677 | if (arg == NULL) { | |
c84ea202 LM |
678 | PRINTERR ("%s: missing verbose level\n", progname); |
679 | return usage (1); | |
d3dbaf98 | 680 | } |
5e0c5bc8 | 681 | verbose = myatoi (arg); |
c84ea202 | 682 | VERBOSE (INFO, PRINTOUT ("verbose: %d\n", verbose)); |
58352bb0 LM |
683 | break; |
684 | case 'h': | |
685 | default: | |
c84ea202 | 686 | return usage (c != 'h'); |
58352bb0 LM |
687 | } |
688 | } | |
d3dbaf98 | 689 | if ((input == NULL) || (output == NULL)) { |
c84ea202 LM |
690 | PRINTERR ("%s: missing file\n", progname); |
691 | return usage (1); | |
58352bb0 | 692 | } |
c84ea202 | 693 | VERBOSE (DEBUG, PRINTOUT ("end processing arguments\n")); |
58352bb0 LM |
694 | |
695 | switch (mode) { | |
696 | case COMPRESS: | |
697 | table = create_table (input); | |
698 | if (table == NULL) break; | |
699 | VERBOSE (INFO, print_occ_table (table)); | |
700 | ||
701 | leafs = init_forest (table); | |
702 | if (leafs == NULL) break; | |
703 | root = create_tree (leafs); | |
704 | if (root == NULL) break; | |
705 | codes = create_code (root); | |
706 | if (codes == NULL) break; | |
707 | VERBOSE (INFO, print_code_table (codes)); | |
708 | header = encode_header_table (codes, table); | |
709 | if (header == NULL) break; | |
710 | VERBOSE (INFO, print_header (header)); | |
711 | rc = write_compress (output, input, codes, header); | |
712 | break; | |
713 | case DECOMPRESS: | |
37062814 LM |
714 | codes = read_header (input); |
715 | if (codes == NULL) break; | |
716 | VERBOSE (INFO, print_code_table (codes)); | |
717 | rc = write_decompress (output, input, codes); | |
58352bb0 LM |
718 | break; |
719 | } | |
720 | ||
58352bb0 LM |
721 | return rc; |
722 | } | |
723 | ||
724 | // test: compress.exe -h | |
725 | // test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }' | |
726 | // test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }' | |
727 | // test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }' | |
37062814 LM |
728 | // test: compress.exe -c -i compress.c -o compress.mz |
729 | // test: ls -sS1 compress.c compress.mz | tail -1 | grep compress.mz | |
730 | // test: compress.exe -d -i compress.mz -o tmp.c | |
731 | // test: cmp compress.c tmp.c | |
732 | // test: rm compress.mz tmp.c | |
58352bb0 | 733 | |
bf1d9554 | 734 | /* vim: set ts=4 sw=4 et: */ |