| 1 | /* depend: */ |
| 2 | /* cflags: */ |
| 3 | /* linker: code.o debug.o */ |
| 4 | |
| 5 | #include <malloc.h> |
| 6 | #include <stdio.h> |
| 7 | #include "code.h" |
| 8 | #include "debug.h" |
| 9 | |
| 10 | /* constants */ |
| 11 | |
| 12 | #define BUFFER_SIZE 4096 |
| 13 | |
| 14 | /* macros */ |
| 15 | |
| 16 | /* gobal variables */ |
| 17 | |
| 18 | char *progname = NULL; |
| 19 | |
| 20 | /* help function */ |
| 21 | |
| 22 | void usage (int ret) |
| 23 | { |
| 24 | FILE *fd = ret ? stderr : stdout; |
| 25 | fprintf (fd, "usage: %s\n", progname); |
| 26 | fprintf (fd, " -h : help message\n"); |
| 27 | fprintf (fd, " -i <file>: input file\n"); |
| 28 | fprintf (fd, " -o <file>: output file\n"); |
| 29 | fprintf (fd, " -v : verbose level (%d)\n", verbose); |
| 30 | |
| 31 | exit (ret); |
| 32 | } |
| 33 | |
| 34 | void blkcpy (void *dst, const void *src, int len) |
| 35 | { |
| 36 | while (len--) { |
| 37 | *((char *)dst++) = *((char *)src++); |
| 38 | } |
| 39 | } |
| 40 | |
| 41 | /* create occurence table */ |
| 42 | |
| 43 | int *create_table (char *filename) |
| 44 | { |
| 45 | byte_t buffer[BUFFER_SIZE] = {0}; |
| 46 | int nbread; |
| 47 | int *table = NULL; |
| 48 | FILE *fid = NULL; |
| 49 | |
| 50 | VERBOSE (DEBUG, PRINTF ("start creating occurence table\n")); |
| 51 | |
| 52 | /* memory allocation */ |
| 53 | table = (int *) calloc (NB_BYTES, sizeof (int)); |
| 54 | if (table == NULL) { |
| 55 | VERBOSE (ERROR, printf ("can't allocate memory\n")); |
| 56 | return NULL; |
| 57 | } |
| 58 | VERBOSE (INFO, printf ("memory allocated\n")); |
| 59 | |
| 60 | /* open file */ |
| 61 | fid = fopen (filename, "rb"); |
| 62 | if (fid == NULL) { |
| 63 | VERBOSE (ERROR, printf ("can't open file '%s'\n", filename)); |
| 64 | free (table); |
| 65 | return NULL; |
| 66 | } |
| 67 | VERBOSE (INFO, printf ("file '%s' opened\n", filename)); |
| 68 | |
| 69 | /* read file */ |
| 70 | while (!feof (fid)) { |
| 71 | nbread = fread (buffer, 1, BUFFER_SIZE, fid); |
| 72 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread)); |
| 73 | while (nbread--) { |
| 74 | table[(int)buffer[nbread]]++; |
| 75 | } |
| 76 | } |
| 77 | |
| 78 | /* close file */ |
| 79 | fclose (fid); |
| 80 | |
| 81 | VERBOSE (DEBUG, PRINTF ("end creating occurence table\n")); |
| 82 | |
| 83 | return table; |
| 84 | } |
| 85 | |
| 86 | /* print occurence table */ |
| 87 | |
| 88 | void print_occ_table (int *table) |
| 89 | { |
| 90 | int i; |
| 91 | |
| 92 | printf ("Occurence table\n"); |
| 93 | for (i = 0; i < NB_BYTES; i++) { |
| 94 | if (table[i]) { |
| 95 | printf ("0x%02x '%c': %d\n", i, ((i < 32) || (i > 127)) ? '.' : i, table[i]); |
| 96 | } |
| 97 | } |
| 98 | } |
| 99 | |
| 100 | /* leaf structure */ |
| 101 | |
| 102 | typedef struct _leaf_t |
| 103 | { |
| 104 | struct _leaf_t *left; |
| 105 | struct _leaf_t *right; |
| 106 | int occ; |
| 107 | byte_t c; |
| 108 | } leaf_t; |
| 109 | |
| 110 | /* initialize forest */ |
| 111 | |
| 112 | leaf_t **init_forest (int *table) |
| 113 | { |
| 114 | leaf_t **leafs = NULL; |
| 115 | int nb_leafs = 0; |
| 116 | int i, l; |
| 117 | |
| 118 | VERBOSE (DEBUG, PRINTF ("start initiliazing forest\n")); |
| 119 | |
| 120 | /* count number of leafs */ |
| 121 | for (i = 0; i < NB_BYTES; i++) { |
| 122 | if (table[i] > 0) { |
| 123 | nb_leafs++; |
| 124 | } |
| 125 | } |
| 126 | |
| 127 | /* allocate memory */ |
| 128 | leafs = (leaf_t **) calloc (nb_leafs + 1, sizeof (leaf_t *)); |
| 129 | if (leafs == NULL) { |
| 130 | VERBOSE (ERROR, printf ("can't allocate memory\n")); |
| 131 | return NULL; |
| 132 | } |
| 133 | |
| 134 | /* initialize leafs */ |
| 135 | for (i = 0, l = 0; i < NB_BYTES; i++) { |
| 136 | if (table[i] > 0) { |
| 137 | leafs[l] = (leaf_t *) calloc (1, sizeof (leaf_t)); |
| 138 | if (leafs[l] == NULL) { |
| 139 | VERBOSE (ERROR, printf ("can't allocate memory\n")); |
| 140 | return NULL; |
| 141 | } |
| 142 | leafs[l]->occ = table[i]; |
| 143 | leafs[l]->c = i; |
| 144 | l++; |
| 145 | } |
| 146 | } |
| 147 | |
| 148 | VERBOSE (DEBUG, PRINTF ("end initiliazing forest\n")); |
| 149 | |
| 150 | return leafs; |
| 151 | } |
| 152 | |
| 153 | /* create tree */ |
| 154 | |
| 155 | leaf_t *create_tree (leaf_t **leafs) |
| 156 | { |
| 157 | leaf_t *branch = NULL; |
| 158 | int nb_leafs = 0; |
| 159 | int last = -1; |
| 160 | int ante; |
| 161 | int i, j; |
| 162 | |
| 163 | VERBOSE (DEBUG, PRINTF ("start creating tree\n")); |
| 164 | |
| 165 | /* count number of leafs */ |
| 166 | while (leafs[nb_leafs] != NULL) { |
| 167 | nb_leafs++; |
| 168 | } |
| 169 | |
| 170 | /* create tree */ |
| 171 | for (j = 0; j < nb_leafs - 1; j++) { |
| 172 | |
| 173 | /* look for leatest occurence */ |
| 174 | last = -1; |
| 175 | for (i = 0; i < nb_leafs; i++) { |
| 176 | if (leafs[i] == NULL) { |
| 177 | continue; |
| 178 | } |
| 179 | if ((last == -1) || (leafs[i]->occ < leafs[last]->occ)) { |
| 180 | last = i; |
| 181 | } |
| 182 | } |
| 183 | |
| 184 | /* look for ante leatest occurence */ |
| 185 | ante = -1; |
| 186 | for (i = 0; i < nb_leafs; i++) { |
| 187 | if ((i == last) || (leafs[i] == NULL)) { |
| 188 | continue; |
| 189 | } |
| 190 | if ((ante == -1) || (leafs[i]->occ < leafs[ante]->occ)) { |
| 191 | ante = i; |
| 192 | } |
| 193 | } |
| 194 | |
| 195 | /* create branch */ |
| 196 | if ((last == -1) || (ante == -1)) { |
| 197 | VERBOSE (ERROR, printf ("error during tree building\n")); |
| 198 | return NULL; |
| 199 | } |
| 200 | branch = (leaf_t *) calloc (1, sizeof (leaf_t)); |
| 201 | if (branch == NULL) { |
| 202 | VERBOSE (ERROR, printf ("can't allocate memory\n")); |
| 203 | return NULL; |
| 204 | } |
| 205 | branch->left = leafs[last]; |
| 206 | branch->right = leafs[ante]; |
| 207 | branch->occ = branch->left->occ + branch->right->occ; |
| 208 | leafs[last] = branch; |
| 209 | leafs[ante] = NULL; |
| 210 | } |
| 211 | |
| 212 | VERBOSE (DEBUG, PRINTF ("end creating tree\n")); |
| 213 | |
| 214 | return (last != -1) ? leafs[last] : NULL; |
| 215 | } |
| 216 | |
| 217 | /* free tree */ |
| 218 | |
| 219 | void free_tree (leaf_t *root) { |
| 220 | if (root) { |
| 221 | if (root->left) { |
| 222 | free_tree (root->left); |
| 223 | } |
| 224 | if (root->right) { |
| 225 | free_tree (root->right); |
| 226 | } |
| 227 | free (root); |
| 228 | } |
| 229 | } |
| 230 | |
| 231 | /* explore tree */ |
| 232 | |
| 233 | void explore_tree (code_t *table, leaf_t *root, char *code, int index) |
| 234 | { |
| 235 | if ((root->left == NULL) && (root->right == NULL)) { |
| 236 | codcpy ((char *)(table + (int)(root->c)), sizeof (code_t), code); |
| 237 | } |
| 238 | else { |
| 239 | codcpy (code + index, sizeof (code_t), "1"); |
| 240 | explore_tree (table, root->left, code, index + 1); |
| 241 | codcpy (code + index, sizeof (code_t), "0"); |
| 242 | explore_tree (table, root->right, code, index + 1); |
| 243 | } |
| 244 | } |
| 245 | |
| 246 | /* create code table */ |
| 247 | |
| 248 | code_t *create_code (leaf_t *root) |
| 249 | { |
| 250 | code_t *table = NULL; |
| 251 | code_t code = {0}; |
| 252 | |
| 253 | VERBOSE (DEBUG, PRINTF ("start creating code table\n")); |
| 254 | |
| 255 | /* allocate table */ |
| 256 | table = (code_t *) calloc (NB_BYTES, sizeof (code_t)); |
| 257 | if (table == NULL) { |
| 258 | VERBOSE (ERROR, printf ("can't allocate memory\n")); |
| 259 | return NULL; |
| 260 | } |
| 261 | |
| 262 | explore_tree (table, root, (char *)&code, 0); |
| 263 | |
| 264 | VERBOSE (DEBUG, PRINTF ("end creating code table\n")); |
| 265 | |
| 266 | return table; |
| 267 | } |
| 268 | |
| 269 | /* print code table */ |
| 270 | |
| 271 | void print_code_table (code_t *codes) |
| 272 | { |
| 273 | char *code; |
| 274 | int i; |
| 275 | |
| 276 | printf ("Code table\n"); |
| 277 | for (i = 0; i < NB_BYTES; i++) { |
| 278 | code = (char *)(codes + i); |
| 279 | if (codlen (code) == 0) { |
| 280 | continue; |
| 281 | } |
| 282 | printf ("0x%02x '%c': %s\n", i, ((i < 32) || (i > 127)) ? '.' : i, code); |
| 283 | } |
| 284 | } |
| 285 | |
| 286 | /* encode header and code table */ |
| 287 | |
| 288 | byte_t *encode_header_table (code_t *codes, int *occ) |
| 289 | { |
| 290 | byte_t buffer[NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6] = {0}; |
| 291 | char bits[(NB_BYTES - 1) + 8 + 1] = {0}; |
| 292 | char *code; |
| 293 | byte_t *header = buffer; |
| 294 | int i, j, length, mode; |
| 295 | int nb = 0; |
| 296 | int size = 0; |
| 297 | |
| 298 | VERBOSE (DEBUG, PRINTF ("start encoding header and code table\n")); |
| 299 | |
| 300 | /* mode 1 or 2 */ |
| 301 | for (i = 0; i < NB_BYTES; i++) { |
| 302 | code = (char *)(codes + i); |
| 303 | if (codlen (code) > 0) { |
| 304 | nb++; |
| 305 | size += codlen (code) * occ[i]; |
| 306 | } |
| 307 | } |
| 308 | mode = (NB_BYTES < 2 * nb + 1) ? 1 : 2; |
| 309 | VERBOSE (DEBUG, PRINTF ("nb chars: %d\n", nb)); |
| 310 | VERBOSE (DEBUG, PRINTF ("mode: %d\n", mode)); |
| 311 | VERBOSE (DEBUG, PRINTF ("size: %d\n", size)); |
| 312 | VERBOSE (DEBUG, PRINTF ("rem: %d\n", size % 256)); |
| 313 | |
| 314 | /* header */ |
| 315 | codcpy ((char *)header, sizeof (buffer), (mode == 1) ? "MZ1 " : "MZ2 "); |
| 316 | header += 6; |
| 317 | |
| 318 | /* size */ |
| 319 | switch (mode) { |
| 320 | case 1: |
| 321 | for (i = 0; i < NB_BYTES; i++) { |
| 322 | code = (char *)(codes + i); |
| 323 | *(header++) = (byte_t) codlen (code); |
| 324 | } |
| 325 | break; |
| 326 | case 2: |
| 327 | *(header++) = (byte_t)(nb - 1); |
| 328 | for (i = 0; i < NB_BYTES; i++) { |
| 329 | code = (char *)(codes + i); |
| 330 | if (codlen (code) > 0) { |
| 331 | *(header++) = (byte_t) i; |
| 332 | *(header++) = (byte_t) codlen (code); |
| 333 | } |
| 334 | } |
| 335 | break; |
| 336 | } |
| 337 | |
| 338 | /* bits */ |
| 339 | for (i = 0; i < NB_BYTES; i++) { |
| 340 | code = (char *)(codes + i); |
| 341 | if (codlen (code) > 0) { |
| 342 | codcat (bits, sizeof (code_t), code); |
| 343 | while (codlen (bits) > (8 - 1)) { |
| 344 | for (j = 0; j < 8; j++) { |
| 345 | *header <<= 1; |
| 346 | if (bits[j] == '1') { |
| 347 | (*header)++; |
| 348 | } |
| 349 | } |
| 350 | codcpy (bits, sizeof (code_t), bits + 8); |
| 351 | header++; |
| 352 | } |
| 353 | } |
| 354 | } |
| 355 | if (codlen (bits) > 0) { |
| 356 | for (j = 0; j < (int)codlen (bits); j++) { |
| 357 | *header <<= 1; |
| 358 | if (bits[j] == '1') { |
| 359 | (*header)++; |
| 360 | } |
| 361 | } |
| 362 | for (j = (int)codlen (bits); j < 8; j++) { |
| 363 | *header <<= 1; |
| 364 | } |
| 365 | header++; |
| 366 | } |
| 367 | |
| 368 | /* length */ |
| 369 | length = (int)(header - buffer - 6); |
| 370 | VERBOSE (DEBUG, PRINTF ("lengh: %d %02x %02x\n", length, length >> 8, length & 0xff)); |
| 371 | buffer[3] = (byte_t)(length >> 8); |
| 372 | buffer[4] = (byte_t)(length & 0xff); |
| 373 | buffer[5] = (byte_t)(size % 256); |
| 374 | |
| 375 | /* allocation */ |
| 376 | header = (byte_t *) calloc (length + 6, 1); |
| 377 | blkcpy (header, buffer, length + 6); |
| 378 | |
| 379 | VERBOSE (DEBUG, PRINTF ("end encoding header and code table\n")); |
| 380 | |
| 381 | return header; |
| 382 | } |
| 383 | |
| 384 | /* print header */ |
| 385 | |
| 386 | void print_header (byte_t *header) |
| 387 | { |
| 388 | int length, i; |
| 389 | |
| 390 | length = (header[3] << 8) + header[4]; |
| 391 | VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length)); |
| 392 | for (i = 0; i < length + 6; i++) { |
| 393 | printf ("%02x", header[i]); |
| 394 | } |
| 395 | printf ("\n"); |
| 396 | } |
| 397 | |
| 398 | /* write crompressed file */ |
| 399 | |
| 400 | int write_compress (char *output, char *input, code_t *codes, byte_t *header) |
| 401 | { |
| 402 | byte_t bufin[BUFFER_SIZE] = {0}; |
| 403 | byte_t bufout[BUFFER_SIZE] = {0}; |
| 404 | char bits[(NB_BYTES - 1) + 8 + 1] = {0}; |
| 405 | FILE *fin, *fout; |
| 406 | int length = 0; |
| 407 | int i, j, nbread; |
| 408 | byte_t *pt; |
| 409 | |
| 410 | VERBOSE (DEBUG, PRINTF ("start writting compressed file\n")); |
| 411 | |
| 412 | /* open input file */ |
| 413 | fin = fopen (input, "rb"); |
| 414 | if (fin == NULL) { |
| 415 | VERBOSE (ERROR, printf ("can't open file '%s' for reading\n", input)); |
| 416 | return 1; |
| 417 | } |
| 418 | VERBOSE (INFO, printf ("file '%s' opened\n", input)); |
| 419 | |
| 420 | /* open output file */ |
| 421 | fout = fopen (output, "wb"); |
| 422 | if (fin == NULL) { |
| 423 | VERBOSE (ERROR, printf ("can't open file '%s' for writing\n", output)); |
| 424 | return 1; |
| 425 | } |
| 426 | VERBOSE (INFO, printf ("file '%s' opened\n", output)); |
| 427 | |
| 428 | /* write header */ |
| 429 | length = (header[3] << 8) + header[4]; |
| 430 | VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length)); |
| 431 | fwrite (header, 1, length + 6, fout); |
| 432 | |
| 433 | /* write file */ |
| 434 | pt = bufout; |
| 435 | while (!feof (fin)) { |
| 436 | nbread = fread (bufin, 1, BUFFER_SIZE, fin); |
| 437 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread)); |
| 438 | for (i = 0; i < nbread; i++) { |
| 439 | codcat (bits, sizeof (code_t), (char *)(codes + bufin[i])); |
| 440 | while (codlen (bits) > (8 - 1)) { |
| 441 | for (j = 0; j < 8; j++) { |
| 442 | *pt <<= 1; |
| 443 | if (bits[j] == '1') { |
| 444 | (*pt)++; |
| 445 | } |
| 446 | } |
| 447 | codcpy (bits, sizeof (code_t), bits + 8); |
| 448 | if (pt - bufout == BUFFER_SIZE - 1) { |
| 449 | fwrite (bufout, 1, BUFFER_SIZE, fout); |
| 450 | pt = bufout; |
| 451 | } else { |
| 452 | pt++; |
| 453 | } |
| 454 | } |
| 455 | } |
| 456 | } |
| 457 | VERBOSE (DEBUG, PRINTF ("lastest bits : %d\n", codlen (bits))); |
| 458 | if (codlen (bits) > 0) { |
| 459 | for (j = 0; j < (int)codlen (bits); j++) { |
| 460 | *pt <<= 1; |
| 461 | if (bits[j] == '1') { |
| 462 | (*pt)++; |
| 463 | } |
| 464 | } |
| 465 | for (j = (int)codlen (bits); j < 8; j++) { |
| 466 | *pt <<= 1; |
| 467 | } |
| 468 | pt++; |
| 469 | } |
| 470 | if (pt != bufout) { |
| 471 | VERBOSE (DEBUG, PRINTF ("last partial buffer written: %u\n", pt - bufout)); |
| 472 | fwrite (bufout, 1, pt - bufout, fout); |
| 473 | } |
| 474 | |
| 475 | /* closing */ |
| 476 | fclose (fin); |
| 477 | fclose (fout); |
| 478 | |
| 479 | VERBOSE (DEBUG, PRINTF ("end writting compressed file\n")); |
| 480 | |
| 481 | return 0; |
| 482 | } |
| 483 | |
| 484 | /* read header */ |
| 485 | |
| 486 | code_t *read_header (char *filename) { |
| 487 | byte_t buffer[NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6] = {0}; |
| 488 | code_t *table = NULL; |
| 489 | byte_t *codes = NULL; |
| 490 | byte_t cur; |
| 491 | int lengths[NB_BYTES] = {0}; |
| 492 | FILE *fid = NULL; |
| 493 | int mode = 0; |
| 494 | size_t i, j, l, nb, size; |
| 495 | |
| 496 | VERBOSE (DEBUG, PRINTF ("start reading header\n")); |
| 497 | |
| 498 | /* open file */ |
| 499 | fid = fopen (filename, "rb"); |
| 500 | if (fid == NULL) { |
| 501 | VERBOSE (ERROR, printf ("can't open file '%s'\n", filename)); |
| 502 | return NULL; |
| 503 | } |
| 504 | VERBOSE (INFO, printf ("file '%s' opened\n", filename)); |
| 505 | |
| 506 | /* read magic number */ |
| 507 | nb = fread (buffer, 1, 6, fid); |
| 508 | VERBOSE (DEBUG, PRINTF ("nb, buffer: %d 0x%02x 0x%02x\n", nb, buffer[0], buffer[1])); |
| 509 | if ((nb == 6) && (buffer[0] == 'M') && (buffer[1] == 'Z')) { |
| 510 | mode = (buffer[2] == '1') ? 1 : (buffer[2] == '2') ? 2 : 0; |
| 511 | size = (buffer[3] << 8) + buffer[4]; |
| 512 | VERBOSE (DEBUG, PRINTF ("mode, size: %d %d\n", mode, size)); |
| 513 | if (size > NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES) { |
| 514 | mode = 0; |
| 515 | } else { |
| 516 | nb = fread (buffer, 1, size, fid); |
| 517 | VERBOSE (DEBUG, PRINTF ("nb read: %d\n", nb)); |
| 518 | if (nb != size) { |
| 519 | mode = 0; |
| 520 | } |
| 521 | } |
| 522 | } |
| 523 | fclose (fid); |
| 524 | if (mode == 0) { |
| 525 | VERBOSE (ERROR, printf ("incorrect file\n")); |
| 526 | return NULL; |
| 527 | } |
| 528 | |
| 529 | /* analyse header */ |
| 530 | codes = buffer; |
| 531 | switch (mode) { |
| 532 | case 1: |
| 533 | for (i = 0; i < NB_BYTES; i++) { |
| 534 | lengths[i] = *(codes++); |
| 535 | } |
| 536 | break; |
| 537 | case 2: |
| 538 | nb = *(codes++) + 1; |
| 539 | VERBOSE (DEBUG, PRINTF ("nb codes: %d\n", nb)); |
| 540 | for (i = 0; i < nb; i++) { |
| 541 | j = *(codes++); |
| 542 | lengths[j] = *(codes++); |
| 543 | } |
| 544 | break; |
| 545 | } |
| 546 | VERBOSE (DEBUG, for (i = 0; i < NB_BYTES; i++) if (lengths[i]) PRINTF ("%d: %d\n", i, lengths[i])); |
| 547 | |
| 548 | /* check lengths */ |
| 549 | for (i = 0, l = 0; i < NB_BYTES; i++) { |
| 550 | l += lengths[i]; |
| 551 | } |
| 552 | if (((mode == 1) && (size - 256 != (l + 7) / 8)) || |
| 553 | ((mode == 2) && (size - 2 * nb - 1 != (l + 7) / 8))) { |
| 554 | VERBOSE (ERROR, printf ("incorrect code table length\n")); |
| 555 | return NULL; |
| 556 | } |
| 557 | |
| 558 | /* allocate table */ |
| 559 | table = (code_t *) calloc (NB_BYTES, sizeof (code_t)); |
| 560 | if (table == NULL) { |
| 561 | VERBOSE (ERROR, printf ("can't allocate memory\n")); |
| 562 | return NULL; |
| 563 | } |
| 564 | |
| 565 | /* decode code */ |
| 566 | cur = *(codes++); |
| 567 | l = 8; |
| 568 | for (i = 0; i < NB_BYTES; i++) { |
| 569 | if (lengths[i] == 0) { |
| 570 | continue; |
| 571 | } |
| 572 | while (lengths[i]--) { |
| 573 | codcat ((char *)(table + i), sizeof (code_t), ((cur & 0x80) == 0) ? "0" : "1"); |
| 574 | l--; |
| 575 | cur <<= 1; |
| 576 | if (l == 0) { |
| 577 | cur = *(codes++); |
| 578 | l = 8; |
| 579 | } |
| 580 | } |
| 581 | } |
| 582 | |
| 583 | VERBOSE (DEBUG, PRINTF ("end reading header\n")); |
| 584 | |
| 585 | return table; |
| 586 | } |
| 587 | |
| 588 | /* write decompressed file */ |
| 589 | |
| 590 | int write_decompress (char *output, char *input, code_t *codes) |
| 591 | { |
| 592 | byte_t bufin[BUFFER_SIZE] = {0}; |
| 593 | byte_t bufout[BUFFER_SIZE] = {0}; |
| 594 | byte_t bufhea[MAX(NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6, BUFFER_SIZE)] = {0}; |
| 595 | char bits[(NB_BYTES - 1) + 1] = {0}; |
| 596 | FILE *fin, *fout; |
| 597 | int i, j, k, nb, size, rem; |
| 598 | int is_found; |
| 599 | int l = 0; |
| 600 | byte_t *pt; |
| 601 | |
| 602 | VERBOSE (DEBUG, PRINTF ("start writing decompressed file\n")); |
| 603 | |
| 604 | /* open file for reading */ |
| 605 | fin = fopen (input, "rb"); |
| 606 | if (fin == NULL) { |
| 607 | VERBOSE (ERROR, printf ("can't open file '%s' for reading\n", input)); |
| 608 | return 1; |
| 609 | } |
| 610 | VERBOSE (INFO, printf ("file '%s' opened\n", input)); |
| 611 | |
| 612 | /* read magic number */ |
| 613 | nb = fread (bufhea, 1, 6, fin); |
| 614 | if (nb != 6) { |
| 615 | VERBOSE (ERROR, printf ("can't read file\n")); |
| 616 | fclose (fin); |
| 617 | return 1; |
| 618 | } |
| 619 | size = (bufhea[3] << 8) + bufhea[4]; |
| 620 | VERBOSE (DEBUG, printf ("table size: %d\n", size)); |
| 621 | rem = bufhea[5]; |
| 622 | VERBOSE (DEBUG, printf ("remainder: %d\n", rem)); |
| 623 | nb = fread (bufhea, 1, size, fin); |
| 624 | if (nb != size) { |
| 625 | VERBOSE (ERROR, printf ("can't read file\n")); |
| 626 | fclose (fin); |
| 627 | return 1; |
| 628 | } |
| 629 | |
| 630 | /* open file for writing */ |
| 631 | fout = fopen (output, "wb"); |
| 632 | if (fout == NULL) { |
| 633 | VERBOSE (ERROR, printf ("can't open file '%s' for writing\n", output)); |
| 634 | return 2; |
| 635 | } |
| 636 | VERBOSE (INFO, printf ("file '%s' opened\n", output)); |
| 637 | |
| 638 | /* write file */ |
| 639 | pt = bufout; |
| 640 | while (!feof (fin)) { |
| 641 | nb = fread (bufin, 1, BUFFER_SIZE, fin); |
| 642 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nb)); |
| 643 | for (i = 0; i < nb; i++) { |
| 644 | for (j = 0; j < 8; j++) { |
| 645 | codcat (bits, sizeof (bits), ((bufin[i] & 0x80) == 0) ? "0" : "1"); |
| 646 | bufin[i] <<= 1; |
| 647 | l++; |
| 648 | VERBOSE (DEBUG, PRINTF ("bits: %d - %s\n", codlen (bits), bits)); |
| 649 | |
| 650 | /* look for correct code */ |
| 651 | is_found = 0; |
| 652 | for (k = 0; (k < NB_BYTES) && (!is_found); k++) { |
| 653 | if (codcmp ((char *)(codes + k), bits) == 0) { |
| 654 | is_found = 1; |
| 655 | VERBOSE (DEBUG, PRINTF ("found: %d\n", k)); |
| 656 | *pt= k; |
| 657 | bits[0] = 0; |
| 658 | if (pt - bufout == BUFFER_SIZE - 1) { |
| 659 | VERBOSE (DEBUG, PRINTF ("nb buffer out: %u\n", (pt - bufout))); |
| 660 | fwrite (bufout, 1, BUFFER_SIZE, fout); |
| 661 | pt = bufout; |
| 662 | } else { |
| 663 | pt++; |
| 664 | } |
| 665 | } |
| 666 | } |
| 667 | if ((i == nb - 1) && (l % 256 == rem) && (feof (fin))) { |
| 668 | VERBOSE (DEBUG, PRINTF ("break\n")); |
| 669 | break; |
| 670 | } |
| 671 | } |
| 672 | } |
| 673 | } |
| 674 | if (pt != bufout) { |
| 675 | VERBOSE (DEBUG, PRINTF ("nb buffer out: %u\n", (pt - bufout))); |
| 676 | fwrite (bufout, 1, pt - bufout, fout); |
| 677 | } |
| 678 | |
| 679 | /* close files */ |
| 680 | fclose (fin); |
| 681 | fclose (fout); |
| 682 | |
| 683 | VERBOSE (DEBUG, PRINTF ("end writing decompressed file\n")); |
| 684 | |
| 685 | return 0; |
| 686 | } |
| 687 | |
| 688 | /* main function */ |
| 689 | |
| 690 | int main (int argc, char *argv[]) |
| 691 | { |
| 692 | char *input = NULL; |
| 693 | char *output = NULL; |
| 694 | int *table = NULL; |
| 695 | leaf_t **leafs = NULL; |
| 696 | leaf_t *root = NULL; |
| 697 | code_t *codes = NULL; |
| 698 | byte_t *header = NULL; |
| 699 | int mode = COMPRESS; |
| 700 | int rc = 1; |
| 701 | |
| 702 | progname = argv[0]; |
| 703 | |
| 704 | int c; |
| 705 | char * arg; |
| 706 | VERBOSE (DEBUG, PRINTF ("start processing arguments\n")); |
| 707 | while (argc-- > 1) { |
| 708 | arg = *(++argv); |
| 709 | if (arg[0] != '-') { |
| 710 | fprintf (stderr, "%s: invalid option -- %s\n", progname, arg); |
| 711 | usage (1); |
| 712 | } |
| 713 | c = arg[1]; |
| 714 | VERBOSE (DEBUG, PRINTF ("option: %c\n", c)); |
| 715 | switch (c) { |
| 716 | case 'c': |
| 717 | mode = COMPRESS; |
| 718 | break; |
| 719 | case 'd': |
| 720 | mode = DECOMPRESS; |
| 721 | break; |
| 722 | case 'i': |
| 723 | input = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
| 724 | VERBOSE (DEBUG, PRINTF ("input: %s\n", input)); |
| 725 | break; |
| 726 | case 'o': |
| 727 | output = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
| 728 | VERBOSE (DEBUG, PRINTF ("output: %s\n", output)); |
| 729 | break; |
| 730 | case 'v': |
| 731 | arg = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
| 732 | if (arg == NULL) { |
| 733 | fprintf (stderr, "%s: missing verbose level\n", progname); |
| 734 | usage (1); |
| 735 | } |
| 736 | verbose = atoi (arg); |
| 737 | VERBOSE (INFO, printf ("verbose: %d\n", verbose)); |
| 738 | break; |
| 739 | case 'h': |
| 740 | default: |
| 741 | usage (c != 'h'); |
| 742 | } |
| 743 | } |
| 744 | if ((input == NULL) || (output == NULL)) { |
| 745 | fprintf (stderr, "%s: missing file\n", progname); |
| 746 | usage (1); |
| 747 | } |
| 748 | VERBOSE (DEBUG, PRINTF ("end processing arguments\n")); |
| 749 | |
| 750 | switch (mode) { |
| 751 | case COMPRESS: |
| 752 | table = create_table (input); |
| 753 | if (table == NULL) break; |
| 754 | VERBOSE (INFO, print_occ_table (table)); |
| 755 | |
| 756 | leafs = init_forest (table); |
| 757 | if (leafs == NULL) break; |
| 758 | root = create_tree (leafs); |
| 759 | if (root == NULL) break; |
| 760 | codes = create_code (root); |
| 761 | if (codes == NULL) break; |
| 762 | VERBOSE (INFO, print_code_table (codes)); |
| 763 | header = encode_header_table (codes, table); |
| 764 | if (header == NULL) break; |
| 765 | VERBOSE (INFO, print_header (header)); |
| 766 | rc = write_compress (output, input, codes, header); |
| 767 | break; |
| 768 | case DECOMPRESS: |
| 769 | codes = read_header (input); |
| 770 | if (codes == NULL) break; |
| 771 | VERBOSE (INFO, print_code_table (codes)); |
| 772 | rc = write_decompress (output, input, codes); |
| 773 | break; |
| 774 | } |
| 775 | |
| 776 | /* clean everything */ |
| 777 | fflush (stdout); |
| 778 | if (header) free (header); |
| 779 | if (codes) free (codes); |
| 780 | if (root) free_tree (root); |
| 781 | if (leafs) free (leafs); |
| 782 | if (table) free (table); |
| 783 | |
| 784 | return rc; |
| 785 | } |
| 786 | |
| 787 | // test: compress.exe -h |
| 788 | // test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }' |
| 789 | // test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }' |
| 790 | // test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }' |
| 791 | // test: compress.exe -c -i compress.c -o compress.mz |
| 792 | // test: ls -sS1 compress.c compress.mz | tail -1 | grep compress.mz |
| 793 | // test: compress.exe -d -i compress.mz -o tmp.c |
| 794 | // test: cmp compress.c tmp.c |
| 795 | // test: rm compress.mz tmp.c |
| 796 | |
| 797 | // vim: ts=4 sw=4 et |