| 1 | /* depend: */ |
| 2 | /* cflags: */ |
| 3 | /* linker: code.o debug.o fprintf.o */ |
| 4 | |
| 5 | #include <malloc.h> |
| 6 | #include <stdio.h> |
| 7 | #include "code.h" |
| 8 | #include "debug.h" |
| 9 | #include "fprintf.h" |
| 10 | |
| 11 | /* constants */ |
| 12 | |
| 13 | #define BUFFER_SIZE 4096 |
| 14 | |
| 15 | /* macros */ |
| 16 | |
| 17 | /* gobal variables */ |
| 18 | |
| 19 | char *progname = NULL; |
| 20 | |
| 21 | /* help function */ |
| 22 | |
| 23 | void usage (int ret) |
| 24 | { |
| 25 | FILE *fd = ret ? stderr : stdout; |
| 26 | _fprintf (fd, "usage: %s\n", progname); |
| 27 | _fprintf (fd, " -h : help message\n"); |
| 28 | _fprintf (fd, " -i <file>: input file\n"); |
| 29 | _fprintf (fd, " -o <file>: output file\n"); |
| 30 | _fprintf (fd, " -v : verbose level (%d)\n", verbose); |
| 31 | |
| 32 | exit (ret); |
| 33 | } |
| 34 | |
| 35 | void blkcpy (void *dst, const void *src, int len) |
| 36 | { |
| 37 | while (len--) { |
| 38 | *((char *)dst++) = *((char *)src++); |
| 39 | } |
| 40 | } |
| 41 | |
| 42 | /* create occurence table */ |
| 43 | |
| 44 | int *create_table (char *filename) |
| 45 | { |
| 46 | byte_t buffer[BUFFER_SIZE] = {0}; |
| 47 | int nbread; |
| 48 | int *table = NULL; |
| 49 | FILE *fid = NULL; |
| 50 | |
| 51 | VERBOSE (DEBUG, PRINTF ("start creating occurence table\n")); |
| 52 | |
| 53 | /* memory allocation */ |
| 54 | table = (int *) calloc (NB_BYTES, sizeof (int)); |
| 55 | if (table == NULL) { |
| 56 | VERBOSE (ERROR, _fprintf (stdout, "can't allocate memory\n")); |
| 57 | return NULL; |
| 58 | } |
| 59 | VERBOSE (INFO, _fprintf (stdout, "memory allocated\n")); |
| 60 | |
| 61 | /* open file */ |
| 62 | fid = fopen (filename, "rb"); |
| 63 | if (fid == NULL) { |
| 64 | VERBOSE (ERROR, _fprintf (stdout, "can't open file '%s'\n", filename)); |
| 65 | free (table); |
| 66 | return NULL; |
| 67 | } |
| 68 | VERBOSE (INFO, _fprintf (stdout, "file '%s' opened\n", filename)); |
| 69 | |
| 70 | /* read file */ |
| 71 | while (!feof (fid)) { |
| 72 | nbread = fread (buffer, 1, BUFFER_SIZE, fid); |
| 73 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread)); |
| 74 | while (nbread--) { |
| 75 | table[(int)buffer[nbread]]++; |
| 76 | } |
| 77 | } |
| 78 | |
| 79 | /* close file */ |
| 80 | fclose (fid); |
| 81 | |
| 82 | VERBOSE (DEBUG, PRINTF ("end creating occurence table\n")); |
| 83 | |
| 84 | return table; |
| 85 | } |
| 86 | |
| 87 | /* print occurence table */ |
| 88 | |
| 89 | void print_occ_table (int *table) |
| 90 | { |
| 91 | int i; |
| 92 | |
| 93 | _fprintf (stdout, "Occurence table\n"); |
| 94 | for (i = 0; i < NB_BYTES; i++) { |
| 95 | if (table[i]) { |
| 96 | _fprintf (stdout, "0x%02x '%c': %d\n", i, ((i < 32) || (i > 127)) ? '.' : i, table[i]); |
| 97 | } |
| 98 | } |
| 99 | } |
| 100 | |
| 101 | /* leaf structure */ |
| 102 | |
| 103 | typedef struct _leaf_t |
| 104 | { |
| 105 | struct _leaf_t *left; |
| 106 | struct _leaf_t *right; |
| 107 | int occ; |
| 108 | byte_t c; |
| 109 | } leaf_t; |
| 110 | |
| 111 | /* initialize forest */ |
| 112 | |
| 113 | leaf_t **init_forest (int *table) |
| 114 | { |
| 115 | leaf_t **leafs = NULL; |
| 116 | int nb_leafs = 0; |
| 117 | int i, l; |
| 118 | |
| 119 | VERBOSE (DEBUG, PRINTF ("start initiliazing forest\n")); |
| 120 | |
| 121 | /* count number of leafs */ |
| 122 | for (i = 0; i < NB_BYTES; i++) { |
| 123 | if (table[i] > 0) { |
| 124 | nb_leafs++; |
| 125 | } |
| 126 | } |
| 127 | |
| 128 | /* allocate memory */ |
| 129 | leafs = (leaf_t **) calloc (nb_leafs + 1, sizeof (leaf_t *)); |
| 130 | if (leafs == NULL) { |
| 131 | VERBOSE (ERROR, _fprintf (stdout, "can't allocate memory\n")); |
| 132 | return NULL; |
| 133 | } |
| 134 | |
| 135 | /* initialize leafs */ |
| 136 | for (i = 0, l = 0; i < NB_BYTES; i++) { |
| 137 | if (table[i] > 0) { |
| 138 | leafs[l] = (leaf_t *) calloc (1, sizeof (leaf_t)); |
| 139 | if (leafs[l] == NULL) { |
| 140 | VERBOSE (ERROR, _fprintf (stdout, "can't allocate memory\n")); |
| 141 | return NULL; |
| 142 | } |
| 143 | leafs[l]->occ = table[i]; |
| 144 | leafs[l]->c = i; |
| 145 | l++; |
| 146 | } |
| 147 | } |
| 148 | |
| 149 | VERBOSE (DEBUG, PRINTF ("end initiliazing forest\n")); |
| 150 | |
| 151 | return leafs; |
| 152 | } |
| 153 | |
| 154 | /* create tree */ |
| 155 | |
| 156 | leaf_t *create_tree (leaf_t **leafs) |
| 157 | { |
| 158 | leaf_t *branch = NULL; |
| 159 | int nb_leafs = 0; |
| 160 | int last = -1; |
| 161 | int ante; |
| 162 | int i, j; |
| 163 | |
| 164 | VERBOSE (DEBUG, PRINTF ("start creating tree\n")); |
| 165 | |
| 166 | /* count number of leafs */ |
| 167 | while (leafs[nb_leafs] != NULL) { |
| 168 | nb_leafs++; |
| 169 | } |
| 170 | |
| 171 | /* create tree */ |
| 172 | for (j = 0; j < nb_leafs - 1; j++) { |
| 173 | |
| 174 | /* look for leatest occurence */ |
| 175 | last = -1; |
| 176 | for (i = 0; i < nb_leafs; i++) { |
| 177 | if (leafs[i] == NULL) { |
| 178 | continue; |
| 179 | } |
| 180 | if ((last == -1) || (leafs[i]->occ < leafs[last]->occ)) { |
| 181 | last = i; |
| 182 | } |
| 183 | } |
| 184 | |
| 185 | /* look for ante leatest occurence */ |
| 186 | ante = -1; |
| 187 | for (i = 0; i < nb_leafs; i++) { |
| 188 | if ((i == last) || (leafs[i] == NULL)) { |
| 189 | continue; |
| 190 | } |
| 191 | if ((ante == -1) || (leafs[i]->occ < leafs[ante]->occ)) { |
| 192 | ante = i; |
| 193 | } |
| 194 | } |
| 195 | |
| 196 | /* create branch */ |
| 197 | if ((last == -1) || (ante == -1)) { |
| 198 | VERBOSE (ERROR, _fprintf (stdout, "error during tree building\n")); |
| 199 | return NULL; |
| 200 | } |
| 201 | branch = (leaf_t *) calloc (1, sizeof (leaf_t)); |
| 202 | if (branch == NULL) { |
| 203 | VERBOSE (ERROR, _fprintf (stdout, "can't allocate memory\n")); |
| 204 | return NULL; |
| 205 | } |
| 206 | branch->left = leafs[last]; |
| 207 | branch->right = leafs[ante]; |
| 208 | branch->occ = branch->left->occ + branch->right->occ; |
| 209 | leafs[last] = branch; |
| 210 | leafs[ante] = NULL; |
| 211 | } |
| 212 | |
| 213 | VERBOSE (DEBUG, PRINTF ("end creating tree\n")); |
| 214 | |
| 215 | return (last != -1) ? leafs[last] : NULL; |
| 216 | } |
| 217 | |
| 218 | /* free tree */ |
| 219 | |
| 220 | void free_tree (leaf_t *root) { |
| 221 | if (root) { |
| 222 | if (root->left) { |
| 223 | free_tree (root->left); |
| 224 | } |
| 225 | if (root->right) { |
| 226 | free_tree (root->right); |
| 227 | } |
| 228 | free (root); |
| 229 | } |
| 230 | } |
| 231 | |
| 232 | /* explore tree */ |
| 233 | |
| 234 | void explore_tree (code_t *table, leaf_t *root, char *code, int index) |
| 235 | { |
| 236 | if ((root->left == NULL) && (root->right == NULL)) { |
| 237 | codcpy ((char *)(table + (int)(root->c)), sizeof (code_t), code); |
| 238 | } |
| 239 | else { |
| 240 | codcpy (code + index, sizeof (code_t), "1"); |
| 241 | explore_tree (table, root->left, code, index + 1); |
| 242 | codcpy (code + index, sizeof (code_t), "0"); |
| 243 | explore_tree (table, root->right, code, index + 1); |
| 244 | } |
| 245 | } |
| 246 | |
| 247 | /* create code table */ |
| 248 | |
| 249 | code_t *create_code (leaf_t *root) |
| 250 | { |
| 251 | code_t *table = NULL; |
| 252 | code_t code = {0}; |
| 253 | |
| 254 | VERBOSE (DEBUG, PRINTF ("start creating code table\n")); |
| 255 | |
| 256 | /* allocate table */ |
| 257 | table = (code_t *) calloc (NB_BYTES, sizeof (code_t)); |
| 258 | if (table == NULL) { |
| 259 | VERBOSE (ERROR, _fprintf (stdout, "can't allocate memory\n")); |
| 260 | return NULL; |
| 261 | } |
| 262 | |
| 263 | explore_tree (table, root, (char *)&code, 0); |
| 264 | |
| 265 | VERBOSE (DEBUG, PRINTF ("end creating code table\n")); |
| 266 | |
| 267 | return table; |
| 268 | } |
| 269 | |
| 270 | /* print code table */ |
| 271 | |
| 272 | void print_code_table (code_t *codes) |
| 273 | { |
| 274 | char *code; |
| 275 | int i; |
| 276 | |
| 277 | _fprintf (stdout, "Code table\n"); |
| 278 | for (i = 0; i < NB_BYTES; i++) { |
| 279 | code = (char *)(codes + i); |
| 280 | if (codlen (code) == 0) { |
| 281 | continue; |
| 282 | } |
| 283 | _fprintf (stdout, "0x%02x '%c': %s\n", i, ((i < 32) || (i > 127)) ? '.' : i, code); |
| 284 | } |
| 285 | } |
| 286 | |
| 287 | /* encode header and code table */ |
| 288 | |
| 289 | byte_t *encode_header_table (code_t *codes, int *occ) |
| 290 | { |
| 291 | byte_t buffer[NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6] = {0}; |
| 292 | char bits[(NB_BYTES - 1) + 8 + 1] = {0}; |
| 293 | char *code; |
| 294 | byte_t *header = buffer; |
| 295 | int i, j, length, mode; |
| 296 | int nb = 0; |
| 297 | int size = 0; |
| 298 | |
| 299 | VERBOSE (DEBUG, PRINTF ("start encoding header and code table\n")); |
| 300 | |
| 301 | /* mode 1 or 2 */ |
| 302 | for (i = 0; i < NB_BYTES; i++) { |
| 303 | code = (char *)(codes + i); |
| 304 | if (codlen (code) > 0) { |
| 305 | nb++; |
| 306 | size += codlen (code) * occ[i]; |
| 307 | } |
| 308 | } |
| 309 | mode = (NB_BYTES < 2 * nb + 1) ? 1 : 2; |
| 310 | VERBOSE (DEBUG, PRINTF ("nb chars: %d\n", nb)); |
| 311 | VERBOSE (DEBUG, PRINTF ("mode: %d\n", mode)); |
| 312 | VERBOSE (DEBUG, PRINTF ("size: %d\n", size)); |
| 313 | VERBOSE (DEBUG, PRINTF ("rem: %d\n", size % 256)); |
| 314 | |
| 315 | /* header */ |
| 316 | codcpy ((char *)header, sizeof (buffer), (mode == 1) ? "MZ1 " : "MZ2 "); |
| 317 | header += 6; |
| 318 | |
| 319 | /* size */ |
| 320 | switch (mode) { |
| 321 | case 1: |
| 322 | for (i = 0; i < NB_BYTES; i++) { |
| 323 | code = (char *)(codes + i); |
| 324 | *(header++) = (byte_t) codlen (code); |
| 325 | } |
| 326 | break; |
| 327 | case 2: |
| 328 | *(header++) = (byte_t)(nb - 1); |
| 329 | for (i = 0; i < NB_BYTES; i++) { |
| 330 | code = (char *)(codes + i); |
| 331 | if (codlen (code) > 0) { |
| 332 | *(header++) = (byte_t) i; |
| 333 | *(header++) = (byte_t) codlen (code); |
| 334 | } |
| 335 | } |
| 336 | break; |
| 337 | } |
| 338 | |
| 339 | /* bits */ |
| 340 | for (i = 0; i < NB_BYTES; i++) { |
| 341 | code = (char *)(codes + i); |
| 342 | if (codlen (code) > 0) { |
| 343 | codcat (bits, sizeof (code_t), code); |
| 344 | while (codlen (bits) > (8 - 1)) { |
| 345 | for (j = 0; j < 8; j++) { |
| 346 | *header <<= 1; |
| 347 | if (bits[j] == '1') { |
| 348 | (*header)++; |
| 349 | } |
| 350 | } |
| 351 | codcpy (bits, sizeof (code_t), bits + 8); |
| 352 | header++; |
| 353 | } |
| 354 | } |
| 355 | } |
| 356 | if (codlen (bits) > 0) { |
| 357 | for (j = 0; j < (int)codlen (bits); j++) { |
| 358 | *header <<= 1; |
| 359 | if (bits[j] == '1') { |
| 360 | (*header)++; |
| 361 | } |
| 362 | } |
| 363 | for (j = (int)codlen (bits); j < 8; j++) { |
| 364 | *header <<= 1; |
| 365 | } |
| 366 | header++; |
| 367 | } |
| 368 | |
| 369 | /* length */ |
| 370 | length = (int)(header - buffer - 6); |
| 371 | VERBOSE (DEBUG, PRINTF ("lengh: %d %02x %02x\n", length, length >> 8, length & 0xff)); |
| 372 | buffer[3] = (byte_t)(length >> 8); |
| 373 | buffer[4] = (byte_t)(length & 0xff); |
| 374 | buffer[5] = (byte_t)(size % 256); |
| 375 | |
| 376 | /* allocation */ |
| 377 | header = (byte_t *) calloc (length + 6, 1); |
| 378 | blkcpy (header, buffer, length + 6); |
| 379 | |
| 380 | VERBOSE (DEBUG, PRINTF ("end encoding header and code table\n")); |
| 381 | |
| 382 | return header; |
| 383 | } |
| 384 | |
| 385 | /* print header */ |
| 386 | |
| 387 | void print_header (byte_t *header) |
| 388 | { |
| 389 | int length, i; |
| 390 | |
| 391 | length = (header[3] << 8) + header[4]; |
| 392 | VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length)); |
| 393 | for (i = 0; i < length + 6; i++) { |
| 394 | _fprintf (stdout, "%02x", header[i]); |
| 395 | } |
| 396 | _fprintf (stdout, "\n"); |
| 397 | } |
| 398 | |
| 399 | /* write crompressed file */ |
| 400 | |
| 401 | int write_compress (char *output, char *input, code_t *codes, byte_t *header) |
| 402 | { |
| 403 | byte_t bufin[BUFFER_SIZE] = {0}; |
| 404 | byte_t bufout[BUFFER_SIZE] = {0}; |
| 405 | char bits[(NB_BYTES - 1) + 8 + 1] = {0}; |
| 406 | FILE *fin, *fout; |
| 407 | int length = 0; |
| 408 | int i, j, nbread; |
| 409 | byte_t *pt; |
| 410 | |
| 411 | VERBOSE (DEBUG, PRINTF ("start writting compressed file\n")); |
| 412 | |
| 413 | /* open input file */ |
| 414 | fin = fopen (input, "rb"); |
| 415 | if (fin == NULL) { |
| 416 | VERBOSE (ERROR, _fprintf (stdout, "can't open file '%s' for reading\n", input)); |
| 417 | return 1; |
| 418 | } |
| 419 | VERBOSE (INFO, _fprintf (stdout, "file '%s' opened\n", input)); |
| 420 | |
| 421 | /* open output file */ |
| 422 | fout = fopen (output, "wb"); |
| 423 | if (fin == NULL) { |
| 424 | VERBOSE (ERROR, _fprintf (stdout, "can't open file '%s' for writing\n", output)); |
| 425 | return 1; |
| 426 | } |
| 427 | VERBOSE (INFO, _fprintf (stdout, "file '%s' opened\n", output)); |
| 428 | |
| 429 | /* write header */ |
| 430 | length = (header[3] << 8) + header[4]; |
| 431 | VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length)); |
| 432 | fwrite (header, 1, length + 6, fout); |
| 433 | |
| 434 | /* write file */ |
| 435 | pt = bufout; |
| 436 | while (!feof (fin)) { |
| 437 | nbread = fread (bufin, 1, BUFFER_SIZE, fin); |
| 438 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread)); |
| 439 | for (i = 0; i < nbread; i++) { |
| 440 | codcat (bits, sizeof (code_t), (char *)(codes + bufin[i])); |
| 441 | while (codlen (bits) > (8 - 1)) { |
| 442 | for (j = 0; j < 8; j++) { |
| 443 | *pt <<= 1; |
| 444 | if (bits[j] == '1') { |
| 445 | (*pt)++; |
| 446 | } |
| 447 | } |
| 448 | codcpy (bits, sizeof (code_t), bits + 8); |
| 449 | if (pt - bufout == BUFFER_SIZE - 1) { |
| 450 | fwrite (bufout, 1, BUFFER_SIZE, fout); |
| 451 | pt = bufout; |
| 452 | } else { |
| 453 | pt++; |
| 454 | } |
| 455 | } |
| 456 | } |
| 457 | } |
| 458 | VERBOSE (DEBUG, PRINTF ("lastest bits : %d\n", codlen (bits))); |
| 459 | if (codlen (bits) > 0) { |
| 460 | for (j = 0; j < (int)codlen (bits); j++) { |
| 461 | *pt <<= 1; |
| 462 | if (bits[j] == '1') { |
| 463 | (*pt)++; |
| 464 | } |
| 465 | } |
| 466 | for (j = (int)codlen (bits); j < 8; j++) { |
| 467 | *pt <<= 1; |
| 468 | } |
| 469 | pt++; |
| 470 | } |
| 471 | if (pt != bufout) { |
| 472 | VERBOSE (DEBUG, PRINTF ("last partial buffer written: %u\n", pt - bufout)); |
| 473 | fwrite (bufout, 1, pt - bufout, fout); |
| 474 | } |
| 475 | |
| 476 | /* closing */ |
| 477 | fclose (fin); |
| 478 | fclose (fout); |
| 479 | |
| 480 | VERBOSE (DEBUG, PRINTF ("end writting compressed file\n")); |
| 481 | |
| 482 | return 0; |
| 483 | } |
| 484 | |
| 485 | /* read header */ |
| 486 | |
| 487 | code_t *read_header (char *filename) { |
| 488 | byte_t buffer[NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6] = {0}; |
| 489 | code_t *table = NULL; |
| 490 | byte_t *codes = NULL; |
| 491 | byte_t cur; |
| 492 | int lengths[NB_BYTES] = {0}; |
| 493 | FILE *fid = NULL; |
| 494 | int mode = 0; |
| 495 | size_t i, j, l, nb, size; |
| 496 | |
| 497 | VERBOSE (DEBUG, PRINTF ("start reading header\n")); |
| 498 | |
| 499 | /* open file */ |
| 500 | fid = fopen (filename, "rb"); |
| 501 | if (fid == NULL) { |
| 502 | VERBOSE (ERROR, _fprintf (stdout, "can't open file '%s'\n", filename)); |
| 503 | return NULL; |
| 504 | } |
| 505 | VERBOSE (INFO, _fprintf (stdout, "file '%s' opened\n", filename)); |
| 506 | |
| 507 | /* read magic number */ |
| 508 | nb = fread (buffer, 1, 6, fid); |
| 509 | VERBOSE (DEBUG, PRINTF ("nb, buffer: %d 0x%02x 0x%02x\n", nb, buffer[0], buffer[1])); |
| 510 | if ((nb == 6) && (buffer[0] == 'M') && (buffer[1] == 'Z')) { |
| 511 | mode = (buffer[2] == '1') ? 1 : (buffer[2] == '2') ? 2 : 0; |
| 512 | size = (buffer[3] << 8) + buffer[4]; |
| 513 | VERBOSE (DEBUG, PRINTF ("mode, size: %d %d\n", mode, size)); |
| 514 | if (size > NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES) { |
| 515 | mode = 0; |
| 516 | } else { |
| 517 | nb = fread (buffer, 1, size, fid); |
| 518 | VERBOSE (DEBUG, PRINTF ("nb read: %d\n", nb)); |
| 519 | if (nb != size) { |
| 520 | mode = 0; |
| 521 | } |
| 522 | } |
| 523 | } |
| 524 | fclose (fid); |
| 525 | if (mode == 0) { |
| 526 | VERBOSE (ERROR, _fprintf (stdout, "incorrect file\n")); |
| 527 | return NULL; |
| 528 | } |
| 529 | |
| 530 | /* analyse header */ |
| 531 | codes = buffer; |
| 532 | switch (mode) { |
| 533 | case 1: |
| 534 | for (i = 0; i < NB_BYTES; i++) { |
| 535 | lengths[i] = *(codes++); |
| 536 | } |
| 537 | break; |
| 538 | case 2: |
| 539 | nb = *(codes++) + 1; |
| 540 | VERBOSE (DEBUG, PRINTF ("nb codes: %d\n", nb)); |
| 541 | for (i = 0; i < nb; i++) { |
| 542 | j = *(codes++); |
| 543 | lengths[j] = *(codes++); |
| 544 | } |
| 545 | break; |
| 546 | } |
| 547 | VERBOSE (DEBUG, for (i = 0; i < NB_BYTES; i++) if (lengths[i]) PRINTF ("%d: %d\n", i, lengths[i])); |
| 548 | |
| 549 | /* check lengths */ |
| 550 | for (i = 0, l = 0; i < NB_BYTES; i++) { |
| 551 | l += lengths[i]; |
| 552 | } |
| 553 | if (((mode == 1) && (size - 256 != (l + 7) / 8)) || |
| 554 | ((mode == 2) && (size - 2 * nb - 1 != (l + 7) / 8))) { |
| 555 | VERBOSE (ERROR, _fprintf (stdout, "incorrect code table length\n")); |
| 556 | return NULL; |
| 557 | } |
| 558 | |
| 559 | /* allocate table */ |
| 560 | table = (code_t *) calloc (NB_BYTES, sizeof (code_t)); |
| 561 | if (table == NULL) { |
| 562 | VERBOSE (ERROR, _fprintf (stdout, "can't allocate memory\n")); |
| 563 | return NULL; |
| 564 | } |
| 565 | |
| 566 | /* decode code */ |
| 567 | cur = *(codes++); |
| 568 | l = 8; |
| 569 | for (i = 0; i < NB_BYTES; i++) { |
| 570 | if (lengths[i] == 0) { |
| 571 | continue; |
| 572 | } |
| 573 | while (lengths[i]--) { |
| 574 | codcat ((char *)(table + i), sizeof (code_t), ((cur & 0x80) == 0) ? "0" : "1"); |
| 575 | l--; |
| 576 | cur <<= 1; |
| 577 | if (l == 0) { |
| 578 | cur = *(codes++); |
| 579 | l = 8; |
| 580 | } |
| 581 | } |
| 582 | } |
| 583 | |
| 584 | VERBOSE (DEBUG, PRINTF ("end reading header\n")); |
| 585 | |
| 586 | return table; |
| 587 | } |
| 588 | |
| 589 | /* write decompressed file */ |
| 590 | |
| 591 | int write_decompress (char *output, char *input, code_t *codes) |
| 592 | { |
| 593 | byte_t bufin[BUFFER_SIZE] = {0}; |
| 594 | byte_t bufout[BUFFER_SIZE] = {0}; |
| 595 | byte_t bufhea[MAX(NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6, BUFFER_SIZE)] = {0}; |
| 596 | char bits[(NB_BYTES - 1) + 1] = {0}; |
| 597 | FILE *fin, *fout; |
| 598 | int i, j, k, nb, size, rem; |
| 599 | int is_found; |
| 600 | int l = 0; |
| 601 | byte_t *pt; |
| 602 | |
| 603 | VERBOSE (DEBUG, PRINTF ("start writing decompressed file\n")); |
| 604 | |
| 605 | /* open file for reading */ |
| 606 | fin = fopen (input, "rb"); |
| 607 | if (fin == NULL) { |
| 608 | VERBOSE (ERROR, _fprintf (stdout, "can't open file '%s' for reading\n", input)); |
| 609 | return 1; |
| 610 | } |
| 611 | VERBOSE (INFO, _fprintf (stdout, "file '%s' opened\n", input)); |
| 612 | |
| 613 | /* read magic number */ |
| 614 | nb = fread (bufhea, 1, 6, fin); |
| 615 | if (nb != 6) { |
| 616 | VERBOSE (ERROR, _fprintf (stdout, "can't read file\n")); |
| 617 | fclose (fin); |
| 618 | return 1; |
| 619 | } |
| 620 | size = (bufhea[3] << 8) + bufhea[4]; |
| 621 | VERBOSE (DEBUG, _fprintf (stdout, "table size: %d\n", size)); |
| 622 | rem = bufhea[5]; |
| 623 | VERBOSE (DEBUG, _fprintf (stdout, "remainder: %d\n", rem)); |
| 624 | nb = fread (bufhea, 1, size, fin); |
| 625 | if (nb != size) { |
| 626 | VERBOSE (ERROR, _fprintf (stdout, "can't read file\n")); |
| 627 | fclose (fin); |
| 628 | return 1; |
| 629 | } |
| 630 | |
| 631 | /* open file for writing */ |
| 632 | fout = fopen (output, "wb"); |
| 633 | if (fout == NULL) { |
| 634 | VERBOSE (ERROR, _fprintf (stdout, "can't open file '%s' for writing\n", output)); |
| 635 | return 2; |
| 636 | } |
| 637 | VERBOSE (INFO, _fprintf (stdout, "file '%s' opened\n", output)); |
| 638 | |
| 639 | /* write file */ |
| 640 | pt = bufout; |
| 641 | while (!feof (fin)) { |
| 642 | nb = fread (bufin, 1, BUFFER_SIZE, fin); |
| 643 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nb)); |
| 644 | for (i = 0; i < nb; i++) { |
| 645 | for (j = 0; j < 8; j++) { |
| 646 | codcat (bits, sizeof (bits), ((bufin[i] & 0x80) == 0) ? "0" : "1"); |
| 647 | bufin[i] <<= 1; |
| 648 | l++; |
| 649 | VERBOSE (DEBUG, PRINTF ("bits: %d - %s\n", codlen (bits), bits)); |
| 650 | |
| 651 | /* look for correct code */ |
| 652 | is_found = 0; |
| 653 | for (k = 0; (k < NB_BYTES) && (!is_found); k++) { |
| 654 | if (codcmp ((char *)(codes + k), bits) == 0) { |
| 655 | is_found = 1; |
| 656 | VERBOSE (DEBUG, PRINTF ("found: %d\n", k)); |
| 657 | *pt= k; |
| 658 | bits[0] = 0; |
| 659 | if (pt - bufout == BUFFER_SIZE - 1) { |
| 660 | VERBOSE (DEBUG, PRINTF ("nb buffer out: %u\n", (pt - bufout))); |
| 661 | fwrite (bufout, 1, BUFFER_SIZE, fout); |
| 662 | pt = bufout; |
| 663 | } else { |
| 664 | pt++; |
| 665 | } |
| 666 | } |
| 667 | } |
| 668 | if ((i == nb - 1) && (l % 256 == rem) && (feof (fin))) { |
| 669 | VERBOSE (DEBUG, PRINTF ("break\n")); |
| 670 | break; |
| 671 | } |
| 672 | } |
| 673 | } |
| 674 | } |
| 675 | if (pt != bufout) { |
| 676 | VERBOSE (DEBUG, PRINTF ("nb buffer out: %u\n", (pt - bufout))); |
| 677 | fwrite (bufout, 1, pt - bufout, fout); |
| 678 | } |
| 679 | |
| 680 | /* close files */ |
| 681 | fclose (fin); |
| 682 | fclose (fout); |
| 683 | |
| 684 | VERBOSE (DEBUG, PRINTF ("end writing decompressed file\n")); |
| 685 | |
| 686 | return 0; |
| 687 | } |
| 688 | |
| 689 | /* main function */ |
| 690 | |
| 691 | int main (int argc, char *argv[]) |
| 692 | { |
| 693 | char *input = NULL; |
| 694 | char *output = NULL; |
| 695 | int *table = NULL; |
| 696 | leaf_t **leafs = NULL; |
| 697 | leaf_t *root = NULL; |
| 698 | code_t *codes = NULL; |
| 699 | byte_t *header = NULL; |
| 700 | int mode = COMPRESS; |
| 701 | int rc = 1; |
| 702 | |
| 703 | progname = argv[0]; |
| 704 | |
| 705 | int c; |
| 706 | char * arg; |
| 707 | VERBOSE (DEBUG, PRINTF ("start processing arguments\n")); |
| 708 | while (argc-- > 1) { |
| 709 | arg = *(++argv); |
| 710 | if (arg[0] != '-') { |
| 711 | _fprintf (stderr, "%s: invalid option -- %s\n", progname, arg); |
| 712 | usage (1); |
| 713 | } |
| 714 | c = arg[1]; |
| 715 | VERBOSE (DEBUG, PRINTF ("option: %c\n", c)); |
| 716 | switch (c) { |
| 717 | case 'c': |
| 718 | mode = COMPRESS; |
| 719 | break; |
| 720 | case 'd': |
| 721 | mode = DECOMPRESS; |
| 722 | break; |
| 723 | case 'i': |
| 724 | input = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
| 725 | VERBOSE (DEBUG, PRINTF ("input: %s\n", input)); |
| 726 | break; |
| 727 | case 'o': |
| 728 | output = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
| 729 | VERBOSE (DEBUG, PRINTF ("output: %s\n", output)); |
| 730 | break; |
| 731 | case 'v': |
| 732 | arg = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
| 733 | if (arg == NULL) { |
| 734 | _fprintf (stderr, "%s: missing verbose level\n", progname); |
| 735 | usage (1); |
| 736 | } |
| 737 | verbose = atoi (arg); |
| 738 | VERBOSE (INFO, _fprintf (stdout, "verbose: %d\n", verbose)); |
| 739 | break; |
| 740 | case 'h': |
| 741 | default: |
| 742 | usage (c != 'h'); |
| 743 | } |
| 744 | } |
| 745 | if ((input == NULL) || (output == NULL)) { |
| 746 | _fprintf (stderr, "%s: missing file\n", progname); |
| 747 | usage (1); |
| 748 | } |
| 749 | VERBOSE (DEBUG, PRINTF ("end processing arguments\n")); |
| 750 | |
| 751 | switch (mode) { |
| 752 | case COMPRESS: |
| 753 | table = create_table (input); |
| 754 | if (table == NULL) break; |
| 755 | VERBOSE (INFO, print_occ_table (table)); |
| 756 | |
| 757 | leafs = init_forest (table); |
| 758 | if (leafs == NULL) break; |
| 759 | root = create_tree (leafs); |
| 760 | if (root == NULL) break; |
| 761 | codes = create_code (root); |
| 762 | if (codes == NULL) break; |
| 763 | VERBOSE (INFO, print_code_table (codes)); |
| 764 | header = encode_header_table (codes, table); |
| 765 | if (header == NULL) break; |
| 766 | VERBOSE (INFO, print_header (header)); |
| 767 | rc = write_compress (output, input, codes, header); |
| 768 | break; |
| 769 | case DECOMPRESS: |
| 770 | codes = read_header (input); |
| 771 | if (codes == NULL) break; |
| 772 | VERBOSE (INFO, print_code_table (codes)); |
| 773 | rc = write_decompress (output, input, codes); |
| 774 | break; |
| 775 | } |
| 776 | |
| 777 | /* clean everything */ |
| 778 | fflush (stdout); |
| 779 | if (header) free (header); |
| 780 | if (codes) free (codes); |
| 781 | if (root) free_tree (root); |
| 782 | if (leafs) free (leafs); |
| 783 | if (table) free (table); |
| 784 | |
| 785 | return rc; |
| 786 | } |
| 787 | |
| 788 | // test: compress.exe -h |
| 789 | // test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }' |
| 790 | // test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }' |
| 791 | // test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }' |
| 792 | // test: compress.exe -c -i compress.c -o compress.mz |
| 793 | // test: ls -sS1 compress.c compress.mz | tail -1 | grep compress.mz |
| 794 | // test: compress.exe -d -i compress.mz -o tmp.c |
| 795 | // test: cmp compress.c tmp.c |
| 796 | // test: rm compress.mz tmp.c |
| 797 | |
| 798 | /* vim: set ts=4 sw=4 et */ |