| 1 | /* depend: */ |
| 2 | /* cflags: */ |
| 3 | /* linker: */ |
| 4 | |
| 5 | #include <assert.h> |
| 6 | #include <getopt.h> |
| 7 | #include <malloc.h> |
| 8 | #include <stdio.h> |
| 9 | #include <stdlib.h> |
| 10 | #include <string.h> |
| 11 | |
| 12 | /* constants */ |
| 13 | |
| 14 | #define COMPRESS 1 |
| 15 | #define DECOMPRESS 2 |
| 16 | |
| 17 | #define NB_CHARS 256 |
| 18 | #define BUFFER_SIZE 4096 |
| 19 | |
| 20 | #define DEBUG 4 |
| 21 | #define INFO 3 |
| 22 | #define WARNING 2 |
| 23 | #define ERROR 1 |
| 24 | |
| 25 | /* macros */ |
| 26 | |
| 27 | #define CEIL(x, y) (((x) + (y) - 1) / (y)) |
| 28 | #define MIN(x, y) (((x) < (y)) ? (x) : (y)) |
| 29 | #define MAX(x, y) (((x) > (y)) ? (x) : (y)) |
| 30 | #define VERBOSE(level, statement...) do { if (level <= verbose) { statement; } } while(0) |
| 31 | #define PRINTF(args...) do { fprintf (stdout, args); fflush (stdout); } while (0) |
| 32 | |
| 33 | /* gobal variables */ |
| 34 | |
| 35 | char *progname = NULL; |
| 36 | int verbose = 2; |
| 37 | |
| 38 | /* help function */ |
| 39 | |
| 40 | void usage (int ret) |
| 41 | { |
| 42 | FILE *fd = ret ? stderr : stdout; |
| 43 | fprintf (fd, "usage: %s\n", progname); |
| 44 | fprintf (fd, " -h : help message\n"); |
| 45 | fprintf (fd, " -i <file>: input file\n"); |
| 46 | fprintf (fd, " -o <file>: output file\n"); |
| 47 | fprintf (fd, " -v : verbose level (%d)\n", verbose); |
| 48 | |
| 49 | exit (ret); |
| 50 | } |
| 51 | |
| 52 | /* create occurence table */ |
| 53 | |
| 54 | int *create_table (char *filename) |
| 55 | { |
| 56 | char buffer[BUFFER_SIZE] = {0}; |
| 57 | int nbread; |
| 58 | int *table = NULL; |
| 59 | FILE *fid = NULL; |
| 60 | |
| 61 | VERBOSE (DEBUG, PRINTF ("start create occurence table\n")); |
| 62 | |
| 63 | /* memory allocation */ |
| 64 | table = (int *) calloc (NB_CHARS, sizeof (int)); |
| 65 | if (table == NULL) { |
| 66 | VERBOSE (ERROR, printf ("can't allocate memory\n")); |
| 67 | return NULL; |
| 68 | } |
| 69 | VERBOSE (INFO, printf ("memory allocated\n")); |
| 70 | |
| 71 | /* open file */ |
| 72 | fid = fopen (filename, "rb"); |
| 73 | if (fid == NULL) { |
| 74 | VERBOSE (ERROR, printf ("can't open file '%s'\n", filename)); |
| 75 | free (table); |
| 76 | return NULL; |
| 77 | } |
| 78 | VERBOSE (INFO, printf ("file '%s' opened\n", filename)); |
| 79 | |
| 80 | /* read file */ |
| 81 | while (!feof (fid)) { |
| 82 | nbread = fread (buffer, 1, BUFFER_SIZE, fid); |
| 83 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread)); |
| 84 | while (nbread--) { |
| 85 | table[(int)buffer[nbread]]++; |
| 86 | } |
| 87 | } |
| 88 | |
| 89 | /* close file */ |
| 90 | fclose (fid); |
| 91 | |
| 92 | VERBOSE (DEBUG, PRINTF ("end create occurence table\n")); |
| 93 | |
| 94 | return table; |
| 95 | } |
| 96 | |
| 97 | /* print occurence table */ |
| 98 | |
| 99 | void print_occ_table (int *table) |
| 100 | { |
| 101 | int i; |
| 102 | |
| 103 | printf ("Occurence table\n"); |
| 104 | for (i = 0; i < NB_CHARS; i++) { |
| 105 | if (table[i]) { |
| 106 | printf ("0x%02x '%c': %d\n", i, ((i < 32) || (i > 127)) ? '.' : i, table[i]); |
| 107 | } |
| 108 | } |
| 109 | } |
| 110 | |
| 111 | /* leaf structure */ |
| 112 | |
| 113 | typedef struct _leaf_t |
| 114 | { |
| 115 | struct _leaf_t *left; |
| 116 | struct _leaf_t *right; |
| 117 | int occ; |
| 118 | char c; |
| 119 | } leaf_t; |
| 120 | |
| 121 | /* initialize forest */ |
| 122 | |
| 123 | leaf_t **init_forest (int *table) |
| 124 | { |
| 125 | leaf_t **leafs = NULL; |
| 126 | int nb_leafs = 0; |
| 127 | int i, l; |
| 128 | |
| 129 | VERBOSE (DEBUG, PRINTF ("start initiliazing forest\n")); |
| 130 | |
| 131 | /* count number of leafs */ |
| 132 | for (i = 0; i < NB_CHARS; i++) { |
| 133 | if (table[i] > 0) { |
| 134 | nb_leafs++; |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | /* allocate memory */ |
| 139 | leafs = (leaf_t **) calloc (nb_leafs + 1, sizeof (leaf_t *)); |
| 140 | if (leafs == NULL) { |
| 141 | VERBOSE (ERROR, printf ("can't allocate memory\n")); |
| 142 | return NULL; |
| 143 | } |
| 144 | |
| 145 | /* initialize leafs */ |
| 146 | for (i = 0, l = 0; i < NB_CHARS; i++) { |
| 147 | if (table[i] > 0) { |
| 148 | leafs[l] = (leaf_t *) calloc (1, sizeof (leaf_t)); |
| 149 | if (leafs[l] == NULL) { |
| 150 | VERBOSE (ERROR, printf ("can't allocate memory\n")); |
| 151 | return NULL; |
| 152 | } |
| 153 | leafs[l]->occ = table[i]; |
| 154 | leafs[l]->c = i; |
| 155 | l++; |
| 156 | } |
| 157 | } |
| 158 | |
| 159 | VERBOSE (DEBUG, PRINTF ("end initiliazing forest\n")); |
| 160 | |
| 161 | return leafs; |
| 162 | } |
| 163 | |
| 164 | /* create tree */ |
| 165 | |
| 166 | leaf_t *create_tree (leaf_t **leafs) |
| 167 | { |
| 168 | leaf_t *branch = NULL; |
| 169 | int nb_leafs = 0; |
| 170 | int last, ante; |
| 171 | int i, j; |
| 172 | |
| 173 | VERBOSE (DEBUG, PRINTF ("start creating tree\n")); |
| 174 | |
| 175 | /* count number of leafs */ |
| 176 | while (leafs[nb_leafs] != NULL) { |
| 177 | nb_leafs++; |
| 178 | } |
| 179 | |
| 180 | /* create tree */ |
| 181 | for (j = 0; j < nb_leafs - 1; j++) { |
| 182 | |
| 183 | /* look for leatest occurence */ |
| 184 | last = -1; |
| 185 | for (i = 0; i < nb_leafs; i++) { |
| 186 | if (leafs[i] == NULL) { |
| 187 | continue; |
| 188 | } |
| 189 | if ((last == -1) || (leafs[i]->occ < leafs[last]->occ)) { |
| 190 | last = i; |
| 191 | } |
| 192 | } |
| 193 | |
| 194 | /* look for ante leatest occurence */ |
| 195 | ante = -1; |
| 196 | for (i = 0; i < nb_leafs; i++) { |
| 197 | if ((i == last) || (leafs[i] == NULL)) { |
| 198 | continue; |
| 199 | } |
| 200 | if ((ante == -1) || (leafs[i]->occ < leafs[ante]->occ)) { |
| 201 | ante = i; |
| 202 | } |
| 203 | } |
| 204 | |
| 205 | /* create branch */ |
| 206 | if ((last == -1) || (ante == -1)) { |
| 207 | VERBOSE (ERROR, printf ("error during tree building\n")); |
| 208 | return NULL; |
| 209 | } |
| 210 | branch = (leaf_t *) calloc (1, sizeof (leaf_t)); |
| 211 | if (branch == NULL) { |
| 212 | VERBOSE (ERROR, printf ("can't allocate memory\n")); |
| 213 | return NULL; |
| 214 | } |
| 215 | branch->left = leafs[last]; |
| 216 | branch->right = leafs[ante]; |
| 217 | branch->occ = branch->left->occ + branch->right->occ; |
| 218 | leafs[last] = branch; |
| 219 | leafs[ante] = NULL; |
| 220 | } |
| 221 | |
| 222 | VERBOSE (DEBUG, PRINTF ("end creating tree\n")); |
| 223 | |
| 224 | return leafs[last]; |
| 225 | } |
| 226 | |
| 227 | /* free tree */ |
| 228 | |
| 229 | void free_tree (leaf_t *root) { |
| 230 | if (root) { |
| 231 | if (root->left) { |
| 232 | free_tree (root->left); |
| 233 | } |
| 234 | if (root->right) { |
| 235 | free_tree (root->right); |
| 236 | } |
| 237 | free (root); |
| 238 | } |
| 239 | } |
| 240 | |
| 241 | /* code structure */ |
| 242 | |
| 243 | typedef struct { |
| 244 | char code[NB_CHARS - 1 + 1]; |
| 245 | } code_t; |
| 246 | |
| 247 | /* explore tree */ |
| 248 | |
| 249 | void explore_tree (code_t *table, leaf_t *root, char *code, int index) |
| 250 | { |
| 251 | if ((root->left == NULL) && (root->right == NULL)) { |
| 252 | strcpy ((char *)(table + (int)(root->c)), code); |
| 253 | } |
| 254 | else { |
| 255 | strcpy (code + index, "1"); |
| 256 | explore_tree (table, root->left, code, index + 1); |
| 257 | strcpy (code + index, "0"); |
| 258 | explore_tree (table, root->right, code, index + 1); |
| 259 | } |
| 260 | } |
| 261 | |
| 262 | /* create code table */ |
| 263 | |
| 264 | code_t *create_code (leaf_t *root) |
| 265 | { |
| 266 | code_t *table = NULL; |
| 267 | code_t code = {0}; |
| 268 | |
| 269 | VERBOSE (DEBUG, PRINTF ("start creating code table\n")); |
| 270 | |
| 271 | /* allocate table */ |
| 272 | table = (code_t *) calloc (NB_CHARS, sizeof (code_t)); |
| 273 | if (table == NULL) { |
| 274 | VERBOSE (ERROR, printf ("can't allocate memory\n")); |
| 275 | return NULL; |
| 276 | } |
| 277 | |
| 278 | explore_tree (table, root, (char *)&code, 0); |
| 279 | |
| 280 | VERBOSE (DEBUG, PRINTF ("end creating code table\n")); |
| 281 | |
| 282 | return table; |
| 283 | } |
| 284 | |
| 285 | /* print code table */ |
| 286 | |
| 287 | void print_code_table (code_t *codes) |
| 288 | { |
| 289 | char *code; |
| 290 | int i; |
| 291 | |
| 292 | printf ("Code table\n"); |
| 293 | for (i = 0; i < NB_CHARS; i++) { |
| 294 | code = (char *)(codes + i); |
| 295 | if (strlen (code) == 0) { |
| 296 | continue; |
| 297 | } |
| 298 | printf ("0x%02x '%c': %s\n", i, ((i < 32) || (i > 127)) ? '.' : i, code); |
| 299 | } |
| 300 | } |
| 301 | |
| 302 | /* encode header and code table */ |
| 303 | |
| 304 | char *encode_header_table (code_t *codes, int *occ) |
| 305 | { |
| 306 | unsigned char buffer[NB_CHARS * (NB_CHARS - 1) / 2 / 8 + NB_CHARS + 2] = {0}; |
| 307 | char bits[(NB_CHARS - 1) + 8 + 1] = {0}; |
| 308 | char *code; |
| 309 | unsigned char *header = buffer; |
| 310 | int i, j, length, mode; |
| 311 | int nb = 0; |
| 312 | int size = 0; |
| 313 | |
| 314 | VERBOSE (DEBUG, PRINTF ("start encoding header and code table\n")); |
| 315 | |
| 316 | /* mode 1 or 2 */ |
| 317 | for (i = 0; i < NB_CHARS; i++) { |
| 318 | code = (char *)(codes + i); |
| 319 | if (strlen (code) > 0) { |
| 320 | nb++; |
| 321 | size += strlen (code) * occ[i]; |
| 322 | } |
| 323 | } |
| 324 | mode = (NB_CHARS < 2 * nb + 1) ? 1 : 2; |
| 325 | VERBOSE (DEBUG, PRINTF ("nb chars: %d\n", nb)); |
| 326 | VERBOSE (DEBUG, PRINTF ("mode: %d\n", mode)); |
| 327 | VERBOSE (DEBUG, PRINTF ("size: %d\n", size)); |
| 328 | |
| 329 | /* header */ |
| 330 | strcpy ((char *)header, (mode == 1) ? "MZ1 " : "MZ2 "); |
| 331 | header += 6; |
| 332 | |
| 333 | /* size */ |
| 334 | switch (mode) { |
| 335 | case 1: |
| 336 | for (i = 0; i < NB_CHARS; i++) { |
| 337 | code = (char *)(codes + i); |
| 338 | *(header++) = (unsigned char) strlen (code); |
| 339 | } |
| 340 | break; |
| 341 | case 2: |
| 342 | *(header++) = (unsigned char)(nb - 1); |
| 343 | for (i = 0; i < NB_CHARS; i++) { |
| 344 | code = (char *)(codes + i); |
| 345 | if (strlen (code) > 0) { |
| 346 | *(header++) = (unsigned char)i; |
| 347 | *(header++) = (unsigned char) strlen (code); |
| 348 | } |
| 349 | } |
| 350 | break; |
| 351 | } |
| 352 | |
| 353 | /* bits */ |
| 354 | for (i = 0; i < NB_CHARS; i++) { |
| 355 | code = (char *)(codes + i); |
| 356 | if (strlen (code) > 0) { |
| 357 | strcat (bits, code); |
| 358 | while (strlen (bits) > (8 - 1)) { |
| 359 | for (j = 0; j < 8; j++) { |
| 360 | *header <<= 1; |
| 361 | if (bits[j] == '1') { |
| 362 | (*header)++; |
| 363 | } |
| 364 | } |
| 365 | strcpy (bits, bits + 8); |
| 366 | header++; |
| 367 | } |
| 368 | } |
| 369 | } |
| 370 | if (strlen (bits) > 0) { |
| 371 | for (j = 0; j < (int)strlen (bits); j++) { |
| 372 | *header <<= 1; |
| 373 | if (bits[j] == '1') { |
| 374 | (*header)++; |
| 375 | } |
| 376 | } |
| 377 | header++; |
| 378 | } |
| 379 | |
| 380 | /* length */ |
| 381 | length = (int)(header - buffer - 6); |
| 382 | VERBOSE (DEBUG, PRINTF ("lengh: %d %02x %02x\n", length, length >> 8, length & 0xff)); |
| 383 | buffer[3] = (unsigned char)(length >> 8); |
| 384 | buffer[4] = (unsigned char)(length & 0xff); |
| 385 | buffer[5] = (unsigned char)(size % 8); |
| 386 | |
| 387 | /* allocation */ |
| 388 | header = (unsigned char *) calloc (length + 6, 1); |
| 389 | memcpy (header, buffer, length + 6); |
| 390 | |
| 391 | VERBOSE (DEBUG, PRINTF ("end encoding header and code table\n")); |
| 392 | |
| 393 | return (char *)header; |
| 394 | } |
| 395 | |
| 396 | /* print header */ |
| 397 | |
| 398 | void print_header (char *header) |
| 399 | { |
| 400 | int length, i; |
| 401 | |
| 402 | length = ((unsigned char)(header[3]) << 8) + (unsigned char)(header[4]); |
| 403 | VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length)); |
| 404 | for (i = 0; i < length + 6; i++) { |
| 405 | printf ("%02x", (unsigned char)header[i]); |
| 406 | } |
| 407 | printf ("\n"); |
| 408 | } |
| 409 | |
| 410 | /* write crompressed file */ |
| 411 | |
| 412 | int write_compress (char *output, char *input, code_t *codes, char *header) |
| 413 | { |
| 414 | char bufin[BUFFER_SIZE] = {0}; |
| 415 | char bufout[BUFFER_SIZE] = {0}; |
| 416 | char bits[(NB_CHARS - 1) + 8 + 1] = {0}; |
| 417 | FILE *fin, *fout; |
| 418 | int length = 0; |
| 419 | int i, j, nbread; |
| 420 | char *pt; |
| 421 | |
| 422 | VERBOSE (DEBUG, PRINTF ("start writting compressed file\n")); |
| 423 | |
| 424 | /* open input file */ |
| 425 | fin = fopen (input, "rb"); |
| 426 | if (fin == NULL) { |
| 427 | VERBOSE (ERROR, printf ("can't open file '%s'\n", input)); |
| 428 | return 1; |
| 429 | } |
| 430 | VERBOSE (INFO, printf ("file '%s' opened\n", input)); |
| 431 | |
| 432 | /* open output file */ |
| 433 | fout = fopen (output, "wb"); |
| 434 | if (fin == NULL) { |
| 435 | VERBOSE (ERROR, printf ("can't open file '%s'\n", output)); |
| 436 | return 1; |
| 437 | } |
| 438 | VERBOSE (INFO, printf ("file '%s' opened\n", output)); |
| 439 | |
| 440 | /* write header */ |
| 441 | length = ((unsigned char)(header[3]) << 8) + (unsigned char)(header[4]); |
| 442 | VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length)); |
| 443 | fwrite (header, 1, length + 6, fout); |
| 444 | |
| 445 | /* write file */ |
| 446 | pt = bufout; |
| 447 | while (!feof (fin)) { |
| 448 | nbread = fread (bufin, 1, BUFFER_SIZE, fin); |
| 449 | VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread)); |
| 450 | for (i = 0; i < nbread; i++) { |
| 451 | strcat (bits, (char *)(codes + bufin[i])); |
| 452 | while (strlen (bits) > (8 - 1)) { |
| 453 | for (j = 0; j < 8; j++) { |
| 454 | *pt <<= 1; |
| 455 | if (bits[j] == '1') { |
| 456 | (*pt)++; |
| 457 | } |
| 458 | } |
| 459 | strcpy (bits, bits + 8); |
| 460 | if (pt - bufout < BUFFER_SIZE) { |
| 461 | pt++; |
| 462 | } else { |
| 463 | fwrite (bufout, 1, BUFFER_SIZE, fout); |
| 464 | pt = bufout; |
| 465 | } |
| 466 | } |
| 467 | } |
| 468 | } |
| 469 | if (strlen (bits) > 0) { |
| 470 | for (j = 0; j < (int)strlen (bits); j++) { |
| 471 | *pt <<= 1; |
| 472 | if (bits[j] == '1') { |
| 473 | (*pt)++; |
| 474 | } |
| 475 | } |
| 476 | if (pt - bufout < BUFFER_SIZE) { |
| 477 | pt++; |
| 478 | } else { |
| 479 | fwrite (bufout, 1, BUFFER_SIZE, fout); |
| 480 | pt = bufout; |
| 481 | } |
| 482 | pt++; |
| 483 | } |
| 484 | if (pt != bufout) { |
| 485 | fwrite (bufout, 1, pt - bufout, fout); |
| 486 | } |
| 487 | |
| 488 | /* closing */ |
| 489 | fclose (fin); |
| 490 | fclose (fout); |
| 491 | |
| 492 | VERBOSE (DEBUG, PRINTF ("end writting compressed file\n")); |
| 493 | |
| 494 | return 0; |
| 495 | } |
| 496 | |
| 497 | /* main function */ |
| 498 | |
| 499 | int main (int argc, char *argv[]) |
| 500 | { |
| 501 | char *input = NULL; |
| 502 | char *output = NULL; |
| 503 | int *table = NULL; |
| 504 | leaf_t **leafs = NULL; |
| 505 | leaf_t *root = NULL; |
| 506 | code_t *codes = NULL; |
| 507 | char *header = NULL; |
| 508 | int mode = COMPRESS; |
| 509 | int rc = 0; |
| 510 | |
| 511 | progname = argv[0]; |
| 512 | |
| 513 | int c; |
| 514 | VERBOSE (DEBUG, PRINTF ("start argument processing\n")); |
| 515 | while ((c = getopt(argc, argv, "cdhi:o:v:")) != EOF) { |
| 516 | switch (c) { |
| 517 | case 'c': |
| 518 | mode = COMPRESS; |
| 519 | break; |
| 520 | case 'd': |
| 521 | mode = DECOMPRESS; |
| 522 | break; |
| 523 | case 'i': |
| 524 | VERBOSE (DEBUG, PRINTF ("-i\n")); |
| 525 | VERBOSE (DEBUG, PRINTF ("optarg: %s\n", optarg)); |
| 526 | input = optarg; |
| 527 | break; |
| 528 | case 'o': |
| 529 | VERBOSE (DEBUG, PRINTF ("-o\n")); |
| 530 | output = optarg; |
| 531 | break; |
| 532 | case 'v': |
| 533 | VERBOSE (DEBUG, PRINTF ("-v\n")); |
| 534 | verbose = atoi (optarg); |
| 535 | VERBOSE (INFO, printf ("verbose: %d\n", verbose)); |
| 536 | break; |
| 537 | case 'h': |
| 538 | default: |
| 539 | usage (c != 'h'); |
| 540 | } |
| 541 | } |
| 542 | if (argc - optind != 0) { |
| 543 | fprintf (stderr, "%s: invalid option -- %s\n", progname, argv[optind]); |
| 544 | usage (1); |
| 545 | } |
| 546 | VERBOSE (DEBUG, PRINTF ("end argument processing\n")); |
| 547 | |
| 548 | switch (mode) { |
| 549 | case COMPRESS: |
| 550 | table = create_table (input); |
| 551 | if (table == NULL) break; |
| 552 | VERBOSE (INFO, print_occ_table (table)); |
| 553 | |
| 554 | leafs = init_forest (table); |
| 555 | if (leafs == NULL) break; |
| 556 | root = create_tree (leafs); |
| 557 | if (root == NULL) break; |
| 558 | codes = create_code (root); |
| 559 | if (codes == NULL) break; |
| 560 | VERBOSE (INFO, print_code_table (codes)); |
| 561 | header = encode_header_table (codes, table); |
| 562 | if (header == NULL) break; |
| 563 | VERBOSE (INFO, print_header (header)); |
| 564 | rc = write_compress (output, input, codes, header); |
| 565 | break; |
| 566 | case DECOMPRESS: |
| 567 | rc = 1; |
| 568 | break; |
| 569 | } |
| 570 | |
| 571 | /* clean everything */ |
| 572 | if (header) free (header); |
| 573 | if (codes) free (codes); |
| 574 | if (root) free_tree (root); |
| 575 | if (leafs) free (leafs); |
| 576 | if (table) free (table); |
| 577 | |
| 578 | return rc; |
| 579 | } |
| 580 | |
| 581 | // test: compress.exe -h |
| 582 | // test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }' |
| 583 | // test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }' |
| 584 | // test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }' |
| 585 | |
| 586 | // vim: ts=4 sw=4 et |