| 1 | /* depend: */ |
| 2 | /* cflags: */ |
| 3 | /* linker: atoi.o code.o debug.o fdprintf.o */ |
| 4 | |
| 5 | #include <fcntl.h> |
| 6 | #include <unistd.h> |
| 7 | #include <stddef.h> |
| 8 | #include "atoi.h" |
| 9 | #include "code.h" |
| 10 | #include "debug.h" |
| 11 | #include "fdprintf.h" |
| 12 | |
/* constants */

/* number of bytes requested from read() / handed to write() at a time */
#define BUFFER_SIZE 4096

/* operating modes selected by the -c / -d command-line options */
enum {
    COMPRESS = 1,
    DECOMPRESS = 2
};

/* O_RAW is not defined by every <fcntl.h>: make it a flag without effect */
#ifndef O_RAW
#define O_RAW 0
#endif /* O_RAW */

/* macros */

/* global variables */

/* name of the program, set from argv[0] in main and shown by usage */
char *progname = NULL;
| 29 | |
| 30 | /* help function */ |
| 31 | |
| 32 | int usage (int ret) |
| 33 | { |
| 34 | int fd = ret ? stdfderr : stdfdout; |
| 35 | fdprintf (fd, "usage: %s\n", progname); |
| 36 | fdprintf (fd, " -h : help message\n"); |
| 37 | fdprintf (fd, " -i <file>: input file\n"); |
| 38 | fdprintf (fd, " -o <file>: output file\n"); |
| 39 | fdprintf (fd, " -v : verbose level (%d)\n", verbose); |
| 40 | |
| 41 | return ret; |
| 42 | } |
| 43 | |
| 44 | /* create occurence table */ |
| 45 | |
| 46 | int *create_table (char *filename) |
| 47 | { |
| 48 | byte_t buffer[BUFFER_SIZE] = {0}; |
| 49 | int nbread; |
| 50 | static int table[NB_BYTES] = {0}; |
| 51 | int fid = 0; |
| 52 | |
| 53 | VERBOSE (DEBUG, PRINTOUT ("start creating occurence table\n")); |
| 54 | |
| 55 | /* open file */ |
| 56 | fid = open (filename, O_RDONLY|O_RAW); |
| 57 | if (fid == -1) { |
| 58 | VERBOSE (ERROR, PRINTERR ("can't open file '%s'\n", filename)); |
| 59 | return NULL; |
| 60 | } |
| 61 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", filename)); |
| 62 | |
| 63 | /* read file */ |
| 64 | while ((nbread = read (fid, buffer, BUFFER_SIZE)) > 0) { |
| 65 | VERBOSE (DEBUG, PRINTOUT ("nbread: %d\n", nbread)); |
| 66 | while (nbread--) { |
| 67 | table[(int)buffer[nbread]]++; |
| 68 | } |
| 69 | } |
| 70 | |
| 71 | /* close file */ |
| 72 | close (fid); |
| 73 | |
| 74 | VERBOSE (DEBUG, PRINTOUT ("end creating occurence table\n")); |
| 75 | |
| 76 | return table; |
| 77 | } |
| 78 | |
| 79 | /* print occurence table */ |
| 80 | |
| 81 | void print_occ_table (int *table) |
| 82 | { |
| 83 | int i; |
| 84 | |
| 85 | PRINTOUT ("Occurence table\n"); |
| 86 | for (i = 0; i < NB_BYTES; i++) { |
| 87 | if (table[i]) { |
| 88 | PRINTOUT ("0x%02x '%c': %d\n", i, ((i < 32) || (i > 127)) ? '.' : i, table[i]); |
| 89 | } |
| 90 | } |
| 91 | } |
| 92 | |
| 93 | /* initialize forest */ |
| 94 | |
| 95 | leaf_t **init_forest (int *table) |
| 96 | { |
| 97 | static leaf_t *leafs[NB_BYTES + 1] = {0}; |
| 98 | int nb_leafs = 0; |
| 99 | int i, l; |
| 100 | |
| 101 | VERBOSE (DEBUG, PRINTOUT ("start initiliazing forest\n")); |
| 102 | |
| 103 | /* count number of leafs */ |
| 104 | for (i = 0; i < NB_BYTES; i++) { |
| 105 | if (table[i] > 0) { |
| 106 | nb_leafs++; |
| 107 | } |
| 108 | } |
| 109 | |
| 110 | /* initialize leafs */ |
| 111 | for (i = 0, l = 0; i < NB_BYTES; i++) { |
| 112 | if (table[i] > 0) { |
| 113 | leafs[l] = getleaf (1); |
| 114 | if (leafs[l] == NULL) { |
| 115 | VERBOSE (ERROR, PRINTERR ("can't allocate memory\n")); |
| 116 | return NULL; |
| 117 | } |
| 118 | leafs[l]->occ = table[i]; |
| 119 | leafs[l]->c = i; |
| 120 | l++; |
| 121 | } |
| 122 | } |
| 123 | |
| 124 | VERBOSE (DEBUG, PRINTOUT ("end initiliazing forest\n")); |
| 125 | |
| 126 | return leafs; |
| 127 | } |
| 128 | |
| 129 | /* create tree */ |
| 130 | |
| 131 | leaf_t *create_tree (leaf_t **leafs) |
| 132 | { |
| 133 | leaf_t *branch = NULL; |
| 134 | int nb_leafs = 0; |
| 135 | int last = -1; |
| 136 | int ante; |
| 137 | int i, j; |
| 138 | |
| 139 | VERBOSE (DEBUG, PRINTOUT ("start creating tree\n")); |
| 140 | |
| 141 | /* count number of leafs */ |
| 142 | while (leafs[nb_leafs] != NULL) { |
| 143 | nb_leafs++; |
| 144 | } |
| 145 | |
| 146 | /* create tree */ |
| 147 | for (j = 0; j < nb_leafs - 1; j++) { |
| 148 | |
| 149 | /* look for leatest occurence */ |
| 150 | last = -1; |
| 151 | for (i = 0; i < nb_leafs; i++) { |
| 152 | if (leafs[i] == NULL) { |
| 153 | continue; |
| 154 | } |
| 155 | if ((last == -1) || (leafs[i]->occ < leafs[last]->occ)) { |
| 156 | last = i; |
| 157 | } |
| 158 | } |
| 159 | |
| 160 | /* look for ante leatest occurence */ |
| 161 | ante = -1; |
| 162 | for (i = 0; i < nb_leafs; i++) { |
| 163 | if ((i == last) || (leafs[i] == NULL)) { |
| 164 | continue; |
| 165 | } |
| 166 | if ((ante == -1) || (leafs[i]->occ < leafs[ante]->occ)) { |
| 167 | ante = i; |
| 168 | } |
| 169 | } |
| 170 | |
| 171 | /* create branch */ |
| 172 | if ((last == -1) || (ante == -1)) { |
| 173 | VERBOSE (ERROR, PRINTERR ("error during tree building\n")); |
| 174 | return NULL; |
| 175 | } |
| 176 | branch = getleaf (1); |
| 177 | if (branch == NULL) { |
| 178 | VERBOSE (ERROR, PRINTERR ("can't allocate memory\n")); |
| 179 | return NULL; |
| 180 | } |
| 181 | branch->left = leafs[last]; |
| 182 | branch->right = leafs[ante]; |
| 183 | branch->occ = branch->left->occ + branch->right->occ; |
| 184 | leafs[last] = branch; |
| 185 | leafs[ante] = NULL; |
| 186 | } |
| 187 | |
| 188 | VERBOSE (DEBUG, PRINTOUT ("end creating tree\n")); |
| 189 | |
| 190 | return (last != -1) ? leafs[last] : NULL; |
| 191 | } |
| 192 | |
| 193 | /* explore tree */ |
| 194 | |
| 195 | void explore_tree (code_t *table, leaf_t *root, char *code, int index) |
| 196 | { |
| 197 | |
| 198 | VERBOSE (DEBUG, PRINTOUT ("start exploring code tree\n")); |
| 199 | |
| 200 | if ((root->left == NULL) && (root->right == NULL)) { |
| 201 | codcpy ((char *)(table + (int)(root->c)), sizeof (code_t), code); |
| 202 | } |
| 203 | else { |
| 204 | codcpy (code + index, sizeof (code_t), "1"); |
| 205 | explore_tree (table, root->left, code, index + 1); |
| 206 | codcpy (code + index, sizeof (code_t), "0"); |
| 207 | explore_tree (table, root->right, code, index + 1); |
| 208 | } |
| 209 | |
| 210 | VERBOSE (DEBUG, PRINTOUT ("end exploring code tree\n")); |
| 211 | } |
| 212 | |
| 213 | /* create code table */ |
| 214 | code_t *create_code (leaf_t *root) |
| 215 | { |
| 216 | static code_t table[NB_BYTES] = {0}; |
| 217 | code_t code = {0}; |
| 218 | |
| 219 | VERBOSE (DEBUG, PRINTOUT ("start creating code table\n")); |
| 220 | |
| 221 | explore_tree (table, root, (char *)&code, 0); |
| 222 | |
| 223 | VERBOSE (DEBUG, PRINTOUT ("end creating code table\n")); |
| 224 | |
| 225 | return table; |
| 226 | } |
| 227 | |
| 228 | /* print code table */ |
| 229 | |
| 230 | void print_code_table (code_t *codes) |
| 231 | { |
| 232 | char *code; |
| 233 | int i; |
| 234 | |
| 235 | PRINTOUT ("Code table\n"); |
| 236 | for (i = 0; i < NB_BYTES; i++) { |
| 237 | code = (char *)(codes + i); |
| 238 | if (codlen (code) == 0) { |
| 239 | continue; |
| 240 | } |
| 241 | PRINTOUT ("0x%02x '%c': %s\n", i, ((i < 32) || (i > 127)) ? '.' : i, code); |
| 242 | } |
| 243 | } |
| 244 | |
| 245 | /* encode header and code table */ |
| 246 | |
| 247 | byte_t *encode_header_table (code_t *codes, int *occ) |
| 248 | { |
| 249 | static byte_t buffer[NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6] = {0}; |
| 250 | char bits[(NB_BYTES - 1) + 8 + 1] = {0}; |
| 251 | char *code; |
| 252 | byte_t *header = buffer; |
| 253 | int i, j, length, mode; |
| 254 | int nb = 0; |
| 255 | int size = 0; |
| 256 | |
| 257 | VERBOSE (DEBUG, PRINTOUT ("start encoding header and code table\n")); |
| 258 | |
| 259 | /* mode 1 or 2 */ |
| 260 | for (i = 0; i < NB_BYTES; i++) { |
| 261 | code = (char *)(codes + i); |
| 262 | if (codlen (code) > 0) { |
| 263 | nb++; |
| 264 | size += codlen (code) * occ[i]; |
| 265 | } |
| 266 | } |
| 267 | mode = (NB_BYTES < 2 * nb + 1) ? 1 : 2; |
| 268 | VERBOSE (DEBUG, PRINTOUT ("nb chars: %d\n", nb)); |
| 269 | VERBOSE (DEBUG, PRINTOUT ("mode: %d\n", mode)); |
| 270 | VERBOSE (DEBUG, PRINTOUT ("size: %d\n", size)); |
| 271 | VERBOSE (DEBUG, PRINTOUT ("rem: %d\n", size % 256)); |
| 272 | |
| 273 | /* header */ |
| 274 | codcpy ((char *)header, sizeof (buffer), (mode == 1) ? "MZ1 " : "MZ2 "); |
| 275 | header += 6; |
| 276 | |
| 277 | /* size */ |
| 278 | switch (mode) { |
| 279 | case 1: |
| 280 | for (i = 0; i < NB_BYTES; i++) { |
| 281 | code = (char *)(codes + i); |
| 282 | *(header++) = (byte_t) codlen (code); |
| 283 | } |
| 284 | break; |
| 285 | case 2: |
| 286 | *(header++) = (byte_t)(nb - 1); |
| 287 | for (i = 0; i < NB_BYTES; i++) { |
| 288 | code = (char *)(codes + i); |
| 289 | if (codlen (code) > 0) { |
| 290 | *(header++) = (byte_t) i; |
| 291 | *(header++) = (byte_t) codlen (code); |
| 292 | } |
| 293 | } |
| 294 | break; |
| 295 | } |
| 296 | |
| 297 | /* bits */ |
| 298 | for (i = 0; i < NB_BYTES; i++) { |
| 299 | code = (char *)(codes + i); |
| 300 | if (codlen (code) > 0) { |
| 301 | codcat (bits, sizeof (code_t), code); |
| 302 | while (codlen (bits) > (8 - 1)) { |
| 303 | for (j = 0; j < 8; j++) { |
| 304 | *header <<= 1; |
| 305 | if (bits[j] == '1') { |
| 306 | (*header)++; |
| 307 | } |
| 308 | } |
| 309 | codcpy (bits, sizeof (code_t), bits + 8); |
| 310 | header++; |
| 311 | } |
| 312 | } |
| 313 | } |
| 314 | if (codlen (bits) > 0) { |
| 315 | for (j = 0; j < (int)codlen (bits); j++) { |
| 316 | *header <<= 1; |
| 317 | if (bits[j] == '1') { |
| 318 | (*header)++; |
| 319 | } |
| 320 | } |
| 321 | for (j = (int)codlen (bits); j < 8; j++) { |
| 322 | *header <<= 1; |
| 323 | } |
| 324 | header++; |
| 325 | } |
| 326 | |
| 327 | /* length */ |
| 328 | length = (int)(header - buffer - 6); |
| 329 | VERBOSE (DEBUG, PRINTOUT ("lengh: %d %02x %02x\n", length, length >> 8, length & 0xff)); |
| 330 | buffer[3] = (byte_t)(length >> 8); |
| 331 | buffer[4] = (byte_t)(length & 0xff); |
| 332 | buffer[5] = (byte_t)(size % 256); |
| 333 | header = buffer; |
| 334 | |
| 335 | VERBOSE (DEBUG, PRINTOUT ("end encoding header and code table\n")); |
| 336 | |
| 337 | return header; |
| 338 | } |
| 339 | |
| 340 | /* print header */ |
| 341 | |
| 342 | void print_header (byte_t *header) |
| 343 | { |
| 344 | int length, i; |
| 345 | |
| 346 | length = (header[3] << 8) + header[4]; |
| 347 | VERBOSE (DEBUG, PRINTOUT ("lengh: %d\n", length)); |
| 348 | for (i = 0; i < length + 6; i++) { |
| 349 | PRINTOUT ("%02x", header[i]); |
| 350 | } |
| 351 | PRINTOUT ("\n"); |
| 352 | } |
| 353 | |
| 354 | /* write crompressed file */ |
| 355 | |
| 356 | int write_compress (char *output, char *input, code_t *codes, byte_t *header) |
| 357 | { |
| 358 | byte_t bufin[BUFFER_SIZE] = {0}; |
| 359 | byte_t bufout[BUFFER_SIZE] = {0}; |
| 360 | char bits[(NB_BYTES - 1) + 8 + 1] = {0}; |
| 361 | int fin, fout; |
| 362 | int length = 0; |
| 363 | int i, j, nbread, nbwrite; |
| 364 | byte_t *pt; |
| 365 | |
| 366 | VERBOSE (DEBUG, PRINTOUT ("start writting compressed file\n")); |
| 367 | |
| 368 | /* open input file */ |
| 369 | fin = open (input, O_RDONLY|O_RAW); |
| 370 | if (fin == -1) { |
| 371 | VERBOSE (ERROR, PRINTERR ("can't open file '%s' for reading\n", input)); |
| 372 | return 1; |
| 373 | } |
| 374 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", input)); |
| 375 | |
| 376 | /* open output file */ |
| 377 | fout = open (output, O_WRONLY|O_CREAT|O_RAW, 0700); |
| 378 | if (fout == -1) { |
| 379 | VERBOSE (ERROR, PRINTERR ("can't open file '%s' for writing\n", output)); |
| 380 | close (fin); |
| 381 | return 1; |
| 382 | } |
| 383 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", output)); |
| 384 | |
| 385 | /* write header */ |
| 386 | length = (header[3] << 8) + header[4]; |
| 387 | VERBOSE (DEBUG, PRINTOUT ("lengh: %d\n", length)); |
| 388 | nbwrite = write (fout, header, length + 6); |
| 389 | if (nbwrite != length + 6) { |
| 390 | VERBOSE (ERROR, PRINTERR ("can't write %d bytes in file '%s'\n", length + 6 - nbwrite, output)); |
| 391 | close (fout); |
| 392 | close (fin); |
| 393 | return 1; |
| 394 | } |
| 395 | |
| 396 | |
| 397 | /* write file */ |
| 398 | pt = bufout; |
| 399 | while ((nbread = read (fin, bufin, BUFFER_SIZE)) > 0) { |
| 400 | VERBOSE (DEBUG, PRINTOUT ("nbread: %d\n", nbread)); |
| 401 | for (i = 0; i < nbread; i++) { |
| 402 | codcat (bits, sizeof (code_t), (char *)(codes + bufin[i])); |
| 403 | while (codlen (bits) > (8 - 1)) { |
| 404 | for (j = 0; j < 8; j++) { |
| 405 | *pt <<= 1; |
| 406 | if (bits[j] == '1') { |
| 407 | (*pt)++; |
| 408 | } |
| 409 | } |
| 410 | codcpy (bits, sizeof (code_t), bits + 8); |
| 411 | if (pt - bufout == BUFFER_SIZE - 1) { |
| 412 | nbwrite = write (fout, bufout, BUFFER_SIZE); |
| 413 | if (nbwrite != BUFFER_SIZE) { |
| 414 | VERBOSE (ERROR, PRINTERR ("can't write %d bytes in file '%s'\n", BUFFER_SIZE - nbwrite, output)); |
| 415 | close (fout); |
| 416 | close (fin); |
| 417 | return 1; |
| 418 | } |
| 419 | pt = bufout; |
| 420 | } else { |
| 421 | pt++; |
| 422 | } |
| 423 | } |
| 424 | } |
| 425 | } |
| 426 | VERBOSE (DEBUG, PRINTOUT ("lastest bits : %d\n", codlen (bits))); |
| 427 | if (codlen (bits) > 0) { |
| 428 | for (j = 0; j < (int)codlen (bits); j++) { |
| 429 | *pt <<= 1; |
| 430 | if (bits[j] == '1') { |
| 431 | (*pt)++; |
| 432 | } |
| 433 | } |
| 434 | for (j = (int)codlen (bits); j < 8; j++) { |
| 435 | *pt <<= 1; |
| 436 | } |
| 437 | pt++; |
| 438 | } |
| 439 | if (pt != bufout) { |
| 440 | VERBOSE (DEBUG, PRINTOUT ("last partial buffer written: %u\n", pt - bufout)); |
| 441 | nbwrite = write (fout, bufout, pt - bufout); |
| 442 | if (nbwrite != pt - bufout) { |
| 443 | VERBOSE (ERROR, PRINTERR ("can't write %d bytes in file '%s'\n", pt - bufout - nbwrite, output)); |
| 444 | close (fout); |
| 445 | close (fin); |
| 446 | return 1; |
| 447 | } |
| 448 | } |
| 449 | |
| 450 | /* closing */ |
| 451 | close (fin); |
| 452 | close (fout); |
| 453 | |
| 454 | VERBOSE (DEBUG, PRINTOUT ("end writting compressed file\n")); |
| 455 | |
| 456 | return 0; |
| 457 | } |
| 458 | |
| 459 | /* read header */ |
| 460 | |
| 461 | code_t *read_header (char *filename) { |
| 462 | static code_t table[NB_BYTES] = {0}; |
| 463 | byte_t buffer[NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6] = {0}; |
| 464 | byte_t *codes = NULL; |
| 465 | byte_t cur; |
| 466 | int lengths[NB_BYTES] = {0}; |
| 467 | int fid; |
| 468 | int mode = 0; |
| 469 | int i, j, l, nb, size; |
| 470 | |
| 471 | VERBOSE (DEBUG, PRINTOUT ("start reading header\n")); |
| 472 | |
| 473 | /* open file */ |
| 474 | fid = open (filename, O_RDONLY|O_RAW); |
| 475 | if (fid == -1) { |
| 476 | VERBOSE (ERROR, PRINTERR ("can't open file '%s'\n", filename)); |
| 477 | return NULL; |
| 478 | } |
| 479 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", filename)); |
| 480 | |
| 481 | /* read magic number */ |
| 482 | nb = read (fid, buffer, 6); |
| 483 | VERBOSE (DEBUG, PRINTOUT ("nb, buffer: %d 0x%02x 0x%02x\n", nb, buffer[0], buffer[1])); |
| 484 | if ((nb == 6) && (buffer[0] == 'M') && (buffer[1] == 'Z')) { |
| 485 | mode = (buffer[2] == '1') ? 1 : (buffer[2] == '2') ? 2 : 0; |
| 486 | size = (buffer[3] << 8) + buffer[4]; |
| 487 | VERBOSE (DEBUG, PRINTOUT ("mode, size: %d %d\n", mode, size)); |
| 488 | if (size > NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES) { |
| 489 | mode = 0; |
| 490 | } else { |
| 491 | nb = read (fid, buffer, size); |
| 492 | VERBOSE (DEBUG, PRINTOUT ("nb read: %d/%d\n", nb, size)); |
| 493 | if (nb != size) { |
| 494 | mode = 0; |
| 495 | } |
| 496 | } |
| 497 | } |
| 498 | close (fid); |
| 499 | if (mode == 0) { |
| 500 | VERBOSE (ERROR, PRINTERR ("incorrect file\n")); |
| 501 | return NULL; |
| 502 | } |
| 503 | |
| 504 | /* analyse header */ |
| 505 | codes = buffer; |
| 506 | switch (mode) { |
| 507 | case 1: |
| 508 | for (i = 0; i < NB_BYTES; i++) { |
| 509 | lengths[i] = *(codes++); |
| 510 | } |
| 511 | break; |
| 512 | case 2: |
| 513 | nb = *(codes++) + 1; |
| 514 | VERBOSE (DEBUG, PRINTOUT ("nb codes: %d\n", nb)); |
| 515 | for (i = 0; i < nb; i++) { |
| 516 | j = *(codes++); |
| 517 | lengths[j] = *(codes++); |
| 518 | } |
| 519 | break; |
| 520 | } |
| 521 | VERBOSE (DEBUG, for (i = 0; i < NB_BYTES; i++) if (lengths[i]) PRINTOUT ("%d: %d\n", i, lengths[i])); |
| 522 | |
| 523 | /* check lengths */ |
| 524 | for (i = 0, l = 0; i < NB_BYTES; i++) { |
| 525 | l += lengths[i]; |
| 526 | } |
| 527 | if (((mode == 1) && (size - 256 != (l + 7) / 8)) || |
| 528 | ((mode == 2) && (size - 2 * nb - 1 != (l + 7) / 8))) { |
| 529 | VERBOSE (ERROR, PRINTERR ("incorrect code table length: %d %d %d\n", size, nb, l)); |
| 530 | return NULL; |
| 531 | } |
| 532 | |
| 533 | /* decode code */ |
| 534 | cur = *(codes++); |
| 535 | l = 8; |
| 536 | for (i = 0; i < NB_BYTES; i++) { |
| 537 | if (lengths[i] == 0) { |
| 538 | continue; |
| 539 | } |
| 540 | while (lengths[i]--) { |
| 541 | codcat ((char *)(table + i), sizeof (code_t), ((cur & 0x80) == 0) ? "0" : "1"); |
| 542 | l--; |
| 543 | cur <<= 1; |
| 544 | if (l == 0) { |
| 545 | cur = *(codes++); |
| 546 | l = 8; |
| 547 | } |
| 548 | } |
| 549 | } |
| 550 | |
| 551 | VERBOSE (DEBUG, PRINTOUT ("end reading header\n")); |
| 552 | |
| 553 | return table; |
| 554 | } |
| 555 | |
| 556 | /* write decompressed file */ |
| 557 | |
| 558 | int write_decompress (char *output, char *input, code_t *codes) |
| 559 | { |
| 560 | byte_t bufin[BUFFER_SIZE] = {0}; |
| 561 | byte_t bufout[BUFFER_SIZE] = {0}; |
| 562 | byte_t bufhea[MAX(NB_BYTES * (NB_BYTES - 1) / 2 / 8 + NB_BYTES + 6, BUFFER_SIZE)] = {0}; |
| 563 | char bits[(NB_BYTES - 1) + 1] = {0}; |
| 564 | int fin, fout; |
| 565 | int i, j, k, nb, size, nbwrite, rem; |
| 566 | int is_found; |
| 567 | int l = 0; |
| 568 | byte_t *pt; |
| 569 | |
| 570 | VERBOSE (DEBUG, PRINTOUT ("start writing decompressed file\n")); |
| 571 | |
| 572 | /* open file for reading */ |
| 573 | fin = open (input, O_RDONLY|O_RAW); |
| 574 | if (fin == -1) { |
| 575 | VERBOSE (ERROR, PRINTERR ("can't open file '%s' for reading\n", input)); |
| 576 | return 1; |
| 577 | } |
| 578 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", input)); |
| 579 | |
| 580 | /* read magic number */ |
| 581 | nb = read (fin, bufhea, 6); |
| 582 | if (nb != 6) { |
| 583 | VERBOSE (ERROR, PRINTERR ("can't read file\n")); |
| 584 | close (fin); |
| 585 | return 1; |
| 586 | } |
| 587 | size = (bufhea[3] << 8) + bufhea[4]; |
| 588 | VERBOSE (DEBUG, PRINTOUT ("table size: %d\n", size)); |
| 589 | rem = bufhea[5]; |
| 590 | VERBOSE (DEBUG, PRINTOUT ("remainder: %d\n", rem)); |
| 591 | nb = read (fin, bufhea, size); |
| 592 | if (nb != size) { |
| 593 | VERBOSE (ERROR, PRINTERR ("can't read file\n")); |
| 594 | close (fin); |
| 595 | return 1; |
| 596 | } |
| 597 | |
| 598 | /* open file for writing */ |
| 599 | fout = open (output, O_WRONLY|O_CREAT|O_RAW, 0700); |
| 600 | if (fout == -1) { |
| 601 | VERBOSE (ERROR, PRINTERR ("can't open file '%s' for writing\n", output)); |
| 602 | close (fin); |
| 603 | return 1; |
| 604 | } |
| 605 | VERBOSE (INFO, PRINTOUT ("file '%s' opened\n", output)); |
| 606 | |
| 607 | /* write file */ |
| 608 | pt = bufout; |
| 609 | while ((nb = read (fin, bufin, BUFFER_SIZE)) > 0) { |
| 610 | VERBOSE (DEBUG, PRINTOUT ("nbread: %d\n", nb)); |
| 611 | for (i = 0; i < nb; i++) { |
| 612 | for (j = 0; j < 8; j++) { |
| 613 | codcat (bits, sizeof (bits), ((bufin[i] & 0x80) == 0) ? "0" : "1"); |
| 614 | bufin[i] <<= 1; |
| 615 | l++; |
| 616 | VERBOSE (DEBUG, PRINTOUT ("bits: %d - %s\n", codlen (bits), bits)); |
| 617 | |
| 618 | /* look for correct code */ |
| 619 | is_found = 0; |
| 620 | for (k = 0; (k < NB_BYTES) && (!is_found); k++) { |
| 621 | if (codcmp ((char *)(codes + k), bits) == 0) { |
| 622 | is_found = 1; |
| 623 | VERBOSE (DEBUG, PRINTOUT ("found: %d\n", k)); |
| 624 | *pt= k; |
| 625 | bits[0] = 0; |
| 626 | if (pt - bufout == BUFFER_SIZE - 1) { |
| 627 | VERBOSE (DEBUG, PRINTOUT ("nb buffer out: %u\n", (pt - bufout))); |
| 628 | nbwrite = write (fout, bufout, BUFFER_SIZE); |
| 629 | if (nbwrite != BUFFER_SIZE) { |
| 630 | VERBOSE (ERROR, PRINTERR ("can't write %d bytes in file '%s'\n'", BUFFER_SIZE - nbwrite, output)); |
| 631 | close (fout); |
| 632 | close (fin); |
| 633 | return 1; |
| 634 | } |
| 635 | pt = bufout; |
| 636 | } else { |
| 637 | pt++; |
| 638 | } |
| 639 | } |
| 640 | } |
| 641 | if ((i == nb - 1) && (l % 256 == rem) && (nb != BUFFER_SIZE)) { |
| 642 | VERBOSE (DEBUG, PRINTOUT ("break\n")); |
| 643 | break; |
| 644 | } |
| 645 | } |
| 646 | } |
| 647 | } |
| 648 | if (pt != bufout) { |
| 649 | VERBOSE (DEBUG, PRINTOUT ("nb buffer out: %u\n", (pt - bufout))); |
| 650 | nbwrite = write (fout, bufout, pt - bufout); |
| 651 | if (nbwrite != pt - bufout) { |
| 652 | VERBOSE (ERROR, PRINTERR ("can't write %d bytes in file '%s'\n'", pt - bufout - nbwrite, output)); |
| 653 | close (fout); |
| 654 | close (fin); |
| 655 | return 1; |
| 656 | } |
| 657 | } |
| 658 | |
| 659 | /* close files */ |
| 660 | close (fin); |
| 661 | close (fout); |
| 662 | |
| 663 | VERBOSE (DEBUG, PRINTOUT ("end writing decompressed file\n")); |
| 664 | |
| 665 | return 0; |
| 666 | } |
| 667 | |
| 668 | /* main function */ |
| 669 | |
| 670 | int main (int argc, char *argv[]) |
| 671 | { |
| 672 | char *input = NULL; |
| 673 | char *output = NULL; |
| 674 | int *table = NULL; |
| 675 | leaf_t **leafs = NULL; |
| 676 | leaf_t *root = NULL; |
| 677 | code_t *codes = NULL; |
| 678 | byte_t *header = NULL; |
| 679 | int mode = COMPRESS; |
| 680 | int rc = 1; |
| 681 | |
| 682 | progname = argv[0]; |
| 683 | |
| 684 | int c; |
| 685 | char * arg; |
| 686 | VERBOSE (DEBUG, PRINTOUT ("start processing arguments\n")); |
| 687 | while (argc-- > 1) { |
| 688 | arg = *(++argv); |
| 689 | if (arg[0] != '-') { |
| 690 | PRINTERR ("%s: invalid option -- %s\n", progname, arg); |
| 691 | return usage (1); |
| 692 | } |
| 693 | c = arg[1]; |
| 694 | VERBOSE (DEBUG, PRINTOUT ("option: %c\n", c)); |
| 695 | switch (c) { |
| 696 | case 'c': |
| 697 | mode = COMPRESS; |
| 698 | break; |
| 699 | case 'd': |
| 700 | mode = DECOMPRESS; |
| 701 | break; |
| 702 | case 'i': |
| 703 | input = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
| 704 | VERBOSE (DEBUG, PRINTOUT ("input: %s\n", input)); |
| 705 | break; |
| 706 | case 'o': |
| 707 | output = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
| 708 | VERBOSE (DEBUG, PRINTOUT ("output: %s\n", output)); |
| 709 | break; |
| 710 | case 'v': |
| 711 | arg = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL; |
| 712 | if (arg == NULL) { |
| 713 | PRINTERR ("%s: missing verbose level\n", progname); |
| 714 | return usage (1); |
| 715 | } |
| 716 | verbose = atoi (arg); |
| 717 | VERBOSE (INFO, PRINTOUT ("verbose: %d\n", verbose)); |
| 718 | break; |
| 719 | case 'h': |
| 720 | default: |
| 721 | return usage (c != 'h'); |
| 722 | } |
| 723 | } |
| 724 | if ((input == NULL) || (output == NULL)) { |
| 725 | PRINTERR ("%s: missing file\n", progname); |
| 726 | return usage (1); |
| 727 | } |
| 728 | VERBOSE (DEBUG, PRINTOUT ("end processing arguments\n")); |
| 729 | |
| 730 | switch (mode) { |
| 731 | case COMPRESS: |
| 732 | table = create_table (input); |
| 733 | if (table == NULL) break; |
| 734 | VERBOSE (INFO, print_occ_table (table)); |
| 735 | |
| 736 | leafs = init_forest (table); |
| 737 | if (leafs == NULL) break; |
| 738 | root = create_tree (leafs); |
| 739 | if (root == NULL) break; |
| 740 | codes = create_code (root); |
| 741 | if (codes == NULL) break; |
| 742 | VERBOSE (INFO, print_code_table (codes)); |
| 743 | header = encode_header_table (codes, table); |
| 744 | if (header == NULL) break; |
| 745 | VERBOSE (INFO, print_header (header)); |
| 746 | rc = write_compress (output, input, codes, header); |
| 747 | break; |
| 748 | case DECOMPRESS: |
| 749 | codes = read_header (input); |
| 750 | if (codes == NULL) break; |
| 751 | VERBOSE (INFO, print_code_table (codes)); |
| 752 | rc = write_decompress (output, input, codes); |
| 753 | break; |
| 754 | } |
| 755 | |
| 756 | return rc; |
| 757 | } |
| 758 | |
| 759 | // test: compress.exe -h |
| 760 | // test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }' |
| 761 | // test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }' |
| 762 | // test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }' |
| 763 | // test: compress.exe -v 2>&1 | grep -q 'missing verbose level' |
| 764 | // test: compress.exe -c -i compress.c 2>&1 | grep -q 'missing file' |
| 765 | // test: compress.exe -c -v 4 -i compress.c -o compress.mz | grep -q "Occurence table" |
| 766 | // test: compress.exe -c -i compress.c -o compress.mz |
| 767 | // test: ls -sS1 compress.c compress.mz | tail -1 | grep compress.mz |
| 768 | // test: compress.exe -d -i compress.mz -o tmp.c |
| 769 | // test: cmp compress.c tmp.c; x=$?; rm compress.mz tmp.c; test x$x = x0 |
| 770 | // test: compress.exe -c -i test/compress.c -o compress.mz 2>&1 | grep "can't open file" |
| 771 | // test: compress.exe -c -i compress.c -o test/compress.mz 2>&1 | grep "can't open file" |
| 772 | |
| 773 | /* vim: set ts=4 sw=4 et: */ |