From 37062814905890a1fc18f11fb663f8017a097ebf Mon Sep 17 00:00:00 2001 From: Laurent Mazet Date: Tue, 22 Nov 2022 18:17:27 +0100 Subject: [PATCH] decompression is ready --- compress.c | 265 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 240 insertions(+), 25 deletions(-) diff --git a/compress.c b/compress.c index d0d411a..010ef80 100644 --- a/compress.c +++ b/compress.c @@ -58,7 +58,7 @@ int *create_table (char *filename) int *table = NULL; FILE *fid = NULL; - VERBOSE (DEBUG, PRINTF ("start create occurence table\n")); + VERBOSE (DEBUG, PRINTF ("start creating occurence table\n")); /* memory allocation */ table = (int *) calloc (NB_CHARS, sizeof (int)); @@ -89,7 +89,7 @@ int *create_table (char *filename) /* close file */ fclose (fid); - VERBOSE (DEBUG, PRINTF ("end create occurence table\n")); + VERBOSE (DEBUG, PRINTF ("end creating occurence table\n")); return table; } @@ -167,7 +167,8 @@ leaf_t *create_tree (leaf_t **leafs) { leaf_t *branch = NULL; int nb_leafs = 0; - int last, ante; + int last = -1; + int ante; int i, j; VERBOSE (DEBUG, PRINTF ("start creating tree\n")); @@ -221,7 +222,7 @@ leaf_t *create_tree (leaf_t **leafs) VERBOSE (DEBUG, PRINTF ("end creating tree\n")); - return leafs[last]; + return (last != -1) ? leafs[last] : NULL; } /* free tree */ @@ -303,7 +304,7 @@ void print_code_table (code_t *codes) char *encode_header_table (code_t *codes, int *occ) { - unsigned char buffer[NB_CHARS * (NB_CHARS - 1) / 2 / 8 + NB_CHARS + 2] = {0}; + unsigned char buffer[NB_CHARS * (NB_CHARS - 1) / 2 / 8 + NB_CHARS + 6] = {0}; char bits[(NB_CHARS - 1) + 8 + 1] = {0}; char *code; unsigned char *header = buffer; @@ -321,10 +322,11 @@ char *encode_header_table (code_t *codes, int *occ) size += strlen (code) * occ[i]; } } - mode = (NB_CHARS < 2 * nb + 1) ? 1 : 2; + mode = (NB_CHARS < 2 * nb + 1) ? 1 : 2; VERBOSE (DEBUG, PRINTF ("nb chars: %d\n", nb)); VERBOSE (DEBUG, PRINTF ("mode: %d\n", mode)); VERBOSE (DEBUG, PRINTF ("size: %d\n", size)); + VERBOSE (DEBUG, PRINTF ("rem: %d\n", size % 256)); /* header */ strcpy ((char *)header, (mode == 1) ? "MZ1 " : "MZ2 "); @@ -374,6 +376,9 @@ char *encode_header_table (code_t *codes, int *occ) (*header)++; } } + for (j = (int)strlen (bits); j < 8; j++) { + *header <<= 1; + } header++; } @@ -382,7 +387,7 @@ char *encode_header_table (code_t *codes, int *occ) VERBOSE (DEBUG, PRINTF ("lengh: %d %02x %02x\n", length, length >> 8, length & 0xff)); buffer[3] = (unsigned char)(length >> 8); buffer[4] = (unsigned char)(length & 0xff); - buffer[5] = (unsigned char)(size % 8); + buffer[5] = (unsigned char)(size % 256); /* allocation */ header = (unsigned char *) calloc (length + 6, 1); @@ -424,7 +429,7 @@ int write_compress (char *output, char *input, code_t *codes, char *header) /* open input file */ fin = fopen (input, "rb"); if (fin == NULL) { - VERBOSE (ERROR, printf ("can't open file '%s'\n", input)); + VERBOSE (ERROR, printf ("can't open file '%s' for reading\n", input)); return 1; } VERBOSE (INFO, printf ("file '%s' opened\n", input)); @@ -432,7 +437,7 @@ int write_compress (char *output, char *input, code_t *codes, char *header) /* open output file */ fout = fopen (output, "wb"); if (fin == NULL) { - VERBOSE (ERROR, printf ("can't open file '%s'\n", output)); + VERBOSE (ERROR, printf ("can't open file '%s' for writing\n", output)); return 1; } VERBOSE (INFO, printf ("file '%s' opened\n", output)); @@ -457,15 +462,16 @@ int write_compress (char *output, char *input, code_t *codes, char *header) } } strcpy (bits, bits + 8); - if (pt - bufout < BUFFER_SIZE) { - pt++; - } else { + if (pt - bufout == BUFFER_SIZE - 1) { fwrite (bufout, 1, BUFFER_SIZE, fout); pt = bufout; + } else { + pt++; } } } } + VERBOSE (DEBUG, PRINTF ("lastest bits : %d\n", strlen (bits))); if (strlen (bits) > 0) { for (j = 0; j < (int)strlen (bits); j++) { *pt <<= 1; @@ -473,15 +479,13 @@ int write_compress (char *output, char *input, code_t *codes, char *header) (*pt)++; } } - if (pt - bufout < BUFFER_SIZE) { - pt++; - } else { - fwrite (bufout, 1, BUFFER_SIZE, fout); - pt = bufout; + for (j = (int)strlen (bits); j < 8; j++) { + *pt <<= 1; } pt++; } if (pt != bufout) { + VERBOSE (DEBUG, PRINTF ("last partial buffer written: %u\n", pt - bufout)); fwrite (bufout, 1, pt - bufout, fout); } @@ -494,6 +498,210 @@ int write_compress (char *output, char *input, code_t *codes, char *header) return 0; } +/* read header */ + +code_t *read_header (char *filename) { + unsigned char buffer[NB_CHARS * (NB_CHARS - 1) / 2 / 8 + NB_CHARS + 6] = {0}; + code_t *table = NULL; + unsigned char *codes = NULL; + unsigned char cur; + int lengths[NB_CHARS] = {0}; + FILE *fid = NULL; + int mode = 0; + size_t i, j, l, nb, size; + + VERBOSE (DEBUG, PRINTF ("start reading header\n")); + + /* open file */ + fid = fopen (filename, "rb"); + if (fid == NULL) { + VERBOSE (ERROR, printf ("can't open file '%s'\n", filename)); + return NULL; + } + VERBOSE (INFO, printf ("file '%s' opened\n", filename)); + + /* read magic number */ + nb = fread (buffer, 1, 6, fid); + VERBOSE (DEBUG, PRINTF ("nb, buffer: %d 0x%02x 0x%02x\n", nb, buffer[0], buffer[1])); + if ((nb == 6) && (buffer[0] == 'M') && (buffer[1] == 'Z')) { + mode = (buffer[2] == '1') ? 1 : (buffer[2] == '2') ? 2 : 0; + size = ((unsigned char)(buffer[3]) << 8) + (unsigned char)(buffer[4]); + VERBOSE (DEBUG, PRINTF ("mode, size: %d %d\n", mode, size)); + if (size > NB_CHARS * (NB_CHARS - 1) / 2 / 8 + NB_CHARS) { + mode = 0; + } else { + nb = fread (buffer, 1, size, fid); + VERBOSE (DEBUG, PRINTF ("nb read: %d\n", nb)); + if (nb != size) { + mode = 0; + } + } + } + fclose (fid); + if (mode == 0) { + VERBOSE (ERROR, printf ("incorrect file\n")); + return NULL; + } + + /* analyse header */ + codes = (unsigned char *)buffer; + switch (mode) { + case 1: + for (i = 0; i < NB_CHARS; i++) { + lengths[i] = *(codes++); + } + break; + case 2: + nb = *(codes++) + 1; + VERBOSE (DEBUG, PRINTF ("nb codes: %d\n", nb)); + for (i = 0; i < nb; i++) { + j = *(codes++); + lengths[j] = *(codes++); + } + break; + } + VERBOSE (DEBUG, for (i = 0; i < NB_CHARS; i++) if (lengths[i]) PRINTF ("%d: %d\n", i, lengths[i])); + + /* check lengths */ + for (i = 0, l = 0; i < NB_CHARS; i++) { + l += lengths[i]; + } + if (((mode == 1) && (size - 256 != (l + 7) / 8)) || + ((mode == 2) && (size - 2 * nb - 1 != (l + 7) / 8))) { + VERBOSE (ERROR, printf ("incorrect code table length\n")); + return NULL; + } + + /* allocate table */ + table = (code_t *) calloc (NB_CHARS, sizeof (code_t)); + if (table == NULL) { + VERBOSE (ERROR, printf ("can't allocate memory\n")); + return NULL; + } + + /* decode code */ + cur = *(codes++); + l = 8; + for (i = 0; i < NB_CHARS; i++) { + if (lengths[i] == 0) { + continue; + } + while (lengths[i]--) { + strcat ((char *)(table + i), ((cur & 0x80) == 0) ? "0" : "1"); + l--; + cur <<= 1; + if (l == 0) { + cur = *(codes++); + l = 8; + } + } + } + + VERBOSE (DEBUG, PRINTF ("end reading header\n")); + + return table; +} + +/* write decompressed file */ + +int write_decompress (char *output, char *input, code_t *codes) +{ + char bufin[BUFFER_SIZE] = {0}; + char bufout[BUFFER_SIZE] = {0}; + unsigned char buffer[MAX(NB_CHARS * (NB_CHARS - 1) / 2 / 8 + NB_CHARS + 6, BUFFER_SIZE)] = {0}; + char bits[(NB_CHARS - 1) + 1] = {0}; + FILE *fin, *fout; + int i, j, k, nb, size, rem; + int is_found; + int l = 0; + char *pt; + + VERBOSE (DEBUG, PRINTF ("start writing decompressed file\n")); + + /* open file for reading */ + fin = fopen (input, "rb"); + if (fin == NULL) { + VERBOSE (ERROR, printf ("can't open file '%s' for reading\n", input)); + return 1; + } + VERBOSE (INFO, printf ("file '%s' opened\n", input)); + + /* read magic number */ + nb = fread (buffer, 1, 6, fin); + if (nb != 6) { + VERBOSE (ERROR, printf ("can't read file\n")); + fclose (fin); + return 1; + } + size = ((unsigned char)(buffer[3]) << 8) + (unsigned char)(buffer[4]); + VERBOSE (DEBUG, printf ("table size: %d\n", size)); + rem = buffer[5]; + VERBOSE (DEBUG, printf ("remainder: %d\n", rem)); + nb = fread (buffer, 1, size, fin); + if (nb != size) { + VERBOSE (ERROR, printf ("can't read file\n")); + fclose (fin); + return 1; + } + + /* open file for writing */ + fout = fopen (output, "wb"); + if (fout == NULL) { + VERBOSE (ERROR, printf ("can't open file '%s' for writing\n", output)); + return 2; + } + VERBOSE (INFO, printf ("file '%s' opened\n", output)); + + /* write file */ + pt = bufout; + while (!feof (fin)) { + nb = fread (bufin, 1, BUFFER_SIZE, fin); + VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nb)); + for (i = 0; i < nb; i++) { + for (j = 0; j < 8; j++) { + strcat (bits, ((bufin[i] & 0x80) == 0) ? "0" : "1"); + bufin[i] <<= 1; + l++; + VERBOSE (DEBUG, PRINTF ("bits: %d - %s\n", strlen (bits), bits)); + + /* look for correct code */ + is_found = 0; + for (k = 0; (k < NB_CHARS) && (!is_found); k++) { + if (strcmp ((char *)(codes + k), bits) == 0) { + is_found = 1; + VERBOSE (DEBUG, PRINTF ("found: %d\n", k)); + *pt= k; + bits[0] = 0; + if (pt - bufout == BUFFER_SIZE - 1) { + VERBOSE (DEBUG, PRINTF ("nb buffer out: %u\n", (pt - bufout))); + fwrite (bufout, 1, BUFFER_SIZE, fout); + pt = bufout; + } else { + pt++; + } + } + } + if ((i == nb - 1) && (l % 256 == rem) && (feof (fin))) { + VERBOSE (DEBUG, PRINTF ("break\n")); + break; + } + } + } + } + if (pt != bufout) { + VERBOSE (DEBUG, PRINTF ("nb buffer out: %u\n", (pt - bufout))); + fwrite (bufout, 1, pt - bufout, fout); + } + + /* close files */ + fclose (fin); + fclose (fout); + + VERBOSE (DEBUG, PRINTF ("end writing decompressed file\n")); + + return 0; +} + /* main function */ int main (int argc, char *argv[]) @@ -506,13 +714,14 @@ int main (int argc, char *argv[]) code_t *codes = NULL; char *header = NULL; int mode = COMPRESS; - int rc = 0; + int rc = 1; progname = argv[0]; int c; - VERBOSE (DEBUG, PRINTF ("start argument processing\n")); + VERBOSE (DEBUG, PRINTF ("start processing arguments\n")); while ((c = getopt(argc, argv, "cdhi:o:v:")) != EOF) { + VERBOSE (DEBUG, PRINTF ("option: %c\n", c)); switch (c) { case 'c': mode = COMPRESS; @@ -521,16 +730,14 @@ int main (int argc, char *argv[]) mode = DECOMPRESS; break; case 'i': - VERBOSE (DEBUG, PRINTF ("-i\n")); - VERBOSE (DEBUG, PRINTF ("optarg: %s\n", optarg)); input = optarg; + VERBOSE (DEBUG, PRINTF ("input: %s\n", input)); break; case 'o': - VERBOSE (DEBUG, PRINTF ("-o\n")); output = optarg; + VERBOSE (DEBUG, PRINTF ("output: %s\n", output)); break; case 'v': - VERBOSE (DEBUG, PRINTF ("-v\n")); verbose = atoi (optarg); VERBOSE (INFO, printf ("verbose: %d\n", verbose)); break; @@ -543,7 +750,7 @@ int main (int argc, char *argv[]) fprintf (stderr, "%s: invalid option -- %s\n", progname, argv[optind]); usage (1); } - VERBOSE (DEBUG, PRINTF ("end argument processing\n")); + VERBOSE (DEBUG, PRINTF ("end processing arguments\n")); switch (mode) { case COMPRESS: @@ -564,7 +771,10 @@ int main (int argc, char *argv[]) rc = write_compress (output, input, codes, header); break; case DECOMPRESS: - rc = 1; + codes = read_header (input); + if (codes == NULL) break; + VERBOSE (INFO, print_code_table (codes)); + rc = write_decompress (output, input, codes); break; } @@ -582,5 +792,10 @@ int main (int argc, char *argv[]) // test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }' // test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }' // test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }' +// test: compress.exe -c -i compress.c -o compress.mz +// test: ls -sS1 compress.c compress.mz | tail -1 | grep compress.mz +// test: compress.exe -d -i compress.mz -o tmp.c +// test: cmp compress.c tmp.c +// test: rm compress.mz tmp.c // vim: ts=4 sw=4 et -- 2.30.2