From 58352bb03181bec09abf991ae982bc3bb04c5fd3 Mon Sep 17 00:00:00 2001 From: Laurent Mazet Date: Fri, 18 Nov 2022 11:26:23 +0100 Subject: [PATCH] compression is ready --- compress.c | 586 +++++++++++++++++++++++++++++++++++++++++++++++++++++ makefile | 2 +- 2 files changed, 587 insertions(+), 1 deletion(-) create mode 100644 compress.c diff --git a/compress.c b/compress.c new file mode 100644 index 0000000..d0d411a --- /dev/null +++ b/compress.c @@ -0,0 +1,586 @@ +/* depend: */ +/* cflags: */ +/* linker: */ + +#include +#include +#include +#include +#include +#include + +/* constants */ + +#define COMPRESS 1 +#define DECOMPRESS 2 + +#define NB_CHARS 256 +#define BUFFER_SIZE 4096 + +#define DEBUG 4 +#define INFO 3 +#define WARNING 2 +#define ERROR 1 + +/* macros */ + +#define CEIL(x, y) (((x) + (y) - 1) / (y)) +#define MIN(x, y) (((x) < (y)) ? (x) : (y)) +#define MAX(x, y) (((x) > (y)) ? (x) : (y)) +#define VERBOSE(level, statement...) do { if (level <= verbose) { statement; } } while(0) +#define PRINTF(args...) do { fprintf (stdout, args); fflush (stdout); } while (0) + +/* gobal variables */ + +char *progname = NULL; +int verbose = 2; + +/* help function */ + +void usage (int ret) +{ + FILE *fd = ret ? stderr : stdout; + fprintf (fd, "usage: %s\n", progname); + fprintf (fd, " -h : help message\n"); + fprintf (fd, " -i : input file\n"); + fprintf (fd, " -o : output file\n"); + fprintf (fd, " -v : verbose level (%d)\n", verbose); + + exit (ret); +} + +/* create occurence table */ + +int *create_table (char *filename) +{ + char buffer[BUFFER_SIZE] = {0}; + int nbread; + int *table = NULL; + FILE *fid = NULL; + + VERBOSE (DEBUG, PRINTF ("start create occurence table\n")); + + /* memory allocation */ + table = (int *) calloc (NB_CHARS, sizeof (int)); + if (table == NULL) { + VERBOSE (ERROR, printf ("can't allocate memory\n")); + return NULL; + } + VERBOSE (INFO, printf ("memory allocated\n")); + + /* open file */ + fid = fopen (filename, "rb"); + if (fid == NULL) { + VERBOSE (ERROR, printf ("can't open file '%s'\n", filename)); + free (table); + return NULL; + } + VERBOSE (INFO, printf ("file '%s' opened\n", filename)); + + /* read file */ + while (!feof (fid)) { + nbread = fread (buffer, 1, BUFFER_SIZE, fid); + VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread)); + while (nbread--) { + table[(int)buffer[nbread]]++; + } + } + + /* close file */ + fclose (fid); + + VERBOSE (DEBUG, PRINTF ("end create occurence table\n")); + + return table; +} + +/* print occurence table */ + +void print_occ_table (int *table) +{ + int i; + + printf ("Occurence table\n"); + for (i = 0; i < NB_CHARS; i++) { + if (table[i]) { + printf ("0x%02x '%c': %d\n", i, ((i < 32) || (i > 127)) ? '.' : i, table[i]); + } + } +} + +/* leaf structure */ + +typedef struct _leaf_t +{ + struct _leaf_t *left; + struct _leaf_t *right; + int occ; + char c; +} leaf_t; + +/* initialize forest */ + +leaf_t **init_forest (int *table) +{ + leaf_t **leafs = NULL; + int nb_leafs = 0; + int i, l; + + VERBOSE (DEBUG, PRINTF ("start initiliazing forest\n")); + + /* count number of leafs */ + for (i = 0; i < NB_CHARS; i++) { + if (table[i] > 0) { + nb_leafs++; + } + } + + /* allocate memory */ + leafs = (leaf_t **) calloc (nb_leafs + 1, sizeof (leaf_t *)); + if (leafs == NULL) { + VERBOSE (ERROR, printf ("can't allocate memory\n")); + return NULL; + } + + /* initialize leafs */ + for (i = 0, l = 0; i < NB_CHARS; i++) { + if (table[i] > 0) { + leafs[l] = (leaf_t *) calloc (1, sizeof (leaf_t)); + if (leafs[l] == NULL) { + VERBOSE (ERROR, printf ("can't allocate memory\n")); + return NULL; + } + leafs[l]->occ = table[i]; + leafs[l]->c = i; + l++; + } + } + + VERBOSE (DEBUG, PRINTF ("end initiliazing forest\n")); + + return leafs; +} + +/* create tree */ + +leaf_t *create_tree (leaf_t **leafs) +{ + leaf_t *branch = NULL; + int nb_leafs = 0; + int last, ante; + int i, j; + + VERBOSE (DEBUG, PRINTF ("start creating tree\n")); + + /* count number of leafs */ + while (leafs[nb_leafs] != NULL) { + nb_leafs++; + } + + /* create tree */ + for (j = 0; j < nb_leafs - 1; j++) { + + /* look for leatest occurence */ + last = -1; + for (i = 0; i < nb_leafs; i++) { + if (leafs[i] == NULL) { + continue; + } + if ((last == -1) || (leafs[i]->occ < leafs[last]->occ)) { + last = i; + } + } + + /* look for ante leatest occurence */ + ante = -1; + for (i = 0; i < nb_leafs; i++) { + if ((i == last) || (leafs[i] == NULL)) { + continue; + } + if ((ante == -1) || (leafs[i]->occ < leafs[ante]->occ)) { + ante = i; + } + } + + /* create branch */ + if ((last == -1) || (ante == -1)) { + VERBOSE (ERROR, printf ("error during tree building\n")); + return NULL; + } + branch = (leaf_t *) calloc (1, sizeof (leaf_t)); + if (branch == NULL) { + VERBOSE (ERROR, printf ("can't allocate memory\n")); + return NULL; + } + branch->left = leafs[last]; + branch->right = leafs[ante]; + branch->occ = branch->left->occ + branch->right->occ; + leafs[last] = branch; + leafs[ante] = NULL; + } + + VERBOSE (DEBUG, PRINTF ("end creating tree\n")); + + return leafs[last]; +} + +/* free tree */ + +void free_tree (leaf_t *root) { + if (root) { + if (root->left) { + free_tree (root->left); + } + if (root->right) { + free_tree (root->right); + } + free (root); + } +} + +/* code structure */ + +typedef struct { + char code[NB_CHARS - 1 + 1]; +} code_t; + +/* explore tree */ + +void explore_tree (code_t *table, leaf_t *root, char *code, int index) +{ + if ((root->left == NULL) && (root->right == NULL)) { + strcpy ((char *)(table + (int)(root->c)), code); + } + else { + strcpy (code + index, "1"); + explore_tree (table, root->left, code, index + 1); + strcpy (code + index, "0"); + explore_tree (table, root->right, code, index + 1); + } +} + +/* create code table */ + +code_t *create_code (leaf_t *root) +{ + code_t *table = NULL; + code_t code = {0}; + + VERBOSE (DEBUG, PRINTF ("start creating code table\n")); + + /* allocate table */ + table = (code_t *) calloc (NB_CHARS, sizeof (code_t)); + if (table == NULL) { + VERBOSE (ERROR, printf ("can't allocate memory\n")); + return NULL; + } + + explore_tree (table, root, (char *)&code, 0); + + VERBOSE (DEBUG, PRINTF ("end creating code table\n")); + + return table; +} + +/* print code table */ + +void print_code_table (code_t *codes) +{ + char *code; + int i; + + printf ("Code table\n"); + for (i = 0; i < NB_CHARS; i++) { + code = (char *)(codes + i); + if (strlen (code) == 0) { + continue; + } + printf ("0x%02x '%c': %s\n", i, ((i < 32) || (i > 127)) ? '.' : i, code); + } +} + +/* encode header and code table */ + +char *encode_header_table (code_t *codes, int *occ) +{ + unsigned char buffer[NB_CHARS * (NB_CHARS - 1) / 2 / 8 + NB_CHARS + 2] = {0}; + char bits[(NB_CHARS - 1) + 8 + 1] = {0}; + char *code; + unsigned char *header = buffer; + int i, j, length, mode; + int nb = 0; + int size = 0; + + VERBOSE (DEBUG, PRINTF ("start encoding header and code table\n")); + + /* mode 1 or 2 */ + for (i = 0; i < NB_CHARS; i++) { + code = (char *)(codes + i); + if (strlen (code) > 0) { + nb++; + size += strlen (code) * occ[i]; + } + } + mode = (NB_CHARS < 2 * nb + 1) ? 1 : 2; + VERBOSE (DEBUG, PRINTF ("nb chars: %d\n", nb)); + VERBOSE (DEBUG, PRINTF ("mode: %d\n", mode)); + VERBOSE (DEBUG, PRINTF ("size: %d\n", size)); + + /* header */ + strcpy ((char *)header, (mode == 1) ? "MZ1 " : "MZ2 "); + header += 6; + + /* size */ + switch (mode) { + case 1: + for (i = 0; i < NB_CHARS; i++) { + code = (char *)(codes + i); + *(header++) = (unsigned char) strlen (code); + } + break; + case 2: + *(header++) = (unsigned char)(nb - 1); + for (i = 0; i < NB_CHARS; i++) { + code = (char *)(codes + i); + if (strlen (code) > 0) { + *(header++) = (unsigned char)i; + *(header++) = (unsigned char) strlen (code); + } + } + break; + } + + /* bits */ + for (i = 0; i < NB_CHARS; i++) { + code = (char *)(codes + i); + if (strlen (code) > 0) { + strcat (bits, code); + while (strlen (bits) > (8 - 1)) { + for (j = 0; j < 8; j++) { + *header <<= 1; + if (bits[j] == '1') { + (*header)++; + } + } + strcpy (bits, bits + 8); + header++; + } + } + } + if (strlen (bits) > 0) { + for (j = 0; j < (int)strlen (bits); j++) { + *header <<= 1; + if (bits[j] == '1') { + (*header)++; + } + } + header++; + } + + /* length */ + length = (int)(header - buffer - 6); + VERBOSE (DEBUG, PRINTF ("lengh: %d %02x %02x\n", length, length >> 8, length & 0xff)); + buffer[3] = (unsigned char)(length >> 8); + buffer[4] = (unsigned char)(length & 0xff); + buffer[5] = (unsigned char)(size % 8); + + /* allocation */ + header = (unsigned char *) calloc (length + 6, 1); + memcpy (header, buffer, length + 6); + + VERBOSE (DEBUG, PRINTF ("end encoding header and code table\n")); + + return (char *)header; +} + +/* print header */ + +void print_header (char *header) +{ + int length, i; + + length = ((unsigned char)(header[3]) << 8) + (unsigned char)(header[4]); + VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length)); + for (i = 0; i < length + 6; i++) { + printf ("%02x", (unsigned char)header[i]); + } + printf ("\n"); +} + +/* write crompressed file */ + +int write_compress (char *output, char *input, code_t *codes, char *header) +{ + char bufin[BUFFER_SIZE] = {0}; + char bufout[BUFFER_SIZE] = {0}; + char bits[(NB_CHARS - 1) + 8 + 1] = {0}; + FILE *fin, *fout; + int length = 0; + int i, j, nbread; + char *pt; + + VERBOSE (DEBUG, PRINTF ("start writting compressed file\n")); + + /* open input file */ + fin = fopen (input, "rb"); + if (fin == NULL) { + VERBOSE (ERROR, printf ("can't open file '%s'\n", input)); + return 1; + } + VERBOSE (INFO, printf ("file '%s' opened\n", input)); + + /* open output file */ + fout = fopen (output, "wb"); + if (fin == NULL) { + VERBOSE (ERROR, printf ("can't open file '%s'\n", output)); + return 1; + } + VERBOSE (INFO, printf ("file '%s' opened\n", output)); + + /* write header */ + length = ((unsigned char)(header[3]) << 8) + (unsigned char)(header[4]); + VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length)); + fwrite (header, 1, length + 6, fout); + + /* write file */ + pt = bufout; + while (!feof (fin)) { + nbread = fread (bufin, 1, BUFFER_SIZE, fin); + VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread)); + for (i = 0; i < nbread; i++) { + strcat (bits, (char *)(codes + bufin[i])); + while (strlen (bits) > (8 - 1)) { + for (j = 0; j < 8; j++) { + *pt <<= 1; + if (bits[j] == '1') { + (*pt)++; + } + } + strcpy (bits, bits + 8); + if (pt - bufout < BUFFER_SIZE) { + pt++; + } else { + fwrite (bufout, 1, BUFFER_SIZE, fout); + pt = bufout; + } + } + } + } + if (strlen (bits) > 0) { + for (j = 0; j < (int)strlen (bits); j++) { + *pt <<= 1; + if (bits[j] == '1') { + (*pt)++; + } + } + if (pt - bufout < BUFFER_SIZE) { + pt++; + } else { + fwrite (bufout, 1, BUFFER_SIZE, fout); + pt = bufout; + } + pt++; + } + if (pt != bufout) { + fwrite (bufout, 1, pt - bufout, fout); + } + + /* closing */ + fclose (fin); + fclose (fout); + + VERBOSE (DEBUG, PRINTF ("end writting compressed file\n")); + + return 0; +} + +/* main function */ + +int main (int argc, char *argv[]) +{ + char *input = NULL; + char *output = NULL; + int *table = NULL; + leaf_t **leafs = NULL; + leaf_t *root = NULL; + code_t *codes = NULL; + char *header = NULL; + int mode = COMPRESS; + int rc = 0; + + progname = argv[0]; + + int c; + VERBOSE (DEBUG, PRINTF ("start argument processing\n")); + while ((c = getopt(argc, argv, "cdhi:o:v:")) != EOF) { + switch (c) { + case 'c': + mode = COMPRESS; + break; + case 'd': + mode = DECOMPRESS; + break; + case 'i': + VERBOSE (DEBUG, PRINTF ("-i\n")); + VERBOSE (DEBUG, PRINTF ("optarg: %s\n", optarg)); + input = optarg; + break; + case 'o': + VERBOSE (DEBUG, PRINTF ("-o\n")); + output = optarg; + break; + case 'v': + VERBOSE (DEBUG, PRINTF ("-v\n")); + verbose = atoi (optarg); + VERBOSE (INFO, printf ("verbose: %d\n", verbose)); + break; + case 'h': + default: + usage (c != 'h'); + } + } + if (argc - optind != 0) { + fprintf (stderr, "%s: invalid option -- %s\n", progname, argv[optind]); + usage (1); + } + VERBOSE (DEBUG, PRINTF ("end argument processing\n")); + + switch (mode) { + case COMPRESS: + table = create_table (input); + if (table == NULL) break; + VERBOSE (INFO, print_occ_table (table)); + + leafs = init_forest (table); + if (leafs == NULL) break; + root = create_tree (leafs); + if (root == NULL) break; + codes = create_code (root); + if (codes == NULL) break; + VERBOSE (INFO, print_code_table (codes)); + header = encode_header_table (codes, table); + if (header == NULL) break; + VERBOSE (INFO, print_header (header)); + rc = write_compress (output, input, codes, header); + break; + case DECOMPRESS: + rc = 1; + break; + } + + /* clean everything */ + if (header) free (header); + if (codes) free (codes); + if (root) free_tree (root); + if (leafs) free (leafs); + if (table) free (table); + + return rc; +} + +// test: compress.exe -h +// test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }' +// test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }' +// test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }' + +// vim: ts=4 sw=4 et diff --git a/makefile b/makefile index dcc7a58..5b03e86 100644 --- a/makefile +++ b/makefile @@ -16,7 +16,7 @@ LDFLAGS += -g # Targets ALLEXE = - +ALLEXE += compress ALLEXE += skel SHELL = bash -- 2.30.2