--- /dev/null
+/* depend: */
+/* cflags: */
+/* linker: */
+
+#include <assert.h>
+#include <getopt.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* constants */
+
+#define COMPRESS 1
+#define DECOMPRESS 2
+
+#define NB_CHARS 256
+#define BUFFER_SIZE 4096
+
+#define DEBUG 4
+#define INFO 3
+#define WARNING 2
+#define ERROR 1
+
+/* macros */
+
+#define CEIL(x, y) (((x) + (y) - 1) / (y))
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define VERBOSE(level, statement...) do { if (level <= verbose) { statement; } } while(0)
+#define PRINTF(args...) do { fprintf (stdout, args); fflush (stdout); } while (0)
+
+/* gobal variables */
+
+char *progname = NULL;
+int verbose = 2;
+
+/* help function */
+
+void usage (int ret)
+{
+ FILE *fd = ret ? stderr : stdout;
+ fprintf (fd, "usage: %s\n", progname);
+ fprintf (fd, " -h : help message\n");
+ fprintf (fd, " -i <file>: input file\n");
+ fprintf (fd, " -o <file>: output file\n");
+ fprintf (fd, " -v : verbose level (%d)\n", verbose);
+
+ exit (ret);
+}
+
+/* create occurence table */
+
+int *create_table (char *filename)
+{
+ char buffer[BUFFER_SIZE] = {0};
+ int nbread;
+ int *table = NULL;
+ FILE *fid = NULL;
+
+ VERBOSE (DEBUG, PRINTF ("start create occurence table\n"));
+
+ /* memory allocation */
+ table = (int *) calloc (NB_CHARS, sizeof (int));
+ if (table == NULL) {
+ VERBOSE (ERROR, printf ("can't allocate memory\n"));
+ return NULL;
+ }
+ VERBOSE (INFO, printf ("memory allocated\n"));
+
+ /* open file */
+ fid = fopen (filename, "rb");
+ if (fid == NULL) {
+ VERBOSE (ERROR, printf ("can't open file '%s'\n", filename));
+ free (table);
+ return NULL;
+ }
+ VERBOSE (INFO, printf ("file '%s' opened\n", filename));
+
+ /* read file */
+ while (!feof (fid)) {
+ nbread = fread (buffer, 1, BUFFER_SIZE, fid);
+ VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread));
+ while (nbread--) {
+ table[(int)buffer[nbread]]++;
+ }
+ }
+
+ /* close file */
+ fclose (fid);
+
+ VERBOSE (DEBUG, PRINTF ("end create occurence table\n"));
+
+ return table;
+}
+
+/* print occurence table */
+
+void print_occ_table (int *table)
+{
+ int i;
+
+ printf ("Occurence table\n");
+ for (i = 0; i < NB_CHARS; i++) {
+ if (table[i]) {
+ printf ("0x%02x '%c': %d\n", i, ((i < 32) || (i > 127)) ? '.' : i, table[i]);
+ }
+ }
+}
+
+/* leaf structure */
+
+typedef struct _leaf_t
+{
+ struct _leaf_t *left;
+ struct _leaf_t *right;
+ int occ;
+ char c;
+} leaf_t;
+
+/* initialize forest */
+
+leaf_t **init_forest (int *table)
+{
+ leaf_t **leafs = NULL;
+ int nb_leafs = 0;
+ int i, l;
+
+ VERBOSE (DEBUG, PRINTF ("start initiliazing forest\n"));
+
+ /* count number of leafs */
+ for (i = 0; i < NB_CHARS; i++) {
+ if (table[i] > 0) {
+ nb_leafs++;
+ }
+ }
+
+ /* allocate memory */
+ leafs = (leaf_t **) calloc (nb_leafs + 1, sizeof (leaf_t *));
+ if (leafs == NULL) {
+ VERBOSE (ERROR, printf ("can't allocate memory\n"));
+ return NULL;
+ }
+
+ /* initialize leafs */
+ for (i = 0, l = 0; i < NB_CHARS; i++) {
+ if (table[i] > 0) {
+ leafs[l] = (leaf_t *) calloc (1, sizeof (leaf_t));
+ if (leafs[l] == NULL) {
+ VERBOSE (ERROR, printf ("can't allocate memory\n"));
+ return NULL;
+ }
+ leafs[l]->occ = table[i];
+ leafs[l]->c = i;
+ l++;
+ }
+ }
+
+ VERBOSE (DEBUG, PRINTF ("end initiliazing forest\n"));
+
+ return leafs;
+}
+
+/* create tree */
+
+leaf_t *create_tree (leaf_t **leafs)
+{
+ leaf_t *branch = NULL;
+ int nb_leafs = 0;
+ int last, ante;
+ int i, j;
+
+ VERBOSE (DEBUG, PRINTF ("start creating tree\n"));
+
+ /* count number of leafs */
+ while (leafs[nb_leafs] != NULL) {
+ nb_leafs++;
+ }
+
+ /* create tree */
+ for (j = 0; j < nb_leafs - 1; j++) {
+
+ /* look for leatest occurence */
+ last = -1;
+ for (i = 0; i < nb_leafs; i++) {
+ if (leafs[i] == NULL) {
+ continue;
+ }
+ if ((last == -1) || (leafs[i]->occ < leafs[last]->occ)) {
+ last = i;
+ }
+ }
+
+ /* look for ante leatest occurence */
+ ante = -1;
+ for (i = 0; i < nb_leafs; i++) {
+ if ((i == last) || (leafs[i] == NULL)) {
+ continue;
+ }
+ if ((ante == -1) || (leafs[i]->occ < leafs[ante]->occ)) {
+ ante = i;
+ }
+ }
+
+ /* create branch */
+ if ((last == -1) || (ante == -1)) {
+ VERBOSE (ERROR, printf ("error during tree building\n"));
+ return NULL;
+ }
+ branch = (leaf_t *) calloc (1, sizeof (leaf_t));
+ if (branch == NULL) {
+ VERBOSE (ERROR, printf ("can't allocate memory\n"));
+ return NULL;
+ }
+ branch->left = leafs[last];
+ branch->right = leafs[ante];
+ branch->occ = branch->left->occ + branch->right->occ;
+ leafs[last] = branch;
+ leafs[ante] = NULL;
+ }
+
+ VERBOSE (DEBUG, PRINTF ("end creating tree\n"));
+
+ return leafs[last];
+}
+
+/* free tree */
+
+void free_tree (leaf_t *root) {
+ if (root) {
+ if (root->left) {
+ free_tree (root->left);
+ }
+ if (root->right) {
+ free_tree (root->right);
+ }
+ free (root);
+ }
+}
+
+/* code structure */
+
+typedef struct {
+ char code[NB_CHARS - 1 + 1];
+} code_t;
+
+/* explore tree */
+
+void explore_tree (code_t *table, leaf_t *root, char *code, int index)
+{
+ if ((root->left == NULL) && (root->right == NULL)) {
+ strcpy ((char *)(table + (int)(root->c)), code);
+ }
+ else {
+ strcpy (code + index, "1");
+ explore_tree (table, root->left, code, index + 1);
+ strcpy (code + index, "0");
+ explore_tree (table, root->right, code, index + 1);
+ }
+}
+
+/* create code table */
+
+code_t *create_code (leaf_t *root)
+{
+ code_t *table = NULL;
+ code_t code = {0};
+
+ VERBOSE (DEBUG, PRINTF ("start creating code table\n"));
+
+ /* allocate table */
+ table = (code_t *) calloc (NB_CHARS, sizeof (code_t));
+ if (table == NULL) {
+ VERBOSE (ERROR, printf ("can't allocate memory\n"));
+ return NULL;
+ }
+
+ explore_tree (table, root, (char *)&code, 0);
+
+ VERBOSE (DEBUG, PRINTF ("end creating code table\n"));
+
+ return table;
+}
+
+/* print code table */
+
+void print_code_table (code_t *codes)
+{
+ char *code;
+ int i;
+
+ printf ("Code table\n");
+ for (i = 0; i < NB_CHARS; i++) {
+ code = (char *)(codes + i);
+ if (strlen (code) == 0) {
+ continue;
+ }
+ printf ("0x%02x '%c': %s\n", i, ((i < 32) || (i > 127)) ? '.' : i, code);
+ }
+}
+
+/* encode header and code table */
+
+char *encode_header_table (code_t *codes, int *occ)
+{
+ unsigned char buffer[NB_CHARS * (NB_CHARS - 1) / 2 / 8 + NB_CHARS + 2] = {0};
+ char bits[(NB_CHARS - 1) + 8 + 1] = {0};
+ char *code;
+ unsigned char *header = buffer;
+ int i, j, length, mode;
+ int nb = 0;
+ int size = 0;
+
+ VERBOSE (DEBUG, PRINTF ("start encoding header and code table\n"));
+
+ /* mode 1 or 2 */
+ for (i = 0; i < NB_CHARS; i++) {
+ code = (char *)(codes + i);
+ if (strlen (code) > 0) {
+ nb++;
+ size += strlen (code) * occ[i];
+ }
+ }
+ mode = (NB_CHARS < 2 * nb + 1) ? 1 : 2;
+ VERBOSE (DEBUG, PRINTF ("nb chars: %d\n", nb));
+ VERBOSE (DEBUG, PRINTF ("mode: %d\n", mode));
+ VERBOSE (DEBUG, PRINTF ("size: %d\n", size));
+
+ /* header */
+ strcpy ((char *)header, (mode == 1) ? "MZ1 " : "MZ2 ");
+ header += 6;
+
+ /* size */
+ switch (mode) {
+ case 1:
+ for (i = 0; i < NB_CHARS; i++) {
+ code = (char *)(codes + i);
+ *(header++) = (unsigned char) strlen (code);
+ }
+ break;
+ case 2:
+ *(header++) = (unsigned char)(nb - 1);
+ for (i = 0; i < NB_CHARS; i++) {
+ code = (char *)(codes + i);
+ if (strlen (code) > 0) {
+ *(header++) = (unsigned char)i;
+ *(header++) = (unsigned char) strlen (code);
+ }
+ }
+ break;
+ }
+
+ /* bits */
+ for (i = 0; i < NB_CHARS; i++) {
+ code = (char *)(codes + i);
+ if (strlen (code) > 0) {
+ strcat (bits, code);
+ while (strlen (bits) > (8 - 1)) {
+ for (j = 0; j < 8; j++) {
+ *header <<= 1;
+ if (bits[j] == '1') {
+ (*header)++;
+ }
+ }
+ strcpy (bits, bits + 8);
+ header++;
+ }
+ }
+ }
+ if (strlen (bits) > 0) {
+ for (j = 0; j < (int)strlen (bits); j++) {
+ *header <<= 1;
+ if (bits[j] == '1') {
+ (*header)++;
+ }
+ }
+ header++;
+ }
+
+ /* length */
+ length = (int)(header - buffer - 6);
+ VERBOSE (DEBUG, PRINTF ("lengh: %d %02x %02x\n", length, length >> 8, length & 0xff));
+ buffer[3] = (unsigned char)(length >> 8);
+ buffer[4] = (unsigned char)(length & 0xff);
+ buffer[5] = (unsigned char)(size % 8);
+
+ /* allocation */
+ header = (unsigned char *) calloc (length + 6, 1);
+ memcpy (header, buffer, length + 6);
+
+ VERBOSE (DEBUG, PRINTF ("end encoding header and code table\n"));
+
+ return (char *)header;
+}
+
+/* print header */
+
+void print_header (char *header)
+{
+ int length, i;
+
+ length = ((unsigned char)(header[3]) << 8) + (unsigned char)(header[4]);
+ VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length));
+ for (i = 0; i < length + 6; i++) {
+ printf ("%02x", (unsigned char)header[i]);
+ }
+ printf ("\n");
+}
+
+/* write crompressed file */
+
+int write_compress (char *output, char *input, code_t *codes, char *header)
+{
+ char bufin[BUFFER_SIZE] = {0};
+ char bufout[BUFFER_SIZE] = {0};
+ char bits[(NB_CHARS - 1) + 8 + 1] = {0};
+ FILE *fin, *fout;
+ int length = 0;
+ int i, j, nbread;
+ char *pt;
+
+ VERBOSE (DEBUG, PRINTF ("start writting compressed file\n"));
+
+ /* open input file */
+ fin = fopen (input, "rb");
+ if (fin == NULL) {
+ VERBOSE (ERROR, printf ("can't open file '%s'\n", input));
+ return 1;
+ }
+ VERBOSE (INFO, printf ("file '%s' opened\n", input));
+
+ /* open output file */
+ fout = fopen (output, "wb");
+ if (fin == NULL) {
+ VERBOSE (ERROR, printf ("can't open file '%s'\n", output));
+ return 1;
+ }
+ VERBOSE (INFO, printf ("file '%s' opened\n", output));
+
+ /* write header */
+ length = ((unsigned char)(header[3]) << 8) + (unsigned char)(header[4]);
+ VERBOSE (DEBUG, PRINTF ("lengh: %d\n", length));
+ fwrite (header, 1, length + 6, fout);
+
+ /* write file */
+ pt = bufout;
+ while (!feof (fin)) {
+ nbread = fread (bufin, 1, BUFFER_SIZE, fin);
+ VERBOSE (DEBUG, PRINTF ("nbread: %d\n", nbread));
+ for (i = 0; i < nbread; i++) {
+ strcat (bits, (char *)(codes + bufin[i]));
+ while (strlen (bits) > (8 - 1)) {
+ for (j = 0; j < 8; j++) {
+ *pt <<= 1;
+ if (bits[j] == '1') {
+ (*pt)++;
+ }
+ }
+ strcpy (bits, bits + 8);
+ if (pt - bufout < BUFFER_SIZE) {
+ pt++;
+ } else {
+ fwrite (bufout, 1, BUFFER_SIZE, fout);
+ pt = bufout;
+ }
+ }
+ }
+ }
+ if (strlen (bits) > 0) {
+ for (j = 0; j < (int)strlen (bits); j++) {
+ *pt <<= 1;
+ if (bits[j] == '1') {
+ (*pt)++;
+ }
+ }
+ if (pt - bufout < BUFFER_SIZE) {
+ pt++;
+ } else {
+ fwrite (bufout, 1, BUFFER_SIZE, fout);
+ pt = bufout;
+ }
+ pt++;
+ }
+ if (pt != bufout) {
+ fwrite (bufout, 1, pt - bufout, fout);
+ }
+
+ /* closing */
+ fclose (fin);
+ fclose (fout);
+
+ VERBOSE (DEBUG, PRINTF ("end writting compressed file\n"));
+
+ return 0;
+}
+
+/* main function */
+
+int main (int argc, char *argv[])
+{
+ char *input = NULL;
+ char *output = NULL;
+ int *table = NULL;
+ leaf_t **leafs = NULL;
+ leaf_t *root = NULL;
+ code_t *codes = NULL;
+ char *header = NULL;
+ int mode = COMPRESS;
+ int rc = 0;
+
+ progname = argv[0];
+
+ int c;
+ VERBOSE (DEBUG, PRINTF ("start argument processing\n"));
+ while ((c = getopt(argc, argv, "cdhi:o:v:")) != EOF) {
+ switch (c) {
+ case 'c':
+ mode = COMPRESS;
+ break;
+ case 'd':
+ mode = DECOMPRESS;
+ break;
+ case 'i':
+ VERBOSE (DEBUG, PRINTF ("-i\n"));
+ VERBOSE (DEBUG, PRINTF ("optarg: %s\n", optarg));
+ input = optarg;
+ break;
+ case 'o':
+ VERBOSE (DEBUG, PRINTF ("-o\n"));
+ output = optarg;
+ break;
+ case 'v':
+ VERBOSE (DEBUG, PRINTF ("-v\n"));
+ verbose = atoi (optarg);
+ VERBOSE (INFO, printf ("verbose: %d\n", verbose));
+ break;
+ case 'h':
+ default:
+ usage (c != 'h');
+ }
+ }
+ if (argc - optind != 0) {
+ fprintf (stderr, "%s: invalid option -- %s\n", progname, argv[optind]);
+ usage (1);
+ }
+ VERBOSE (DEBUG, PRINTF ("end argument processing\n"));
+
+ switch (mode) {
+ case COMPRESS:
+ table = create_table (input);
+ if (table == NULL) break;
+ VERBOSE (INFO, print_occ_table (table));
+
+ leafs = init_forest (table);
+ if (leafs == NULL) break;
+ root = create_tree (leafs);
+ if (root == NULL) break;
+ codes = create_code (root);
+ if (codes == NULL) break;
+ VERBOSE (INFO, print_code_table (codes));
+ header = encode_header_table (codes, table);
+ if (header == NULL) break;
+ VERBOSE (INFO, print_header (header));
+ rc = write_compress (output, input, codes, header);
+ break;
+ case DECOMPRESS:
+ rc = 1;
+ break;
+ }
+
+ /* clean everything */
+ if (header) free (header);
+ if (codes) free (codes);
+ if (root) free_tree (root);
+ if (leafs) free (leafs);
+ if (table) free (table);
+
+ return rc;
+}
+
+// test: compress.exe -h
+// test: compress.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
+// test: compress.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }'
+// test: compress.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
+
+// vim: ts=4 sw=4 et