dab192bac85922e29c6ceb204596a7d45f3ae557
[hexdump.git] / hexdump.c
1 /* depend: */
2 /* cflags: */
3 /* linker: debug.o */
4
5 #include <assert.h>
6 #include <limits.h>
7 #include <malloc.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11
12 #include "debug.h"
13
14 /* macros */
15
/* integer helpers; arguments may be evaluated more than once */
#define CEIL(x, y)  (((x) + (y) - 1) / (y))     /* x / y rounded up */
#define MIN(x, y)   ((x) < (y) ? (x) : (y))
#define MAX(x, y)   ((x) > (y) ? (x) : (y))

/* sizes and defaults (a 4096-byte buffer is the disabled alternative) */
#define BUFFERSIZE  256     /* size of the i/o work buffers, in bytes */
#define NBCOLS      8       /* default number of bytes per dump line */
#define NBDIGITS    6       /* default width of the address column */
#define SEQLEN      32      /* maximum length of a byte sequence */
25
/* global variables */
27
28 int nbcols = NBCOLS;
29 int nbdigits = NBDIGITS;
30 int offset = 0;
31
32 char buffer[BUFFERSIZE] = {0};
33 FILE *fin = NULL;
34 long int addrfile = 0;
35 FILE *fout = NULL;
36 char *progname = NULL;
37
38 /* type definitions */
39
40 typedef struct {
41 char *sequence;
42 char bytes[SEQLEN];
43 int length;
44 } sequence_t;
45
46 /* help function */
47
48 int usage (int ret)
49 {
50 FILE *fd = ret ? stderr : stdout;
51 fprintf (fd, "usage: %s [-i file] [-h] [-n nbcols] [-o file] [-v]\n", progname);
52 fprintf (fd, " -i: input file\n");
53 fprintf (fd, " -h: help message\n");
54 fprintf (fd, " -n: number of columns\n");
55 fprintf (fd, " -e: commands\n");
56 fprintf (fd, " -o: output file\n");
57 fprintf (fd, " -v: verbose level (%d)\n", verbose);
58 fprintf (fd, "\n");
59 fprintf (fd, "commands: [/hstr/|addr|+nb] [a hstr] [d nb|-] [i hstr] [p nb|-] [s/h1/h2/[g]]\n");
60 fprintf (fd, " addr: move to address (0... octal, [1-9]... deci, 0x... hexa)\n");
61 fprintf (fd, " +nb: move to offset (0... octal, [1-9]... deci, 0x... hexa)\n");
62 fprintf (fd, " //: move to hexa stringi hstr\n");
63 fprintf (fd, " a : append hexa string hstr to current address\n");
64 fprintf (fd, " d : delete nb bytes (- until end file)\n");
65 fprintf (fd, " i : insert hexa string hstr to current address\n");
66 fprintf (fd, " p : print nb bytes (- until end file)\n");
67 fprintf (fd, " s : substitute h1 by h2 (g for globally)\n");
68
69 return ret;
70 }
71
72 /* get number of digits */
73
/*
 * Number of hexadecimal digits needed to print l, counted per whole
 * byte (two digits for each non-zero-prefixed byte); 0 when l is 0.
 */
unsigned int getnbdigits (unsigned long int l) {
    unsigned int digits;

    /* drop one byte per round, each byte costs two hex digits */
    for (digits = 0; l != 0; l >>= 8) {
        digits += 2;
    }
    return digits;
}
82
83 /* print a line */
84
85 void printline (char *buffer, int nb, int addr) {
86 int i;
87
88 printf ("0x%0*x:", nbdigits, addr);
89 for (i = 0; i < nb; i++) {
90 printf (" %02x", buffer[i]);
91 }
92 for (i = nb; i < nbcols; i++) {
93 printf (" ");
94 }
95 printf (" ");
96 for (i = 0; i < nb; i++) {
97 char c = buffer[i];
98 printf ("%c", (c > 31) && (c < 127) ? c : '.');
99 }
100 printf ("\n");
101 }
102
103 /* write file function */
104
105 int writefile (char *pt, int nb) {
106 if (fout) {
107 fwrite (pt, 1, nb, fout);
108 }
109 return 1;
110 }
111
112 /* search sequence function */
113
114 int searchseq (sequence_t *seq) {
115 char *pt = buffer;
116 int nb = 0;
117 int i, j;
118 int valid = 0;
119
120 VERBOSE (DEBUG, printf ("search sequence: %s\n", seq->sequence));
121
122 while (!feof (fin)) {
123 int nbread = fread (pt, 1, BUFFERSIZE - (pt - buffer), fin);
124 nb += nbread;
125 pt = buffer;
126 for (i = 0; i < nb - seq->length; i++) {
127 valid = 1;
128 for (j = 0; (j < seq->length) && (valid); j++) {
129 if (pt[i + j] != seq->bytes[j]) {
130 valid = 0;
131 }
132 }
133 if (valid) {
134 break;
135 }
136 }
137
138 if (!valid) {
139 writefile (buffer, nb - seq->length);
140 offset = 0;
141 addrfile += nb - seq->length;
142 for (i = 0; i < seq->length; i++) {
143 buffer[i] = buffer[nb - seq->length + i];
144 }
145 pt = buffer + seq->length;
146 nb = seq->length;
147 } else {
148 writefile (buffer, i);
149 offset = seq->length;
150 addrfile += i;
151 fseek (fin, i - nb, SEEK_CUR);
152 VERBOSE (DEBUG, printf ("found sequence (%d)\n", i - nb));
153 return 0;
154 }
155 }
156
157 if (!valid) {
158 writefile (buffer, nb);
159 addrfile += seq->length;
160 }
161
162 return 1;
163 }
164
165 /* go to address function */
166
167 int gotoaddr (long int addr) {
168 char buffer[BUFFERSIZE] = {0};
169
170 if (addr == -1) {
171 addr = LONG_MAX;
172 } else if (addrfile > addr) {
173 return 1;
174 }
175
176 VERBOSE (DEBUG, printf ("look for address: 0x%04lx\n", addr));
177 while (!feof (fin)) {
178 int nbtoread = (addrfile + BUFFERSIZE > addr) ? addr - addrfile : BUFFERSIZE;
179 int nbread = fread (buffer, 1, nbtoread, fin);
180 writefile (buffer, nbread);
181 addrfile += nbread;
182 if (addrfile == addr) {
183 return 0;
184 }
185 }
186
187 return 1;
188 }
189
190 /* insert sequence function */
191
192 int insertseq (sequence_t *seq) {
193 char buffer[BUFFERSIZE] = {0};
194
195 VERBOSE (DEBUG, printf ("insert (%d): '%s'\n", offset, seq->sequence);
196 int i;
197 for (i = 0; i < seq->length; i++) {
198 char c = seq->bytes[i];
199 printf (" 0x%02x (%c)", c, ((c >= 32) && (c < 127)) ? c : '.');
200 };
201 printf ("\n"));
202 if (offset > 0) {
203 int nbread = fread (buffer, 1, offset, fin);
204 if (nbread != offset) {
205 return 1;
206 }
207 writefile (buffer, offset);
208 offset = 0;
209 }
210 writefile (seq->bytes, seq->length);
211
212 return 0;
213 }
214
215 /* hexadecimal dump function */
216
217 int hexdump (int len) {
218 char buffer[BUFFERSIZE] = {0};
219 int i;
220
221 char *pt = buffer;
222
223 int nb = 0;
224 while (!feof (fin)) {
225 int nbtoread = BUFFERSIZE - (pt - buffer);
226 if ((len > 0) && (nbtoread > len)) {
227 nbtoread = len;
228 }
229 int nbread = fread (pt, 1, nbtoread, fin);
230 if (len > 0) {
231 len -= nbread;
232 }
233 nb += nbread;
234 pt = buffer;
235
236 /* print line */
237 while ((nb - (int)(pt - buffer)) / nbcols > 0) {
238 printline (pt, nbcols, addrfile);
239 writefile (pt, nbcols);
240 addrfile += nbcols;
241 pt += nbcols;
242 }
243
244 /* copy end buffer */
245 nb -= pt - buffer;
246 for (i = 0; i < nb; i++) {
247 buffer[i] = pt[i];
248 }
249 pt = buffer + nb;
250
251 /* end partial reading */
252 if (len == 0) {
253 break;
254 }
255 }
256
257 /* last line */
258 if (nb > 0) {
259 printline (buffer, nb, addrfile);
260 writefile (buffer, nb);
261 addrfile += nb;
262 }
263
264 return 0;
265 }
266
267 /* parse octal string */
268
/*
 * Convert the first n characters of s, read as octal digits.
 *
 * Returns the value, or -1 as soon as a character is not an octal digit
 * (this includes the end of a string shorter than n).
 */
long int octal (char *s, int n) {
    int i;
    unsigned long int l = 0;
    for (i = 0; i < n; i++) {
        /* octal digits stop at '7': '8' and '9' are invalid */
        if ((s[i] >= '0') && (s[i] <= '7')) {
            l = l * 8 + s[i] - '0';
        } else {
            return -1;
        }
    }
    return l;
}
281
282 /* parse hexa string */
283
/*
 * Convert the first n characters of s, read as hexadecimal digits
 * (upper or lower case).
 *
 * Returns the value, or -1 as soon as a character is not a hexadecimal
 * digit.
 */
long int hexa (char *s, int n) {
    unsigned long int value = 0;
    int pos = 0;

    while (pos < n) {
        char ch = s[pos++];
        int digit;

        if ((ch >= '0') && (ch <= '9')) {
            digit = ch - '0';
        } else if ((ch >= 'A') && (ch <= 'F')) {
            digit = ch + 10 - 'A';
        } else if ((ch >= 'a') && (ch <= 'f')) {
            digit = ch + 10 - 'a';
        } else {
            return -1;
        }
        value = value * 16 + digit;
    }
    return value;
}
301
302 /* special character function */
303
304 int specialchar (char *s, char *b) {
305 int i = 0, j = 0;
306 while (s[i] != 0) {
307 if (j == SEQLEN) {
308 return 0;
309 }
310 if (s[i] != '\\') {
311 b[j++] = s[i++];
312 continue;
313 }
314
315 int l = -1;
316 switch (s[i + 1]) {
317 case 'a': l = 0x07; i += 2; break;
318 case 'b': l = 0x08; i += 2; break;
319 case 'e': l = 0x1b; i += 2; break;
320 case 'f': l = 0x0c; i += 2; break;
321 case 'n': l = 0x0a; i += 2; break;
322 case 'r': l = 0x0d; i += 2; break;
323 case 't': l = 0x09; i += 2; break;
324 case 'v': l = 0x0b; i += 2; break;
325 case '/': l = '/'; i += 2; break;
326 case '\\': l = '\\'; i += 2; break;
327 case '\'': l = '\''; i += 2; break;
328 case '"': l = '"'; i += 2; break;
329 case '0':
330 case '1':
331 case '2':
332 case '3':
333 l = octal (s + i + 1, 3);
334 if (l != -1) {
335 i += 4;
336 }
337 break;
338 case 'x':
339 l = hexa (s + i + 2, 2);
340 if (l != -1) {
341 i += 4;
342 }
343 break;
344 default:
345 break;
346 }
347 if (l != -1) {
348 VERBOSE (DEBUG, printf("l: 0x%02x '%c'\n", l, l));
349 }
350 b[j++] = (l != -1) ? l : s[i++];
351 }
352
353 return j;
354 }
355
356 /* main function */
357
358 int main (int argc, char *argv[])
359 {
360 int i, rc = 0;
361 char *input = NULL;
362 char *output = NULL;
363 char *commands = NULL;
364 long int length = -1;
365 sequence_t seq = {0};
366 long int addr = 0;
367 char c;
368
369 /* get basename */
370 char *pt = progname = argv[0];
371 while (*pt) {
372 if ((*pt == '/') || (*pt == '\\')) {
373 progname = pt + 1;
374 }
375 pt++;
376 }
377
378 while (argc-- > 1) {
379 char *arg = *(++argv);
380 if (arg[0] != '-') {
381 VERBOSE (ERROR, fprintf (stderr, "%s: invalid option -- %s\n", progname, arg));
382 return usage (1);
383 }
384 char c = arg[1];
385 switch (c) {
386 case 'e':
387 arg = (arg[2]) ? arg + 2 : (--argc > 0) ? *(++argv) : NULL;
388 if (arg) {
389 if (commands == NULL) {
390 commands = arg;
391 } else {
392 strcat (commands, " ");
393 strcat (commands, arg);
394 }
395 }
396 break;
397 case 'i':
398 input = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL;
399 break;
400 case 'n':
401 arg = (arg[2]) ? arg + 2 : (--argc > 0) ? *(++argv) : NULL;
402 if (arg == NULL) {
403 VERBOSE (ERROR, fprintf (stderr, "%s: missing number of columns\n", progname));
404 return usage (1);
405 }
406 nbcols = atoi (arg);
407 break;
408 case 'o':
409 output = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL;
410 break;
411 case 'v':
412 arg = (arg[2]) ? arg + 2 : (--argc > 0) ? *(++argv) : NULL;
413 if (arg == NULL) {
414 VERBOSE (ERROR, fprintf (stderr, "%s: missing verbose level\n", progname));
415 return usage (1);
416 }
417 verbose = atoi (arg);
418 break;
419 case 'h':
420 default:
421 return usage (c != 'h');
422 }
423 }
424
425 /* check input */
426 if (input) {
427 fin = fopen (input, "rb");
428 if (!fin) {
429 VERBOSE (ERROR, fprintf (stderr, "error: can't open file '%s'\n", input));
430 return 1;
431 }
432 } else {
433 fin = stdin;
434 }
435
436 /* check output */
437 if (output) {
438 fout = fopen (output, "wb");
439 if (!fout) {
440 VERBOSE (ERROR, fprintf (stderr, "error: can't open file '%s'\n", output));
441 fclose (fin);
442 return 1;
443 }
444 } else {
445 //fout = stdout;
446 }
447
448 /* get file size */
449 if (fin != stdin) {
450 fseek (fin, 0 , SEEK_END);
451 unsigned long int filesize = ftell (fin);
452 fseek (fin, 0 , SEEK_SET);
453 nbdigits = getnbdigits (filesize);
454 }
455
456 if (commands == NULL) {
457 VERBOSE (DEBUG, printf ("no command\n"));
458 hexdump (-1);
459 } else {
460 VERBOSE (DEBUG, printf ("commands: %s\n", commands));
461 while ((*commands != '\0') && (rc == 0)) {
462 switch (c = *commands++) {
463 case ' ':
464 case '\t':
465 break;
466
467 case '/': /* read patern */
468 seq.sequence = commands;
469 seq.length = 0;
470 while (*commands) {
471 if ((*commands == '\\') &&
472 ((commands[1] == '/') || (commands[1] == '\\'))) {
473 commands++;
474 } else if (*commands == '/') {
475 *commands++ = 0;
476 break;
477 }
478 commands++;
479 }
480 seq.length = specialchar (seq.sequence, seq.bytes);
481 if (seq.length != 0) {
482 rc = searchseq (&seq);
483 } else {
484 VERBOSE (ERROR, fprintf (stderr, "incorrect sequence (%s)\n", seq.sequence));
485 rc = 1;
486 }
487 break;
488
489 case '0': /* read address */
490 if (*commands == 'x') {
491 commands++;
492 addr = strtol (commands, &commands, 16);
493 } else {
494 addr = strtol (commands, &commands, 8);
495 }
496 if (addr) {
497 rc = gotoaddr (addr);
498 } else {
499 VERBOSE (ERROR, fprintf (stderr, "erroneous address\n"));
500 rc = 1;
501 }
502 offset = 0;
503 break;
504
505 case '1':
506 case '2':
507 case '3':
508 case '4':
509 case '5':
510 case '6':
511 case '7':
512 case '8':
513 case '9': /* read address */
514 commands--;
515 addr = strtol (commands, &commands, 10);
516 rc = gotoaddr (addr);
517 offset = 0;
518 break;
519
520 case 'a': /* append mode */
521 offset = 0;
522 /* fall through */
523
524 case 'i': /* insert mode */
525 while (*commands) {
526 if ((*commands == ' ') || (*commands == '\t')) {
527 commands++;
528 } else {
529 break;
530 }
531 }
532 seq.sequence = commands;
533 seq.length = 0;
534 i = 0;
535 while (*commands) {
536 if ((*commands == ' ') || (*commands == '\t')) {
537 *commands++ = '\0';
538 break;
539 } else {
540 commands++;
541 i++;
542 if (i % 2 == 0) {
543 seq.bytes[seq.length] = hexa (seq.sequence + 2 * seq.length, 2);
544 if (seq.bytes[seq.length] == -1) {
545 rc = 1;
546 break;
547 }
548 seq.length++;
549 }
550 }
551 }
552 if ((seq.length > 0) && (rc == 0) && (i % 2 == 0)) {
553 rc = insertseq (&seq);
554 } else {
555 VERBOSE (ERROR, fprintf (stderr, "erroneous sequence '%s'\n", seq.sequence));
556 rc = 1;
557 }
558 offset = 0;
559 break;
560
561 case '+': /* relative move */
562 /* fall through */
563
564 case 'd': /* delete mode */
565 /* fall through */
566
567 case 'p': /* print mode */
568 length = -1;
569 while (*commands != '\0') {
570 if ((*commands == ' ') || (*commands == '\t')) {
571 commands++;
572 } else if ((*commands >= '0') && (*commands <= '9')) {
573 length = strtol (commands, &commands, 10);
574 break;
575 } else if (*commands == '-') {
576 length = -1;
577 commands++;
578 break;
579 } else {
580 VERBOSE (ERROR, fprintf (stderr, "unknown length (%s)\n", commands));
581 rc = 1;
582 break;
583 }
584 }
585 if (rc == 0) {
586 switch (c) {
587 case '+':
588 rc = gotoaddr ((length > 0) ? addrfile + length : -1);
589 break;
590
591 case 'd':
592 fseek (fin, length, SEEK_CUR);
593 break;
594
595 case 'p':
596 hexdump (length);
597 break;
598 }
599 }
600 offset = 0;
601 break;
602
603 case 's': /* substitute mode */
604 offset = 0;
605 break;
606
607 default:
608 VERBOSE (ERROR, fprintf (stderr, "unknown command (%c)\n", commands[-1]));
609 rc = 1;
610 }
611 }
612 }
613
614 /* end of file */
615 if ((rc == 0) && (fout != NULL)) {
616 while (!feof (fin)) {
617 int nbread = fread (buffer, 1, BUFFERSIZE, fin);
618 if (nbread) {
619 fwrite (buffer, 1, nbread, fout);
620 }
621 }
622 }
623
624 /* close all */
625 if (fin) fclose (fin);
626 if (fout) fclose (fout);
627
628 return rc;
629 }
630
631 // test: hexdump.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
632 // test: hexdump.exe foo 2>&1 | grep -q 'invalid option'
633 // test: hexdump.exe -n 2>&1 | grep -q 'missing number of columns'
634 // test: hexdump.exe -v 2>&1 | grep -q 'missing verbose level'
635 // test: hexdump.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }'
636 // test: hexdump.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
637 // test: hexdump.exe -i hexdump.c | grep -q '0x[0-9a-f]*: '
638 // test: hexdump.exe -i hexdump.ko 2>&1 | grep -q "can't open file"
639 // test: hexdump.exe -i hexdump.c -o ko/test.c 2>&1 | grep -q "can't open file"
640 // test: cat hexdump.c | hexdump.exe -n 3 | head -2 | tail -1 | grep -q '0x000003: 64 65 70 dep'
641 // test: hexdump.exe -i hexdump.c -n 3 | head -2 | tail -1 | grep -q '0x0003: 64 65 70 dep'
642 // test: hexdump.exe -i hexdump.c -o test.c -e 'p 200' | tail -1 | grep -q '0x00c0:'
643 // test: cmp hexdump.c test.c; x=$?; rm test.c; test x$x = x0
644 // test: hexdump.exe -i hexdump.c -e ' /cflags/ p 17 /debug/ p 8' | grep -q '0x0019: 2a 2f 0a 2f 2a 20 6c 69 \*\/\./\* li'
645 // test: hexdump.exe -i hexdump.c -o test.c -e ' /cfl\x61gs/ p 16 /d\145bug/ p 8' | grep -q '0x0027: 64 65 62 75 67 2e 6f 20 debug.o'
646 // test: cmp hexdump.c test.c; x=$?; rm test.c; test x$x = x0
647 // test: hexdump.exe -i hexdump.c -e ' /\n/ p 8' | grep -q '0x000d: 0a 2f 2a 20 63 66 6c 61 \./\* cfla'
648 // test: hexdump.exe -i hexdump.c -o test.c -e ' /\a\b\e\f\r\t\v/ p 8'; x=$?; test x$x = x1
649 // test: cmp hexdump.c test.c; x=$?; rm test.c; test x$x = x0
650 // test: hexdump.exe -i hexdump.c -v 3 -e " /\'/" -e ' /\"/' -e ' /\\/' -e ' /\x2a/' -e ' s/\x3A/' | grep l: | wc -l | xargs test 5 =
651 // test: hexdump.exe -i hexdump.c -e ' /\n\/* vim:/ p -' | grep -q ': 74 3a 20 2a 2f 0a *t: \*\/\.'
652 // test: hexdump.exe -i hexdump.c -e 'p go_to_end' 2>&1 | grep -q 'unknown length'
653 // test: hexdump.exe -i hexdump.c -e ' //' 2>&1 | grep -q 'incorrect sequence'
654 // test: hexdump.exe -i hexdump.c -e 'foo' 2>&1 | grep -q 'unknown command'
655 // test: hexdump.exe -i hexdump.c -e '0x20 p 8 64 p 8 0200 p 16' | grep -q '0x0080:'
656 // test: hexdump.exe -i hexdump.c -e '0xg' 2>&1 | grep -q 'erroneous address'
657 // test: hexdump.exe -i hexdump.c -o test.c -e ' /cflags/ a 414e5a /link/ i 2F333B'
658 // test: grep -q ANZcflags test.c && grep -q 'link/3;er' test.c; x=$?; rm test.c; test x$x = x0
659 // test: hexdump.exe -i hexdump.c -e ' /cflags/ a 414e5' 2>&1 | grep 'erroneous sequence'
660 // test: hexdump.exe -i hexdump.c -o test.c -e ' /lags/ d 2'
661 // test: grep -q cfgs test.c; x=$?; rm test.c; test x$x = x0
662 // test: hexdump.exe -i hexdump.c -o test.c -e ' /lags/ +2 i 2041'
663 // test: grep -q 'cf Ags' test.c; x=$?; rm test.c; test x$x = x0
664
665 /* vim: set ts=4 sw=4 et: */