partial append and insert commands
[hexdump.git] / hexdump.c
CommitLineData
a1ab98f9
LM
1/* depend: */
2/* cflags: */
3/* linker: debug.o */
4
5#include <assert.h>
a1ab98f9
LM
6#include <malloc.h>
7#include <stdio.h>
8#include <stdlib.h>
9#include <string.h>
10
11#include "debug.h"
12
13/* macros */
14
/* small arithmetic helpers (arguments may be evaluated more than once) */
#define CEIL(x, y) (((x) + (y) - 1) / (y))
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#define MAX(x, y) (((x) > (y)) ? (x) : (y))

/* sizing constants */
#define BUFFERSIZE 256  /* size of every working buffer (4096 was tried too) */
#define NBCOLS 8        /* default number of bytes shown per line */
#define NBDIGITS 6      /* default width of the printed address */
#define SEQLEN 32       /* capacity of a decoded byte sequence */

/* global variables */

/* display settings */
int nbcols = NBCOLS;
int nbdigits = NBDIGITS;

/* number of input bytes to copy through before an insertion */
int offset = 0;

/* input side: stream, shared working buffer, running address */
FILE *fin = NULL;
char buffer[BUFFERSIZE] = {0};
unsigned long int addrfile = 0;

/* output side: optional copy of the processed data */
FILE *fout = NULL;

/* program name shown in messages */
char *progname = NULL;

/* type definitions */

/* a byte sequence given in a command */
typedef struct {
    char *sequence;     /* text of the sequence as found in the command string */
    char bytes[SEQLEN]; /* decoded bytes */
    int length;         /* number of decoded bytes stored in bytes[] */
} sequence_t;
44
a1ab98f9
LM
45/* help function */
46
4c4a10dd 47int usage (int ret)
a1ab98f9
LM
48{
49 FILE *fd = ret ? stderr : stdout;
50 fprintf (fd, "usage: %s [-i file] [-h] [-n nbcols] [-o file] [-v]\n", progname);
5272fae8
LM
51 fprintf (fd, " -i: input file\n");
52 fprintf (fd, " -h: help message\n");
53 fprintf (fd, " -n: number of columns\n");
54 fprintf (fd, " -e: commands\n");
55 fprintf (fd, " -o: output file\n");
56 fprintf (fd, " -v: verbose level (%d)\n", verbose);
57 fprintf (fd, "\n");
cc02a838
LM
58 fprintf (fd, "commands: [/hstr/|addr] [a hstr] [d nb|-] [i hstr] [p nb|-] [s/h1/h2/[g]]\n");
59 fprintf (fd, " addr: move to address (0... octal, [1-9]... deci, 0x... hexa)\n");
5272fae8
LM
60 fprintf (fd, " //: move to hexa stringi hstr\n");
61 fprintf (fd, " a : append hexa string hstr to current address\n");
62 fprintf (fd, " d : delete nb bytes (- until end file)\n");
63 fprintf (fd, " i : insert hexa string hstr to current address\n");
64 fprintf (fd, " p : print nb bytes (- until end file)\n");
65 fprintf (fd, " s : substitute h1 by h2 (g for globally)\n");
a1ab98f9 66
4c4a10dd 67 return ret;
a1ab98f9
LM
68}
69
70/* get number of digits */
71
/*
 * Count the hexadecimal digits needed to print l as whole bytes.
 *
 * Two digits are counted for every byte of l up to and including its
 * most significant non-zero byte; the result is 0 when l is 0.
 */
unsigned int getnbdigits (unsigned long int l) {
    int digits;

    /* drop one byte per round until nothing is left */
    for (digits = 0; l != 0; l >>= 8) {
        digits += 2;
    }
    return digits;
}
80
81/* print a line */
82
4c4a10dd 83void printline (char *buffer, int nb, int addr) {
a1ab98f9
LM
84 int i;
85
86 printf ("0x%0*x:", nbdigits, addr);
87 for (i = 0; i < nb; i++) {
88 printf (" %02x", buffer[i]);
89 }
90 for (i = nb; i < nbcols; i++) {
91 printf (" ");
92 }
93 printf (" ");
94 for (i = 0; i < nb; i++) {
95 char c = buffer[i];
96 printf ("%c", (c > 31) && (c < 127) ? c : '.');
97 }
98 printf ("\n");
99}
100
ce305529 101/* write file function */
a1ab98f9 102
ce305529
LM
103int writefile (char *pt, int nb) {
104 if (fout) {
105 fwrite (pt, 1, nb, fout);
106 }
107 return 1;
108}
109
4c4a10dd
LM
110/* search sequence function */
111
5bcbfcca 112int searchseq (sequence_t *seq) {
4c4a10dd
LM
113 char *pt = buffer;
114 int nb = 0;
115 int i, j;
116 int valid = 0;
4c4a10dd 117
5bcbfcca 118 VERBOSE (DEBUG, printf ("search sequence: %s\n", seq->sequence));
4c4a10dd
LM
119
120 while (!feof (fin)) {
121 int nbread = fread (pt, 1, BUFFERSIZE - (pt - buffer), fin);
122 nb += nbread;
123 pt = buffer;
5bcbfcca 124 for (i = 0; i < nb - seq->length; i++) {
4c4a10dd 125 valid = 1;
5bcbfcca
LM
126 for (j = 0; (j < seq->length) && (valid); j++) {
127 if (pt[i + j] != seq->bytes[j]) {
4c4a10dd
LM
128 valid = 0;
129 }
130 }
131 if (valid) {
132 break;
133 }
134 }
135
136 if (!valid) {
5bcbfcca 137 writefile (buffer, nb - seq->length);
4c4a10dd 138 offset = 0;
5bcbfcca
LM
139 addrfile += nb - seq->length;
140 for (i = 0; i < seq->length; i++) {
141 buffer[i] = buffer[nb - seq->length + i];
4c4a10dd 142 }
5bcbfcca
LM
143 pt = buffer + seq->length;
144 nb = seq->length;
4c4a10dd
LM
145 } else {
146 writefile (buffer, i);
5bcbfcca 147 offset = seq->length;
4c4a10dd
LM
148 addrfile += i;
149 fseek (fin, i - nb, SEEK_CUR);
150 VERBOSE (DEBUG, printf ("found sequence (%d)\n", i - nb));
151 return 0;
152 }
153 }
154
155 if (!valid) {
156 writefile (buffer, nb);
5bcbfcca 157 addrfile += seq->length;
4c4a10dd
LM
158 }
159
160 return 1;
161}
162
cc02a838
LM
163/* go to address function */
164
165int gotoaddr (unsigned long int addr) {
166 char buffer[BUFFERSIZE] = {0};
167
168 if (addrfile > addr) {
169 return 1;
170 }
171
172 VERBOSE (DEBUG, printf ("look for address: 0x%04lx\n", addr));
173 while (!feof (fin)) {
174 int nbtoread = (addrfile + BUFFERSIZE > addr) ? addr - addrfile : BUFFERSIZE;
175 int nbread = fread (buffer, 1, nbtoread, fin);
176 writefile (buffer, nbread);
177 addrfile += nbread;
178 if (addrfile == addr) {
179 return 0;
180 }
181 }
182
183 return 1;
184}
185
9e439809
LM
186/* insert sequence function */
187
188int insertseq (sequence_t *seq) {
189 char buffer[BUFFERSIZE] = {0};
190
191 VERBOSE (DEBUG, printf ("insert (%d): '%s'\n", offset, seq->sequence);
192 int i;
193 for (i = 0; i < seq->length; i++) {
194 char c = seq->bytes[i];
195 printf (" 0x%02x (%c)", c, ((c >= 32) && (c < 127)) ? c : '.');
196 };
197 printf ("\n"));
198 if (offset > 0) {
199 int nbread = fread (buffer, 1, offset, fin);
200 if (nbread != offset) {
201 return 1;
202 }
203 writefile (buffer, offset);
204 offset = 0;
205 }
206 writefile (seq->bytes, seq->length);
207
208 return 0;
209}
210
ce305529
LM
211/* hexadecimal dump function */
212
4c4a10dd 213int hexdump (int len) {
a1ab98f9
LM
214 char buffer[BUFFERSIZE] = {0};
215 int i;
216
217 char *pt = buffer;
218
a1ab98f9
LM
219 int nb = 0;
220 while (!feof (fin)) {
5272fae8
LM
221 int nbtoread = BUFFERSIZE - (pt - buffer);
222 if ((len > 0) && (nbtoread > len)) {
223 nbtoread = len;
224 }
225 int nbread = fread (pt, 1, nbtoread, fin);
226 if (len > 0) {
227 len -= nbread;
228 }
229 nb += nbread;
a1ab98f9
LM
230 pt = buffer;
231
232 /* print line */
233 while ((nb - (int)(pt - buffer)) / nbcols > 0) {
4c4a10dd 234 printline (pt, nbcols, addrfile);
ce305529 235 writefile (pt, nbcols);
4c4a10dd 236 addrfile += nbcols;
a1ab98f9 237 pt += nbcols;
a1ab98f9
LM
238 }
239
240 /* copy end buffer */
241 nb -= pt - buffer;
242 for (i = 0; i < nb; i++) {
243 buffer[i] = pt[i];
244 }
245 pt = buffer + nb;
5272fae8
LM
246
247 /* end partial reading */
248 if (len == 0) {
249 break;
250 }
a1ab98f9
LM
251 }
252
253 /* last line */
254 if (nb > 0) {
4c4a10dd
LM
255 printline (buffer, nb, addrfile);
256 writefile (buffer, nb);
257 addrfile += nb;
a1ab98f9
LM
258 }
259
260 return 0;
261}
262
47db4fc7
LM
263/* parse octal string */
264
/*
 * Convert the first n characters of s, read as octal digits, to a number.
 *
 * Returns the value, or -1 as soon as a character is not an octal digit
 * ('0' to '7'). An empty range (n <= 0) gives 0.
 */
long int octal (char *s, int n) {
    int i;
    unsigned long int l = 0;

    for (i = 0; i < n; i++) {
        /* '8' and '9' are decimal digits only */
        if ((s[i] < '0') || (s[i] > '7')) {
            return -1;
        }
        l = l * 8 + (s[i] - '0');
    }
    return l;
}
277
278/* parse hexa string */
279
/*
 * Convert the first n characters of s, read as hexadecimal digits
 * (either case), to a number.
 *
 * Returns the value, or -1 as soon as a character is not a hexadecimal
 * digit. An empty range (n <= 0) gives 0.
 */
long int hexa (char *s, int n) {
    unsigned long int value = 0;
    int k;

    for (k = 0; k < n; k++) {
        char ch = s[k];
        int digit;

        if ((ch >= '0') && (ch <= '9')) {
            digit = ch - '0';
        } else if ((ch >= 'A') && (ch <= 'F')) {
            digit = ch + 10 - 'A';
        } else if ((ch >= 'a') && (ch <= 'f')) {
            digit = ch + 10 - 'a';
        } else {
            return -1;
        }
        value = value * 16 + digit;
    }
    return value;
}
297
4c4a10dd
LM
298/* special character function */
299
5bcbfcca 300int specialchar (char *s, char *b) {
4c4a10dd
LM
301 int i = 0, j = 0;
302 while (s[i] != 0) {
5bcbfcca
LM
303 if (j == SEQLEN) {
304 return 0;
305 }
4c4a10dd 306 if (s[i] != '\\') {
5bcbfcca 307 b[j++] = s[i++];
4c4a10dd
LM
308 continue;
309 }
310
47db4fc7 311 int l = -1;
4c4a10dd
LM
312 switch (s[i + 1]) {
313 case 'a': l = 0x07; i += 2; break;
314 case 'b': l = 0x08; i += 2; break;
315 case 'e': l = 0x1b; i += 2; break;
316 case 'f': l = 0x0c; i += 2; break;
317 case 'n': l = 0x0a; i += 2; break;
318 case 'r': l = 0x0d; i += 2; break;
319 case 't': l = 0x09; i += 2; break;
320 case 'v': l = 0x0b; i += 2; break;
f975557c 321 case '/': l = '/'; i += 2; break;
4c4a10dd
LM
322 case '\\': l = '\\'; i += 2; break;
323 case '\'': l = '\''; i += 2; break;
324 case '"': l = '"'; i += 2; break;
325 case '0':
326 case '1':
327 case '2':
328 case '3':
5bcbfcca 329 l = octal (s + i + 1, 3);
47db4fc7 330 if (l != -1) {
4c4a10dd
LM
331 i += 4;
332 }
333 break;
334 case 'x':
5bcbfcca 335 l = hexa (s + i + 2, 2);
47db4fc7 336 if (l != -1) {
4c4a10dd
LM
337 i += 4;
338 }
339 break;
340 default:
cc02a838 341 break;
4c4a10dd 342 }
5bcbfcca
LM
343 if (l != -1) {
344 VERBOSE (DEBUG, printf("l: 0x%02x '%c'\n", l, l));
345 }
346 b[j++] = (l != -1) ? l : s[i++];
4c4a10dd 347 }
4c4a10dd 348
5bcbfcca 349 return j;
4c4a10dd
LM
350}
351
a1ab98f9
LM
352/* main function */
353
4c4a10dd 354int main (int argc, char *argv[])
a1ab98f9 355{
9e439809 356 int i, rc = 0;
a1ab98f9
LM
357 char *input = NULL;
358 char *output = NULL;
5272fae8
LM
359 char *commands = NULL;
360 int printlen = -1;
5bcbfcca 361 sequence_t seq = {0};
cc02a838 362 unsigned long int addr = 0;
4c4a10dd 363
a1ab98f9
LM
364 /* get basename */
365 char *pt = progname = argv[0];
366 while (*pt) {
367 if ((*pt == '/') || (*pt == '\\')) {
368 progname = pt + 1;
369 }
370 pt++;
371 }
372
4c4a10dd
LM
373 while (argc-- > 1) {
374 char *arg = *(++argv);
375 if (arg[0] != '-') {
376 VERBOSE (ERROR, fprintf (stderr, "%s: invalid option -- %s\n", progname, arg));
377 return usage (1);
378 }
379 char c = arg[1];
a1ab98f9 380 switch (c) {
5272fae8 381 case 'e':
4c4a10dd
LM
382 arg = (arg[2]) ? arg + 2 : (--argc > 0) ? *(++argv) : NULL;
383 if (arg) {
4c4a10dd
LM
384 if (commands == NULL) {
385 commands = arg;
386 } else {
387 strcat (commands, " ");
388 strcat (commands, arg);
389 }
5272fae8
LM
390 }
391 break;
4c4a10dd
LM
392 case 'i':
393 input = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL;
394 break;
a1ab98f9 395 case 'n':
4c4a10dd
LM
396 arg = (arg[2]) ? arg + 2 : (--argc > 0) ? *(++argv) : NULL;
397 if (arg == NULL) {
398 VERBOSE (ERROR, fprintf (stderr, "%s: missing number of columns\n", progname));
399 return usage (1);
400 }
401 nbcols = atoi (arg);
a1ab98f9
LM
402 break;
403 case 'o':
4c4a10dd 404 output = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL;
a1ab98f9
LM
405 break;
406 case 'v':
4c4a10dd
LM
407 arg = (arg[2]) ? arg + 2 : (--argc > 0) ? *(++argv) : NULL;
408 if (arg == NULL) {
409 VERBOSE (ERROR, fprintf (stderr, "%s: missing verbose level\n", progname));
410 return usage (1);
411 }
412 verbose = atoi (arg);
a1ab98f9
LM
413 break;
414 case 'h':
415 default:
4c4a10dd 416 return usage (c != 'h');
a1ab98f9
LM
417 }
418 }
a1ab98f9
LM
419
420 /* check input */
a1ab98f9
LM
421 if (input) {
422 fin = fopen (input, "rb");
423 if (!fin) {
424 VERBOSE (ERROR, fprintf (stderr, "error: can't open file '%s'\n", input));
5272fae8 425 return 1;
a1ab98f9
LM
426 }
427 } else {
428 fin = stdin;
429 }
430
431 /* check output */
a1ab98f9 432 if (output) {
ce305529 433 fout = fopen (output, "wb");
a1ab98f9
LM
434 if (!fout) {
435 VERBOSE (ERROR, fprintf (stderr, "error: can't open file '%s'\n", output));
436 fclose (fin);
5272fae8 437 return 1;
a1ab98f9
LM
438 }
439 } else {
ce305529 440 //fout = stdout;
a1ab98f9
LM
441 }
442
4c4a10dd
LM
443 /* get file size */
444 if (fin != stdin) {
445 fseek (fin, 0 , SEEK_END);
cc02a838 446 unsigned long int filesize = ftell (fin);
4c4a10dd
LM
447 fseek (fin, 0 , SEEK_SET);
448 nbdigits = getnbdigits (filesize);
449 }
450
5272fae8 451 if (commands == NULL) {
4c4a10dd 452 hexdump (-1);
5272fae8
LM
453 } else {
454 VERBOSE (DEBUG, printf ("commands: %s\n", commands));
455 while ((*commands != '\0') && (rc == 0)) {
456 switch (*commands++) {
457 case ' ':
458 case '\t':
459 break;
460
461 case '/': /* read patern */
5bcbfcca 462 seq.sequence = commands;
9e439809 463 seq.length = 0;
4c4a10dd 464 while (*commands) {
f975557c
LM
465 if ((*commands == '\\') &&
466 ((commands[1] == '/') || (commands[1] == '\\'))) {
467 commands++;
468 } else if (*commands == '/') {
4c4a10dd
LM
469 *commands++ = 0;
470 break;
471 }
472 commands++;
473 }
5bcbfcca
LM
474 seq.length = specialchar (seq.sequence, seq.bytes);
475 if (seq.length != 0) {
476 rc = searchseq (&seq);
4c4a10dd 477 } else {
5bcbfcca 478 VERBOSE (ERROR, fprintf (stderr, "incorrect sequence (%s)\n", seq.sequence));
4c4a10dd
LM
479 rc = 1;
480 }
5272fae8
LM
481 break;
482
483 case '0': /* read address */
cc02a838
LM
484 if (*commands == 'x') {
485 commands++;
486 addr = strtol (commands, &commands, 16);
487 } else {
488 addr = strtol (commands, &commands, 8);
489 }
490 if (addr) {
491 rc = gotoaddr (addr);
492 } else {
493 VERBOSE (ERROR, fprintf (stderr, "erroneous address\n"));
9e439809 494 rc = 1;
cc02a838
LM
495 }
496 break;
497
498 case '1':
499 case '2':
500 case '3':
501 case '4':
502 case '5':
503 case '6':
504 case '7':
505 case '8':
506 case '9': /* read address */
507 commands--;
508 addr = strtol (commands, &commands, 10);
509 if (addr) {
510 rc = gotoaddr (addr);
511 } else {
512 VERBOSE (ERROR, fprintf (stderr, "erroneous address\n"));
9e439809 513 rc = 1;
cc02a838 514 }
5272fae8
LM
515 break;
516
517 case 'a': /* append mode */
9e439809
LM
518 offset = 0;
519 /* fall through */
5272fae8 520
9e439809
LM
521 case 'i': /* insert mode */
522 while (*commands) {
523 if ((*commands == ' ') || (*commands == '\t')) {
524 commands++;
525 } else {
526 break;
527 }
528 }
529 seq.sequence = commands;
530 seq.length = 0;
531 i = 0;
532 while (*commands) {
533 if ((*commands == ' ') || (*commands == '\t')) {
534 *commands++ = '\0';
535 break;
536 } else {
537 commands++;
538 i++;
539 if (i % 2 == 0) {
540 seq.bytes[seq.length] = hexa (seq.sequence + 2 * seq.length, 2);
541 if (seq.bytes[seq.length] == -1) {
542 rc = 1;
543 break;
544 }
545 seq.length++;
546 }
547 }
548 }
549 if ((seq.length > 0) && (rc == 0)) {
550 rc = insertseq (&seq);
551 } else {
552 VERBOSE (ERROR, fprintf (stderr, "erroneous sequence '%s'\n", seq.sequence));
553 rc = 1;
554 }
5272fae8
LM
555 break;
556
9e439809 557 case 'd': /* delete mode */
5272fae8
LM
558 break;
559
560 case 'p': /* print mode */
561 printlen = -1;
562 while (*commands != '\0') {
563 if ((*commands == ' ') || (*commands == '\t')) {
564 commands++;
565 } else if ((*commands >= '0') && (*commands <= '9')) {
566 printlen = strtol (commands, &commands, 10);
567 break;
568 } else if (*commands == '-') {
569 printlen = -1;
570 commands++;
571 break;
572 } else {
f975557c 573 VERBOSE (ERROR, fprintf (stderr, "unknown print length (%s)\n", commands));
5272fae8
LM
574 rc = 1;
575 break;
576 }
577 }
4c4a10dd
LM
578 if (rc == 0) {
579 hexdump (printlen);
580 }
5272fae8
LM
581 break;
582
583 case 's': /* substitute mode */
584 break;
585
586 default:
587 VERBOSE (ERROR, fprintf (stderr, "unknown command (%c)\n", commands[-1]));
588 rc = 1;
589 }
590 }
591 }
a1ab98f9 592
ce305529
LM
593 /* end of file */
594 if ((rc == 0) && (fout != NULL)) {
595 while (!feof (fin)) {
596 int nbread = fread (buffer, 1, BUFFERSIZE, fin);
597 if (nbread) {
598 fwrite (buffer, 1, nbread, fout);
599 }
600 }
601 }
602
a1ab98f9 603 /* close all */
5272fae8
LM
604 if (fin) fclose (fin);
605 if (fout) fclose (fout);
a1ab98f9 606
5272fae8 607 return rc;
a1ab98f9
LM
608}
609
a1ab98f9 610// test: hexdump.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
f975557c
LM
611// test: hexdump.exe foo 2>&1 | grep -q 'invalid option'
612// test: hexdump.exe -n 2>&1 | grep -q 'missing number of columns'
613// test: hexdump.exe -v 2>&1 | grep -q 'missing verbose level'
a1ab98f9
LM
614// test: hexdump.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }'
615// test: hexdump.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
616// test: hexdump.exe -i hexdump.c | grep -q '0x[0-9a-f]*: '
f975557c
LM
617// test: hexdump.exe -i hexdump.ko 2>&1 | grep -q "can't open file"
618// test: hexdump.exe -i hexdump.c -o ko/test.c 2>&1 | grep -q "can't open file"
619// test: cat hexdump.c | hexdump.exe -n 3 | head -2 | tail -1 | grep -q '0x000003: 64 65 70 dep'
ce305529
LM
620// test: hexdump.exe -i hexdump.c -n 3 | head -2 | tail -1 | grep -q '0x0003: 64 65 70 dep'
621// test: hexdump.exe -i hexdump.c -o test.c -e 'p 200' | tail -1 | grep -q '0x00c0:'
5bcbfcca 622// test: cmp hexdump.c test.c; x=$?; rm test.c; test x$x = x0
f975557c 623// test: hexdump.exe -i hexdump.c -e ' /cflags/ p 17 /debug/ p 8' | grep -q '0x0019: 2a 2f 0a 2f 2a 20 6c 69 \*\/\./\* li'
5bcbfcca
LM
624// test: hexdump.exe -i hexdump.c -o test.c -e ' /cfl\x61gs/ p 16 /d\145bug/ p 8' | grep -q '0x0027: 64 65 62 75 67 2e 6f 20 debug.o'
625// test: cmp hexdump.c test.c; x=$?; rm test.c; test x$x = x0
626// test: hexdump.exe -i hexdump.c -e ' /\n/ p 8' | grep -q '0x000d: 0a 2f 2a 20 63 66 6c 61 \./\* cfla'
627// test: hexdump.exe -i hexdump.c -o test.c -e ' /\a\b\e\f\r\t\v/ p 8'; x=$?; test x$x = x1
628// test: cmp hexdump.c test.c; x=$?; rm test.c; test x$x = x0
f975557c
LM
629// test: hexdump.exe -i hexdump.c -v 3 -e " /\'/" -e ' /\"/' -e ' /\\/' -e ' /\x2a/' -e ' s/\x3A/' | grep l: | wc -l | xargs test 5 =
630// test: hexdump.exe -i hexdump.c -e ' /\n\/* vim:/ p -' | grep -q ': 74 3a 20 2a 2f 0a *t: \*\/\.'
631// test: hexdump.exe -i hexdump.c -e 'p go_to_end' 2>&1 | grep -q 'unknown print length'
cc02a838 632// test: hexdump.exe -i hexdump.c -e ' //' 2>&1 | grep -q 'incorrect sequence'
f975557c 633// test: hexdump.exe -i hexdump.c -e 'foo' 2>&1 | grep -q 'unknown command'
cc02a838 634// test: hexdump.exe -i hexdump.c -e '0x20 p 8 64 p 8 0200 p 16' | grep -q '0x0080:'
a1ab98f9
LM
635
636/* vim: set ts=4 sw=4 et: */