dd4ba734d24fa68bdb38ddd1c5ff397f7fb50fc5
[hexdump.git] / hexdump.c
1 /* depend: */
2 /* cflags: */
3 /* linker: debug.o */
4
5 #include <assert.h>
6 #include <malloc.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10
11 #include "debug.h"
12
13 /* macros */
14
/* Integer quotient of x by y rounded up (meant for non-negative x and
 * positive y). Arguments are evaluated more than once. */
#define CEIL(x, y) (((x) + (y) - 1) / (y))
/* Smaller and larger of two values. Each argument may be evaluated twice,
 * so do not pass expressions with side effects. */
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#define MAX(x, y) (((x) > (y)) ? (x) : (y))

/* Size in bytes of the global work buffer and of the local buffers used by
 * gotoaddr and hexdump. A larger alternative is kept commented out. */
//#define BUFFERSIZE 4096
#define BUFFERSIZE 256
/* Initial value of nbcols. */
#define NBCOLS 8
/* Initial value of nbdigits. */
#define NBDIGITS 6
/* Capacity of sequence_t.bytes, i.e. longest decoded search sequence. */
#define SEQLEN 32
24
/* global variables */
26
/* Number of bytes shown per dump line; main overwrites it from -n. */
int nbcols = NBCOLS;
/* Minimum number of hex digits of the address column; main recomputes it
 * from the input file size when the input is not stdin. */
int nbdigits = NBDIGITS;
/* Written by searchseq (0 after a chunk without match, the sequence length
 * on a match); it is not read anywhere in the code visible here. */
int offset = 1;

/* Work buffer shared by searchseq and by the final copy loop in main. */
char buffer[BUFFERSIZE] = {0};
/* Input stream: the file given with -i, or stdin. */
FILE *fin = NULL;
/* Number of input bytes already passed over by searchseq, gotoaddr and
 * hexdump; used as the address printed in front of each dump line. */
unsigned long int addrfile = 0;
/* Output stream for -o; stays NULL when no output file is requested. */
FILE *fout = NULL;
/* Program name without directory part, set at the start of main. */
char *progname = NULL;
36
37 /* type definitions */
38
/* A byte sequence to search for in the input. */
typedef struct {
    char *sequence;      /* pattern text as written in the command string */
    char bytes[SEQLEN];  /* pattern decoded by specialchar (escapes resolved) */
    int length;          /* number of valid entries in bytes */
} sequence_t;
44
45 /* help function */
46
47 int usage (int ret)
48 {
49 FILE *fd = ret ? stderr : stdout;
50 fprintf (fd, "usage: %s [-i file] [-h] [-n nbcols] [-o file] [-v]\n", progname);
51 fprintf (fd, " -i: input file\n");
52 fprintf (fd, " -h: help message\n");
53 fprintf (fd, " -n: number of columns\n");
54 fprintf (fd, " -e: commands\n");
55 fprintf (fd, " -o: output file\n");
56 fprintf (fd, " -v: verbose level (%d)\n", verbose);
57 fprintf (fd, "\n");
58 fprintf (fd, "commands: [/hstr/|addr] [a hstr] [d nb|-] [i hstr] [p nb|-] [s/h1/h2/[g]]\n");
59 fprintf (fd, " addr: move to address (0... octal, [1-9]... deci, 0x... hexa)\n");
60 fprintf (fd, " //: move to hexa stringi hstr\n");
61 fprintf (fd, " a : append hexa string hstr to current address\n");
62 fprintf (fd, " d : delete nb bytes (- until end file)\n");
63 fprintf (fd, " i : insert hexa string hstr to current address\n");
64 fprintf (fd, " p : print nb bytes (- until end file)\n");
65 fprintf (fd, " s : substitute h1 by h2 (g for globally)\n");
66
67 return ret;
68 }
69
70 /* get number of digits */
71
/*
 * Number of hexadecimal digits needed to print l when digits are counted
 * in whole bytes (two digits per non-zero byte position). Returns 0 for 0.
 */
unsigned int getnbdigits (unsigned long int l) {
    unsigned int digits;

    /* drop one byte per round, counting two hex digits for each */
    for (digits = 0; l != 0; l >>= 8) {
        digits += 2;
    }
    return digits;
}
80
81 /* print a line */
82
83 void printline (char *buffer, int nb, int addr) {
84 int i;
85
86 printf ("0x%0*x:", nbdigits, addr);
87 for (i = 0; i < nb; i++) {
88 printf (" %02x", buffer[i]);
89 }
90 for (i = nb; i < nbcols; i++) {
91 printf (" ");
92 }
93 printf (" ");
94 for (i = 0; i < nb; i++) {
95 char c = buffer[i];
96 printf ("%c", (c > 31) && (c < 127) ? c : '.');
97 }
98 printf ("\n");
99 }
100
101 /* write file function */
102
103 int writefile (char *pt, int nb) {
104 if (fout) {
105 fwrite (pt, 1, nb, fout);
106 }
107 return 1;
108 }
109
110 /* search sequence function */
111
112 int searchseq (sequence_t *seq) {
113 char *pt = buffer;
114 int nb = 0;
115 int i, j;
116 int valid = 0;
117
118 VERBOSE (DEBUG, printf ("search sequence: %s\n", seq->sequence));
119
120 while (!feof (fin)) {
121 int nbread = fread (pt, 1, BUFFERSIZE - (pt - buffer), fin);
122 nb += nbread;
123 pt = buffer;
124 for (i = 0; i < nb - seq->length; i++) {
125 valid = 1;
126 for (j = 0; (j < seq->length) && (valid); j++) {
127 if (pt[i + j] != seq->bytes[j]) {
128 valid = 0;
129 }
130 }
131 if (valid) {
132 break;
133 }
134 }
135
136 if (!valid) {
137 writefile (buffer, nb - seq->length);
138 offset = 0;
139 addrfile += nb - seq->length;
140 for (i = 0; i < seq->length; i++) {
141 buffer[i] = buffer[nb - seq->length + i];
142 }
143 pt = buffer + seq->length;
144 nb = seq->length;
145 } else {
146 writefile (buffer, i);
147 offset = seq->length;
148 addrfile += i;
149 fseek (fin, i - nb, SEEK_CUR);
150 VERBOSE (DEBUG, printf ("found sequence (%d)\n", i - nb));
151 return 0;
152 }
153 }
154
155 if (!valid) {
156 writefile (buffer, nb);
157 addrfile += seq->length;
158 }
159
160 return 1;
161 }
162
163 /* go to address function */
164
165 int gotoaddr (unsigned long int addr) {
166 char buffer[BUFFERSIZE] = {0};
167
168 if (addrfile > addr) {
169 return 1;
170 }
171
172 VERBOSE (DEBUG, printf ("look for address: 0x%04lx\n", addr));
173 while (!feof (fin)) {
174 int nbtoread = (addrfile + BUFFERSIZE > addr) ? addr - addrfile : BUFFERSIZE;
175 int nbread = fread (buffer, 1, nbtoread, fin);
176 writefile (buffer, nbread);
177 addrfile += nbread;
178 if (addrfile == addr) {
179 return 0;
180 }
181 }
182
183 return 1;
184 }
185
186 /* hexadecimal dump function */
187
188 int hexdump (int len) {
189 char buffer[BUFFERSIZE] = {0};
190 int i;
191
192 char *pt = buffer;
193
194 int nb = 0;
195 while (!feof (fin)) {
196 int nbtoread = BUFFERSIZE - (pt - buffer);
197 if ((len > 0) && (nbtoread > len)) {
198 nbtoread = len;
199 }
200 int nbread = fread (pt, 1, nbtoread, fin);
201 if (len > 0) {
202 len -= nbread;
203 }
204 nb += nbread;
205 pt = buffer;
206
207 /* print line */
208 while ((nb - (int)(pt - buffer)) / nbcols > 0) {
209 printline (pt, nbcols, addrfile);
210 writefile (pt, nbcols);
211 addrfile += nbcols;
212 pt += nbcols;
213 }
214
215 /* copy end buffer */
216 nb -= pt - buffer;
217 for (i = 0; i < nb; i++) {
218 buffer[i] = pt[i];
219 }
220 pt = buffer + nb;
221
222 /* end partial reading */
223 if (len == 0) {
224 break;
225 }
226 }
227
228 /* last line */
229 if (nb > 0) {
230 printline (buffer, nb, addrfile);
231 writefile (buffer, nb);
232 addrfile += nb;
233 }
234
235 return 0;
236 }
237
238 /* parse octal string */
239
/*
 * Convert exactly n octal digits starting at s.
 *
 * Returns the value, or (unsigned long int) -1 if any of the n characters
 * is not in '0'..'7'. The digits '8' and '9' are rejected. Conversion stops
 * at the first invalid character, so a string shorter than n is never read
 * past its terminating NUL.
 */
unsigned long int octal (char *s, int n) {
    unsigned long int l = 0;
    int i;

    for (i = 0; i < n; i++) {
        if ((s[i] < '0') || (s[i] > '7')) {
            return (unsigned long int) -1;
        }
        l = l * 8 + (unsigned long int) (s[i] - '0');
    }
    return l;
}
252
253 /* parse hexa string */
254
/*
 * Convert exactly n hexadecimal digits starting at s (either letter case).
 *
 * Returns the value, or (unsigned long int) -1 as soon as a character that
 * is not a hexadecimal digit is met.
 */
unsigned long int hexa (char *s, int n) {
    unsigned long int value = 0;
    int pos = 0;

    while (pos < n) {
        char c = s[pos++];
        unsigned long int digit;

        if ((c >= '0') && (c <= '9')) {
            digit = c - '0';
        } else if ((c >= 'a') && (c <= 'f')) {
            digit = c - 'a' + 10;
        } else if ((c >= 'A') && (c <= 'F')) {
            digit = c - 'A' + 10;
        } else {
            return -1;
        }
        value = value * 16 + digit;
    }
    return value;
}
272
273 /* special character function */
274
275 int specialchar (char *s, char *b) {
276 int i = 0, j = 0;
277 while (s[i] != 0) {
278 if (j == SEQLEN) {
279 return 0;
280 }
281 if (s[i] != '\\') {
282 b[j++] = s[i++];
283 continue;
284 }
285
286 int l = -1;
287 switch (s[i + 1]) {
288 case 'a': l = 0x07; i += 2; break;
289 case 'b': l = 0x08; i += 2; break;
290 case 'e': l = 0x1b; i += 2; break;
291 case 'f': l = 0x0c; i += 2; break;
292 case 'n': l = 0x0a; i += 2; break;
293 case 'r': l = 0x0d; i += 2; break;
294 case 't': l = 0x09; i += 2; break;
295 case 'v': l = 0x0b; i += 2; break;
296 case '/': l = '/'; i += 2; break;
297 case '\\': l = '\\'; i += 2; break;
298 case '\'': l = '\''; i += 2; break;
299 case '"': l = '"'; i += 2; break;
300 case '0':
301 case '1':
302 case '2':
303 case '3':
304 l = octal (s + i + 1, 3);
305 if (l != -1) {
306 i += 4;
307 }
308 break;
309 case 'x':
310 l = hexa (s + i + 2, 2);
311 if (l != -1) {
312 i += 4;
313 }
314 break;
315 default:
316 break;
317 }
318 if (l != -1) {
319 VERBOSE (DEBUG, printf("l: 0x%02x '%c'\n", l, l));
320 }
321 b[j++] = (l != -1) ? l : s[i++];
322 }
323
324 return j;
325 }
326
327 /* main function */
328
329 int main (int argc, char *argv[])
330 {
331 int rc = 0;
332 char *input = NULL;
333 char *output = NULL;
334 char *commands = NULL;
335 int printlen = -1;
336 sequence_t seq = {0};
337 unsigned long int addr = 0;
338
339 /* get basename */
340 char *pt = progname = argv[0];
341 while (*pt) {
342 if ((*pt == '/') || (*pt == '\\')) {
343 progname = pt + 1;
344 }
345 pt++;
346 }
347
348 while (argc-- > 1) {
349 char *arg = *(++argv);
350 if (arg[0] != '-') {
351 VERBOSE (ERROR, fprintf (stderr, "%s: invalid option -- %s\n", progname, arg));
352 return usage (1);
353 }
354 char c = arg[1];
355 switch (c) {
356 case 'e':
357 arg = (arg[2]) ? arg + 2 : (--argc > 0) ? *(++argv) : NULL;
358 if (arg) {
359 if (commands == NULL) {
360 commands = arg;
361 } else {
362 strcat (commands, " ");
363 strcat (commands, arg);
364 }
365 }
366 break;
367 case 'i':
368 input = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL;
369 break;
370 case 'n':
371 arg = (arg[2]) ? arg + 2 : (--argc > 0) ? *(++argv) : NULL;
372 if (arg == NULL) {
373 VERBOSE (ERROR, fprintf (stderr, "%s: missing number of columns\n", progname));
374 return usage (1);
375 }
376 nbcols = atoi (arg);
377 break;
378 case 'o':
379 output = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL;
380 break;
381 case 'v':
382 arg = (arg[2]) ? arg + 2 : (--argc > 0) ? *(++argv) : NULL;
383 if (arg == NULL) {
384 VERBOSE (ERROR, fprintf (stderr, "%s: missing verbose level\n", progname));
385 return usage (1);
386 }
387 verbose = atoi (arg);
388 break;
389 case 'h':
390 default:
391 return usage (c != 'h');
392 }
393 }
394
395 /* check input */
396 if (input) {
397 fin = fopen (input, "rb");
398 if (!fin) {
399 VERBOSE (ERROR, fprintf (stderr, "error: can't open file '%s'\n", input));
400 return 1;
401 }
402 } else {
403 fin = stdin;
404 }
405
406 /* check output */
407 if (output) {
408 fout = fopen (output, "wb");
409 if (!fout) {
410 VERBOSE (ERROR, fprintf (stderr, "error: can't open file '%s'\n", output));
411 fclose (fin);
412 return 1;
413 }
414 } else {
415 //fout = stdout;
416 }
417
418 /* get file size */
419 if (fin != stdin) {
420 fseek (fin, 0 , SEEK_END);
421 unsigned long int filesize = ftell (fin);
422 fseek (fin, 0 , SEEK_SET);
423 nbdigits = getnbdigits (filesize);
424 }
425
426 if (commands == NULL) {
427 hexdump (-1);
428 } else {
429 VERBOSE (DEBUG, printf ("commands: %s\n", commands));
430 while ((*commands != '\0') && (rc == 0)) {
431 switch (*commands++) {
432 case ' ':
433 case '\t':
434 break;
435
436 case '/': /* read patern */
437 seq.sequence = commands;
438 while (*commands) {
439 if ((*commands == '\\') &&
440 ((commands[1] == '/') || (commands[1] == '\\'))) {
441 commands++;
442 } else if (*commands == '/') {
443 *commands++ = 0;
444 break;
445 }
446 commands++;
447 }
448 seq.length = specialchar (seq.sequence, seq.bytes);
449 if (seq.length != 0) {
450 rc = searchseq (&seq);
451 } else {
452 VERBOSE (ERROR, fprintf (stderr, "incorrect sequence (%s)\n", seq.sequence));
453 rc = 1;
454 }
455 break;
456
457 case '0': /* read address */
458 if (*commands == 'x') {
459 commands++;
460 addr = strtol (commands, &commands, 16);
461 } else {
462 addr = strtol (commands, &commands, 8);
463 }
464 if (addr) {
465 rc = gotoaddr (addr);
466 } else {
467 VERBOSE (ERROR, fprintf (stderr, "erroneous address\n"));
468 }
469 break;
470
471 case '1':
472 case '2':
473 case '3':
474 case '4':
475 case '5':
476 case '6':
477 case '7':
478 case '8':
479 case '9': /* read address */
480 commands--;
481 addr = strtol (commands, &commands, 10);
482 if (addr) {
483 rc = gotoaddr (addr);
484 } else {
485 VERBOSE (ERROR, fprintf (stderr, "erroneous address\n"));
486 }
487 break;
488
489 case 'a': /* append mode */
490 break;
491
492 case 'd': /* delete mode */
493 break;
494
495 case 'i': /* insert mode */
496 break;
497
498 case 'p': /* print mode */
499 printlen = -1;
500 while (*commands != '\0') {
501 if ((*commands == ' ') || (*commands == '\t')) {
502 commands++;
503 } else if ((*commands >= '0') && (*commands <= '9')) {
504 printlen = strtol (commands, &commands, 10);
505 break;
506 } else if (*commands == '-') {
507 printlen = -1;
508 commands++;
509 break;
510 } else {
511 VERBOSE (ERROR, fprintf (stderr, "unknown print length (%s)\n", commands));
512 rc = 1;
513 break;
514 }
515 }
516 if (rc == 0) {
517 hexdump (printlen);
518 }
519 break;
520
521 case 's': /* substitute mode */
522 break;
523
524 default:
525 VERBOSE (ERROR, fprintf (stderr, "unknown command (%c)\n", commands[-1]));
526 rc = 1;
527 }
528 }
529 }
530
531 /* end of file */
532 if ((rc == 0) && (fout != NULL)) {
533 while (!feof (fin)) {
534 int nbread = fread (buffer, 1, BUFFERSIZE, fin);
535 if (nbread) {
536 fwrite (buffer, 1, nbread, fout);
537 }
538 }
539 }
540
541 /* close all */
542 if (fin) fclose (fin);
543 if (fout) fclose (fout);
544
545 return rc;
546 }
547
548 // test: hexdump.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
549 // test: hexdump.exe foo 2>&1 | grep -q 'invalid option'
550 // test: hexdump.exe -n 2>&1 | grep -q 'missing number of columns'
551 // test: hexdump.exe -v 2>&1 | grep -q 'missing verbose level'
552 // test: hexdump.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }'
553 // test: hexdump.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
554 // test: hexdump.exe -i hexdump.c | grep -q '0x[0-9a-f]*: '
555 // test: hexdump.exe -i hexdump.ko 2>&1 | grep -q "can't open file"
556 // test: hexdump.exe -i hexdump.c -o ko/test.c 2>&1 | grep -q "can't open file"
557 // test: cat hexdump.c | hexdump.exe -n 3 | head -2 | tail -1 | grep -q '0x000003: 64 65 70 dep'
558 // test: hexdump.exe -i hexdump.c -n 3 | head -2 | tail -1 | grep -q '0x0003: 64 65 70 dep'
559 // test: hexdump.exe -i hexdump.c -o test.c -e 'p 200' | tail -1 | grep -q '0x00c0:'
560 // test: cmp hexdump.c test.c; x=$?; rm test.c; test x$x = x0
561 // test: hexdump.exe -i hexdump.c -e ' /cflags/ p 17 /debug/ p 8' | grep -q '0x0019: 2a 2f 0a 2f 2a 20 6c 69 \*\/\./\* li'
562 // test: hexdump.exe -i hexdump.c -o test.c -e ' /cfl\x61gs/ p 16 /d\145bug/ p 8' | grep -q '0x0027: 64 65 62 75 67 2e 6f 20 debug.o'
563 // test: cmp hexdump.c test.c; x=$?; rm test.c; test x$x = x0
564 // test: hexdump.exe -i hexdump.c -e ' /\n/ p 8' | grep -q '0x000d: 0a 2f 2a 20 63 66 6c 61 \./\* cfla'
565 // test: hexdump.exe -i hexdump.c -o test.c -e ' /\a\b\e\f\r\t\v/ p 8'; x=$?; test x$x = x1
566 // test: cmp hexdump.c test.c; x=$?; rm test.c; test x$x = x0
567 // test: hexdump.exe -i hexdump.c -v 3 -e " /\'/" -e ' /\"/' -e ' /\\/' -e ' /\x2a/' -e ' s/\x3A/' | grep l: | wc -l | xargs test 5 =
568 // test: hexdump.exe -i hexdump.c -e ' /\n\/* vim:/ p -' | grep -q ': 74 3a 20 2a 2f 0a *t: \*\/\.'
569 // test: hexdump.exe -i hexdump.c -e 'p go_to_end' 2>&1 | grep -q 'unknown print length'
570 // test: hexdump.exe -i hexdump.c -e ' //' 2>&1 | grep -q 'incorrect sequence'
571 // test: hexdump.exe -i hexdump.c -e 'foo' 2>&1 | grep -q 'unknown command'
572 // test: hexdump.exe -i hexdump.c -e '0x20 p 8 64 p 8 0200 p 16' | grep -q '0x0080:'
573
574 /* vim: set ts=4 sw=4 et: */