better search sequence management
[hexdump.git] / hexdump.c
CommitLineData
a1ab98f9
LM
1/* depend: */
2/* cflags: */
3/* linker: debug.o */
4
5#include <assert.h>
a1ab98f9
LM
6#include <malloc.h>
7#include <stdio.h>
8#include <stdlib.h>
9#include <string.h>
10
11#include "debug.h"
12
/* macros */

/* Integer division of x by y rounded up (for positive operands).
 * NOTE: like MIN and MAX below, the arguments are evaluated more than
 * once, so do not pass expressions with side effects. */
#define CEIL(x, y) (((x) + (y) - 1) / (y))
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#define MAX(x, y) (((x) > (y)) ? (x) : (y))

/* Size in bytes of the I/O buffers (a value of 4096 was also tried). */
#define BUFFERSIZE 256
/* Initial value of nbcols. */
#define NBCOLS 8
/* Initial value of nbdigits. */
#define NBDIGITS 6
/* Capacity of the bytes array of a sequence_t. */
#define SEQLEN 32
a1ab98f9
LM
24
/* global variables */

int nbcols = NBCOLS;            /* number of bytes printed per line */
int nbdigits = NBDIGITS;        /* minimum width of the printed address */
int offset = 1;                 /* set by searchseq: 0 after a miss, pattern length after a hit */

char buffer[BUFFERSIZE] = {0};  /* shared work buffer (searchseq, final copy in main) */
FILE *fin = NULL;               /* input stream: file given with -i, else stdin */
int addrfile = 0;               /* address of the next input byte to process */
FILE *fout = NULL;              /* output stream, NULL when no -o option was given */
char *progname = NULL;          /* basename of argv[0], used in messages */
36
5bcbfcca
LM
37/* type definitions */
38
/* A search pattern: the text typed by the user and its decoded bytes. */
typedef struct {
    char *sequence;         /* pattern text, escapes not yet decoded */
    char bytes[SEQLEN];     /* decoded pattern, filled by specialchar */
    int length;             /* number of valid entries in bytes */
} sequence_t;
44
a1ab98f9
LM
45/* help function */
46
4c4a10dd 47int usage (int ret)
a1ab98f9
LM
48{
49 FILE *fd = ret ? stderr : stdout;
50 fprintf (fd, "usage: %s [-i file] [-h] [-n nbcols] [-o file] [-v]\n", progname);
5272fae8
LM
51 fprintf (fd, " -i: input file\n");
52 fprintf (fd, " -h: help message\n");
53 fprintf (fd, " -n: number of columns\n");
54 fprintf (fd, " -e: commands\n");
55 fprintf (fd, " -o: output file\n");
56 fprintf (fd, " -v: verbose level (%d)\n", verbose);
57 fprintf (fd, "\n");
58 fprintf (fd, "commands: [/hstr/|0xaddr] [a hstr] [d nb|-] [i hstr] [p nb|-] [s/h1/h2/[g]]\n");
59 fprintf (fd, " 0x: move to address addr\n");
60 fprintf (fd, " //: move to hexa stringi hstr\n");
61 fprintf (fd, " a : append hexa string hstr to current address\n");
62 fprintf (fd, " d : delete nb bytes (- until end file)\n");
63 fprintf (fd, " i : insert hexa string hstr to current address\n");
64 fprintf (fd, " p : print nb bytes (- until end file)\n");
65 fprintf (fd, " s : substitute h1 by h2 (g for globally)\n");
a1ab98f9 66
4c4a10dd 67 return ret;
a1ab98f9
LM
68}
69
/* get number of digits */

/*
 * Number of hexadecimal digits needed to print l as whole bytes:
 * two digits for every base-256 digit of l, hence 0 when l is 0.
 */
int getnbdigits (long int l) {
    int digits;

    for (digits = 0; l != 0; l /= 256) {
        digits += 2;
    }
    return digits;
}
80
81/* print a line */
82
4c4a10dd 83void printline (char *buffer, int nb, int addr) {
a1ab98f9
LM
84 int i;
85
86 printf ("0x%0*x:", nbdigits, addr);
87 for (i = 0; i < nb; i++) {
88 printf (" %02x", buffer[i]);
89 }
90 for (i = nb; i < nbcols; i++) {
91 printf (" ");
92 }
93 printf (" ");
94 for (i = 0; i < nb; i++) {
95 char c = buffer[i];
96 printf ("%c", (c > 31) && (c < 127) ? c : '.');
97 }
98 printf ("\n");
99}
100
ce305529 101/* write file function */
a1ab98f9 102
ce305529
LM
103int writefile (char *pt, int nb) {
104 if (fout) {
105 fwrite (pt, 1, nb, fout);
106 }
107 return 1;
108}
109
4c4a10dd
LM
110/* search sequence function */
111
5bcbfcca 112int searchseq (sequence_t *seq) {
4c4a10dd
LM
113 char *pt = buffer;
114 int nb = 0;
115 int i, j;
116 int valid = 0;
4c4a10dd 117
5bcbfcca 118 VERBOSE (DEBUG, printf ("search sequence: %s\n", seq->sequence));
4c4a10dd
LM
119
120 while (!feof (fin)) {
121 int nbread = fread (pt, 1, BUFFERSIZE - (pt - buffer), fin);
122 nb += nbread;
123 pt = buffer;
5bcbfcca 124 for (i = 0; i < nb - seq->length; i++) {
4c4a10dd 125 valid = 1;
5bcbfcca
LM
126 for (j = 0; (j < seq->length) && (valid); j++) {
127 if (pt[i + j] != seq->bytes[j]) {
4c4a10dd
LM
128 valid = 0;
129 }
130 }
131 if (valid) {
132 break;
133 }
134 }
135
136 if (!valid) {
5bcbfcca 137 writefile (buffer, nb - seq->length);
4c4a10dd 138 offset = 0;
5bcbfcca
LM
139 addrfile += nb - seq->length;
140 for (i = 0; i < seq->length; i++) {
141 buffer[i] = buffer[nb - seq->length + i];
4c4a10dd 142 }
5bcbfcca
LM
143 pt = buffer + seq->length;
144 nb = seq->length;
4c4a10dd
LM
145 } else {
146 writefile (buffer, i);
5bcbfcca 147 offset = seq->length;
4c4a10dd
LM
148 addrfile += i;
149 fseek (fin, i - nb, SEEK_CUR);
150 VERBOSE (DEBUG, printf ("found sequence (%d)\n", i - nb));
151 return 0;
152 }
153 }
154
155 if (!valid) {
156 writefile (buffer, nb);
5bcbfcca 157 addrfile += seq->length;
4c4a10dd
LM
158 }
159
160 return 1;
161}
162
ce305529
LM
163/* hexadecimal dump function */
164
4c4a10dd 165int hexdump (int len) {
a1ab98f9
LM
166 char buffer[BUFFERSIZE] = {0};
167 int i;
168
169 char *pt = buffer;
170
a1ab98f9
LM
171 int nb = 0;
172 while (!feof (fin)) {
5272fae8
LM
173 int nbtoread = BUFFERSIZE - (pt - buffer);
174 if ((len > 0) && (nbtoread > len)) {
175 nbtoread = len;
176 }
177 int nbread = fread (pt, 1, nbtoread, fin);
178 if (len > 0) {
179 len -= nbread;
180 }
181 nb += nbread;
a1ab98f9
LM
182 pt = buffer;
183
184 /* print line */
185 while ((nb - (int)(pt - buffer)) / nbcols > 0) {
4c4a10dd 186 printline (pt, nbcols, addrfile);
ce305529 187 writefile (pt, nbcols);
4c4a10dd 188 addrfile += nbcols;
a1ab98f9 189 pt += nbcols;
a1ab98f9
LM
190 }
191
192 /* copy end buffer */
193 nb -= pt - buffer;
194 for (i = 0; i < nb; i++) {
195 buffer[i] = pt[i];
196 }
197 pt = buffer + nb;
5272fae8
LM
198
199 /* end partial reading */
200 if (len == 0) {
201 break;
202 }
a1ab98f9
LM
203 }
204
205 /* last line */
206 if (nb > 0) {
4c4a10dd
LM
207 printline (buffer, nb, addrfile);
208 writefile (buffer, nb);
209 addrfile += nb;
a1ab98f9
LM
210 }
211
212 return 0;
213}
214
47db4fc7
LM
/* parse octal string */

/*
 * Convert the first n characters of s, read as octal digits.
 *
 * s: string to parse
 * n: number of characters to convert
 * Returns the value, or -1 as soon as a character is not an octal digit
 * (this includes the end of a string shorter than n).
 */
long int octal (char *s, int n) {
    int i;
    long int l = 0;
    for (i = 0; i < n; i++) {
        /* octal digits stop at 7: 8 and 9 must be rejected */
        if ((s[i] >= '0') && (s[i] <= '7')) {
            l = l * 8 + s[i] - '0';
        } else {
            return -1;
        }
    }
    return l;
}
229
/* parse hexa string */

/*
 * Convert the first n characters of s, read as hexadecimal digits of
 * either case.
 *
 * Returns the value, or -1 as soon as a character is not a hexadecimal
 * digit.
 */
long int hexa (char *s, int n) {
    long int value = 0;

    while (n-- > 0) {
        char c = *s++;
        int digit;

        if ((c >= '0') && (c <= '9')) {
            digit = c - '0';
        } else if ((c >= 'a') && (c <= 'f')) {
            digit = c - 'a' + 10;
        } else if ((c >= 'A') && (c <= 'F')) {
            digit = c - 'A' + 10;
        } else {
            return -1;
        }
        value = value * 16 + digit;
    }
    return value;
}
249
4c4a10dd
LM
250/* special character function */
251
5bcbfcca 252int specialchar (char *s, char *b) {
4c4a10dd
LM
253 int i = 0, j = 0;
254 while (s[i] != 0) {
5bcbfcca
LM
255 if (j == SEQLEN) {
256 return 0;
257 }
4c4a10dd 258 if (s[i] != '\\') {
5bcbfcca 259 b[j++] = s[i++];
4c4a10dd
LM
260 continue;
261 }
262
47db4fc7 263 int l = -1;
4c4a10dd
LM
264 switch (s[i + 1]) {
265 case 'a': l = 0x07; i += 2; break;
266 case 'b': l = 0x08; i += 2; break;
267 case 'e': l = 0x1b; i += 2; break;
268 case 'f': l = 0x0c; i += 2; break;
269 case 'n': l = 0x0a; i += 2; break;
270 case 'r': l = 0x0d; i += 2; break;
271 case 't': l = 0x09; i += 2; break;
272 case 'v': l = 0x0b; i += 2; break;
273 case '\\': l = '\\'; i += 2; break;
274 case '\'': l = '\''; i += 2; break;
275 case '"': l = '"'; i += 2; break;
276 case '0':
277 case '1':
278 case '2':
279 case '3':
5bcbfcca 280 l = octal (s + i + 1, 3);
47db4fc7 281 if (l != -1) {
4c4a10dd
LM
282 i += 4;
283 }
284 break;
285 case 'x':
5bcbfcca 286 l = hexa (s + i + 2, 2);
47db4fc7 287 if (l != -1) {
4c4a10dd
LM
288 i += 4;
289 }
290 break;
291 default:
292 }
5bcbfcca
LM
293 if (l != -1) {
294 VERBOSE (DEBUG, printf("l: 0x%02x '%c'\n", l, l));
295 }
296 b[j++] = (l != -1) ? l : s[i++];
4c4a10dd 297 }
4c4a10dd 298
5bcbfcca 299 return j;
4c4a10dd
LM
300}
301
a1ab98f9
LM
302/* main function */
303
4c4a10dd 304int main (int argc, char *argv[])
a1ab98f9 305{
5272fae8 306 int rc = 0;
a1ab98f9
LM
307 char *input = NULL;
308 char *output = NULL;
5272fae8
LM
309 char *commands = NULL;
310 int printlen = -1;
5bcbfcca 311 sequence_t seq = {0};
4c4a10dd
LM
312 char *addr = NULL;
313
a1ab98f9
LM
314 /* get basename */
315 char *pt = progname = argv[0];
316 while (*pt) {
317 if ((*pt == '/') || (*pt == '\\')) {
318 progname = pt + 1;
319 }
320 pt++;
321 }
322
4c4a10dd
LM
323 while (argc-- > 1) {
324 char *arg = *(++argv);
325 if (arg[0] != '-') {
326 VERBOSE (ERROR, fprintf (stderr, "%s: invalid option -- %s\n", progname, arg));
327 return usage (1);
328 }
329 char c = arg[1];
a1ab98f9 330 switch (c) {
5272fae8 331 case 'e':
4c4a10dd
LM
332 arg = (arg[2]) ? arg + 2 : (--argc > 0) ? *(++argv) : NULL;
333 if (arg) {
4c4a10dd
LM
334 if (commands == NULL) {
335 commands = arg;
336 } else {
337 strcat (commands, " ");
338 strcat (commands, arg);
339 }
5272fae8
LM
340 }
341 break;
4c4a10dd
LM
342 case 'i':
343 input = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL;
344 break;
a1ab98f9 345 case 'n':
4c4a10dd
LM
346 arg = (arg[2]) ? arg + 2 : (--argc > 0) ? *(++argv) : NULL;
347 if (arg == NULL) {
348 VERBOSE (ERROR, fprintf (stderr, "%s: missing number of columns\n", progname));
349 return usage (1);
350 }
351 nbcols = atoi (arg);
a1ab98f9
LM
352 break;
353 case 'o':
4c4a10dd 354 output = (arg[2]) ? arg + 2 : (--argc > 0 ) ? *(++argv) : NULL;
a1ab98f9
LM
355 break;
356 case 'v':
4c4a10dd
LM
357 arg = (arg[2]) ? arg + 2 : (--argc > 0) ? *(++argv) : NULL;
358 if (arg == NULL) {
359 VERBOSE (ERROR, fprintf (stderr, "%s: missing verbose level\n", progname));
360 return usage (1);
361 }
362 verbose = atoi (arg);
a1ab98f9
LM
363 break;
364 case 'h':
365 default:
4c4a10dd 366 return usage (c != 'h');
a1ab98f9
LM
367 }
368 }
a1ab98f9
LM
369
370 /* check input */
a1ab98f9
LM
371 if (input) {
372 fin = fopen (input, "rb");
373 if (!fin) {
374 VERBOSE (ERROR, fprintf (stderr, "error: can't open file '%s'\n", input));
5272fae8 375 return 1;
a1ab98f9
LM
376 }
377 } else {
378 fin = stdin;
379 }
380
381 /* check output */
a1ab98f9 382 if (output) {
ce305529 383 fout = fopen (output, "wb");
a1ab98f9
LM
384 if (!fout) {
385 VERBOSE (ERROR, fprintf (stderr, "error: can't open file '%s'\n", output));
386 fclose (fin);
5272fae8 387 return 1;
a1ab98f9
LM
388 }
389 } else {
ce305529 390 //fout = stdout;
a1ab98f9
LM
391 }
392
4c4a10dd
LM
393 /* get file size */
394 if (fin != stdin) {
395 fseek (fin, 0 , SEEK_END);
396 long int filesize = ftell (fin);
397 fseek (fin, 0 , SEEK_SET);
398 nbdigits = getnbdigits (filesize);
399 }
400
5272fae8 401 if (commands == NULL) {
4c4a10dd 402 hexdump (-1);
5272fae8
LM
403 } else {
404 VERBOSE (DEBUG, printf ("commands: %s\n", commands));
405 while ((*commands != '\0') && (rc == 0)) {
406 switch (*commands++) {
407 case ' ':
408 case '\t':
409 break;
410
411 case '/': /* read patern */
5bcbfcca 412 seq.sequence = commands;
4c4a10dd
LM
413 while (*commands) {
414 if (*commands == '/') {
415 *commands++ = 0;
416 break;
417 }
418 commands++;
419 }
5bcbfcca
LM
420 seq.length = specialchar (seq.sequence, seq.bytes);
421 if (seq.length != 0) {
422 rc = searchseq (&seq);
4c4a10dd 423 } else {
5bcbfcca 424 VERBOSE (ERROR, fprintf (stderr, "incorrect sequence (%s)\n", seq.sequence));
4c4a10dd
LM
425 rc = 1;
426 }
5272fae8
LM
427 break;
428
429 case '0': /* read address */
430 break;
431
432 case 'a': /* append mode */
433 break;
434
435 case 'd': /* delete mode */
436 break;
437
438 case 'i': /* insert mode */
439 break;
440
441 case 'p': /* print mode */
442 printlen = -1;
443 while (*commands != '\0') {
444 if ((*commands == ' ') || (*commands == '\t')) {
445 commands++;
446 } else if ((*commands >= '0') && (*commands <= '9')) {
447 printlen = strtol (commands, &commands, 10);
448 break;
449 } else if (*commands == '-') {
450 printlen = -1;
451 commands++;
452 break;
453 } else {
5bcbfcca 454 VERBOSE (ERROR, fprintf (stderr, "unkown print length (%s)\n", commands));
5272fae8
LM
455 rc = 1;
456 break;
457 }
458 }
4c4a10dd
LM
459 if (rc == 0) {
460 hexdump (printlen);
461 }
5272fae8
LM
462 break;
463
464 case 's': /* substitute mode */
465 break;
466
467 default:
468 VERBOSE (ERROR, fprintf (stderr, "unknown command (%c)\n", commands[-1]));
469 rc = 1;
470 }
471 }
472 }
a1ab98f9 473
ce305529
LM
474 /* end of file */
475 if ((rc == 0) && (fout != NULL)) {
476 while (!feof (fin)) {
477 int nbread = fread (buffer, 1, BUFFERSIZE, fin);
478 if (nbread) {
479 fwrite (buffer, 1, nbread, fout);
480 }
481 }
482 }
483
a1ab98f9 484 /* close all */
5272fae8
LM
485 if (fin) fclose (fin);
486 if (fout) fclose (fout);
a1ab98f9 487
5272fae8 488 return rc;
a1ab98f9
LM
489}
490
a1ab98f9
LM
491// test: hexdump.exe -h | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
492// test: hexdump.exe -_ 2> /dev/null | awk 'END { if (NR == 0) { exit(0) } else exit (1) }'
493// test: hexdump.exe -_ 2>&1 | awk '/usage:/ { rc=1 } END { exit (1-rc) }'
494// test: hexdump.exe -i hexdump.c | grep -q '0x[0-9a-f]*: '
ce305529
LM
495// test: hexdump.exe -i hexdump.c -n 3 | head -2 | tail -1 | grep -q '0x0003: 64 65 70 dep'
496// test: hexdump.exe -i hexdump.c -o test.c -e 'p 200' | tail -1 | grep -q '0x00c0:'
5bcbfcca
LM
497// test: cmp hexdump.c test.c; x=$?; rm test.c; test x$x = x0
498// test: hexdump.exe -i hexdump.c -e ' /cflags/ p 17 /debug/ p 8' | grep -q '0x0019: 2a 2f 0a 2f 2a 20 6c 69 \*/\./\* li'
499// test: hexdump.exe -i hexdump.c -o test.c -e ' /cfl\x61gs/ p 16 /d\145bug/ p 8' | grep -q '0x0027: 64 65 62 75 67 2e 6f 20 debug.o'
500// test: cmp hexdump.c test.c; x=$?; rm test.c; test x$x = x0
501// test: hexdump.exe -i hexdump.c -e ' /\n/ p 8' | grep -q '0x000d: 0a 2f 2a 20 63 66 6c 61 \./\* cfla'
502// test: hexdump.exe -i hexdump.c -o test.c -e ' /\a\b\e\f\r\t\v/ p 8'; x=$?; test x$x = x1
503// test: cmp hexdump.c test.c; x=$?; rm test.c; test x$x = x0
504// test: hexdump.exe -i hexdump.c -e ' /\"/' -e " /\\'/" -e ' /\\/'
a1ab98f9
LM
505
506/* vim: set ts=4 sw=4 et: */