coreutils

General Software Utilities
git clone http://git.omkov.net/coreutils
Log | Tree | Refs | README | LICENCE | Download

coreutils/src/od.c (366 lines, 12 KiB) -rw-r--r-- blame download

0123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365
// od.c, version 0.8.1
// OMKOV coreutils implementation of POSIX od
// Copyright (C) 2020, Jakob Wakeling
// MIT Licence

/*
	TODO Improve error handling
	TODO Implement [[+]offset[.][b]] operand as required by POSIX.
	TODO Fix segfault when using standard input.
*/

#include "util/error.h"
#include "util/optget.h"

#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define VERSION "0.8.1"

// Long option table handed to optget via opt.lops in main. The third field
// is the value main's option switch matches on: 256 selects hlp() and 257
// selects ver(). The all-zero entry ends the table.
// NOTE(review): ARG_NUL presumably means "takes no argument" — confirm in
// util/optget.h.
static struct lop lops[] = {
	{ "help",    ARG_NUL, 256 },
	{ "version", ARG_NUL, 257 },
	{ NULL, 0, 0 }
};

// One 16-byte block of input. The anonymous union lets the same 16 bytes be
// read as 1, 2, 4 or 8 byte integers or as 4 or 8 byte floating point values;
// bread() fills i8 and bprint() reads whichever member matches the type width.
typedef struct {
	union {
		uint8_t  i8[16]; uint16_t i16[8];
		uint32_t i32[4]; uint64_t i64[2];
		float    f32[4]; double   f64[2];
	};
	long count; // number of bytes bread() stored in this block (0 to 16)
} block_t;

// Description of one output type:
//   type   conversion letter ('d', 'o', 'u', 'x', 'f', 'a', 'c' or 'l')
//   bytes  size of one element in bytes
//   pads   field width in the static table; main overwrites it with the
//          number of extra spaces bprint() writes before each element
//   padm   width of a full 16-byte line of this type, used by main to align
//          lines of different types
//   form   printf format for one element (unused for 'a', 'c' and 'l')
typedef struct { int type, bytes, pads, padm; char *form; } type_t;
// Growable array of int: data holds len used entries out of cap allocated.
typedef struct { int *data; size_t cap, len; } list_t;

// Indices into types[] for the types selected by single-letter options and by
// the 'a' and 'c' type characters: ta = 'a', tb = 1-byte octal, tc = 'c',
// td = 2-byte unsigned decimal, tl = 'l' (used by -c), to = 2-byte octal,
// ts = 2-byte signed decimal, tx = 2-byte hexadecimal. These values depend on
// the order of the table below.
enum { ta = 19, tb = 4, tc = 20, td = 9, tl = 21, to = 5, ts = 1, tx = 13  };
// Table of output types. Entries 0 to 17 are the numeric types that tparse()
// searches by (type, bytes); the all-zero entry at index 18 stops that search,
// so the character types after it are only reachable through ta, tc and tl.
// The 8-byte formats use the 'l' length modifier.
static type_t types[] = {
	{ 'd', 1,  4, 80, "%4d"     }, { 'd', 2,  6, 56, "%6d"     },
	{ 'd', 4, 11, 48, "%11d"    }, { 'd', 8, 21, 44, "%21ld"   },
	{ 'o', 1,  3, 64, "%03hho"  }, { 'o', 2,  6, 56, "%06ho"   },
	{ 'o', 4, 11, 48, "%011o"   }, { 'o', 8, 22, 46, "%022lo"  },
	{ 'u', 1,  3, 64, "%3hhu"   }, { 'u', 2,  5, 48, "%5hu"    },
	{ 'u', 4, 10, 44, "%10u"    }, { 'u', 8, 20, 42, "%20lu"   },
	{ 'x', 1,  2, 48, "%2hhx"   }, { 'x', 2,  4, 40, "%4hx"    },
	{ 'x', 4,  8, 36, "%8x"     }, { 'x', 8, 16, 34, "%16lx"   },
	{ 'f', 4, 15, 64, "%15.7e"  }, { 'f', 8, 22, 46, "%22.14e" },
	{  0 , 0,  0,  0, NULL      }, { 'a', 1,  3, 64, ""        },
	{ 'c', 1,  3, 64, ""        }, { 'l', 1,  3, 64, ""        }
};

// Three-column names printed by the 'a' type for 7-bit values 0 to 32
// (bprint() masks the byte with 127 and handles 127 itself as "del").
static const char *const aTABL[] = {
	"nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
	" bs", " ht", " nl", " vt", " ff", " cr", " so", " si",
	"dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
	"can", " em", "sub", "esc", " fs", " gs", " rs", " us",
	" sp"
};
// Three-column text printed by the 'c' type for byte values 0 to 31: a
// backslash escape where one is listed, otherwise the octal value.
static const char *const cTABL[] = {
	" \\0", "001",  "002",  "003",  "004",  "005",  "006",  " \\a",
	" \\b", " \\t", " \\n", " \\v", " \\f", " \\r", "016",  "017",
	"020",  "021",  "022",  "023",  "024",  "025",  "026",  "027",
	"030",  "031",  "032",  "033",  "034",  "035",  "036",  "037"
};
// Same as cTABL but for the 'l' type selected by the -c option; values 7 and
// 11 are printed in octal here instead of as \a and \v.
static const char *const lTABL[] = {
	" \\0", "001",  "002",  "003",  "004",  "005",  "006",  "007",
	" \\b", " \\t", " \\n", "013",  " \\f", " \\r", "016",  "017",
	"020",  "021",  "022",  "023",  "024",  "025",  "026",  "027",
	"030",  "031",  "032",  "033",  "034",  "035",  "036",  "037",
};

// printf format for the address column; main rewrites Aform[4] (the
// conversion character) to 'u', 'o' or 'x' according to -A.
static char Aform[] = "%08jo";
static bool vflag; // -v given: never collapse repeated blocks into "*"
static bool Nflag; // -N given: stop after limit bytes

// start   bytes to skip before dumping (-j)
// limit   maximum number of bytes to dump (-N)
// offset  address of the next byte to be read, maintained by skip()/bread()
// total   number of bytes stored into blocks so far, compared against limit
static uintmax_t start, limit, offset, total;
// tlist holds the selected indices into types[]; mpad is the widest padm
// among them, computed in main.
static list_t tlist; static int mpad = 0;

// files points at the next file operand to open; file is the current input.
static char **files; static FILE *file;

// Option argument parsing and input positioning
static inline uintmax_t bparse(char *str);
static inline int tparse(char *str);
static inline int skip(uintmax_t offset);

// Address column printers; aprint selects the one in use (see -A in main)
static inline void aprint_generic(uintmax_t addr, char c);
static inline void aprint_none(uintmax_t addr, char c);
static void (*aprint)(uintmax_t, char) = aprint_generic;

// Reading and printing of 16-byte blocks
static inline int bread(block_t *blk);
static inline void bprint(block_t *blk, int ti);

// Advance to the next file operand
static inline FILE *fnext(void);

// Growable int list used for the selected types
static inline list_t linit(void);
static inline void lpush(list_t *list, int i);

// --help and --version output
static void hlp(void);
static void ver(void);

/*
	Entry point: parse the options, work out the padding needed to align the
	selected output types, skip the requested number of bytes and then dump
	the input in 16-byte blocks.
	
	Unless -v is given, a full block identical to the full block before it is
	replaced by a single "*" line. Returns 0 on success and 1 on a usage or
	skip error.
*/
int main(int ac, char *av[]) { A0 = av[0];
	struct opt opt = OPTGET_INIT; opt.lops = lops; int ret = 0;
	opt.str = "A:bcdj:N:ost:vx"; tlist = linit();
	for (int o; (o = optget(&opt, av, 1)) != -1;) switch (o) {
	// Aform[4] is the conversion character of the address format; every
	// branch of the inner switch leaves it, so 'A' never falls into 'b'
	case 'A': switch(opt.arg[0]) {
		case 'd': { aprint = aprint_generic; Aform[4] = 'u'; continue; }
		case 'o': { aprint = aprint_generic; Aform[4] = 'o'; continue; }
		case 'x': { aprint = aprint_generic; Aform[4] = 'x'; continue; }
		case 'n': { aprint = aprint_none; continue; }
		default: { warn("%c: invalid address base", opt.arg[0]); ret = 1;
			goto end; }}
	case 'b': { lpush(&tlist, tb); break; }
	case 'c': { lpush(&tlist, tl); break; }
	case 'd': { lpush(&tlist, td); break; }
	case 'j': {
		errno = 0; start = bparse(opt.arg); if (errno) {
		warn("%s: invalid skip value", opt.arg); ret = 1; goto end; }
		break;
	}
	case 'N': {
		errno = 0; limit = bparse(opt.arg); if (errno) {
		warn("%s: invalid limit value", opt.arg); ret = 1; goto end; }
		Nflag = true; break;
	}
	case 'o': { lpush(&tlist, to); break; }
	case 's': { lpush(&tlist, ts); break; }
	// A failed type string is an error, so report it in the exit status
	case 't': { if (tparse(opt.arg)) { ret = 1; goto end; } break; }
	case 'v': { vflag = true; break; }
	case 'x': { lpush(&tlist, tx); break; }
	// Leave through end so the type list is released on these paths too
	case 256: { hlp(); goto end; }
	case 257: { ver(); goto end; }
	default: { ret = 1; goto end; }
	}
	
	// Default to 2-byte octal when no type was selected
	if (!tlist.len) { lpush(&tlist, to); }
	if (opt.ind == ac) { file = stdin; }
	else { files = av + opt.ind; file = fnext(); }
	
	// Find the widest line among the selected types
	for (size_t i = 0; i < tlist.len; ++i) { int t = tlist.data[i];
		if (types[t].padm > mpad) { mpad = types[t].padm; }
	}
	// Spread the difference to the widest line over the elements of each
	// narrower type. padm is left untouched so that a type listed more than
	// once gets the same padding every time it is visited.
	for (size_t i = 0; i < tlist.len; ++i) { int t = tlist.data[i];
		int d = mpad - types[t].padm;
		types[t].pads = d / (16 / types[t].bytes);
	}
	
	// prev records that the other buffer holds a complete earlier block; the
	// buffers are cleared so that no indeterminate bytes are ever compared
	block_t b0, b1; int c; bool btok = false, bskp = false, prev = false;
	memset(&b0, 0, sizeof (b0)); memset(&b1, 0, sizeof (b1));
	if (skip(start) == EOF) {
		warn("cannot skip past end of input"); ret = 1; goto end;
	}
	
	while ((c = bread(btok ? &b0 : &b1)) > 0) {
		// Only a full block can repeat the previous one: a short block still
		// carries stale bytes from two reads ago beyond its count
		if (!vflag && prev && c == 16
			&& b0.i64[0] == b1.i64[0] && b0.i64[1] == b1.i64[1]) {
			if (!bskp) { fputs("*\n", stdout); bskp = true; }
		}
		else {
			(*aprint)(offset - (uintmax_t)c, ' ');
			bprint(btok ? &b0 : &b1, tlist.data[0]);
			for (size_t i = 1; i < tlist.len; ++i) {
				fputs("         ", stdout);
				bprint(btok ? &b0 : &b1, tlist.data[i]);
			} bskp = false;
		}
		if (c < 16) { break; } btok = !btok; prev = true;
	} (*aprint)(offset, '\n');
	
end:
	free(tlist.data); return ret;
}

/*
	Parse a byte count as used by -j and -N.
	
	The number is decimal by default, octal with a leading "0" and hexadecimal
	with a leading "0x" or "0X". It may be followed by one multiplier suffix:
	'b' (512), 'k' (1024) or 'm' (1048576). In hexadecimal a trailing 'b' is
	consumed as a digit.
	
	Returns the parsed value. On failure returns 0 and sets errno to ERANGE if
	the value does not fit in uintmax_t, or to EINVAL if the string has no
	digits or has trailing characters. errno is not modified on success, so
	callers clear it before the call.
*/
static inline uintmax_t bparse(char *str) {
	uintmax_t n = 0, d; int b = 10; bool any = false;
	if (*str == '0') {
		if (str[1] == 'x' || str[1] == 'X') { b = 16; str += 2; }
		// The leading zero is itself a digit, so "0" alone is valid
		else { b = 8; ++str; any = true; }
	}
	
	if (b == 16) for (;; ++str) {
		if (*str >= '0' && *str <= '9') { d = (uintmax_t)(*str - '0'); }
		// Letters stand for the values 10 to 15
		else if (*str >= 'A' && *str <= 'F') { d = (uintmax_t)(*str - 'A') + 10; }
		else if (*str >= 'a' && *str <= 'f') { d = (uintmax_t)(*str - 'a') + 10; }
		else { break; }
		if (n > (UINTMAX_MAX - d) / 16) { errno = ERANGE; return 0; }
		n = n * 16 + d; any = true;
	}
	else for (; *str >= '0' && *str < b + '0'; ++str) {
		d = (uintmax_t)(*str - '0');
		if (n > (UINTMAX_MAX - d) / (uintmax_t)b) { errno = ERANGE; return 0; }
		n = n * (uintmax_t)b + d; any = true;
	}
	
	// Reject "", "0x" and a bare suffix rather than reading them as zero
	if (!any) { errno = EINVAL; return 0; }
	
	switch (*str) {
	case 'b': { d = 512; break; }
	case 'k': { d = 1024; break; }
	case 'm': { d = 1048576; break; }
	default: { d = 0; break; }
	}
	if (d) {
		if (n > UINTMAX_MAX / d) { errno = ERANGE; return 0; }
		n *= d; ++str;
	}
	
	if (*str) { errno = EINVAL; return 0; }
	return n;
}

/*
	Read a run of decimal digits starting at *str and return its value.
	
	*str is advanced past the digits that were consumed. Returns 0 when there
	are no digits, and -1 when the value would exceed INT_MAX, in which case
	*str is left on the digit that would have overflowed.
*/
static inline int atow(char **str) {
	char *cur = *str;
	int value = 0;
	
	while (*cur >= '0' && *cur <= '9') {
		int digit = *cur - '0';
		if (value > (INT_MAX - digit) / 10) { *str = cur; return -1; }
		value = value * 10 + digit;
		++cur;
	}
	
	*str = cur;
	return value;
}

static inline int tparse(char *str) { register int c;
	for (char *s = str; *s; ++s) { register int b = 0;
		if (*s == 'a') { lpush(&tlist, ta); continue; }
		else if (*s == 'c') { lpush(&tlist, tc); continue; }
		else if (*s == 'd' || *s == 'o' || *s == 'u' || *s == 'x') {
			char *p = s + 1;
			if (*p == 'C') { b = sizeof(char); ++p; }
			else if (*p == 'S') { b = sizeof(short); ++p; }
			else if (*p == 'I') { b = sizeof(int); ++p; }
			else if (*p == 'L') { b = sizeof(long); ++p; }
			else if ((b = atow(&p)) == -1) { goto fail; }
			ssize_t m = p - (s + 1); if (!m) { b = sizeof(int); }
			if (!(b == 1 || b == 2 || b == 4 || b == 8)) {
				error(1, "no %d-byte integer type", b);
			} else { c = *s; s += m; }
		}
		else if (*s == 'f') { char *p = s + 1;
			if (*p == 'F') { b = sizeof(float); ++p; }
			else if (*p == 'D') { b = sizeof(double); ++p; }
			else if (*p == 'L') { b = sizeof(/*long*/ double); ++p; }
			else if ((b = atow(&p)) == -1) { goto fail; }
			ssize_t m = p - (s + 1); if (!m) { b = sizeof(double); }
			if (!(b == 4 || b == 8 /*|| b == 16*/)) {
				error(1, "no %d-byte float type", b);
			}
			else { c = 'f'; s += m; }
		}
		else {
fail:		error(1, "%s: invalid type string", str);
		}
		
		for (int i = 0; types[i].type; ++i) {
			if (types[i].type == c && types[i].bytes == b) {
				lpush(&tlist, i);
			}
		}
	} return 0;
}

/*
	Discard the first n bytes of input, moving on to the next file operand
	whenever the current one runs out.
	
	Returns 0 once n bytes have been consumed and sets offset to start (main
	calls this with n equal to start), or EOF if the input ends first.
*/
static inline int skip(uintmax_t n) {
	while (n) {
		if (file == NULL || fgetc(file) == EOF) {
			// fnext() closes the exhausted stream, so its result has to
			// replace file before anything is read again. Hitting the end of
			// a file consumes no byte and must not count towards n.
			if ((file = fnext()) == NULL) { return EOF; }
			continue;
		}
		--n;
	}
	offset = start; return 0;
}

/*
	Write addr using the current address format (Aform) followed by the single
	character c.
*/
static inline void aprint_generic(uintmax_t addr, char c) {
	printf(Aform, addr);
	putc(c, stdout);
}
/*
	Address printer selected by -A n: writes nothing. Both arguments are
	ignored; the signature matches aprint_generic so either can be stored in
	aprint.
*/
static inline void aprint_none(uintmax_t addr, char c) {
	(void)addr;
	(void)c;
}

/*
	Fill blk with up to 16 bytes of input and return how many were stored.
	
	Reading continues across file operands: when the current stream is NULL or
	exhausted, fnext() supplies the next one. Reading stops early when there
	is no further input or, with -N, once total reaches limit. total and
	offset are advanced by the number of bytes stored, which is also recorded
	in blk->count. Bytes of blk beyond that count are left unchanged.
*/
static inline int bread(block_t *blk) {
	int got = 0;
	
	for (;;) {
		if (got >= 16) { break; }
		if (Nflag && total >= limit) { break; }
		
		int ch = (file != NULL) ? fgetc(file) : EOF;
		if (ch == EOF) {
			file = fnext();
			if (file == NULL) { break; }
			continue;
		}
		
		blk->i8[got] = (uint8_t)ch;
		++got; ++total;
	}
	
	offset += (uintmax_t)got;
	blk->count = got;
	return got;
}

/*
	Print one line of output: the first blk->count bytes of blk formatted as
	the type types[ti], followed by a newline.
	
	Elements are separated by one space and each is preceded by the type's
	pads extra spaces. When the byte count is not a multiple of the element
	size, the last element is completed with zero bytes so that no input byte
	is left out of the dump.
*/
static inline void bprint(block_t *blk, int ti) {
	type_t t = types[ti]; bool spc = false;
	
	// Work on a zero-filled copy: bytes past blk->count may be stale or
	// never written
	long len = blk->count;
	if (len < 0) { len = 0; } else if (len > 16) { len = 16; }
	block_t b; memset(&b, 0, sizeof (b));
	memcpy(b.i8, blk->i8, (size_t)len); b.count = len;
	
	// Round up so that a trailing partial element is printed as well
	long count = (len + t.bytes - 1) / t.bytes;
	
	for (long i = 0; i < count; ++i) {
		if (spc) { fputc(' ', stdout); } else { spc = true; }
		for (int j = 0; j < t.pads; ++j) { fputc(' ', stdout); }
		
		switch (t.type) {
		case 'a': { uint8_t c = b.i8[i] & 127;
			if (c <= 32) { fputs(aTABL[c], stdout); }
			else if (c == 127) { fputs("del", stdout); }
			else { printf("%3c", c); } break;
		}
		case 'c': case 'l': { uint8_t c = b.i8[i];
			if (c < 32) { fputs(t.type == 'c' ? cTABL[c] : lTABL[c], stdout); }
			else if (c >= 127) { printf("%3o", c); }
			else { printf("%3c", c); } break;
		}
		default: { char *form = types[ti].form;
			if (t.type == 'f') {
				if (t.bytes == 4) { printf(form, (double)b.f32[i]); }
				else if (t.bytes == 8) { printf(form, b.f64[i]); }
			}
			else if (t.type == 'd') {
				// The block stores unsigned integers; convert to the signed
				// type of the same width so negative values print as such
				if (t.bytes == 1) { printf(form, (int)(int8_t)b.i8[i]); }
				else if (t.bytes == 2) { printf(form, (int)(int16_t)b.i16[i]); }
				else if (t.bytes == 4) { printf(form, (int)(int32_t)b.i32[i]); }
				else if (t.bytes == 8) { printf(form, (long)(int64_t)b.i64[i]); }
			}
			else {
				if (t.bytes == 1) { printf(form, b.i8[i]); }
				else if (t.bytes == 2) { printf(form, b.i16[i]); }
				else if (t.bytes == 4) { printf(form, b.i32[i]); }
				// The 8-byte formats in types[] use the 'l' length modifier
				else if (t.bytes == 8) { printf(form, (unsigned long)b.i64[i]); }
			}
			break;
		}
		}
	} fputc('\n', stdout);
	return;
}

/*
	Close the current input (unless it is stdin) and open the next file
	operand.
	
	An operand of "-" selects stdin. An operand that cannot be opened is
	reported and skipped, so one bad name does not stop the remaining operands
	from being dumped. Returns the opened stream, or NULL when no operand is
	left, which includes the case where od reads stdin only and files was
	never set.
*/
static inline FILE *fnext(void) {
	// Clear file after closing so a stale pointer is never closed twice
	if (file && file != stdin) { fclose(file); file = NULL; }
	
	while (files && *files) {
		char *name = *files++;
		if (name[0] == '-' && name[1] == '\0') { return stdin; }
		
		FILE *f = fopen(name, "r");
		if (f) { return f; }
		// NOTE(review): the other warn() calls in this file pass no trailing
		// newline — confirm whether warn() appends one itself
		warn("%s: %s\n", name, strerror(errno));
	}
	return NULL;
}

/*
	Create an empty list with room for 32 entries. The caller owns list.data
	and releases it with free(). Terminates the program with status 1 if the
	allocation fails, so the returned list is always usable.
*/
static inline list_t linit(void) {
	list_t list; list.cap = 32; list.len = 0;
	list.data = malloc(sizeof (*list.data) * list.cap);
	if (list.data == NULL) {
		fputs("od: out of memory\n", stderr); exit(1);
	}
	return list;
}

/*
	Append i to list, doubling the capacity when the list is full.
	Terminates the program with status 1 if the list cannot grow.
*/
static inline void lpush(list_t *list, int i) {
	if (list->len >= list->cap) {
		size_t cap = list->cap ? list->cap * 2 : 32;
		// realloc takes a size in bytes, so scale by the element size and
		// make sure neither the doubling nor the scaling wrapped around
		if (cap <= list->cap || cap > SIZE_MAX / sizeof (*list->data)) {
			fputs("od: out of memory\n", stderr); exit(1);
		}
		// Keep the old pointer until the new one is known to be valid
		int *data = realloc(list->data, cap * sizeof (*data));
		if (data == NULL) {
			fputs("od: out of memory\n", stderr); exit(1);
		}
		list->data = data; list->cap = cap;
	}
	list->data[list->len++] = i;
}

// Write the --help text (summary, usage line and option list) to stdout.
static void hlp(void) {
	puts("od - dump files in various formats\n");
	puts("usage: od [-bcdosvx] [-A base] [-j skip] [-N count] [-t type] "
		"[file...]\n");
	puts("options:");
	puts("  -A base    Offset address base ('d', 'o', 'x', or 'n')");
	// -b selects 1-byte octal, i.e. the type string "o1" (letter o, not zero)
	puts("  -b         Interpret bytes in octal (-to1)");
	puts("  -c         Interpret bytes as characters");
	puts("  -d         Interpret words in unsigned decimal (-tu2)");
	puts("  -j skip    Number of bytes to skip from the beginning of input");
	puts("  -N count   Number of bytes to process in total");
	puts("  -o         Interpret words in octal (-to2)");
	puts("  -s         Interpret words in signed decimal (-td2)");
	puts("  -t type    Interpret data according to specified types");
	puts("  -v         Write all input data");
	puts("  -x         Interpret words in hexadecimal (-tx2)");
	puts("  --help     Display help information");
	puts("  --version  Display version information");
}

static void ver(void) {
	puts("OMKOV coreutils od, version " VERSION);
	puts("Copyright (C) 2020, Jakob Wakeling");
	puts("MIT Licence (https://opensource.org/licenses/MIT)");
}