Author | Jamozed <[email protected]> |
Date | 2020-06-27 02:35:08 |
Commit | c1461fb65f12a277db640d00e8a74b846a00fd3a |
Parent | 8b4f9998e7324543aa53e38b36050d3afee83680 |
od: Add POSIX od
Diffstat
M | CMakeLists.txt | | | 1 | + |
M | README.md | | | 1 | + |
A | man/od.1 | | | 101 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | src/od.c | | | 398 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
4 files changed, 501 insertions, 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index e835d70..d8b0102 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,7 @@ ADD_EXECUTABLE(link ${CMAKE_SOURCE_DIR}/src/link.c) ADD_EXECUTABLE(logname ${CMAKE_SOURCE_DIR}/src/logname.c) ADD_EXECUTABLE(mkdir ${CMAKE_SOURCE_DIR}/src/mkdir.c) ADD_EXECUTABLE(nice ${CMAKE_SOURCE_DIR}/src/nice.c) +ADD_EXECUTABLE(od ${CMAKE_SOURCE_DIR}/src/od.c) ADD_EXECUTABLE(pwd ${CMAKE_SOURCE_DIR}/src/pwd.c) ADD_EXECUTABLE(rand ${CMAKE_SOURCE_DIR}/src/rand.c) ADD_EXECUTABLE(realpath ${CMAKE_SOURCE_DIR}/src/realpath.c) diff --git a/README.md b/README.md index 72351b2..5ba563a 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ UNIX-like systems. | logname | Return the user's login name | POSIX | | mkdir | Make directories | POSIX | | nice | Execute with an altered nice value | POSIX | +| od\* | Dump files in various formats | POSIX | | pwd | Print working directory | POSIX | | rand | Generate random string | | | realpath | Resolve an absolute pathname | | diff --git a/man/od.1 b/man/od.1 new file mode 100644 index 0000000..c3f5906 --- /dev/null +++ b/man/od.1 @@ -0,0 +1,101 @@ +.TH OD 1 2020-06-27 "OMKOV coreutils" "General Commands Manual" +.SH NAME +od \(em dump files in various formats +.SH SYNOPSIS +\fBod\fR [-bcdosx] [-v] [-A \fIbase\fR] [-j \fIskip\fR] [-N \fIcount\fR] +[-t \fItype\fR] [\fIfile\fR...] +.SH DESCRIPTION +Write the contents of its input files to standard output in the specified +format(s). +.SH OPTIONS +The following options are supported: +.TP +.B -A \fIbase\fR +Offset address base. The characters '\fBd\fR', '\fBo\fR', and '\fBx\fR' will +result in an address offset base of decimal, octal, or hexadecimal, +respectively. The character '\fBn\fR' will result in no offset address being +printed. +.TP +.B -b +Interpret bytes in octal. Equivalent to \fB-to1\fR. +.TP +.B -c +Interpret bytes as characters. 
Characters \fB\\0\fR, \fB\\b\fR, \fB\\f\fR, +\fB\\n\fR, \fB\\r\fR, and \fB\\t\fR will be displayed as escape sequences. Other +non-printable characters will be displayed as octal bytes. +.TP +.B -d +Interpret words in unsigned decimal. Equivalent to \fB-tu2\fR. +.TP +.B -j \fIskip\fR +Number of bytes to skip from the beginning of input. May be an octal or +hexadecimal number if prefixed by '\fB0\fR' or '\fB0x\fR', respectively, +otherwise it will be interpreted as a decimal number. Appending the characters +\&'\fBb\fR', '\fBk\fR', or '\fBm\fR' will cause \fIskip\fR to be interpreted as +a multiple of 512, 1024, or 1048576 bytes, respectively. Any '\fBb\fR' character +in a hexadecimal number will be interpreted as a number, not as a multiplier. +.TP +.B -N \fIcount\fR +Number of bytes to process in total. \fIcount\fR will be interpreted in the same +manner as \fIskip\fR. +.TP +.B -o +Interpret words in octal. Equivalent to \fB-to2\fR. +.TP +.B -s +Interpret words in signed decimal. Equivalent to \fB-td2\fR. +.TP +.B -t \fItype\fR +Interpret data according to specified types. \fItype\fR may contain multiple +type specifiers. Each type specifier must start with one of the characters +\&'\fBa\fR', '\fBc\fR', '\fBd\fR', '\fBf\fR', '\fBo\fR', '\fBu\fR', or +\&'\fBx\fR', specifying named character, character, signed decimal, floating +point, octal, unsigned decimal, and hexadecimal, respectively. Non-character +types may be followed by a decimal integer that specifies the number of bytes to +be interpreted at a time. Integer types (\fBd\fR, \fBo\fR, \fBu\fR, \fBx\fR) may +be followed by \fBC\fR, \fBS\fR, \fBI\fR, or \fBL\fR instead of an integer, +specifying \fBchar\fR, \fBshort\fR, \fBinteger\fR, or \fBlong\fR, respectively, +and floating-point types (\fBf\fR) may be followed by \fBF\fR, \fBD\fR, or +\&\fBL\fR, specifying \fBfloat\fR, \fBdouble\fR, or \fBlong double\fR, +respectively. +.TP +.B -v +Write all input data. 
If \fB-v\fR is not set, repeated lines will be output as a +single line containing only an asterisk. +.TP +.B -x +Interpret words in hexadecimal. Equivalent to \fB-tx2\fR. +.P +Multiple types can be specified using multiple \fB-bcdostx\fR options. Each +block of data is written in each of the specified formats in the order that they +were specified. If no type is specified, data will be output equivalent to +\fB-to2\fR with offset addresses printed in octal. +.TP +.B --help +Display help information. +.TP +.B --version +Display version information. +.SH OPERANDS +The following operand is supported: +.TP +.I file +A pathname of an input file. If no \fIfile\fR operands are specified, or +\fIfile\fR is '\fB-\fR', \fIod\fR will read from standard input. +.SH EXIT STATUS +The following exit values will be returned: +.TP +\ 0 +All files were processed successfully. +.TP +>0 +An error occurred. +.SH STANDARDS +The \fIod\fR utility is compliant with the IEEE Std 1003.2-1992 ("POSIX.2") +specification. +.SH COPYRIGHT +.nf +Copyright (C) 2020, Jakob Wakeling +All rights reserved. +OMKOV Permissive Licence (https://www.omkov.net/OLPE) +.fi diff --git a/src/od.c b/src/od.c new file mode 100644 index 0000000..4c3067d --- /dev/null +++ b/src/od.c @@ -0,0 +1,398 @@ +// od.c, version 0.8.0 +// OMKOV coreutils implementation of POSIX od +// Copyright (C) 2020, Jakob Wakeling +// All rights reserved. + +/* +OMKOV Permissive Licence, version 1.0 + +Copyright (C) 2020, Jakob Wakeling +All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimers. +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimers in the documentation and/or + other materials provided with the distribution. +* Neither the names of the copyright holders, nor the names of its contributors + may be used to endorse or promote products derived from this Software without + specific prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT +HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. +*/ + +/* + TODO Implement [[+]offset[.][b]] operand as required by POSIX. + TODO Fix segfault when using standard input. 
+*/ + +#include "error.h" +#include "optget.h" + +#include <errno.h> +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +typedef struct { + union { + uint8_t i8[16]; uint16_t i16[8]; + uint32_t i32[4]; uint64_t i64[2]; + float f32[4]; double f64[2]; + }; + long count; +} block_t; + +typedef struct { int type, bytes, pads, padm; char *form; } type_t; +typedef struct { int *data; size_t cap, len; } list_t; + +enum { ta = 19, tb = 4, tc = 20, td = 9, tl = 21, to = 5, ts = 1, tx = 13 }; +static type_t types[] = { + { 'd', 1, 4, 80, "%4d" }, { 'd', 2, 6, 56, "%6d" }, + { 'd', 4, 11, 48, "%11d" }, { 'd', 8, 21, 44, "%21ld" }, + { 'o', 1, 3, 64, "%03hho" }, { 'o', 2, 6, 56, "%06ho" }, + { 'o', 4, 11, 48, "%011o" }, { 'o', 8, 22, 46, "%022lo" }, + { 'u', 1, 3, 64, "%3hhu" }, { 'u', 2, 5, 48, "%5hu" }, + { 'u', 4, 10, 44, "%10u" }, { 'u', 8, 20, 42, "%20lu" }, + { 'x', 1, 2, 48, "%2hhx" }, { 'x', 2, 4, 40, "%4hx" }, + { 'x', 4, 8, 36, "%8x" }, { 'x', 8, 16, 34, "%16lx" }, + { 'f', 4, 15, 64, "%15.7e" }, { 'f', 8, 22, 46, "%22.14e" }, + { 0 , 0, 0, 0, NULL }, { 'a', 1, 3, 64, "" }, + { 'c', 1, 3, 64, "" }, { 'l', 1, 3, 64, "" } +}; + +static const char *const aTABL[] = { + "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel", + " bs", " ht", " nl", " vt", " ff", " cr", " so", " si", + "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb", + "can", " em", "sub", "esc", " fs", " gs", " rs", " us", + " sp" +}; +static const char *const cTABL[] = { + " \\0", "001", "002", "003", "004", "005", "006", " \\a", + " \\b", " \\t", " \\n", " \\v", " \\f", " \\r", "016", "017", + "020", "021", "022", "023", "024", "025", "026", "027", + "030", "031", "032", "033", "034", "035", "036", "037" +}; +static const char *const lTABL[] = { + " \\0", "001", "002", "003", "004", "005", "006", "007", + " \\b", " \\t", " \\n", "013", " \\f", " \\r", "016", "017", + "020", "021", "022", "023", "024", "025", "026", "027", 
+ "030", "031", "032", "033", "034", "035", "036", "037", +}; + +static const char *ARG0; + +static char Aform[] = "%08jo"; +static bool vflag; +static bool Nflag; + +static uintmax_t start, limit, offset, total; +static list_t tlist; static int mpad = 0; + +static char **files; static FILE *file; + +static inline uintmax_t bparse(char *str); +static inline int tparse(char *str); +static inline int skip(uintmax_t offset); + +static inline void aprint_generic(uintmax_t addr, char c); +static inline void aprint_none(uintmax_t addr, char c); +static void (*aprint)(uintmax_t, char) = aprint_generic; + +static inline int bread(block_t *blk); +static inline void bprint(block_t *blk, int ti); + +static inline FILE *fnext(void); + +static inline list_t linit(void); +static inline void lpush(list_t *list, int i); + +static void help(void); +static void version(void); + +int main(int argc, char *argv[]) { ARG0 = argv[0]; + lop_t lops[] = { + { "help", ARG_NUL, 256 }, + { "version", ARG_NUL, 257 }, + { NULL, 0, 0 } + }; + + opt_t opt = OPTGET_INIT; opt.lops = lops; int o, ret = 0; + opt.str = "A:bcdj:N:ost:vx"; tlist = linit(); + while ((o = optget(&opt, argv, 1)) != -1) switch (o) { + case 'A': switch(opt.arg[0]) { + case 'd': { aprint = aprint_generic; Aform[4] = 'u'; continue; } + case 'o': { aprint = aprint_generic; Aform[4] = 'o'; continue; } + case 'x': { aprint = aprint_generic; Aform[4] = 'x'; continue; } + case 'n': { aprint = aprint_none; continue; } + default: { fprintf(stderr, "%s: invalid address base '%c'\n", argv[0], + opt.arg[0]); ret = 1; goto end; }} + case 'b': { lpush(&tlist, tb); break; } + case 'c': { lpush(&tlist, tl); break; } + case 'd': { lpush(&tlist, td); break; } + case 'j': { errno = 0; start = bparse(opt.arg); if (errno) { + fprintf(stderr, "%s: invalid skip value '%s'\n", argv[0], opt.arg); + ret = 1; goto end; } break; } + case 'N': { errno = 0; limit = bparse(opt.arg); if (errno) { + fprintf(stderr, "%s: invalid limit value '%s'\n", argv[0], 
opt.arg); + ret = 1; goto end; } Nflag = true; break; } + case 'o': { lpush(&tlist, to); break; } + case 's': { lpush(&tlist, ts); break; } + case 't': { if (tparse(opt.arg)) { goto end; } break; } + case 'v': { vflag = true; break; } + case 'x': { lpush(&tlist, tx); break; } + case 256: { help(); return 0; } + case 257: { version(); return 0; } + default: { ret = 1; goto end; } + } + + if (!tlist.len) { lpush(&tlist, to); } + if (opt.ind == argc) { file = stdin; } + else { files = argv + opt.ind; file = fnext(); } + + for (size_t i = 0; i < tlist.len; ++i) { int t = tlist.data[i]; + if (types[t].padm > mpad) { mpad = types[t].padm; } + } + for (size_t i = 0; i < tlist.len; ++i) { int t = tlist.data[i]; + if (types[t].padm < mpad) { + int d = mpad - types[t].padm; + int e = d / (16 / types[t].bytes); + types[t].pads = e; types[t].padm = mpad; + } else { types[t].pads = 0; } + } + + block_t b0, b1; int c; bool btok = false, bskp = false; + if (skip(start) == EOF) { + fprintf(stderr, "%s: cannot skip past end of input\n", ARG0); + ret = 1; goto end; + } + + while ((c = bread(btok ? &b0 : &b1)) > 0) { + if (!vflag && b0.i64[0] == b1.i64[0] && b0.i64[1] == b1.i64[1]) { + if (!bskp) { fputs("*\n", stdout); bskp = true; } + } + else { + (*aprint)(offset - (uintmax_t)c, ' '); + bprint(btok ? &b0 : &b1, tlist.data[0]); + for (size_t i = 1; i < tlist.len; ++i) { + fputs(" ", stdout); + bprint(btok ? 
&b0 : &b1, tlist.data[i]); + } bskp = false; + } + if (c < 16) { break; } btok = !btok; + } (*aprint)(offset, '\n'); + +end: + free(tlist.data); return ret; +} + +static inline uintmax_t bparse(char *str) { + register uintmax_t n = 0, d; register int b = 10; + if (*str == '0') { + if (*++str == 'x' || *str == 'X') { b = 16; ++str; } + else { b = 8; } + } + + if (b == 16) for (;; ++str) { + if (*str >= '0' && *str <= '9') { d = (uintmax_t)*str - '0'; } + else if (*str >= 'A' && *str <= 'F') { d = (uintmax_t)*str - 'K'; } + else if (*str >= 'a' && *str <= 'f') { d = (uintmax_t)*str - 'k'; } + else { break; } + if (n > (UINTMAX_MAX - d) / 16) { errno = ERANGE; return 0; } + n = n * 16 + d; + } + else for (; *str >= '0' && *str < b + '0'; ++str) { + d = (uintmax_t)*str - '0'; + if (n > (UINTMAX_MAX - d) / (uintmax_t)b) { errno = ERANGE; return 0; } + n = n * (uintmax_t)b + d; + } + + switch (*str) { + case 'b': { d = 512; goto mul; } + case 'k': { d = 1024; goto mul; } + case 'm': { d = 1048576; } +mul: if (n > UINTMAX_MAX / d) { errno = ERANGE; return 0; } + n *= d; ++str; + } + + if (*str) { errno = EINVAL; return 0; } + return n; +} + +static inline int atow(char **str) { + register int n = 0, d; + for (; **str >= '0' && **str <= '9'; ++*str) { + d = (int)**str - '0'; + if (n > (INT_MAX - d) / 10) { return -1; } + n = n * 10 + d; + } return n; +} + +static inline int tparse(char *str) { register int c; + for (char *s = str; *s; ++s) { register int b = 0; + if (*s == 'a') { lpush(&tlist, ta); continue; } + else if (*s == 'c') { lpush(&tlist, tc); continue; } + else if (*s == 'd' || *s == 'o' || *s == 'u' || *s == 'x') { + char *p = s + 1; + if (*p == 'C') { b = sizeof(char); ++p; } + else if (*p == 'S') { b = sizeof(short); ++p; } + else if (*p == 'I') { b = sizeof(int); ++p; } + else if (*p == 'L') { b = sizeof(long); ++p; } + else if ((b = atow(&p)) == -1) { goto fail; } + ssize_t m = p - (s + 1); if (!m) { b = sizeof(int); } + if (!(b == 1 || b == 2 || b == 4 || b 
== 8)) { + fprintf(stderr, "%s: no %d-byte integer type\n", ARG0, b); + return 1; + } else { c = *s; s += m; } + } + else if (*s == 'f') { char *p = s + 1; + if (*p == 'F') { b = sizeof(float); ++p; } + else if (*p == 'D') { b = sizeof(double); ++p; } + else if (*p == 'L') { b = sizeof(/*long*/ double); ++p; } + else if ((b = atow(&p)) == -1) { goto fail; } + ssize_t m = p - (s + 1); if (!m) { b = sizeof(double); } + if (!(b == 4 || b == 8 /*|| b == 16*/)) { + fprintf(stderr, "%s: no %d-byte float type\n", ARG0, b); + return 1; + } + else { c = 'f'; s += m; } + } + else { +fail: fprintf(stderr, "%s: invalid type string '%s'\n", ARG0, str); + return 1; + } + + for (int i = 0; types[i].type; ++i) { + if (types[i].type == c && types[i].bytes == b) { + lpush(&tlist, i); + } + } + } return 0; +} + +static inline int skip(uintmax_t n) { + for (; n; --n) if (fgetc(file) == EOF && fnext() == NULL) { return EOF; } + offset = start; return 0; +} + +static inline void aprint_generic(uintmax_t addr, char c) { + printf(Aform, addr); fputc(c, stdout); return; +} +static inline void aprint_none(uintmax_t addr, char c) { + (void)(addr); (void)(c); return; +} + +static inline int bread(block_t *blk) { + int i = 0; int c; + while (i < 16 && (!Nflag || total < limit)) { + if (file == NULL || (c = fgetc(file)) == EOF) { + if ((file = fnext()) == NULL) { break; } + else { continue; } + } blk->i8[i++] = (uint8_t)c; ++total; + } + offset += (uintmax_t)i; + blk->count = i; return i; +} + +static inline void bprint(block_t *blk, int ti) { + type_t t = types[ti]; bool spc = false; long count = blk->count / t.bytes; + + for (int i = 0; i < count; ++i) { + if (spc) { fputc(' ', stdout); } else { spc = true; } + for (int j = 0; j < t.pads; ++j) { fputc(' ', stdout); } + + switch (t.type) { + case 'a': { register uint8_t c = blk->i8[i] & 127; + if (c <= 32) { fputs(aTABL[c], stdout); } + else if (c == 127) { fputs("del", stdout); } + else { printf("%3c", c); } break; + } + case 'c': case 'l': { 
register uint8_t c = blk->i8[i]; + if (c < 32) { fputs(t.type == 'c' ? cTABL[c] : lTABL[c], stdout); } + else if (c >= 127) { printf("%3o", c); } + else { printf("%3c", c); } break; + } + default: { char *form = types[ti].form; + if (t.type == 'f') { + if (t.bytes == 4) { printf(form, (double)blk->f32[i]); } + else if (t.bytes == 8) { printf(form, blk->f64[i]); } + } + else { + if (t.bytes == 1) { printf(form, blk->i8[i]); } + else if (t.bytes == 2) { printf(form, blk->i16[i]); } + else if (t.bytes == 4) { printf(form, blk->i32[i]); } + else if (t.bytes == 8) { printf(form, blk->i64[i]); } + } + } + } + } fputc('\n', stdout); + return; +} + +static inline FILE *fnext(void) { + if (file && file != stdin) { fclose(file); } + if (*files) { FILE *f; + if (files[0][0] == '-' && files[0][1] == '\0') { f = stdin; } + else if (!(f = fopen(*files, "r"))) { + fprintf(stderr, "%s: %s: %s\n", ARG0, *files, strerror(errno)); + return NULL; + } ++files; return f; + } else { return NULL; } +} + +static inline list_t linit(void) { + list_t list; list.cap = 32; list.len = 0; + list.data = (int *)malloc(sizeof(int) * list.cap); + return list; +} + +static inline void lpush(list_t *list, int i) { + if (list->len >= list->cap) { + list->data = (int *)realloc(list->data, list->cap *= 2); + } list->data[list->len++] = i; return; +} + +static void help(void) { + puts("od - dump files in various formats\n"); + puts("usage: od [-bcdosvx] [-A base] [-j skip] [-N count] [-t type] \ +[file...]\n"); + puts("options:"); + puts(" -A base Offset address base ('d', 'o', 'x', or 'n')"); + puts(" -b Interpret bytes in octal (-t01)"); + puts(" -c Interpret bytes as characters"); + puts(" -d Interpret words in unsigned decimal (-tu2)"); + puts(" -j skip Number of bytes to skip from the beginning of input"); + puts(" -N count Number of bytes to process in total"); + puts(" -o Interpret words in octal (-to2)"); + puts(" -s Interpret words in signed decimal (-td2)"); + puts(" -t type Interpret data 
according to specified types"); + puts(" -v Write all input data"); + puts(" -x Interpret words in hexadecimal (-tx2)"); + puts(" --help Display help information"); + puts(" --version Display version information"); + return; +} + +static void version(void) { + puts("OMKOV coreutils od, version 0.8.0"); + puts("Copyright (C) 2020, Jakob Wakeling"); + puts("All rights reserved."); + puts("OMKOV Permissive Licence (https://www.omkov.net/OLPE)"); + return; +}