// wc.c, version 0.2.0 // OMKOV coreutils implementation of POSIX wc // Copyright (C) 2021, Jakob Wakeling // MIT Licence /* FIXME See line 112. */ #include "util/error.h" #include "util/optget.h" #include #include #include #include #include #include #define VERSION "0.2.0" static struct lop lops[] = { { "help", ARG_NUL, 256 }, { "version", ARG_NUL, 257 }, { NULL, 0, 0 } }; static bool cflag, lflag, mflag, wflag; static size_t ctotal, ltotal, mtotal, wtotal; static inline int wc(const char *file); static void report(size_t c, size_t l, size_t m, size_t w, const char *f); static void hlp(void); static void ver(void); int main(int ac, char *av[]) { A0 = av[0]; struct opt opt = OPTGET_INIT; opt.str = "clmw"; opt.lops = lops; for (int o; (o = optget(&opt, av, 1)) != -1;) switch (o) { case 'c': { cflag = true; break; } case 'l': { lflag = true; break; } case 'm': { mflag = true; break; } case 'w': { wflag = true; break; } case 256: { hlp(); return 0; } case 257: { ver(); return 0; } default: { return 1; } } char *lc = setlocale(LC_ALL, ""); // If no options specified, use default format if (!cflag && !lflag && !mflag && !wflag) { cflag = lflag = wflag = true; } bool warned = false; if (opt.ind == ac) { wc(NULL); } else for (char **p = &av[opt.ind]; *p; ++p) if (wc(*p)) { warn("%s: %s", *p, serr()); warned = true; } if ((ac - opt.ind) > 1) { report(ctotal, ltotal, mtotal, wtotal, "total"); } return warned; } /* Count the number of bytes, characters, words and lines in a file. If the file path given is NULL or "-", then use stdin. */ static inline int wc(const char *file) { char buf[BUFSIZ * 16]; FILE *fi; size_t ccount = 0, lcount = 0, mcount = 0, wcount = 0; bool inword = false; if (!file || (file[0] == '-' && file[1] == 0)) { fi = stdin; } else if (!(fi = fopen(file, "r"))) { return 1; } for (size_t c; (c = fread(buf, 1, sizeof (buf), fi)) != 0;) { size_t r = c; ccount += c; bool failflag = false; for (char *p = buf; r != 0;) { wchar_t wc; size_t n = mbrtowc(&wc, p, r, NULL); switch (n) { // FIXME Case (size_t)-2 fails when incomplete strings of more than // one byte are encountered, I can't think of a way to fix this // right now because I have no way of knowing how many bytes it has // processed. This shouldn't occur on correcly encoded files though case (size_t)-2: { ++p; --r; continue; } case (size_t)-1: { ++p; --r; continue; } case 0: { n = 1; wc = 0; break; } } if (wc == '\n') { ++lcount; } if (iswspace(wc)) { if (inword) { inword = false; ++wcount; }} else { inword = true; } p += n; r -= n; ++mcount; } } if (inword) { ++wcount; } // Add final word to count if applicable report(ccount, lcount, mcount, wcount, file); ctotal += ccount; ltotal += lcount; mtotal += mcount; wtotal += wcount; if (fi != stdin) { fclose(fi); } return 0; } /* Report the appropriate metrics */ static void report(size_t c, size_t l, size_t m, size_t w, const char *f) { if (lflag) { printf("%zu ", l); } if (wflag) { printf("%zu ", w); } if (mflag) { printf("%zu ", m); } if (cflag) { printf("%zu ", c); } if (f) { fputs(f, stdout); } fputc('\n', stdout); return; } /* Print help information */ static void hlp(void) { puts("wc - word, line, and byte or character count\n"); puts("usage: wc [-clmw] [file...]\n"); puts("options:"); puts(" -c Print the number of bytes in the file"); puts(" -l Print the number of newlines in the file"); puts(" -m Print the number of characters in the file"); puts(" -w Print the number of words in the file"); puts(" --help Display help information"); puts(" --version Display version information"); } /* Print version information */ static void ver(void) { puts("OMKOV coreutils wc, version " VERSION); puts("Copyright (C) 2020, Jakob Wakeling"); puts("MIT Licence (https://opensource.org/licenses/MIT)"); }