coreutils

General Software Utilities
git clone http://git.omkov.net/coreutils
Log | Tree | Refs | README | LICENCE | Download

coreutils/src/wc.c (136 lines, 3.9 KiB) -rw-r--r-- blame download

0123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
// wc.c, version 0.2.0
// OMKOV coreutils implementation of POSIX wc
// Copyright (C) 2021, Jakob Wakeling
// MIT Licence

/*
	FIXME See line 112.
*/

#include "util/error.h"
#include "util/optget.h"

#include <ctype.h>
#include <locale.h>
#include <stdbool.h>
#include <stdio.h>
#include <wchar.h>
#include <wctype.h>

#define VERSION "0.2.0"

// Long option table handed to optget() through opt.lops in main(). The values
// 256 and 257 are the codes that main() dispatches to hlp() and ver().
// NOTE(review): ARG_NUL presumably means "takes no argument" — confirm against
// util/optget.h
static struct lop lops[] = {
	{ "help",    ARG_NUL, 256 },
	{ "version", ARG_NUL, 257 },
	{ NULL, 0, 0 } // presumably the end-of-table sentinel — confirm
};

// Output selection flags, set in main() by the -c, -l, -m and -w options
// (bytes, newlines, characters and words respectively)
static bool cflag, lflag, mflag, wflag;
// Running totals of bytes, newlines, characters and words, accumulated by
// wc() and printed by main() as "total" when more than one operand is given
static size_t ctotal, ltotal, mtotal, wtotal;

// Count one file (or stdin), print its counts and add them to the totals;
// returns non-zero if the file could not be opened
static inline int wc(const char *file);
// Print the counts selected by the option flags, followed by the name f if
// it is not NULL
static void report(size_t c, size_t l, size_t m, size_t w, const char *f);

// Print help and version information respectively
static void hlp(void);
static void ver(void);

/*
	Parse the options, count every file operand (or stdin when there are
	none) and print a "total" line when more than one operand was given.
	Returns 0 on success, or 1 if optget() yields an unhandled option or if
	any operand could not be opened.
*/
int main(int ac, char *av[]) { A0 = av[0];
	struct opt opt = OPTGET_INIT; opt.str = "clmw"; opt.lops = lops;
	for (int o; (o = optget(&opt, av, 1)) != -1;) switch (o) {
	case 'c': { cflag = true; break; }
	case 'l': { lflag = true; break; }
	case 'm': { mflag = true; break; }
	case 'w': { wflag = true; break; }
	case 256: { hlp(); return 0; }
	case 257: { ver(); return 0; }
	default: { return 1; }
	}
	
	// Adopt the environment's locale so that the multibyte decoding and
	// iswspace() classification in wc() follow it; the return value is not
	// needed, so it is not stored
	setlocale(LC_ALL, "");
	
	// If no options specified, use default format
	if (!cflag && !lflag && !mflag && !wflag) { cflag = lflag = wflag = true; }
	bool warned = false;
	
	// With no operands read stdin, wc() cannot fail to open in that case
	if (opt.ind == ac) { wc(NULL); }
	else for (char **p = &av[opt.ind]; *p; ++p) if (wc(*p)) {
		warn("%s: %s", *p, serr()); warned = true;
	}
	
	// A grand total is only printed when more than one operand was given
	if ((ac - opt.ind) > 1) { report(ctotal, ltotal, mtotal, wtotal, "total"); }
	
	return warned;
}

/*
	Count the number of bytes, characters, words and lines in a file.
	If the file path given is NULL or "-", then use stdin.
	The counts are printed with report() and added to the running totals.
	Returns 0 on success, or 1 if the file could not be opened.
*/
static inline int wc(const char *file) {
	char buf[BUFSIZ * 16]; FILE *fi;
	
	size_t ccount = 0, lcount = 0, mcount = 0, wcount = 0;
	bool inword = false;
	
	// Explicit conversion state, so that a multibyte character split across
	// two reads is resumed correctly and no state leaks from one file to
	// the next
	mbstate_t st = { 0 };
	
	if (!file || (file[0] == '-' && file[1] == 0)) { fi = stdin; }
	else if (!(fi = fopen(file, "r"))) { return 1; }
	
	// NOTE(review): a read error ends this loop just like end of file does;
	// ferror() is not consulted
	for (size_t c; (c = fread(buf, 1, sizeof (buf), fi)) != 0;) {
		size_t r = c; ccount += c;
		
		for (char *p = buf; r != 0;) {
			wchar_t ch; size_t n = mbrtowc(&ch, p, r, &st);
			
			// Incomplete character at the end of the buffer: mbrtowc has
			// already consumed all r bytes into st, so stop here and let
			// the next read supply the remaining bytes
			if (n == (size_t)-2) { break; }
			
			// Invalid sequence: the conversion state is unspecified, so
			// reset it and resynchronise on the next byte; the offending
			// byte is not counted as a character
			if (n == (size_t)-1) { st = (mbstate_t){ 0 }; ++p; --r; continue; }
			
			// Null character: mbrtowc returns 0, step over a single byte
			if (n == 0) { n = 1; ch = 0; }
			
			if (ch == L'\n') { ++lcount; }
			if (iswspace(ch)) { if (inword) { inword = false; ++wcount; }}
			else { inword = true; }
			
			p += n; r -= n; ++mcount;
		}
	}
	
	if (inword) { ++wcount; } // Add final word to count if applicable
	
	report(ccount, lcount, mcount, wcount, file);
	ctotal += ccount; ltotal += lcount; mtotal += mcount; wtotal += wcount;
	
	if (fi != stdin) { fclose(fi); } return 0;
}

/*
	Print the counts enabled by the option flags, in the order newlines,
	words, characters, bytes, each followed by a space, then the name f if
	it is not NULL, then a newline.
*/
static void report(size_t c, size_t l, size_t m, size_t w, const char *f) {
	// One entry per output column, in print order
	const struct { bool on; size_t val; } cols[] = {
		{ lflag, l }, { wflag, w }, { mflag, m }, { cflag, c },
	};
	
	for (size_t i = 0; i < sizeof (cols) / sizeof (cols[0]); ++i) {
		if (cols[i].on) { printf("%zu ", cols[i].val); }
	}
	
	if (f != NULL) { fputs(f, stdout); }
	putchar('\n');
}

/* Write the usage summary and option list to stdout, one line per entry */
static void hlp(void) {
	// NULL-terminated list; puts() appends the newline after each entry
	static const char *const text[] = {
		"wc - word, line, and byte or character count\n",
		"usage: wc [-clmw] [file...]\n",
		"options:",
		"  -c         Print the number of bytes in the file",
		"  -l         Print the number of newlines in the file",
		"  -m         Print the number of characters in the file",
		"  -w         Print the number of words in the file",
		"  --help     Display help information",
		"  --version  Display version information",
		NULL
	};
	
	for (const char *const *line = text; *line != NULL; ++line) {
		puts(*line);
	}
}

/* Print version information */
static void ver(void) {
	puts("OMKOV coreutils wc, version " VERSION);
	puts("Copyright (C) 2020, Jakob Wakeling");
	puts("MIT Licence (https://opensource.org/licenses/MIT)");
}