coreutils

General Software Utilities
git clone http://git.omkov.net/coreutils
Log | Tree | Refs | README | LICENCE | Download

coreutils/src/wc.c (136 lines, 3.9 KiB) -rw-r--r-- file download

be0a1d2 Jamozed 2021-01-08 02:51:54
0
// wc.c, version 0.2.0
ca21463 Jamozed 2021-01-03 14:02:38
1
// OMKOV coreutils implementation of POSIX wc
1fcd349 Jamozed 2021-01-08 03:07:11
2
// Copyright (C) 2021, Jakob Wakeling
e2140ec Jamozed 2022-03-06 15:27:45
3
// MIT Licence
ca21463 Jamozed 2021-01-03 14:02:38
4
ca21463 Jamozed 2021-01-03 14:02:38
5
/*
be0a1d2 Jamozed 2021-01-08 02:51:54
6
	FIXME See line 112.
ca21463 Jamozed 2021-01-03 14:02:38
7
*/
ca21463 Jamozed 2021-01-03 14:02:38
8
b181413 Jamozed 2022-02-05 22:32:00
9
#include "util/error.h"
#include "util/optget.h"

#include <ctype.h>
#include <errno.h>
#include <locale.h>
#include <stdbool.h>
#include <stdio.h>
#include <wchar.h>
#include <wctype.h>
ca21463 Jamozed 2021-01-03 14:02:38
18
be0a1d2 Jamozed 2021-01-08 02:51:54
19
#define VERSION "0.2.0"
ca21463 Jamozed 2021-01-03 14:02:38
20
ca21463 Jamozed 2021-01-03 14:02:38
21
static struct lop lops[] = {
ca21463 Jamozed 2021-01-03 14:02:38
22
	{ "help",    ARG_NUL, 256 },
ca21463 Jamozed 2021-01-03 14:02:38
23
	{ "version", ARG_NUL, 257 },
ca21463 Jamozed 2021-01-03 14:02:38
24
	{ NULL, 0, 0 }
ca21463 Jamozed 2021-01-03 14:02:38
25
};
ca21463 Jamozed 2021-01-03 14:02:38
26
ca21463 Jamozed 2021-01-03 14:02:38
27
// Output selection, set from the -c, -l, -m and -w options respectively
static bool cflag;
static bool lflag;
static bool mflag;
static bool wflag;

// Running byte, line, character and word totals across all inputs
static size_t ctotal;
static size_t ltotal;
static size_t mtotal;
static size_t wtotal;
ca21463 Jamozed 2021-01-03 14:02:38
29
ca21463 Jamozed 2021-01-03 14:02:38
30
// Counting and output
static inline int wc(const char *file);
static void report(size_t c, size_t l, size_t m, size_t w, const char *f);

// --help and --version output
static void hlp(void);
static void ver(void);
ca21463 Jamozed 2021-01-03 14:02:38
35
ca21463 Jamozed 2021-01-03 14:02:38
36
int main(int ac, char *av[]) { A0 = av[0];
ca21463 Jamozed 2021-01-03 14:02:38
37
	struct opt opt = OPTGET_INIT; opt.str = "clmw"; opt.lops = lops;
ca21463 Jamozed 2021-01-03 14:02:38
38
	for (int o; (o = optget(&opt, av, 1)) != -1;) switch (o) {
ca21463 Jamozed 2021-01-03 14:02:38
39
	case 'c': { cflag = true; break; }
ca21463 Jamozed 2021-01-03 14:02:38
40
	case 'l': { lflag = true; break; }
ca21463 Jamozed 2021-01-03 14:02:38
41
	case 'm': { mflag = true; break; }
ca21463 Jamozed 2021-01-03 14:02:38
42
	case 'w': { wflag = true; break; }
ca21463 Jamozed 2021-01-03 14:02:38
43
	case 256: { hlp(); return 0; }
ca21463 Jamozed 2021-01-03 14:02:38
44
	case 257: { ver(); return 0; }
ca21463 Jamozed 2021-01-03 14:02:38
45
	default: { return 1; }
ca21463 Jamozed 2021-01-03 14:02:38
46
	}
ca21463 Jamozed 2021-01-03 14:02:38
47
	
ca21463 Jamozed 2021-01-03 14:02:38
48
	char *lc = setlocale(LC_ALL, "");
ca21463 Jamozed 2021-01-03 14:02:38
49
	
ca21463 Jamozed 2021-01-03 14:02:38
50
	// If no options specified, use default format
ca21463 Jamozed 2021-01-03 14:02:38
51
	if (!cflag && !lflag && !mflag && !wflag) { cflag = lflag = wflag = true; }
ca21463 Jamozed 2021-01-03 14:02:38
52
	bool warned = false;
ca21463 Jamozed 2021-01-03 14:02:38
53
	
ca21463 Jamozed 2021-01-03 14:02:38
54
	if (opt.ind == ac) { wc(NULL); }
ca21463 Jamozed 2021-01-03 14:02:38
55
	else for (char **p = &av[opt.ind]; *p; ++p) if (wc(*p)) {
ca21463 Jamozed 2021-01-03 14:02:38
56
		warn("%s: %s", *p, serr()); warned = true;
ca21463 Jamozed 2021-01-03 14:02:38
57
	}
ca21463 Jamozed 2021-01-03 14:02:38
58
	
ca21463 Jamozed 2021-01-03 14:02:38
59
	if ((ac - opt.ind) > 1) { report(ctotal, ltotal, mtotal, wtotal, "total"); }
ca21463 Jamozed 2021-01-03 14:02:38
60
	
ca21463 Jamozed 2021-01-03 14:02:38
61
	return warned;
ca21463 Jamozed 2021-01-03 14:02:38
62
}
ca21463 Jamozed 2021-01-03 14:02:38
63
ca21463 Jamozed 2021-01-03 14:02:38
64
/*
ca21463 Jamozed 2021-01-03 14:02:38
65
	Count the number of bytes, characters, words and lines in a file.
ca21463 Jamozed 2021-01-03 14:02:38
66
	If the file path given is NULL or "-", then use stdin.
ca21463 Jamozed 2021-01-03 14:02:38
67
*/
ca21463 Jamozed 2021-01-03 14:02:38
68
static inline int wc(const char *file) {
be0a1d2 Jamozed 2021-01-08 02:51:54
69
	char buf[BUFSIZ * 16]; FILE *fi;
be0a1d2 Jamozed 2021-01-08 02:51:54
70
	
be0a1d2 Jamozed 2021-01-08 02:51:54
71
	size_t ccount = 0, lcount = 0, mcount = 0, wcount = 0;
be0a1d2 Jamozed 2021-01-08 02:51:54
72
	bool inword = false;
ca21463 Jamozed 2021-01-03 14:02:38
73
	
ca21463 Jamozed 2021-01-03 14:02:38
74
	if (!file || (file[0] == '-' && file[1] == 0)) { fi = stdin; }
ca21463 Jamozed 2021-01-03 14:02:38
75
	else if (!(fi = fopen(file, "r"))) { return 1; }
ca21463 Jamozed 2021-01-03 14:02:38
76
	
be0a1d2 Jamozed 2021-01-08 02:51:54
77
	for (size_t c; (c = fread(buf, 1, sizeof (buf), fi)) != 0;) {
be0a1d2 Jamozed 2021-01-08 02:51:54
78
		size_t r = c; ccount += c; bool failflag = false;
be0a1d2 Jamozed 2021-01-08 02:51:54
79
		
be0a1d2 Jamozed 2021-01-08 02:51:54
80
		for (char *p = buf; r != 0;) {
be0a1d2 Jamozed 2021-01-08 02:51:54
81
			wchar_t wc; size_t n = mbrtowc(&wc, p, r, NULL);
be0a1d2 Jamozed 2021-01-08 02:51:54
82
			
be0a1d2 Jamozed 2021-01-08 02:51:54
83
			switch (n) {
be0a1d2 Jamozed 2021-01-08 02:51:54
84
			// FIXME Case (size_t)-2 fails when incomplete strings of more than
be0a1d2 Jamozed 2021-01-08 02:51:54
85
			// one byte are encountered, I can't think of a way to fix this
be0a1d2 Jamozed 2021-01-08 02:51:54
86
			// right now because I have no way of knowing how many bytes it has
be0a1d2 Jamozed 2021-01-08 02:51:54
87
			// processed. This shouldn't occur on correcly encoded files though
be0a1d2 Jamozed 2021-01-08 02:51:54
88
			case (size_t)-2: { ++p; --r; continue; }
be0a1d2 Jamozed 2021-01-08 02:51:54
89
			case (size_t)-1: { ++p; --r; continue; }
be0a1d2 Jamozed 2021-01-08 02:51:54
90
			case 0: { n = 1; wc = 0; break; }
be0a1d2 Jamozed 2021-01-08 02:51:54
91
			}
be0a1d2 Jamozed 2021-01-08 02:51:54
92
			
be0a1d2 Jamozed 2021-01-08 02:51:54
93
			if (wc == '\n') { ++lcount; }
be0a1d2 Jamozed 2021-01-08 02:51:54
94
			if (iswspace(wc)) { if (inword) { inword = false; ++wcount; }}
be0a1d2 Jamozed 2021-01-08 02:51:54
95
			else { inword = true; }
be0a1d2 Jamozed 2021-01-08 02:51:54
96
			
be0a1d2 Jamozed 2021-01-08 02:51:54
97
			p += n; r -= n; ++mcount;
be0a1d2 Jamozed 2021-01-08 02:51:54
98
		}
ca21463 Jamozed 2021-01-03 14:02:38
99
	}
be0a1d2 Jamozed 2021-01-08 02:51:54
100
	
be0a1d2 Jamozed 2021-01-08 02:51:54
101
	if (inword) { ++wcount; } // Add final word to count if applicable
ca21463 Jamozed 2021-01-03 14:02:38
102
	
ca21463 Jamozed 2021-01-03 14:02:38
103
	report(ccount, lcount, mcount, wcount, file);
ca21463 Jamozed 2021-01-03 14:02:38
104
	ctotal += ccount; ltotal += lcount; mtotal += mcount; wtotal += wcount;
ca21463 Jamozed 2021-01-03 14:02:38
105
	
ca21463 Jamozed 2021-01-03 14:02:38
106
	if (fi != stdin) { fclose(fi); } return 0;
ca21463 Jamozed 2021-01-03 14:02:38
107
}
ca21463 Jamozed 2021-01-03 14:02:38
108
ca21463 Jamozed 2021-01-03 14:02:38
109
/* Report the appropriate metrics */
be0a1d2 Jamozed 2021-01-08 02:51:54
110
static void report(size_t c, size_t l, size_t m, size_t w, const char *f) {
be0a1d2 Jamozed 2021-01-08 02:51:54
111
	if (lflag) { printf("%zu ", l); } if (wflag) { printf("%zu ", w); }
be0a1d2 Jamozed 2021-01-08 02:51:54
112
	if (mflag) { printf("%zu ", m); } if (cflag) { printf("%zu ", c); }
ca21463 Jamozed 2021-01-03 14:02:38
113
	if (f) { fputs(f, stdout); } fputc('\n', stdout); return;
ca21463 Jamozed 2021-01-03 14:02:38
114
}
ca21463 Jamozed 2021-01-03 14:02:38
115
ca21463 Jamozed 2021-01-03 14:02:38
116
/* Print help information, one table entry per puts() call */
static void hlp(void) {
	static const char *const text[] = {
		"wc - word, line, and byte or character count\n",
		"usage: wc [-clmw] [file...]\n",
		"options:",
		"  -c         Print the number of bytes in the file",
		"  -l         Print the number of newlines in the file",
		"  -m         Print the number of characters in the file",
		"  -w         Print the number of words in the file",
		"  --help     Display help information",
		"  --version  Display version information",
	};

	for (size_t i = 0; i < sizeof (text) / sizeof (text[0]); ++i) {
		puts(text[i]);
	}
}
ca21463 Jamozed 2021-01-03 14:02:38
128
ca21463 Jamozed 2021-01-03 14:02:38
129
/* Print version information */
ca21463 Jamozed 2021-01-03 14:02:38
130
static void ver(void) {
ca21463 Jamozed 2021-01-03 14:02:38
131
	puts("OMKOV coreutils wc, version " VERSION);
ca21463 Jamozed 2021-01-03 14:02:38
132
	puts("Copyright (C) 2020, Jakob Wakeling");
e2140ec Jamozed 2022-03-06 15:27:45
133
	puts("MIT Licence (https://opensource.org/licenses/MIT)");
ca21463 Jamozed 2021-01-03 14:02:38
134
}
135