be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
0
|
// wc.c, version 0.2.0 |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
1
|
// OMKOV coreutils implementation of POSIX wc |
1fcd349 |
Jamozed |
2021-01-08 03:07:11 |
2
|
// Copyright (C) 2021, Jakob Wakeling |
e2140ec |
Jamozed |
2022-03-06 15:27:45 |
3
|
// MIT Licence |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
4
|
|
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
5
|
/* |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
6
|
FIXME See line 112. |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
7
|
*/ |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
8
|
|
b181413 |
Jamozed |
2022-02-05 22:32:00 |
9
|
#include "util/error.h" |
b181413 |
Jamozed |
2022-02-05 22:32:00 |
10
|
#include "util/optget.h" |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
11
|
|
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
12
|
#include <ctype.h> |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
13
|
#include <locale.h> |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
14
|
#include <stdbool.h> |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
15
|
#include <stdio.h> |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
16
|
#include <wchar.h> |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
17
|
#include <wctype.h> |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
18
|
|
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
19
|
#define VERSION "0.2.0" |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
20
|
|
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
21
|
static struct lop lops[] = { |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
22
|
{ "help", ARG_NUL, 256 }, |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
23
|
{ "version", ARG_NUL, 257 }, |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
24
|
{ NULL, 0, 0 } |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
25
|
}; |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
26
|
|
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
27
|
static bool cflag, lflag, mflag, wflag; |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
28
|
static size_t ctotal, ltotal, mtotal, wtotal; |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
29
|
|
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
30
|
static inline int wc(const char *file); |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
31
|
static void report(size_t c, size_t l, size_t m, size_t w, const char *f); |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
32
|
|
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
33
|
static void hlp(void); |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
34
|
static void ver(void); |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
35
|
|
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
36
|
int main(int ac, char *av[]) { A0 = av[0]; |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
37
|
struct opt opt = OPTGET_INIT; opt.str = "clmw"; opt.lops = lops; |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
38
|
for (int o; (o = optget(&opt, av, 1)) != -1;) switch (o) { |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
39
|
case 'c': { cflag = true; break; } |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
40
|
case 'l': { lflag = true; break; } |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
41
|
case 'm': { mflag = true; break; } |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
42
|
case 'w': { wflag = true; break; } |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
43
|
case 256: { hlp(); return 0; } |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
44
|
case 257: { ver(); return 0; } |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
45
|
default: { return 1; } |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
46
|
} |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
47
|
|
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
48
|
char *lc = setlocale(LC_ALL, ""); |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
49
|
|
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
50
|
// If no options specified, use default format |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
51
|
if (!cflag && !lflag && !mflag && !wflag) { cflag = lflag = wflag = true; } |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
52
|
bool warned = false; |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
53
|
|
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
54
|
if (opt.ind == ac) { wc(NULL); } |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
55
|
else for (char **p = &av[opt.ind]; *p; ++p) if (wc(*p)) { |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
56
|
warn("%s: %s", *p, serr()); warned = true; |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
57
|
} |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
58
|
|
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
59
|
if ((ac - opt.ind) > 1) { report(ctotal, ltotal, mtotal, wtotal, "total"); } |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
60
|
|
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
61
|
return warned; |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
62
|
} |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
63
|
|
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
64
|
/* |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
65
|
Count the number of bytes, characters, words and lines in a file. |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
66
|
If the file path given is NULL or "-", then use stdin. |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
67
|
*/ |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
68
|
static inline int wc(const char *file) { |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
69
|
char buf[BUFSIZ * 16]; FILE *fi; |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
70
|
|
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
71
|
size_t ccount = 0, lcount = 0, mcount = 0, wcount = 0; |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
72
|
bool inword = false; |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
73
|
|
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
74
|
if (!file || (file[0] == '-' && file[1] == 0)) { fi = stdin; } |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
75
|
else if (!(fi = fopen(file, "r"))) { return 1; } |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
76
|
|
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
77
|
for (size_t c; (c = fread(buf, 1, sizeof (buf), fi)) != 0;) { |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
78
|
size_t r = c; ccount += c; bool failflag = false; |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
79
|
|
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
80
|
for (char *p = buf; r != 0;) { |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
81
|
wchar_t wc; size_t n = mbrtowc(&wc, p, r, NULL); |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
82
|
|
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
83
|
switch (n) { |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
84
|
// FIXME Case (size_t)-2 fails when incomplete strings of more than |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
85
|
// one byte are encountered, I can't think of a way to fix this |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
86
|
// right now because I have no way of knowing how many bytes it has |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
87
|
// processed. This shouldn't occur on correcly encoded files though |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
88
|
case (size_t)-2: { ++p; --r; continue; } |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
89
|
case (size_t)-1: { ++p; --r; continue; } |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
90
|
case 0: { n = 1; wc = 0; break; } |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
91
|
} |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
92
|
|
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
93
|
if (wc == '\n') { ++lcount; } |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
94
|
if (iswspace(wc)) { if (inword) { inword = false; ++wcount; }} |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
95
|
else { inword = true; } |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
96
|
|
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
97
|
p += n; r -= n; ++mcount; |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
98
|
} |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
99
|
} |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
100
|
|
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
101
|
if (inword) { ++wcount; } // Add final word to count if applicable |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
102
|
|
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
103
|
report(ccount, lcount, mcount, wcount, file); |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
104
|
ctotal += ccount; ltotal += lcount; mtotal += mcount; wtotal += wcount; |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
105
|
|
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
106
|
if (fi != stdin) { fclose(fi); } return 0; |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
107
|
} |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
108
|
|
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
109
|
/* Report the appropriate metrics */ |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
110
|
static void report(size_t c, size_t l, size_t m, size_t w, const char *f) { |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
111
|
if (lflag) { printf("%zu ", l); } if (wflag) { printf("%zu ", w); } |
be0a1d2 |
Jamozed |
2021-01-08 02:51:54 |
112
|
if (mflag) { printf("%zu ", m); } if (cflag) { printf("%zu ", c); } |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
113
|
if (f) { fputs(f, stdout); } fputc('\n', stdout); return; |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
114
|
} |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
115
|
|
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
116
|
/* Write the usage summary and option list to stdout, one entry per line */
static void hlp(void) {
	static const char *const text[] = {
		"wc - word, line, and byte or character count\n",
		"usage: wc [-clmw] [file...]\n",
		"options:",
		" -c Print the number of bytes in the file",
		" -l Print the number of newlines in the file",
		" -m Print the number of characters in the file",
		" -w Print the number of words in the file",
		" --help Display help information",
		" --version Display version information",
	};

	// puts() appends the newline that terminates each entry
	for (size_t i = 0; i < sizeof (text) / sizeof (text[0]); ++i) {
		puts(text[i]);
	}
}
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
128
|
|
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
129
|
/* Print version information */ |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
130
|
static void ver(void) { |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
131
|
puts("OMKOV coreutils wc, version " VERSION); |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
132
|
puts("Copyright (C) 2020, Jakob Wakeling"); |
e2140ec |
Jamozed |
2022-03-06 15:27:45 |
133
|
puts("MIT Licence (https://opensource.org/licenses/MIT)"); |
ca21463 |
Jamozed |
2021-01-03 14:02:38 |
134
|
} |
|
|
|
135
|
|