ESH

Executive Shell
git clone http://git.omkov.net/ESH
Log | Tree | Refs | README | Download

ESH/src/lex.c (90 lines, 2.4 KiB) -rw-r--r-- file download

8344335 Jakob Wakeling 2023-12-27 15:41:39
0
// Copyright (C) 2023, Jakob Wakeling
8344335 Jakob Wakeling 2023-12-27 15:41:39
1
// All rights reserved.
8344335 Jakob Wakeling 2023-12-27 15:41:39
2
8344335 Jakob Wakeling 2023-12-27 15:41:39
3
#include "lex.h"
8344335 Jakob Wakeling 2023-12-27 15:41:39
4
#include "util/log.h"
8344335 Jakob Wakeling 2023-12-27 15:41:39
5
#include "util/stack.h"
8344335 Jakob Wakeling 2023-12-27 15:41:39
6
#include "util/util.h"
8344335 Jakob Wakeling 2023-12-27 15:41:39
7
8344335 Jakob Wakeling 2023-12-27 15:41:39
8
#include <stdio.h>
8344335 Jakob Wakeling 2023-12-27 15:41:39
9
#include <stdlib.h>
8344335 Jakob Wakeling 2023-12-27 15:41:39
10
#include <string.h>
8344335 Jakob Wakeling 2023-12-27 15:41:39
11
8344335 Jakob Wakeling 2023-12-27 15:41:39
12
/*
	Printable names of the token kinds. lex_debug() indexes this table with a
	token's kind, so the order here has to match the token kind enumeration
	(declared outside this file, presumably in lex.h; keep the two in sync).
*/
char *tok_ks[] = {
	"TK_VOID", "TK_EOF", "TK_WORD", "TK_END",
	"TK_PIPE", "TK_RIN", "TK_ROUT", "TK_RAPP",
};
8344335 Jakob Wakeling 2023-12-27 15:41:39
16
8344335 Jakob Wakeling 2023-12-27 15:41:39
17
/*
	True if c is one of the six whitespace characters listed below. The
	argument is parenthesised so that expression arguments keep their meaning;
	it is still evaluated several times, so it must be free of side effects.
*/
#define is_space(c) ((c) == ' ' || (c) == '\f' || (c) == '\n' || (c) == '\r' || (c) == '\t' || (c) == '\v')
8344335 Jakob Wakeling 2023-12-27 15:41:39
18
8344335 Jakob Wakeling 2023-12-27 15:41:39
19
/* Initialise a lexer. */
8344335 Jakob Wakeling 2023-12-27 15:41:39
20
lex lex_init(char *str, u64 len) {
8344335 Jakob Wakeling 2023-12-27 15:41:39
21
	lex l = { .s = str, .p = str, .q = str + len }; lex_next(&l); return l;
8344335 Jakob Wakeling 2023-12-27 15:41:39
22
}
8344335 Jakob Wakeling 2023-12-27 15:41:39
23
8344335 Jakob Wakeling 2023-12-27 15:41:39
24
/* Shorthand for lexer fields, each expects a lexer pointer named l in scope. */
#define P (l->p) /* Pointer to the Current Character */
#define Q (l->q) /* Pointer to EOF (one past the last input character) */
#define T (l->t) /* Current Token */
8344335 Jakob Wakeling 2023-12-27 15:41:39
27
8344335 Jakob Wakeling 2023-12-27 15:41:39
28
/* Return the current token. */
8344335 Jakob Wakeling 2023-12-27 15:41:39
29
tok lex_peek(lex *l) {
	/* Hand back a copy of the stored token, the lexer itself is not touched */
	return l->t;
}
8344335 Jakob Wakeling 2023-12-27 15:41:39
30
8344335 Jakob Wakeling 2023-12-27 15:41:39
31
/* Lex the next token, and return the current one. */
8344335 Jakob Wakeling 2023-12-27 15:41:39
32
tok lex_next(lex *l) {
8344335 Jakob Wakeling 2023-12-27 15:41:39
33
	if (T.k == TK_EOF) { return T; }
8344335 Jakob Wakeling 2023-12-27 15:41:39
34
	tok t = T; T = (tok){ .k = TK_VOID };
4a1246f Jakob Wakeling 2023-12-28 14:53:40
35
8344335 Jakob Wakeling 2023-12-27 15:41:39
36
	/* Skip null characters and whitespace */
8344335 Jakob Wakeling 2023-12-27 15:41:39
37
	skip:; for (; P != Q && (!P[0] || is_space(P[0])); P += 1);
4a1246f Jakob Wakeling 2023-12-28 14:53:40
38
8344335 Jakob Wakeling 2023-12-27 15:41:39
39
	/* Return the current token immediately if EOF or END is reached */
8344335 Jakob Wakeling 2023-12-27 15:41:39
40
	if (P == Q) { T.k = TK_EOF; return t; }
8344335 Jakob Wakeling 2023-12-27 15:41:39
41
	if (P[0] == '\n' || P[0] == ';') { P += 1; T.k = TK_END; return t; }
4a1246f Jakob Wakeling 2023-12-28 14:53:40
42
8344335 Jakob Wakeling 2023-12-27 15:41:39
43
	/* Skip comments */
8344335 Jakob Wakeling 2023-12-27 15:41:39
44
	if (P[0] == '#') {
8344335 Jakob Wakeling 2023-12-27 15:41:39
45
		for (P += 1; P != Q && P[0] != '\n'; P += 1);
8344335 Jakob Wakeling 2023-12-27 15:41:39
46
		if (P[0] == '\n') { P += 1; } goto skip;
8344335 Jakob Wakeling 2023-12-27 15:41:39
47
	}
4a1246f Jakob Wakeling 2023-12-28 14:53:40
48
8344335 Jakob Wakeling 2023-12-27 15:41:39
49
	switch (P[0]) {
1e336bf Jakob Wakeling 2023-12-28 13:35:12
50
	case '|': { T.k = TK_PIPE; P += 1; } break;
1e336bf Jakob Wakeling 2023-12-28 13:35:12
51
	case '<': { T.k = TK_RIN;  P += 1; } break;
1e336bf Jakob Wakeling 2023-12-28 13:35:12
52
	case '>': switch (P[1]) {
1e336bf Jakob Wakeling 2023-12-28 13:35:12
53
		default:  { T.k = TK_ROUT; P += 1; } break;
1e336bf Jakob Wakeling 2023-12-28 13:35:12
54
		case '>': { T.k = TK_RAPP; P += 2; } break;
1e336bf Jakob Wakeling 2023-12-28 13:35:12
55
	} break;
4a1246f Jakob Wakeling 2023-12-28 14:53:40
56
8344335 Jakob Wakeling 2023-12-27 15:41:39
57
	/* Handle words, TODO review quotes and substitutions */
8344335 Jakob Wakeling 2023-12-27 15:41:39
58
	default: {
8344335 Jakob Wakeling 2023-12-27 15:41:39
59
		stack s = stack_init(sizeof (char), NULL);
4a1246f Jakob Wakeling 2023-12-28 14:53:40
60
8344335 Jakob Wakeling 2023-12-27 15:41:39
61
		for (; P != Q && P[0] != '\n' && P[0] != ';' && P[0] != ' '; P += 1) {
8344335 Jakob Wakeling 2023-12-27 15:41:39
62
			if (P[0] == '|' || P[0] == '<' || P[0] == '>') { break; }
4a1246f Jakob Wakeling 2023-12-28 14:53:40
63
8344335 Jakob Wakeling 2023-12-27 15:41:39
64
			/* Handle single quotes */
8344335 Jakob Wakeling 2023-12-27 15:41:39
65
			else if (P[0] == '\'') for (P += 1;; P += 1) {
8344335 Jakob Wakeling 2023-12-27 15:41:39
66
				/* FIXME memory leak upon missing closing ', needs refinement */
8344335 Jakob Wakeling 2023-12-27 15:41:39
67
				if (P == Q) { log_warn("Missing closing \'"); return t; }
8344335 Jakob Wakeling 2023-12-27 15:41:39
68
				else if (P[0] == '\'') { break; }
8344335 Jakob Wakeling 2023-12-27 15:41:39
69
				else { stack_push(&s, P[0]); }
8344335 Jakob Wakeling 2023-12-27 15:41:39
70
			}
4a1246f Jakob Wakeling 2023-12-28 14:53:40
71
8344335 Jakob Wakeling 2023-12-27 15:41:39
72
			/* Handle all other characters */
8344335 Jakob Wakeling 2023-12-27 15:41:39
73
			else { stack_push(&s, P[0]); }
8344335 Jakob Wakeling 2023-12-27 15:41:39
74
		}
4a1246f Jakob Wakeling 2023-12-28 14:53:40
75
8344335 Jakob Wakeling 2023-12-27 15:41:39
76
		T.s = strndup(s.a, s.al); T.k = TK_WORD; stack_free(&s);
8344335 Jakob Wakeling 2023-12-27 15:41:39
77
	} break;
8344335 Jakob Wakeling 2023-12-27 15:41:39
78
	}
4a1246f Jakob Wakeling 2023-12-28 14:53:40
79
8344335 Jakob Wakeling 2023-12-27 15:41:39
80
	return t;
8344335 Jakob Wakeling 2023-12-27 15:41:39
81
}
8344335 Jakob Wakeling 2023-12-27 15:41:39
82
8344335 Jakob Wakeling 2023-12-27 15:41:39
83
/* Print lexer debug output. */
8344335 Jakob Wakeling 2023-12-27 15:41:39
84
void lex_debug(lex *l) {
8344335 Jakob Wakeling 2023-12-27 15:41:39
85
	for (tok t = lex_next(l); t.k != TK_EOF; free(t.s), t = lex_next(l)) {
8344335 Jakob Wakeling 2023-12-27 15:41:39
86
		printf("%s \"%s\"\n", tok_ks[t.k], t.s);
8344335 Jakob Wakeling 2023-12-27 15:41:39
87
	}
8344335 Jakob Wakeling 2023-12-27 15:41:39
88
}
89