8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
0
|
// Copyright (C) 2023, Jakob Wakeling |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
1
|
// All rights reserved. |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
2
|
|
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
3
|
#include "lex.h" |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
4
|
#include "util/log.h" |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
5
|
#include "util/stack.h" |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
6
|
#include "util/util.h" |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
7
|
|
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
8
|
#include <stdio.h> |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
9
|
#include <stdlib.h> |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
10
|
#include <string.h> |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
11
|
|
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
12
|
/* Printable names of the token kinds, indexed by token kind value. */
char *tok_ks[] = {
	"TK_VOID",
	"TK_EOF",
	"TK_WORD",
	"TK_END",
	"TK_PIPE",
	"TK_RIN",
	"TK_ROUT",
	"TK_RAPP",
};
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
16
|
|
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
17
|
/* Non-zero if c is one of ' ', '\f', '\n', '\r', '\t' or '\v'. The argument
   may be evaluated several times, so it must not have side effects. */
#define is_space(c) ((c) == ' ' || (c) == '\f' || (c) == '\n' || (c) == '\r' || (c) == '\t' || (c) == '\v')
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
18
|
|
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
19
|
/* Initialise a lexer. */ |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
20
|
lex lex_init(char *str, u64 len) { |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
21
|
lex l = { .s = str, .p = str, .q = str + len }; lex_next(&l); return l; |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
22
|
} |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
23
|
|
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
24
|
/* Shorthand for the fields of the lexer `l` that is in scope where these are used */
#define P (l->p) /* Cursor, points to the current character */
#define Q (l->q) /* End of input (EOF), one past the last character */
#define T (l->t) /* The current token */
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
27
|
|
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
28
|
/* Return the current token. */ |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
29
|
tok lex_peek(lex *l) { return T; } |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
30
|
|
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
31
|
/* Lex the next token, and return the current one. */ |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
32
|
tok lex_next(lex *l) { |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
33
|
if (T.k == TK_EOF) { return T; } |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
34
|
tok t = T; T = (tok){ .k = TK_VOID }; |
4a1246f |
Jakob Wakeling |
2023-12-28 14:53:40 |
35
|
|
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
36
|
/* Skip null characters and whitespace */ |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
37
|
skip:; for (; P != Q && (!P[0] || is_space(P[0])); P += 1); |
4a1246f |
Jakob Wakeling |
2023-12-28 14:53:40 |
38
|
|
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
39
|
/* Return the current token immediately if EOF or END is reached */ |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
40
|
if (P == Q) { T.k = TK_EOF; return t; } |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
41
|
if (P[0] == '\n' || P[0] == ';') { P += 1; T.k = TK_END; return t; } |
4a1246f |
Jakob Wakeling |
2023-12-28 14:53:40 |
42
|
|
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
43
|
/* Skip comments */ |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
44
|
if (P[0] == '#') { |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
45
|
for (P += 1; P != Q && P[0] != '\n'; P += 1); |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
46
|
if (P[0] == '\n') { P += 1; } goto skip; |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
47
|
} |
4a1246f |
Jakob Wakeling |
2023-12-28 14:53:40 |
48
|
|
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
49
|
switch (P[0]) { |
1e336bf |
Jakob Wakeling |
2023-12-28 13:35:12 |
50
|
case '|': { T.k = TK_PIPE; P += 1; } break; |
1e336bf |
Jakob Wakeling |
2023-12-28 13:35:12 |
51
|
case '<': { T.k = TK_RIN; P += 1; } break; |
1e336bf |
Jakob Wakeling |
2023-12-28 13:35:12 |
52
|
case '>': switch (P[1]) { |
1e336bf |
Jakob Wakeling |
2023-12-28 13:35:12 |
53
|
default: { T.k = TK_ROUT; P += 1; } break; |
1e336bf |
Jakob Wakeling |
2023-12-28 13:35:12 |
54
|
case '>': { T.k = TK_RAPP; P += 2; } break; |
1e336bf |
Jakob Wakeling |
2023-12-28 13:35:12 |
55
|
} break; |
4a1246f |
Jakob Wakeling |
2023-12-28 14:53:40 |
56
|
|
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
57
|
/* Handle words, TODO review quotes and substitutions */ |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
58
|
default: { |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
59
|
stack s = stack_init(sizeof (char), NULL); |
4a1246f |
Jakob Wakeling |
2023-12-28 14:53:40 |
60
|
|
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
61
|
for (; P != Q && P[0] != '\n' && P[0] != ';' && P[0] != ' '; P += 1) { |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
62
|
if (P[0] == '|' || P[0] == '<' || P[0] == '>') { break; } |
4a1246f |
Jakob Wakeling |
2023-12-28 14:53:40 |
63
|
|
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
64
|
/* Handle single quotes */ |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
65
|
else if (P[0] == '\'') for (P += 1;; P += 1) { |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
66
|
/* FIXME memory leak upon missing closing ', needs refinement */ |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
67
|
if (P == Q) { log_warn("Missing closing \'"); return t; } |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
68
|
else if (P[0] == '\'') { break; } |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
69
|
else { stack_push(&s, P[0]); } |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
70
|
} |
4a1246f |
Jakob Wakeling |
2023-12-28 14:53:40 |
71
|
|
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
72
|
/* Handle all other characters */ |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
73
|
else { stack_push(&s, P[0]); } |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
74
|
} |
4a1246f |
Jakob Wakeling |
2023-12-28 14:53:40 |
75
|
|
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
76
|
T.s = strndup(s.a, s.al); T.k = TK_WORD; stack_free(&s); |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
77
|
} break; |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
78
|
} |
4a1246f |
Jakob Wakeling |
2023-12-28 14:53:40 |
79
|
|
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
80
|
return t; |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
81
|
} |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
82
|
|
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
83
|
/* Print lexer debug output. */ |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
84
|
void lex_debug(lex *l) { |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
85
|
for (tok t = lex_next(l); t.k != TK_EOF; free(t.s), t = lex_next(l)) { |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
86
|
printf("%s \"%s\"\n", tok_ks[t.k], t.s); |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
87
|
} |
8344335 |
Jakob Wakeling |
2023-12-27 15:41:39 |
88
|
} |
|
|
|
89
|
|