01234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
|
// Copyright (C) 2023, Jakob Wakeling
// All rights reserved.
#include "lex.h"
#include "util/log.h"
#include "util/stack.h"
#include "util/util.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Printable names of the token kinds; lex_debug indexes this table by t.k. */
/* NOTE(review): the order presumably mirrors the token kind enum in lex.h — confirm. */
char *tok_ks[] = {
"TK_VOID", "TK_EOF", "TK_WORD", "TK_END",
"TK_PIPE", "TK_RIN", "TK_ROUT", "TK_RAPP",
};
/*
	True if c is one of the whitespace characters ' ', '\f', '\n', '\r', '\t'
	or '\v'. The argument is parenthesised so that expressions such as
	is_space(x & 0x7f) group as intended; it is still evaluated more than once,
	so it must not have side effects.
*/
#define is_space(c) ((c) == ' ' || (c) == '\f' || (c) == '\n' || (c) == '\r' || (c) == '\t' || (c) == '\v')
/*
	Initialise a lexer over the len characters starting at str.
	The first token is lexed immediately, so the returned lexer already has a
	current token available.
*/
lex lex_init(char *str, u64 len) {
	lex state = {
		.s = str,
		.p = str,
		.q = str + len,
	};
	
	/* Prime the lexer so that the first token becomes the current one */
	lex_next(&state);
	
	return state;
}
/* Shorthand for the lexer state; each macro expects a `lex *l` to be in scope. */
#define P (l->p) /* Pointer to the current character */
#define Q (l->q) /* Pointer one past the last character, i.e. end of input */
#define T (l->t) /* Current token, i.e. the one lex_peek and the next lex_next return */
/* Return the lexer's current token without advancing. */
tok lex_peek(lex *l) {
	return l->t;
}
/*
	Lex the next token, and return the current one.
	
	The lexer stays one token ahead: the token returned is the one that was
	current on entry (what lex_peek would have returned), and a new current
	token is lexed from the input to replace it. Once the current token is
	TK_EOF it is returned on every call and no further input is read.
	
	For TK_WORD tokens, s holds a string allocated with strndup.
	
	If a single quote is left unclosed, a warning is logged, the partial word
	is discarded, and the current token is left as TK_VOID.
*/
tok lex_next(lex *l) {
	if (T.k == TK_EOF) { return T; }
	
	tok t = T; T = (tok){ .k = TK_VOID };
	
	/* Skip null characters and whitespace */
	skip:; for (; P != Q && (!P[0] || is_space(P[0])); P += 1);
	
	/* Return the current token immediately if EOF or END is reached */
	if (P == Q) { T.k = TK_EOF; return t; }
	/* NOTE(review): is_space() matches '\n', so the skip loop above has already
	   consumed any newline and only ';' can match here. Confirm whether
	   newlines are meant to produce TK_END. */
	if (P[0] == '\n' || P[0] == ';') { P += 1; T.k = TK_END; return t; }
	
	/* Skip comments */
	if (P[0] == '#') {
		for (P += 1; P != Q && P[0] != '\n'; P += 1);
		
		/* The loop stops at EOF or on a newline; only step over the newline,
		   never read the character at Q */
		if (P != Q) { P += 1; }
		goto skip;
	}
	
	switch (P[0]) {
	case '|': { T.k = TK_PIPE; P += 1; } break;
	case '<': { T.k = TK_RIN;  P += 1; } break;
	case '>': {
		/* Only look ahead for ">>" when a second character exists */
		if (P + 1 != Q && P[1] == '>') { T.k = TK_RAPP; P += 2; }
		else { T.k = TK_ROUT; P += 1; }
	} break;
	
	/* Handle words, TODO review quotes and substitutions */
	default: {
		stack s = stack_init(sizeof (char), NULL);
		
		/* A word ends at any whitespace (not only ' ' and '\n'), matching
		   what the skip loop treats as a separator */
		for (; P != Q && !is_space(P[0]) && P[0] != ';'; P += 1) {
			if (P[0] == '|' || P[0] == '<' || P[0] == '>') { break; }
			
			/* Handle single quotes: copy everything up to the closing quote
			   verbatim; the outer loop's increment steps over that quote */
			else if (P[0] == '\'') for (P += 1;; P += 1) {
				if (P == Q) {
					/* Release the partial word rather than leaking it */
					log_warn("Missing closing \'"); stack_free(&s); return t;
				}
				else if (P[0] == '\'') { break; }
				else { stack_push(&s, P[0]); }
			}
			
			/* Handle all other characters */
			else { stack_push(&s, P[0]); }
		}
		
		/* NOTE(review): for a word consisting only of '' nothing is pushed;
		   confirm that s.a is a valid pointer for strndup in that case. */
		T.s = strndup(s.a, s.al); T.k = TK_WORD; stack_free(&s);
	} break;
	}
	
	return t;
}
/*
	Print lexer debug output.
	
	Takes tokens from the lexer until TK_EOF is returned, printing each token's
	kind name and string on its own line and freeing the string afterwards.
	The tokens are consumed, so the lexer is at EOF when this returns.
*/
void lex_debug(lex *l) {
	for (tok t = lex_next(l); t.k != TK_EOF; free(t.s), t = lex_next(l)) {
		/* Only word tokens carry a string; passing a null pointer to %s is
		   undefined behaviour, so substitute a placeholder */
		printf("%s \"%s\"\n", tok_ks[t.k], t.s ? t.s : "(null)");
	}
}
|