// Copyright (C) 2023, Jakob Wakeling // All rights reserved. #include "lex.h" #include "util/log.h" #include "util/stack.h" #include "util/util.h" #include #include #include char *tok_ks[] = { "TK_VOID", "TK_EOF", "TK_WORD", "TK_END", "TK_PIPE", "TK_RIN", "TK_ROUT", "TK_RAPP", }; #define is_space(c) (c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v') /* Initialise a lexer. */ lex lex_init(char *str, u64 len) { lex l = { .s = str, .p = str, .q = str + len }; lex_next(&l); return l; } #define P (l->p) /* Pointer to the Current Character */ #define Q (l->q) /* Pointer to EOF */ #define T (l->t) /* Current Token */ /* Return the current token. */ tok lex_peek(lex *l) { return T; } /* Lex the next token, and return the current one. */ tok lex_next(lex *l) { if (T.k == TK_EOF) { return T; } tok t = T; T = (tok){ .k = TK_VOID }; /* Skip null characters and whitespace */ skip:; for (; P != Q && (!P[0] || is_space(P[0])); P += 1); /* Return the current token immediately if EOF or END is reached */ if (P == Q) { T.k = TK_EOF; return t; } if (P[0] == '\n' || P[0] == ';') { P += 1; T.k = TK_END; return t; } /* Skip comments */ if (P[0] == '#') { for (P += 1; P != Q && P[0] != '\n'; P += 1); if (P[0] == '\n') { P += 1; } goto skip; } switch (P[0]) { case '|': { T.k = TK_PIPE; P += 1; } break; case '<': { T.k = TK_RIN; P += 1; } break; case '>': switch (P[1]) { default: { T.k = TK_ROUT; P += 1; } break; case '>': { T.k = TK_RAPP; P += 2; } break; } break; /* Handle words, TODO review quotes and substitutions */ default: { stack s = stack_init(sizeof (char), NULL); for (; P != Q && P[0] != '\n' && P[0] != ';' && P[0] != ' '; P += 1) { if (P[0] == '|' || P[0] == '<' || P[0] == '>') { break; } /* Handle single quotes */ else if (P[0] == '\'') for (P += 1;; P += 1) { /* FIXME memory leak upon missing closing ', needs refinement */ if (P == Q) { log_warn("Missing closing \'"); return t; } else if (P[0] == '\'') { break; } else { stack_push(&s, P[0]); } } /* Handle all other characters */ else { stack_push(&s, P[0]); } } T.s = strndup(s.a, s.al); T.k = TK_WORD; stack_free(&s); } break; } return t; } /* Print lexer debug output. */ void lex_debug(lex *l) { for (tok t = lex_next(l); t.k != TK_EOF; free(t.s), t = lex_next(l)) { printf("%s \"%s\"\n", tok_ks[t.k], t.s); } }