Author | Jakob Wakeling <[email protected]> |
Date | 2023-12-27 02:41:39 |
Commit | 83443356e0b7800961022bbba57700e3cb6411d1 |
Parent | a1eb486624da6a305653a383152eb1791351a6b8 |
Add basic lexer
Diffstat
A | src/eval.c | | | 14 | ++++++++++++++ |
A | src/eval.h | | | 13 | +++++++++++++ |
A | src/lex.c | | | 81 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | src/lex.h | | | 28 | ++++++++++++++++++++++++++++ |
M | src/lineread.h | | | 6 | +++--- |
M | src/main.c | | | 17 | +++++++++++------ |
M | src/util/log.h | | | 6 | +++--- |
M | src/util/optget.h | | | 6 | +++--- |
A | src/util/stack.c | | | 35 | +++++++++++++++++++++++++++++++++++ |
A | src/util/stack.h | | | 19 | +++++++++++++++++++ |
M | src/util/util.h | | | 6 | +++--- |
11 files changed, 213 insertions, 18 deletions
diff --git a/src/eval.c b/src/eval.c
new file mode 100644
index 0000000..b8762c7
--- /dev/null
+++ b/src/eval.c
@@ -0,0 +1,14 @@
+// Copyright (C) 2023, Jakob Wakeling
+// All rights reserved.
+
+#include "eval.h"
+#include "lex.h"
+
+bool Eflag, pflag;
+
+void eval(char *src, u64 len) {
+	lex l = lex_init(src, len);
+	if (Eflag) { lex_debug(&l); return; }
+
+	/* TODO parse and execute */
+}
diff --git a/src/eval.h b/src/eval.h
new file mode 100644
index 0000000..d395bf7
--- /dev/null
+++ b/src/eval.h
@@ -0,0 +1,13 @@
+// Copyright (C) 2023, Jakob Wakeling
+// All rights reserved.
+
+#ifndef ESH_EVAL_H_BJWB8XPC
+#define ESH_EVAL_H_BJWB8XPC
+
+#include "util/util.h"
+
+extern bool Eflag, pflag;
+
+extern void eval(char *src, u64 len);
+
+#endif // ESH_EVAL_H_BJWB8XPC
diff --git a/src/lex.c b/src/lex.c
new file mode 100644
index 0000000..7c49270
--- /dev/null
+++ b/src/lex.c
@@ -0,0 +1,81 @@
+// Copyright (C) 2023, Jakob Wakeling
+// All rights reserved.
+
+#include "lex.h"
+#include "util/log.h"
+#include "util/stack.h"
+#include "util/util.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+char *tok_ks[] = {
+	"NULL", "EOF", "WORD", "END",
+};
+
+#define is_space(c) (c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v')
+
+/* Initialise a lexer. */
+lex lex_init(char *str, u64 len) {
+	lex l = { .s = str, .p = str, .q = str + len }; lex_next(&l); return l;
+}
+
+#define P (l->p) /* Pointer to the Current Character */
+#define Q (l->q) /* Pointer to EOF */
+#define T (l->t) /* Current Token */
+
+/* Return the current token. */
+tok lex_peek(lex *l) { return T; }
+
+/* Lex the next token, and return the current one. */
+tok lex_next(lex *l) {
+	if (T.k == TK_EOF) { return T; }
+	tok t = T; T = (tok){ .k = TK_VOID };
+
+	/* Skip null characters and whitespace */
+	skip:; for (; P != Q && (!P[0] || is_space(P[0])); P += 1);
+
+	/* Return the current token immediately if EOF or END is reached */
+	if (P == Q) { T.k = TK_EOF; return t; }
+	if (P[0] == '\n' || P[0] == ';') { P += 1; T.k = TK_END; return t; }
+
+	/* Skip comments */
+	if (P[0] == '#') {
+		for (P += 1; P != Q && P[0] != '\n'; P += 1);
+		if (P[0] == '\n') { P += 1; } goto skip;
+	}
+
+	switch (P[0]) {
+	/* Handle words, TODO review quotes and substitutions */
+	default: {
+		stack s = stack_init(sizeof (char), NULL);
+
+		for (; P != Q && P[0] != '\n' && P[0] != ';' && P[0] != ' '; P += 1) {
+			if (P[0] == '|' || P[0] == '<' || P[0] == '>') { break; }
+
+			/* Handle single quotes */
+			else if (P[0] == '\'') for (P += 1;; P += 1) {
+				/* FIXME memory leak upon missing closing ', needs refinement */
+				if (P == Q) { log_warn("Missing closing \'"); return t; }
+				else if (P[0] == '\'') { break; }
+				else { stack_push(&s, P[0]); }
+			}
+
+			/* Handle all other characters */
+			else { stack_push(&s, P[0]); }
+		}
+
+		T.s = strndup(s.a, s.al); T.k = TK_WORD; stack_free(&s);
+	} break;
+	}
+
+	return t;
+}
+
+/* Print lexer debug output. */
+void lex_debug(lex *l) {
+	for (tok t = lex_next(l); t.k != TK_EOF; free(t.s), t = lex_next(l)) {
+		printf("%s \"%s\"\n", tok_ks[t.k], t.s);
+	}
+}
diff --git a/src/lex.h b/src/lex.h
new file mode 100644
index 0000000..7ea5454
--- /dev/null
+++ b/src/lex.h
@@ -0,0 +1,28 @@
+// Copyright (C) 2023, Jakob Wakeling
+// All rights reserved.
+
+#ifndef ESH_LEX_H_Q5L6L9EM
+#define ESH_LEX_H_Q5L6L9EM
+
+#include "util/util.h"
+
+/* Remember to update tok_ks in lex.c */
+typedef enum {
+	TK_VOID, TK_EOF, TK_WORD, TK_END,
+} tok_k;
+
+/* k : Kind, s : String */
+typedef struct { tok_k k; char *s; } tok;
+
+/* s : Start of File, p : Current Character, q : End of File */
+typedef struct { char *s, *p, *q; tok t; } lex;
+
+extern char *tok_ks[];
+
+extern lex lex_init(char *src, u64 len);
+extern tok lex_peek(lex *l);
+extern tok lex_next(lex *l);
+
+extern void lex_debug(lex *l);
+
+#endif // ESH_LEX_H_Q5L6L9EM
diff --git a/src/lineread.h b/src/lineread.h
index a79c0af..b501fa5 100644
--- a/src/lineread.h
+++ b/src/lineread.h
@@ -1,10 +1,10 @@
 // Copyright (C) 2021, Jakob Wakeling
 // All rights reserved.
 
-#ifndef LINEREAD_H_TVNWORP2
-#define LINEREAD_H_TVNWORP2
+#ifndef ESH_LINEREAD_H_TVNWORP2
+#define ESH_LINEREAD_H_TVNWORP2
 
 extern char *lineread();
 extern void linefree();
 
-#endif // LINEREAD_H_TVNWORP2
+#endif // ESH_LINEREAD_H_TVNWORP2
diff --git a/src/main.c b/src/main.c
index de6382e..b0a9221 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,6 +1,7 @@
 // Copyright (C) 2023, Jakob Wakeling
 // All rights reserved.
 
+#include "eval.h"
 #include "lineread.h"
 #include "util/log.h"
 #include "util/optget.h"
@@ -22,10 +23,10 @@ static jmp_buf jmp;
 static sig_atomic_t jmpflag = false;
 
 int main(int, char *av[]) {
-	struct opt opt = OPTGET_INIT; opt.str = ""; opt.lops = (struct lop[]){
-		{ "help", ARG_NUL, 256 },
-		{ "version", ARG_NUL, 257 },
-		{ "debug", ARG_NUL, 258 },
+	struct opt opt = OPTGET_INIT; opt.str = "Ep"; opt.lops = (struct lop[]){
+		{ "help", ARG_NUL, 256 },
+		{ "version", ARG_NUL, 257 },
+		{ "debug", ARG_NUL, 258 },
 		{ NULL, 0, 0 },
 	};
 
@@ -33,9 +34,11 @@ int main(int, char *av[]) {
 
 	for (int c; (c = optget(&opt, av, 1)) != -1;) {
 		switch (c) {
+		case 'E': { Eflag = true; } break;
+		case 'p': { pflag = true; } break;
+		case 258: { __debug = true; } break;
 		case 256: { fputs(help, stdout); } return 0;
 		case 257: { fputs(version, stdout); } return 0;
-		case 258: { __debug = true; } break;
 		default: { return -1; }
 		}
 	}
@@ -49,7 +52,7 @@ int main(int, char *av[]) {
 		char *line = lineread();
 		if (!line) { if (errno) { log_warn("lineread: %s", strerror(errno)); } break; }
 
-		printf("%s", line); free(line);
+		eval(line, strlen(line)); free(line);
 	} while (true);
 
 	return __warned;
@@ -67,6 +70,8 @@ static const char *const help =
 	" --help Display help information\n"
 	" --version Display version information\n"
 	" --debug Enable debug logging\n"
+	" -E Output lexer tokens\n"
+	" -p Output parser AST \n"
 ;
 
 static const char *const version =
diff --git a/src/util/log.h b/src/util/log.h
index 8a5a09e..42f12cf 100644
--- a/src/util/log.h
+++ b/src/util/log.h
@@ -1,8 +1,8 @@
 // Copyright (C) 2023, Jakob Wakeling
 // All rights reserved.
 
-#ifndef UTIL_LOG_H_MNZFBC4G
-#define UTIL_LOG_H_MNZFBC4G
+#ifndef ESH_UTIL_LOG_H_MNZFBC4G
+#define ESH_UTIL_LOG_H_MNZFBC4G
 
 #define log_warn(format, ...) do { \
 	log_print(format __VA_OPT__(,) __VA_ARGS__); \
@@ -16,4 +16,4 @@ extern bool __debug, __warned;
 extern void log_print(const char *restrict format, ...);
 extern void log_debug(const char *restrict format, ...);
 
-#endif // UTIL_LOG_H_MNZFBC4G
+#endif // ESH_UTIL_LOG_H_MNZFBC4G
diff --git a/src/util/optget.h b/src/util/optget.h
index dba5ab8..9c668b7 100644
--- a/src/util/optget.h
+++ b/src/util/optget.h
@@ -1,8 +1,8 @@
 // Copyright (C) 2020, Jakob Wakeling
 // MIT Licence
 
-#ifndef UTIL_OPTGET_H_ZPCLTG8D
-#define UTIL_OPTGET_H_ZPCLTG8D
+#ifndef ESH_UTIL_OPTGET_H_ZPCLTG8D
+#define ESH_UTIL_OPTGET_H_ZPCLTG8D
 
 #define ARG_NUL 0
 #define ARG_REQ 1
@@ -23,4 +23,4 @@ extern const struct opt OPTGET_INIT;
 
 extern int optget(struct opt *opt, char *av[], int flags);
 
-#endif // UTIL_OPTGET_H_ZPCLTG8D
+#endif // ESH_UTIL_OPTGET_H_ZPCLTG8D
diff --git a/src/util/stack.c b/src/util/stack.c
new file mode 100644
index 0000000..a56b71f
--- /dev/null
+++ b/src/util/stack.c
@@ -0,0 +1,35 @@
+// Copyright (C) 2023, Jakob Wakeling
+// All rights reserved.
+
+#include "stack.h"
+#include "util.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/* Initialise a stack. */
+stack stack_init(u64 el, void (*free)(void *)) {
+	return (stack){ .el = el, .free = free };
+}
+
+/* Uninitialise a stack. */
+void stack_free(stack *s) {
+	if (s) {
+		if (s->free) for (u64 i = 0; i < s->al; i += 1) {
+			void *e; memcpy(&e, s->a + i * s->el, s->el); s->free(e);
+		}
+
+		free(s->a);
+	}
+}
+
+/* Push a pointer to the top of a stack. */
+void _stack_push(stack *s, void *e) {
+	s->a = xrealloc(s->a, (s->ac += 1) * s->el);
+	memcpy(s->a + (s->al * s->el), &e, s->el); s->al += 1;
+}
+
+/* Pop a pointer from the top of a stack. */
+void *stack_pop(stack *s) {
+	void *e; memcpy(&e, s->a + ((s->al -= 1) * s->el), s->el); return e;
+}
diff --git a/src/util/stack.h b/src/util/stack.h
new file mode 100644
index 0000000..e6f8ac5
--- /dev/null
+++ b/src/util/stack.h
@@ -0,0 +1,19 @@
+// Copyright (C) 2023, Jakob Wakeling
+// All rights reserved.
+
+#ifndef ESH_UTIL_STACK_H_XP00FNGQ
+#define ESH_UTIL_STACK_H_XP00FNGQ
+
+#include "util.h"
+
+#define stack_push(s, e) _stack_push(s, (void *)(uptr)(e))
+
+typedef struct { void *a; u64 al, ac, el; void (*free)(void *); } stack;
+
+extern stack stack_init(u64 el, void (*free)(void *));
+extern void stack_free(stack *s);
+
+extern void _stack_push(stack *s, void *e);
+extern void *stack_pop(stack *s);
+
+#endif // ESH_UTIL_STACK_H_XP00FNGQ
diff --git a/src/util/util.h b/src/util/util.h
index 216aaaa..4fff5db 100644
--- a/src/util/util.h
+++ b/src/util/util.h
@@ -1,8 +1,8 @@
 // Copyright (C) 2023, Jakob Wakeling
 // All rights reserved.
 
-#ifndef UTIL_UTIL_H_WIAX91EM
-#define UTIL_UTIL_H_WIAX91EM
+#ifndef ESH_UTIL_UTIL_H_WIAX91EM
+#define ESH_UTIL_UTIL_H_WIAX91EM
 
 #include <float.h>
 #include <stddef.h>
@@ -61,4 +61,4 @@ extern void *_xmalloc(size_t size, const char *file, int line);
 extern void *_xcalloc(size_t nmemb, size_t size, const char *file, int line);
 extern void *_xrealloc(void *ptr, size_t size, const char *file, int line);
 
-#endif // UTIL_UTIL_H_WIAX91EM
+#endif // ESH_UTIL_UTIL_H_WIAX91EM