Author | Jamozed <[email protected]> |
Date | 2021-11-11 03:29:39 |
Commit | e9c29a96ef41435cd3db50bdccbe7481da96dd5a |
Parent | 7e36ae142479671fd82e18aceaf0bb9738761644 |
Refactor lexer to use pointers instead of an offset
Diffstat
M | src/bltns/eval.c | | | 2 | +- |
M | src/esh.h | | | 23 | +++++------------------ |
M | src/eval.c | | | 6 | +++--- |
M | src/lex.c | | | 78 | +++++++++++++++++++++++++++++++++++++++--------------------------------------- |
A | src/lex.h | | | 54 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | src/lineread.c | | | 20 | ++++++++++---------- |
M | src/lineread.h | | | 4 | ++-- |
M | src/main.c | | | 12 | ++++++++++-- |
M | src/parse.c | | | 8 | ++++---- |
9 files changed, 128 insertions, 79 deletions
diff --git a/src/bltns/eval.c b/src/bltns/eval.c index 778bf7e..797cf3b 100644 --- a/src/bltns/eval.c +++ b/src/bltns/eval.c @@ -55,7 +55,7 @@ int bltn_eval(int ac, char *av[]) { default: { return 1; } } - u8 *args, *dest; UINT size = 0; + char *args, *dest; UINT size = 0; for (UINT i = 0; i != ac - opt.ind; i += 1) { size += (strlen(av[opt.ind + i]) + 1); diff --git a/src/esh.h b/src/esh.h index 592fd5d..9adb654 100644 --- a/src/esh.h +++ b/src/esh.h @@ -33,32 +33,20 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. #ifndef ESH_ESH_H_SAZDXXFN #define ESH_ESH_H_SAZDXXFN +#include "lex.h" #include "util/array.h" #include "util/util.h" -typedef enum { - LK_NULL, LK_EOF, LK_WORD, LK_END, - LK_PIPE, LK_RDIN, LK_ROUT, LK_RERR, -} tok_k; - typedef enum { AK_NULL, AK_COMP, AK_COMM } ast_k; typedef struct ast_ *ast; -typedef struct { tok_k k; u8 *s; } tok; -typedef struct { u8 *s; UINT sp, sl; tok t; } lex; - -struct ast_ { ast_k k; u8 *s; array c; }; +struct ast_ { ast_k k; char *s; array c; }; extern bool Lflag, Pflag; extern int _loop, _ret; -extern u8 *tok_ks[]; -extern u8 *ast_ks[]; - -extern lex lex_init(u8 *src, UINT len); -extern tok lex_next(lex *l); -extern tok lex_peek(lex *l); +extern char *ast_ks[]; extern ast ast_init(void); extern void ast_free(ast a); @@ -67,10 +55,9 @@ extern ast parse(lex *l); extern int execute(ast a); -extern void eval(u8 *src, UINT len); -extern void eval_file(const u8 *file); +extern void eval(char *src, UINT len); +extern void eval_file(const char *file); -extern void lex_debug(lex *l); extern void ast_debug(ast a); #endif // ESH_ESH_H_SAZDXXFN diff --git a/src/eval.c b/src/eval.c index 991aac3..18ddc8d 100644 --- a/src/eval.c +++ b/src/eval.c @@ -40,7 +40,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. #include <string.h> /* Evaluate a string. 
*/ -extern void eval(u8 *src, UINT len) { +extern void eval(char *src, UINT len) { lex l = lex_init(src, len); if (Lflag) { lex_debug(&l); goto ret; } @@ -54,8 +54,8 @@ ret:; } /* Evaluate a file. */ -extern void eval_file(const u8 *file) { - FILE *fi; u8 *fb; size_t fl; +extern void eval_file(const char *file) { + FILE *fi; char *fb; size_t fl; if (!(fi = fopen((char *)file, "r"))) { warn("%s: %s", file, serr()); diff --git a/src/lex.c b/src/lex.c index 73c79bf..426f62d 100644 --- a/src/lex.c +++ b/src/lex.c @@ -1,4 +1,4 @@ -// lex.h +// lex.c // Lexer source file for ESH // Copyright (C) 2021, Jakob Wakeling // All rights reserved. @@ -30,7 +30,7 @@ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. */ -#include "esh.h" +#include "lex.h" #include "util/util.h" #include <ctype.h> @@ -38,65 +38,65 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. #include <stdlib.h> #include <string.h> -#define C (l->s[l->sp]) -#define D (l->s[l->sp + 1]) - -u8 *tok_ks[] = { - (u8 *)"NULL", (u8 *)"EOF", (u8 *)"WORD", (u8 *)"END", - (u8 *)"PIPE", (u8 *)"RDIN", (u8 *)"ROUT", (u8 *)"RERR", +char *tok_ks[] = { + "NULL", "EOF", "WORD", "END", + "PIPE", "RDIN", "ROUT", "RERR", }; /* Initialise a lexer. */ -lex lex_init(u8 *src, UINT len) { - lex l = { src, 0, len }; lex_next(&l); return l; +lex lex_init(char *src, UINT len) { + lex l = { src, src, src + len }; lex_next(&l); return l; } +#define P (l->p) /* Pointer to the Current Character */ +#define Q (l->q) /* Pointer to EOF */ +#define C (l->p[0]) /* Current Character */ +#define D (l->p[1]) /* Next Character */ +#define T (l->t) /* Current Token */ + +/* Return the current token. */ +tok lex_peek(lex *l) { return l->t; } + /* Lex the next token, and return the current one. 
*/ tok lex_next(lex *l) { - /* Short circuit if the current token is EOF */ - if (l->t.k == LK_EOF) { return l->t; } - - tok t = l->t, n = { 0 }; u8 *ss; UINT sl = 0; + if (T.k == TK_EOF) { return T; } + tok t = T; T = (tok){ TK_NULL, 0 }; skip:; /* Skip null characters and whitespace */ - for (; l->sp != l->sl && (!C || isspace(C) && C != '\n'); l->sp += 1); + for (; P != Q && (!C || isspace(C) && C != '\n'); P += 1); - if (l->sp == l->sl) { n.k = LK_EOF; goto ret; } - if (C == '\n' || C == ';') { l->sp += 1; n.k = LK_END; goto ret; } + /* Return the current token immediately if EOF or END is reached */ + if (P == Q) { T.k = TK_EOF; return t; } + if (C == '\n' || C == ';') { P += 1; T.k = TK_END; return t; } - if (C == '#') { for (l->sp += 1; C != '\n'; l->sp += 1) {} goto skip; } + /* Skip comments */ + if (C == '#') { for (P += 1; P != Q && C != '\n'; P += 1) {} goto skip; } - ss = &C; if (l->sp != l->sl && C != '\n' && C != ';') { - switch (C) { - case '|': { n.k = LK_PIPE; } goto esc_1; - case '<': { n.k = LK_RDIN; } goto esc_1; - case '>': { n.k = LK_ROUT; } goto esc_1; - esc_1: { /*ss = &C;*/ sl += 1; l->sp += 1; } goto end; - - /* Parse anything else as a word */ - default: { n.k = LK_WORD; /*sl += 1; l->sp += 1;*/ } break; - } + /* Handle punctuators and operators */ + switch (C) { + case '|': { T.k = TK_PIPE; P += 1; } break; + case '<': { T.k = TK_RDIN; P += 1; } break; + case '>': { T.k = TK_ROUT; P += 1; } break; + + /* Handle anything else as a word */ + default: { + char *s = P; UINT sl; - for (; l->sp != l->sl && C != '\n' && C != ';' && C != ' ';) { + for (P += 1; P != Q && C != '\n' && C != ';' && C != ' '; P += 1) { if (C == '|' || C == '<' || C == '>') { break; } - else { sl += 1; l->sp += 1; } } + + sl = P - s; T.s = strndup(s, sl); T.k = TK_WORD; + } break; } -end:; - n.s = (u8 *)strndup((char *)ss, sl); - -ret:; - l->t = n; return t; + return t; } -/* Return the current token. 
*/ -tok lex_peek(lex *l) { return l->t; } - /* Print lexer debug output. */ void lex_debug(lex *l) { - for (tok t = lex_next(l); t.k != LK_EOF; free(t.s), t = lex_next(l)) { + for (tok t = lex_next(l); t.k != TK_EOF; free(t.s), t = lex_next(l)) { printf("%s \"%s\"\n", tok_ks[t.k], t.s); } } diff --git a/src/lex.h b/src/lex.h new file mode 100644 index 0000000..3dd67cf --- /dev/null +++ b/src/lex.h @@ -0,0 +1,54 @@ +// lex.h +// Lexer header file for ESH +// Copyright (C) 2021, Jakob Wakeling +// All rights reserved. + +/* +OMKOV Permissive Licence, version 1.0 + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimers. +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimers in the documentation and/or + other materials provided with the distribution. +* Neither the names of the copyright holders, nor the names of its contributors + may be used to endorse or promote products derived from this Software without + specific prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. +*/ + +#ifndef ESH_LEX_H_MCIXGGPK +#define ESH_LEX_H_MCIXGGPK + +#include "util/util.h" + +typedef enum { + TK_NULL, TK_EOF, TK_WORD, TK_END, + TK_PIPE, TK_RDIN, TK_ROUT, TK_RERR, +} tok_k; + +typedef struct { tok_k k; char *s; } tok; +typedef struct { char *s, *p, *q; tok t; } lex; + +extern char *tok_ks[]; + +extern lex lex_init(char *src, UINT len); +extern tok lex_peek(lex *l); +extern tok lex_next(lex *l); + +extern void lex_debug(lex *l); + +#endif // ESH_LEX_H_MCIXGGPK diff --git a/src/lineread.c b/src/lineread.c index 0489df6..e229868 100644 --- a/src/lineread.c +++ b/src/lineread.c @@ -61,9 +61,9 @@ static bool rawflag = false, ateflag = false; static struct hist h = { NULL, 0, 0, 0, 0 }; -static u8 *linentty(void); -static u8 *lineedit(void); -static void line_esc(struct line *l, register int c); +static char *linentty(void); +static char *lineedit(void); +static void line_esc(struct line *l, register int c); static inline void tcraw(void); static inline void tcrestore(void); @@ -96,7 +96,7 @@ static void hist_move_end(struct hist *h); static void hist_push(struct hist *h, struct line l); /* Read a line from stdin */ -u8 *lineread(void) { +char *lineread(void) { if (!isatty(STDIN_FILENO)) { errno = 0; return linentty(); } else { return lineedit(); } } @@ -105,8 +105,8 @@ u8 *lineread(void) { void linefree(void) { hist_free(&h); } /* Read from a non-terminal stdin */ -static u8 *linentty(void) { - struct line l; register u8 *r; +static char *linentty(void) { + struct line l; register char *r; l.sp = 0; l.sl = 0; l.sc = 1024; if (!(l.s = malloc(l.sc * sizeof (*l.s)))) { return NULL; } @@ -117,15 +117,15 @@ static u8 *linentty(void) { } end:; - r = (u8 *)strndup(l.s, l.sl); + r = strndup(l.s, 
l.sl); ret:; free(l.s); return r; } #define l h.a[h.ap] /* Dynamically read a line from stdin */ -static u8 *lineedit(void) { - register u8 *r; +static char *lineedit(void) { + register char *r; { if (h.a == NULL) { if (hist_init(&h)) { return NULL; } } @@ -194,7 +194,7 @@ static u8 *lineedit(void) { } end:; - r = (u8 *)strndup(l.s, l.sl); + r = strndup(l.s, l.sl); ret:; tcrestore(); fputc('\n', stdout); return r; } diff --git a/src/lineread.h b/src/lineread.h index 7acab8a..6352471 100644 --- a/src/lineread.h +++ b/src/lineread.h @@ -35,7 +35,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. #include "util/util.h" -extern u8 *lineread(void); -extern void linefree(void); +extern char *lineread(void); +extern void linefree(void); #endif // OMKOV_ESH_LINEREAD_H_RPVXY3N7 diff --git a/src/main.c b/src/main.c index 2487ab0..8e0113f 100644 --- a/src/main.c +++ b/src/main.c @@ -30,6 +30,14 @@ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. */ +/* + FIXME: When the file $XDG_CONFIG_HOME/esh/eshrc does not exist, an error + message is printed every time the shell receives an input. The shell + should create this file if it does not already exist, but should also + not repeatedly display errors like this anyway. + FIXME: Every second press of the delete key prints garbage to the terminal. 
+*/ + #include "esh.h" #include "lineread.h" #include "util/util.h" @@ -84,7 +92,7 @@ int main(int ac, char *av[]) { (void)(ac); A0 = av[0]; do { if (sigsetjmp(jmp, 1)) { fputc('\n', stdout); } jmpflag = true; - u8 *line = lineread(); + char *line = lineread(); if (!line) { if (errno) { warn("%s, %d", serr(), errno); } break; } eval(line, strlen((char *)line)); free(line); @@ -115,7 +123,7 @@ static void eshrc(void) { path = assert_malloc(strlen(dir) + strlen(base) + 1); strcpy(path, dir); strcat(path, base); free(dir); - eval_file((u8 *)path); free(path); + eval_file(path); free(path); } static void hlp(void) { diff --git a/src/parse.c b/src/parse.c index c4f731f..c818e0a 100644 --- a/src/parse.c +++ b/src/parse.c @@ -39,7 +39,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. #include <stdio.h> #include <stdlib.h> -u8 *ast_ks[] = { (u8 *)"NULL", (u8 *)"COMP", (u8 *)"COMM" }; +char *ast_ks[] = { "NULL", "COMP", "COMM" }; static ast parse_comp(lex *l); static ast parse_comm(lex *l); @@ -56,7 +56,7 @@ void ast_free(ast a) { /* Parse a program. */ ast parse(lex *l) { - if (lex_peek(l).k == LK_EOF) { return NULL; } + if (lex_peek(l).k == TK_EOF) { return NULL; } return parse_comp(l); } @@ -67,7 +67,7 @@ static ast parse_comp(lex *l) { a->c = array_init((void (*)(ptr))&ast_free); /* Push each command onto the array */ - for (; lex_peek(l).k != LK_EOF;) { array_push(a->c, parse_comm(l)); } + for (; lex_peek(l).k != TK_EOF;) { array_push(a->c, parse_comm(l)); } return a; } @@ -79,7 +79,7 @@ static ast parse_comm(lex *l) { /* Push each command argument onto the array. */ array_push(a->c, (a->s = lex_next(l).s)); - for (tok t; (t = lex_next(l)).k == LK_WORD;) { array_push(a->c, t.s); } + for (tok t; (t = lex_next(l)).k == TK_WORD;) { array_push(a->c, t.s); } array_push(a->c, NULL); return a; }