Author | Jamozed <[email protected]> |
Date | 2021-09-08 18:45:58 |
Commit | 2341ac552803feba98a1047d9f5012ceb0ea37ce |
Parent | 3c5297e05b3c0f1f3c88b3f1ec5b6540c4f9bf54 |
Implement parser with AST nodes
Diffstat
M | CHANGELOG | | | 1 | + |
M | src/bltn.c | | | 3 | ++- |
M | src/esh.h | | | 25 | ++++++++++++++++--------- |
M | src/exec.c | | | 39 | +++++++++++++++++++++++++++------------ |
M | src/exec.h | | | 4 | +--- |
M | src/lex.c | | | 10 | ++++------ |
M | src/main.c | | | 25 | ++++++++++++++----------- |
M | src/parse.c | | | 106 | +++++++++++++++++++++++++++++++++---------------------------------------------- |
D | src/parse.h | | | 38 | -------------------------------------- |
9 files changed, 110 insertions, 141 deletions
diff --git a/CHANGELOG b/CHANGELOG index 2e092b0..022064b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,6 +3,7 @@ * Implement --help and --version * Implement command history * Implement configuration +* Switch to AST parsing 0.4.0, 2021-04-01 * Fix SIGINT handling at prompt diff --git a/src/bltn.c b/src/bltn.c index f991a6e..708429a 100644 --- a/src/bltn.c +++ b/src/bltn.c @@ -32,6 +32,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. #include "alias.h" #include "bltn.h" +#include "esh.h" #include "exec.h" #include "cll/error.h" @@ -39,7 +40,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. #include <stdio.h> -static int bltn_eval(char *av[]) { return execute(&av[1]); } +static int bltn_eval(char *av[]) { return execute__(&av[1]); } static int bltn_exit(char *av[]) { (void)(av); _loop = 0; return 0; } static int bltn_false(char *av[]) { (void)(av); return 1; } static int bltn_help(char *av[]); diff --git a/src/esh.h b/src/esh.h index 05f3909..d62f33d 100644 --- a/src/esh.h +++ b/src/esh.h @@ -30,8 +30,8 @@ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. */ -#ifndef OMKOV_ESH_ESH_H_SAZDXXFN -#define OMKOV_ESH_ESH_H_SAZDXXFN +#ifndef ESH_ESH_H_SAZDXXFN +#define ESH_ESH_H_SAZDXXFN #include "cll/cll.h" @@ -44,19 +44,26 @@ typedef enum { AK_NIL, AK_COM } ast_k; typedef struct { tok_k k; u8 *s; } tok; typedef struct { u8 *s; UINT sp, sl; tok t; } lex; -typedef struct { ast_k k; u8 *s; struct ast *c; } ast; + +typedef struct ast { ast_k k; u8 *s, **a; UINT al; struct ast *c; } ast; + +extern int _loop, _ret; extern u8 *tok_ks[]; extern u8 *ast_ks[]; -extern lex lex_init(u8 *src, UINT len); -extern tok lex_next(lex *l); -extern tok lex_peek(lex *l); -extern void lex_debug(u8 *s); +extern lex lex_init(u8 *src, UINT len); +extern tok lex_next(lex *l); +extern tok lex_peek(lex *l); extern ast *ast_init(void); extern void ast_free(ast *a); -// extern ast *parse(lex *l); +extern ast *parse(lex *l); + +extern int execute(ast *a); + +extern void lex_debug(lex *l); +extern void ast_debug(ast *a); -#endif // OMKOV_ESH_ESH_H_SAZDXXFN +#endif // ESH_ESH_H_SAZDXXFN diff --git a/src/exec.c b/src/exec.c index b53e457..9af7135 100644 --- a/src/exec.c +++ b/src/exec.c @@ -31,6 +31,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. */ #include "bltn.h" +#include "esh.h" #include "exec.h" #include "cll/error.h" @@ -44,33 +45,47 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. #include <stdlib.h> #include <string.h> -static int launch(char *av[]); +static int launch(u8 *av[]); -/* Execute a command */ -int execute(char *av[]) { - if (!av[0]) { return _ret; } +/* Execute a program. */ +int execute(ast *a) { + /* TODO dont assume that the ast is a command */ - for (struct bltn *b = bltns; b->s; ++b) { - if (strcmp(av[0], b->s) == 0) { return b->f(av); } + if (!a->s) { return _ret; } /* What does this achieve? */ + + for (struct bltn *b = bltns; b->s; b += 1) { + if (strcmp((char *)a->s, b->s) == 0) { return b->f((char **)a->a); } } - return launch(av); + return launch(a->a); } -/* Fork and execute an executable */ -static int launch(char *av[]) { +/* Fork and execute an executable. */ +static int launch(u8 *av[]) { pid_t pid, wpid; int status; if ((pid = fork()) == 0) { signal(SIGINT, SIG_DFL); - if (execvp(av[0], av) == -1) { + if (execvp((char *)av[0], (char **)av) == -1) { if (errno == ENOENT) { warn("%s: Command not found", av[0]); } - else { perror(av[0]); } exit(1); + else { perror((char *)av[0]); } exit(1); } } - else if (pid == -1) { perror(av[0]); } + else if (pid == -1) { perror((char *)av[0]); } else { waitpid(pid, &status, 0); } return WEXITSTATUS(status); } + +/* -------------------------------------------------------------------------- */ + +int execute__(char *av[]) { + if (!av[0]) { return _ret; } + + for (struct bltn *b = bltns; b->s; ++b) { + if (strcmp(av[0], b->s) == 0) { return b->f(av); } + } + + return launch((u8 **)av); +} diff --git a/src/exec.h b/src/exec.h index 7bc9cfb..6c0f343 100644 --- a/src/exec.h +++ b/src/exec.h @@ -33,8 +33,6 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. #ifndef OMKOV_ESH_EXEC_H_VI5E8LVV #define OMKOV_ESH_EXEC_H_VI5E8LVV -extern int _loop, _ret; - -extern int execute(char *argv[]); +extern int execute__(char *argv[]); #endif // OMKOV_ESH_EXEC_H_VI5E8LVV diff --git a/src/lex.c b/src/lex.c index 5d45ce1..202c072 100644 --- a/src/lex.c +++ b/src/lex.c @@ -57,7 +57,7 @@ tok lex_next(lex *l) { /* Short circuit if the current token is EOF */ if (l->t.k == LK_EOF) { return l->t; } - tok t = l->t, n; u8 *ss; UINT sl = 0; + tok t = l->t, n = { 0 }; u8 *ss; UINT sl = 0; skip:; /* Skip null characters and whitespace */ @@ -95,11 +95,9 @@ ret:; /* Return the current token. */ tok lex_peek(lex *l) { return l->t; } -/* */ -void lex_debug(u8 *s) { - lex l = lex_init(s, strlen((char *)s)); - - for (tok t = lex_next(&l); t.k != LK_EOF; free(t.s), t = lex_next(&l)) { +/* Print lexer debug output. */ +void lex_debug(lex *l) { + for (tok t = lex_next(l); t.k != LK_EOF; free(t.s), t = lex_next(l)) { printf("%s \"%s\"\n", tok_ks[t.k], t.s); } } diff --git a/src/main.c b/src/main.c index df3756f..22cbd1a 100644 --- a/src/main.c +++ b/src/main.c @@ -33,7 +33,6 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. #include "esh.h" #include "exec.h" #include "lineread.h" -#include "parse.h" #include "cll/cll.h" #include "cll/error.h" @@ -85,16 +84,18 @@ int main(int ac, char *av[]) { (void)(ac); A0 = av[0]; u8 *line = lineread(); if (!line) { if (errno) { warn("%s, %d", serr(), errno); } break; } - if (Lflag) { lex_debug((u8 *)line); free(line); continue; } - for (char *e = (char *)line; *e;) { - char **args = parse(e, &e); - if (!args) { if (errno) { warn("%s", serr()); } break; } - - _ret = execute(args); - - for (size_t i = 0; args[i]; ++i) { free(args[i]); } free(args); - } free(line); + lex l = lex_init(line, strlen((char *)line)); + if (Lflag) { lex_debug(&l); goto loop; } + + /* FIXME there is a memory leak somewhere */ + ast *a = parse(&l); if (!a) { goto loop; } + if (Pflag) { ast_debug(a); goto loop; } + + _ret = execute(a); + +loop:; + ast_free(a); free(line); } while (_loop); linefree(); return warned; @@ -106,8 +107,10 @@ static void reset(int signo) { (void)(signo); static void hlp(void) { puts("ESH - Executive Shell\n"); - puts("Usage: esh\n"); + puts("Usage: esh [-LP]\n"); puts("Options:"); + puts(" -L Print lexer debug output"); + puts(" -P Print parser debug output"); puts(" --help Display help information"); puts(" --version Display version information"); return; diff --git a/src/parse.c b/src/parse.c index 1c9b7f6..92a58a6 100644 --- a/src/parse.c +++ b/src/parse.c @@ -30,84 +30,68 @@ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. */ -#include "parse.h" +#include "esh.h" + +#include "cll/cll.h" #include <ctype.h> #include <stdio.h> #include <stdlib.h> #include <string.h> -struct lex { char *s; size_t sp; }; +typedef struct { u8 **a; UINT al; } arr; -struct arr { char **a; size_t al, ac; }; -struct str { char *s; size_t sl, sc; }; +static const arr ARR_INIT = { NULL, 0 }; -static inline char *lex(struct lex *l); +u8 *ast_ks[] = { (u8 *)"NIL", (u8 *)"COM" }; -static inline void apush(struct arr *a, char *s); -static inline void spush(struct str *s, char c); +static void arr_push(arr *a, u8 *s); -/* Parse a string into an argument vector */ -char **parse(char *s, char **e) { - struct lex l; l.s = s; l.sp = 0; - struct arr a; a.al = 0; a.ac = 64; - - if (!(a.a = malloc(a.ac * sizeof (*a.a)))) { return NULL; } - - for (char *s = lex(&l); s; s = lex(&l)) { apush(&a, s); } - - a.a[a.al] = NULL; *e = s + l.sp; return a.a; -} +static ast *parse_com(lex *l); -#define c l->s[l->sp] +ast *ast_init(void) { return calloc(1, sizeof (ast)); } -/* Lex the next token from the string */ -static inline char *lex(struct lex *l) { - struct str s; s.sl = 0; s.sc = 64; - -skip:; - for (; isspace(c) && c != '\n'; ++l->sp) {} if (!c) { return NULL; } - - if (c == '\n' || c == ';') { ++l->sp; return NULL; } - if (c == '#') { for (++l->sp; !(c == '\n'); ++l->sp) {} goto skip; } +void ast_free(ast *a) { + for (; a->al; a->al -= 1) { free(a->a[a->al - 1]); } + free(a->a); free(a); +} + +/* Parse a program. */ +ast *parse(lex *l) { + if (lex_peek(l).k == LK_EOF) { return NULL; } - if (!(s.s = malloc(s.sc * sizeof (*s.s)))) { return NULL; } + /* TODO handle compound commands */ + return parse_com(l); +} + +/* Parse a command. */ +static ast *parse_com(lex *l) { + ast *a = ast_init(); arr av = ARR_INIT; + a->k = AK_COM; arr_push(&av, a->s = lex_next(l).s); - for (; c && c != '\n' && c != ';'; ++l->sp) { - if (isspace(c)) { ++l->sp; break; } - - switch (c) { - case '\\': switch (l->s[++l->sp]) { - case '\n': { continue; } - default: { spush(&s, c); continue; } - } - case '\'': { // SINGLE QUOTES - for (++l->sp; c != '\''; ++l->sp) { spush(&s, c); } continue; - } - case '\"': { // DOUBLE QUOTES - for (++l->sp; c != '\"'; ++l->sp) { - /* TODO */ spush(&s, c); - } continue; - } - default: { spush(&s, c); continue; } - } - } + /* Push each of the command arguments onto the array */ + for (tok t; (t = lex_next(l)).k == LK_WORD;) { arr_push(&av, t.s); } - return s.s; + a->al = av.al; arr_push(&av, NULL); a->a = av.a; return a; } -#undef c // l.s[l.sp] - -/* Push a string to the end of an array */ -static inline void apush(struct arr *a, char *s) { - if (a->al + 1 > a->ac) { a->ac *= 2; - a->a = realloc(a->a, a->ac * sizeof (*a->a)); - } a->a[a->al] = s; a->a[++a->al] = 0; return; +/* Push a string to the end of an array. */ +void arr_push(arr *a, u8 *s) { + a->a = realloc(a->a, a->al * sizeof (*a->a)); + a->a[a->al] = s; a->al += 1; } -/* Push a character to the end of a string */ -static inline void spush(struct str *s, char c) { - if (s->sl + 1 > s->sc) { s->sc *= 2; - s->s = realloc(s->s, s->sc * sizeof (*s->s)); - } s->s[s->sl] = c; s->s[++s->sl] = 0; return; +/* Print parser debug output with an indent. */ +static void ast_debug_indent(ast *a, UINT i) { + for (UINT j = 0; j != i; ++j) { printf("\t"); } + printf("%s: %s", ast_ks[a->k], a->s); + + /* Print command arguments if present. */ + for (UINT i = 0; i != a->al; i += 1) { printf(" %s", a->a[i]); } + printf("\n"); + + if (a->c) { ast_debug_indent(a->c, i + 1); } } + +/* Print parser debug output. */ +void ast_debug(ast *a) { ast_debug_indent(a, 0); } diff --git a/src/parse.h b/src/parse.h deleted file mode 100644 index d855040..0000000 --- a/src/parse.h +++ /dev/null @@ -1,38 +0,0 @@ -// parse.h -// Parser header file for ESH -// Copyright (C) 2020, Jakob Wakeling -// All rights reserved. - -/* -OMKOV Permissive Licence, version 1.0 - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimers. -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimers in the documentation and/or - other materials provided with the distribution. -* Neither the names of the copyright holders, nor the names of its contributors - may be used to endorse or promote products derived from this Software without - specific prior written permission. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT -HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. -*/ - -#ifndef OMKOV_ESH_PARSE_H_G9TJ04KR -#define OMKOV_ESH_PARSE_H_G9TJ04KR - -extern char **parse(char *s, char **e); - -#endif // OMKOV_ESH_PARSE_H_G9TJ04KR