Author | Jamozed <[email protected]> |
Date | 2021-09-07 12:33:03 |
Commit | 66d0b960e79836eac1f06426b1736806c7a3dbb7 |
Parent | ee92cc70f92734c71e9dc64546f223082a8cc2cc |
Implement lexer with proper tokens
Diffstat
A | src/cll/cll.h | | | 98 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | src/esh.h | | | 62 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | src/lex.c | | | 105 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | src/main.c | | | 10 | +++++++++- |
4 files changed, 274 insertions, 1 deletions
diff --git a/src/cll/cll.h b/src/cll/cll.h
new file mode 100644
index 0000000..8adf5b1
--- /dev/null
+++ b/src/cll/cll.h
@@ -0,0 +1,98 @@
+// cll.h
+// Common type aliases, limits and digit-test macros for cll
+// Copyright (C) 2021, Jakob Wakeling
+// All rights reserved.
+
+/*
+OMKOV Permissive Licence, version 1.0
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal with
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimers.
+* Redistributions in binary form must reproduce the above copyright notice, this
+  list of conditions and the following disclaimers in the documentation and/or
+  other materials provided with the distribution.
+* Neither the names of the copyright holders, nor the names of its contributors
+  may be used to endorse or promote products derived from this Software without
+  specific prior written permission.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT
+HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
+*/
+
+#ifndef OMKOV_CLL_CLL_H_ZO7IRBJS
+#define OMKOV_CLL_CLL_H_ZO7IRBJS
+
+#include <float.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+typedef void *ptr;
+/* Unsigned integers; uint_ (also spelled UINT) can hold a pointer value */
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+typedef uintptr_t uint_;
+
+#define UINT uint_
+/* Signed integers; sint can hold a pointer value */
+typedef int8_t s8;
+typedef int16_t s16;
+typedef int32_t s32;
+typedef int64_t s64;
+typedef intptr_t sint;
+/* Floating point; f128 is long double, whatever width the platform gives it */
+typedef float f32;
+typedef double f64;
+typedef long double f128;
+/* <stdint.h> defines no minimums for unsigned types, so zero is spelled out */
+#define U8_MIN 0
+#define U8_MAX UINT8_MAX
+#define U16_MIN 0
+#define U16_MAX UINT16_MAX
+#define U32_MIN UINT32_C(0)
+#define U32_MAX UINT32_MAX
+#define U64_MIN UINT64_C(0)
+#define U64_MAX UINT64_MAX
+#define UINT_MIN ((uintptr_t)0)
+#define UINT_MAX UINTPTR_MAX /* NOTE(review): same name as UINT_MAX in <limits.h> */
+
+#define S8_MIN INT8_MIN
+#define S8_MAX INT8_MAX
+#define S16_MIN INT16_MIN
+#define S16_MAX INT16_MAX
+#define S32_MIN INT32_MIN
+#define S32_MAX INT32_MAX
+#define S64_MIN INT64_MIN
+#define S64_MAX INT64_MAX
+#define SINT_MIN INTPTR_MIN
+#define SINT_MAX INTPTR_MAX
+/* NOTE: like FLT_MIN, the F*_MIN values are the smallest positive normalised ones */
+#define F32_MIN FLT_MIN
+#define F32_MAX FLT_MAX
+#define F64_MIN DBL_MIN
+#define F64_MAX DBL_MAX
+#define F128_MIN LDBL_MIN
+#define F128_MAX LDBL_MAX
+/* Mask with bit x set; has type int, so x must stay below the sign bit */
+#define BIT(x) (1 << (x))
+/* Digit tests for bases 2, 8, 10, 12 and 16; c is evaluated more than once */
+#define IS_BIN(c) ((c) == '0' || (c) == '1')
+#define IS_OCT(c) ((c) >= '0' && (c) <= '7')
+#define IS_DEC(c) ((c) >= '0' && (c) <= '9')
+#define IS_DOZ(c) (((c) >= '0' && (c) <= '9') || ((c) == 'A' || (c) == 'B'))
+#define IS_HEX(c) (((c) >= '0' && (c) <= '9') || ((c) >= 'A' && (c) <= 'F'))
+
+#endif // OMKOV_CLL_CLL_H_ZO7IRBJS
diff --git a/src/esh.h b/src/esh.h
new file mode 100644
index 0000000..05f3909
--- /dev/null
+++ b/src/esh.h
@@ -0,0 +1,62 @@
+// esh.h
+// Common header file for ESH
+// Copyright (C) 2021, Jakob Wakeling
+// All rights reserved.
+
+/*
+OMKOV Permissive Licence, version 1.0
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal with
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimers.
+* Redistributions in binary form must reproduce the above copyright notice, this
+  list of conditions and the following disclaimers in the documentation and/or
+  other materials provided with the distribution.
+* Neither the names of the copyright holders, nor the names of its contributors
+  may be used to endorse or promote products derived from this Software without
+  specific prior written permission.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT
+HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
+*/
+
+#ifndef OMKOV_ESH_ESH_H_SAZDXXFN
+#define OMKOV_ESH_ESH_H_SAZDXXFN
+
+#include "cll/cll.h"
+/* Token kinds; the order must match the names in tok_ks (lex.c) */
+typedef enum {
+	LK_NIL, LK_EOF, LK_WORD, LK_END,
+	LK_PIPE, LK_RDIN, LK_ROUT, LK_RERR,
+} tok_k;
+
+typedef enum { AK_NIL, AK_COM } ast_k; /* AST node kinds */
+/* lex: source s of sl bytes, position sp, and the current (lookahead) token t */
+typedef struct { tok_k k; u8 *s; } tok;
+typedef struct { u8 *s; UINT sp, sl; tok t; } lex;
+typedef struct ast { ast_k k; u8 *s; struct ast *c; } ast;
+/* Kind names for debug output; tok_ks is indexed by tok_k */
+extern u8 *tok_ks[];
+extern u8 *ast_ks[];
+
+extern lex lex_init(u8 *src, UINT len);
+extern tok lex_next(lex *l);
+extern tok lex_peek(lex *l);
+extern void lex_debug(u8 *s);
+
+extern ast *ast_init(void);
+extern void ast_free(ast *a);
+
+// extern ast *parse(lex *l);
+
+#endif // OMKOV_ESH_ESH_H_SAZDXXFN
diff --git a/src/lex.c b/src/lex.c
new file mode 100644
index 0000000..5d45ce1
--- /dev/null
+++ b/src/lex.c
@@ -0,0 +1,105 @@
+// lex.c
+// Lexer source file for ESH
+// Copyright (C) 2021, Jakob Wakeling
+// All rights reserved.
+
+/*
+OMKOV Permissive Licence, version 1.0
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal with
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimers.
+* Redistributions in binary form must reproduce the above copyright notice, this
+  list of conditions and the following disclaimers in the documentation and/or
+  other materials provided with the distribution.
+* Neither the names of the copyright holders, nor the names of its contributors
+  may be used to endorse or promote products derived from this Software without
+  specific prior written permission.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT
+HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
+*/
+
+#include "esh.h"
+
+#include "cll/cll.h"
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define C (l->s[l->sp])     /* Byte at the lexer's current position */
+#define D (l->s[l->sp + 1]) /* Byte following the current position */
+/* Printable token kind names, indexed by tok_k */
+u8 *tok_ks[] = {
+	(u8 *)"NIL", (u8 *)"EOF", (u8 *)"WORD", (u8 *)"END",
+	(u8 *)"PIPE", (u8 *)"RDIN", (u8 *)"ROUT", (u8 *)"RERR",
+};
+
+/* Initialise a lexer over len bytes of src and lex its first token. */
+lex lex_init(u8 *src, UINT len) {
+	lex l = { src, 0, len, { LK_NIL, NULL } }; lex_next(&l); return l;
+}
+
+/*
+	Lex the next token, and return the current one. The string of a WORD or
+	operator token is allocated by strndup (NULL on failure) and is to be
+	freed by the caller; EOF and END tokens carry a NULL string.
+*/
+tok lex_next(lex *l) {
+	/* Short circuit if the current token is EOF */
+	if (l->t.k == LK_EOF) { return l->t; }
+
+	tok t = l->t, n = { LK_NIL, NULL }; u8 *ss; UINT sl = 0;
+
+skip:;
+	/* Skip null characters and whitespace */
+	for (; l->sp != l->sl && (!C || (isspace(C) && C != '\n')); l->sp += 1);
+
+	if (l->sp == l->sl) { n.k = LK_EOF; goto ret; }
+	if (C == '\n' || C == ';') { l->sp += 1; n.k = LK_END; goto ret; }
+	/* A comment runs to the next newline or to the end of the input */
+	if (C == '#') { for (l->sp += 1; l->sp != l->sl && C != '\n'; l->sp += 1) {} goto skip; }
+
+	for (n.k = LK_WORD, ss = &C; l->sp != l->sl && C != '\n' && C != ';';) {
+		if (isspace(C)) { l->sp += 1; break; }
+		switch (C) {
+		case '|': case '<': case '>': {
+			if (sl != 0) { goto end; } /* An operator ends a word in progress */
+			n.k = (C == '|') ? LK_PIPE : (C == '<') ? LK_RDIN : LK_ROUT;
+			sl += 1; l->sp += 1;
+		} goto end;
+		/* TODO quoting; for now quote characters are part of the word */
+		default: { sl += 1; l->sp += 1; } break;
+		}
+	}
+
+end:;
+	n.s = (u8 *)strndup((char *)ss, sl);
+
+ret:;
+	l->t = n; return t;
+}
+
+/* Return the current token. */
+tok lex_peek(lex *l) { return l->t; }
+
+/* Print the kind and string of each token lexed from s, one per line. */
+void lex_debug(u8 *s) {
+	lex l = lex_init(s, strlen((char *)s));
+
+	for (tok t = lex_next(&l); t.k != LK_EOF; free(t.s), t = lex_next(&l)) {
+		printf("%s \"%s\"\n", (char *)tok_ks[t.k], t.s ? (char *)t.s : "");
+	}
+}
diff --git a/src/main.c b/src/main.c index 8ce38e6..3eb22bb 100644 --- a/src/main.c +++ b/src/main.c @@ -30,10 +30,12 @@ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. */ +#include "esh.h" #include "exec.h" #include "lineread.h" #include "parse.h" +#include "cll/cll.h" #include "cll/error.h" #include "cll/optget.h" @@ -45,6 +47,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. #include <stdint.h> #include <stdio.h> #include <stdlib.h> +#include <string.h> static struct lop lops[] = { { "help", ARG_NUL, 256 }, @@ -52,6 +55,8 @@ static struct lop lops[] = { { NULL, 0, 0 } }; +static bool Lflag = false, Pflag = false; + int _loop = 1, _ret; static jmp_buf jmp; @@ -63,8 +68,10 @@ static void hlp(void); static void ver(void); int main(int ac, char *av[]) { (void)(ac); A0 = av[0]; - struct opt opt = OPTGET_INIT; opt.str = ""; opt.lops = lops; + struct opt opt = OPTGET_INIT; opt.str = "LP"; opt.lops = lops; for (int o; (o = optget(&opt, av, 1)) != -1;) switch (o) { + case 'L': { Lflag = true; } break; + case 'P': { Pflag = true; } break; case 256: { hlp(); return 0; } case 257: { ver(); return 0; } default: { return 1; } @@ -78,6 +85,7 @@ int main(int ac, char *av[]) { (void)(ac); A0 = av[0]; char *line = lineread(); if (!line) { if (errno) { warn("%s, %d", serr(), errno); } break; } + if (Lflag) { lex_debug((u8 *)line); free(line); continue; } for (char *e = line; *e;) { char **args = parse(e, &e);