Author | Jakob Wakeling <[email protected]> |
Date | 2022-03-31 06:57:01 |
Commit | fd64da68f0e34993b9171bbf1fc41846655b01e8 |
Parent | b2691843413f657ab3cc611e833e57dc67122e1f |
parse: Implement expression parsing
Diffstat
M | CMakeLists.txt | | | 0 | |
M | README.md | | | 8 | +++++--- |
A | examples/expr.g | | | 1 | + |
M | src/lex.c | | | 16 | ++++++++++++++++ |
M | src/lex.h | | | 7 | +++++++ |
M | src/parse.c | | | 85 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ |
M | src/parse.h | | | 9 | +++++++-- |
7 files changed, 114 insertions, 12 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index ef6267c..269c961 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.12) +cmake_minimum_required(VERSION 3.14) project(G VERSION 0.2.0 LANGUAGES C) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin) diff --git a/README.md b/README.md index ade1857..664cb60 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ main :: proc() -> s32 { ### Dependencies -- CMake >= 3.12, to build +- CMake >= 3.14, to build - LLVM, to build - Clang, for linking compiled object files @@ -57,13 +57,15 @@ command. The second command will output an executable file, *a.out* by default. > Not all todo items will necessarily be implemented -- [ ] Implement main entrypoint -- [ ] Implement procedure declarations +- [x] Implement procedure declarations - [ ] Implement procedure calls +- [x] Implement variable declarations - [ ] Implement variable assignments - [x] Implement integers - [ ] Implement reals - [ ] Implement arrays +- [x] Implement expression parsing +- [ ] Implement expression code generation - [ ] Implement the *type* type - [ ] Implement multiple return values - [ ] Implement *defer* diff --git a/examples/expr.g b/examples/expr.g new file mode 100644 index 0000000..1e5ff82 --- /dev/null +++ b/examples/expr.g @@ -0,0 +1 @@ +main :: proc() -> s64 { return 2 + 3 - 1 * 4 / 2; } diff --git a/src/lex.c b/src/lex.c index 89ff592..9fcc7fb 100644 --- a/src/lex.c +++ b/src/lex.c @@ -32,6 +32,22 @@ char *tok_ks[] = { "TK_AS_NOT", "TK_AS_AND", "TK_AS_OR", "TK_AS_XOR", "TK_AS_SHL", "TK_AS_SHR", }; +/* Push a token to a token array. */ +void tok_a_push(tok_a *a, tok t) { + tok *ta = realloc(a->a, (a->al += 1) * sizeof (tok)); + if (!ta) { error(1, SERR); } else { a->a = ta; a->a[a->al - 1] = t; } +} + +/* Pop a token from a token array. */ +tok tok_a_pop(tok_a *a) { + return (a->al ? a->a[a->al -= 1] : (tok){ 0 }); +} + +/* Peek at the top token of a token array without removing it. 
*/ +tok tok_a_peek(tok_a *a) { + return (a->al ? a->a[a->al - 1] : (tok){ 0 }); +} + /* Initialise a lexer. */ lex lex_init(const char *file, char *src, UINT len) { lex l = { file, src, src, src + len, 0, 0, 0 }; diff --git a/src/lex.h b/src/lex.h index 4c23e49..4c06cff 100644 --- a/src/lex.h +++ b/src/lex.h @@ -30,12 +30,18 @@ typedef struct { union { u64 v_u64; s64 v_s64; f64 v_f64; }; } tok; +typedef struct { tok *a; UINT al; } tok_a; + typedef struct { const char *n; char *s, *p, *q; UINT ln, cl; tok t; } lex; extern char *tok_ks[]; +extern void tok_a_push(tok_a *a, tok t); +extern tok tok_a_pop(tok_a *a); +extern tok tok_a_peek(tok_a *a); + extern lex lex_init(const char *file, char *src, UINT len); extern tok lex_peek(lex *l); extern tok lex_next(lex *l); diff --git a/src/parse.c b/src/parse.c index c79afa4..90990cf 100644 --- a/src/parse.c +++ b/src/parse.c @@ -16,11 +16,11 @@ #include <string.h> char *ast_ks[] = { - "AK_NULL", "AK_PROG", + "AK_NULL", "AK_PROG", "AK_PROC", "AK_STMT", "AK_COMP", "AK_DECL", "AK_RETURN", "AK_IF", "AK_FOR", - "AK_EXPR", "AK_PROC", + "AK_OP_ADD", "AK_OP_SUB", "AK_OP_MUL", "AK_OP_DIV", "AK_OP_MOD", "AK_VAR", "AK_INT", }; @@ -53,6 +53,17 @@ void ast_push(ast *a, ast *c) { c->p = a; a->c.a[a->c.al - 1] = c; } +/* Push an AST to an AST array. */ +void ast_a_push(ast_a *aa, ast *a) { + ast **ta = realloc(aa->a, (aa->al += 1) * sizeof (ast *)); + if (!ta) { error(1, SERR); } else { aa->a = ta; aa->a[aa->al - 1] = a; } +} + +/* Pop an AST from an AST array. */ +ast *ast_a_pop(ast_a *aa) { + return (aa->al ? aa->a[aa->al -= 1] : NULL); +} + #define T (l->t) /* lex_peek equivalent */ /* Parse a program. 
*/ @@ -173,11 +184,38 @@ static ast *parse_stmt_for(lex *l, syt *st) { ast_push(a, parse_stmt(l, st)); return a; } +typedef struct { tok_k tk; ast_k ak; s32 o; bool as; } op; + +static const op ops[] = { + { TK_OP_MUL, AK_OP_MUL, 2, false }, + { TK_OP_DIV, AK_OP_DIV, 2, false }, + { TK_OP_MOD, AK_OP_MOD, 2, false }, + { TK_OP_ADD, AK_OP_ADD, 1, false }, + { TK_OP_SUB, AK_OP_SUB, 1, false }, +}; + +static inline op ops_lookup(tok_k k) { + for (UINT i = 0; i < (sizeof (ops) / sizeof (*ops)); i += 1) { + if (k == ops[i].tk) { return ops[i]; } + } return (op){ 0 }; +} + +static inline void shunt(ast_a *aa, tok t, op o) { + ast *r = ast_a_pop(aa); ast *l = ast_a_pop(aa); + + ast *a = ast_init(); a->k = o.ak; a->ln = t.ln; a->cl = t.cl; + ast_push(a, l); ast_push(a, r); ast_a_push(aa, a); +} + /* Parse an expression. */ static ast *parse_expr(lex *l, syt *st) { if (T.k == TK_PROC) { return parse_expr_proc(l, st); } - if (T.k == TK_INT) { return parse_int(l, st); } - if (T.k == TK_ID) { + + tok_a ts = { 0 }; ast_a as = { 0 }; + + /* Parse expressions with a shunting-yard algorithm */ + for (;;) switch (T.k) { + case TK_ID: { /* TODO handle procedure calls */ ast *a = ast_init(); tok t = lex_kind(l, TK_ID); a->k = AK_VAR; a->ln = t.ln; a->cl = t.cl; @@ -185,9 +223,33 @@ static ast *parse_expr(lex *l, syt *st) { if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); } - return a; + ast_a_push(&as, a); + } break; + case TK_INT: { ast_a_push(&as, parse_int(l, st)); } break; + case TK_LPAREN: { tok_a_push(&ts, lex_next(l)); } break; + case TK_RPAREN: { /* TODO */ error(1, "RPAREN: TODO"); } break; + default: { + op o1; if ((o1 = ops_lookup(T.k)).tk == TK_NULL) { goto eox; } + + for (op o2 = ops_lookup(tok_a_peek(&ts).k);; o2 = ops_lookup(tok_a_peek(&ts).k)) { + if (o2.tk == TK_LPAREN || (o1.o > o2.o && (o1.o != o2.o || o1.as == true))) { break; } + + shunt(&as, tok_a_pop(&ts), o2); + } + + tok_a_push(&ts, lex_next(l)); + } break; + } eox:; + + /* Pop any remaining operators from 
the operator stack */ + for (tok t = tok_a_pop(&ts); t.k != TK_NULL; t = tok_a_pop(&ts)) { + if (t.k == TK_LPAREN) { error(1, "LPAREN: TODO"); } + if (t.k == TK_RPAREN) { error(1, "RPAREN: TODO"); } + + shunt(&as, t, ops_lookup(t.k)); } - else { error(1, "PARSE_EXPR %s", tok_ks[T.k]); } + + return ast_a_pop(&as); } /* Parse a procedure expression. */ @@ -224,7 +286,7 @@ static ast *parse_int(lex *l, syt *st) { ast *a = ast_init(); tok t = lex_kind(l, TK_INT); a->k = AK_INT; a->ln = t.ln; a->cl = t.cl; - a->t = &types[TY_U64]; /* TODO properly determine type */ + a->t = &TYPE(TY_S64); /* TODO properly determine type */ if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); } if (!(a->v = val_parse_int(t.s)).k) { error(1, "%s: %s", t.s, SERR); } @@ -236,7 +298,12 @@ static ast *parse_int(lex *l, syt *st) { void ast_print(ast *a, UINT i) { for (UINT j = 0; j != i; ++j) { printf(" "); } printf("%zu:%zu: %s: %s", a->ln + 1, a->cl + 1, ast_ks[a->k], a->s); - if (a->k == AK_PROC) { printf(" -> %s", a->t->s); } + + switch (a->k) { + case AK_PROC: case AK_INT: { printf(" -> %s", a->t->s); } break; + default: {} break; + } + fputc('\n', stdout); if (a->c.a != NULL) for (UINT ci = 0; ci != a->c.al; ci += 1) { diff --git a/src/parse.h b/src/parse.h index 3900784..f26036a 100644 --- a/src/parse.h +++ b/src/parse.h @@ -13,11 +13,11 @@ #include "value.h" typedef enum { - AK_NULL, AK_PROG, + AK_NULL, AK_PROG, AK_PROC, AK_STMT, AK_COMP, AK_DECL, AK_RETURN, AK_IF, AK_FOR, - AK_EXPR, AK_PROC, + AK_OP_ADD, AK_OP_SUB, AK_OP_MUL, AK_OP_DIV, AK_OP_MOD, AK_VAR, AK_INT, } ast_k; @@ -29,11 +29,16 @@ typedef struct ast_s { struct ast_s *p; struct { struct ast_s **a; UINT al; } c; } ast; +typedef struct { ast **a; UINT al; } ast_a; + extern char *ast_ks[]; extern ast *ast_init(void); extern void ast_push(ast *a, ast *c); +extern void ast_a_push(ast_a *aa, ast *a); +extern ast *ast_a_pop(ast_a *aa); + extern ast *parse(lex *l); extern void ast_print(ast *a, UINT i);