Author | Jakob Wakeling <[email protected]> |
Date | 2023-06-12 13:44:44 |
Commit | 19793bd05244bc52fa239c26f4fc71415de221bb |
Parent | d44ca567693a048a42551b84af16eeaf09101aa0 |
Replace the expression parser with a Pratt parser
Diffstat
M | doc/g.ebnf | | | 4 | ++-- |
M | examples/main.g | | | 5 | ++--- |
M | src/lex.c | | | 2 | +- |
M | src/llvm.c | | | 10 | +++++++++- |
M | src/parse.c | | | 198 | +++++++++++++++++++++++++++++++++++-------------------------------------------- |
M | src/parse.h | | | 4 | +++- |
M | src/type.c | | | 2 | +- |
M | src/type.h | | | 2 | +- |
8 files changed, 108 insertions, 119 deletions
diff --git a/doc/g.ebnf b/doc/g.ebnf index 75d223f..91d8fad 100644 --- a/doc/g.ebnf +++ b/doc/g.ebnf @@ -14,12 +14,12 @@ stmt = stmt_compound | [ expr ], ";" | "return", [ expr ], ";" | "if", "(", expr, ")", stmt, [ "else", stmt ] - | "for", "(" expr, ";", [ expr, ";" ], [ expr, ";" ], ")", stmt + | "for", "(" expr, [ ";", expr ], [ ";", expr ], ")", stmt ; stmt_compound = "{", { stmt }, "}" ; -stmt_decl = iden, ":", ( decl_constant | decl_variable ) ; +stmt_decl = iden, ":", ( stmt_decl_constant | stmt_decl_variable ) ; stmt_decl_constant = [ type ], ":", expr ; stmt_decl_variable = [ type ], "=", expr | type ; diff --git a/examples/main.g b/examples/main.g index 1d5cfa9..09b6fc1 100644 --- a/examples/main.g +++ b/examples/main.g @@ -1,5 +1,4 @@ -main :: proc() -> u64 { +main :: proc() -> u32 { #syscall(u64(60), u64(42)); - var : u64 = 69; - return var; + return u32(s32(-1.0)); } diff --git a/src/lex.c b/src/lex.c index 5914950..716b46e 100644 --- a/src/lex.c +++ b/src/lex.c @@ -94,7 +94,7 @@ tok lex_next(lex *l) { case '*': { UINT d = 1; for (P += 2, CL += 2; P != Q && d; P += 1) { if (C == '/' && D == '*') { P += 2; CL += 2; d += 1; continue; } - if (C == '*' && D == '/') { P += 2; CL += 2; d -= 1; continue; } + if (C == '*' && D == '/') { P += d == 1 ? 1 : 2; CL += 2; d -= 1; continue; } if (C == '\n') { LN += 1; CL = 0; } else { CL += 1; } } } goto skip; diff --git a/src/llvm.c b/src/llvm.c index a8fedb7..b3cb5fb 100644 --- a/src/llvm.c +++ b/src/llvm.c @@ -271,6 +271,14 @@ static LLVMValueRef llvm_expr(ast *a, syt *st) { case AK_OP_MOD: { return LLVMBuildSRem(llvm_builder, llvm_expr(A.c.a[0], st), llvm_expr(A.c.a[1], st), "mod"); } break; + case AK_OP_NEG: { + type *t = ast_type(A.c.a[0], st); + if (t == NULL) { note("TODO", A.ln, A.cl, -1, "Subtree is missing a type"); } + + if (is_int(t)) { return LLVMBuildNeg(llvm_builder, llvm_expr(A.c.a[0], st), "neg"); } + else if (is_flt(t)) { return LLVMBuildFNeg(llvm_builder, llvm_expr(A.c.a[0], st), "neg"); } + else { note("TODO", A.ln, A.cl, -1, "Expression cannot be made negative (LLVM Failsafe)"); } + } default: { error(2, "llvm_expr unknown kind %s", ast_ks[a->k]); } break; } } @@ -385,7 +393,7 @@ static inline void llvm_free(void) { /* Return the appropriate LLVMTypeRef for a G type. */ static LLVMTypeRef llvm_type(type *t) { switch (t->k) { - case TY_NULL: { return LLVMVoidType(); } break; + case TY_ZERO: { return LLVMVoidType(); } break; case TY_B8: { return LLVMIntType(8); } break; case TY_B16: { return LLVMIntType(16); } break; case TY_B32: { return LLVMIntType(32); } break; diff --git a/src/parse.c b/src/parse.c index 8325461..c896a3c 100644 --- a/src/parse.c +++ b/src/parse.c @@ -16,15 +16,12 @@ #include <stdlib.h> #include <string.h> -/* tk : Token Kind, ak : AST Kind, o : Precedence, as : Left Associative */ -typedef struct { tok_k tk; ast_k ak; s32 o; bool as; } op; - char *ast_ks[] = { "AK_ZERO", "AK_PROG", "AK_PROC", "AK_TYPE", "AK_CAST", "AK_STMT", "AK_COMP", "AK_DECL", "AK_RETURN", "AK_IF", "AK_FOR", - "AK_OP_POS", "AK_OP_NEG", + "AK_OP_POS", "AK_OP_NEG", "AK_OP_ADO", "AK_OP_DRF", "AK_OP_ADD", "AK_OP_SUB", "AK_OP_MUL", "AK_OP_DIV", "AK_OP_MOD", "AK_ASSIGN", "AK_AS_ADD", "AK_AS_SUB", "AK_AS_MUL", "AK_AS_DIV", "AK_AS_MOD", @@ -43,21 +40,27 @@ static ast *parse_stmt_return(lex *l, syt *st); static ast *parse_stmt_if(lex *l, syt *st); static ast *parse_stmt_for(lex *l, syt *st); -static ast *parse_expr(lex *l, syt *st, bool arg); +static ast *parse_expr(lex *l, syt *st, s32 o); static ast *parse_expr_proc(lex *l, syt *st); static ast *parse_num(lex *l, syt *st); static ast *parse_int(lex *l, syt *st); static ast *parse_flt(lex *l, syt *st); -static op op_lookup(tok_k tk, bool unary); +static s32 tok_precedence(tok_k tk); +static s32 ast_precedence(ast_k ak); /* Initialise an AST node. */ -ast *ast_init(void) { +inline ast *ast_init(void) { ast *a = calloc(1, sizeof (ast)); if (!a) { error(1, SERR); } return a; } +/* Initialise an AST node of a specific kind. */ +inline ast *ast_kind(ast_k kind) { + ast *a = ast_init(); a->k = kind; return a; +} + /* Push a child AST node to an AST node. */ void ast_push(ast *a, ast *c) { ast **ca = realloc(a->c.a, (a->c.al += 1) * sizeof (ast *)); @@ -189,7 +192,7 @@ static ast *parse_stmt_decl(lex *l, syt *st, ast *a) { ); } /* Assign a constant or variable value */ - if (T.k == TK_COLON || T.k == TK_ASSIGN) { lex_next(l); ast_push(a, parse_expr(l, st, false)); } + if (T.k == TK_COLON || T.k == TK_ASSIGN) { lex_next(l); ast_push(a, parse_expr(l, st, 0)); } else { error(1, "%s:%zu:%zu: error: expected ':' or '='", l->n, T.ln + 1, T.cl + 1); } /* Parse a semicolon if one is required */ @@ -208,7 +211,7 @@ static ast *parse_stmt_assn(lex *l, syt *st, ast *a) { case TK_AS_MUL: { a->k = AK_AS_MUL; } goto expr; case TK_AS_DIV: { a->k = AK_AS_DIV; } goto expr; case TK_AS_MOD: { a->k = AK_AS_MOD; } goto expr; - expr: { lex_next(l); ast_push(a, parse_expr(l, st, false)); } break; + expr: { lex_next(l); ast_push(a, parse_expr(l, st, 0)); } break; default: { error(1, "%s:%zu:%zu: error: expected assignment operator", l->n, T.ln + 1, T.cl + 1); } break; } @@ -217,7 +220,7 @@ static ast *parse_stmt_assn(lex *l, syt *st, ast *a) { /* Parse an expression statement. */ static ast *parse_stmt_expr(lex *l, syt *st) { - ast *a = NULL; if (T.k != TK_SCOLON) { a = parse_expr(l, st, false); } + ast *a = NULL; if (T.k != TK_SCOLON) { a = parse_expr(l, st, 0); } lex_kind(l, TK_SCOLON); return a; } @@ -226,7 +229,7 @@ static ast *parse_stmt_return(lex *l, syt *st) { lex_kind(l, TK_RETURN); ast *a = ast_init(); a->k = AK_RETURN; - if (T.k != TK_SCOLON) { ast_push(a, parse_expr(l, st, false)); } + if (T.k != TK_SCOLON) { ast_push(a, parse_expr(l, st, 0)); } lex_kind(l, TK_SCOLON); return a; } @@ -237,7 +240,7 @@ static ast *parse_stmt_if(lex *l, syt *st) { ast *a = ast_init(); a->k = AK_IF; /* Parse expression and closing parenthesis */ - ast_push(a, parse_expr(l, st, false)); lex_kind(l, TK_RPAREN); + ast_push(a, parse_expr(l, st, 0)); lex_kind(l, TK_RPAREN); /* Parse the if statement body */ ast_push(a, parse_stmt(l, st)); return a; @@ -251,30 +254,18 @@ static ast *parse_stmt_for(lex *l, syt *st) { /* Parse one to three expressions and a closing parenthesis */ ast_push(a, parse_stmt_expr(l, st)); ast_push(a, parse_stmt_expr(l, st)); - ast_push(a, parse_expr(l, st, false)); lex_kind(l, TK_RPAREN); + ast_push(a, parse_expr(l, st, 0)); lex_kind(l, TK_RPAREN); /* Parse the for statement body */ ast_push(a, parse_stmt(l, st)); return a; } -static inline void shunt(ast_a *aa, tok t, op o) { - ast *r = ast_a_pop(aa); ast *l = ast_a_pop(aa); - - ast *a = ast_init(); a->k = o.ak; a->ln = t.ln; a->cl = t.cl; - ast_push(a, l); ast_push(a, r); ast_a_push(aa, a); -} - /* Parse an expression. */ -static ast *parse_expr(lex *l, syt *st, bool arg) { - if (T.k == TK_PROC) { return parse_expr_proc(l, st); } - - tok_a ts = { 0 }; ast_a as = { 0 }; - - /* Parse expressions with a shunting-yard algorithm */ - for (;;) switch (T.k) { +static ast *parse_expr(lex *l, syt *st, s32 o) { + ast *left = NULL; switch (T.k) { case TK_ID: { - ast *a = ast_init(); tok t = lex_kind(l, TK_ID); - a->ln = t.ln; a->cl = t.cl; + left = ast_init(); tok t = lex_kind(l, TK_ID); + left->ln = t.ln; left->cl = t.cl; ast *sym = syt_search_h(st, t.h, t.s); if (sym == NULL) { note(l->n, t.ln, t.cl, -1, "use of undeclared identifier \"%s\"", t.s); } @@ -282,94 +273,66 @@ static ast *parse_expr(lex *l, syt *st, bool arg) { if (T.k == TK_LPAREN) { lex_kind(l, TK_LPAREN); - if (sym->k == AK_TYPE) { a->k = AK_CAST; a->t = sym->t; } - else { a->k = AK_CALL; } + if (sym->k == AK_TYPE) { left->k = AK_CAST; left->t = sym->t; } + else { left->k = AK_CALL; } if (T.k != TK_RPAREN) for (;;) { - ast_push(a, parse_expr(l, st, true)); + ast_push(left, parse_expr(l, st, 0)); if (T.k != TK_COMMA) { break; } - if (a->k == AK_CAST) { note(l->n, T.ln, T.cl, 0, "type casts must have only a single argument"); } + if (left->k == AK_CAST) { note(l->n, T.ln, T.cl, 0, "type casts must have only a single argument"); } lex_kind(l, TK_COMMA); } lex_kind(l, TK_RPAREN); } - else { - a->k = AK_ID_VAR; a->t = sym->t; - } - - if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); } - - ast_a_push(&as, a); - } break; - case TK_NUM: { ast_a_push(&as, parse_num(l, st)); } break; - case TK_COLON: { if (arg) { goto eox; }} break; - case TK_LPAREN: { tok_a_push(&ts, lex_next(l)); } break; - case TK_RPAREN: { - for (tok t = tok_a_pop(&ts);; t = tok_a_pop(&ts)) { - if (t.k == TK_ZERO) /* Stack is empty */ { - if (arg) { goto eox; } - note(l->n, T.ln, T.cl, -1, "expected left parenthesis"); - } - if (t.k == TK_LPAREN) { break; } - - shunt(&as, t, op_lookup(t.k, false)); - } + else { left->k = AK_ID_VAR; left->t = sym->t; } - lex_next(l); + if (!(left->s = strdup(t.s))) { error(1, "%s", SERR); } } break; + case TK_NUM: { left = parse_num(l, st); } break; + case TK_PROC: { return parse_expr_proc(l, st); } break; case TK_HASH: { - ast *a = ast_init(); tok t = lex_kind(l, TK_HASH); - a->ln = t.ln; a->cl = t.cl; bool needs_args = false; + left = ast_init(); tok t = lex_kind(l, TK_HASH); + left->ln = t.ln; left->cl = t.cl; bool needs_args = false; - if (strcmp(t.s, "#syscall") == 0) { a->k = AK_HASH_SYSCALL; needs_args = true; } + if (strcmp(t.s, "#syscall") == 0) { left->k = AK_HASH_SYSCALL; needs_args = true; } else { note("TODO", t.ln, t.cl, 0, "%s: unrecognised hash procedure", t.s); } if (needs_args) { lex_kind(l, TK_LPAREN); if (T.k != TK_RPAREN) for (;;) { - ast_push(a, parse_expr(l, st, true) /* FIXME THIS IS NULL WHEN STRING OR ANY UNHANDLED EXPRESSION */); + ast_push(left, parse_expr(l, st, true)); if (T.k != TK_COMMA) { break; } - if (a->k == AK_CAST) { note(l->n, T.ln, T.cl, 0, "type casts must have only a single argument"); } + if (left->k == AK_CAST) { note(l->n, T.ln, T.cl, 0, "type casts must have only a single argument"); } lex_kind(l, TK_COMMA); } lex_kind(l, TK_RPAREN); } - if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); } - - ast_a_push(&as, a); + if (!(left->s = strdup(t.s))) { error(1, "%s", SERR); } } break; - default: /* Handle operators */ { - op o1, o2; if ((o1 = op_lookup(T.k, false)).tk == TK_ZERO) { goto eox; } - - /* - If there is an operator at the top of the operator stack that is not - a left parenthesis, and has greater precedence than o1 or has the - same precedence as o1 and o1 is left-associative, then pop it from - the stack onto the output. - */ - for (o2 = op_lookup(tok_a_peek(&ts).k, false); o2.tk != TK_ZERO; o2 = op_lookup(tok_a_peek(&ts).k, false)) { - if (o2.tk == TK_LPAREN || (o1.o < o2.o && (o1.o != o2.o || o1.as == true))) { break; } - - shunt(&as, tok_a_pop(&ts), o2); - } - - tok_a_push(&ts, lex_next(l)); - } break; - } eox:; + case TK_LPAREN: { lex_next(l); left = parse_expr(l, st, 0); lex_kind(l, TK_RPAREN); } break; + case TK_OP_ADD: { left = ast_kind(AK_OP_POS); } goto prefix; + case TK_OP_SUB: { left = ast_kind(AK_OP_NEG); } goto prefix; + case TK_BW_AND: { left = ast_kind(AK_OP_ADO); } goto prefix; + case TK_OP_MUL: { left = ast_kind(AK_OP_DRF); } goto prefix; + prefix: { lex_next(l); ast_push(left, parse_expr(l, st, ast_precedence(left->k))); } break; + default: { note(l->n, T.ln, T.cl, -1, "Unhandled expression of kind %s", tok_ks[T.k]); } break; + } - /* Pop any remaining operators from the operator stack */ - for (tok t = tok_a_pop(&ts); t.k != TK_ZERO; t = tok_a_pop(&ts)) { - if (t.k == TK_LPAREN) { error(1, "LPAREN: TODO"); } - if (t.k == TK_RPAREN) { error(1, "RPAREN: TODO"); } - - shunt(&as, t, op_lookup(t.k, false)); + /* Parse an infix expression if one is present */ + for (ast *a = NULL; tok_precedence(T.k) > o; left = a) switch (T.k) { + case TK_OP_ADD: { a = ast_kind(AK_OP_ADD); } goto infix; + case TK_OP_SUB: { a = ast_kind(AK_OP_SUB); } goto infix; + case TK_OP_MUL: { a = ast_kind(AK_OP_MUL); } goto infix; + case TK_OP_DIV: { a = ast_kind(AK_OP_DIV); } goto infix; + case TK_OP_MOD: { a = ast_kind(AK_OP_MOD); } goto infix; + infix: { lex_next(l); ast_push(a, left); ast_push(a, parse_expr(l, st, ast_precedence(a->k))); } break; } - return ast_a_pop(&as); + return left; } /* Parse a procedure expression. */ @@ -396,7 +359,7 @@ static ast *parse_expr_proc(lex *l, syt *st) { a->t = s->t; } - else { a->t = &TYPE(TY_NULL); } + else { a->t = &TYPE(TY_ZERO); } ast_push(a, parse_stmt_compound(l, st)); return a; } @@ -444,21 +407,36 @@ static ast *parse_flt(lex *l, syt *st) { return a; } -/* Lookup the operator associated with a particular token. */ -static op op_lookup(tok_k tk, bool unary) { - if (unary) switch (tk) { - case TK_OP_ADD: { return (op){ TK_OP_ADD, AK_OP_POS, 2, true }; } - case TK_OP_SUB: { return (op){ TK_OP_SUB, AK_OP_NEG, 2, true }; } - default: { return (op){ 0 }; } break; +/* + Expression operator precedence: + 8 > expression group (parenthesis), function call + 7 > + 6 > positive (prefix +), negative (prefix -), address-of (prefix &), dereference (prefix *) + 5 > + 4 > multiplication (*), division (/), modulo (%) + 3 > addition (+), subtraction (-) + 2 > + 1 > +*/ + +/* Get the infix precedence of a token kind. */ +static s32 tok_precedence(tok_k tk) { + switch (tk) { + case TK_LPAREN: { return 8; } + case TK_OP_MUL: case TK_OP_DIV: case TK_OP_MOD: { return 4; } + case TK_OP_ADD: case TK_OP_SUB: { return 3; } + default: { return 0; } + } +} + +/* Get the precedence of an AST kind. */ +static s32 ast_precedence(ast_k ak) { + switch (ak) { + case AK_OP_POS: case AK_OP_NEG: case AK_OP_ADO: case AK_OP_DRF: { return 6; } + case AK_OP_MUL: case AK_OP_DIV: case AK_OP_MOD: { return 4; } + case AK_OP_ADD: case AK_OP_SUB: { return 3; } + default: { return 0; } } - else switch (tk) { - case TK_OP_ADD: { return (op){ TK_OP_ADD, AK_OP_ADD, 4, false }; } break; - case TK_OP_SUB: { return (op){ TK_OP_SUB, AK_OP_SUB, 4, false }; } break; - case TK_OP_MUL: { return (op){ TK_OP_MUL, AK_OP_MUL, 3, false }; } break; - case TK_OP_DIV: { return (op){ TK_OP_DIV, AK_OP_DIV, 3, false }; } break; - case TK_OP_MOD: { return (op){ TK_OP_MOD, AK_OP_MOD, 3, false }; } break; - default: { return (op){ 0 }; } break; - }; } /* Recursively print an AST. */ diff --git a/src/parse.h b/src/parse.h index 7781e50..203db0c 100644 --- a/src/parse.h +++ b/src/parse.h @@ -20,7 +20,7 @@ typedef enum { AK_STMT, AK_COMP, AK_DECL, AK_RETURN, AK_IF, AK_FOR, - AK_OP_POS, AK_OP_NEG, + AK_OP_POS, AK_OP_NEG, AK_OP_ADO, AK_OP_DRF, AK_OP_ADD, AK_OP_SUB, AK_OP_MUL, AK_OP_DIV, AK_OP_MOD, AK_ASSIGN, AK_AS_ADD, AK_AS_SUB, AK_AS_MUL, AK_AS_DIV, AK_AS_MOD, @@ -46,6 +46,7 @@ typedef struct { ast **a; UINT al; } ast_a; extern char *ast_ks[]; extern ast *ast_init(void); +extern ast *ast_kind(ast_k kind); extern void ast_push(ast *a, ast *c); extern void ast_displace(ast *a, ast *c); extern type *ast_type(ast *a, syt *st); diff --git a/src/type.c b/src/type.c index 548ec6e..e053f0e 100644 --- a/src/type.c +++ b/src/type.c @@ -6,7 +6,7 @@ #include "type.h" type types[] = { - { TY_NULL, 0, 0, "void" }, + { TY_ZERO, 0, 0, "void" }, { TY_TYPE, 0, -1, "type" }, { TY_PTR, TF_PTR, -1, "ptr" }, { TY_AUTO, 0, -1, "auto" }, diff --git a/src/type.h b/src/type.h index 2fbd7e1..9a56b7a 100644 --- a/src/type.h +++ b/src/type.h @@ -11,7 +11,7 @@ #define TYPE(a) (types[a]) typedef enum { - TY_NULL, TY_TYPE, TY_PTR, TY_AUTO, + TY_ZERO, TY_TYPE, TY_PTR, TY_AUTO, TY_BOOL, TY_B8, TY_B16, TY_B32, TY_B64,