Author | Jakob Wakeling <[email protected]> |
Date | 2023-07-01 03:15:32 |
Commit | 828da145ae807f2023e1ef42127b5650c80e40c3 |
Parent | f05bcdbc0f36b14cc6a35475944ca081233cf497 |
Parse procedure calls as an infix operator
Diffstat
M | README.md | | | 17 | ++--------------- |
M | doc/g.ebnf | | | 7 | +++---- |
M | doc/spec.md | | | 5 | +++++ |
M | src/analyse.c | | | 21 | ++++++++++++++++++--- |
M | src/llvm.c | | | 53 | ++++++++++++++++++++++++----------------------------- |
M | src/parse.c | | | 66 | +++++++++++++++++++++++++++++++++++------------------------------- |
M | src/parse.h | | | 2 | +- |
7 files changed, 88 insertions, 83 deletions
diff --git a/README.md b/README.md index 164afd1..03b583b 100644 --- a/README.md +++ b/README.md @@ -5,19 +5,6 @@ Influenced by **C**, **C++**, **Odin**, and others. Note that at present, **G** is highly unstable and will certainly change. -## Example - -```g -import "std:io"; - -main :: proc() -> s32 { - io.print("Hello, World!\n"); - return 0; -} -``` - -*Note that this example will not currently compile.* - ## Usage **G** is being developed on x86-64 Linux, and is untested elsewhere. @@ -26,7 +13,7 @@ main :: proc() -> s32 { - CMake >= 3.14, to build - LLVM, to build -- Clang, for linking compiled object files +- Clang, for `_start()` and linking ### Building @@ -67,7 +54,7 @@ command. The second command will output an executable file, *a.out* by default. - [x] Implement reals - [x] Implement pointers - [x] Implement arrays -- [ ] Implement variable length arrays +- [ ] Implement variable length arrays (?) - [x] Implement expressions - [x] Implement type casting - [ ] Implement type casting to pointers and arrays diff --git a/doc/g.ebnf b/doc/g.ebnf index 862f5e1..b893a12 100644 --- a/doc/g.ebnf +++ b/doc/g.ebnf @@ -30,6 +30,7 @@ expr = iden | literal | "(", expr, ")" | type, "(", expr, ")" (* Type cast *) | iden, "(", [ expr, { ",", expr } ], ")" (* Procedure call *) + | iden, "[", expr, "]" (* Array access *) | "#", iden, [ "(", [ expr, { ",", expr } ], ")" ] (* Hash expression *) | "+", expr | "-", expr (* Unary POS and NEG *) | "!", expr | "~", expr (* Logical and bitwise NOT *) @@ -60,8 +61,8 @@ expr_proc = "proc", "(", [ parm_list ], ")", [ "->", type ], stmt_compound ; parm = iden, ":", type ; parm_list = parm, { ",", parm } ; -iden = alphu, { alphu | digit } ; -type = iden ; +iden = ( alpha | "_" ), { alpha | digit | "_" } ; +type = { "*" | "[", [ literal_int ], "]" }, iden ; (* Literals *) literal_null = "null" ; @@ -96,7 +97,6 @@ digit_doz = digit_dec | "A" | "B" ; digit_hex = digit_dec | "A" | "B" | "C" | "D" | "E" | "F" ; quadd_hex = digit_hex, digit_hex, digit_hex, digit_hex ; -alphu = alpha | "_" ; alpha = alpha_upper | alpha_lower ; alpha_upper = ANY_UPPERCASE_ENGLISH_LETTER ; alpha_lower = ANY_LOWERCASE_ENGLISH_LETTER ; diff --git a/doc/spec.md b/doc/spec.md index c0c9994..392abaf 100644 --- a/doc/spec.md +++ b/doc/spec.md @@ -13,6 +13,7 @@ - [Variables](#variables) - [Types](#types) - [Basic Types](#basic-types) + - [Arrays](#arrays) ## Lexical Elements @@ -177,3 +178,7 @@ f16be f32be f64be f128be // big endian char // alias of u8 rune // alias of u32 ``` + +### Arrays + +An array is a fixed length sequence of elements of a single type. diff --git a/src/analyse.c b/src/analyse.c index d24d48f..2f275d6 100644 --- a/src/analyse.c +++ b/src/analyse.c @@ -141,10 +141,24 @@ static void analyse_stmt_for(ast *a, syt *st) { /* Analyse an expression. */ static void analyse_expr(ast *a, syt *st) { - if (A.k == AK_PROC) { analyse_expr_proc(a, st); } - if (A.k == AK_OP_DRF) { A.t = ast_type(C[0], st)->base; } - - for (UINT i = 0; i < CL; i += 1) { analyse_expr(C[i], st); } + switch (A.k) { + case AK_CALL: { + assert(C[0]->k == AK_ID); + assert(C[0]->h != 0); + assert(C[0]->s != NULL); + + ast *sym = syt_search_h(st, C[0]->h, C[0]->s); + if (sym == NULL) { note("TODO", A.ln, A.cl, 0, "Use of undeclared identifier \"%s\"", C[0]->s); } + + else if (sym->k == AK_TYPE) { + A.k = AK_CAST; A.t = sym->t; + if (CL > 2) { note("TODO", A.ln, A.cl, 0, "Type casts must only have a single argument"); } + } + } break; + case AK_PROC: { analyse_expr_proc(a, st); } break; + case AK_OP_DRF: { A.t = ast_type(C[0], st)->base; } break; + default: { for (UINT i = 0; i < CL; i += 1) { analyse_expr(C[i], st); }} break; + } } /* Analyse a procedure expression. */ diff --git a/src/llvm.c b/src/llvm.c index 852823b..01b71a3 100644 --- a/src/llvm.c +++ b/src/llvm.c @@ -170,12 +170,8 @@ static LLVMValueRef llvm_stmt_decl(ast *a, syt *st) { /* Generate IR for an assignment statement. */ static LLVMValueRef llvm_stmt_assn(ast *a, syt *st) { assert( - a->k == AK_ASSIGN || - a->k == AK_AS_ADD || - a->k == AK_AS_SUB || - a->k == AK_AS_MUL || - a->k == AK_AS_DIV || - a->k == AK_AS_MOD + a->k == AK_ASSIGN || a->k == AK_AS_ADD || a->k == AK_AS_SUB || + a->k == AK_AS_MUL || a->k == AK_AS_DIV || a->k == AK_AS_MOD ); ast *v = syt_search(st, C[0]->s); @@ -242,7 +238,7 @@ static LLVMValueRef llvm_expr(ast *a, syt *st) { case AK_ARR: { return llvm_arr(a, st); } break; case AK_PROC: { return llvm_expr_proc(a, st); } break; case AK_CAST: { return llvm_expr_cast(a, st); } break; - case AK_ID_VAR: { + case AK_ID: { ast *sym = syt_search(st, a->s); if (sym == NULL) { note(file_name, A.ln, A.cl, 0, "Undefined variable %s", A.s); } @@ -251,12 +247,14 @@ static LLVMValueRef llvm_expr(ast *a, syt *st) { return LLVMBuildLoad2(llvm_builder, llvm_type(sym->t), sym->llvm_v, ""); } break; case AK_CALL: { - ast *v = syt_search(st, a->s); - if (v == NULL) { error(2, "llvm_expr: Undefined procedure %s", a->s); } + ast *sym = syt_search_h(st, C[0]->h, C[0]->s); + if (sym == NULL) { note(file_name, A.ln, A.cl, -1, "Undefined procedure \"%s\" (llvm:llvm_expr)", C[0]->s); } - if (!v->llvm_v) { error(2, "llvm_expr: Procedure follows"); } + if (!sym->llvm_v) { note(file_name, A.ln, A.cl, -1, "Procedure \"%s\" follows (llvm:llvm_expr)", C[0]->s); } - return LLVMBuildCall2(llvm_builder, v->llvm_t, v->llvm_v, NULL, 0, ""); + /* TODO procedure call arguments */ + + return LLVMBuildCall2(llvm_builder, sym->llvm_t, sym->llvm_v, NULL, 0, ""); } break; case AK_OP_POS: { a = C[0]; goto reset; /* no-op */ } case AK_OP_NEG: { @@ -321,54 +319,55 @@ static LLVMValueRef llvm_expr_proc(ast *a, syt *st) { /* Generate IR for a type cast. */ static LLVMValueRef llvm_expr_cast(ast *a, syt *st) { - assert(a->t != NULL); assert(a->cl > 0); + assert(A.t != NULL); + assert(CL == 2); - type *expr_type = ast_type(C[0], st); + type *expr_type = ast_type(C[1], st); if (is_ptr(expr_type)) { if (is_int(a->t)) { - return LLVMBuildPtrToInt(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), ""); + return LLVMBuildPtrToInt(llvm_builder, llvm_expr(C[1], st), llvm_type(a->t), ""); } } else if (is_bool(expr_type)) { if (is_int(a->t)) { - return LLVMBuildIntCast2(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), is_sign(a->t), ""); + return LLVMBuildIntCast2(llvm_builder, llvm_expr(C[1], st), llvm_type(a->t), is_sign(a->t), ""); } } else if (is_int(expr_type)) { if (is_bool(a->t) || is_int(a->t)) { - return LLVMBuildIntCast2(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), is_sign(a->t), ""); + return LLVMBuildIntCast2(llvm_builder, llvm_expr(C[1], st), llvm_type(a->t), is_sign(a->t), ""); } else if (is_flt(a->t)) { if (is_sign(expr_type)) { - return LLVMBuildSIToFP(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), "stof"); + return LLVMBuildSIToFP(llvm_builder, llvm_expr(C[1], st), llvm_type(a->t), "stof"); } else { - return LLVMBuildUIToFP(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), "utof"); + return LLVMBuildUIToFP(llvm_builder, llvm_expr(C[1], st), llvm_type(a->t), "utof"); } } } else if (is_flt(expr_type)) { if (is_flt(a->t)) { - return LLVMBuildFPCast(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), "cast"); + return LLVMBuildFPCast(llvm_builder, llvm_expr(C[1], st), llvm_type(a->t), "cast"); } else if (is_int(a->t)) { if (is_sign(a->t)) { - return LLVMBuildFPToSI(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), "ftos"); + return LLVMBuildFPToSI(llvm_builder, llvm_expr(C[1], st), llvm_type(a->t), "ftos"); } else { - return LLVMBuildFPToUI(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), "ftou"); + return LLVMBuildFPToUI(llvm_builder, llvm_expr(C[1], st), llvm_type(a->t), "ftou"); } } } else if (expr_type->k == TY_ARR) { if (is_ptr(a->t)) { a->t->base = expr_type->base; - return LLVMBuildBitCast(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), ""); + return LLVMBuildBitCast(llvm_builder, llvm_expr(C[1], st), llvm_type(a->t), ""); } } - note(file_name, A.ln, A.cl, -1, "unhandled type %s or %s (llvm:llvm_expr_cast)", expr_type->s, a->t->s); + note(file_name, A.ln, A.cl, -1, "Unhandled cast \"%s\" -> \"%s\" (llvm:llvm_expr_cast)", expr_type->s, A.t->s); } /* Generate IR for an array. */ diff --git a/src/parse.c b/src/parse.c index 5d80fe5..2a9b439 100644 --- a/src/parse.c +++ b/src/parse.c @@ -27,7 +27,7 @@ char *ast_ks[] = { "AK_ASSIGN", "AK_AS_ADD", "AK_AS_SUB", "AK_AS_MUL", "AK_AS_DIV", "AK_AS_MOD", - "AK_ID_VAR", "AK_CALL", "AK_BOOL", "AK_INT", "AK_FLT", "AK_ARR", + "AK_ID", "AK_CALL", "AK_BOOL", "AK_INT", "AK_FLT", "AK_ARR", "AK_HASH_SYSCALL" }; @@ -52,6 +52,8 @@ static ast *parse_flt(lex *l, syt *st); static s32 tok_precedence(tok_k tk); static s32 ast_precedence(ast_k ak); +static inline char *strdup_or_fail(const char *s); + /* Initialise an AST node. */ inline ast *ast_init(ast_k kind, UINT ln, UINT cl) { ast *a = calloc(1, sizeof (*a)); if (a == NULL) { error(1, SERR); } @@ -236,30 +238,8 @@ static ast *parse_stmt_for(lex *l, syt *st) { static ast *parse_expr(lex *l, syt *st, s32 o) { ast *left = NULL; switch (T.k) { case TK_ID: { - tok t = lex_next(l); - left = ast_init(0, t.ln, t.cl); - - ast *sym = syt_search_h(st, t.h, t.s); - if (sym == NULL) { note(l->n, t.ln, t.cl, -1, "Use of undeclared identifier \"%s\"", t.s); } - - if (T.k == TK_LPAREN) { - lex_kind(l, TK_LPAREN); - - if (sym->k == AK_TYPE) { left->k = AK_CAST; left->t = sym->t; } - else { left->k = AK_CALL; } - - if (T.k != TK_RPAREN) for (;;) { - ast_push(left, parse_expr(l, st, 0)); - if (T.k != TK_COMMA) { break; } - if (left->k == AK_CAST) { note(l->n, T.ln, T.cl, 0, "type casts must have only a single argument"); } - lex_kind(l, TK_COMMA); - } - - lex_kind(l, TK_RPAREN); - } - else { left->k = AK_ID_VAR; left->t = sym->t; } - - if (!(left->s = strdup(t.s))) { error(1, "%s", SERR); } + left = ast_init(AK_ID, T.ln, T.cl); left->h = T.h; + left->s = strdup_or_fail(T.s); lex_next(l); } break; case TK_TRUE: { left = ast_init(AK_BOOL, T.ln, T.cl); left->v_bool = true; } goto boolean; case TK_FALSE: { left = ast_init(AK_BOOL, T.ln, T.cl); left->v_bool = false; } goto boolean; @@ -279,7 +259,7 @@ static ast *parse_expr(lex *l, syt *st, s32 o) { lex_kind(l, TK_LPAREN); if (T.k != TK_RPAREN) for (;;) { - ast_push(left, parse_expr(l, st, true)); + ast_push(left, parse_expr(l, st, 0)); if (T.k == TK_COMMA) { lex_next(l); } else { break; } } @@ -300,6 +280,21 @@ static ast *parse_expr(lex *l, syt *st, s32 o) { /* Parse an infix expression if one is present */ for (ast *a = NULL; tok_precedence(T.k) > o; left = a) switch (T.k) { + case TK_LPAREN: { + a = ast_init(AK_CALL, left->ln, left->cl); + lex_next(l); ast_push(a, left); + + /* Parse call arguments if present */ + if (T.k != TK_RPAREN) for (;;) { + ast_push(a, parse_expr(l, st, 0)); + if (T.k == TK_COMMA) { lex_next(l); } else { break; } + } + + lex_kind(l, TK_RPAREN); + } break; + case TK_LBRACK: { + /* TODO array access */ + } break; case TK_ASSIGN: { a = ast_init(AK_ASSIGN, T.ln, T.cl); } goto infix; case TK_AS_ADD: { a = ast_init(AK_AS_ADD, T.ln, T.cl); } goto infix; case TK_AS_SUB: { a = ast_init(AK_AS_SUB, T.ln, T.cl); } goto infix; @@ -314,6 +309,8 @@ static ast *parse_expr(lex *l, syt *st, s32 o) { infix: { lex_next(l); ast_push(a, left); ast_push(a, parse_expr(l, st, ast_precedence(a->k))); } break; } + if (left == NULL) { note(l->n, T.ln, T.cl, 0, "Expected an expression"); } + return left; } @@ -393,8 +390,7 @@ static ast *parse_int(lex *l, syt *st) { tok t = lex_next(l); ast *a = ast_init(AK_INT, t.ln, t.cl); - if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); } - + a->s = strdup_or_fail(t.s); a->v_int = t.v_int; /* Determine the minimum integer type */ @@ -414,8 +410,7 @@ static ast *parse_flt(lex *l, syt *st) { tok t = lex_next(l); ast *a = ast_init(AK_FLT, t.ln, t.cl); - if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); } - + a->s = strdup_or_fail(t.s); a->v_flt = t.v_flt; /* Determine the minimum float type */ @@ -452,6 +447,7 @@ static s32 tok_precedence(tok_k tk) { /* Get the precedence of an AST kind. */ static s32 ast_precedence(ast_k ak) { switch (ak) { + case AK_CALL: { return 8; } case AK_OP_POS: case AK_OP_NEG: case AK_BW_NOT: case AK_OP_ADO: case AK_OP_DRF: { return 6; } case AK_OP_MUL: case AK_OP_DIV: case AK_OP_MOD: { return 4; } case AK_OP_ADD: case AK_OP_SUB: { return 3; } @@ -460,6 +456,13 @@ static s32 ast_precedence(ast_k ak) { } } +/* Duplicate a string or fail. */ +static inline char *strdup_or_fail(const char *s) { + register char *r = strdup(s); + if (r == NULL) { error(1, "%s", SERR); } + return r; +} + /* Recursively print an AST. */ void ast_print(ast *a, UINT indent) { for (UINT i = 0; i < indent; ++i) { printf(" "); } diff --git a/src/parse.h b/src/parse.h index 672346a..d875d72 100644 --- a/src/parse.h +++ b/src/parse.h @@ -24,7 +24,7 @@ typedef enum { AK_ASSIGN, AK_AS_ADD, AK_AS_SUB, AK_AS_MUL, AK_AS_DIV, AK_AS_MOD, - AK_ID_VAR, AK_CALL, AK_BOOL, AK_INT, AK_FLT, AK_ARR, + AK_ID, AK_CALL, AK_BOOL, AK_INT, AK_FLT, AK_ARR, AK_HASH_SYSCALL } ast_k;