Author | Jakob Wakeling <[email protected]> |
Date | 2023-07-04 05:52:43 |
Commit | 421414826683053aae33f88755c0fbea14a06513 |
Parent | cc58a1d3fcd21571a5d231359a3acbbe5564d97d |
Improve hash expression handling
Diffstat
M | examples/hello.g | | | 5 | ++--- |
A | examples/io.g | | | 11 | +++++++++++ |
M | src/analyse.c | | | 19 | +++++++++++++++++-- |
M | src/lex.c | | | 33 | +++++++++++---------------------- |
M | src/lex.h | | | 4 | ++-- |
M | src/llvm.c | | | 65 | +++++++++++++++++++++++++++-------------------------------------- |
M | src/parse.c | | | 29 | ++++++++--------------------- |
M | src/parse.h | | | 2 | +- |
8 files changed, 79 insertions, 89 deletions
diff --git a/examples/hello.g b/examples/hello.g index 3525d82..62f4cde 100644 --- a/examples/hello.g +++ b/examples/hello.g @@ -1,5 +1,4 @@ -main :: proc() -> u64 { +main :: proc() -> sint { hello : [14]u8 = { 'H', 'e', 'l', 'l', 'o', ',', ' ', 'W', 'o', 'r', 'l', 'd', '!', '\n' }; - #syscall(u64(1), u64(1), hello, u64(14)); - return u64(0); + return #syscall(uint(1), sint(1), ptr(hello), uint(14)); } diff --git a/examples/io.g b/examples/io.g new file mode 100644 index 0000000..a8bfe87 --- /dev/null +++ b/examples/io.g @@ -0,0 +1,11 @@ +main :: proc() -> sint { + file : [6]u8 = { 'g', '.', 't', 'x', 't', '\0' }; + buf : [7]u8 = { 'O', 'u', 't', 'p', 'u', 't', '\n' }; + + /* Open file with O_WRONLY and O_CREAT flags */ + fd : sint = #syscall(uint(2), ptr(file), s32(0b1000010), u16(0o644)); + r : sint = #syscall(uint(1), fd, ptr(buf), uint(7)); + #syscall(uint(3), fd); + + return r; +} diff --git a/src/analyse.c b/src/analyse.c index ed071f4..1df4abb 100644 --- a/src/analyse.c +++ b/src/analyse.c @@ -12,6 +12,7 @@ #include <assert.h> #include <stdio.h> +#include <string.h> static void analyse_stmt(ast *a, syt *st); static inline void analyse_stmt_comp(ast *a, syt *st); @@ -22,6 +23,7 @@ static void analyse_stmt_for(ast *a, syt *st); static void analyse_expr(ast *a, syt *st); static void analyse_expr_proc(ast *a, syt *st); +static void analyse_expr_hash(ast *a, syt *st); #define A (*a) #define C (a->c.a) /* AST child shorthand "C[i]" */ @@ -111,6 +113,8 @@ static void analyse_stmt_decl(ast *a, syt *st) { /* Analyse a return statement. */ static void analyse_stmt_return(ast *a, syt *st) { + if (A.c.al == 1) { analyse_expr(C[0], st); } + type *t = A.c.al != 0 ? ast_type(C[0], st) : &TYPE(TY_VOID); ast *p = A.p; for (; p->k != AK_PROC; p = p->p); @@ -125,8 +129,6 @@ static void analyse_stmt_return(ast *a, syt *st) { note("TODO", A.ln, A.cl, 0, "Explicit cast required from %s to %s", t->s, p->t->s); } } - - if (A.c.al == 1) { analyse_expr(C[0], st); } } /* Analyse an if statement. */ @@ -164,6 +166,7 @@ static void analyse_expr(ast *a, syt *st) { } } break; case AK_PROC: { analyse_expr_proc(a, st); } break; + case AK_HASH: { analyse_expr_hash(a, st); } break; case AK_OP_DRF: { A.t = ast_type(C[0], st)->base; } break; default: { for (UINT i = 0; i < CL; i += 1) { analyse_expr(C[i], st); }} break; } @@ -181,3 +184,13 @@ static void analyse_expr_proc(ast *a, syt *st) { /* Analyse the procedure body */ analyse_stmt_comp(C[CL - 1], st); } + +/* Analyse a hash expression. */ +static void analyse_expr_hash(ast *a, syt *st) { + assert(A.k == AK_HASH); + + if (strcmp(A.s, "syscall") == 0) { A.k = AK_HASH_SYSCALL; A.t = &TYPE(TY_SINT); } + else { note("TODO", A.ln, A.cl, 0, "Unrecognised hash expression \"%s\"", A.s); } + + for (UINT i = 0; i < CL; i += 1) { analyse_expr(C[i], st); } +} diff --git a/src/lex.c b/src/lex.c index 2180e47..8584ec8 100644 --- a/src/lex.c +++ b/src/lex.c @@ -16,12 +16,12 @@ #include <stdio.h> char *tok_ks[] = { - "TK_VOID", "TK_EOF", "TK_ID", "TK_INT", "TK_FLT", "TK_STR", "TK_HASH", + "TK_VOID", "TK_EOF", "TK_ID", "TK_INT", "TK_FLT", "TK_STR", "TK_NULL", "TK_TRUE", "TK_FALSE", "TK_RETURN", "TK_IF", "TK_ELSE", "TK_FOR", "TK_PROC", "TK_LPAREN", "TK_RPAREN", "TK_LBRACK", "TK_RBRACK", "TK_LBRACE", "TK_RBRACE", - "TK_COLON", "TK_SCOLON", "TK_COMMA", "TK_PERIOD", "TK_RARROW", "TK_QMARK", + "TK_COLON", "TK_SCOLON", "TK_COMMA", "TK_PERIOD", "TK_RARROW", "TK_QMARK", "TK_HASH", "TK_OP_ADD", "TK_OP_SUB", "TK_OP_MUL", "TK_OP_DIV", "TK_OP_MOD", "TK_OP_EQ", "TK_OP_NEQ", "TK_OP_GT", "TK_OP_LT", "TK_OP_GTE", "TK_OP_LTE", @@ -112,14 +112,14 @@ tok lex_next(lex *l) { for (P += 1; is_alpha(C) || is_digit_dec(C) || C == '_'; P += 1); sl = P - s; CL += sl; T.h = syt_hash(s, sl); - if (strncmp(s, "null", sl) == 0) { T.k = TK_NULL; } - else if (strncmp(s, "true", sl) == 0) { T.k = TK_TRUE; } - else if (strncmp(s, "false", sl) == 0) { T.k = TK_FALSE; } - else if (strncmp(s, "return", sl) == 0) { T.k = TK_RETURN; } - else if (strncmp(s, "if", sl) == 0) { T.k = TK_IF; } - else if (strncmp(s, "else", sl) == 0) { T.k = TK_ELSE; } - else if (strncmp(s, "for", sl) == 0) { T.k = TK_FOR; } - else if (strncmp(s, "proc", sl) == 0) { T.k = TK_PROC; } + if (strncmp(s, "null", 4) == 0) { T.k = TK_NULL; } + else if (strncmp(s, "true", 5) == 0) { T.k = TK_TRUE; } + else if (strncmp(s, "false", 6) == 0) { T.k = TK_FALSE; } + else if (strncmp(s, "return", 6) == 0) { T.k = TK_RETURN; } + else if (strncmp(s, "if", 2) == 0) { T.k = TK_IF; } + else if (strncmp(s, "else", 4) == 0) { T.k = TK_ELSE; } + else if (strncmp(s, "for", 3) == 0) { T.k = TK_FOR; } + else if (strncmp(s, "proc", 4) == 0) { T.k = TK_PROC; } else { T.k = TK_ID; if (!(T.s = strndup(s, sl))) { error(1, SERR); }} } @@ -142,18 +142,6 @@ tok lex_next(lex *l) { } } - /* Handle hash procedures */ - else if (C == '#') { - char *s = P; UINT sl; - - for (P += 1; is_alpha(C) || is_digit_dec(C) || C == '_'; P += 1); - sl = P - s; CL += sl; - - if (sl <= 1) { note(l->n, T.ln, T.cl, 0, "A hash must be followed by an identifier"); goto reset; } - - T.k = TK_HASH; T.h = syt_hash(s, sl); if (!(T.s = strndup(s, sl))) { error(1, SERR); } - } - /* Handle punctuators and operators */ else switch (C) { case '(': { T.k = TK_LPAREN; P += 1; CL += 1; } break; @@ -167,6 +155,7 @@ tok lex_next(lex *l) { case ',': { T.k = TK_COMMA; P += 1; CL += 1; } break; case '.': { T.k = TK_PERIOD; P += 1; CL += 1; } break; case '?': { T.k = TK_QMARK; P += 1; CL += 1; } break; + case '#': { T.k = TK_HASH; P += 1; CL += 1; } break; case '+': switch (D) { default: { T.k = TK_OP_ADD; P += 1; CL += 1; } break; case '=': { T.k = TK_AS_ADD; P += 2; CL += 2; } break; diff --git a/src/lex.h b/src/lex.h index 6592620..f48d162 100644 --- a/src/lex.h +++ b/src/lex.h @@ -10,12 +10,12 @@ /* Remember to update tok_ks in lex.c */ typedef enum { - TK_VOID, TK_EOF, TK_ID, TK_INT, TK_FLT, TK_STR, TK_HASH, + TK_VOID, TK_EOF, TK_ID, TK_INT, TK_FLT, TK_STR, TK_NULL, TK_TRUE, TK_FALSE, TK_RETURN, TK_IF, TK_ELSE, TK_FOR, TK_PROC, TK_LPAREN, TK_RPAREN, TK_LBRACK, TK_RBRACK, TK_LBRACE, TK_RBRACE, - TK_COLON, TK_SCOLON, TK_COMMA, TK_PERIOD, TK_RARROW, TK_QMARK, + TK_COLON, TK_SCOLON, TK_COMMA, TK_PERIOD, TK_RARROW, TK_QMARK, TK_HASH, TK_OP_ADD, TK_OP_SUB, TK_OP_MUL, TK_OP_DIV, TK_OP_MOD, TK_OP_EQ, TK_OP_NEQ, TK_OP_GT, TK_OP_LT, TK_OP_GTE, TK_OP_LTE, diff --git a/src/llvm.c b/src/llvm.c index 3be9794..41a2b30 100644 --- a/src/llvm.c +++ b/src/llvm.c @@ -37,11 +37,10 @@ static LLVMValueRef llvm_stmt_for(ast *a, syt *st); static LLVMValueRef llvm_expr(ast *a, syt *st, bool load); static LLVMValueRef llvm_expr_proc(ast *a, syt *st); static LLVMValueRef llvm_expr_cast(ast *a, syt *st); +static LLVMValueRef llvm_expr_hash(ast *a, syt *st); static LLVMValueRef llvm_arr(ast *a, syt *st); -static LLVMValueRef llvm_hash(ast *a, syt *st); - static inline void llvm_init(char *file); static inline void llvm_free(void); static LLVMTypeRef llvm_type(type *t); @@ -121,7 +120,6 @@ static LLVMValueRef llvm_stmt(ast *a, syt *st) { case AK_RETURN: { return llvm_stmt_return(a, st); } break; case AK_IF: { return llvm_stmt_if(a, st); } break; case AK_FOR: { return llvm_stmt_for(a, st); } break; - case AK_HASH_SYSCALL: { return llvm_hash(a, st); } break; default: { return llvm_expr(a, st, true); } break; } } @@ -217,6 +215,8 @@ static LLVMValueRef llvm_expr(ast *a, syt *st, bool load) { return LLVMBuildCall2(llvm_builder, sym->llvm_t, sym->llvm_v, args, CL - 1, ""); } break; + case AK_HASH_SYSCALL: + case AK_HASH: { return llvm_expr_hash(a, st); } break; case AK_SUBS: { ast *sym = syt_search_h(st, C[0]->h, C[0]->s); if (sym == NULL) { note(file_name, A.ln, A.cl, 0, "Undefined variable \"%s\"", C[0]->s); } @@ -229,7 +229,7 @@ static LLVMValueRef llvm_expr(ast *a, syt *st, bool load) { if (!load) { return vr; } else { return LLVMBuildLoad2(llvm_builder, llvm_type(sym->t->base), vr, ""); } } - case AK_OP_POS: { a = C[0]; goto reset; /* no-op */ } + case AK_OP_POS: { a = C[0]; goto reset; /* no-op */ } break; case AK_OP_NEG: { type *t = ast_type(C[0], st); if (t == NULL) { note(file_name, A.ln, A.cl, -1, "Subtree is missing a type (llvm:llvm_expr)"); } @@ -370,46 +370,27 @@ static LLVMValueRef llvm_expr_cast(ast *a, syt *st) { return NULL; } -/* Generate IR for an array. */ -static LLVMValueRef llvm_arr(ast *a, syt *st) { - assert(A.k == AK_ARR); - - LLVMValueRef *va = calloc(CL, sizeof (LLVMValueRef)); - - for (UINT i = 0; i < CL; i += 1) { va[i] = llvm_expr(C[i], st, true); } - - return LLVMConstArray(llvm_type(C[0]->t), va, CL); -} - /* Generate IR for a hash procedure. */ -static LLVMValueRef llvm_hash(ast *a, syt *st) { - assert(a->k == AK_HASH_SYSCALL); +static LLVMValueRef llvm_expr_hash(ast *a, syt *st) { + assert(A.k == AK_HASH_SYSCALL); - UINT arg_count = a->c.al; - LLVMValueRef *args = calloc(arg_count, sizeof (LLVMValueRef)); - for (UINT i = 0; i < arg_count; i += 1) { - args[i] = llvm_expr(a->c.a[i], st, true); + LLVMValueRef args[CL]; LLVMTypeRef argt[CL]; + for (UINT i = 0; i < CL; i += 1) { + args[i] = llvm_expr(C[i], st, true); + argt[i] = llvm_type(ast_type(C[i], st)); } - LLVMTypeRef unsigned_integer_type = llvm_type(&TYPE(TY_UINT)); - LLVMTypeRef *typs = calloc(arg_count, sizeof (LLVMTypeRef)); - for (UINT i = 0; i < arg_count; i += 1) { - /* FIXME hardcoded for write syscall */ - if (i == 2) { typs[i] = llvm_type(type_ptr(&TYPE(TY_U8), 1)); } - else { typs[i] = unsigned_integer_type; } - } - - LLVMTypeRef func_type = LLVMFunctionType(llvm_type(&TYPE(TY_UINT)), typs, arg_count, false); + LLVMTypeRef func_type = LLVMFunctionType(llvm_type(&TYPE(TY_SINT)), argt, CL, false); LLVMValueRef inline_asm = NULL; /* TODO check architecture */ { /* x86-64 */ - assert(arg_count <= 7); + assert(CL <= 7); char constraints[128] = "={rax}"; char const *registers[] = { "rax", "rdi", "rsi", "rdx", "r10", "r8", "r9" }; - for (UINT i = 0; i < arg_count; i += 1) { + for (UINT i = 0; i < CL; i += 1) { strcat(constraints, ",{"); strcat(constraints, registers[i]); strcat(constraints, "}"); @@ -422,7 +403,18 @@ static LLVMValueRef llvm_hash(ast *a, syt *st) { ); } - return LLVMBuildCall2(llvm_builder, func_type, inline_asm, args, arg_count, ""); + return LLVMBuildCall2(llvm_builder, func_type, inline_asm, args, CL, ""); +} + +/* Generate IR for an array. */ +static LLVMValueRef llvm_arr(ast *a, syt *st) { + assert(A.k == AK_ARR); + + LLVMValueRef *va = calloc(CL, sizeof (LLVMValueRef)); + + for (UINT i = 0; i < CL; i += 1) { va[i] = llvm_expr(C[i], st, true); } + + return LLVMConstArray(llvm_type(C[0]->t), va, CL); } /* Initialise LLVM. */ diff --git a/src/parse.c b/src/parse.c index bf81581..2b1fed7 100644 --- a/src/parse.c +++ b/src/parse.c @@ -29,7 +29,7 @@ char *ast_ks[] = { "AK_ID", "AK_CALL", "AK_BOOL", "AK_INT", "AK_FLT", "AK_ARR", "AK_SUBS", - "AK_HASH_SYSCALL" + "AK_HASH", "AK_HASH_SYSCALL" }; static ast *parse_stmt(lex *l, syt *st); @@ -253,24 +253,8 @@ static ast *parse_expr(lex *l, syt *st, s32 o) { case TK_LBRACE: { return parse_expr_compound(l, st); } break; case TK_PROC: { return parse_expr_proc(l, st); } break; case TK_HASH: { - tok t = lex_next(l); bool needs_args = false; - left = ast_init(0, t.ln, t.cl); - - if (strcmp(t.s, "#syscall") == 0) { left->k = AK_HASH_SYSCALL; needs_args = true; } - else { note("TODO", t.ln, t.cl, 0, "%s: unrecognised hash procedure", t.s); } - - if (needs_args) { - lex_kind(l, TK_LPAREN); - - if (T.k != TK_RPAREN) for (;;) { - ast_push(left, parse_expr(l, st, 0)); - if (T.k == TK_COMMA) { lex_next(l); } else { break; } - } - - lex_kind(l, TK_RPAREN); - } - - if (!(left->s = strdup(t.s))) { error(1, "%s", SERR); } + left = ast_init(AK_HASH, T.ln, T.cl); lex_next(l); + left->h = T.h; left->s = strdup_or_fail(T.s); lex_kind(l, TK_ID); } break; case TK_LPAREN: { lex_next(l); left = parse_expr(l, st, 0); lex_kind(l, TK_RPAREN); } break; case TK_OP_ADD: { left = ast_init(AK_OP_POS, T.ln, T.cl); } goto prefix; @@ -285,8 +269,10 @@ static ast *parse_expr(lex *l, syt *st, s32 o) { /* Parse an infix expression if one is present */ for (ast *a = NULL; tok_precedence(T.k) > o; left = a) switch (T.k) { case TK_LPAREN: { - a = ast_init(AK_CALL, left->ln, left->cl); - lex_next(l); ast_push(a, left); + if (left->k == AK_HASH) { a = left; } + else { a = ast_init(AK_CALL, left->ln, left->cl); ast_push(a, left); } + + lex_next(l); /* Parse call arguments if present */ if (T.k != TK_RPAREN) for (;;) { diff --git a/src/parse.h b/src/parse.h index 5b489f5..50dd366 100644 --- a/src/parse.h +++ b/src/parse.h @@ -26,7 +26,7 @@ typedef enum { AK_ID, AK_CALL, AK_BOOL, AK_INT, AK_FLT, AK_ARR, AK_SUBS, - AK_HASH_SYSCALL + AK_HASH, AK_HASH_SYSCALL } ast_k; /*