Author | Jakob Wakeling <[email protected]> |
Date | 2023-07-06 02:11:33 |
Commit | 1aa55193278a3067cd2b55634c052bbc0ac3553a |
Parent | 0f9bb9dd1ca7d9724da7e4b2ce0624dc2b2e1eb0 |
Implement if and else statements
Diffstat
M | README.md | | | 13 | +++++++------ |
M | doc/g.ebnf | | | 22 | +++++++++------------- |
M | src/analyse.c | | | 13 | +++++++++---- |
M | src/lex.c | | | 42 | +++++++++++++++--------------------------- |
M | src/lex.h | | | 8 | ++------ |
M | src/llvm.c | | | 42 | +++++++++++++++++++++++++++++++++++++----- |
M | src/main.c | | | 3 | +-- |
M | src/parse.c | | | 70 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------- |
M | src/parse.h | | | 1 | + |
M | src/symbol.c | | | 2 | -- |
M | src/symbol.h | | | 1 | - |
11 files changed, 138 insertions, 79 deletions
diff --git a/README.md b/README.md index 831a715..55eae29 100644 --- a/README.md +++ b/README.md @@ -59,17 +59,18 @@ command. The second command will output an executable file, *a.out* by default. - [x] Implement type casting - [ ] Implement type casting to pointers and arrays - [ ] Implement the *type* type -- [ ] Implement *defer* -- [ ] Implement *errdefer* -- [ ] Implement *if* and *else* +- [ ] Implement labels and *goto* +- [x] Implement *if* and *else* - [ ] Implement *for* +- [ ] Implement *break* and *continue* +- [ ] Implement *defer* +- [ ] Implement *errdefer* (?) - [ ] Implement first class strings - [x] Implement syscalls -- [ ] Implement foreign code calling - [ ] Implement generics of some kind - [ ] Implement module definition - [ ] Implement module use -- [ ] Implement C procedure calling +- [ ] Implement foreign code calling - ... - [ ] Re-write compiler in **G** diff --git a/doc/g.ebnf b/doc/g.ebnf index b893a12..a881212 100644 --- a/doc/g.ebnf +++ b/doc/g.ebnf @@ -9,19 +9,16 @@ prog = { stmt_decl } ; (* Statements *) -stmt = stmt_compound - | stmt_decl, ";" - | [ expr ], ";" - | "return", [ expr ], ";" - | "if", "(", expr, ")", stmt, [ "else", stmt ] - | "for", "(" expr, [ ";", expr ], [ ";", expr ], ")", stmt - ; +stmt = stmt_compound | stmt_expr | stmt_decl | stmt_return | stmt_if | stmt_for ; +stmt_simple = stmt_expr | stmt_decl ; stmt_compound = "{", { stmt }, "}" ; +stmt_expr = [ expr ], ";" ; -stmt_decl = iden, ":", ( stmt_decl_constant | stmt_decl_variable ) ; -stmt_decl_constant = [ type ], ":", expr ; -stmt_decl_variable = [ type ], "=", expr ; +stmt_decl = iden, ":", [ type ], ( ":" | "=" ), expr, ";" ; +stmt_return = "return", [ expr ], ";" ; +stmt_if = "if", "(", expr, ")", stmt, [ "else", stmt ] ; +stmt_for = "for", "(", expr, [ ";", expr ], [ ";", expr ], ")", stmt ; (* Expressions *) expr = iden | literal diff --git a/src/analyse.c b/src/analyse.c index 1df4abb..0f8f026 100644 --- a/src/analyse.c +++ b/src/analyse.c @@ -46,7 +46,7 @@ static void analyse_stmt(ast *a, syt *st) { } } -/* Analyse a compound statement. */ +/* Analyse a compound statement. AK_COMP has a scope. */ static inline void analyse_stmt_comp(ast *a, syt *st) { assert(A.k == AK_COMP); @@ -131,12 +131,17 @@ static void analyse_stmt_return(ast *a, syt *st) { } } -/* Analyse an if statement. */ +/* Analyse an if statement. AK_IF has a scope. */ static void analyse_stmt_if(ast *a, syt *st) { - /* TODO */ + assert(A.k == AK_IF); + assert(CL == 3 || CL == 4); + + if (C[0]->k != AK_VOID) { analyse_stmt(C[0], &A.st); } + analyse_expr(C[1], &A.st); analyse_stmt(C[2], &A.st); + if (CL == 4) { analyse_stmt(C[3], &A.st); } } -/* Analyse a for statement. */ +/* Analyse a for statement. AK_FOR has a scope. */ static void analyse_stmt_for(ast *a, syt *st) { /* TODO */ } diff --git a/src/lex.c b/src/lex.c index cc63f7f..5d82014 100644 --- a/src/lex.c +++ b/src/lex.c @@ -6,19 +6,18 @@ #include "lex.h" #include "log.h" #include "symbol.h" -#include "type.h" #include "util/error.h" #include "util/util.h" -#include <stdint.h> +#include <stdio.h> #include <stdlib.h> #include <string.h> -#include <stdio.h> char *tok_ks[] = { "TK_VOID", "TK_EOF", "TK_ID", "TK_INT", "TK_FLT", "TK_STR", - "TK_NULL", "TK_TRUE", "TK_FALSE", "TK_RETURN", "TK_IF", "TK_ELSE", "TK_FOR", "TK_PROC", + "TK_NULL", "TK_TRUE", "TK_FALSE", "TK_PROC", "TK_GOTO", "TK_RETURN", + "TK_IF", "TK_ELSE", "TK_FOR", "TK_BREAK", "TK_CONTINUE", "TK_LPAREN", "TK_RPAREN", "TK_LBRACK", "TK_RBRACK", "TK_LBRACE", "TK_RBRACE", "TK_COLON", "TK_SCOLON", "TK_COMMA", "TK_PERIOD", "TK_RARROW", "TK_QMARK", "TK_HASH", @@ -44,22 +43,6 @@ char *tok_ks[] = { static inline u64 parse_int(char *s); static inline f128 parse_flt(char *s); -/* Push a token to a token array. */ -void tok_a_push(tok_a *a, tok t) { - tok *ta = realloc(a->a, (a->al += 1) * sizeof (tok)); - if (!ta) { error(1, SERR); } else { a->a = ta; a->a[a->al - 1] = t; } -} - -/* Pop a token from a token array. A zero token is returned when empty. */ -tok tok_a_pop(tok_a *a) { - return (a->al ? a->a[a->al -= 1] : (tok){ 0 }); -} - -/* Pop a token from a token array. A zero token is returned when empty. */ -tok tok_a_peek(tok_a *a) { - return (a->al ? a->a[a->al - 1] : (tok){ 0 }); -} - /* Initialise a lexer. */ lex lex_init(const char *file, char *src, UINT len) { lex l = { file, src, src, src + len, 0, 0, 0, 0 }; @@ -113,14 +96,17 @@ tok lex_next(lex *l) { for (P += 1; is_alpha(C) || is_digit_dec(C) || C == '_'; P += 1); sl = P - s; CL += sl; T.h = syt_hash(s, sl); - if (strncmp(s, "null", 4) == 0) { T.k = TK_NULL; } - else if (strncmp(s, "true", 4) == 0) { T.k = TK_TRUE; } - else if (strncmp(s, "false", 5) == 0) { T.k = TK_FALSE; } - else if (strncmp(s, "return", 6) == 0) { T.k = TK_RETURN; } - else if (strncmp(s, "if", 2) == 0) { T.k = TK_IF; } - else if (strncmp(s, "else", 4) == 0) { T.k = TK_ELSE; } - else if (strncmp(s, "for", 3) == 0) { T.k = TK_FOR; } - else if (strncmp(s, "proc", 4) == 0) { T.k = TK_PROC; } + if (strncmp(s, "null", 4) == 0) { T.k = TK_NULL; } + else if (strncmp(s, "true", 4) == 0) { T.k = TK_TRUE; } + else if (strncmp(s, "false", 5) == 0) { T.k = TK_FALSE; } + else if (strncmp(s, "proc", 4) == 0) { T.k = TK_PROC; } + else if (strncmp(s, "goto", 4) == 0) { T.k = TK_GOTO; } + else if (strncmp(s, "return", 6) == 0) { T.k = TK_RETURN; } + else if (strncmp(s, "if", 2) == 0) { T.k = TK_IF; } + else if (strncmp(s, "else", 4) == 0) { T.k = TK_ELSE; } + else if (strncmp(s, "for", 3) == 0) { T.k = TK_FOR; } + else if (strncmp(s, "break", 5) == 0) { T.k = TK_BREAK; } + else if (strncmp(s, "continue", 8) == 0) { T.k = TK_CONTINUE; } else { T.k = TK_ID; if (!(T.s = strndup(s, sl))) { error(1, SERR); }} } diff --git a/src/lex.h b/src/lex.h index b6adccf..033b259 100644 --- a/src/lex.h +++ b/src/lex.h @@ -12,7 +12,8 @@ typedef enum { TK_VOID, TK_EOF, TK_ID, TK_INT, TK_FLT, TK_STR, - TK_NULL, TK_TRUE, TK_FALSE, TK_RETURN, TK_IF, TK_ELSE, TK_FOR, TK_PROC, + TK_NULL, TK_TRUE, TK_FALSE, TK_PROC, TK_GOTO, TK_RETURN, + TK_IF, TK_ELSE, TK_FOR, TK_BREAK, TK_CONTINUE, TK_LPAREN, TK_RPAREN, TK_LBRACK, TK_RBRACK, TK_LBRACE, TK_RBRACE, TK_COLON, TK_SCOLON, TK_COMMA, TK_PERIOD, TK_RARROW, TK_QMARK, TK_HASH, @@ -32,7 +33,6 @@ typedef enum { v_int : Int Value, v_flt : Flt Value */ typedef struct { tok_k k; UINT ln, cl; u64 h; char *s; union { u64 v_int; f128 v_flt; }; } tok; -typedef struct { tok *a; UINT al; } tok_a; /* n : File Name, s : Start of File, p : Current Character, q : End of File, @@ -42,10 +42,6 @@ typedef struct { const char *n; char *s, *p, *q; UINT ln, cl; tok t; } lex; extern char *tok_ks[]; -extern void tok_a_push(tok_a *a, tok t); -extern tok tok_a_pop(tok_a *a); -extern tok tok_a_peek(tok_a *a); - extern lex lex_init(const char *file, char *src, UINT len); extern tok lex_peek(lex *l); extern tok lex_next(lex *l); diff --git a/src/llvm.c b/src/llvm.c index 652ac76..93cdc30 100644 --- a/src/llvm.c +++ b/src/llvm.c @@ -18,6 +18,7 @@ #include <llvm-c/TargetMachine.h> #include <llvm-c/Types.h> +#include <assert.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -93,7 +94,7 @@ void llvm_bitcode(ast *a, char *file) { error(2, "LLVMWriteBitcodeToFile failure"); } - char *err; + char *err = NULL; LLVMVerifyModule(llvm_module, LLVMAbortProcessAction, &err); LLVMDisposeMessage(err); llvm_free(); } @@ -104,10 +105,11 @@ void llvm_ir(ast *a, char *file) { /* Generate IR for all child nodes */ for (UINT i = 0; i < a->c.al; i += 1) { llvm_stmt_decl(a->c.a[i], &a->st); } - /* TODO */ - + if (LLVMPrintModuleToFile(llvm_module, "llvm.ll", NULL)) { + error(2, "LLVMPrintModuleToFile failure"); + } - char *err; + char *err = NULL; LLVMVerifyModule(llvm_module, LLVMAbortProcessAction, &err); LLVMDisposeMessage(err); llvm_free(); } @@ -143,7 +145,7 @@ static LLVMValueRef llvm_stmt_decl(ast *a, syt *st) { LLVMTypeRef ft = LLVMFunctionType(llvm_type(C[0]->t), art, C[0]->c.al - 1, 0); LLVMValueRef f = LLVMAddFunction(llvm_module, A.s, ft); - LLVMBasicBlockRef bb = LLVMAppendBasicBlock(f, "entry"); + LLVMBasicBlockRef bb = LLVMAppendBasicBlock(f, ""); LLVMPositionBuilderAtEnd(llvm_builder, bb); for (UINT i = 0; i < C[0]->c.al - 1; i += 1) { @@ -175,8 +177,32 @@ static LLVMValueRef llvm_stmt_return(ast *a, syt *st) { } /* Generate IR for an if statement. */ +/* TODO handle return statements inside if/else */ static LLVMValueRef llvm_stmt_if(ast *a, syt *st) { - return NULL; /* TODO */ + assert(A.k == AK_IF); + assert(CL == 3 || CL == 4); + + if (C[0]->k != AK_VOID) { llvm_stmt(C[0], &A.st); } + + ast *p = find_proc(a); assert(p->k == AK_DECL); + + LLVMBasicBlockRef b1 = LLVMAppendBasicBlock(p->llvm_v, ""), b2 = NULL; + if (CL == 4) { b2 = LLVMAppendBasicBlock(p->llvm_v, ""); } + LLVMBasicBlockRef b3 = LLVMAppendBasicBlock(p->llvm_v, ""); + + LLVMBuildCondBr(llvm_builder, llvm_expr(C[1], &A.st, true), b1, CL == 4 ? b2 : b3); + + LLVMPositionBuilderAtEnd(llvm_builder, b1); + llvm_stmt(C[2], &A.st); LLVMBuildBr(llvm_builder, b3); + + if (CL == 4) { + LLVMPositionBuilderAtEnd(llvm_builder, b2); + llvm_stmt(C[3], &A.st); LLVMBuildBr(llvm_builder, b3); + } + + LLVMPositionBuilderAtEnd(llvm_builder, b3); + + return NULL; } /* Generate IR for a for statement. */ @@ -486,7 +512,7 @@ static LLVMTypeRef llvm_type(type *t) { case TY_VOID: { return LLVMVoidType(); } break; case TY_PTR: { return LLVMPointerType(llvm_type(t->base), 0); } break; case TY_ARR: { return LLVMArrayType(llvm_type(t->base), t->l); } break; - case TY_BOOL: { return LLVMIntType(8); } break; + case TY_BOOL: { return LLVMIntType(1); } break; case TY_B8: { return LLVMIntType(8); } break; case TY_B16: { return LLVMIntType(16); } break; case TY_B32: { return LLVMIntType(32); } break; @@ -519,7 +545,7 @@ static LLVMValueRef llvm_ival(type *t) { return LLVMConstArray(llvm_type(t->base), va, t->l); } break; - case TY_BOOL: { return LLVMConstInt(LLVMIntType(8), 0, false); } break; + case TY_BOOL: { return LLVMConstInt(LLVMIntType(1), 0, false); } break; case TY_B8: { return LLVMConstInt(LLVMIntType(8), 0, false); } break; case TY_B16: { return LLVMConstInt(LLVMIntType(16), 0, false); } break; case TY_B32: { return LLVMConstInt(LLVMIntType(32), 0, false); } break; diff --git a/src/main.c b/src/main.c index bfba797..9c52165 100644 --- a/src/main.c +++ b/src/main.c @@ -12,8 +12,7 @@ #include "util/optget.h" #include "util/util.h" -#include <stdbool.h> -#include <stddef.h> +#include <assert.h> #include <stdio.h> #include <stdlib.h> diff --git a/src/parse.c b/src/parse.c index 1628314..ce6ee58 100644 --- a/src/parse.c +++ b/src/parse.c @@ -11,6 +11,7 @@ #include "util/error.h" #include "util/util.h" +#include <assert.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -38,8 +39,9 @@ char *ast_ks[] = { }; static ast *parse_stmt(lex *l, syt *st); +static ast *parse_stmt_init(lex *l, syt *st); static ast *parse_stmt_compound(lex *l, syt *st); -static ast *parse_stmt_decl(lex *l, syt *st); +static ast *parse_stmt_decl(lex *l, syt *st, bool scolon); static ast *parse_stmt_expr(lex *l, syt *st); static ast *parse_stmt_return(lex *l, syt *st); static ast *parse_stmt_if(lex *l, syt *st); @@ -66,6 +68,7 @@ inline ast *ast_init(ast_k kind, UINT ln, UINT cl) { } inline void ast_free(ast **a) { + if (a == NULL || *a == NULL) { return; } if ((*a)->s != NULL) { free((*a)->s); } if ((*a)->c.a != NULL) { free((*a)->c.a); } free(*a); a = NULL; /* TODO free LLVM pointers? */ @@ -73,6 +76,8 @@ inline void ast_free(ast **a) { /* Push a child AST node to an AST node. */ void ast_push(ast *a, ast *c) { + assert(a != NULL); assert(c != NULL); + ast **ca = realloc(a->c.a, (a->c.al += 1) * sizeof (ast *)); if (!ca) { error(1, SERR); } else { a->c.a = ca; ca = NULL; } @@ -141,7 +146,7 @@ ast *parse(lex *l) { /* Parse and append all child nodes */ for (ast *c; T.k != TK_EOF;) { - if ((c = parse_stmt_decl(l, &a->st)) != NULL) { ast_push(a, c); } + if ((c = parse_stmt_decl(l, &a->st, true)) != NULL) { ast_push(a, c); } else { note(l->n, T.ln, T.cl, -1, "NULL AST (parse:parse_stmt_decl)"); } } @@ -154,8 +159,8 @@ static ast *parse_stmt(lex *l, syt *st) { case TK_LBRACE: { return parse_stmt_compound(l, st); } break; case TK_ID: { lex ll = *l; switch (lex_next(&ll), ll.t.k) { - case TK_COLON: { return parse_stmt_decl(l, st); } break; - default: { return parse_stmt_expr(l, st); } break; + case TK_COLON: { return parse_stmt_decl(l, st, true); } break; + default: { return parse_stmt_expr(l, st); } break; } } break; case TK_RETURN: { return parse_stmt_return(l, st); } break; @@ -165,7 +170,20 @@ static ast *parse_stmt(lex *l, syt *st) { } } -/* Parse a compound statement. */ +/* Parse an initialisation statement. */ +static ast *parse_stmt_init(lex *l, syt *st) { + switch (T.k) { + case TK_ID: { + lex ll = *l; switch (lex_next(&ll), ll.t.k) { + case TK_COLON: { return parse_stmt_decl(l, st, false); } break; + default: { return parse_expr(l, st, 0); } break; + } + } break; + default: { return parse_expr(l, st, 0); } break; + } +} + +/* Parse a compound statement. AK_COMP has a scope. */ static ast *parse_stmt_compound(lex *l, syt *st) { ast *a = ast_init(AK_COMP, T.ln, T.cl); lex_kind(l, TK_LBRACE); a->st.pt = st; @@ -176,7 +194,7 @@ static ast *parse_stmt_compound(lex *l, syt *st) { } /* Parse a declaration statement. */ -static ast *parse_stmt_decl(lex *l, syt *st) { +static ast *parse_stmt_decl(lex *l, syt *st, bool scolon) { ast *a = ast_init(AK_DECL, T.ln, T.cl); a->h = T.h; a->s = T.s; assert(T.k == TK_ID); lex_next(l); assert(T.k == TK_COLON); lex_next(l); @@ -191,7 +209,7 @@ static ast *parse_stmt_decl(lex *l, syt *st) { /* Ensure that a type is known and consume a semicolon if one is required */ if (a->t == NULL && a->c.al == 0) { note(l->n, T.ln, T.cl, 0, "A declaration without a type is invalid"); } - if (a->c.al < 1 || a->c.a[0]->k != AK_PROC) { lex_kind(l, TK_SCOLON); } + if (scolon && (a->c.al < 1 || a->c.a[0]->k != AK_PROC)) { lex_kind(l, TK_SCOLON); } /* Insert the new symbol and return */ syt_insert_h(st, a->h, a->s, a); return a; @@ -213,21 +231,39 @@ static ast *parse_stmt_return(lex *l, syt *st) { lex_kind(l, TK_SCOLON); return a; } -/* Parse an if statement. */ +/* Parse an if statement. AK_IF has a scope. */ static ast *parse_stmt_if(lex *l, syt *st) { assert(T.k == TK_IF); ast *a = ast_init(AK_IF, T.ln, T.cl); - lex_next(l); lex_kind(l, TK_LPAREN); + lex_next(l); lex_kind(l, TK_LPAREN); a->st.pt = st; - /* TODO Parse expression and closing parenthesis */ - ast_push(a, parse_expr(l, st, 0)); lex_kind(l, TK_RPAREN); + if (T.k != TK_RPAREN) { + register ast *c1 = parse_stmt_init(l, &a->st); + + if (T.k == TK_SCOLON) { + lex_next(l); ast_push(a, c1); + ast_push(a, parse_expr(l, &a->st, 0)); + } + else { + if (c1->k == AK_DECL) { note(l->n, c1->ln, c1->cl, 0, "Expected an expression"); } + ast_push(a, ast_init(AK_VOID, 0, 0)); ast_push(a, c1); + } + } + else { note(l->n, T.ln, T.cl, 0, "Expected an expression"); } + + lex_kind(l, TK_RPAREN); /* Parse the if statement body */ - ast_push(a, parse_stmt(l, st)); return a; + ast_push(a, parse_stmt(l, &a->st)); + + /* Parse an else statement if present */ + if (T.k == TK_ELSE) { lex_next(l); ast_push(a, parse_stmt(l, &a->st)); } + + return a; } -/* Parse a for statement. */ +/* Parse a for statement. AK_FOR has a scope. */ static ast *parse_stmt_for(lex *l, syt *st) { assert(T.k == TK_FOR); @@ -536,3 +572,9 @@ void ast_print(ast *a, UINT indent) { return; } + +/* Find the procedure containing a specified node. */ +ast *find_proc(register ast *a) { + for (a = a->p; a->k != AK_PROC; a = a->p); + return a->p->k == AK_DECL ? a->p : a; +} diff --git a/src/parse.h b/src/parse.h index 98beb68..8e4c781 100644 --- a/src/parse.h +++ b/src/parse.h @@ -64,5 +64,6 @@ extern ast *ast_a_pop(ast_a *aa); extern ast *parse(lex *l); extern void ast_print(ast *a, UINT i); +extern ast *find_proc(ast *a); #endif // G_PARSE_H_VB50JOSX diff --git a/src/symbol.c b/src/symbol.c index 8ef3b2d..769d931 100644 --- a/src/symbol.c +++ b/src/symbol.c @@ -14,7 +14,6 @@ #include "symbol.h" #include "util/util.h" -#include <stddef.h> #include <stdio.h> #include <stdlib.h> #include <string.h> diff --git a/src/symbol.h b/src/symbol.h index 5bc42f0..d759cce 100644 --- a/src/symbol.h +++ b/src/symbol.h @@ -6,7 +6,6 @@ #ifndef G_SYMBOL_H_Q1VLFKFE #define G_SYMBOL_H_Q1VLFKFE -#include "type.h" #include "util/util.h" typedef struct ast_s ast;