Author | Jakob Wakeling <[email protected]> |
Date | 2023-06-21 08:43:19 |
Commit | 74972c29942d72717c2d02e107d1d035fbc75494 |
Parent | 861312df2e6595ce69883e19124fd938e76939d4 |
Various improvements to permit rudimentary IO
Diffstat
M | doc/types.md | | | 3 | ++- |
A | examples/hello_rudimentary.g | | | 20 | ++++++++++++++++++++ |
M | examples/main.g | | | 2 | +- |
M | src/analyse.c | | | 48 | +++++++++++++++++++++++++++++++++++------------- |
M | src/init.c | | | 2 | +- |
M | src/lex.c | | | 26 | ++++++++++++++++++++------ |
M | src/lex.h | | | 6 | +++--- |
M | src/llvm.c | | | 38 | ++++++++++++++++---------------------- |
M | src/main.c | | | 4 | ++-- |
M | src/parse.c | | | 133 | +++++++++++++++++++++++++++++++++++++++++++------------------------------------ |
M | src/parse.h | | | 2 | +- |
M | src/type.c | | | 21 | ++++++++++++++++++--- |
M | src/type.h | | | 3 | ++- |
13 files changed, 193 insertions, 115 deletions
diff --git a/doc/types.md b/doc/types.md index bc69064..7ade28a 100644 --- a/doc/types.md +++ b/doc/types.md @@ -36,7 +36,8 @@ s128 128-bit signed integer type uint word-sized unsigned integer type sint word-sized signed integer type -char alias for u8 ? +byte alias for u8 +char alias for u8 rune alias for u32 representing a Unicode code point ``` diff --git a/examples/hello_rudimentary.g b/examples/hello_rudimentary.g new file mode 100644 index 0000000..70b57ac --- /dev/null +++ b/examples/hello_rudimentary.g @@ -0,0 +1,20 @@ +main :: proc() -> u64 { + c : u8; + + c = u8('H'); #syscall(u64(1), u64(1), &c, u64(1)); + c = u8('e'); #syscall(u64(1), u64(1), &c, u64(1)); + c = u8('l'); #syscall(u64(1), u64(1), &c, u64(1)); + c = u8('l'); #syscall(u64(1), u64(1), &c, u64(1)); + c = u8('o'); #syscall(u64(1), u64(1), &c, u64(1)); + c = u8(','); #syscall(u64(1), u64(1), &c, u64(1)); + c = u8(' '); #syscall(u64(1), u64(1), &c, u64(1)); + c = u8('W'); #syscall(u64(1), u64(1), &c, u64(1)); + c = u8('o'); #syscall(u64(1), u64(1), &c, u64(1)); + c = u8('r'); #syscall(u64(1), u64(1), &c, u64(1)); + c = u8('l'); #syscall(u64(1), u64(1), &c, u64(1)); + c = u8('d'); #syscall(u64(1), u64(1), &c, u64(1)); + c = u8('!'); #syscall(u64(1), u64(1), &c, u64(1)); + c = u8('\n'); #syscall(u64(1), u64(1), &c, u64(1)); + + return u64(0); +} diff --git a/examples/main.g b/examples/main.g index ca0c6c4..60b4b2f 100644 --- a/examples/main.g +++ b/examples/main.g @@ -1,4 +1,4 @@ -pepo :: proc() -> u64 { +main :: proc() -> u64 { var1 : f32 = f32(1.0); var2 : *f32 = &var1; return u64(var2); diff --git a/src/analyse.c b/src/analyse.c index e77bd8e..7f69f18 100644 --- a/src/analyse.c +++ b/src/analyse.c @@ -14,9 +14,11 @@ #include <stdio.h> static void analyse_stmt(ast *a, syt *st); -static void analyse_stmt_comp(ast *a, syt *st); +static inline void analyse_stmt_comp(ast *a, syt *st); static void analyse_stmt_decl(ast *a, syt *st); static void analyse_stmt_return(ast *a, syt *st); +static void analyse_stmt_if(ast *a, syt *st); +static void analyse_stmt_for(ast *a, syt *st); static void analyse_expr(ast *a, syt *st); static void analyse_expr_proc(ast *a, syt *st); @@ -26,9 +28,6 @@ static void analyse_expr_proc(ast *a, syt *st); /* Analyse a program. */ void analyse(ast *a) { - assert(A.k == AK_PROG); - - /* Analyse all child nodes */ for (UINT i = 0; i < A.c.al; i += 1) { analyse_stmt_decl(A.c.a[i], &A.st); } } @@ -38,27 +37,26 @@ static void analyse_stmt(ast *a, syt *st) { case AK_COMP: { analyse_stmt_comp(a, st); } break; case AK_DECL: { analyse_stmt_decl(a, st); } break; case AK_RETURN: { analyse_stmt_return(a, st); } break; + case AK_IF: { analyse_stmt_if(a, st); } break; + case AK_FOR: { analyse_stmt_for(a, st); } break; default: { analyse_expr(a, st); } break; } } /* Analyse a compound statement. */ -static void analyse_stmt_comp(ast *a, syt *st) { - assert(A.k == AK_COMP); - - /* Analyse all child nodes */ +static inline void analyse_stmt_comp(ast *a, syt *st) { for (UINT i = 0; i < A.c.al; i += 1) { analyse_stmt(A.c.a[i], st); } } /* Analyse a declaration statement. */ static void analyse_stmt_decl(ast *a, syt *st) { assert(A.c.al == 0 || A.c.al == 1); - if (a->c.al == 0) { assert(a->t != NULL); return; } + if (A.c.al == 0) { assert(a->t != NULL); return; } - analyse_expr(a->c.a[0], st); - type *value_type = ast_type(a->c.a[0], st); + analyse_expr(C[0], st); + type *value_type = ast_type(C[0], st); - if (a->c.a[0]->k == AK_PROC) { return; /* TODO */ } + if (C[0]->k == AK_PROC) { return; /* TODO */ } else if (is_int(value_type)) { /* If a type has not been specified, set the type based on the value */ if (a->t == NULL) { @@ -103,8 +101,32 @@ static void analyse_stmt_decl(ast *a, syt *st) { /* Analyse a return statement. */ static void analyse_stmt_return(ast *a, syt *st) { - /* TODO Check if the return type matches or is compatible with the given value */ - analyse_expr(C[0], st); + type *t = A.c.al != 0 ? ast_type(C[0], st) : &TYPE(TY_VOID); + ast *p = A.p; for (; p->k != AK_PROC; p = p->p); + + if (is_equal(t, &TYPE(TY_VOID)) && !is_equal(p->t, &TYPE(TY_VOID))) { + note("TODO", A.ln, A.cl, 0, "Non-void procedure \"%s\" should return a value", p->p->s); + } + else if (!is_equal(t, p->t)) { + if (!is_com(t, p->t)) { + note("TODO", A.ln, A.cl, 0, "Incompatible return type of %s", t->s); + } + else { + note("TODO", A.ln, A.cl, 0, "Explicit cast required from %s to %s", t->s, p->t->s); + } + } + + if (A.c.al == 1) { analyse_expr(C[0], st); } +} + +/* Analyse an if statement. */ +static void analyse_stmt_if(ast *a, syt *st) { + /* TODO */ +} + +/* Analyse a for statement. */ +static void analyse_stmt_for(ast *a, syt *st) { + /* TODO */ } /* Analyse an expression. */ diff --git a/src/init.c b/src/init.c index b78f831..abc8d2a 100644 --- a/src/init.c +++ b/src/init.c @@ -44,7 +44,7 @@ static ast kwds[] = { { AK_TYPE, 0, 0, 0, "char", &TYPE(TY_CHAR), { 0 }, NULL }, { AK_TYPE, 0, 0, 0, "rune", &TYPE(TY_RUNE), { 0 }, NULL }, - { AK_ZERO, 0, 0, 0, NULL, NULL, { 0 }, NULL } + { AK_VOID, 0, 0, 0, NULL, NULL, { 0 }, NULL } }; void initialise(void) { diff --git a/src/lex.c b/src/lex.c index 716b46e..a558be8 100644 --- a/src/lex.c +++ b/src/lex.c @@ -16,7 +16,7 @@ #include <stdio.h> char *tok_ks[] = { - "TK_ZERO", "TK_EOF", "TK_ID", "TK_NUM", "TK_CHR", "TK_STR", "TK_HASH", + "TK_VOID", "TK_EOF", "TK_ID", "TK_NUM", "TK_CHR", "TK_STR", "TK_HASH", "TK_NULL", "TK_TRUE", "TK_FALSE", "TK_RETURN", "TK_IF", "TK_ELSE", "TK_FOR", "TK_PROC", @@ -58,7 +58,7 @@ tok tok_a_peek(tok_a *a) { /* Initialise a lexer. */ lex lex_init(const char *file, char *src, UINT len) { - lex l = { file, src, src, src + len, 0, 0, 0 }; + lex l = { file, src, src, src + len, 0, 0, 0, 0 }; lex_next(&l); return l; } @@ -219,7 +219,7 @@ tok lex_next(lex *l) { for (; C != quote && C != '\n' && P != Q;) { /* Non escape characters are not altered */ - if (C != '\\') { *head = C; head += 1; } + if (C != '\\') { *head = C; P += 1; head += 1; } /* Escape characters are processed and re-written to head */ else switch (D) { @@ -250,11 +250,21 @@ tok lex_next(lex *l) { else if (P != Q) { P += 1; CL += 1; } if (quote == '\'') { - T.k = TK_NUM; + T.k = TK_NUM; T.h = 1; if (!(T.s = strndup(s, sl))) { error(1, SERR); } - note(l->n, T.ln, T.cl, 1, "Characters are not yet fully implemented"); - /* Lex as TK_NUM, but how to distinguish at parsing stage? */ + /* + Numerical value of character literals is calculated and + stored in T.v, T.h is set to non-zero to indicate that + parsing is already complete + */ + for (UINT i = 0; i < sl; i += 1) { + if (T.v > (U32_MAX - s[i]) / 256) { + note(l->n, T.ln, T.cl, 1, "Character constant exceeds maximum size"); break; + } + + T.v = T.v * 256 + s[i]; + } } else if (quote == '\"') { T.k = TK_STR; T.h = syt_hash(s, sl); @@ -275,14 +285,15 @@ tok lex_next(lex *l) { /* Lex the next token if the current is of a specific type. */ tok lex_kind(lex *l, tok_k k) { if (T.k != k) { - note(l->n, T.ln, T.cl, 0, "Unexpected: \"%s\", was expecting: \"%s\"", tok_ks[T.k], tok_ks[k]); + note(l->n, T.ln, T.cl, 0, "Unexpected \"%s\", was expecting \"%s\"", tok_ks[T.k], tok_ks[k]); } return lex_next(l); } -/* Print lexer debug output and exit. */ +/* Print lexer debug output. */ void lex_debug(lex *l) { + printf("--- %s ---\n", l->n); for (tok t = lex_next(l); t.k != TK_EOF; free(t.s), t = lex_next(l)) { printf("%zu:%zu: %s \"%s\"\n", t.ln + 1, t.cl + 1, tok_ks[t.k], t.s); } diff --git a/src/lex.h b/src/lex.h index ff94340..ab21797 100644 --- a/src/lex.h +++ b/src/lex.h @@ -10,7 +10,7 @@ /* Remember to update tok_ks in lex.c */ typedef enum { - TK_ZERO, TK_EOF, TK_ID, TK_NUM, TK_CHR, TK_STR, TK_HASH, + TK_VOID, TK_EOF, TK_ID, TK_NUM, TK_CHR, TK_STR, TK_HASH, TK_NULL, TK_TRUE, TK_FALSE, TK_RETURN, TK_IF, TK_ELSE, TK_FOR, TK_PROC, @@ -26,8 +26,8 @@ typedef enum { TK_AS_NOT, TK_AS_AND, TK_AS_OR, TK_AS_XOR, TK_AS_SHL, TK_AS_SHR, } tok_k; -/* k : Kind, ln : Line, cl : Column, h : Hash, s : String */ -typedef struct { tok_k k; UINT ln, cl; u64 h; char *s; } tok; +/* k : Kind, ln : Line, cl : Column, h : Hash, v : Value, s : String */ +typedef struct { tok_k k; UINT ln, cl; u64 h, v; char *s; } tok; typedef struct { tok *a; UINT al; } tok_a; /* diff --git a/src/llvm.c b/src/llvm.c index 2b315a5..2fbc9d1 100644 --- a/src/llvm.c +++ b/src/llvm.c @@ -120,23 +120,20 @@ static LLVMValueRef llvm_stmt(ast *a, syt *st) { switch (a->k) { case AK_COMP: { return llvm_stmt_compound(a, st); } break; case AK_DECL: { return llvm_stmt_decl(a, st); } break; - case AK_ASSIGN: case AK_AS_ADD: case AK_AS_SUB: case AK_AS_MUL: case AK_AS_DIV: case AK_AS_MOD: - { return llvm_stmt_assn(a, st); } break; case AK_RETURN: { return llvm_stmt_return(a, st); } break; case AK_IF: { return llvm_stmt_if(a, st); } break; case AK_FOR: { return llvm_stmt_for(a, st); } break; + case AK_ASSIGN: case AK_AS_ADD: case AK_AS_SUB: case AK_AS_MUL: case AK_AS_DIV: case AK_AS_MOD: + { return llvm_stmt_assn(a, st); } break; case AK_HASH_SYSCALL: { return llvm_hash(a, st); } break; - default: { note(file_name, a->ln, a->cl, -1, "Unhandled AST kind %s (llvm:llvm_stmt)", ast_ks[a->k]); } break; + default: { return llvm_expr(a, st); } break; } } /* Generate IR for a compound statement. */ static LLVMValueRef llvm_stmt_compound(ast *a, syt *st) { - assert(A.k == AK_COMP); - - for (UINT i = 0; i < a->c.al; i += 1) { llvm_stmt(C[i], &A.st); } - + for (UINT i = 0; i < A.c.al; i += 1) { llvm_stmt(C[i], &A.st); } return NULL; } @@ -181,36 +178,36 @@ static LLVMValueRef llvm_stmt_assn(ast *a, syt *st) { a->k == AK_AS_MOD ); - ast *v = syt_search(st, a->s); + ast *v = syt_search(st, C[0]->s); if (v == NULL) { note(file_name, a->ln, a->cl, 0, "Undefined variable %s (LLVM Failsafe)", a->s); } switch (a->k) { case AK_ASSIGN: { - return LLVMBuildStore(llvm_builder, llvm_expr(C[0], st), v->llvm_v); + return LLVMBuildStore(llvm_builder, llvm_expr(C[1], st), v->llvm_v); } break; case AK_AS_ADD: { LLVMValueRef vr = LLVMBuildLoad2(llvm_builder, llvm_type(v->t), v->llvm_v, v->s); - LLVMValueRef rr = LLVMBuildAdd(llvm_builder, vr, llvm_expr(C[0], st), "as_add"); + LLVMValueRef rr = LLVMBuildAdd(llvm_builder, vr, llvm_expr(C[1], st), "as_add"); return LLVMBuildStore(llvm_builder, rr, v->llvm_v); } break; case AK_AS_SUB: { LLVMValueRef vr = LLVMBuildLoad2(llvm_builder, llvm_type(v->t), v->llvm_v, v->s); - LLVMValueRef rr = LLVMBuildSub(llvm_builder, vr, llvm_expr(C[0], st), "as_sub"); + LLVMValueRef rr = LLVMBuildSub(llvm_builder, vr, llvm_expr(C[1], st), "as_sub"); return LLVMBuildStore(llvm_builder, rr, v->llvm_v); } break; case AK_AS_MUL: { LLVMValueRef vr = LLVMBuildLoad2(llvm_builder, llvm_type(v->t), v->llvm_v, v->s); - LLVMValueRef rr = LLVMBuildMul(llvm_builder, vr, llvm_expr(C[0], st), "as_mul"); + LLVMValueRef rr = LLVMBuildMul(llvm_builder, vr, llvm_expr(C[1], st), "as_mul"); return LLVMBuildStore(llvm_builder, rr, v->llvm_v); } break; case AK_AS_DIV: { LLVMValueRef vr = LLVMBuildLoad2(llvm_builder, llvm_type(v->t), v->llvm_v, v->s); - LLVMValueRef rr = LLVMBuildSDiv(llvm_builder, vr, llvm_expr(C[0], st), "as_div"); + LLVMValueRef rr = LLVMBuildSDiv(llvm_builder, vr, llvm_expr(C[1], st), "as_div"); return LLVMBuildStore(llvm_builder, rr, v->llvm_v); } break; case AK_AS_MOD: { LLVMValueRef vr = LLVMBuildLoad2(llvm_builder, llvm_type(v->t), v->llvm_v, v->s); - LLVMValueRef rr = LLVMBuildSRem(llvm_builder, vr, llvm_expr(C[0], st), "as_mod"); + LLVMValueRef rr = LLVMBuildSRem(llvm_builder, vr, llvm_expr(C[1], st), "as_mod"); return LLVMBuildStore(llvm_builder, rr, v->llvm_v); } break; } @@ -366,7 +363,9 @@ static LLVMValueRef llvm_hash(ast *a, syt *st) { LLVMTypeRef unsigned_integer_type = llvm_type(&TYPE(TY_UINT)); LLVMTypeRef *typs = calloc(arg_count, sizeof (LLVMTypeRef)); for (UINT i = 0; i < arg_count; i += 1) { - typs[i] = unsigned_integer_type; + /* FIXME hardcoded for write syscall */ + if (i == 2) { typs[i] = llvm_type(type_ptrc(&TYPE(TY_U8), 1)); } + else { typs[i] = unsigned_integer_type; } } LLVMTypeRef func_type = LLVMFunctionType(llvm_type(&TYPE(TY_UINT)), typs, arg_count, false); diff --git a/src/main.c b/src/main.c index 21552d4..4aa5271 100644 --- a/src/main.c +++ b/src/main.c @@ -76,11 +76,11 @@ static void compile(const char * file, char *src, UINT len) { ast *a = parse(&l); if (pflag) { if (!qflag) { ast_print(a, 0); } goto end; } - if (has_error()) { exit(1); } - analyse(a); + assert(a->k == AK_PROG); analyse(a); if (Pflag) { if (!qflag) { ast_print(a, 0); } goto end; } + if (has_error()) { exit(1); } if (bflag) { llvm_ir(a, strdup(file)); } else if (Bflag) { llvm_bitcode(a, strdup(file)); } diff --git a/src/parse.c b/src/parse.c index ae3c65d..d8449eb 100644 --- a/src/parse.c +++ b/src/parse.c @@ -17,7 +17,7 @@ #include <string.h> char *ast_ks[] = { - "AK_ZERO", "AK_PROG", "AK_PROC", "AK_TYPE", "AK_CAST", + "AK_VOID", "AK_PROG", "AK_PROC", "AK_TYPE", "AK_CAST", "AK_STMT", "AK_COMP", "AK_DECL", "AK_RETURN", "AK_IF", "AK_FOR", @@ -33,8 +33,7 @@ char *ast_ks[] = { static ast *parse_stmt(lex *l, syt *st); static ast *parse_stmt_compound(lex *l, syt *st); -static ast *parse_stmt_decl(lex *l, syt *st, ast *a); -static ast *parse_stmt_assn(lex *l, syt *st, ast *a); +static ast *parse_stmt_decl(lex *l, syt *st); static ast *parse_stmt_expr(lex *l, syt *st); static ast *parse_stmt_return(lex *l, syt *st); static ast *parse_stmt_if(lex *l, syt *st); @@ -56,15 +55,15 @@ inline ast *ast_init(void) { if (!a) { error(1, SERR); } return a; } -inline void ast_free(ast **a) { - fprintf(stderr, "ast_free() NOT IMPLEMENTED\n"); /* TODO */ -} - /* Initialise an AST node of a specific kind. */ inline ast *ast_kind(ast_k kind) { ast *a = ast_init(); a->k = kind; return a; } +inline void ast_free(ast **a) { + fprintf(stderr, "ast_free() NOT IMPLEMENTED\n"); /* TODO */ +} + /* Push a child AST node to an AST node. */ void ast_push(ast *a, ast *c) { ast **ca = realloc(a->c.a, (a->c.al += 1) * sizeof (ast *)); @@ -95,7 +94,10 @@ type *ast_type(ast *a, syt *st) { /* Search the symbol table for a type first */ if (a->s) { ast *v = syt_search(st, a->s); - if (v != NULL) { return v->t; } + if (v != NULL) { + if (v->c.al != 0 && v->c.a[0]->k == AK_PROC) { return v->c.a[0]->t; } + else { return v->t; } + } } /* If the given node has a type, then return that */ @@ -123,11 +125,11 @@ ast *ast_a_pop(ast_a *aa) { /* Parse a program. */ ast *parse(lex *l) { - ast *a = ast_init(); a->k = AK_PROG; + ast *a = ast_kind(AK_PROG); /* Parse and append all child nodes */ for (ast *c; T.k != TK_EOF;) { - if ((c = parse_stmt_decl(l, &a->st, NULL)) != NULL) { ast_push(a, c); } + if ((c = parse_stmt_decl(l, &a->st)) != NULL) { ast_push(a, c); } else { note(l->n, T.ln, T.cl, -1, "NULL AST (parse:parse_stmt_decl)"); } } @@ -139,8 +141,10 @@ static ast *parse_stmt(lex *l, syt *st) { switch (T.k) { case TK_LBRACE: { return parse_stmt_compound(l, st); } break; case TK_ID: { - ast *a = ast_init(); a->ln = T.ln; a->cl = T.cl; a->h = T.h; a->s = T.s; lex_kind(l, TK_ID); - return T.k == TK_COLON ? parse_stmt_decl(l, st, a) : parse_stmt_assn(l, st, a); + lex ll = *l; switch (lex_next(&ll), ll.t.k) { + case TK_COLON: { return parse_stmt_decl(l, st); } break; + default: { return parse_stmt_expr(l, st); } break; + } } break; case TK_RETURN: { return parse_stmt_return(l, st); } break; case TK_IF: { return parse_stmt_if(l, st); } break; @@ -153,7 +157,7 @@ static ast *parse_stmt(lex *l, syt *st) { static ast *parse_stmt_compound(lex *l, syt *st) { lex_kind(l, TK_LBRACE); - ast *a = ast_init(); a->k = AK_COMP; a->st.pt = st; + ast *a = ast_kind(AK_COMP); a->st.pt = st; for (; T.k != TK_EOF && T.k != TK_RBRACE;) { ast_push(a, parse_stmt(l, &a->st)); } lex_kind(l, TK_RBRACE); return a; @@ -166,13 +170,9 @@ static ast *parse_stmt_compound(lex *l, syt *st) { it should be positioned after, and a should contain the line, column, hash, and string of the identifier. */ -static ast *parse_stmt_decl(lex *l, syt *st, ast *a) { - if (a == NULL) { - a = ast_init(); a->ln = T.ln; a->cl = T.cl; a->h = T.h; a->s = T.s; - lex_kind(l, TK_ID); - } - - a->k = AK_DECL; lex_kind(l, TK_COLON); +static ast *parse_stmt_decl(lex *l, syt *st) { + ast *a = ast_kind(AK_DECL); a->ln = T.ln; a->cl = T.cl; a->h = T.h; a->s = T.s; + lex_kind(l, TK_ID); lex_kind(l, TK_COLON); u64 pn = 0; for (; T.k == TK_OP_MUL; lex_next(l), pn += 1); @@ -200,22 +200,6 @@ static ast *parse_stmt_decl(lex *l, syt *st, ast *a) { end:; syt_insert_h(st, a->h, a->s, a); return a; } -/* Should only be called by parse_stmt_decl (?) */ -static ast *parse_stmt_assn(lex *l, syt *st, ast *a) { - switch (T.k) { - case TK_ASSIGN: { a->k = AK_ASSIGN; } goto expr; - case TK_AS_ADD: { a->k = AK_AS_ADD; } goto expr; - case TK_AS_SUB: { a->k = AK_AS_SUB; } goto expr; - case TK_AS_MUL: { a->k = AK_AS_MUL; } goto expr; - case TK_AS_DIV: { a->k = AK_AS_DIV; } goto expr; - case TK_AS_MOD: { a->k = AK_AS_MOD; } goto expr; - expr: { lex_next(l); ast_push(a, parse_expr(l, st, 0)); } break; - default: { note(l->n, T.ln, T.cl, 0, "Expected assignment operator"); } break; - } - - lex_kind(l, TK_SCOLON); return a; -} - /* Parse an expression statement. */ static ast *parse_stmt_expr(lex *l, syt *st) { ast *a = NULL; if (T.k != TK_SCOLON) { a = parse_expr(l, st, 0); } @@ -301,9 +285,7 @@ static ast *parse_expr(lex *l, syt *st, s32 o) { if (T.k != TK_RPAREN) for (;;) { ast_push(left, parse_expr(l, st, true)); - if (T.k != TK_COMMA) { break; } - if (left->k == AK_CAST) { note(l->n, T.ln, T.cl, 0, "type casts must have only a single argument"); } - lex_kind(l, TK_COMMA); + if (T.k == TK_COMMA) { lex_next(l); } else { break; } } lex_kind(l, TK_RPAREN); @@ -322,6 +304,12 @@ static ast *parse_expr(lex *l, syt *st, s32 o) { /* Parse an infix expression if one is present */ for (ast *a = NULL; tok_precedence(T.k) > o; left = a) switch (T.k) { + case TK_ASSIGN: { a = ast_kind(AK_ASSIGN); } goto infix; + case TK_AS_ADD: { a = ast_kind(AK_AS_ADD); } goto infix; + case TK_AS_SUB: { a = ast_kind(AK_AS_SUB); } goto infix; + case TK_AS_MUL: { a = ast_kind(AK_AS_MUL); } goto infix; + case TK_AS_DIV: { a = ast_kind(AK_AS_DIV); } goto infix; + case TK_AS_MOD: { a = ast_kind(AK_AS_MOD); } goto infix; case TK_OP_ADD: { a = ast_kind(AK_OP_ADD); } goto infix; case TK_OP_SUB: { a = ast_kind(AK_OP_SUB); } goto infix; case TK_OP_MUL: { a = ast_kind(AK_OP_MUL); } goto infix; @@ -358,18 +346,20 @@ static ast *parse_expr_proc(lex *l, syt *st) { /* Parse a number. */ static ast *parse_num(lex *l, syt *st) { - if (strchr(l->s, '.')) { return parse_flt(l, st); } + if (T.h == 0 && strchr(T.s, '.')) { return parse_flt(l, st); } else { return parse_int(l, st); } } /* Parse an integer. */ static ast *parse_int(lex *l, syt *st) { - ast *a = ast_init(); tok t = lex_kind(l, TK_NUM); - a->k = AK_INT; a->ln = t.ln; a->cl = t.cl; + ast *a = ast_kind(AK_INT); a->ln = T.ln; a->cl = T.cl; + tok t = lex_kind(l, TK_NUM); if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); } - if (!(a->v = val_parse_int(t.s)).k) { - note(l->n, t.ln, t.cl, 0, "%s: %s", t.s, SERR); + + if (t.h != 0) { a->v = val_u64(t.v); } + else if (!(a->v = val_parse_int(t.s)).k) { + note(l->n, t.ln, t.cl, 0, "%s: %s", t.s, SERR); errno = 0; } /* Determine the minimum integer type */ @@ -408,7 +398,7 @@ static ast *parse_flt(lex *l, syt *st) { 4 > multiplication (*), division (/), modulo (%) 3 > addition (+), subtraction (-) 2 > - 1 > + 1 > assignment (=, +=, -=, *=, /=, %=) */ /* Get the infix precedence of a token kind. */ @@ -417,6 +407,7 @@ static s32 tok_precedence(tok_k tk) { case TK_LPAREN: { return 8; } case TK_OP_MUL: case TK_OP_DIV: case TK_OP_MOD: { return 4; } case TK_OP_ADD: case TK_OP_SUB: { return 3; } + case TK_ASSIGN: case TK_AS_ADD: case TK_AS_SUB: case TK_AS_MUL: case TK_AS_DIV: case TK_AS_MOD: { return 1; } default: { return 0; } } } @@ -427,39 +418,47 @@ static s32 ast_precedence(ast_k ak) { case AK_OP_POS: case AK_OP_NEG: case AK_OP_ADO: case AK_OP_DRF: { return 6; } case AK_OP_MUL: case AK_OP_DIV: case AK_OP_MOD: { return 4; } case AK_OP_ADD: case AK_OP_SUB: { return 3; } + case AK_ASSIGN: case AK_AS_ADD: case AK_AS_SUB: case AK_AS_MUL: case AK_AS_DIV: case AK_AS_MOD: { return 1; } default: { return 0; } } } /* Recursively print an AST. */ -void ast_print(ast *a, UINT i) { - for (UINT j = 0; j != i; ++j) { printf(" "); } - printf("%zu:%zu: %s", a->ln + 1, a->cl + 1, ast_ks[a->k]); - if (a->s != NULL) { printf(": \"%s\"", a->s); } - - switch (a->k) { - case AK_DECL: { if (a->t == NULL) { break; }} - case AK_PROC: case AK_CAST: case AK_INT: case AK_FLT: case AK_OP_DRF: { - printf(" -> %s", a->t != NULL ? a->t->s : "untyped"); - if (a->t != NULL) for (type *t = a->t->base; t != NULL; t = t->base) { - printf(" -> %s", t->s); - } - } break; - default: {} break; +void ast_print(ast *a, UINT indent) { + for (UINT i = 0; i < indent; ++i) { printf(" "); } + + /* Print basic AST information (line:column: kind "string") */ + printf("%zu:%zu: %s \"%s\"", a->ln + 1, a->cl + 1, ast_ks[a->k], a->s); + + /* Print type information if present */ + for (type *t = a->t; t != NULL; t = t->base) { printf(" -> %s", t->s); } + + /* Indicate presence of various fields */ + fputs(" [", stdout); + fputc(a->h != 0 ? 'h' : '-', stdout); + fputc(a->v.k != VK_NULL ? 'v' : '-', stdout); + fputc(a->st.a != NULL ? 's' : '-', stdout); + fputc(']', stdout); + + /* Indicate if the AST node has a value */ + switch (a->v.k) { + case VK_BOOL: { printf(a->v.v_bool ? " = true" : " = false"); } break; + case VK_INT: { printf(" = %lu", a->v.v_int); } break; + case VK_FLT: { printf(" = %lf", a->v.v_flt); } break; } + /* Indicate if the AST node has no parent */ if (a->p == NULL) { printf(" NO PARENT"); } fputc('\n', stdout); - if (a->c.a != NULL) for (UINT ci = 0; ci != a->c.al; ci += 1) { - ast_print(a->c.a[ci], i + 1); - } + /* Print AST children */ + for (UINT i = 0; i < a->c.al; i += 1) { ast_print(a->c.a[i], indent + 1); } if (a->st.a != NULL) { - printf("--- SYT for %s: %s ---\n", ast_ks[a->k], a->s); + printf("--- SYT for %s \"%s\" ---\n", ast_ks[a->k], a->s); syt_print(&a->st); - printf("--- SYT for %s: %s ---\n", ast_ks[a->k], a->s); + printf("--- SYT for %s \"%s\" ---\n", ast_ks[a->k], a->s); } return; diff --git a/src/parse.h b/src/parse.h index f5f6697..f33d96f 100644 --- a/src/parse.h +++ b/src/parse.h @@ -16,7 +16,7 @@ /* Remember to update ast_ks in parse.c */ typedef enum { - AK_ZERO, AK_PROG, AK_PROC, AK_TYPE, AK_CAST, + AK_VOID, AK_PROG, AK_PROC, AK_TYPE, AK_CAST, AK_STMT, AK_COMP, AK_DECL, AK_RETURN, AK_IF, AK_FOR, diff --git a/src/type.c b/src/type.c index 5f53617..e201182 100644 --- a/src/type.c +++ b/src/type.c @@ -10,8 +10,8 @@ type types[] = { { TY_VOID, 0, 0, "void" }, - { TY_TYPE, 0, -1, "type" }, { TY_PTR, TF_PTR, -1, "ptr" }, + { TY_TYPE, 0, -1, "type" }, { TY_AUTO, 0, -1, "auto" }, { TY_BOOL, TF_BOOL, 1, "bool" }, @@ -102,7 +102,7 @@ type types[] = { type_a types_a = { NULL, 0, 0 }; -/* Ensure that the types array has a minimum number of spaces. */ +/* Allocate a types array with a minimum number of spaces. */ static inline void types_alloc(type_a *ta, u64 min) { if (ta->a == NULL) { ta->ac = 128; ta->a = malloc(ta->ac * sizeof (type)); @@ -151,10 +151,23 @@ inline bool is_flt(type *t) { return (t->f & TF_FLT); } /* Check if a type is signed. */ inline bool is_sign(type *t) { return (t->f & TF_SIGN); } +/* Check if two types are equal. */ +bool is_equal(type *t1, type *t2) { + if (t1 == NULL || t2 == NULL) { return false; } + if (t1 == t2) { return true; } + + if (t1->k != t2->k) { return false; } + if (t1->f != t2->f) { return false; } + if (t1->l != t2->l) { return false; } + if (t1->base != t2->base) { return false; } + + return true; +} + /* Check if two types are compatible. */ bool is_com(type *t1, type *t2) { if (t1 == NULL || t2 == NULL) { return false; } - if (t1 == t2) { /* TODO improve */ return true; } + if (is_equal(t1, t2)) { return true; } if (is_int(t1) && is_int(t2)) { return true; } if (is_flt(t1) && is_flt(t2)) { return true; } diff --git a/src/type.h b/src/type.h index afd19ce..079a41b 100644 --- a/src/type.h +++ b/src/type.h @@ -11,7 +11,7 @@ #define TYPE(a) (types[a]) typedef enum { - TY_VOID, TY_TYPE, TY_PTR, TY_AUTO, + TY_VOID, TY_PTR, TY_TYPE, TY_AUTO, TY_BOOL, TY_B8, TY_B16, TY_B32, TY_B64, @@ -71,6 +71,7 @@ extern bool is_int(type *t); extern bool is_flt(type *t); extern bool is_sign(type *t); +extern bool is_equal(type *t1, type *t2); extern bool is_com(type *t1, type *t2); #endif // G_TYPE_H_QHH0TJJQ