Author | Jakob Wakeling <[email protected]> |
Date | 2023-05-15 13:42:50 |
Commit | c4932f6022fd80cd96b1141df6839f9ae78ef23c |
Parent | 4bdd6fc6552ec5e4bc4d6c05c7f84ea03b3bc612 |
Implement type casting and real numbers
Diffstat
M | README.md | | | 8 | ++++---- |
M | doc/g.ebnf | | | 2 | ++ |
M | examples/main.g | | | 11 | ++++++++--- |
M | src/analyse.c | | | 97 | ++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------- |
M | src/analyse.h | | | 1 | - |
M | src/init.c | | | 53 | +++++++++++++++++++++++++++++------------------------ |
M | src/lex.c | | | 18 | ++++++++++-------- |
M | src/lex.h | | | 21 | +++++++++++---------- |
M | src/llvm.c | | | 51 | +++++++++++++++++++++++++++++++++++++++++++++++++-- |
M | src/log.c | | | 5 | ++--- |
M | src/parse.c | | | 184 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------ |
M | src/parse.h | | | 6 | ++++-- |
M | src/symbol.c | | | 2 | +- |
M | src/symbol.h | | | 4 | ++++ |
M | src/type.c | | | 31 | +++++++++++++++++++++---------- |
M | src/type.h | | | 3 | ++- |
M | src/value.c | | | 12 | ++++++++++++ |
M | src/value.h | | | 1 | + |
18 files changed, 365 insertions, 145 deletions
diff --git a/README.md b/README.md index 7cff0eb..85f9d99 100644 --- a/README.md +++ b/README.md @@ -58,24 +58,25 @@ command. The second command will output an executable file, *a.out* by default. > Not all todo items will necesarilly be implemented - [x] Implement procedure declarations -- [ ] Implement procedure calls +- [x] Implement procedure calls - [ ] Implement procedure arguments - [x] Implement variable declarations - [x] Implement variable assignments - [x] Implement integers -- [ ] Implement reals +- [x] Implement reals - [ ] Implement arrays - [x] Implement expression parsing - [x] Implement expression code generation +- [x] Implement type casting - [ ] Implement the *type* type - [ ] Implement *defer* - [ ] Implement *errdefer* - [ ] Implement *if* and *else* - [ ] Implement *for* -- [ ] Implement multiple return values - [ ] Implement generics of some kind - [ ] Implement module definition - [ ] Implement module use +- [ ] Implement C procedure calling - ... - [ ] Re-write compiler in **G** diff --git a/doc/g.ebnf b/doc/g.ebnf index 70a90d5..faae4e0 100644 --- a/doc/g.ebnf +++ b/doc/g.ebnf @@ -28,6 +28,8 @@ expr = iden | literal | expr_proc | "(", expr, ")" + | type, "(", expr, ")" (* Type cast *) + | iden, "(", [ expr, { ",", expr } ], ")" (* Procedure call *) | "+", expr | "-", expr (* Unary POS and NEG *) | "!", expr | "~", expr (* Logical and bitwise NOT *) diff --git a/examples/main.g b/examples/main.g index 02cf938..a6de101 100644 --- a/examples/main.g +++ b/examples/main.g @@ -1,4 +1,9 @@ -main :: proc() -> s64 { - var: s64; - return var; +var := 42; + +test :: proc() -> u64 { + return u64(var); +} + +main :: proc() -> u64 { + return test(); } diff --git a/src/analyse.c b/src/analyse.c index 3226257..5ee38bc 100644 --- a/src/analyse.c +++ b/src/analyse.c @@ -4,26 +4,26 @@ // All rights reserved. #include "analyse.h" +#include "log.h" #include "parse.h" #include "symbol.h" #include "type.h" -#include "util/error.h" #include "util/util.h" #include <assert.h> +#include <stdio.h> static void analyse_stmt(ast *a, syt *st); static void analyse_stmt_comp(ast *a, syt *st); static void analyse_stmt_decl(ast *a, syt *st); static void analyse_stmt_expr(ast *a, syt *st); +static void analyse_stmt_return(ast *a, syt *st); static void analyse_expr(ast *a, syt *st); static void analyse_expr_proc(ast *a, syt *st); -static type *type_expr(ast *a, syt *st); - -#define A (*a) -#define ST (*st) +#define A (*a) +#define C (a->c.a) /* AST child shorthand "C[i]" */ /* Analyse a program. */ void analyse(ast *a) { @@ -36,9 +36,10 @@ void analyse(ast *a) { /* Analyse a statement. */ static void analyse_stmt(ast *a, syt *st) { switch (A.k) { - case AK_COMP: { analyse_stmt_comp(a, st); } break; - case AK_DECL: { analyse_stmt_decl(a, st); } break; - default: { analyse_stmt_expr(a, st); } break; + case AK_COMP: { analyse_stmt_comp(a, st); } break; + case AK_DECL: { analyse_stmt_decl(a, st); } break; + case AK_RETURN: { analyse_stmt_return(a, st); } break; + default: { analyse_stmt_expr(a, st); } break; } } @@ -53,20 +54,52 @@ static void analyse_stmt_comp(ast *a, syt *st) { /* Analyse a declaration statement. */ static void analyse_stmt_decl(ast *a, syt *st) { assert(A.c.al == 0 || A.c.al == 1); + if (a->c.al == 0) { assert(a->t != NULL); return; } - /* If an initialisation value is present, then find its type */ - type *t = NULL; if (A.c.al == 1) { - analyse_expr(A.c.a[0], st); - t = type_expr(A.c.a[0], st); - } + analyse_expr(a->c.a[0], st); + type *value_type = ast_type(a->c.a[0], st); - /* If the declaration is already typed, then ensure that it matches */ - if (A.t) { if (t != NULL && A.t != t) { - error(1, "%s:%zu:%zu: error: incorrect type", "", A.ln + 1, A.cl + 1); - }} + if (a->c.a[0]->k == AK_PROC) { return; /* TODO */ } + else if (is_int(value_type)) { + /* If a type has not been specified, set the type based on the value */ + if (a->t == NULL) { + a->t = C[0]->t; + + /* If the value type is smaller than 32 bit, upgrade it */ + if (a->t->l < 4) { + if (is_sign(a->t)) { a->t = &TYPE(TY_S32); } + else { a->t = &TYPE(TY_U32); } + + /* Insert a type cast node between parent and child */ + /* FIXME this behaviour is incorrect, should only implicitely cast for literals, not all expressions */ + ast *cast = ast_init(); cast->k = AK_CAST; + cast->ln = C[0]->ln; cast->cl = C[0]->cl; + cast->t = a->t; + + ast *child = C[0]; + + ast_displace(a, child); + ast_push(cast, child); + ast_push(a, cast); + } + } + + /* If the type has been specified, check that the value is compatible */ + else { + /* TODO */ + } + } + else if (is_flt(value_type)) { + /* If a type has not been specified, set the type based on the value */ + if (a->t == NULL) { a->t = a->c.a[0]->t; } + + /* If the type has been specified, check that the value is compatible */ + else { + /* TODO */ + } + } - /* Otherwise store the initialisation type */ - else { A.t = t; } + else { note("TODO", a->ln, a->cl, -1, "unhandled value kind %s", ast_ks[a->c.a[0]->k]); } } /* Analyse an expression statement. */ @@ -74,6 +107,11 @@ static void analyse_stmt_expr(ast *a, syt *st) { analyse_expr(a, st); } +/* Analyse a return statement. */ +static void analyse_stmt_return(ast *a, syt *st) { + /* TODO Check if the return type matches or is compatible with the given value */ +} + /* Analyse an expression. */ static void analyse_expr(ast *a, syt *st) { if (A.k == AK_PROC) { analyse_expr_proc(a, st); } @@ -86,15 +124,3 @@ static void analyse_expr_proc(ast *a, syt *st) { /* Analyse the procedure body */ analyse_stmt_comp(A.c.a[0], st); } - -/* Find the ultimate type of an expression. */ -static type *type_expr(ast *a, syt *st) { - /* If the given node has a type, then return that */ - if (A.t) { return A.t; } - - /* Otherwise recurse down the first child */ - if (A.c.al) { return type_expr(A.c.a[0], st); } - - /* If no type is found, return NULL */ - return NULL; -} diff --git a/src/analyse.h b/src/analyse.h index 8d32bbf..f58a8e2 100644 --- a/src/analyse.h +++ b/src/analyse.h @@ -7,7 +7,6 @@ #define G_ANALYSE_H_N80HV3K4 #include "parse.h" -#include "util/util.h" extern void analyse(ast *a); diff --git a/src/init.c b/src/init.c index 949124d..2d7e6f6 100644 --- a/src/init.c +++ b/src/init.c @@ -12,35 +12,37 @@ static ast kwds[] = { /* Boolean Types */ - { AK_TYPE, 0, 0, 0, "b8", &types[TY_B8], { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "b16", &types[TY_B16], { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "b32", &types[TY_B32], { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "b64", &types[TY_B64], { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "bool", &types[TY_B8], { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "b8", &TYPE(TY_B8), { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "b16", &TYPE(TY_B16), { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "b32", &TYPE(TY_B32), { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "b64", &TYPE(TY_B64), { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "bool", &TYPE(TY_B8), { 0 }, NULL }, /* Integer Types */ - { AK_TYPE, 0, 0, 0, "u8", &types[TY_U8], { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "u16", &types[TY_U16], { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "u32", &types[TY_U32], { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "u64", &types[TY_U64], { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "u128", &types[TY_U128], { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "uint", &types[TY_UINT], { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "u8", &TYPE(TY_U8), { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "u16", &TYPE(TY_U16), { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "u32", &TYPE(TY_U32), { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "u64", &TYPE(TY_U64), { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "u128", &TYPE(TY_U128), { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "uint", &TYPE(TY_UINT), { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "s8", &types[TY_S8], { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "s16", &types[TY_S16], { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "s32", &types[TY_S32], { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "s64", &types[TY_S64], { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "s128", &types[TY_S128], { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "sint", &types[TY_SINT], { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "s8", &TYPE(TY_S8), { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "s16", &TYPE(TY_S16), { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "s32", &TYPE(TY_S32), { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "s64", &TYPE(TY_S64), { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "s128", &TYPE(TY_S128), { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "sint", &TYPE(TY_SINT), { 0 }, NULL }, /* Floating Point Types */ - { AK_TYPE, 0, 0, 0, "f16", &types[TY_F32], { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "f32", &types[TY_F32], { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "f64", &types[TY_F64], { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "f128", &types[TY_F128], { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "f16", &TYPE(TY_F32), { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "f32", &TYPE(TY_F32), { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "f64", &TYPE(TY_F64), { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "f128", &TYPE(TY_F128), { 0 }, NULL }, - // { SK_NULL, 0, 0, 0, "proc", NULL, { 0 }, NULL }, - // { SK_NULL, 0, 0, 0, "return", NULL, { 0 }, NULL }, + /* Alias Types */ + { AK_TYPE, 0, 0, 0, "byte", &TYPE(TY_BYTE), { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "char", &TYPE(TY_CHAR), { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "rune", &TYPE(TY_RUNE), { 0 }, NULL }, { AK_NULL, 0, 0, 0, NULL, NULL, { 0 }, NULL } }; diff --git a/src/lex.c b/src/lex.c index 9fcc7fb..b6e421c 100644 --- a/src/lex.c +++ b/src/lex.c @@ -4,6 +4,7 @@ // All rights reserved. #include "lex.h" +#include "log.h" #include "symbol.h" #include "type.h" #include "util/error.h" @@ -16,7 +17,7 @@ #include <stdio.h> char *tok_ks[] = { - "TK_NULL", "TK_EOF", "TK_ID", "TK_INT", "TK_FLT", "TK_STR", + "TK_NULL", "TK_EOF", "TK_ID", "TK_NUM", "TK_STR", "TK_RETURN", "TK_IF", "TK_ELSE", "TK_FOR", "TK_PROC", @@ -94,7 +95,7 @@ tok lex_next(lex *l) { /* FIXME beyond this point EOF isn't checked properly so if a file does not - have a trailing newline it **may** cause a segfault + have a trailing newline it **may** cause a segfault (?) */ T.ln = LN; T.cl = CL; @@ -119,9 +120,11 @@ tok lex_next(lex *l) { char *s = P; UINT sl; for (P += 1; isalnum(C); P += 1); + if (C == '.') { P += 1; for (P += 1; isdigit(C); P += 1); } + sl = P - s; CL += sl; - T.k = TK_INT; if (!(T.s = strndup(s, sl))) { error(1, SERR); } + T.k = TK_NUM; if (!(T.s = strndup(s, sl))) { error(1, SERR); } } /* Handle punctuators and operators */ @@ -198,7 +201,7 @@ tok lex_next(lex *l) { /* Handle unknown characters */ default: { - warn("%zu:%zu: Unknown character: %X '%c'", LN, CL, C, C); + note("TODO", LN, CL, 1, "Unknown character: %X '%c'", C, C); P += 1; CL += 1; } break; } @@ -208,10 +211,9 @@ tok lex_next(lex *l) { /* Lex the next token if the current is of a specific type. */ tok lex_kind(lex *l, tok_k k) { - if (T.k != k) { error( - 1, "%zu:%zu: Unexpected: \"%s\", was expecting: \"%s\"", - T.ln + 1, T.cl + 1, tok_ks[T.k], tok_ks[k] - ); } + if (T.k != k) { + note("TODO", T.ln, T.cl, 0, "Unexpected: \"%s\", was expecting: \"%s\"", tok_ks[T.k], tok_ks[k]); + } return lex_next(l); } diff --git a/src/lex.h b/src/lex.h index 24a451f..849c729 100644 --- a/src/lex.h +++ b/src/lex.h @@ -8,8 +8,9 @@ #include "util/util.h" +/* Remember to update tok_ks in lex.c */ typedef enum { - TK_NULL, TK_EOF, TK_ID, TK_INT, TK_FLT, TK_STR, + TK_NULL, TK_EOF, TK_ID, TK_NUM, TK_STR, TK_RETURN, TK_IF, TK_ELSE, TK_FOR, TK_PROC, @@ -25,17 +26,15 @@ typedef enum { TK_AS_NOT, TK_AS_AND, TK_AS_OR, TK_AS_XOR, TK_AS_SHL, TK_AS_SHR, } tok_k; -/* k : Kind, ln : Line, cl : Column, h : Hash, s : String, v_? : Value */ -typedef struct { - tok_k k; UINT ln, cl; u64 h; char *s; - union { u64 v_u64; s64 v_s64; f64 v_f64; }; -} tok; - +/* k : Kind, ln : Line, cl : Column, h : Hash, s : String */ +typedef struct { tok_k k; UINT ln, cl; u64 h; char *s; } tok; typedef struct { tok *a; UINT al; } tok_a; -typedef struct { - const char *n; char *s, *p, *q; UINT ln, cl; tok t; -} lex; +/* + n : File Name, s : Start of File, p : Current Character, q : End of File, + ln : Line Index, cl : Column Index, t : Current Token +*/ +typedef struct { const char *n; char *s, *p, *q; UINT ln, cl; tok t; } lex; extern char *tok_ks[]; diff --git a/src/llvm.c b/src/llvm.c index 2d917b3..cf80459 100644 --- a/src/llvm.c +++ b/src/llvm.c @@ -4,6 +4,7 @@ // All rights reserved. #include "llvm.h" +#include "log.h" #include "parse.h" #include "symbol.h" #include "type.h" @@ -34,8 +35,10 @@ static LLVMValueRef llvm_stmt_for(ast *a, syt *st); static LLVMValueRef llvm_expr(ast *a, syt *st); static LLVMValueRef llvm_expr_proc(ast *a, syt *st); +static LLVMValueRef llvm_expr_cast(ast *a, syt *st); static LLVMValueRef llvm_int(ast *a); +static LLVMValueRef llvm_flt(ast *a); static inline void llvm_init(void); static inline void llvm_free(void); @@ -205,14 +208,16 @@ static LLVMValueRef llvm_stmt_for(ast *a, syt *st) { static LLVMValueRef llvm_expr(ast *a, syt *st) { switch (A.k) { case AK_PROC: { return llvm_expr_proc(a, st); } break; + case AK_CAST: { return llvm_expr_cast(a, st); } break; case AK_INT: { return llvm_int(a); } break; + case AK_FLT: { return llvm_flt(a); } break; case AK_ID_VAR: { ast *v = syt_search(st, a->s); if (v == NULL) { error(2, "llvm_expr: Undefined variable %s", a->s); } return LLVMBuildLoad2(llvm_builder, llvm_type(v->t), v->llvm_v, v->s); } break; - case AK_ID_PROC: { + case AK_CALL: { ast *v = syt_search(st, a->s); if (v == NULL) { error(2, "llvm_expr: Undefined procedure %s", a->s); } @@ -244,9 +249,50 @@ static LLVMValueRef llvm_expr_proc(ast *a, syt *st) { return llvm_stmt_compound(C[0], st); } +/* Generate IR for a type cast. */ +static LLVMValueRef llvm_expr_cast(ast *a, syt *st) { + assert(a->t != NULL); assert(a->cl > 0); + + type *expr_type = ast_type(C[0], st); + + if (is_int(expr_type)) { + if (is_int(a->t)) { + return LLVMBuildIntCast2(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), is_sign(a->t), "cast"); + } + else if (is_flt(a->t)) { + if (is_sign(expr_type)) { + return LLVMBuildSIToFP(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), "stof"); + } + else { + return LLVMBuildUIToFP(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), "utof"); + } + } + } + else if (is_flt(expr_type)) { + if (is_flt(a->t)) { + return LLVMBuildFPCast(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), "cast"); + } + else if (is_int(a->t)) { + if (is_sign(a->t)) { + return LLVMBuildFPToSI(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), "ftos"); + } + else { + return LLVMBuildFPToUI(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), "ftou"); + } + } + } + + note("TODO", a->ln, a->cl, -1, "llvm_expr_cast unhandled type %s or %s", expr_type->s, a->t->s); +} + /* Generate IR for an integer. */ static LLVMValueRef llvm_int(ast *a) { - return LLVMConstInt(LLVMIntType(64), a->v.v_int, false); + return LLVMConstInt(llvm_type(a->t), a->v.v_int, false); +} + +/* Generate IR for a real. */ +static LLVMValueRef llvm_flt(ast *a) { + return LLVMConstReal(llvm_type(a->t), a->v.v_flt); } /* Initialise LLVM. */ diff --git a/src/log.c b/src/log.c index 059a378..b32eb2e 100644 --- a/src/log.c +++ b/src/log.c @@ -4,7 +4,6 @@ // All rights reserved. #include "log.h" -#include "util/error.h" #include "util/util.h" #include <stdarg.h> @@ -26,7 +25,7 @@ void note(const char *file, UINT ln, UINT cl, sint level, const char *format, .. else if (level >= 4) { fprintf(stderr, "note: "); } va_list va; va_start(va, format); vfprintf(stderr, format, va); va_end(va); - fputc('\n', stderr); if (level == -1) { exit(1); } + fputc('\n', stderr); if (level <= -1) { exit(1); } - if (level == 0 && (log_count += 1) == log_limit) { error(1, "fatal: error limit reached"); } + if (level == 0 && (log_count += 1) == log_limit) { fprintf(stderr, "fatal: error limit reached"); exit(1); } } diff --git a/src/parse.c b/src/parse.c index f4f8160..b373212 100644 --- a/src/parse.c +++ b/src/parse.c @@ -4,6 +4,7 @@ // All rights reserved. #include "lex.h" +#include "log.h" #include "parse.h" #include "symbol.h" #include "type.h" @@ -19,7 +20,7 @@ typedef struct { tok_k tk; ast_k ak; s32 o; bool as; } op; char *ast_ks[] = { - "AK_NULL", "AK_PROG", "AK_PROC", "AK_TYPE", + "AK_NULL", "AK_PROG", "AK_PROC", "AK_TYPE", "AK_CAST", "AK_STMT", "AK_COMP", "AK_DECL", "AK_RETURN", "AK_IF", "AK_FOR", @@ -28,22 +29,24 @@ char *ast_ks[] = { "AK_ASSIGN", "AK_AS_ADD", "AK_AS_SUB", "AK_AS_MUL", "AK_AS_DIV", "AK_AS_MOD", - "AK_ID_VAR", "AK_ID_PROC", "AK_INT", + "AK_ID_VAR", "AK_CALL", "AK_INT", "AK_FLT" }; static ast *parse_stmt(lex *l, syt *st); static ast *parse_stmt_compound(lex *l, syt *st); -static ast *parse_stmt_decl(lex *l, syt *st); +static ast *parse_stmt_decl(lex *l, syt *st, ast *a); static ast *parse_stmt_assn(lex *l, syt *st, ast *a); static ast *parse_stmt_expr(lex *l, syt *st); static ast *parse_stmt_return(lex *l, syt *st); static ast *parse_stmt_if(lex *l, syt *st); static ast *parse_stmt_for(lex *l, syt *st); -static ast *parse_expr(lex *l, syt *st); +static ast *parse_expr(lex *l, syt *st, bool arg); static ast *parse_expr_proc(lex *l, syt *st); +static ast *parse_num(lex *l, syt *st); static ast *parse_int(lex *l, syt *st); +static ast *parse_flt(lex *l, syt *st); static op op_lookup(tok_k tk, bool unary); @@ -62,6 +65,40 @@ void ast_push(ast *a, ast *c) { c->p = a; a->c.a[a->c.al - 1] = c; } +/* Displace a child AST node from a parent AST node. */ +void ast_displace(ast *a, ast *c) { + ast **oa = a->c.a; UINT ol = a->c.al; bool found = false; + + for (UINT i = 0; i < ol; i += 1) { if (oa[i] == c) { found = true; break; }} + if (found == false) { return; } + + ast **ca = calloc((a->c.al -= 1), sizeof (ast *)); + if (!ca) { error(1, SERR); } else { a->c.a = ca; ca = NULL; } + + for (UINT i = 0, j = 0; i < ol && j < a->c.al; i += 1) { + if (oa[i] == c) { continue; } a->c.a[j] = oa[i]; j += 1; + } + + free(oa); +} + +type *ast_type(ast *a, syt *st) { + /* Search the symbol table for a type first */ + if (a->s) { + ast *v = syt_search(st, a->s); + if (v != NULL) { return v->t; } + } + + /* If the given node has a type, then return that */ + if (a->t) { return a->t; } + + /* Otherwise recurse down the first child */ + if (a->c.al) { return ast_type(a->c.a[0], st); } + + /* If no type is found, return NULL */ + return NULL; +} + /* Push an AST to an AST array. */ void ast_a_push(ast_a *aa, ast *a) { ast **ta = realloc(aa->a, (aa->al += 1) * sizeof (ast *)); @@ -81,7 +118,7 @@ ast *parse(lex *l) { /* Parse and append all child nodes */ for (ast *c; T.k != TK_EOF;) { - if ((c = parse_stmt_decl(l, &a->st))) { ast_push(a, c); } + if ((c = parse_stmt_decl(l, &a->st, NULL))) { ast_push(a, c); } else { error(1, "NULL AST (parse:parse_stmt_decl)"); } } @@ -92,7 +129,10 @@ ast *parse(lex *l) { static ast *parse_stmt(lex *l, syt *st) { switch (T.k) { case TK_LBRACE: { return parse_stmt_compound(l, st); } break; - case TK_ID: { return parse_stmt_decl(l, st); } break; + case TK_ID: { + ast *a = ast_init(); a->ln = T.ln; a->cl = T.cl; a->h = T.h; a->s = T.s; lex_kind(l, TK_ID); + return T.k == TK_COLON ? parse_stmt_decl(l, st, a) : parse_stmt_assn(l, st, a); + } break; case TK_RETURN: { return parse_stmt_return(l, st); } break; case TK_IF: { return parse_stmt_if(l, st); } break; case TK_FOR: { return parse_stmt_for(l, st); } break; @@ -110,14 +150,20 @@ static ast *parse_stmt_compound(lex *l, syt *st) { lex_kind(l, TK_RBRACE); return a; } -/* Parse a declaration statement. */ -static ast *parse_stmt_decl(lex *l, syt *st) { - ast *sm = ast_init(); sm->k = AK_DECL; - sm->ln = T.ln; sm->cl = T.cl; sm->h = T.h; sm->s = T.s; +/* + Parse a declaration statement. + + If a is null, the lexer should be positioned at the indentifier, otherwise + it should be positioned after, and a should contain the line, column, hash, + and string of the identifier. +*/ +static ast *parse_stmt_decl(lex *l, syt *st, ast *a) { + if (a == NULL) { + a = ast_init(); a->ln = T.ln; a->cl = T.cl; a->h = T.h; a->s = T.s; + lex_kind(l, TK_ID); + } - lex_kind(l, TK_ID); - if (T.k != TK_COLON) { return parse_stmt_assn(l, st, sm); } - lex_kind(l, TK_COLON); + a->k = AK_DECL; lex_kind(l, TK_COLON); /* Store the declaration's type if one is specified */ if (T.k == TK_ID) { @@ -132,7 +178,7 @@ static ast *parse_stmt_decl(lex *l, syt *st) { l->n, T.ln + 1, T.cl + 1 ); } - sm->t = s->t; lex_next(l); + a->t = s->t; lex_next(l); if (T.k == TK_SCOLON) { lex_next(l); goto end; } } else if (T.k == TK_SCOLON) { error( @@ -141,14 +187,14 @@ static ast *parse_stmt_decl(lex *l, syt *st) { ); } /* Assign a constant or variable value */ - if (T.k == TK_COLON || T.k == TK_ASSIGN) { lex_next(l); ast_push(sm, parse_expr(l, st)); } + if (T.k == TK_COLON || T.k == TK_ASSIGN) { lex_next(l); ast_push(a, parse_expr(l, st, false)); } else { error(1, "%s:%zu:%zu: error: expected ':' or '='", l->n, T.ln + 1, T.cl + 1); } /* Parse a semicolon if one is required */ - if (sm->c.a[0]->k != AK_PROC) { lex_kind(l, TK_SCOLON); } + if (a->c.a[0]->k != AK_PROC) { lex_kind(l, TK_SCOLON); } /* Insert the new symbol and return */ - end:; syt_insert_h(st, sm->h, sm->s, sm); return sm; + end:; syt_insert_h(st, a->h, a->s, a); return a; } /* Should only be called by parse_stmt_decl (?) */ @@ -160,7 +206,7 @@ static ast *parse_stmt_assn(lex *l, syt *st, ast *a) { case TK_AS_MUL: { a->k = AK_AS_MUL; } goto expr; case TK_AS_DIV: { a->k = AK_AS_DIV; } goto expr; case TK_AS_MOD: { a->k = AK_AS_MOD; } goto expr; - expr: { lex_next(l); ast_push(a, parse_expr(l, st)); } break; + expr: { lex_next(l); ast_push(a, parse_expr(l, st, false)); } break; default: { error(1, "%s:%zu:%zu: error: expected assignment operator", l->n, T.ln + 1, T.cl + 1); } break; } @@ -169,7 +215,7 @@ static ast *parse_stmt_assn(lex *l, syt *st, ast *a) { /* Parse an expression statement. */ static ast *parse_stmt_expr(lex *l, syt *st) { - ast *a = NULL; if (T.k != TK_SCOLON) { a = parse_expr(l, st); } + ast *a = NULL; if (T.k != TK_SCOLON) { a = parse_expr(l, st, false); } lex_kind(l, TK_SCOLON); return a; } @@ -178,7 +224,7 @@ static ast *parse_stmt_return(lex *l, syt *st) { lex_kind(l, TK_RETURN); ast *a = ast_init(); a->k = AK_RETURN; - ast_push(a, parse_expr(l, st)); + ast_push(a, parse_expr(l, st, false)); lex_kind(l, TK_SCOLON); return a; } @@ -189,7 +235,7 @@ static ast *parse_stmt_if(lex *l, syt *st) { ast *a = ast_init(); a->k = AK_IF; /* Parse expression and closing parenthesis */ - ast_push(a, parse_expr(l, st)); lex_kind(l, TK_RPAREN); + ast_push(a, parse_expr(l, st, false)); lex_kind(l, TK_RPAREN); /* Parse the if statement body */ ast_push(a, parse_stmt(l, st)); return a; @@ -203,7 +249,7 @@ static ast *parse_stmt_for(lex *l, syt *st) { /* Parse one to three expressions and a closing parenthesis */ ast_push(a, parse_stmt_expr(l, st)); ast_push(a, parse_stmt_expr(l, st)); - ast_push(a, parse_expr(l, st)); lex_kind(l, TK_RPAREN); + ast_push(a, parse_expr(l, st, false)); lex_kind(l, TK_RPAREN); /* Parse the for statement body */ ast_push(a, parse_stmt(l, st)); return a; @@ -217,7 +263,7 @@ static inline void shunt(ast_a *aa, tok t, op o) { } /* Parse an expression. */ -static ast *parse_expr(lex *l, syt *st) { +static ast *parse_expr(lex *l, syt *st, bool arg) { if (T.k == TK_PROC) { return parse_expr_proc(l, st); } tok_a ts = { 0 }; ast_a as = { 0 }; @@ -228,26 +274,41 @@ static ast *parse_expr(lex *l, syt *st) { ast *a = ast_init(); tok t = lex_kind(l, TK_ID); a->ln = t.ln; a->cl = t.cl; + ast *sym = syt_search_h(st, t.h, t.s); + if (sym == NULL) { note(l->n, t.ln, t.cl, -1, "use of undeclared identifier \"%s\"", t.s); } + if (T.k == TK_LPAREN) { - a->k = AK_ID_PROC; lex_kind(l, TK_LPAREN); + lex_kind(l, TK_LPAREN); + + if (sym->k == AK_TYPE) { a->k = AK_CAST; a->t = sym->t; } + else { a->k = AK_CALL; } + + if (T.k != TK_RPAREN) for (;;) { + ast_push(a, parse_expr(l, st, true)); + if (T.k != TK_COMMA) { break; } + if (a->k == AK_CAST) { note(l->n, T.ln, T.cl, 0, "type casts must have only a single argument"); } + lex_kind(l, TK_COMMA); + } - /* TODO handle procedure arguments */ lex_kind(l, TK_RPAREN); } - else { a->k = AK_ID_VAR; } - - ast *s = syt_search_h(st, t.h, t.s); - a->t = s ? s->t : NULL; + else { + a->k = AK_ID_VAR; a->t = sym->t; + } if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); } ast_a_push(&as, a); } break; - case TK_INT: { ast_a_push(&as, parse_int(l, st)); } break; + case TK_NUM: { ast_a_push(&as, parse_num(l, st)); } break; + case TK_COLON: { if (arg) { goto eox; }} break; case TK_LPAREN: { tok_a_push(&ts, lex_next(l)); } break; case TK_RPAREN: { for (tok t = tok_a_pop(&ts);; t = tok_a_pop(&ts)) { - if (t.k == TK_NULL) { error(1, "todo: error: expected left parenthesis"); } + if (t.k == TK_NULL) { + if (arg) { goto eox; } + note(l->n, T.ln, T.cl, -1, "expected left parenthesis"); + } if (t.k == TK_LPAREN) { break; } shunt(&as, t, op_lookup(t.k, false)); @@ -266,7 +327,7 @@ static ast *parse_expr(lex *l, syt *st) { same precedence as o1 and o1 is left-associative, then pop it from the stack onto the output. */ - for (o2 = op_lookup(tok_a_peek(&ts).k, false); o2.tk != NULL; o2 = op_lookup(tok_a_peek(&ts).k, false)) { + for (o2 = op_lookup(tok_a_peek(&ts).k, false); o2.tk != TK_NULL; o2 = op_lookup(tok_a_peek(&ts).k, false)) { if (o2.tk == TK_LPAREN || (o1.o < o2.o && (o1.o != o2.o || o1.as == true))) { break; } shunt(&as, tok_a_pop(&ts), o2); @@ -316,21 +377,45 @@ static ast *parse_expr_proc(lex *l, syt *st) { ast_push(a, parse_stmt_compound(l, st)); return a; } -/* Parse an interger. */ +/* Parse a number. */ +static ast *parse_num(lex *l, syt *st) { + if (strchr(l->s, '.')) { return parse_flt(l, st); } + else { return parse_int(l, st); } +} + +/* Parse an integer. */ static ast *parse_int(lex *l, syt *st) { - ast *a = ast_init(); tok t = lex_kind(l, TK_INT); + ast *a = ast_init(); tok t = lex_kind(l, TK_NUM); a->k = AK_INT; a->ln = t.ln; a->cl = t.cl; if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); } - if (!(a->v = val_parse_int(t.s)).k) { error(1, "%s: %s", t.s, SERR); } + if (!(a->v = val_parse_int(t.s)).k) { + note(l->n, t.ln, t.cl, 0, "%s: %s", t.s, SERR); + } - /* Determine integer type */ - // if (a->v.v_int <= U8_MAX) { a->t = &TYPE(TY_U8); } - // else if (a->v.v_int <= U16_MAX) { a->t = &TYPE(TY_U16); } - // else if (a->v.v_int <= U32_MAX) { a->t = &TYPE(TY_U32); } - // else if (a->v.v_int <= U64_MAX) { a->t = &TYPE(TY_U64); } + /* Determine the minimum integer type */ + if (a->v.v_int <= U8_MAX) { a->t = &TYPE(TY_U8); } + else if (a->v.v_int <= U16_MAX) { a->t = &TYPE(TY_U16); } + else if (a->v.v_int <= U32_MAX) { a->t = &TYPE(TY_U32); } + else if (a->v.v_int <= U64_MAX) { a->t = &TYPE(TY_U64); } - a->t = &TYPE(TY_S64); // TODO remove temporary hack + return a; +} + +/* Parse a real. */ +static ast *parse_flt(lex *l, syt *st) { + ast *a = ast_init(); tok t = lex_kind(l, TK_NUM); + a->k = AK_FLT; a->ln = t.ln; a->cl = t.cl; + + if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); } + if (!(a->v = val_parse_flt(t.s)).k) { + note(l->n, t.ln, t.cl, 0, "%s: %s", t.s, SERR); + } + + /* Determine the minimum float type */ + if (a->v.v_flt <= F32_MAX) { a->t = &TYPE(TY_F32); } + else if (a->v.v_flt <= F64_MAX) { a->t = &TYPE(TY_F64); } + else if (a->v.v_flt <= F128_MAX) { a->t = &TYPE(TY_F128); } return a; } @@ -355,14 +440,19 @@ static op op_lookup(tok_k tk, bool unary) { /* Recursively print an AST. */ void ast_print(ast *a, UINT i) { for (UINT j = 0; j != i; ++j) { printf(" "); } - printf("%zu:%zu: %s: %s", a->ln + 1, a->cl + 1, ast_ks[a->k], a->s); + printf("%zu:%zu: %s", a->ln + 1, a->cl + 1, ast_ks[a->k]); + if (a->s != NULL) { printf(": \"%s\"", a->s); } switch (a->k) { case AK_DECL: { if (a->t == NULL) { break; }} - case AK_PROC: case AK_INT: { printf(" -> %s", a->t->s); } break; + case AK_PROC: case AK_CAST: case AK_INT: case AK_FLT: { + printf(" -> %s", a->t != NULL ? a->t->s : "untyped"); + } break; default: {} break; } + if (a->p == NULL) { printf(" NO PARENT"); } + fputc('\n', stdout); if (a->c.a != NULL) for (UINT ci = 0; ci != a->c.al; ci += 1) { diff --git a/src/parse.h b/src/parse.h index b1e5a8d..0642c50 100644 --- a/src/parse.h +++ b/src/parse.h @@ -16,7 +16,7 @@ /* Remember to update ast_ks in parse.c */ typedef enum { - AK_NULL, AK_PROG, AK_PROC, AK_TYPE, + AK_NULL, AK_PROG, AK_PROC, AK_TYPE, AK_CAST, AK_STMT, AK_COMP, AK_DECL, AK_RETURN, AK_IF, AK_FOR, @@ -25,7 +25,7 @@ typedef enum { AK_ASSIGN, AK_AS_ADD, AK_AS_SUB, AK_AS_MUL, AK_AS_DIV, AK_AS_MOD, - AK_ID_VAR, AK_ID_PROC, AK_INT, + AK_ID_VAR, AK_CALL, AK_INT, AK_FLT } ast_k; /* @@ -45,6 +45,8 @@ extern char *ast_ks[]; extern ast *ast_init(void); extern void ast_push(ast *a, ast *c); +extern void ast_displace(ast *a, ast *c); +extern type *ast_type(ast *a, syt *st); extern void ast_a_push(ast_a *aa, ast *a); extern ast *ast_a_pop(ast_a *aa); diff --git a/src/symbol.c b/src/symbol.c index 4ff4d92..09ff20d 100644 --- a/src/symbol.c +++ b/src/symbol.c @@ -160,7 +160,7 @@ ast *syt_search_h(syt *st, u64 h, char *k) { /* Print a basic representation of a map to stdout. */ void syt_print(syt *st) { for (UINT i = 0; i < st->ac; i += 1) if (st->a[i].h != 0) { - if (st->a[i].v->t) { printf("%s -> %s\n", st->a[i].k, st->a[i].v->t->s); } + if (ast_type(st->a[i].v, st)) { printf("%s -> %s\n", st->a[i].k, ast_type(st->a[i].v, st)->s); } else { printf("%s -> (null)\n", st->a[i].k); } } } diff --git a/src/symbol.h b/src/symbol.h index af3554f..d7b049e 100644 --- a/src/symbol.h +++ b/src/symbol.h @@ -12,6 +12,10 @@ typedef struct ast_s ast; +/* + a : Array, a.h : Hash, a.k : Key, a.v : Node, al : Array Length, + ac : Array Capacity, pt : Parent Table +*/ typedef struct syt_s { struct { u64 h; char *k; ast *v; } *a; UINT al, ac; struct syt_s *pt; diff --git a/src/type.c b/src/type.c index 3d90809..548ec6e 100644 --- a/src/type.c +++ b/src/type.c @@ -9,7 +9,7 @@ type types[] = { { TY_NULL, 0, 0, "void" }, { TY_TYPE, 0, -1, "type" }, { TY_PTR, TF_PTR, -1, "ptr" }, - { TY_ANY, 0, -1, "any" }, + { TY_AUTO, 0, -1, "auto" }, { TY_BOOL, TF_BOOL, 1, "bool" }, { TY_B8, TF_BOOL, 1, "b8" }, @@ -55,14 +55,14 @@ type types[] = { { TY_U64BE, TF_INT | TF_BE, 8, "u64be" }, { TY_U128BE, TF_INT | TF_BE, 16, "u128be" }, - { TY_S16LE, TF_INT | TF_SIGN | TF_LE, 2, "i16le" }, - { TY_S32LE, TF_INT | TF_SIGN | TF_LE, 4, "i32le" }, - { TY_S64LE, TF_INT | TF_SIGN | TF_LE, 8, "i64le" }, - { TY_S128LE, TF_INT | TF_SIGN | TF_LE, 16, "i128le" }, - { TY_S16BE, TF_INT | TF_SIGN | TF_BE, 2, "i16be" }, - { TY_S32BE, TF_INT | TF_SIGN | TF_BE, 4, "i32be" }, - { TY_S64BE, TF_INT | TF_SIGN | TF_BE, 8, "i64be" }, - { TY_S128BE, TF_INT | TF_SIGN | TF_BE, 16, "i128be" }, + { TY_S16LE, TF_INT | TF_SIGN | TF_LE, 2, "s16le" }, + { TY_S32LE, TF_INT | TF_SIGN | TF_LE, 4, "s32le" }, + { TY_S64LE, TF_INT | TF_SIGN | TF_LE, 8, "s64le" }, + { TY_S128LE, TF_INT | TF_SIGN | TF_LE, 16, "s128le" }, + { TY_S16BE, TF_INT | TF_SIGN | TF_BE, 2, "s16be" }, + { TY_S32BE, TF_INT | TF_SIGN | TF_BE, 4, "s32be" }, + { TY_S64BE, TF_INT | TF_SIGN | TF_BE, 8, "s64be" }, + { TY_S128BE, TF_INT | TF_SIGN | TF_BE, 16, "s128be" }, { TY_F16LE, TF_FLT | TF_SIGN | TF_LE, 2, "f16le" }, { TY_F32LE, TF_FLT | TF_SIGN | TF_LE, 4, "f32le" }, @@ -118,7 +118,16 @@ inline bool is_int(type *t) { return (t->f & TF_INT); } /* Check if a type is floating point. */ inline bool is_flt(type *t) { return (t->f & TF_FLT); } +/* Check if a type is signed. */ +inline bool is_sign(type *t) { return (t->f & TF_SIGN); } + /* Check if two types are compatible. */ bool is_com(type *t1, type *t2) { - return NULL; /* TODO */ + if (t1 == NULL || t2 == NULL) { return false; } + if (t1 == t2) { /* TODO improve */ return true; } + + if (is_int(t1) && is_int(t2)) { return true; } + if (is_flt(t1) && is_flt(t2)) { return true; } + + return false; /* TODO */ } diff --git a/src/type.h b/src/type.h index e818f2f..2fbd7e1 100644 --- a/src/type.h +++ b/src/type.h @@ -11,7 +11,7 @@ #define TYPE(a) (types[a]) typedef enum { - TY_NULL, TY_TYPE, TY_PTR, TY_ANY, + TY_NULL, TY_TYPE, TY_PTR, TY_AUTO, TY_BOOL, TY_B8, TY_B16, TY_B32, TY_B64, @@ -66,6 +66,7 @@ extern type types[]; extern bool is_num(type *t); extern bool is_int(type *t); extern bool is_flt(type *t); +extern bool is_sign(type *t); extern bool is_com(type *t1, type *t2); diff --git a/src/value.c b/src/value.c index 60fc7d2..4c46d83 100644 --- a/src/value.c +++ b/src/value.c @@ -7,6 +7,7 @@ #include "value.h" #include <errno.h> +#include <stdlib.h> const val val_null = { VK_NULL }; @@ -36,3 +37,14 @@ val val_parse_int(char *s) { return v; } + +/* Parse a real string into a value. */ +/* TODO remove reliance on strtold(). */ +val val_parse_flt(char *s) { + val v = { VK_FLT, .v_flt = 0 }; u64 c; char *endptr; + + v.v_flt = strtold(s, &endptr); + if (*endptr != '\0') { return val_null; } + + return v; +} diff --git a/src/value.h b/src/value.h index 0486adb..2c74007 100644 --- a/src/value.h +++ b/src/value.h @@ -18,5 +18,6 @@ extern val val_u64(u64 v); extern val val_f128(f128 v); extern val val_parse_int(char *s); +extern val val_parse_flt(char *s); #endif // G_VALUE_H_X2RKXBBA