Author | Jakob Wakeling <[email protected]> |
Date | 2023-06-28 01:05:05 |
Commit | f718ad0837fb154deaacf843c679f8b6975d3e50 |
Parent | 33e34d4e2e25b4ea33dc2c84ec05967a59a42254 |
Parse integers at lex time and refine type parsing
Diffstat
M | README.md | | | 1 | + |
M | doc/spec.md | | | 6 | ++++++ |
M | src/analyse.c | | | 15 | +++++++++------ |
M | src/init.c | | | 10 | +++------- |
M | src/lex.c | | | 51 | ++++++++++++++++++++++++++++++++++++++++++--------- |
M | src/lex.h | | | 7 | ++++++- |
M | src/llvm.c | | | 4 | +++- |
M | src/parse.c | | | 228 | ++++++++++++++++++++++++++++++++++++++----------------------------------------- |
M | src/parse.h | | | 3 | +-- |
M | src/type.c | | | 29 | ++++++++++++++++++++++++----- |
M | src/type.h | | | 7 | ++++--- |
M | src/util/util.h | | | 9 | +++++++++ |
12 files changed, 219 insertions, 151 deletions
diff --git a/README.md b/README.md index 289bad9..ca35bcf 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,7 @@ command. The second command will output an executable file, *a.out* by default. - [ ] Implement arrays - [x] Implement expressions - [x] Implement type casting +- [ ] Implement type casting to pointers and arrays - [ ] Implement the *type* type - [ ] Implement *defer* - [ ] Implement *errdefer* diff --git a/doc/spec.md b/doc/spec.md index 8b7260c..c0c9994 100644 --- a/doc/spec.md +++ b/doc/spec.md @@ -134,6 +134,12 @@ at compile time. ```ebnf stmt_decl_constant = iden, ":", [ type ], ":", expr ; + +iden = ( alpha | "_" ), { alpha | digit | "_" } ; +type = { "*" | "[", [ literal_int ], "]" }, iden ; + +alpha = ANY_ENGLISH_LETTER ; +digit = digit_dec ; ``` ## Variables diff --git a/src/analyse.c b/src/analyse.c index 6d5ba1d..d24d48f 100644 --- a/src/analyse.c +++ b/src/analyse.c @@ -25,10 +25,11 @@ static void analyse_expr_proc(ast *a, syt *st); #define A (*a) #define C (a->c.a) /* AST child shorthand "C[i]" */ +#define CL (a->c.al) /* AST child array length shorthand */ /* Analyse a program. */ void analyse(ast *a) { - for (UINT i = 0; i < A.c.al; i += 1) { analyse_stmt_decl(A.c.a[i], &A.st); } + for (UINT i = 0; i < A.c.al; i += 1) { analyse_stmt_decl(C[i], &A.st); } } /* Analyse a statement. */ @@ -45,7 +46,9 @@ static void analyse_stmt(ast *a, syt *st) { /* Analyse a compound statement. */ static inline void analyse_stmt_comp(ast *a, syt *st) { - for (UINT i = 0; i < A.c.al; i += 1) { analyse_stmt(A.c.a[i], &A.st); } + assert(A.k == AK_COMP); + + for (UINT i = 0; i < A.c.al; i += 1) { analyse_stmt(C[i], &A.st); } } /* Analyse a declaration statement. */ @@ -78,9 +81,7 @@ static void analyse_stmt_decl(ast *a, syt *st) { /* Insert a type cast node between parent and child */ /* FIXME this behaviour is incorrect, should only implicitely cast for literals, not all expressions */ - ast *cast = ast_init(); cast->k = AK_CAST; - cast->ln = C[0]->ln; cast->cl = C[0]->cl; - cast->t = A.t; + ast *cast = ast_init(AK_CAST, C[0]->ln, C[0]->cl); cast->t = A.t; ast *child = C[0]; @@ -143,7 +144,7 @@ static void analyse_expr(ast *a, syt *st) { if (A.k == AK_PROC) { analyse_expr_proc(a, st); } if (A.k == AK_OP_DRF) { A.t = ast_type(C[0], st)->base; } - for (UINT i = 0; i < A.c.al; i += 1) { analyse_expr(C[i], st); } + for (UINT i = 0; i < CL; i += 1) { analyse_expr(C[i], st); } } /* Analyse a procedure expression. */ @@ -151,5 +152,5 @@ static void analyse_expr_proc(ast *a, syt *st) { assert(A.k == AK_PROC); /* Analyse the procedure body */ - analyse_stmt_comp(A.c.a[0], st); + analyse_stmt_comp(C[0], st); } diff --git a/src/init.c b/src/init.c index abc8d2a..c9587b5 100644 --- a/src/init.c +++ b/src/init.c @@ -19,19 +19,19 @@ static ast kwds[] = { { AK_TYPE, 0, 0, 0, "bool", &TYPE(TY_B8), { 0 }, NULL }, /* Integer Types */ + { AK_TYPE, 0, 0, 0, "uint", &TYPE(TY_UINT), { 0 }, NULL }, { AK_TYPE, 0, 0, 0, "u8", &TYPE(TY_U8), { 0 }, NULL }, { AK_TYPE, 0, 0, 0, "u16", &TYPE(TY_U16), { 0 }, NULL }, { AK_TYPE, 0, 0, 0, "u32", &TYPE(TY_U32), { 0 }, NULL }, { AK_TYPE, 0, 0, 0, "u64", &TYPE(TY_U64), { 0 }, NULL }, { AK_TYPE, 0, 0, 0, "u128", &TYPE(TY_U128), { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "uint", &TYPE(TY_UINT), { 0 }, NULL }, + { AK_TYPE, 0, 0, 0, "sint", &TYPE(TY_SINT), { 0 }, NULL }, { AK_TYPE, 0, 0, 0, "s8", &TYPE(TY_S8), { 0 }, NULL }, { AK_TYPE, 0, 0, 0, "s16", &TYPE(TY_S16), { 0 }, NULL }, { AK_TYPE, 0, 0, 0, "s32", &TYPE(TY_S32), { 0 }, NULL }, { AK_TYPE, 0, 0, 0, "s64", &TYPE(TY_S64), { 0 }, NULL }, { AK_TYPE, 0, 0, 0, "s128", &TYPE(TY_S128), { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "sint", &TYPE(TY_SINT), { 0 }, NULL }, /* Floating Point Types */ { AK_TYPE, 0, 0, 0, "f16", &TYPE(TY_F32), { 0 }, NULL }, @@ -39,11 +39,6 @@ static ast kwds[] = { { AK_TYPE, 0, 0, 0, "f64", &TYPE(TY_F64), { 0 }, NULL }, { AK_TYPE, 0, 0, 0, "f128", &TYPE(TY_F128), { 0 }, NULL }, - /* Alias Types */ - { AK_TYPE, 0, 0, 0, "byte", &TYPE(TY_BYTE), { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "char", &TYPE(TY_CHAR), { 0 }, NULL }, - { AK_TYPE, 0, 0, 0, "rune", &TYPE(TY_RUNE), { 0 }, NULL }, - { AK_VOID, 0, 0, 0, NULL, NULL, { 0 }, NULL } }; diff --git a/src/lex.c b/src/lex.c index aac8c2c..c27fc8c 100644 --- a/src/lex.c +++ b/src/lex.c @@ -16,7 +16,7 @@ #include <stdio.h> char *tok_ks[] = { - "TK_VOID", "TK_EOF", "TK_ID", "TK_NUM", "TK_CHR", "TK_STR", "TK_HASH", + "TK_VOID", "TK_EOF", "TK_ID", "TK_INT", "TK_NUM", "TK_STR", "TK_HASH", "TK_NULL", "TK_TRUE", "TK_FALSE", "TK_RETURN", "TK_IF", "TK_ELSE", "TK_FOR", "TK_PROC", @@ -40,6 +40,9 @@ char *tok_ks[] = { #define is_digit_doz(c) ((c >= '0' && c <= '9') || (c == 'A' || c == 'B')) #define is_digit_hex(c) ((c >= '0' && c <= '9') || (c >= 'A' || c == 'F')) +static inline u128 parse_int(char *s); +static inline f128 parse_flt(char *s); + /* Push a token to a token array. */ void tok_a_push(tok_a *a, tok t) { tok *ta = realloc(a->a, (a->al += 1) * sizeof (tok)); @@ -122,14 +125,16 @@ tok lex_next(lex *l) { /* Handle number literals */ else if (is_digit_dec(C)) { - char *s = P; UINT sl; + T.k = TK_INT; char *s = P; for (P += 1; is_alpha(C) || is_digit_dec(C); P += 1); - if (C == '.') { P += 1; for (P += 1; is_digit_dec(C); P += 1); } + if (C == '.') { T.k = TK_NUM; P += 1; for (P += 1; is_digit_dec(C); P += 1); } + /* TODO lex exponent part of real numbers */ - sl = P - s; CL += sl; + UINT sl = P - s; CL += sl; - T.k = TK_NUM; if (!(T.s = strndup(s, sl))) { error(1, SERR); } + if (!(T.s = strndup(s, sl))) { error(1, SERR); } + if (T.k == TK_INT) { T.v_int = parse_int(T.s); } } /* Handle hash procedures */ @@ -250,20 +255,18 @@ tok lex_next(lex *l) { else if (P != Q) { P += 1; CL += 1; } if (quote == '\'') { - T.k = TK_NUM; T.h = 1; - if (!(T.s = strndup(s, sl))) { error(1, SERR); } + T.k = TK_INT; if (!(T.s = strndup(s, sl))) { error(1, SERR); } /* Numerical value of character literals is calculated and - stored in T.v, T.h is set to non-zero to indicate that - parsing is already complete + stored in T.v_int */ for (UINT i = 0; i < sl; i += 1) { - if (T.v > (U32_MAX - s[i]) / 256) { + if (T.v_int > (U32_MAX - s[i]) / 256) { note(l->n, T.ln, T.cl, 1, "Character constant exceeds maximum size"); break; } - T.v = T.v * 256 + s[i]; + T.v_int = T.v_int * 256 + s[i]; } } else if (quote == '\"') { @@ -298,3 +301,31 @@ void lex_debug(lex *l) { printf("%zu:%zu: %s \"%s\"\n", t.ln + 1, t.cl + 1, tok_ks[t.k], t.s); } } + +/* Parse an integer string into a value. */ +static inline u128 parse_int(char *s) { + register u128 v = 0, c; register UINT b = 10; + + if (s[0] == '0') switch (s[1]) { + case 'b': { s += 2; b = 2; } break; case 'o': { s += 2; b = 8; } break; + case 'd': { s += 2; b = 10; } break; case 'z': { s += 2; b = 12; } break; + case 'x': { s += 2; b = 16; } break; default: { s += 1; } break; + } + + for (; s[0]; ++s) { + if (s[0] >= '0' && s[0] <= '9') { c = *s - '0'; } + else if (s[0] >= 'A' && s[0] <= 'F') { c = *s - ('A' - 10); } + + /* TODO better error handling */ + if (c >= b) { errno = EDOM; return 0; } + if (v > (U128_MAX - c) / b) { errno = ERANGE; return 0; } + + v = v * b + c; + } + + return v; +} + +static inline f128 parse_flt(char *s) { + return 0.0; +} diff --git a/src/lex.h b/src/lex.h index ab21797..0f22a2e 100644 --- a/src/lex.h +++ b/src/lex.h @@ -10,7 +10,7 @@ /* Remember to update tok_ks in lex.c */ typedef enum { - TK_VOID, TK_EOF, TK_ID, TK_NUM, TK_CHR, TK_STR, TK_HASH, + TK_VOID, TK_EOF, TK_ID, TK_INT, TK_NUM, TK_STR, TK_HASH, TK_NULL, TK_TRUE, TK_FALSE, TK_RETURN, TK_IF, TK_ELSE, TK_FOR, TK_PROC, @@ -26,8 +26,11 @@ typedef enum { TK_AS_NOT, TK_AS_AND, TK_AS_OR, TK_AS_XOR, TK_AS_SHL, TK_AS_SHR, } tok_k; -/* k : Kind, ln : Line, cl : Column, h : Hash, v : Value, s : String */ -typedef struct { tok_k k; UINT ln, cl; u64 h, v; char *s; } tok; +/* + k : Kind, ln : Line, cl : Column, h : Hash, s : String, + v_int : Int Value, v_flt : Flt Value +*/ +typedef struct { tok_k k; UINT ln, cl; u64 h; char *s; union { u64 v_int; f128 v_flt; }; } tok; typedef struct { tok *a; UINT al; } tok_a; /* diff --git a/src/llvm.c b/src/llvm.c index 14bed4d..b3e5e02 100644 --- a/src/llvm.c +++ b/src/llvm.c @@ -53,6 +53,7 @@ static LLVMValueRef llvm_ival(type *t); #define A (*a) #define C (a->c.a) /* AST child shorthand "C[i]" */ +#define CL (a->c.al) /* AST child array length shorthand */ /* Generate IR from an AST with LLVM. */ void llvm(ast *a, char *file) { @@ -392,7 +393,7 @@ static LLVMValueRef llvm_hash(ast *a, syt *st) { LLVMTypeRef *typs = calloc(arg_count, sizeof (LLVMTypeRef)); for (UINT i = 0; i < arg_count; i += 1) { /* FIXME hardcoded for write syscall */ - if (i == 2) { typs[i] = llvm_type(type_ptrc(&TYPE(TY_U8), 1)); } + if (i == 2) { typs[i] = llvm_type(type_ptr(&TYPE(TY_U8), 1)); } else { typs[i] = unsigned_integer_type; } } diff --git a/src/parse.c b/src/parse.c index 039c5ab..433a092 100644 --- a/src/parse.c +++ b/src/parse.c @@ -42,8 +42,8 @@ static ast *parse_stmt_for(lex *l, syt *st); static ast *parse_expr(lex *l, syt *st, s32 o); static ast *parse_expr_proc(lex *l, syt *st); -static inline ast *parse_bool(lex *l, syt *st); -static ast *parse_num(lex *l, syt *st); +static inline type *parse_type(lex *l, syt *st); + static ast *parse_int(lex *l, syt *st); static ast *parse_flt(lex *l, syt *st); @@ -51,14 +51,9 @@ static s32 tok_precedence(tok_k tk); static s32 ast_precedence(ast_k ak); /* Initialise an AST node. */ -inline ast *ast_init(void) { - ast *a = calloc(1, sizeof (ast)); - if (!a) { error(1, SERR); } return a; -} - -/* Initialise an AST node of a specific kind. */ -inline ast *ast_kind(ast_k kind) { - ast *a = ast_init(); a->k = kind; return a; +inline ast *ast_init(ast_k kind, UINT ln, UINT cl) { + ast *a = calloc(1, sizeof (*a)); if (a == NULL) { error(1, SERR); } + a->k = kind; a->ln = ln; a->cl = cl; return a; } inline void ast_free(ast **a) { @@ -126,7 +121,7 @@ ast *ast_a_pop(ast_a *aa) { /* Parse a program. */ ast *parse(lex *l) { - ast *a = ast_kind(AK_PROG); + ast *a = ast_init(AK_PROG, 0, 0); /* Parse and append all child nodes */ for (ast *c; T.k != TK_EOF;) { @@ -156,62 +151,44 @@ static ast *parse_stmt(lex *l, syt *st) { /* Parse a compound statement. */ static ast *parse_stmt_compound(lex *l, syt *st) { - lex_kind(l, TK_LBRACE); + ast *a = ast_init(AK_COMP, T.ln, T.cl); + lex_kind(l, TK_LBRACE); a->st.pt = st; - ast *a = ast_kind(AK_COMP); a->st.pt = st; for (; T.k != TK_EOF && T.k != TK_RBRACE;) { ast_push(a, parse_stmt(l, &a->st)); } lex_kind(l, TK_RBRACE); return a; } -/* - Parse a declaration statement. - - If a is null, the lexer should be positioned at the indentifier, otherwise - it should be positioned after, and a should contain the line, column, hash, - and string of the identifier. -*/ +/* Parse a declaration statement. */ static ast *parse_stmt_decl(lex *l, syt *st) { - ast *a = ast_kind(AK_DECL); a->ln = T.ln; a->cl = T.cl; a->h = T.h; a->s = T.s; - lex_kind(l, TK_ID); lex_kind(l, TK_COLON); - - u64 pn = 0; for (; T.k == TK_OP_MUL; lex_next(l), pn += 1); + ast *a = ast_init(AK_DECL, T.ln, T.cl); a->h = T.h; a->s = T.s; + assert(T.k == TK_ID); lex_next(l); assert(T.k == TK_COLON); lex_next(l); /* Store the declaration's type if one is specified */ - if (T.k == TK_ID) { - ast *s = syt_search_h(st, T.h, T.s); - - if (s == NULL) { note(l->n, T.ln, T.cl, 0, "Use of undeclared identifier \"%s\"", T.s); } - if (s->k != AK_TYPE) { note(l->n, T.ln, T.cl, 0, "Expected type identifier"); } - - a->t = type_ptrc(s->t, pn); lex_next(l); - if (T.k == TK_SCOLON) { lex_next(l); goto end; } - } - else if (pn != 0) { note(l->n, T.ln, T.cl, 0, "Expected a type identifier for pointer type"); } - else if (T.k == TK_SCOLON) { note(l->n, T.ln, T.cl, 0, "A declaration without a type is invalid"); } + if (T.k == TK_ID || T.k == TK_OP_MUL || T.k == TK_LBRACK) { a->t = parse_type(l, st); } - /* Assign a constant or variable value */ + /* Assign a constant or variable value if one is specified */ if (T.k == TK_COLON || T.k == TK_ASSIGN) { lex_next(l); ast_push(a, parse_expr(l, st, 0)); } - else { note(l->n, T.ln, T.cl, 0, "expected ':' or '='"); } - /* Parse a semicolon if one is required */ - if (a->c.a[0]->k != AK_PROC) { lex_kind(l, TK_SCOLON); } + /* Ensure that a type is known and consume a semicolon if one is required */ + if (a->t == NULL && a->c.al == 0) { note(l->n, T.ln, T.cl, 0, "A declaration without a type is invalid"); } + if (a->c.al < 1 || a->c.a[0]->k != AK_PROC) { lex_kind(l, TK_SCOLON); } /* Insert the new symbol and return */ - end:; syt_insert_h(st, a->h, a->s, a); return a; + syt_insert_h(st, a->h, a->s, a); return a; } /* Parse an expression statement. */ static ast *parse_stmt_expr(lex *l, syt *st) { - ast *a = NULL; if (T.k != TK_SCOLON) { a = parse_expr(l, st, 0); } - lex_kind(l, TK_SCOLON); return a; + for (; T.k == TK_SCOLON; lex_next(l)); /* Skip lonely semicolons */ + ast *a = parse_expr(l, st, 0); lex_kind(l, TK_SCOLON); return a; } /* Parse a return statement. */ static ast *parse_stmt_return(lex *l, syt *st) { - lex_kind(l, TK_RETURN); + assert(T.k == TK_RETURN); - ast *a = ast_init(); a->k = AK_RETURN; + ast *a = ast_init(AK_RETURN, T.ln, T.cl); lex_next(l); if (T.k != TK_SCOLON) { ast_push(a, parse_expr(l, st, 0)); } lex_kind(l, TK_SCOLON); return a; @@ -219,10 +196,12 @@ static ast *parse_stmt_return(lex *l, syt *st) { /* Parse an if statement. */ static ast *parse_stmt_if(lex *l, syt *st) { - lex_kind(l, TK_IF); lex_kind(l, TK_LPAREN); - ast *a = ast_init(); a->k = AK_IF; + assert(T.k == TK_IF); + + ast *a = ast_init(AK_IF, T.ln, T.cl); + lex_next(l); lex_kind(l, TK_LPAREN); - /* Parse expression and closing parenthesis */ + /* TODO Parse expression and closing parenthesis */ ast_push(a, parse_expr(l, st, 0)); lex_kind(l, TK_RPAREN); /* Parse the if statement body */ @@ -231,10 +210,12 @@ static ast *parse_stmt_if(lex *l, syt *st) { /* Parse a for statement. */ static ast *parse_stmt_for(lex *l, syt *st) { - lex_kind(l, TK_FOR); lex_kind(l, TK_LPAREN); - ast *a = ast_init(); a->k = AK_FOR; + assert(T.k == TK_FOR); - /* Parse one to three expressions and a closing parenthesis */ + ast *a = ast_init(AK_FOR, T.ln, T.cl); + lex_next(l); lex_kind(l, TK_LPAREN); + + /* TODO Parse one to three expressions and a closing parenthesis */ ast_push(a, parse_stmt_expr(l, st)); ast_push(a, parse_stmt_expr(l, st)); ast_push(a, parse_expr(l, st, 0)); lex_kind(l, TK_RPAREN); @@ -247,11 +228,11 @@ static ast *parse_stmt_for(lex *l, syt *st) { static ast *parse_expr(lex *l, syt *st, s32 o) { ast *left = NULL; switch (T.k) { case TK_ID: { - left = ast_init(); tok t = lex_kind(l, TK_ID); - left->ln = t.ln; left->cl = t.cl; + tok t = lex_next(l); + left = ast_init(0, t.ln, t.cl); ast *sym = syt_search_h(st, t.h, t.s); - if (sym == NULL) { note(l->n, t.ln, t.cl, -1, "use of undeclared identifier \"%s\"", t.s); } + if (sym == NULL) { note(l->n, t.ln, t.cl, -1, "Use of undeclared identifier \"%s\"", t.s); } if (T.k == TK_LPAREN) { lex_kind(l, TK_LPAREN); @@ -272,14 +253,15 @@ static ast *parse_expr(lex *l, syt *st, s32 o) { if (!(left->s = strdup(t.s))) { error(1, "%s", SERR); } } break; - case TK_TRUE: { left = ast_kind(AK_BOOL); left->v.v_bool = true; } goto boolean; - case TK_FALSE: { left = ast_kind(AK_BOOL); left->v.v_bool = false; } goto boolean; - boolean: { left->ln = T.ln; left->cl = T.cl; left->t = &TYPE(TY_BOOL); lex_next(l); } break; - case TK_NUM: { left = parse_num(l, st); } break; + case TK_TRUE: { left = ast_init(AK_BOOL, T.ln, T.cl); left->v = val_bool(true); } goto boolean; + case TK_FALSE: { left = ast_init(AK_BOOL, T.ln, T.cl); left->v = val_bool(false); } goto boolean; + boolean: { left->t = &TYPE(TY_BOOL); lex_next(l); } break; + case TK_INT: { left = parse_int(l, st); } break; + case TK_NUM: { left = parse_flt(l, st); } break; case TK_PROC: { return parse_expr_proc(l, st); } break; case TK_HASH: { - left = ast_init(); tok t = lex_kind(l, TK_HASH); - left->ln = t.ln; left->cl = t.cl; bool needs_args = false; + tok t = lex_next(l); bool needs_args = false; + left = ast_init(0, t.ln, t.cl); if (strcmp(t.s, "#syscall") == 0) { left->k = AK_HASH_SYSCALL; needs_args = true; } else { note("TODO", t.ln, t.cl, 0, "%s: unrecognised hash procedure", t.s); } @@ -298,28 +280,28 @@ static ast *parse_expr(lex *l, syt *st, s32 o) { if (!(left->s = strdup(t.s))) { error(1, "%s", SERR); } } break; case TK_LPAREN: { lex_next(l); left = parse_expr(l, st, 0); lex_kind(l, TK_RPAREN); } break; - case TK_OP_ADD: { left = ast_kind(AK_OP_POS); } goto prefix; - case TK_OP_SUB: { left = ast_kind(AK_OP_NEG); } goto prefix; - case TK_BW_NOT: { left = ast_kind(AK_BW_NOT); } goto prefix; - case TK_BW_AND: { left = ast_kind(AK_OP_ADO); } goto prefix; - case TK_OP_MUL: { left = ast_kind(AK_OP_DRF); } goto prefix; + case TK_OP_ADD: { left = ast_init(AK_OP_POS, T.ln, T.cl); } goto prefix; + case TK_OP_SUB: { left = ast_init(AK_OP_NEG, T.ln, T.cl); } goto prefix; + case TK_BW_NOT: { left = ast_init(AK_BW_NOT, T.ln, T.cl); } goto prefix; + case TK_BW_AND: { left = ast_init(AK_OP_ADO, T.ln, T.cl); } goto prefix; + case TK_OP_MUL: { left = ast_init(AK_OP_DRF, T.ln, T.cl); } goto prefix; prefix: { lex_next(l); ast_push(left, parse_expr(l, st, ast_precedence(left->k))); } break; default: { note(l->n, T.ln, T.cl, -1, "Unhandled expression of kind %s", tok_ks[T.k]); } break; } /* Parse an infix expression if one is present */ for (ast *a = NULL; tok_precedence(T.k) > o; left = a) switch (T.k) { - case TK_ASSIGN: { a = ast_kind(AK_ASSIGN); } goto infix; - case TK_AS_ADD: { a = ast_kind(AK_AS_ADD); } goto infix; - case TK_AS_SUB: { a = ast_kind(AK_AS_SUB); } goto infix; - case TK_AS_MUL: { a = ast_kind(AK_AS_MUL); } goto infix; - case TK_AS_DIV: { a = ast_kind(AK_AS_DIV); } goto infix; - case TK_AS_MOD: { a = ast_kind(AK_AS_MOD); } goto infix; - case TK_OP_ADD: { a = ast_kind(AK_OP_ADD); } goto infix; - case TK_OP_SUB: { a = ast_kind(AK_OP_SUB); } goto infix; - case TK_OP_MUL: { a = ast_kind(AK_OP_MUL); } goto infix; - case TK_OP_DIV: { a = ast_kind(AK_OP_DIV); } goto infix; - case TK_OP_MOD: { a = ast_kind(AK_OP_MOD); } goto infix; + case TK_ASSIGN: { a = ast_init(AK_ASSIGN, T.ln, T.cl); } goto infix; + case TK_AS_ADD: { a = ast_init(AK_AS_ADD, T.ln, T.cl); } goto infix; + case TK_AS_SUB: { a = ast_init(AK_AS_SUB, T.ln, T.cl); } goto infix; + case TK_AS_MUL: { a = ast_init(AK_AS_MUL, T.ln, T.cl); } goto infix; + case TK_AS_DIV: { a = ast_init(AK_AS_DIV, T.ln, T.cl); } goto infix; + case TK_AS_MOD: { a = ast_init(AK_AS_MOD, T.ln, T.cl); } goto infix; + case TK_OP_ADD: { a = ast_init(AK_OP_ADD, T.ln, T.cl); } goto infix; + case TK_OP_SUB: { a = ast_init(AK_OP_SUB, T.ln, T.cl); } goto infix; + case TK_OP_MUL: { a = ast_init(AK_OP_MUL, T.ln, T.cl); } goto infix; + case TK_OP_DIV: { a = ast_init(AK_OP_DIV, T.ln, T.cl); } goto infix; + case TK_OP_MOD: { a = ast_init(AK_OP_MOD, T.ln, T.cl); } goto infix; infix: { lex_next(l); ast_push(a, left); ast_push(a, parse_expr(l, st, ast_precedence(a->k))); } break; } @@ -328,58 +310,74 @@ static ast *parse_expr(lex *l, syt *st, s32 o) { /* Parse a procedure expression. */ static ast *parse_expr_proc(lex *l, syt *st) { - lex_kind(l, TK_PROC); lex_kind(l, TK_LPAREN); - ast *a = ast_init(); a->k = AK_PROC; + assert(T.k == TK_PROC); + + ast *a = ast_init(AK_PROC, T.ln, T.cl); + lex_next(l); lex_kind(l, TK_LPAREN); /* Parse optional procedure parameter(s) */ /* TODO */ lex_kind(l, TK_RPAREN); - /* Parse optional procedure return type(s) */ - if (T.k == TK_RARROW) { - lex_next(l); tok t = lex_kind(l, TK_ID); - ast *s = syt_search_h(st, t.h, t.s); - - if (s == NULL) { note(l->n, T.ln, T.cl, 0, "use of undefined identifier \"%s\"", t.s); } - if (s->k != AK_TYPE) { note(l->n, T.ln, T.cl, 0, "expected type identifier"); } - - a->t = s->t; - } + /* Parse optional procedure return type */ + if (T.k == TK_RARROW) { lex_next(l); a->t = parse_type(l, st); } else { a->t = &TYPE(TY_VOID); } ast_push(a, parse_stmt_compound(l, st)); return a; } -/* Parse a number. */ -static ast *parse_num(lex *l, syt *st) { - if (T.h == 0 && strchr(T.s, '.')) { return parse_flt(l, st); } - else { return parse_int(l, st); } +/* Parse a type identifier. */ +static inline type *parse_type(lex *l, syt *st) { + type *r = NULL, *c; /* root and deepest child */ + + /* Parse optional pointer and array specifiers */ + for (register type *t = NULL;;) switch (T.k) { + case TK_OP_MUL: { lex_next(l); t = type_ptr(NULL, 1); } goto store; + case TK_LBRACK: { + lex_next(l); + + /* TODO parse integer expression */ + + lex_kind(l, TK_RBRACK); + } goto store; + store: { if (r == NULL) { r = c = t; } else { c = (c->base = t); }} break; + default: { goto escape; } + } escape:; + + /* Parse the base type identifier */ + tok t = lex_kind(l, TK_ID); ast *s = syt_search_h(st, t.h, t.s); + if (s == NULL) { note(l->n, t.ln, t.cl, 0, "Use of undeclared identifier \"%s\"", t.s); } + else if (s->k != AK_TYPE) { note(l->n, t.ln, t.cl, 0, "Expected type identifier"); } + + if (r == NULL) { return s->t; } else { c->base = s->t; return r; } } /* Parse an integer. */ static ast *parse_int(lex *l, syt *st) { - ast *a = ast_kind(AK_INT); a->ln = T.ln; a->cl = T.cl; - tok t = lex_kind(l, TK_NUM); + assert(T.k == TK_INT); + + tok t = lex_next(l); + ast *a = ast_init(AK_INT, t.ln, t.cl); if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); } - if (t.h != 0) { a->v = val_u64(t.v); } - else if (!(a->v = val_parse_int(t.s)).k) { - note(l->n, t.ln, t.cl, 0, "%s: %s", t.s, SERR); errno = 0; - } + a->v = val_u64(t.v_int); /* Determine the minimum integer type */ if (a->v.v_int <= U8_MAX) { a->t = &TYPE(TY_U8); } else if (a->v.v_int <= U16_MAX) { a->t = &TYPE(TY_U16); } else if (a->v.v_int <= U32_MAX) { a->t = &TYPE(TY_U32); } else if (a->v.v_int <= U64_MAX) { a->t = &TYPE(TY_U64); } + else if (a->v.v_int <= U128_MAX) { a->t = &TYPE(TY_U128); } return a; } /* Parse a real. */ static ast *parse_flt(lex *l, syt *st) { - ast *a = ast_init(); tok t = lex_kind(l, TK_NUM); - a->k = AK_FLT; a->ln = t.ln; a->cl = t.cl; + assert(T.k == TK_NUM); + + tok t = lex_next(l); + ast *a = ast_init(AK_FLT, t.ln, t.cl); if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); } if (!(a->v = val_parse_flt(t.s)).k) { diff --git a/src/parse.h b/src/parse.h index 96f30af..d0ad962 100644 --- a/src/parse.h +++ b/src/parse.h @@ -45,9 +45,8 @@ typedef struct { ast **a; UINT al; } ast_a; extern char *ast_ks[]; -extern ast *ast_init(void); +extern ast *ast_init(ast_k kind, UINT ln, UINT cl); extern void ast_free(ast **a); -extern ast *ast_kind(ast_k kind); extern void ast_push(ast *a, ast *c); extern void ast_displace(ast *a, ast *c); diff --git a/src/type.c b/src/type.c index ef7eb0c..15b1293 100644 --- a/src/type.c +++ b/src/type.c @@ -11,6 +11,7 @@ type types[] = { { TY_VOID, 0, 0, "void" }, { TY_PTR, TF_PTR, -1, "ptr" }, + { TY_ARR, TF_PTR, -1, "arr" }, { TY_TYPE, 0, -1, "type" }, { TY_AUTO, 0, -1, "auto" }, @@ -100,7 +101,7 @@ type types[] = { { TY_STR, TF_STR, -1, "str" }, }; -type_a types_a = { NULL, 0, 0 }; +static type_a types_a = { NULL, 0, 0 }; /* Allocate a types array with a minimum number of spaces. */ static inline void types_alloc(type_a *ta, u64 min) { @@ -124,8 +125,8 @@ static inline type *types_next(type_a *ta) { register type *r = &ta->a[ta->al]; ta->al += 1; return r; } -/* Initialise a new pointer chain type. */ -type *type_ptrc(type *base, u64 n) { +/* Initialise a new pointer type. */ +type *type_ptr(type *base, u64 n) { types_alloc(&types_a, n); type *r = base; for (u64 i = 0; i < n; i += 1) { @@ -136,6 +137,12 @@ type *type_ptrc(type *base, u64 n) { return r; } +/* Initialise a new array type. */ +type *type_arr(type *base, u64 l) { + types_alloc(&types_a, 1); + return base; /* TODO */ +} + /* Check if a type is a pointer. */ inline bool is_ptr(type *t) { return (t->f & TF_PTR); } @@ -155,20 +162,29 @@ inline bool is_flt(type *t) { return (t->f & TF_FLT); } inline bool is_sign(type *t) { return (t->f & TF_SIGN); } /* Check if two types are equal. */ -bool is_equal(type *t1, type *t2) { +inline bool is_equal(type *t1, type *t2) { if (t1 == NULL || t2 == NULL) { return false; } if (t1 == t2) { return true; } if (t1->k != t2->k) { return false; } if (t1->f != t2->f) { return false; } if (t1->l != t2->l) { return false; } - if (t1->base != t2->base) { return false; } + + /* Check if all base types match */ + for (type *b1 = t1->base, *b2 = t2->base;; b1 = b1->base, b2 = b2->base) { + if (b1 == NULL && b2 == NULL) { break; } + if (b1 == NULL != b2 == NULL) { return false; } + + if (b1->k != b2->k) { return false; } + if (b1->f != b2->f) { return false; } + if (b1->l != b2->l) { return false; } + } return true; } /* Check if two types are compatible. */ -bool is_com(type *t1, type *t2) { +inline bool is_com(type *t1, type *t2) { if (t1 == NULL || t2 == NULL) { return false; } if (is_equal(t1, t2)) { return true; } diff --git a/src/type.h b/src/type.h index 2dd425e..313ecde 100644 --- a/src/type.h +++ b/src/type.h @@ -11,7 +11,7 @@ #define TYPE(a) (types[a]) typedef enum { - TY_VOID, TY_PTR, TY_TYPE, TY_AUTO, + TY_VOID, TY_PTR, TY_ARR, TY_TYPE, TY_AUTO, TY_BOOL, TY_B8, TY_B16, TY_B32, TY_B64, @@ -61,9 +61,9 @@ typedef struct type_s { typedef struct { type *a; UINT al, ac; } type_a; extern type types[]; -extern type_a types_a; -extern type *type_ptrc(type *base, u64 n); +extern type *type_ptr(type *base, u64 n); +extern type *type_arr(type *base, u64 l); extern bool is_ptr(type *t); extern bool is_bool(type *t); diff --git a/src/util/util.h b/src/util/util.h index 9ee19dc..a00cf31 100644 --- a/src/util/util.h +++ b/src/util/util.h @@ -33,6 +33,9 @@ typedef float f32; typedef double f64; typedef long double f128; +typedef __uint128_t u128; +typedef __int128_t s128; + /* Type Limits */ #define U8_MIN UINT8_MIN #define U8_MAX UINT8_MAX @@ -63,6 +66,11 @@ typedef long double f128; #define F128_MIN LDBL_MIN #define F128_MAX LDBL_MAX +#define S128_MAX (__int128)(((unsigned __int128) 1 << ((sizeof(__int128) * __CHAR_BIT__) - 1)) - 1) +#define S128_MIN (-S128_MAX - 1) +#define U128_MAX ((2 * (unsigned __int128) S128_MAX) + 1) +#define U128_MIN 0 + /* Miscellaneous */ #define BIT(x) (1 << (x))