Author | Jakob Wakeling <[email protected]> |
Date | 2022-03-26 12:08:46 |
Commit | 4bb593b4a0ffdd5188fa9453512c2ec25425cb89 |
Parent | 2232f077c81d49826f6e297514b634c261cc11ed |
parse: Refactor parser to better align with EBNF
Diffstat
M | CMakeLists.txt | | | 2 | +- |
M | src/main.c | | | 2 | +- |
M | src/parse.c | | | 271 | ++++++++++++++++++++++++++++++++++++------------------------------------------- |
M | src/parse.h | | | 20 | +++++++++++--------- |
M | src/symbol.c | | | 2 | +- |
M | src/symbol.h | | | 4 | ++-- |
M | src/value.c | | | 17 | ++++++----------- |
M | src/value.h | | | 8 | +------- |
8 files changed, 145 insertions, 181 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 26d87f9..ef6267c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.12) -project(G VERSION 0.1.0 LANGUAGES C) +project(G VERSION 0.2.0 LANGUAGES C) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin) add_compile_definitions(PROJECT_VERSION="${PROJECT_VERSION}") diff --git a/src/main.c b/src/main.c index 6d7184c..118bdf2 100644 --- a/src/main.c +++ b/src/main.c @@ -48,7 +48,7 @@ int main(int ac, char *av[]) { A0 = av[0]; /* Print help information. */ static void hlp(void) { - puts("G - G Compiler\n"); + puts("G - G Programming Language\n"); puts("Usage: g\n"); puts("Options:"); puts(" --help Display help information"); diff --git a/src/parse.c b/src/parse.c index 4f6442a..6432f13 100644 --- a/src/parse.c +++ b/src/parse.c @@ -3,12 +3,10 @@ // Copyright (C) 2021, Jakob Wakeling // All rights reserved. -#include "init.h" #include "lex.h" #include "parse.h" #include "symbol.h" #include "type.h" -#include "util/alloc.h" #include "util/error.h" #include "util/util.h" #include "value.h" @@ -20,65 +18,88 @@ char *ast_ks[] = { "AK_NULL", "AK_PROG", - "AK_DECL", "AK_COMP", "AK_PROC", "AK_RETURN", - "AK_INT", "AK_FLT", + "AK_STMT", "AK_COMP", "AK_DECL", "AK_RETURN", "AK_IF", "AK_FOR", + + "AK_EXPR", "AK_PROC", + + "AK_INT", }; -static ast *parse_decl(lex *l, syt *st); - static ast *parse_stmt(lex *l, syt *st); static ast *parse_stmt_compound(lex *l, syt *st); +static ast *parse_stmt_decl(lex *l, syt *st); +static ast *parse_stmt_expr(lex *l, syt *st); +static ast *parse_stmt_return(lex *l, syt *st); +static ast *parse_stmt_if(lex *l, syt *st); +static ast *parse_stmt_for(lex *l, syt *st); static ast *parse_expr(lex *l, syt *st); +static ast *parse_expr_proc(lex *l, syt *st); -static ast *parse_proc(lex *l, syt *st); - -static ast *parse_int(lex *l); - -/* Allocate an AST node. */ -ast *ast_aloc(void) { return xcalloc(1, sizeof (ast)); } +static inline ast *parse_int(lex *l); /* Initialise an AST node. */ -void ast_init(ast *a) { *a = (ast){ 0 }; } - -/* Uninitialise an AST node. */ -void ast_free(ast *a) { - if (a == NULL) { return; } - for (UINT i = 0; i < a->c.ac; i += 1) { ast_free(a->c.a[i]); } - free(a->c.a); free(a); *a = (ast){ 0 }; return; +ast *ast_init(void) { + ast *a = calloc(1, sizeof (ast)); + if (!a) { error(1, SERR); } return a; } +// void ast_free(ast *a) {} /* Push a child AST node to an AST node. */ -void ast_push(ast *a, ast *c) { - a->c.a = xrealloc(a->c.a, (a->c.ac += 1) * sizeof (ast *)); - a->c.a[a->c.al] = c; a->c.al += 1; +s32 ast_push(ast *a, ast *c) { + ast **ca = realloc(a->c.a, (a->c.al += 1) * sizeof (ast *)); + if (!ca) { error(1, SERR); } else { a->c.a = ca; ca = NULL; } + + a->c.a[a->c.al - 1] = c; return 0; } #define T (l->t) /* lex_peek equivalent */ /* Parse a program. */ ast *parse(lex *l) { - ast *a = ast_aloc(); a->k = AK_PROG; syt_init(&a->st); + ast *a = ast_init(); a->k = AK_PROG; + /* Parse and append all child nodes */ for (ast *c; T.k != LK_EOF;) { - /* Parse and append all child nodes */ - if ((c = parse_decl(l, &a->st)) != NULL) { c->p = a; ast_push(a, c); } - else { /* TODO */ error(1, "NULL AST (parse_decl)"); } + if ((c = parse_stmt_decl(l, &a->st))) { ast_push(a, c); } + else { error(1, "NULL AST (parse:parse_stmt_decl)"); } } return a; } -#define SAC (sm.a->c.a[0]) /* First child of the symbol's AST node */ +/* Parse a statement. */ +static ast *parse_stmt(lex *l, syt *st) { + switch (T.k) { + case LK_LBRACE: { return parse_stmt_compound(l, st); } break; + case LK_ID: { return parse_stmt_decl(l, st); } break; + case LK_RETURN: { return parse_stmt_return(l, st); } break; + case LK_IF: { return parse_stmt_if(l, st); } break; + case LK_FOR: { return parse_stmt_for(l, st); } break; + default: { return parse_stmt_expr(l, st); } break; + } +} -/* Parse a declaration. */ -static ast *parse_decl(lex *l, syt *st) { - /* Store the declaration's line, column, hash, and identifier */ - sym sm = { SK_NULL, T.ln, T.cl, T.h, T.s }; +/* Parse a compound statement. */ +static ast *parse_stmt_compound(lex *l, syt *st) { + lex_kind(l, LK_LBRACE); + ast *a = ast_init(); a->k = AK_COMP; + for (; T.k != LK_EOF && T.k != LK_RBRACE;) { ast_push(a, parse_stmt(l, st)); } + + lex_kind(l, LK_RBRACE); return a; +} + +/* Parse a declaration statement. */ +static ast *parse_stmt_decl(lex *l, syt *st) { + sym sm = { SK_NULL, T.ln, T.cl, T.h, T.s }; lex_kind(l, LK_ID); lex_kind(l, LK_COLON); - /* Search for a store the declaration's type if specified */ + sm.a = ast_init(); sm.a->k = AK_DECL; + if (!(sm.a->s = strdup(sm.s))) { error(1, SERR); } + + /* Store the declaration's type if one is specified */ + /* TODO store type when one is specified */ if (T.k == LK_ID) { sym s = syt_search_h(st, T.h, T.s); @@ -92,126 +113,73 @@ static ast *parse_decl(lex *l, syt *st) { ); } sm.t = s.t; lex_next(l); - - if (T.k == LK_SCOLON) { - /* Allocate the AST and skip ahead */ - lex_next(l); sm.a = ast_aloc(); sm.a->k = AK_DECL; - sm.a->s = strdup(sm.s); sm.k = SK_NASS; goto decl; - } + if (T.k == LK_SCOLON) { lex_next(l); goto end; } } - else if (T.k == LK_SCOLON) { error( /* ERROR */ + else if (T.k == LK_SCOLON) { error( 1, "%s:%zu:%zu: error: a declaration without a type is invalid", l->n, T.ln + 1, T.cl + 1 ); } /* Assign a constant or variable value */ - if (T.k == LK_COLON || T.k == LK_ASSIGN) { - lex_next(l); sm.a = ast_aloc(); sm.a->k = AK_DECL; - sm.a->s = strdup(sm.s); ast_push(sm.a, parse_expr(l, st)); - } - else { error( /* ERROR */ - 1, "%s:%zu:%zu: error: expected ':' or '='", - l->n, T.ln + 1, T.cl + 1 - ); } - - switch (SAC->k) { - case AK_PROC: { sm.k = SK_PROC; SAC->s = sm.a->s; SAC->p = sm.a; } break; - default: { sm.k = SK_NASS; } break; /* TODO */ - // default: { error( /* ERROR */ - // 1, "%s:%zu:%zu: error: unhandled AST kind \"%s\"", - // l->n, T.ln + 1, T.cl + 1, ast_ks[sm.a->k] - // ); } break; - } + if (T.k == LK_COLON || T.k == LK_ASSIGN) { lex_next(l); ast_push(sm.a, parse_expr(l, st)); } + else { error(1, "%s:%zu:%zu: error: expected ':' or '='", l->n, T.ln + 1, T.cl + 1); } - decl:; assert(sm.h != 0); + /* Parse a semicolon if one is required */ + if (sm.a->c.a[0]->k != AK_PROC) { lex_kind(l, LK_SCOLON); } - /* Confirm that the identifier is not a keyword */ - if (syt_lookup_h(&kwt, sm.h, sm.s).k != SK_NULL) { error( /* ERROR */ - 1, "%s:%zu:%zu: error: redefinition of keyword \"%s\"", - l->n, sm.ln + 1, sm.cl + 1, sm.s - ); } - - /* Confirm that the identifier is not being reused */ - if (syt_lookup_h(st, sm.h, sm.s).k != SK_NULL) { error( /* ERROR */ - 1, "%s:%zu:%zu: error: redefinition of identifier \"%s\"", - l->n, sm.ln + 1, sm.cl + 1, sm.s - ); } - - /* Otherwise insert the new symbol and return */ - syt_insert_h(st, sm.h, sm.s, sm); return sm.a; + /* Insert the new symbol and return */ + end:; syt_insert_h(st, sm.h, sm.s, sm); return sm.a; } -#undef SAC - -/* Parse a statement. */ -static ast *parse_stmt(lex *l, syt *st) { - if (T.k == LK_LBRACE) { return parse_stmt_compound(l, st); } - - ast *a = ast_aloc(); - - switch (T.k) { - case LK_RETURN: { - lex_kind(l, LK_RETURN); a->k = AK_RETURN; ast_push(a, parse_expr(l, st)); - } break; - default: { error( - 1, "%s:%zu:%zu: Unexpected: \"%s\" (parse_stmt)", - l->n, T.ln + 1, T.cl + 1, tok_ks[T.k] - ); } break; - } - +/* Parse an expression statement. */ +static ast *parse_stmt_expr(lex *l, syt *st) { + ast *a; if (T.k != LK_SCOLON) { a = parse_expr(l, st); } lex_kind(l, LK_SCOLON); return a; } -/* Parse a compound statement. */ -static ast *parse_stmt_compound(lex *l, syt *st) { - lex_kind(l, LK_LBRACE); - - ast *a = ast_aloc(); a->k = AK_COMP; syt_init(&a->st); +/* Parse a return statement. */ +static ast *parse_stmt_return(lex *l, syt *st) { + lex_kind(l, LK_RETURN); - /* Parse statements until EOF or closing brace */ - for (; T.k != LK_EOF && T.k != LK_RBRACE;) { - ast_push(a, parse_stmt(l, &a->st)); - } + ast *a = ast_init(); a->k = AK_RETURN; + ast_push(a, parse_expr(l, st)); - lex_kind(l, LK_RBRACE); return a; + lex_kind(l, LK_SCOLON); return a; } +/* Parse an if statement. */ +static ast *parse_stmt_if(lex *l, syt *st) { /* TODO */ } + +/* Parse a for statement. */ +static ast *parse_stmt_for(lex *l, syt *st) { /* TODO */ } + /* Parse an expression. */ static ast *parse_expr(lex *l, syt *st) { - switch (T.k) { - case LK_PROC: { return parse_proc(l, st); } break; - case LK_INT: { return parse_int(l); } break; - default: { error( - 1, "%s:%zu:%zu: Unexpected: \"%s\" (parse_expr)", - l->n, T.ln + 1, T.cl + 1, tok_ks[T.k] - ); } break; - } + if (T.k == LK_PROC) { return parse_expr_proc(l, st); } + if (T.k == LK_INT) { return parse_int(l); } + else { error(1, "PARSE_EXPR %s", tok_ks[T.k]); } } -/* Parse a procedure. */ -static ast *parse_proc(lex *l, syt *st) { +/* Parse a procedure expression. */ +static ast *parse_expr_proc(lex *l, syt *st) { lex_kind(l, LK_PROC); lex_kind(l, LK_LPAREN); - ast *a = ast_aloc(); a->k = AK_PROC; + ast *a = ast_init(); a->k = AK_PROC; /* Parse optional procedure parameter(s) */ - /* TODO parse parameters(s) */ - lex_kind(l, LK_RPAREN); + /* TODO */ lex_kind(l, LK_RPAREN); /* Parse optional procedure return type(s) */ - /* TODO parse more than one unnamed return type */ + /* TODO parse more than one return type */ if (T.k == LK_RARROW) { lex_next(l); tok t = lex_kind(l, LK_ID); sym s = syt_search_h(st, t.h, t.s); - /* Confirm that the return type exists */ if (s.k == SK_NULL) { error( /* ERROR */ - 1, "%s:%zu:%zu: error: undefined identifier \"%s\"", + 1, "%s:%zu:%zu: error: use of undefined identifier \"%s\"", l->n, T.ln + 1, T.cl + 1, t.s ); } - - /* Confirm that the return type is a type */ if (s.k != SK_TYPE) { error( /* ERROR */ - 1, "%s:%zu:%zu: error: expected a type", + 1, "%s:%zu:%zu: error: expected type identifier", l->n, T.ln + 1, T.cl + 1 ); } @@ -221,12 +189,18 @@ static ast *parse_proc(lex *l, syt *st) { ast_push(a, parse_stmt_compound(l, st)); return a; } -static ast *parse_int(lex *l) { - val v = val_strint(lex_kind(l, LK_INT).s); - ast *a = ast_aloc(); a->k = AK_INT; a->v = v; +/* Parse an interger. */ +static inline ast *parse_int(lex *l) { + ast *a = ast_init(); tok t = lex_kind(l, LK_INT); + a->k = AK_INT; a->ln = t.ln; a->cl = t.cl; + + if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); } + if (!(a->v = val_parse_int(t.s)).k) { error(1, "%s: %s", t.s, SERR); } + return a; } +/* Recursively print an AST. */ void ast_print(ast *a, UINT i) { for (UINT j = 0; j != i; ++j) { printf(" "); } printf("%zu:%zu: %s: %s", a->ln, a->cl, ast_ks[a->k], a->s); diff --git a/src/parse.h b/src/parse.h index 7fe9994..11fe029 100644 --- a/src/parse.h +++ b/src/parse.h @@ -15,23 +15,25 @@ typedef enum { AK_NULL, AK_PROG, - AK_DECL, AK_COMP, AK_PROC, AK_RETURN, - AK_INT, AK_FLT, + AK_STMT, AK_COMP, AK_DECL, AK_RETURN, AK_IF, AK_FOR, + + AK_EXPR, AK_PROC, + + AK_INT, } ast_k; +/* k: kind, ln: line, cl: column */ +/* t: type, c: children */ typedef struct ast_s { - ast_k k; UINT ln, cl; u64 h; char *s; - type *t; val v; syt st; struct ast_s *p; - struct { struct ast_s **a; UINT al, ac; } c; + ast_k k; UINT ln, cl; u64 h; char *s; type *t; val v; syt st; + struct ast_s *p; struct { struct ast_s **a; UINT al; } c; } ast; extern char *ast_ks[]; -extern ast *ast_aloc(void); -extern void ast_init(ast *a); +extern ast *ast_init(void); extern void ast_free(ast *a); - -extern void ast_push(ast *a, ast *c); +extern s32 ast_push(ast *a, ast *c); extern ast *parse(lex *l); diff --git a/src/symbol.c b/src/symbol.c index d33f6d1..eb04bc5 100644 --- a/src/symbol.c +++ b/src/symbol.c @@ -30,7 +30,7 @@ static void syt_resize(syt *st); syt *syt_aloc(void) { return xcalloc(1, sizeof (syt)); } /* Initialise a symbol table. */ -void syt_init(syt *st) { *st = (syt){ NULL, 0, 0, NULL }; } +// void syt_init(syt *st) { *st = (syt){ NULL, 0, 0, NULL }; } /* Uninitialise a symbol table. */ void syt_free(syt *st) { diff --git a/src/symbol.h b/src/symbol.h index 859948b..b42ae7a 100644 --- a/src/symbol.h +++ b/src/symbol.h @@ -30,8 +30,8 @@ typedef struct syt_s { extern syt kwt; -extern syt *syt_aloc(void); -extern void syt_init(syt *st); +// extern syt *syt_aloc(void); +// extern void syt_init(syt *st); extern void syt_free(syt *st); extern u64 syt_hash(const char *s, UINT l); diff --git a/src/value.c b/src/value.c index 2836148..60fc7d2 100644 --- a/src/value.c +++ b/src/value.c @@ -10,16 +10,16 @@ const val val_null = { VK_NULL }; -val val_bool(bool b) { return (val){ VK_BOOL, .v_bool = b }; } -val val_u64(u64 i) { return (val){ VK_INT, .v_int = i }; } -val val_f128(f128 f) { return (val){ VK_FLT, .v_flt = f }; } +val val_bool(bool v) { return (val){ VK_BOOL, .v_bool = v }; } +val val_u64(u64 v) { return (val){ VK_INT, .v_int = v }; } +val val_f128(f128 v) { return (val){ VK_FLT, .v_flt = v }; } -/* Convert an integer string to a val. */ -val val_strint(char *s) { +/* Parse an integer string into a value. */ +val val_parse_int(char *s) { val v = { VK_INT, .v_int = 0 }; u64 c; UINT b = 10; if (s[0] == '0') switch (s[1]) { - case 'b': { s += 2; b = 2; } break; case 'o': { s += 2; b = 8; } break; + case 'b': { s += 2; b = 2; } break; case 'o': { s += 2; b = 8; } break; case 'd': { s += 2; b = 10; } break; case 'z': { s += 2; b = 12; } break; case 'x': { s += 2; b = 16; } break; default: { s += 1; } break; } @@ -36,8 +36,3 @@ val val_strint(char *s) { return v; } - -/* Convert a float string to a val. */ -val val_strflt(char *s) { - return val_null; /* TODO */ -} diff --git a/src/value.h b/src/value.h index 548c536..0486adb 100644 --- a/src/value.h +++ b/src/value.h @@ -9,11 +9,7 @@ #include "util/util.h" typedef enum { VK_NULL, VK_BOOL, VK_INT, VK_FLT } val_k; - -typedef struct { - val_k k; - union { bool v_bool; u64 v_int; f128 v_flt; }; -} val; +typedef struct { val_k k; union { bool v_bool; u64 v_int; f128 v_flt; }; } val; extern const val val_null; @@ -21,7 +17,6 @@ extern val val_bool(bool v); extern val val_u64(u64 v); extern val val_f128(f128 v); -extern val val_strint(char *s); -extern val val_strflt(char *s); +extern val val_parse_int(char *s); #endif // G_VALUE_H_X2RKXBBA