G

G Programming Language
git clone http://git.omkov.net/G
Log | Tree | Refs | README | Download

Author: Jakob Wakeling <[email protected]>
Date: 2022-03-26 12:08:46
Commit: 4bb593b4a0ffdd5188fa9453512c2ec25425cb89
Parent: 2232f077c81d49826f6e297514b634c261cc11ed

parse: Refactor parser to better align with EBNF

Diffstat

M CMakeLists.txt | 2 +-
M src/main.c | 2 +-
M src/parse.c | 271 ++++++++++++++++++++++++++++++++++++-------------------------------------------
M src/parse.h | 20 +++++++++++---------
M src/symbol.c | 2 +-
M src/symbol.h | 4 ++--
M src/value.c | 17 ++++++-----------
M src/value.h | 8 +-------

8 files changed, 145 insertions, 181 deletions

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 26d87f9..ef6267c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.12)
-project(G VERSION 0.1.0 LANGUAGES C)
+project(G VERSION 0.2.0 LANGUAGES C)
 
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin)
 add_compile_definitions(PROJECT_VERSION="${PROJECT_VERSION}")
diff --git a/src/main.c b/src/main.c
index 6d7184c..118bdf2 100644
--- a/src/main.c
+++ b/src/main.c
@@ -48,7 +48,7 @@ int main(int ac, char *av[]) { A0 = av[0];
 
 /* Print help information. */
 static void hlp(void) {
-	puts("G - G Compiler\n");
+	puts("G - G Programming Language\n");
 	puts("Usage: g\n");
 	puts("Options:");
 	puts("  --help        Display help information");
diff --git a/src/parse.c b/src/parse.c
index 4f6442a..6432f13 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -3,12 +3,10 @@
 // Copyright (C) 2021, Jakob Wakeling
 // All rights reserved.
 
-#include "init.h"
 #include "lex.h"
 #include "parse.h"
 #include "symbol.h"
 #include "type.h"
-#include "util/alloc.h"
 #include "util/error.h"
 #include "util/util.h"
 #include "value.h"
@@ -20,65 +18,88 @@
 char *ast_ks[] = {
 	"AK_NULL", "AK_PROG",
 
-	"AK_DECL", "AK_COMP", "AK_PROC", "AK_RETURN",
-	"AK_INT", "AK_FLT",
+	"AK_STMT", "AK_COMP", "AK_DECL", "AK_RETURN", "AK_IF", "AK_FOR",
+	
+	"AK_EXPR", "AK_PROC",
+	
+	"AK_INT",
 };
 
-static ast *parse_decl(lex *l, syt *st);
-
 static ast *parse_stmt(lex *l, syt *st);
 static ast *parse_stmt_compound(lex *l, syt *st);
+static ast *parse_stmt_decl(lex *l, syt *st);
+static ast *parse_stmt_expr(lex *l, syt *st);
+static ast *parse_stmt_return(lex *l, syt *st);
+static ast *parse_stmt_if(lex *l, syt *st);
+static ast *parse_stmt_for(lex *l, syt *st);
 
 static ast *parse_expr(lex *l, syt *st);
+static ast *parse_expr_proc(lex *l, syt *st);
 
-static ast *parse_proc(lex *l, syt *st);
-
-static ast *parse_int(lex *l);
-
-/* Allocate an AST node. */
-ast *ast_aloc(void) { return xcalloc(1, sizeof (ast)); }
+static inline ast *parse_int(lex *l);
 
 /* Initialise an AST node. */
-void ast_init(ast *a) { *a = (ast){ 0 }; }
-
-/* Uninitialise an AST node. */
-void ast_free(ast *a) {
-	if (a == NULL) { return; }
-	for (UINT i = 0; i < a->c.ac; i += 1) { ast_free(a->c.a[i]); }
-	free(a->c.a); free(a); *a = (ast){ 0 }; return;
+ast *ast_init(void) {
+	ast *a = calloc(1, sizeof (ast));
+	if (!a) { error(1, SERR); } return a;
 }
+// void ast_free(ast *a) {}
 
 /* Push a child AST node to an AST node. */
-void ast_push(ast *a, ast *c) {
-	a->c.a = xrealloc(a->c.a, (a->c.ac += 1) * sizeof (ast *));
-	a->c.a[a->c.al] = c; a->c.al += 1;
+s32 ast_push(ast *a, ast *c) {
+	ast **ca = realloc(a->c.a, (a->c.al += 1) * sizeof (ast *));
+	if (!ca) { error(1, SERR); } else { a->c.a = ca; ca = NULL; }
+	
+	a->c.a[a->c.al - 1] = c; return 0;
 }
 
 #define T (l->t) /* lex_peek equivalent */
 
 /* Parse a program. */
 ast *parse(lex *l) {
-	ast *a = ast_aloc(); a->k = AK_PROG; syt_init(&a->st);
+	ast *a = ast_init(); a->k = AK_PROG;
 
+	/* Parse and append all child nodes */
 	for (ast *c; T.k != LK_EOF;) {
-		/* Parse and append all child nodes */
-		if ((c = parse_decl(l, &a->st)) != NULL) { c->p = a; ast_push(a, c); }
-		else { /* TODO */ error(1, "NULL AST (parse_decl)"); }
+		if ((c = parse_stmt_decl(l, &a->st))) { ast_push(a, c); }
+		else { error(1, "NULL AST (parse:parse_stmt_decl)"); }
 	}
 
 	return a;
 }
 
-#define SAC (sm.a->c.a[0]) /* First child of the symbol's AST node */
+/* Parse a statement. */
+static ast *parse_stmt(lex *l, syt *st) {
+	switch (T.k) {
+	case LK_LBRACE: { return parse_stmt_compound(l, st); } break;
+	case LK_ID:     { return parse_stmt_decl(l, st);     } break;
+	case LK_RETURN: { return parse_stmt_return(l, st);   } break;
+	case LK_IF:     { return parse_stmt_if(l, st);       } break;
+	case LK_FOR:    { return parse_stmt_for(l, st);      } break;
+	default:        { return parse_stmt_expr(l, st);     } break;
+	}
+}
 
-/* Parse a declaration. */
-static ast *parse_decl(lex *l, syt *st) {
-	/* Store the declaration's line, column, hash, and identifier */
-	sym sm = { SK_NULL, T.ln, T.cl, T.h, T.s };
+/* Parse a compound statement. */
+static ast *parse_stmt_compound(lex *l, syt *st) {
+	lex_kind(l, LK_LBRACE);
 
+	ast *a = ast_init(); a->k = AK_COMP;
+	for (; T.k != LK_EOF && T.k != LK_RBRACE;) { ast_push(a, parse_stmt(l, st)); }
+	
+	lex_kind(l, LK_RBRACE); return a;
+}
+
+/* Parse a declaration statement. */
+static ast *parse_stmt_decl(lex *l, syt *st) {
+	sym sm = { SK_NULL, T.ln, T.cl, T.h, T.s };
 	lex_kind(l, LK_ID); lex_kind(l, LK_COLON);
 
-	/* Search for a store the declaration's type if specified */
+	sm.a = ast_init(); sm.a->k = AK_DECL;
+	if (!(sm.a->s = strdup(sm.s))) { error(1, SERR); }
+	
+	/* Store the declaration's type if one is specified */
+	/* TODO store type when one is specified */
 	if (T.k == LK_ID) {
 		sym s = syt_search_h(st, T.h, T.s);
 
@@ -92,126 +113,73 @@ static ast *parse_decl(lex *l, syt *st) {
 		); }
 
 		sm.t = s.t; lex_next(l);
-		
-		if (T.k == LK_SCOLON) {
-			/* Allocate the AST and skip ahead */
-			lex_next(l); sm.a = ast_aloc(); sm.a->k = AK_DECL;
-			sm.a->s = strdup(sm.s); sm.k = SK_NASS; goto decl;
-		}
+		if (T.k == LK_SCOLON) { lex_next(l); goto end; }
 	}
-	else if (T.k == LK_SCOLON) { error( /* ERROR */
+	else if (T.k == LK_SCOLON) { error(
 		1, "%s:%zu:%zu: error: a declaration without a type is invalid",
 		l->n, T.ln + 1, T.cl + 1
 	); }
 
 	/* Assign a constant or variable value */
-	if (T.k == LK_COLON || T.k == LK_ASSIGN) {
-		lex_next(l); sm.a = ast_aloc(); sm.a->k = AK_DECL;
-		sm.a->s = strdup(sm.s); ast_push(sm.a, parse_expr(l, st));
-	}
-	else { error( /* ERROR */
-		1, "%s:%zu:%zu: error: expected ':' or '='",
-		l->n, T.ln + 1, T.cl + 1
-	); }
-	
-	switch (SAC->k) {
-	case AK_PROC: { sm.k = SK_PROC; SAC->s = sm.a->s; SAC->p = sm.a; } break;
-	default: { sm.k = SK_NASS; } break; /* TODO */
-	// default: { error( /* ERROR */
-	// 	1, "%s:%zu:%zu: error: unhandled AST kind \"%s\"",
-	// 	l->n, T.ln + 1, T.cl + 1, ast_ks[sm.a->k]
-	// ); } break;
-	}
+	if (T.k == LK_COLON || T.k == LK_ASSIGN) { lex_next(l); ast_push(sm.a, parse_expr(l, st)); }
+	else { error(1, "%s:%zu:%zu: error: expected ':' or '='", l->n, T.ln + 1, T.cl + 1); }
 
-	decl:; assert(sm.h != 0);
+	/* Parse a semicolon if one is required */
+	if (sm.a->c.a[0]->k != AK_PROC) { lex_kind(l, LK_SCOLON); }
 
-	/* Confirm that the identifier is not a keyword */
-	if (syt_lookup_h(&kwt, sm.h, sm.s).k != SK_NULL) { error( /* ERROR */
-		1, "%s:%zu:%zu: error: redefinition of keyword \"%s\"",
-		l->n, sm.ln + 1, sm.cl + 1, sm.s
-	); }
-	
-	/* Confirm that the identifier is not being reused */
-	if (syt_lookup_h(st, sm.h, sm.s).k != SK_NULL) { error( /* ERROR */
-		1, "%s:%zu:%zu: error: redefinition of identifier \"%s\"",
-		l->n, sm.ln + 1, sm.cl + 1, sm.s
-	); }
-	
-	/* Otherwise insert the new symbol and return */
-	syt_insert_h(st, sm.h, sm.s, sm); return sm.a;
+	/* Insert the new symbol and return */
+	end:; syt_insert_h(st, sm.h, sm.s, sm); return sm.a;
 }
 
-#undef SAC
-
-/* Parse a statement. */
-static ast *parse_stmt(lex *l, syt *st) {
-	if (T.k == LK_LBRACE) { return parse_stmt_compound(l, st); }
-	
-	ast *a = ast_aloc();
-	
-	switch (T.k) {
-	case LK_RETURN: {
-		lex_kind(l, LK_RETURN); a->k = AK_RETURN; ast_push(a, parse_expr(l, st));
-	} break;
-	default: { error(
-		1, "%s:%zu:%zu: Unexpected: \"%s\" (parse_stmt)",
-		l->n, T.ln + 1, T.cl + 1, tok_ks[T.k]
-	); } break;
-	}
-	
+/* Parse an expression statement. */
+static ast *parse_stmt_expr(lex *l, syt *st) {
+	ast *a; if (T.k != LK_SCOLON) { a = parse_expr(l, st); }
 	lex_kind(l, LK_SCOLON); return a;
 }
 
-/* Parse a compound statement. */
-static ast *parse_stmt_compound(lex *l, syt *st) {
-	lex_kind(l, LK_LBRACE);
-	
-	ast *a = ast_aloc(); a->k = AK_COMP; syt_init(&a->st);
+/* Parse a return statement. */
+static ast *parse_stmt_return(lex *l, syt *st) {
+	lex_kind(l, LK_RETURN);
 
-	/* Parse statements until EOF or closing brace */
-	for (; T.k != LK_EOF && T.k != LK_RBRACE;) {
-		ast_push(a, parse_stmt(l, &a->st));
-	}
+	ast *a = ast_init(); a->k = AK_RETURN;
+	ast_push(a, parse_expr(l, st));
 
-	lex_kind(l, LK_RBRACE); return a;
+	lex_kind(l, LK_SCOLON); return a;
 }
 
+/* Parse an if statement. */
+static ast *parse_stmt_if(lex *l, syt *st) { /* TODO */ }
+
+/* Parse a for statement. */
+static ast *parse_stmt_for(lex *l, syt *st) { /* TODO */ }
+
 /* Parse an expression. */
 static ast *parse_expr(lex *l, syt *st) {
-	switch (T.k) {
-	case LK_PROC: { return parse_proc(l, st); } break;
-	case LK_INT: { return parse_int(l); } break;
-	default: { error(
-		1, "%s:%zu:%zu: Unexpected: \"%s\" (parse_expr)",
-		l->n, T.ln + 1, T.cl + 1, tok_ks[T.k]
-	); } break;
-	}
+	if (T.k == LK_PROC) { return parse_expr_proc(l, st); }
+	if (T.k == LK_INT) { return parse_int(l); }
+	else { error(1, "PARSE_EXPR %s", tok_ks[T.k]); }
 }
 
-/* Parse a procedure. */
-static ast *parse_proc(lex *l, syt *st) {
+/* Parse a procedure expression. */
+static ast *parse_expr_proc(lex *l, syt *st) {
 	lex_kind(l, LK_PROC); lex_kind(l, LK_LPAREN);
-	ast *a = ast_aloc(); a->k = AK_PROC;
+	ast *a = ast_init(); a->k = AK_PROC;
 
 	/* Parse optional procedure parameter(s) */
-	/* TODO parse parameters(s) */
-	lex_kind(l, LK_RPAREN);
+	/* TODO */ lex_kind(l, LK_RPAREN);
 
 	/* Parse optional procedure return type(s) */
-	/* TODO parse more than one unnamed return type */
+	/* TODO parse more than one return type */
 	if (T.k == LK_RARROW) {
 		lex_next(l); tok t = lex_kind(l, LK_ID);
 		sym s = syt_search_h(st, t.h, t.s);
 
-		/* Confirm that the return type exists */
 		if (s.k == SK_NULL) { error( /* ERROR */
-			1, "%s:%zu:%zu: error: undefined identifier \"%s\"",
+			1, "%s:%zu:%zu: error: use of undefined identifier \"%s\"",
 			l->n, T.ln + 1, T.cl + 1, t.s
 		); }
-		
-		/* Confirm that the return type is a type */
 		if (s.k != SK_TYPE) { error( /* ERROR */
-			1, "%s:%zu:%zu: error: expected a type",
+			1, "%s:%zu:%zu: error: expected type identifier",
 			l->n, T.ln + 1, T.cl + 1
 		); }
 
@@ -221,12 +189,18 @@ static ast *parse_proc(lex *l, syt *st) {
 	ast_push(a, parse_stmt_compound(l, st)); return a;
 }
 
-static ast *parse_int(lex *l) {
-	val v = val_strint(lex_kind(l, LK_INT).s);
-	ast *a = ast_aloc(); a->k = AK_INT; a->v = v;
+/* Parse an integer. */
+static inline ast *parse_int(lex *l) {
+	ast *a = ast_init(); tok t = lex_kind(l, LK_INT);
+	a->k = AK_INT; a->ln = t.ln; a->cl = t.cl;
+	
+	if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); }
+	if (!(a->v = val_parse_int(t.s)).k) { error(1, "%s: %s", t.s, SERR); }
+	
 	return a;
 }
 
+/* Recursively print an AST. */
 void ast_print(ast *a, UINT i) {
 	for (UINT j = 0; j != i; ++j) { printf("    "); }
 	printf("%zu:%zu: %s: %s", a->ln, a->cl, ast_ks[a->k], a->s);
diff --git a/src/parse.h b/src/parse.h
index 7fe9994..11fe029 100644
--- a/src/parse.h
+++ b/src/parse.h
@@ -15,23 +15,25 @@
 typedef enum {
 	AK_NULL, AK_PROG,
 
-	AK_DECL, AK_COMP, AK_PROC, AK_RETURN,
-	AK_INT, AK_FLT,
+	AK_STMT, AK_COMP, AK_DECL, AK_RETURN, AK_IF, AK_FOR,
+	
+	AK_EXPR, AK_PROC,
+	
+	AK_INT,
 } ast_k;
 
+/* k: kind, ln: line, cl: column */
+/* t: type, c: children */
 typedef struct ast_s {
-	ast_k k; UINT ln, cl; u64 h; char *s;
-	type *t; val v; syt st; struct ast_s *p;
-	struct { struct ast_s **a; UINT al, ac; } c;
+	ast_k k; UINT ln, cl; u64 h; char *s; type *t; val v; syt st;
+	struct ast_s *p; struct { struct ast_s **a; UINT al; } c;
 } ast;
 
 extern char *ast_ks[];
 
-extern ast *ast_aloc(void);
-extern void ast_init(ast *a);
+extern ast *ast_init(void);
 extern void ast_free(ast *a);
-
-extern void ast_push(ast *a, ast *c);
+extern s32  ast_push(ast *a, ast *c);
 
 extern ast *parse(lex *l);
 
diff --git a/src/symbol.c b/src/symbol.c
index d33f6d1..eb04bc5 100644
--- a/src/symbol.c
+++ b/src/symbol.c
@@ -30,7 +30,7 @@ static void syt_resize(syt *st);
 syt *syt_aloc(void) { return xcalloc(1, sizeof (syt)); }
 
 /* Initialise a symbol table. */
-void syt_init(syt *st) { *st = (syt){ NULL, 0, 0, NULL }; }
+// void syt_init(syt *st) { *st = (syt){ NULL, 0, 0, NULL }; }
 
 /* Uninitialise a symbol table. */
 void syt_free(syt *st) {
diff --git a/src/symbol.h b/src/symbol.h
index 859948b..b42ae7a 100644
--- a/src/symbol.h
+++ b/src/symbol.h
@@ -30,8 +30,8 @@ typedef struct syt_s {
 
 extern syt kwt;
 
-extern syt *syt_aloc(void);
-extern void syt_init(syt *st);
+// extern syt *syt_aloc(void);
+// extern void syt_init(syt *st);
 extern void syt_free(syt *st);
 extern u64  syt_hash(const char *s, UINT l);
 
diff --git a/src/value.c b/src/value.c
index 2836148..60fc7d2 100644
--- a/src/value.c
+++ b/src/value.c
@@ -10,16 +10,16 @@
 
 const val val_null = { VK_NULL };
 
-val val_bool(bool b) { return (val){ VK_BOOL, .v_bool = b }; }
-val val_u64(u64 i)   { return (val){ VK_INT,  .v_int  = i }; }
-val val_f128(f128 f) { return (val){ VK_FLT,  .v_flt  = f }; }
+val val_bool(bool v) { return (val){ VK_BOOL, .v_bool = v }; }
+val val_u64(u64 v) { return (val){ VK_INT, .v_int = v }; }
+val val_f128(f128 v) { return (val){ VK_FLT, .v_flt = v }; }
 
-/* Convert an integer string to a val. */
-val val_strint(char *s) {
+/* Parse an integer string into a value. */
+val val_parse_int(char *s) {
 	val v = { VK_INT, .v_int = 0 }; u64 c; UINT b = 10;
 
 	if (s[0] == '0') switch (s[1]) {
-	case 'b': { s += 2; b = 2; } break;  case 'o': { s += 2; b = 8; } break;
+	case 'b': { s += 2; b = 2;  } break; case 'o': { s += 2; b = 8;  } break;
 	case 'd': { s += 2; b = 10; } break; case 'z': { s += 2; b = 12; } break;
 	case 'x': { s += 2; b = 16; } break; default:  { s += 1; } break;
 	}
@@ -36,8 +36,3 @@ val val_strint(char *s) {
 
 	return v;
 }
-
-/* Convert a float string to a val. */
-val val_strflt(char *s) {
-	return val_null; /* TODO */
-}
diff --git a/src/value.h b/src/value.h
index 548c536..0486adb 100644
--- a/src/value.h
+++ b/src/value.h
@@ -9,11 +9,7 @@
 #include "util/util.h"
 
 typedef enum { VK_NULL, VK_BOOL, VK_INT, VK_FLT } val_k;
-
-typedef struct {
-	val_k k;
-	union { bool v_bool; u64 v_int; f128 v_flt; };
-} val;
+typedef struct { val_k k; union { bool v_bool; u64 v_int; f128 v_flt; }; } val;
 
 extern const val val_null;
 
@@ -21,7 +17,6 @@ extern val val_bool(bool v);
 extern val val_u64(u64 v);
 extern val val_f128(f128 v);
 
-extern val val_strint(char *s);
-extern val val_strflt(char *s);
+extern val val_parse_int(char *s);
 
 #endif // G_VALUE_H_X2RKXBBA