G

G Programming Language
git clone http://git.omkov.net/G
Log | Tree | Refs | README | Download

Author: Jakob Wakeling <[email protected]>
Date: 2022-03-31 06:57:01
Commit: fd64da68f0e34993b9171bbf1fc41846655b01e8
Parent: b2691843413f657ab3cc611e833e57dc67122e1f

parse: Implement expression parsing

Diffstat

M CMakeLists.txt | 0
M README.md | 8 +++++---
A examples/expr.g | 1 +
M src/lex.c | 16 ++++++++++++++++
M src/lex.h | 7 +++++++
M src/parse.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
M src/parse.h | 9 +++++++--

7 files changed, 114 insertions, 12 deletions

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ef6267c..269c961 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.12)
+cmake_minimum_required(VERSION 3.14)
 project(G VERSION 0.2.0 LANGUAGES C)
 
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin)
diff --git a/README.md b/README.md
index ade1857..664cb60 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ main :: proc() -> s32 {
 
 ### Dependencies
 
-- CMake >= 3.12, to build
+- CMake >= 3.14, to build
 - LLVM, to build
 - Clang, for linking compiled object files
 
@@ -57,13 +57,15 @@ command. The second command will output an executable file, *a.out* by default.
 
 > Not all todo items will necesarilly be implemented
 
-- [ ] Implement main entrypoint
-- [ ] Implement procedure declarations
+- [x] Implement procedure declarations
 - [ ] Implement procedure calls
+- [x] Implement variable declarations
 - [ ] Implement variable assignments
 - [x] Implement integers
 - [ ] Implement reals
 - [ ] Implement arrays
+- [x] Implement expression parsing
+- [ ] Implement expression code generation
 - [ ] Implement the *type* type
 - [ ] Implement multiple return values
 - [ ] Implement *defer*
diff --git a/examples/expr.g b/examples/expr.g
new file mode 100644
index 0000000..1e5ff82
--- /dev/null
+++ b/examples/expr.g
@@ -0,0 +1 @@
+main :: proc() -> s64 { return 2 + 3 - 1 * 4 / 2; }
diff --git a/src/lex.c b/src/lex.c
index 89ff592..9fcc7fb 100644
--- a/src/lex.c
+++ b/src/lex.c
@@ -32,6 +32,22 @@ char *tok_ks[] = {
 	"TK_AS_NOT", "TK_AS_AND", "TK_AS_OR",  "TK_AS_XOR", "TK_AS_SHL", "TK_AS_SHR",
 };
 
+/* Push a token to a token array. */
+void tok_a_push(tok_a *a, tok t) {
+	tok *ta = realloc(a->a, (a->al += 1) * sizeof (tok));
+	if (!ta) { error(1, SERR); } else { a->a = ta; a->a[a->al - 1] = t; }
+}
+
+/* Pop a token from a token array. */
+tok tok_a_pop(tok_a *a) {
+	return (a->al ? a->a[a->al -= 1] : (tok){ 0 });
+}
+
+/* Peek at the top token of a token array without popping it. */
+tok tok_a_peek(tok_a *a) {
+	return (a->al ? a->a[a->al - 1] : (tok){ 0 });
+}
+
 /* Initialise a lexer. */
 lex lex_init(const char *file, char *src, UINT len) {
 	lex l = { file, src, src, src + len, 0, 0, 0 };
diff --git a/src/lex.h b/src/lex.h
index 4c23e49..4c06cff 100644
--- a/src/lex.h
+++ b/src/lex.h
@@ -30,12 +30,18 @@ typedef struct {
 	union { u64 v_u64; s64 v_s64; f64 v_f64; };
 } tok;
 
+typedef struct { tok *a; UINT al; } tok_a;
+
 typedef struct {
 	const char *n; char *s, *p, *q; UINT ln, cl; tok t;
 } lex;
 
 extern char *tok_ks[];
 
+extern void tok_a_push(tok_a *a, tok t);
+extern tok tok_a_pop(tok_a *a);
+extern tok tok_a_peek(tok_a *a);
+
 extern lex lex_init(const char *file, char *src, UINT len);
 extern tok lex_peek(lex *l);
 extern tok lex_next(lex *l);
diff --git a/src/parse.c b/src/parse.c
index c79afa4..90990cf 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -16,11 +16,11 @@
 #include <string.h>
 
 char *ast_ks[] = {
-	"AK_NULL", "AK_PROG",
+	"AK_NULL", "AK_PROG", "AK_PROC",
 
 	"AK_STMT", "AK_COMP", "AK_DECL", "AK_RETURN", "AK_IF", "AK_FOR",
 
-	"AK_EXPR", "AK_PROC",
+	"AK_OP_ADD", "AK_OP_SUB", "AK_OP_MUL", "AK_OP_DIV", "AK_OP_MOD",
 
 	"AK_VAR", "AK_INT",
 };
@@ -53,6 +53,17 @@ void ast_push(ast *a, ast *c) {
 	c->p = a; a->c.a[a->c.al - 1] = c;
 }
 
+/* Push an AST to an AST array. */
+void ast_a_push(ast_a *aa, ast *a) {
+	ast **ta = realloc(aa->a, (aa->al += 1) * sizeof (ast *));
+	if (!ta) { error(1, SERR); } else { aa->a = ta; aa->a[aa->al - 1] = a; }
+}
+
+/* Pop an AST from an AST array. */
+ast *ast_a_pop(ast_a *aa) {
+	return (aa->al ? aa->a[aa->al -= 1] : NULL);
+}
+
 #define T (l->t) /* lex_peek equivalent */
 
 /* Parse a program. */
@@ -173,11 +184,38 @@ static ast *parse_stmt_for(lex *l, syt *st) {
 	ast_push(a, parse_stmt(l, st)); return a;
 }
 
+typedef struct { tok_k tk; ast_k ak; s32 o; bool as; } op;
+
+static const op ops[] = {
+	{ TK_OP_MUL, AK_OP_MUL, 2, false },
+	{ TK_OP_DIV, AK_OP_DIV, 2, false },
+	{ TK_OP_MOD, AK_OP_MOD, 2, false },
+	{ TK_OP_ADD, AK_OP_ADD, 1, false },
+	{ TK_OP_SUB, AK_OP_SUB, 1, false },
+};
+
+static inline op ops_lookup(tok_k k) {
+	for (UINT i = 0; i < (sizeof (ops) / sizeof (*ops)); i += 1) {
+		if (k == ops[i].tk) { return ops[i]; }
+	} return (op){ 0 };
+}
+
+static inline void shunt(ast_a *aa, tok t, op o) {
+	ast *r = ast_a_pop(aa); ast *l = ast_a_pop(aa);
+	
+	ast *a = ast_init(); a->k = o.ak; a->ln = t.ln; a->cl = t.cl;
+	ast_push(a, l); ast_push(a, r); ast_a_push(aa, a);
+}
+
 /* Parse an expression. */
 static ast *parse_expr(lex *l, syt *st) {
 	if (T.k == TK_PROC) { return parse_expr_proc(l, st); }
-	if (T.k == TK_INT) { return parse_int(l, st); }
-	if (T.k == TK_ID) {
+	
+	tok_a ts = { 0 }; ast_a as = { 0 };
+	
+	/* Parse expressions with a shunting-yard algorithm */
+	for (;;) switch (T.k) {
+	case TK_ID: { /* TODO handle procedure calls */
 		ast *a = ast_init(); tok t = lex_kind(l, TK_ID);
 		a->k = AK_VAR; a->ln = t.ln; a->cl = t.cl;
 
@@ -185,9 +223,33 @@ static ast *parse_expr(lex *l, syt *st) {
 
 		if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); }
 
-		return a;
+		ast_a_push(&as, a);
+	} break;
+	case TK_INT:    { ast_a_push(&as, parse_int(l, st)); } break;
+	case TK_LPAREN: { tok_a_push(&ts, lex_next(l));      } break;
+	case TK_RPAREN: { /* TODO */ error(1, "RPAREN: TODO"); } break;
+	default: {
+		op o1; if ((o1 = ops_lookup(T.k)).tk == TK_NULL) { goto eox; }
+		
+		for (op o2 = ops_lookup(tok_a_peek(&ts).k);; o2 = ops_lookup(tok_a_peek(&ts).k)) {
+			if (o2.tk == TK_LPAREN || (o1.o > o2.o && (o1.o != o2.o || o1.as == true))) { break; }
+			
+			shunt(&as, tok_a_pop(&ts), o2);
+		}
+		
+		tok_a_push(&ts, lex_next(l));
+	} break;
+	} eox:;
+	
+	/* Pop any remaining operators from the operator stack */
+	for (tok t = tok_a_pop(&ts); t.k != TK_NULL; t = tok_a_pop(&ts)) {
+		if (t.k == TK_LPAREN) { error(1, "LPAREN: TODO"); }
+		if (t.k == TK_RPAREN) { error(1, "RPAREN: TODO"); }
+		
+		shunt(&as, t, ops_lookup(t.k));
 	}
-	else { error(1, "PARSE_EXPR %s", tok_ks[T.k]); }
+	
+	return ast_a_pop(&as);
 }
 
 /* Parse a procedure expression. */
@@ -224,7 +286,7 @@ static ast *parse_int(lex *l, syt *st) {
 	ast *a = ast_init(); tok t = lex_kind(l, TK_INT);
 	a->k = AK_INT; a->ln = t.ln; a->cl = t.cl;
 
-	a->t = &types[TY_U64]; /* TODO properly determine type */
+	a->t = &TYPE(TY_S64); /* TODO properly determine type */
 
 	if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); }
 	if (!(a->v = val_parse_int(t.s)).k) { error(1, "%s: %s", t.s, SERR); }
@@ -236,7 +298,12 @@ static ast *parse_int(lex *l, syt *st) {
 void ast_print(ast *a, UINT i) {
 	for (UINT j = 0; j != i; ++j) { printf("    "); }
 	printf("%zu:%zu: %s: %s", a->ln + 1, a->cl + 1, ast_ks[a->k], a->s);
-	if (a->k == AK_PROC) { printf(" -> %s", a->t->s); }
+	
+	switch (a->k) {
+	case AK_PROC: case AK_INT: { printf(" -> %s", a->t->s); } break;
+	default: {} break;
+	}
+	
 	fputc('\n', stdout);
 
 	if (a->c.a != NULL) for (UINT ci = 0; ci != a->c.al; ci += 1) {
diff --git a/src/parse.h b/src/parse.h
index 3900784..f26036a 100644
--- a/src/parse.h
+++ b/src/parse.h
@@ -13,11 +13,11 @@
 #include "value.h"
 
 typedef enum {
-	AK_NULL, AK_PROG,
+	AK_NULL, AK_PROG, AK_PROC,
 
 	AK_STMT, AK_COMP, AK_DECL, AK_RETURN, AK_IF, AK_FOR,
 
-	AK_EXPR, AK_PROC,
+	AK_OP_ADD, AK_OP_SUB, AK_OP_MUL, AK_OP_DIV, AK_OP_MOD,
 
 	AK_VAR, AK_INT,
 } ast_k;
@@ -29,11 +29,16 @@ typedef struct ast_s {
 	struct ast_s *p; struct { struct ast_s **a; UINT al; } c;
 } ast;
 
+typedef struct { ast **a; UINT al; } ast_a;
+
 extern char *ast_ks[];
 
 extern ast *ast_init(void);
 extern void ast_push(ast *a, ast *c);
 
+extern void ast_a_push(ast_a *aa, ast *a);
+extern ast *ast_a_pop(ast_a *aa);
+
 extern ast *parse(lex *l);
 
 extern void ast_print(ast *a, UINT i);