G

G Programming Language
git clone http://git.omkov.net/G
Log | Tree | Refs | README | Download

Author: Jakob Wakeling <[email protected]>
Date: 2023-06-12 13:44:44
Commit: 19793bd05244bc52fa239c26f4fc71415de221bb
Parent: d44ca567693a048a42551b84af16eeaf09101aa0

Replace the expression parser with a Pratt parser

Diffstat

M doc/g.ebnf | 4 ++--
M examples/main.g | 5 ++---
M src/lex.c | 2 +-
M src/llvm.c | 10 +++++++++-
M src/parse.c | 198 +++++++++++++++++++++++++++++++++++--------------------------------------------
M src/parse.h | 4 +++-
M src/type.c | 2 +-
M src/type.h | 2 +-

8 files changed, 108 insertions, 119 deletions

diff --git a/doc/g.ebnf b/doc/g.ebnf
index 75d223f..91d8fad 100644
--- a/doc/g.ebnf
+++ b/doc/g.ebnf
@@ -14,12 +14,12 @@ stmt = stmt_compound
      | [ expr ], ";"
      | "return", [ expr ], ";"
      | "if", "(", expr, ")", stmt, [ "else", stmt ]
-     | "for", "(" expr, ";", [ expr, ";" ], [ expr, ";" ], ")", stmt
+     | "for", "(" expr, [ ";", expr ], [ ";", expr ], ")", stmt
      ;
 
 stmt_compound = "{", { stmt }, "}" ;
 
-stmt_decl = iden, ":", ( decl_constant | decl_variable ) ;
+stmt_decl = iden, ":", ( stmt_decl_constant | stmt_decl_variable ) ;
 stmt_decl_constant = [ type ], ":", expr ;
 stmt_decl_variable = [ type ], "=", expr | type ;
 
diff --git a/examples/main.g b/examples/main.g
index 1d5cfa9..09b6fc1 100644
--- a/examples/main.g
+++ b/examples/main.g
@@ -1,5 +1,4 @@
-main :: proc() -> u64 {
+main :: proc() -> u32 {
 	#syscall(u64(60), u64(42));
-	var : u64 = 69;
-	return var;
+	return u32(s32(-1.0));
 }
diff --git a/src/lex.c b/src/lex.c
index 5914950..716b46e 100644
--- a/src/lex.c
+++ b/src/lex.c
@@ -94,7 +94,7 @@ tok lex_next(lex *l) {
 		case '*': {
 			UINT d = 1; for (P += 2, CL += 2; P != Q && d; P += 1) {
 				if (C == '/' && D == '*') { P += 2; CL += 2; d += 1; continue; }
-				if (C == '*' && D == '/') { P += 2; CL += 2; d -= 1; continue; }
+				if (C == '*' && D == '/') { P += d == 1 ? 1 : 2; CL += 2; d -= 1; continue; }
 				if (C == '\n') { LN += 1; CL = 0; } else { CL += 1; }
 			}
 		} goto skip;
diff --git a/src/llvm.c b/src/llvm.c
index a8fedb7..b3cb5fb 100644
--- a/src/llvm.c
+++ b/src/llvm.c
@@ -271,6 +271,14 @@ static LLVMValueRef llvm_expr(ast *a, syt *st) {
 	case AK_OP_MOD: {
 		return LLVMBuildSRem(llvm_builder, llvm_expr(A.c.a[0], st), llvm_expr(A.c.a[1], st), "mod");
 	} break;
+	case AK_OP_NEG: {
+		type *t = ast_type(A.c.a[0], st);
+		if (t == NULL) { note("TODO", A.ln, A.cl, -1, "Subtree is missing a type"); }
+		
+		if (is_int(t)) { return LLVMBuildNeg(llvm_builder, llvm_expr(A.c.a[0], st), "neg"); }
+		else if (is_flt(t)) { return LLVMBuildFNeg(llvm_builder, llvm_expr(A.c.a[0], st), "neg"); }
+		else { note("TODO", A.ln, A.cl, -1, "Expression cannot be made negative (LLVM Failsafe)"); }
+	}
 	default: { error(2, "llvm_expr unknown kind %s", ast_ks[a->k]); } break;
 	}
 }
@@ -385,7 +393,7 @@ static inline void llvm_free(void) {
 /* Return the appropriate LLVMTypeRef for a G type. */
 static LLVMTypeRef llvm_type(type *t) {
 	switch (t->k) {
-	case TY_NULL: { return LLVMVoidType();  } break;
+	case TY_ZERO: { return LLVMVoidType();  } break;
 	case TY_B8:   { return LLVMIntType(8);  } break;
 	case TY_B16:  { return LLVMIntType(16); } break;
 	case TY_B32:  { return LLVMIntType(32); } break;
diff --git a/src/parse.c b/src/parse.c
index 8325461..c896a3c 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -16,15 +16,12 @@
 #include <stdlib.h>
 #include <string.h>
 
-/* tk : Token Kind, ak : AST Kind, o : Precedence, as : Left Associative */
-typedef struct { tok_k tk; ast_k ak; s32 o; bool as; } op;
-
 char *ast_ks[] = {
 	"AK_ZERO", "AK_PROG", "AK_PROC", "AK_TYPE", "AK_CAST",
 
 	"AK_STMT", "AK_COMP", "AK_DECL", "AK_RETURN", "AK_IF", "AK_FOR",
 
-	"AK_OP_POS", "AK_OP_NEG",
+	"AK_OP_POS", "AK_OP_NEG", "AK_OP_ADO", "AK_OP_DRF",
 	"AK_OP_ADD", "AK_OP_SUB", "AK_OP_MUL", "AK_OP_DIV", "AK_OP_MOD",
 
 	"AK_ASSIGN", "AK_AS_ADD", "AK_AS_SUB", "AK_AS_MUL", "AK_AS_DIV", "AK_AS_MOD",
@@ -43,21 +40,27 @@ static ast *parse_stmt_return(lex *l, syt *st);
 static ast *parse_stmt_if(lex *l, syt *st);
 static ast *parse_stmt_for(lex *l, syt *st);
 
-static ast *parse_expr(lex *l, syt *st, bool arg);
+static ast *parse_expr(lex *l, syt *st, s32 o);
 static ast *parse_expr_proc(lex *l, syt *st);
 
 static ast *parse_num(lex *l, syt *st);
 static ast *parse_int(lex *l, syt *st);
 static ast *parse_flt(lex *l, syt *st);
 
-static op op_lookup(tok_k tk, bool unary);
+static s32 tok_precedence(tok_k tk);
+static s32 ast_precedence(ast_k ak);
 
 /* Initialise an AST node. */
-ast *ast_init(void) {
+inline ast *ast_init(void) {
 	ast *a = calloc(1, sizeof (ast));
 	if (!a) { error(1, SERR); } return a;
 }
 
+/* Initialise an AST node of a specific kind. */
+inline ast *ast_kind(ast_k kind) {
+	ast *a = ast_init(); a->k = kind; return a;
+}
+
 /* Push a child AST node to an AST node. */
 void ast_push(ast *a, ast *c) {
 	ast **ca = realloc(a->c.a, (a->c.al += 1) * sizeof (ast *));
@@ -189,7 +192,7 @@ static ast *parse_stmt_decl(lex *l, syt *st, ast *a) {
 	); }
 
 	/* Assign a constant or variable value */
-	if (T.k == TK_COLON || T.k == TK_ASSIGN) { lex_next(l); ast_push(a, parse_expr(l, st, false)); }
+	if (T.k == TK_COLON || T.k == TK_ASSIGN) { lex_next(l); ast_push(a, parse_expr(l, st, 0)); }
 	else { error(1, "%s:%zu:%zu: error: expected ':' or '='", l->n, T.ln + 1, T.cl + 1); }
 
 	/* Parse a semicolon if one is required */
@@ -208,7 +211,7 @@ static ast *parse_stmt_assn(lex *l, syt *st, ast *a) {
 	case TK_AS_MUL: { a->k = AK_AS_MUL; } goto expr;
 	case TK_AS_DIV: { a->k = AK_AS_DIV; } goto expr;
 	case TK_AS_MOD: { a->k = AK_AS_MOD; } goto expr;
-	expr: { lex_next(l); ast_push(a, parse_expr(l, st, false)); } break;
+	expr: { lex_next(l); ast_push(a, parse_expr(l, st, 0)); } break;
 	default: { error(1, "%s:%zu:%zu: error: expected assignment operator", l->n, T.ln + 1, T.cl + 1); } break;
 	}
 
@@ -217,7 +220,7 @@ static ast *parse_stmt_assn(lex *l, syt *st, ast *a) {
 
 /* Parse an expression statement. */
 static ast *parse_stmt_expr(lex *l, syt *st) {
-	ast *a = NULL; if (T.k != TK_SCOLON) { a = parse_expr(l, st, false); }
+	ast *a = NULL; if (T.k != TK_SCOLON) { a = parse_expr(l, st, 0); }
 	lex_kind(l, TK_SCOLON); return a;
 }
 
@@ -226,7 +229,7 @@ static ast *parse_stmt_return(lex *l, syt *st) {
 	lex_kind(l, TK_RETURN);
 
 	ast *a = ast_init(); a->k = AK_RETURN;
-	if (T.k != TK_SCOLON) { ast_push(a, parse_expr(l, st, false)); }
+	if (T.k != TK_SCOLON) { ast_push(a, parse_expr(l, st, 0)); }
 
 	lex_kind(l, TK_SCOLON); return a;
 }
@@ -237,7 +240,7 @@ static ast *parse_stmt_if(lex *l, syt *st) {
 	ast *a = ast_init(); a->k = AK_IF;
 
 	/* Parse expression and closing parenthesis */
-	ast_push(a, parse_expr(l, st, false)); lex_kind(l, TK_RPAREN);
+	ast_push(a, parse_expr(l, st, 0)); lex_kind(l, TK_RPAREN);
 
 	/* Parse the if statement body */
 	ast_push(a, parse_stmt(l, st)); return a;
@@ -251,30 +254,18 @@ static ast *parse_stmt_for(lex *l, syt *st) {
 	/* Parse one to three expressions and a closing parenthesis */
 	ast_push(a, parse_stmt_expr(l, st));
 	ast_push(a, parse_stmt_expr(l, st));
-	ast_push(a, parse_expr(l, st, false)); lex_kind(l, TK_RPAREN);
+	ast_push(a, parse_expr(l, st, 0)); lex_kind(l, TK_RPAREN);
 
 	/* Parse the for statement body */
 	ast_push(a, parse_stmt(l, st)); return a;
 }
 
-static inline void shunt(ast_a *aa, tok t, op o) {
-	ast *r = ast_a_pop(aa); ast *l = ast_a_pop(aa);
-	
-	ast *a = ast_init(); a->k = o.ak; a->ln = t.ln; a->cl = t.cl;
-	ast_push(a, l); ast_push(a, r); ast_a_push(aa, a);
-}
-
 /* Parse an expression. */
-static ast *parse_expr(lex *l, syt *st, bool arg) {
-	if (T.k == TK_PROC) { return parse_expr_proc(l, st); }
-	
-	tok_a ts = { 0 }; ast_a as = { 0 };
-	
-	/* Parse expressions with a shunting-yard algorithm */
-	for (;;) switch (T.k) {
+static ast *parse_expr(lex *l, syt *st, s32 o) {
+	ast *left = NULL; switch (T.k) {
 	case TK_ID: {
-		ast *a = ast_init(); tok t = lex_kind(l, TK_ID);
-		a->ln = t.ln; a->cl = t.cl;
+		left = ast_init(); tok t = lex_kind(l, TK_ID);
+		left->ln = t.ln; left->cl = t.cl;
 
 		ast *sym = syt_search_h(st, t.h, t.s);
 		if (sym == NULL) { note(l->n, t.ln, t.cl, -1, "use of undeclared identifier \"%s\"", t.s); }
@@ -282,94 +273,66 @@ static ast *parse_expr(lex *l, syt *st, bool arg) {
 		if (T.k == TK_LPAREN) {
 			lex_kind(l, TK_LPAREN);
 
-			if (sym->k == AK_TYPE) { a->k = AK_CAST; a->t = sym->t; }
-			else { a->k = AK_CALL; }
+			if (sym->k == AK_TYPE) { left->k = AK_CAST; left->t = sym->t; }
+			else { left->k = AK_CALL; }
 
 			if (T.k != TK_RPAREN) for (;;) {
-				ast_push(a, parse_expr(l, st, true));
+				ast_push(left, parse_expr(l, st, 0));
 				if (T.k != TK_COMMA) { break; }
-				if (a->k == AK_CAST) { note(l->n, T.ln, T.cl, 0, "type casts must have only a single argument"); }
+				if (left->k == AK_CAST) { note(l->n, T.ln, T.cl, 0, "type casts must have only a single argument"); }
 				lex_kind(l, TK_COMMA);
 			}
 
 			lex_kind(l, TK_RPAREN);
 		}
-		else {
-			a->k = AK_ID_VAR; a->t = sym->t;
-		}
-		
-		if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); }
-		
-		ast_a_push(&as, a);
-	} break;
-	case TK_NUM:    { ast_a_push(&as, parse_num(l, st)); } break;
-	case TK_COLON:  { if (arg) { goto eox; }} break;
-	case TK_LPAREN: { tok_a_push(&ts, lex_next(l));      } break;
-	case TK_RPAREN: {
-		for (tok t = tok_a_pop(&ts);; t = tok_a_pop(&ts)) {
-			if (t.k == TK_ZERO) /* Stack is empty */ {
-				if (arg) { goto eox; }
-				note(l->n, T.ln, T.cl, -1, "expected left parenthesis");
-			}
-			if (t.k == TK_LPAREN) { break; }
-			
-			shunt(&as, t, op_lookup(t.k, false));
-		}
+		else { left->k = AK_ID_VAR; left->t = sym->t; }
 
-		lex_next(l);
+		if (!(left->s = strdup(t.s))) { error(1, "%s", SERR); }
 	} break;
+	case TK_NUM:    { left = parse_num(l, st);       } break;
+	case TK_PROC:   { return parse_expr_proc(l, st); } break;
 	case TK_HASH: {
-		ast *a = ast_init(); tok t = lex_kind(l, TK_HASH);
-		a->ln = t.ln; a->cl = t.cl; bool needs_args = false;
+		left = ast_init(); tok t = lex_kind(l, TK_HASH);
+		left->ln = t.ln; left->cl = t.cl; bool needs_args = false;
 
-		if (strcmp(t.s, "#syscall") == 0) { a->k = AK_HASH_SYSCALL; needs_args = true; }
+		if (strcmp(t.s, "#syscall") == 0) { left->k = AK_HASH_SYSCALL; needs_args = true; }
 		else { note("TODO", t.ln, t.cl, 0, "%s: unrecognised hash procedure", t.s); }
 
 		if (needs_args) {
 			lex_kind(l, TK_LPAREN);
 
 			if (T.k != TK_RPAREN) for (;;) {
-				ast_push(a, parse_expr(l, st, true) /* FIXME THIS IS NULL WHEN STRING OR ANY UNHANDLED EXPRESSION */);
+				ast_push(left, parse_expr(l, st, true));
 				if (T.k != TK_COMMA) { break; }
-				if (a->k == AK_CAST) { note(l->n, T.ln, T.cl, 0, "type casts must have only a single argument"); }
+				if (left->k == AK_CAST) { note(l->n, T.ln, T.cl, 0, "type casts must have only a single argument"); }
 				lex_kind(l, TK_COMMA);
 			}
 
 			lex_kind(l, TK_RPAREN);
 		}
 
-		if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); }
-		
-		ast_a_push(&as, a);
+		if (!(left->s = strdup(t.s))) { error(1, "%s", SERR); }
 	} break;
-	default: /* Handle operators */ {
-		op o1, o2; if ((o1 = op_lookup(T.k, false)).tk == TK_ZERO) { goto eox; }
-		
-		/*
-			If there is an operator at the top of the operator stack that is not
-			a left parenthesis, and has greater precedence than o1 or has the
-			same precedence as o1 and o1 is left-associative, then pop it from
-			the stack onto the output.
-		*/
-		for (o2 = op_lookup(tok_a_peek(&ts).k, false); o2.tk != TK_ZERO; o2 = op_lookup(tok_a_peek(&ts).k, false)) {
-			if (o2.tk == TK_LPAREN || (o1.o < o2.o && (o1.o != o2.o || o1.as == true))) { break; }
-			
-			shunt(&as, tok_a_pop(&ts), o2);
-		}
-		
-		tok_a_push(&ts, lex_next(l));
-	} break;
-	} eox:;
+	case TK_LPAREN: { lex_next(l); left = parse_expr(l, st, 0); lex_kind(l, TK_RPAREN); } break;
+	case TK_OP_ADD: { left = ast_kind(AK_OP_POS); } goto prefix;
+	case TK_OP_SUB: { left = ast_kind(AK_OP_NEG); } goto prefix;
+	case TK_BW_AND: { left = ast_kind(AK_OP_ADO); } goto prefix;
+	case TK_OP_MUL: { left = ast_kind(AK_OP_DRF); } goto prefix;
+	prefix: { lex_next(l); ast_push(left, parse_expr(l, st, ast_precedence(left->k))); } break;
+	default: { note(l->n, T.ln, T.cl, -1, "Unhandled expression of kind %s", tok_ks[T.k]); } break;
+	}
 
-	/* Pop any remaining operators from the operator stack */
-	for (tok t = tok_a_pop(&ts); t.k != TK_ZERO; t = tok_a_pop(&ts)) {
-		if (t.k == TK_LPAREN) { error(1, "LPAREN: TODO"); }
-		if (t.k == TK_RPAREN) { error(1, "RPAREN: TODO"); }
-		
-		shunt(&as, t, op_lookup(t.k, false));
+	/* Parse an infix expression if one is present */
+	for (ast *a = NULL; tok_precedence(T.k) > o; left = a) switch (T.k) {
+	case TK_OP_ADD: { a = ast_kind(AK_OP_ADD); } goto infix;
+	case TK_OP_SUB: { a = ast_kind(AK_OP_SUB); } goto infix;
+	case TK_OP_MUL: { a = ast_kind(AK_OP_MUL); } goto infix;
+	case TK_OP_DIV: { a = ast_kind(AK_OP_DIV); } goto infix;
+	case TK_OP_MOD: { a = ast_kind(AK_OP_MOD); } goto infix;
+	infix: { lex_next(l); ast_push(a, left); ast_push(a, parse_expr(l, st, ast_precedence(a->k))); } break;
 	}
 
-	return ast_a_pop(&as);
+	return left;
 }
 
 /* Parse a procedure expression. */
@@ -396,7 +359,7 @@ static ast *parse_expr_proc(lex *l, syt *st) {
 
 		a->t = s->t;
 	}
-	else { a->t = &TYPE(TY_NULL); }
+	else { a->t = &TYPE(TY_ZERO); }
 
 	ast_push(a, parse_stmt_compound(l, st)); return a;
 }
@@ -444,21 +407,36 @@ static ast *parse_flt(lex *l, syt *st) {
 	return a;
 }
 
-/* Lookup the operator associated with a particular token. */
-static op op_lookup(tok_k tk, bool unary) {
-	if (unary) switch (tk) {
-	case TK_OP_ADD: { return (op){ TK_OP_ADD, AK_OP_POS, 2, true }; }
-	case TK_OP_SUB: { return (op){ TK_OP_SUB, AK_OP_NEG, 2, true }; }
-	default: { return (op){ 0 }; } break;
+/*
+	Expression operator precedence:
+	8 > expression group (parenthesis), function call
+	7 > 
+	6 > positive (prefix +), negative (prefix -), address-of (prefix &), dereference (prefix *)
+	5 > 
+	4 > multiplication (*), division (/), modulo (%)
+	3 > addition (+), subtraction (-)
+	2 > 
+	1 > 
+*/
+
+/* Get the infix precedence of a token kind. */
+static s32 tok_precedence(tok_k tk) {
+	switch (tk) {
+	case TK_LPAREN: { return 8; }
+	case TK_OP_MUL: case TK_OP_DIV: case TK_OP_MOD: { return 4; }
+	case TK_OP_ADD: case TK_OP_SUB: { return 3; }
+	default: { return 0; }
+	}
+}
+
+/* Get the precedence of an AST kind. */
+static s32 ast_precedence(ast_k ak) {
+	switch (ak) {
+	case AK_OP_POS: case AK_OP_NEG: case AK_OP_ADO: case AK_OP_DRF: { return 6; }
+	case AK_OP_MUL: case AK_OP_DIV: case AK_OP_MOD: { return 4; }
+	case AK_OP_ADD: case AK_OP_SUB: { return 3; }
+	default: { return 0; }
 	}
-	else switch (tk) {
-	case TK_OP_ADD: { return (op){ TK_OP_ADD, AK_OP_ADD, 4, false }; } break;
-	case TK_OP_SUB: { return (op){ TK_OP_SUB, AK_OP_SUB, 4, false }; } break;
-	case TK_OP_MUL: { return (op){ TK_OP_MUL, AK_OP_MUL, 3, false }; } break;
-	case TK_OP_DIV: { return (op){ TK_OP_DIV, AK_OP_DIV, 3, false }; } break;
-	case TK_OP_MOD: { return (op){ TK_OP_MOD, AK_OP_MOD, 3, false }; } break;
-	default: { return (op){ 0 }; } break;
-	};
 }
 
 /* Recursively print an AST. */
diff --git a/src/parse.h b/src/parse.h
index 7781e50..203db0c 100644
--- a/src/parse.h
+++ b/src/parse.h
@@ -20,7 +20,7 @@ typedef enum {
 
 	AK_STMT, AK_COMP, AK_DECL, AK_RETURN, AK_IF, AK_FOR,
 
-	AK_OP_POS, AK_OP_NEG,
+	AK_OP_POS, AK_OP_NEG, AK_OP_ADO, AK_OP_DRF,
 	AK_OP_ADD, AK_OP_SUB, AK_OP_MUL, AK_OP_DIV, AK_OP_MOD,
 
 	AK_ASSIGN, AK_AS_ADD, AK_AS_SUB, AK_AS_MUL, AK_AS_DIV, AK_AS_MOD,
@@ -46,6 +46,7 @@ typedef struct { ast **a; UINT al; } ast_a;
 extern char *ast_ks[];
 
 extern ast *ast_init(void);
+extern ast *ast_kind(ast_k kind);
 extern void ast_push(ast *a, ast *c);
 extern void ast_displace(ast *a, ast *c);
 extern type *ast_type(ast *a, syt *st);
diff --git a/src/type.c b/src/type.c
index 548ec6e..e053f0e 100644
--- a/src/type.c
+++ b/src/type.c
@@ -6,7 +6,7 @@
 #include "type.h"
 
 type types[] = {
-	{ TY_NULL, 0,       0, "void" },
+	{ TY_ZERO, 0,       0, "void" },
 	{ TY_TYPE, 0,      -1, "type" },
 	{ TY_PTR,  TF_PTR, -1, "ptr"  },
 	{ TY_AUTO, 0,      -1, "auto" },
diff --git a/src/type.h b/src/type.h
index 2fbd7e1..9a56b7a 100644
--- a/src/type.h
+++ b/src/type.h
@@ -11,7 +11,7 @@
 #define TYPE(a) (types[a])
 
 typedef enum {
-	TY_NULL, TY_TYPE, TY_PTR, TY_AUTO,
+	TY_ZERO, TY_TYPE, TY_PTR, TY_AUTO,
 
 	TY_BOOL, TY_B8, TY_B16, TY_B32, TY_B64,