G

G Programming Language
git clone http://git.omkov.net/G
Log | Tree | Refs | README | Download

Author: Jakob Wakeling <[email protected]>
Date: 2023-07-01 03:15:32
Commit: 828da145ae807f2023e1ef42127b5650c80e40c3
Parent: f05bcdbc0f36b14cc6a35475944ca081233cf497

Parse procedure calls as an infix operator

Diffstat

M README.md | 17 ++---------------
M doc/g.ebnf | 7 +++----
M doc/spec.md | 5 +++++
M src/analyse.c | 21 ++++++++++++++++++---
M src/llvm.c | 53 ++++++++++++++++++++++++-----------------------------
M src/parse.c | 66 +++++++++++++++++++++++++++++++++++-------------------------------
M src/parse.h | 2 +-

7 files changed, 88 insertions, 83 deletions

diff --git a/README.md b/README.md
index 164afd1..03b583b 100644
--- a/README.md
+++ b/README.md
@@ -5,19 +5,6 @@ Influenced by **C**, **C++**, **Odin**, and others.
 
 Note that at present, **G** is highly unstable and will certainly change.
 
-## Example
-
-```g
-import "std:io";
-
-main :: proc() -> s32 {
-	io.print("Hello, World!\n");
-	return 0;
-}
-```
-
-*Note that this example will not currently compile.*
-
 ## Usage
 
 **G** is being developed on x86-64 Linux, and is untested elsewhere.
@@ -26,7 +13,7 @@ main :: proc() -> s32 {
 
 - CMake >= 3.14, to build
 - LLVM, to build
-- Clang, for linking compiled object files
+- Clang, for `_start()` and linking
 
 ### Building
 
@@ -67,7 +54,7 @@ command. The second command will output an executable file, *a.out* by default.
 - [x] Implement reals
 - [x] Implement pointers
 - [x] Implement arrays
-- [ ] Implement variable length arrays
+- [ ] Implement variable length arrays (?)
 - [x] Implement expressions
 - [x] Implement type casting
 - [ ] Implement type casting to pointers and arrays
diff --git a/doc/g.ebnf b/doc/g.ebnf
index 862f5e1..b893a12 100644
--- a/doc/g.ebnf
+++ b/doc/g.ebnf
@@ -30,6 +30,7 @@ expr = iden | literal
      | "(", expr, ")"
      | type, "(", expr, ")" (* Type cast *)
      | iden, "(", [ expr, { ",", expr } ], ")" (* Procedure call *)
+     | iden, "[", expr, "]" (* Array access *)
      | "#", iden, [ "(", [ expr, { ",", expr } ], ")" ] (* Hash expression *)
      | "+", expr | "-", expr (* Unary POS and NEG *)
      | "!", expr | "~", expr (* Logical and bitwise NOT *)
@@ -60,8 +61,8 @@ expr_proc = "proc", "(", [ parm_list ], ")", [ "->", type ], stmt_compound ;
 parm = iden, ":", type ;
 parm_list = parm, { ",", parm } ;
 
-iden = alphu, { alphu | digit } ;
-type = iden ;
+iden = ( alpha | "_" ), { alpha | digit | "_" } ;
+type = { "*" | "[", [ literal_int ], "]" }, iden ;
 
 (* Literals *)
 literal_null = "null" ;
@@ -96,7 +97,6 @@ digit_doz = digit_dec | "A" | "B" ;
 digit_hex = digit_dec | "A" | "B" | "C" | "D" | "E" | "F" ;
 quadd_hex = digit_hex, digit_hex, digit_hex, digit_hex ;
 
-alphu = alpha | "_" ;
 alpha = alpha_upper | alpha_lower ;
 alpha_upper = ANY_UPPERCASE_ENGLISH_LETTER ;
 alpha_lower = ANY_LOWERCASE_ENGLISH_LETTER ;
diff --git a/doc/spec.md b/doc/spec.md
index c0c9994..392abaf 100644
--- a/doc/spec.md
+++ b/doc/spec.md
@@ -13,6 +13,7 @@
 - [Variables](#variables)
 - [Types](#types)
 	- [Basic Types](#basic-types)
+	- [Arrays](#arrays)
 
 ## Lexical Elements
 
@@ -177,3 +178,7 @@ f16be f32be f64be f128be // big endian
 char // alias of u8
 rune // alias of u32
 ```
+
+### Arrays
+
+An array is a fixed length sequence of elements of a single type.
diff --git a/src/analyse.c b/src/analyse.c
index d24d48f..2f275d6 100644
--- a/src/analyse.c
+++ b/src/analyse.c
@@ -141,10 +141,24 @@ static void analyse_stmt_for(ast *a, syt *st) {
 
 /* Analyse an expression. */
 static void analyse_expr(ast *a, syt *st) {
-	if (A.k == AK_PROC) { analyse_expr_proc(a, st); }
-	if (A.k == AK_OP_DRF) { A.t = ast_type(C[0], st)->base; }
-	
-	for (UINT i = 0; i < CL; i += 1) { analyse_expr(C[i], st); }
+	switch (A.k) {
+	case AK_CALL: {
+		assert(C[0]->k == AK_ID);
+		assert(C[0]->h != 0);
+		assert(C[0]->s != NULL);
+		
+		ast *sym = syt_search_h(st, C[0]->h, C[0]->s);
+		if (sym == NULL) { note("TODO", A.ln, A.cl, 0, "Use of undeclared identifier \"%s\"", C[0]->s); }
+		
+		else if (sym->k == AK_TYPE) {
+			A.k = AK_CAST; A.t = sym->t;
+			if (CL > 2) { note("TODO", A.ln, A.cl, 0, "Type casts must only have a single argument"); }
+		}
+	} break;
+	case AK_PROC:   { analyse_expr_proc(a, st);       } break;
+	case AK_OP_DRF: { A.t = ast_type(C[0], st)->base; } break;
+	default: { for (UINT i = 0; i < CL; i += 1) { analyse_expr(C[i], st); }} break;
+	}
 }
 
 /* Analyse a procedure expression. */
diff --git a/src/llvm.c b/src/llvm.c
index 852823b..01b71a3 100644
--- a/src/llvm.c
+++ b/src/llvm.c
@@ -170,12 +170,8 @@ static LLVMValueRef llvm_stmt_decl(ast *a, syt *st) {
 /* Generate IR for an assignment statement. */
 static LLVMValueRef llvm_stmt_assn(ast *a, syt *st) {
 	assert(
-		a->k == AK_ASSIGN ||
-		a->k == AK_AS_ADD ||
-		a->k == AK_AS_SUB ||
-		a->k == AK_AS_MUL ||
-		a->k == AK_AS_DIV ||
-		a->k == AK_AS_MOD
+		a->k == AK_ASSIGN || a->k == AK_AS_ADD || a->k == AK_AS_SUB ||
+		a->k == AK_AS_MUL || a->k == AK_AS_DIV || a->k == AK_AS_MOD
 	);
 
 	ast *v = syt_search(st, C[0]->s);
@@ -242,7 +238,7 @@ static LLVMValueRef llvm_expr(ast *a, syt *st) {
 	case AK_ARR:  { return llvm_arr(a, st);       } break;
 	case AK_PROC: { return llvm_expr_proc(a, st); } break;
 	case AK_CAST: { return llvm_expr_cast(a, st); } break;
-	case AK_ID_VAR: {
+	case AK_ID: {
 		ast *sym = syt_search(st, a->s);
 		if (sym == NULL) { note(file_name, A.ln, A.cl, 0, "Undefined variable %s", A.s); }
 
@@ -251,12 +247,14 @@ static LLVMValueRef llvm_expr(ast *a, syt *st) {
 		return LLVMBuildLoad2(llvm_builder, llvm_type(sym->t), sym->llvm_v, "");
 	} break;
 	case AK_CALL: {
-		ast *v = syt_search(st, a->s);
-		if (v == NULL) { error(2, "llvm_expr: Undefined procedure %s", a->s); }
+		ast *sym = syt_search_h(st, C[0]->h, C[0]->s);
+		if (sym == NULL) { note(file_name, A.ln, A.cl, -1, "Undefined procedure \"%s\" (llvm:llvm_expr)", C[0]->s); }
 
-		if (!v->llvm_v) { error(2, "llvm_expr: Procedure follows"); }
+		if (!sym->llvm_v) { note(file_name, A.ln, A.cl, -1, "Procedure \"%s\" follows (llvm:llvm_expr)", C[0]->s); }
 
-		return LLVMBuildCall2(llvm_builder, v->llvm_t, v->llvm_v, NULL, 0, "");
+		/* TODO procedure call arguments */
+		
+		return LLVMBuildCall2(llvm_builder, sym->llvm_t, sym->llvm_v, NULL, 0, "");
 	} break;
 	case AK_OP_POS: { a = C[0]; goto reset; /* no-op */ }
 	case AK_OP_NEG: {
@@ -321,54 +319,55 @@ static LLVMValueRef llvm_expr_proc(ast *a, syt *st) {
 
 /* Generate IR for a type cast. */
 static LLVMValueRef llvm_expr_cast(ast *a, syt *st) {
-	assert(a->t != NULL); assert(a->cl > 0);
+	assert(A.t != NULL);
+	assert(CL == 2);
 
-	type *expr_type = ast_type(C[0], st);
+	type *expr_type = ast_type(C[1], st);
 
 	if (is_ptr(expr_type)) {
 		if (is_int(a->t)) {
-			return LLVMBuildPtrToInt(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), "");
+			return LLVMBuildPtrToInt(llvm_builder, llvm_expr(C[1], st), llvm_type(a->t), "");
 		}
 	}
 	else if (is_bool(expr_type)) {
 		if (is_int(a->t)) {
-			return LLVMBuildIntCast2(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), is_sign(a->t), "");
+			return LLVMBuildIntCast2(llvm_builder, llvm_expr(C[1], st), llvm_type(a->t), is_sign(a->t), "");
 		}
 	}
 	else if (is_int(expr_type)) {
 		if (is_bool(a->t) || is_int(a->t)) {
-			return LLVMBuildIntCast2(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), is_sign(a->t), "");
+			return LLVMBuildIntCast2(llvm_builder, llvm_expr(C[1], st), llvm_type(a->t), is_sign(a->t), "");
 		}
 		else if (is_flt(a->t)) {
 			if (is_sign(expr_type)) {
-				return LLVMBuildSIToFP(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), "stof");
+				return LLVMBuildSIToFP(llvm_builder, llvm_expr(C[1], st), llvm_type(a->t), "stof");
 			}
 			else {
-				return LLVMBuildUIToFP(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), "utof");
+				return LLVMBuildUIToFP(llvm_builder, llvm_expr(C[1], st), llvm_type(a->t), "utof");
 			}
 		}
 	}
 	else if (is_flt(expr_type)) {
 		if (is_flt(a->t)) {
-			return LLVMBuildFPCast(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), "cast");
+			return LLVMBuildFPCast(llvm_builder, llvm_expr(C[1], st), llvm_type(a->t), "cast");
 		}
 		else if (is_int(a->t)) {
 			if (is_sign(a->t)) {
-				return LLVMBuildFPToSI(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), "ftos");
+				return LLVMBuildFPToSI(llvm_builder, llvm_expr(C[1], st), llvm_type(a->t), "ftos");
 			}
 			else {
-				return LLVMBuildFPToUI(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), "ftou");
+				return LLVMBuildFPToUI(llvm_builder, llvm_expr(C[1], st), llvm_type(a->t), "ftou");
 			}
 		}
 	}
 	else if (expr_type->k == TY_ARR) {
 		if (is_ptr(a->t)) {
 			a->t->base = expr_type->base;
-			return LLVMBuildBitCast(llvm_builder, llvm_expr(C[0], st), llvm_type(a->t), "");
+			return LLVMBuildBitCast(llvm_builder, llvm_expr(C[1], st), llvm_type(a->t), "");
 		}
 	}
 
-	note(file_name, A.ln, A.cl, -1, "unhandled type %s or %s (llvm:llvm_expr_cast)", expr_type->s, a->t->s);
+	note(file_name, A.ln, A.cl, -1, "Unhandled cast \"%s\" -> \"%s\" (llvm:llvm_expr_cast)", expr_type->s, A.t->s);
 }
 
 /* Generate IR for an array. */
diff --git a/src/parse.c b/src/parse.c
index 5d80fe5..2a9b439 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -27,7 +27,7 @@ char *ast_ks[] = {
 
 	"AK_ASSIGN", "AK_AS_ADD", "AK_AS_SUB", "AK_AS_MUL", "AK_AS_DIV", "AK_AS_MOD",
 
-	"AK_ID_VAR", "AK_CALL", "AK_BOOL", "AK_INT", "AK_FLT", "AK_ARR",
+	"AK_ID", "AK_CALL", "AK_BOOL", "AK_INT", "AK_FLT", "AK_ARR",
 
 	"AK_HASH_SYSCALL"
 };
@@ -52,6 +52,8 @@ static ast *parse_flt(lex *l, syt *st);
 static s32 tok_precedence(tok_k tk);
 static s32 ast_precedence(ast_k ak);
 
+static inline char *strdup_or_fail(const char *s);
+
 /* Initialise an AST node. */
 inline ast *ast_init(ast_k kind, UINT ln, UINT cl) {
 	ast *a = calloc(1, sizeof (*a)); if (a == NULL) { error(1, SERR); }
@@ -236,30 +238,8 @@ static ast *parse_stmt_for(lex *l, syt *st) {
 static ast *parse_expr(lex *l, syt *st, s32 o) {
 	ast *left = NULL; switch (T.k) {
 	case TK_ID: {
-		tok t = lex_next(l);
-		left = ast_init(0, t.ln, t.cl);
-		
-		ast *sym = syt_search_h(st, t.h, t.s);
-		if (sym == NULL) { note(l->n, t.ln, t.cl, -1, "Use of undeclared identifier \"%s\"", t.s); }
-		
-		if (T.k == TK_LPAREN) {
-			lex_kind(l, TK_LPAREN);
-			
-			if (sym->k == AK_TYPE) { left->k = AK_CAST; left->t = sym->t; }
-			else { left->k = AK_CALL; }
-			
-			if (T.k != TK_RPAREN) for (;;) {
-				ast_push(left, parse_expr(l, st, 0));
-				if (T.k != TK_COMMA) { break; }
-				if (left->k == AK_CAST) { note(l->n, T.ln, T.cl, 0, "type casts must have only a single argument"); }
-				lex_kind(l, TK_COMMA);
-			}
-			
-			lex_kind(l, TK_RPAREN);
-		}
-		else { left->k = AK_ID_VAR; left->t = sym->t; }
-		
-		if (!(left->s = strdup(t.s))) { error(1, "%s", SERR); }
+		left = ast_init(AK_ID, T.ln, T.cl); left->h = T.h;
+		left->s = strdup_or_fail(T.s); lex_next(l);
 	} break;
 	case TK_TRUE:   { left = ast_init(AK_BOOL, T.ln, T.cl); left->v_bool = true;  } goto boolean;
 	case TK_FALSE:  { left = ast_init(AK_BOOL, T.ln, T.cl); left->v_bool = false; } goto boolean;
@@ -279,7 +259,7 @@ static ast *parse_expr(lex *l, syt *st, s32 o) {
 			lex_kind(l, TK_LPAREN);
 
 			if (T.k != TK_RPAREN) for (;;) {
-				ast_push(left, parse_expr(l, st, true));
+				ast_push(left, parse_expr(l, st, 0));
 				if (T.k == TK_COMMA) { lex_next(l); } else { break; }
 			}
 
@@ -300,6 +280,21 @@ static ast *parse_expr(lex *l, syt *st, s32 o) {
 
 	/* Parse an infix expression if one is present */
 	for (ast *a = NULL; tok_precedence(T.k) > o; left = a) switch (T.k) {
+	case TK_LPAREN: {
+		a = ast_init(AK_CALL, left->ln, left->cl);
+		lex_next(l); ast_push(a, left);
+		
+		/* Parse call arguments if present */
+		if (T.k != TK_RPAREN) for (;;) {
+			ast_push(a, parse_expr(l, st, 0));
+			if (T.k == TK_COMMA) { lex_next(l); } else { break; }
+		}
+		
+		lex_kind(l, TK_RPAREN);
+	} break;
+	case TK_LBRACK: {
+		/* TODO array access */
+	} break;
 	case TK_ASSIGN: { a = ast_init(AK_ASSIGN, T.ln, T.cl); } goto infix;
 	case TK_AS_ADD: { a = ast_init(AK_AS_ADD, T.ln, T.cl); } goto infix;
 	case TK_AS_SUB: { a = ast_init(AK_AS_SUB, T.ln, T.cl); } goto infix;
@@ -314,6 +309,8 @@ static ast *parse_expr(lex *l, syt *st, s32 o) {
 	infix: { lex_next(l); ast_push(a, left); ast_push(a, parse_expr(l, st, ast_precedence(a->k))); } break;
 	}
 
+	if (left == NULL) { note(l->n, T.ln, T.cl, 0, "Expected an expression"); }
+	
 	return left;
 }
 
@@ -393,8 +390,7 @@ static ast *parse_int(lex *l, syt *st) {
 	tok t = lex_next(l);
 	ast *a = ast_init(AK_INT, t.ln, t.cl);
 
-	if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); }
-	
+	a->s = strdup_or_fail(t.s);
 	a->v_int = t.v_int;
 
 	/* Determine the minimum integer type */
@@ -414,8 +410,7 @@ static ast *parse_flt(lex *l, syt *st) {
 	tok t = lex_next(l);
 	ast *a = ast_init(AK_FLT, t.ln, t.cl);
 
-	if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); }
-	
+	a->s = strdup_or_fail(t.s);
 	a->v_flt = t.v_flt;
 
 	/* Determine the minimum float type */
@@ -452,6 +447,7 @@ static s32 tok_precedence(tok_k tk) {
 /* Get the precedence of an AST kind. */
 static s32 ast_precedence(ast_k ak) {
 	switch (ak) {
+	case AK_CALL: { return 8; }
 	case AK_OP_POS: case AK_OP_NEG: case AK_BW_NOT: case AK_OP_ADO: case AK_OP_DRF: { return 6; }
 	case AK_OP_MUL: case AK_OP_DIV: case AK_OP_MOD: { return 4; }
 	case AK_OP_ADD: case AK_OP_SUB: { return 3; }
@@ -460,6 +456,13 @@ static s32 ast_precedence(ast_k ak) {
 	}
 }
 
+/* Duplicate a string or fail. */
+static inline char *strdup_or_fail(const char *s) {
+	register char *r = strdup(s);
+	if (r == NULL) { error(1, "%s", SERR); }
+	return r;
+}
+
 /* Recursively print an AST. */
 void ast_print(ast *a, UINT indent) {
 	for (UINT i = 0; i < indent; ++i) { printf("    "); }
diff --git a/src/parse.h b/src/parse.h
index 672346a..d875d72 100644
--- a/src/parse.h
+++ b/src/parse.h
@@ -24,7 +24,7 @@ typedef enum {
 
 	AK_ASSIGN, AK_AS_ADD, AK_AS_SUB, AK_AS_MUL, AK_AS_DIV, AK_AS_MOD,
 
-	AK_ID_VAR, AK_CALL, AK_BOOL, AK_INT, AK_FLT, AK_ARR,
+	AK_ID, AK_CALL, AK_BOOL, AK_INT, AK_FLT, AK_ARR,
 
 	AK_HASH_SYSCALL
 } ast_k;