G

G Programming Language
git clone http://git.omkov.net/G
Log | Tree | Refs | README | Download

Author: Jakob Wakeling <[email protected]>
Date: 2023-07-04 05:52:43
Commit: 421414826683053aae33f88755c0fbea14a06513
Parent: cc58a1d3fcd21571a5d231359a3acbbe5564d97d

Improve hash expression handling

Diffstat

M examples/hello.g | 5 ++---
A examples/io.g | 11 +++++++++++
M src/analyse.c | 19 +++++++++++++++++--
M src/lex.c | 33 +++++++++++----------------------
M src/lex.h | 4 ++--
M src/llvm.c | 65 +++++++++++++++++++++++++++--------------------------------------
M src/parse.c | 29 ++++++++---------------------
M src/parse.h | 2 +-

8 files changed, 79 insertions, 89 deletions

diff --git a/examples/hello.g b/examples/hello.g
index 3525d82..62f4cde 100644
--- a/examples/hello.g
+++ b/examples/hello.g
@@ -1,5 +1,4 @@
-main :: proc() -> u64 {
+main :: proc() -> sint {
 	hello : [14]u8 = { 'H', 'e', 'l', 'l', 'o', ',', ' ', 'W', 'o', 'r', 'l', 'd', '!', '\n' };
-	#syscall(u64(1), u64(1), hello, u64(14));
-	return u64(0);
+	return #syscall(uint(1), sint(1), ptr(hello), uint(14));
 }
diff --git a/examples/io.g b/examples/io.g
new file mode 100644
index 0000000..a8bfe87
--- /dev/null
+++ b/examples/io.g
@@ -0,0 +1,11 @@
+main :: proc() -> sint {
+	file : [6]u8 = { 'g', '.', 't', 'x', 't', '\0' };
+	buf : [7]u8 = { 'O', 'u', 't', 'p', 'u', 't', '\n' };
+	
+	/* Open file with O_WRONLY and O_CREAT flags */
+	fd : sint = #syscall(uint(2), ptr(file), s32(0b1000010), u16(0o644));
+	r : sint = #syscall(uint(1), fd, ptr(buf), uint(7));
+	#syscall(uint(3), fd);
+	
+	return r;
+}
diff --git a/src/analyse.c b/src/analyse.c
index ed071f4..1df4abb 100644
--- a/src/analyse.c
+++ b/src/analyse.c
@@ -12,6 +12,7 @@
 
 #include <assert.h>
 #include <stdio.h>
+#include <string.h>
 
 static void analyse_stmt(ast *a, syt *st);
 static inline void analyse_stmt_comp(ast *a, syt *st);
@@ -22,6 +23,7 @@ static void analyse_stmt_for(ast *a, syt *st);
 
 static void analyse_expr(ast *a, syt *st);
 static void analyse_expr_proc(ast *a, syt *st);
+static void analyse_expr_hash(ast *a, syt *st);
 
 #define A (*a)
 #define C (a->c.a) /* AST child shorthand "C[i]" */
@@ -111,6 +113,8 @@ static void analyse_stmt_decl(ast *a, syt *st) {
 
 /* Analyse a return statement. */
 static void analyse_stmt_return(ast *a, syt *st) {
+	if (A.c.al == 1) { analyse_expr(C[0], st); }
+	
 	type *t = A.c.al != 0 ? ast_type(C[0], st) : &TYPE(TY_VOID);
 	ast *p = A.p; for (; p->k != AK_PROC; p = p->p);
 
@@ -125,8 +129,6 @@ static void analyse_stmt_return(ast *a, syt *st) {
 			note("TODO", A.ln, A.cl, 0, "Explicit cast required from %s to %s", t->s, p->t->s);
 		}
 	}
-	
-	if (A.c.al == 1) { analyse_expr(C[0], st); }
 }
 
 /* Analyse an if statement. */
@@ -164,6 +166,7 @@ static void analyse_expr(ast *a, syt *st) {
 		}
 	} break;
 	case AK_PROC:   { analyse_expr_proc(a, st);       } break;
+	case AK_HASH:   { analyse_expr_hash(a, st);       } break;
 	case AK_OP_DRF: { A.t = ast_type(C[0], st)->base; } break;
 	default: { for (UINT i = 0; i < CL; i += 1) { analyse_expr(C[i], st); }} break;
 	}
@@ -181,3 +184,13 @@ static void analyse_expr_proc(ast *a, syt *st) {
 	/* Analyse the procedure body */
 	analyse_stmt_comp(C[CL - 1], st);
 }
+
+/* Analyse a hash expression. */
+static void analyse_expr_hash(ast *a, syt *st) {
+	assert(A.k == AK_HASH);
+	
+	if (strcmp(A.s, "syscall") == 0) { A.k = AK_HASH_SYSCALL; A.t = &TYPE(TY_SINT); }
+	else { note("TODO", A.ln, A.cl, 0, "Unrecognised hash expression \"%s\"", A.s); }
+	
+	for (UINT i = 0; i < CL; i += 1) { analyse_expr(C[i], st); }
+}
diff --git a/src/lex.c b/src/lex.c
index 2180e47..8584ec8 100644
--- a/src/lex.c
+++ b/src/lex.c
@@ -16,12 +16,12 @@
 #include <stdio.h>
 
 char *tok_ks[] = {
-	"TK_VOID", "TK_EOF", "TK_ID", "TK_INT", "TK_FLT", "TK_STR", "TK_HASH",
+	"TK_VOID", "TK_EOF", "TK_ID", "TK_INT", "TK_FLT", "TK_STR",
 
 	"TK_NULL", "TK_TRUE", "TK_FALSE", "TK_RETURN", "TK_IF", "TK_ELSE", "TK_FOR", "TK_PROC",
 
 	"TK_LPAREN", "TK_RPAREN", "TK_LBRACK", "TK_RBRACK", "TK_LBRACE", "TK_RBRACE",
-	"TK_COLON",  "TK_SCOLON", "TK_COMMA",  "TK_PERIOD", "TK_RARROW", "TK_QMARK",
+	"TK_COLON",  "TK_SCOLON", "TK_COMMA",  "TK_PERIOD", "TK_RARROW", "TK_QMARK", "TK_HASH",
 
 	"TK_OP_ADD", "TK_OP_SUB", "TK_OP_MUL", "TK_OP_DIV", "TK_OP_MOD",
 	"TK_OP_EQ",  "TK_OP_NEQ", "TK_OP_GT",  "TK_OP_LT",  "TK_OP_GTE", "TK_OP_LTE",
@@ -112,14 +112,14 @@ tok lex_next(lex *l) {
 		for (P += 1; is_alpha(C) || is_digit_dec(C) || C == '_'; P += 1);
 		sl = P - s; CL += sl; T.h = syt_hash(s, sl);
 
-		if      (strncmp(s, "null",   sl) == 0) { T.k = TK_NULL;   }
-		else if (strncmp(s, "true",   sl) == 0) { T.k = TK_TRUE;   }
-		else if (strncmp(s, "false",  sl) == 0) { T.k = TK_FALSE;  }
-		else if (strncmp(s, "return", sl) == 0) { T.k = TK_RETURN; }
-		else if (strncmp(s, "if",     sl) == 0) { T.k = TK_IF;     }
-		else if (strncmp(s, "else",   sl) == 0) { T.k = TK_ELSE;   }
-		else if (strncmp(s, "for",    sl) == 0) { T.k = TK_FOR;    }
-		else if (strncmp(s, "proc",   sl) == 0) { T.k = TK_PROC;   }
+		if      (strncmp(s, "null",   4) == 0) { T.k = TK_NULL;   }
+		else if (strncmp(s, "true",   5) == 0) { T.k = TK_TRUE;   }
+		else if (strncmp(s, "false",  6) == 0) { T.k = TK_FALSE;  }
+		else if (strncmp(s, "return", 6) == 0) { T.k = TK_RETURN; }
+		else if (strncmp(s, "if",     2) == 0) { T.k = TK_IF;     }
+		else if (strncmp(s, "else",   4) == 0) { T.k = TK_ELSE;   }
+		else if (strncmp(s, "for",    3) == 0) { T.k = TK_FOR;    }
+		else if (strncmp(s, "proc",   4) == 0) { T.k = TK_PROC;   }
 		else { T.k = TK_ID; if (!(T.s = strndup(s, sl))) { error(1, SERR); }}
 	}
 
@@ -142,18 +142,6 @@ tok lex_next(lex *l) {
 		}
 	}
 
-	/* Handle hash procedures */
-	else if (C == '#') {
-		char *s = P; UINT sl;
-		
-		for (P += 1; is_alpha(C) || is_digit_dec(C) || C == '_'; P += 1);
-		sl = P - s; CL += sl;
-		
-		if (sl <= 1) { note(l->n, T.ln, T.cl, 0, "A hash must be followed by an identifier"); goto reset; }
-		
-		T.k = TK_HASH; T.h = syt_hash(s, sl); if (!(T.s = strndup(s, sl))) { error(1, SERR); }
-	}
-	
 	/* Handle punctuators and operators */
 	else switch (C) {
 		case '(': { T.k = TK_LPAREN; P += 1; CL += 1; } break;
@@ -167,6 +155,7 @@ tok lex_next(lex *l) {
 		case ',': { T.k = TK_COMMA;  P += 1; CL += 1; } break;
 		case '.': { T.k = TK_PERIOD; P += 1; CL += 1; } break;
 		case '?': { T.k = TK_QMARK;  P += 1; CL += 1; } break;
+		case '#': { T.k = TK_HASH;   P += 1; CL += 1; } break;
 		case '+': switch (D) {
 			default:  { T.k = TK_OP_ADD; P += 1; CL += 1; } break;
 			case '=': { T.k = TK_AS_ADD; P += 2; CL += 2; } break;
diff --git a/src/lex.h b/src/lex.h
index 6592620..f48d162 100644
--- a/src/lex.h
+++ b/src/lex.h
@@ -10,12 +10,12 @@
 
 /* Remember to update tok_ks in lex.c */
 typedef enum {
-	TK_VOID, TK_EOF, TK_ID, TK_INT, TK_FLT, TK_STR, TK_HASH,
+	TK_VOID, TK_EOF, TK_ID, TK_INT, TK_FLT, TK_STR,
 
 	TK_NULL, TK_TRUE, TK_FALSE, TK_RETURN, TK_IF, TK_ELSE, TK_FOR, TK_PROC,
 
 	TK_LPAREN, TK_RPAREN, TK_LBRACK, TK_RBRACK, TK_LBRACE, TK_RBRACE,
-	TK_COLON,  TK_SCOLON, TK_COMMA,  TK_PERIOD, TK_RARROW, TK_QMARK,
+	TK_COLON,  TK_SCOLON, TK_COMMA,  TK_PERIOD, TK_RARROW, TK_QMARK, TK_HASH,
 
 	TK_OP_ADD, TK_OP_SUB, TK_OP_MUL, TK_OP_DIV, TK_OP_MOD,
 	TK_OP_EQ,  TK_OP_NEQ, TK_OP_GT,  TK_OP_LT,  TK_OP_GTE, TK_OP_LTE,
diff --git a/src/llvm.c b/src/llvm.c
index 3be9794..41a2b30 100644
--- a/src/llvm.c
+++ b/src/llvm.c
@@ -37,11 +37,10 @@ static LLVMValueRef llvm_stmt_for(ast *a, syt *st);
 static LLVMValueRef llvm_expr(ast *a, syt *st, bool load);
 static LLVMValueRef llvm_expr_proc(ast *a, syt *st);
 static LLVMValueRef llvm_expr_cast(ast *a, syt *st);
+static LLVMValueRef llvm_expr_hash(ast *a, syt *st);
 
 static LLVMValueRef llvm_arr(ast *a, syt *st);
 
-static LLVMValueRef llvm_hash(ast *a, syt *st);
-
 static inline void llvm_init(char *file);
 static inline void llvm_free(void);
 static LLVMTypeRef llvm_type(type *t);
@@ -121,7 +120,6 @@ static LLVMValueRef llvm_stmt(ast *a, syt *st) {
 	case AK_RETURN: { return llvm_stmt_return(a, st);   } break;
 	case AK_IF:     { return llvm_stmt_if(a, st);       } break;
 	case AK_FOR:    { return llvm_stmt_for(a, st);      } break;
-	case AK_HASH_SYSCALL: { return llvm_hash(a, st);    } break;
 	default:        { return llvm_expr(a, st, true);    } break;
 	}
 }
@@ -217,6 +215,8 @@ static LLVMValueRef llvm_expr(ast *a, syt *st, bool load) {
 
 		return LLVMBuildCall2(llvm_builder, sym->llvm_t, sym->llvm_v, args, CL - 1, "");
 	} break;
+	case AK_HASH_SYSCALL:
+	case AK_HASH: { return llvm_expr_hash(a, st); } break;
 	case AK_SUBS: {
 		ast *sym = syt_search_h(st, C[0]->h, C[0]->s);
 		if (sym == NULL) { note(file_name, A.ln, A.cl, 0, "Undefined variable \"%s\"", C[0]->s); }
@@ -229,7 +229,7 @@ static LLVMValueRef llvm_expr(ast *a, syt *st, bool load) {
 		if (!load) { return vr; }
 		else { return LLVMBuildLoad2(llvm_builder, llvm_type(sym->t->base), vr, ""); }
 	}
-	case AK_OP_POS: { a = C[0]; goto reset; /* no-op */ }
+	case AK_OP_POS: { a = C[0]; goto reset; /* no-op */ } break;
 	case AK_OP_NEG: {
 		type *t = ast_type(C[0], st);
 		if (t == NULL) { note(file_name, A.ln, A.cl, -1, "Subtree is missing a type (llvm:llvm_expr)"); }
@@ -370,46 +370,27 @@ static LLVMValueRef llvm_expr_cast(ast *a, syt *st) {
 	return NULL;
 }
 
-/* Generate IR for an array. */
-static LLVMValueRef llvm_arr(ast *a, syt *st) {
-	assert(A.k == AK_ARR);
-	
-	LLVMValueRef *va = calloc(CL, sizeof (LLVMValueRef));
-	
-	for (UINT i = 0; i < CL; i += 1) { va[i] = llvm_expr(C[i], st, true); }
-	
-	return LLVMConstArray(llvm_type(C[0]->t), va, CL);
-}
-
 /* Generate IR for a hash procedure. */
-static LLVMValueRef llvm_hash(ast *a, syt *st) {
-	assert(a->k == AK_HASH_SYSCALL);
+static LLVMValueRef llvm_expr_hash(ast *a, syt *st) {
+	assert(A.k == AK_HASH_SYSCALL);
 
-	UINT arg_count = a->c.al;
-	LLVMValueRef *args = calloc(arg_count, sizeof (LLVMValueRef));
-	for (UINT i = 0; i < arg_count; i += 1) {
-		args[i] = llvm_expr(a->c.a[i], st, true);
+	LLVMValueRef args[CL]; LLVMTypeRef argt[CL];
+	for (UINT i = 0; i < CL; i += 1) {
+		args[i] = llvm_expr(C[i], st, true);
+		argt[i] = llvm_type(ast_type(C[i], st));
 	}
 
-	LLVMTypeRef unsigned_integer_type = llvm_type(&TYPE(TY_UINT));
-	LLVMTypeRef *typs = calloc(arg_count, sizeof (LLVMTypeRef));
-	for (UINT i = 0; i < arg_count; i += 1) {
-		/* FIXME hardcoded for write syscall */
-		if (i == 2) { typs[i] = llvm_type(type_ptr(&TYPE(TY_U8), 1)); }
-		else { typs[i] = unsigned_integer_type; }
-	}
-	
-	LLVMTypeRef func_type = LLVMFunctionType(llvm_type(&TYPE(TY_UINT)), typs, arg_count, false);
+	LLVMTypeRef func_type = LLVMFunctionType(llvm_type(&TYPE(TY_SINT)), argt, CL, false);
 	LLVMValueRef inline_asm = NULL;
 
 	/* TODO check architecture */
 	{ /* x86-64 */
-		assert(arg_count <= 7);
+		assert(CL <= 7);
 
 		char constraints[128] = "={rax}";
 
 		char const *registers[] = { "rax", "rdi", "rsi", "rdx", "r10", "r8", "r9" };
-		for (UINT i = 0; i < arg_count; i += 1) {
+		for (UINT i = 0; i < CL; i += 1) {
 			strcat(constraints, ",{");
 			strcat(constraints, registers[i]);
 			strcat(constraints, "}");
@@ -422,7 +403,18 @@ static LLVMValueRef llvm_hash(ast *a, syt *st) {
 		);
 	}
 
-	return LLVMBuildCall2(llvm_builder, func_type, inline_asm, args, arg_count, "");
+	return LLVMBuildCall2(llvm_builder, func_type, inline_asm, args, CL, "");
+}
+
+/* Generate IR for an array. */
+static LLVMValueRef llvm_arr(ast *a, syt *st) {
+	assert(A.k == AK_ARR);
+	
+	LLVMValueRef *va = calloc(CL, sizeof (LLVMValueRef));
+	
+	for (UINT i = 0; i < CL; i += 1) { va[i] = llvm_expr(C[i], st, true); }
+	
+	return LLVMConstArray(llvm_type(C[0]->t), va, CL);
 }
 
 /* Initialise LLVM. */
diff --git a/src/parse.c b/src/parse.c
index bf81581..2b1fed7 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -29,7 +29,7 @@ char *ast_ks[] = {
 
 	"AK_ID", "AK_CALL", "AK_BOOL", "AK_INT", "AK_FLT", "AK_ARR", "AK_SUBS",
 
-	"AK_HASH_SYSCALL"
+	"AK_HASH", "AK_HASH_SYSCALL"
 };
 
 static ast *parse_stmt(lex *l, syt *st);
@@ -253,24 +253,8 @@ static ast *parse_expr(lex *l, syt *st, s32 o) {
 	case TK_LBRACE: { return parse_expr_compound(l, st); } break;
 	case TK_PROC:   { return parse_expr_proc(l, st);     } break;
 	case TK_HASH: {
-		tok t = lex_next(l); bool needs_args = false;
-		left = ast_init(0, t.ln, t.cl);
-		
-		if (strcmp(t.s, "#syscall") == 0) { left->k = AK_HASH_SYSCALL; needs_args = true; }
-		else { note("TODO", t.ln, t.cl, 0, "%s: unrecognised hash procedure", t.s); }
-		
-		if (needs_args) {
-			lex_kind(l, TK_LPAREN);
-			
-			if (T.k != TK_RPAREN) for (;;) {
-				ast_push(left, parse_expr(l, st, 0));
-				if (T.k == TK_COMMA) { lex_next(l); } else { break; }
-			}
-			
-			lex_kind(l, TK_RPAREN);
-		}
-		
-		if (!(left->s = strdup(t.s))) { error(1, "%s", SERR); }
+		left = ast_init(AK_HASH, T.ln, T.cl); lex_next(l);
+		left->h = T.h; left->s = strdup_or_fail(T.s); lex_kind(l, TK_ID);
 	} break;
 	case TK_LPAREN: { lex_next(l); left = parse_expr(l, st, 0); lex_kind(l, TK_RPAREN); } break;
 	case TK_OP_ADD: { left = ast_init(AK_OP_POS, T.ln, T.cl); } goto prefix;
@@ -285,8 +269,10 @@ static ast *parse_expr(lex *l, syt *st, s32 o) {
 	/* Parse an infix expression if one is present */
 	for (ast *a = NULL; tok_precedence(T.k) > o; left = a) switch (T.k) {
 	case TK_LPAREN: {
-		a = ast_init(AK_CALL, left->ln, left->cl);
-		lex_next(l); ast_push(a, left);
+		if (left->k == AK_HASH) { a = left; }
+		else { a = ast_init(AK_CALL, left->ln, left->cl); ast_push(a, left); }
+		
+		lex_next(l);
 
 		/* Parse call arguments if present */
 		if (T.k != TK_RPAREN) for (;;) {
diff --git a/src/parse.h b/src/parse.h
index 5b489f5..50dd366 100644
--- a/src/parse.h
+++ b/src/parse.h
@@ -26,7 +26,7 @@ typedef enum {
 
 	AK_ID, AK_CALL, AK_BOOL, AK_INT, AK_FLT, AK_ARR, AK_SUBS,
 
-	AK_HASH_SYSCALL
+	AK_HASH, AK_HASH_SYSCALL
 } ast_k;
 
 /*