G

G Programming Language
git clone http://git.omkov.net/G
Log | Tree | Refs | README | Download

Author: Jakob Wakeling <[email protected]>
Date: 2023-05-24 02:16:18
Commit: af77441bb0a4dd397f9bcad216f5f892557b938b
Parent: c4932f6022fd80cd96b1141df6839f9ae78ef23c

Implement system calls

Diffstat

M README.md | 12 +++++++-----
M examples/main.g | 12 ++++--------
M src/lex.c | 51 +++++++++++++++++++++++++++++++++++++++++++--------
M src/lex.h | 4 ++--
M src/llvm.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
M src/llvm.h | 2 ++
M src/main.c | 4 +++-
M src/parse.c | 44 ++++++++++++++++++++++++++++++++++++--------
M src/parse.h | 4 +++-

9 files changed, 194 insertions, 37 deletions

diff --git a/README.md b/README.md
index 85f9d99..ca1b7f5 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # The G Programming Language
 
 A modern alternative to **C** intended to be fast, simple, and pleasant.
-Influenced by **C**, **C++**, **Odin**, **Rust**, and **Zig**.
+Influenced by **C**, **C++**, **Odin**, and others.
 
 Note that at present, **G** is highly unstable and will certainly change.
 
@@ -60,19 +60,21 @@ command. The second command will output an executable file, *a.out* by default.
 - [x] Implement procedure declarations
 - [x] Implement procedure calls
 - [ ] Implement procedure arguments
-- [x] Implement variable declarations
-- [x] Implement variable assignments
+- [x] Implement variables
+- [ ] Implement booleans
 - [x] Implement integers
 - [x] Implement reals
 - [ ] Implement arrays
-- [x] Implement expression parsing
-- [x] Implement expression code generation
+- [x] Implement expressions
 - [x] Implement type casting
 - [ ] Implement the *type* type
 - [ ] Implement *defer*
 - [ ] Implement *errdefer*
 - [ ] Implement *if* and *else*
 - [ ] Implement *for*
+- [ ] Implement first class strings
+- [x] Implement syscalls
+- [ ] Implement foreign code calling
 - [ ] Implement generics of some kind
 - [ ] Implement module definition
 - [ ] Implement module use
diff --git a/examples/main.g b/examples/main.g
index a6de101..1d5cfa9 100644
--- a/examples/main.g
+++ b/examples/main.g
@@ -1,9 +1,5 @@
-var := 42;
-
-test :: proc() -> u64 {
-	return u64(var);
-}
-
 main :: proc() -> u64 {
-	return test();
+	#syscall(u64(60), u64(42));
+	var : u64 = 69;
+	return var;
 }
diff --git a/src/lex.c b/src/lex.c
index b6e421c..81843f0 100644
--- a/src/lex.c
+++ b/src/lex.c
@@ -17,9 +17,9 @@
 #include <stdio.h>
 
 char *tok_ks[] = {
-	"TK_NULL", "TK_EOF", "TK_ID", "TK_NUM", "TK_STR",
+	"NULL_TK", "TK_EOF", "TK_ID", "TK_NUM", "TK_STR", "TK_HASH",
 
-	"TK_RETURN", "TK_IF", "TK_ELSE", "TK_FOR", "TK_PROC",
+	"TK_NULL", "TK_TRUE", "TK_FALSE", "TK_RETURN", "TK_IF", "TK_ELSE", "TK_FOR", "TK_PROC",
 
 	"TK_LPAREN", "TK_RPAREN", "TK_LBRACK", "TK_RBRACK", "TK_LBRACE", "TK_RBRACE",
 	"TK_COLON",  "TK_SCOLON", "TK_COMMA",  "TK_PERIOD", "TK_RARROW", "TK_QMARK",
@@ -68,12 +68,12 @@ tok lex_peek(lex *l) { return T; }
 
 /* Lex the next token, and return the current one. */
 tok lex_next(lex *l) {
-	if (T.k == TK_EOF) { return T; }
+	redo:; if (T.k == TK_EOF) { return T; }
 	tok t = T; T = (tok){ 0 };
 
 	/* Skip null characters and whitespace */
 	skip:; for (; P != Q && (!C || isspace(C)); P += 1) switch (C) {
-		case '\0': { /* TODO warn user of null character */ } break;
+		case '\0': { note(l->n, l->ln, l->cl, 1, "Null character ignored"); } break;
 		case '\n': { LN += 1; CL = 0; } break;
 		default:   { CL += 1; } break;
 	}
@@ -107,7 +107,10 @@ tok lex_next(lex *l) {
 		for (P += 1; isalpha(C) || isdigit(C) || C == '_'; P += 1);
 		sl = P - s; CL += sl; T.h = syt_hash(s, sl);
 
-		if      (strncmp(s, "return", sl) == 0) { T.k = TK_RETURN; }
+		if      (strncmp(s, "null",   sl) == 0) { T.k = TK_NULL;   }
+		else if (strncmp(s, "true",   sl) == 0) { T.k = TK_TRUE;   }
+		else if (strncmp(s, "false",  sl) == 0) { T.k = TK_FALSE;  }
+		else if (strncmp(s, "return", sl) == 0) { T.k = TK_RETURN; }
 		else if (strncmp(s, "if",     sl) == 0) { T.k = TK_IF;     }
 		else if (strncmp(s, "else",   sl) == 0) { T.k = TK_ELSE;   }
 		else if (strncmp(s, "for",    sl) == 0) { T.k = TK_FOR;    }
@@ -127,6 +130,18 @@ tok lex_next(lex *l) {
 		T.k = TK_NUM; if (!(T.s = strndup(s, sl))) { error(1, SERR); }
 	}
 
+	/* Handle hash procedures */
+	else if (C == '#') {
+		char *s = P; UINT sl;
+		
+		for (P += 1; isalpha(C) || isdigit(C) || C == '_'; P += 1);
+		sl = P - s; CL += sl;
+		
+		if (sl <= 1) { note(l->n, T.ln, T.cl, 0, "A hash must be followed by an identifier"); goto redo; }
+		
+		T.k = TK_HASH; T.h = syt_hash(s, sl); if (!(T.s = strndup(s, sl))) { error(1, SERR); }
+	}
+	
 	/* Handle punctuators and operators */
 	else switch (C) {
 		case '(': { T.k = TK_LPAREN; P += 1; CL += 1; } break;
@@ -196,12 +211,31 @@ tok lex_next(lex *l) {
 			case '=': { T.k = TK_AS_XOR; P += 2; CL += 2; } break;
 		} break;
 
-		case '\'': { /* TODO */ } break;
-		case '\"': { /* TODO */ } break;
+		/* TODO implement character escapes */
+		/* TODO implement multi line strings */
+		// case '\'': {
+		// 	char *s = P; UINT sl;
+			
+		// 	for (P += 1; C != '\''; P += 1);
+		// 	sl = P - s; CL += sl;
+			
+		// 	T.k = TK_NUM;
+		// } break;
+		case '\"': {
+			char *s = P += 1; UINT sl;
+			
+			for (; C != '\"' && C != '\n'; P += 1);
+			sl = P - s; CL += sl; T.h = syt_hash(s, sl);
+			
+			if (C != '\"') { note(l->n, T.ln, T.cl, 0, "Missing closing quote"); }
+			else { P += 1; }
+			
+			T.k = TK_STR; if (!(T.s = strndup(s, sl))) { error(1, SERR); }
+		} break;
 
 		/* Handle unknown characters */
 		default: {
-			note("TODO", LN, CL, 1, "Unknown character: %X '%c'", C, C);
+			note(l->n, LN, CL, 1, "Unknown character: %X '%c'", C, C);
 			P += 1; CL += 1;
 		} break;
 	}
@@ -212,7 +246,7 @@ tok lex_next(lex *l) {
 /* Lex the next token if the current is of a specific type. */
 tok lex_kind(lex *l, tok_k k) {
 	if (T.k != k) {
-		note("TODO", T.ln, T.cl, 0, "Unexpected: \"%s\", was expecting: \"%s\"", tok_ks[T.k], tok_ks[k]);
+		note(l->n, T.ln, T.cl, 0, "Unexpected: \"%s\", was expecting: \"%s\"", tok_ks[T.k], tok_ks[k]);
 	}
 
 	return lex_next(l);
diff --git a/src/lex.h b/src/lex.h
index 849c729..2ce4303 100644
--- a/src/lex.h
+++ b/src/lex.h
@@ -10,9 +10,9 @@
 
 /* Remember to update tok_ks in lex.c */
 typedef enum {
-	TK_NULL, TK_EOF, TK_ID, TK_NUM, TK_STR,
+	NULL_TK, TK_EOF, TK_ID, TK_NUM, TK_STR, TK_HASH,
 
-	TK_RETURN, TK_IF, TK_ELSE, TK_FOR, TK_PROC,
+	TK_NULL, TK_TRUE, TK_FALSE, TK_RETURN, TK_IF, TK_ELSE, TK_FOR, TK_PROC,
 
 	TK_LPAREN, TK_RPAREN, TK_LBRACK, TK_RBRACK, TK_LBRACE, TK_RBRACE,
 	TK_COLON,  TK_SCOLON, TK_COMMA,  TK_PERIOD, TK_RARROW, TK_QMARK,
diff --git a/src/llvm.c b/src/llvm.c
index cf80459..db97832 100644
--- a/src/llvm.c
+++ b/src/llvm.c
@@ -19,6 +19,8 @@
 #include <llvm-c/Types.h>
 
 #include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 
 static LLVMContextRef llvm_context = NULL;
 static LLVMModuleRef  llvm_module  = NULL;
@@ -40,6 +42,8 @@ static LLVMValueRef llvm_expr_cast(ast *a, syt *st);
 static LLVMValueRef llvm_int(ast *a);
 static LLVMValueRef llvm_flt(ast *a);
 
+static LLVMValueRef llvm_hash(ast *a, syt *st);
+
 static inline void llvm_init(void);
 static inline void llvm_free(void);
 static LLVMTypeRef llvm_type(type *t);
@@ -55,10 +59,6 @@ void llvm(ast *a) {
 	/* Generate IR for all child nodes */
 	for (UINT i = 0; i < a->c.al; i += 1) { llvm_stmt_decl(a->c.a[i], &a->st); }
 
-	if (LLVMWriteBitcodeToFile(llvm_module, "llvm.bc")) {
-		error(2, "LLVMWriteBitcodeToFile failure");
-	}
-	
 	char *err;
 	LLVMVerifyModule(llvm_module, LLVMAbortProcessAction, &err);
 	LLVMDisposeMessage(err);
@@ -85,6 +85,35 @@ void llvm(ast *a) {
 	LLVMDisposeTargetMachine(machine); llvm_free();
 }
 
+void llvm_bitcode(ast *a) {
+	llvm_init();
+	
+	/* Generate IR for all child nodes */
+	for (UINT i = 0; i < a->c.al; i += 1) { llvm_stmt_decl(a->c.a[i], &a->st); }
+	
+	if (LLVMWriteBitcodeToFile(llvm_module, "llvm.bc")) {
+		error(2, "LLVMWriteBitcodeToFile failure");
+	}
+	
+	char *err;
+	LLVMVerifyModule(llvm_module, LLVMAbortProcessAction, &err);
+	LLVMDisposeMessage(err); llvm_free();
+}
+
+void llvm_ir(ast *a) {
+	llvm_init();
+	
+	/* Generate IR for all child nodes */
+	for (UINT i = 0; i < a->c.al; i += 1) { llvm_stmt_decl(a->c.a[i], &a->st); }
+	
+	/* TODO */
+	
+	
+	char *err;
+	LLVMVerifyModule(llvm_module, LLVMAbortProcessAction, &err);
+	LLVMDisposeMessage(err); llvm_free();
+}
+
 /* Generate IR for a statement. */
 static LLVMValueRef llvm_stmt(ast *a, syt *st) {
 	switch (a->k) {
@@ -95,6 +124,8 @@ static LLVMValueRef llvm_stmt(ast *a, syt *st) {
 	case AK_RETURN: { return llvm_stmt_return(a, st);   } break;
 	case AK_IF:     { return llvm_stmt_if(a, st);       } break;
 	case AK_FOR:    { return llvm_stmt_for(a, st);      } break;
+	case AK_HASH_SYSCALL:
+		{ return llvm_hash(a, st); } break;
 	default:        { error(2, "llvm_stmt: Unhandled AST kind %s", ast_ks[a->k]); } break;
 	}
 }
@@ -295,6 +326,48 @@ static LLVMValueRef llvm_flt(ast *a) {
 	return LLVMConstReal(llvm_type(a->t), a->v.v_flt);
 }
 
+/* Generate IR for a hash procedure. */
+static LLVMValueRef llvm_hash(ast *a, syt *st) {
+	assert(a->k == AK_HASH_SYSCALL);
+	
+	UINT arg_count = a->c.al;
+	LLVMValueRef *args = calloc(arg_count, sizeof (LLVMValueRef));
+	for (UINT i = 0; i < arg_count; i += 1) {
+		args[i] = llvm_expr(a->c.a[i], st);
+	}
+	
+	LLVMTypeRef unsigned_integer_type = llvm_type(&TYPE(TY_UINT));
+	LLVMTypeRef *typs = calloc(arg_count, sizeof (LLVMTypeRef));
+	for (UINT i = 0; i < arg_count; i += 1) {
+		typs[i] = unsigned_integer_type;
+	}
+	
+	LLVMTypeRef func_type = LLVMFunctionType(llvm_type(&TYPE(TY_UINT)), typs, arg_count, false);
+	LLVMValueRef inline_asm = NULL;
+	
+	/* TODO check architecture */
+	{ /* x86-64 */
+		assert(arg_count <= 7);
+		
+		char constraints[128] = "={rax}";
+		
+		char const *registers[] = { "rax", "rdi", "rsi", "rdx", "r10", "r8", "r9" };
+		for (UINT i = 0; i < arg_count; i += 1) {
+			strcat(constraints, ",{");
+			strcat(constraints, registers[i]);
+			strcat(constraints, "}");
+		}
+		
+		strcat(constraints, ",~{rcx},~{r11},~{memory}");
+		
+		inline_asm = LLVMGetInlineAsm(
+			func_type, "syscall", 7, constraints, strlen(constraints), true, false, LLVMInlineAsmDialectATT, false
+		);
+	}
+	
+	return LLVMBuildCall2(llvm_builder, func_type, inline_asm, args, arg_count, "");
+}
+
 /* Initialise LLVM. */
 static inline void llvm_init(void) {
 	llvm_context = LLVMGetGlobalContext();
@@ -312,9 +385,15 @@ static inline void llvm_free(void) {
 /* Return the appropriate LLVMTypeRef for a G type. */
 static LLVMTypeRef llvm_type(type *t) {
 	switch (t->k) {
+	case TY_B8:   { return LLVMIntType(8);  } break;
+	case TY_B16:  { return LLVMIntType(16); } break;
+	case TY_B32:  { return LLVMIntType(32); } break;
+	case TY_B64:  { return LLVMIntType(64); } break;
 	case TY_UINT: case TY_SINT: { return LLVMIntType(64);  } break;
+	case TY_BYTE: case TY_CHAR:
 	case TY_U8:   case TY_S8:   { return LLVMIntType(8);   } break;
 	case TY_U16:  case TY_S16:  { return LLVMIntType(16);  } break;
+	case TY_RUNE:
 	case TY_U32:  case TY_S32:  { return LLVMIntType(32);  } break;
 	case TY_U64:  case TY_S64:  { return LLVMIntType(64);  } break;
 	case TY_U128: case TY_S128: { return LLVMIntType(128); } break;
@@ -329,9 +408,15 @@ static LLVMTypeRef llvm_type(type *t) {
 /* Return the default value for a G type. */
 static LLVMValueRef llvm_ival(type *t) {
 	switch (t->k) {
+	case TY_B8:   { return LLVMConstInt(LLVMIntType(8),   0, false); } break;
+	case TY_B16:  { return LLVMConstInt(LLVMIntType(16),  0, false); } break;
+	case TY_B32:  { return LLVMConstInt(LLVMIntType(32),  0, false); } break;
+	case TY_B64:  { return LLVMConstInt(LLVMIntType(64),  0, false); } break;
 	case TY_UINT: case TY_SINT: { return LLVMConstInt(LLVMIntType(64),  0, false); } break;
+	case TY_BYTE: case TY_CHAR:
 	case TY_U8:   case TY_S8:   { return LLVMConstInt(LLVMIntType(8),   0, false); } break;
 	case TY_U16:  case TY_S16:  { return LLVMConstInt(LLVMIntType(16),  0, false); } break;
+	case TY_RUNE:
 	case TY_U32:  case TY_S32:  { return LLVMConstInt(LLVMIntType(32),  0, false); } break;
 	case TY_U64:  case TY_S64:  { return LLVMConstInt(LLVMIntType(64),  0, false); } break;
 	case TY_U128: case TY_S128: { return LLVMConstInt(LLVMIntType(128), 0, false); } break;
diff --git a/src/llvm.h b/src/llvm.h
index 5e1e248..c35ad06 100644
--- a/src/llvm.h
+++ b/src/llvm.h
@@ -9,5 +9,7 @@
 #include "parse.h"
 
 extern void llvm(ast *a);
+extern void llvm_bitcode(ast *a);
+extern void llvm_ir(ast *a);
 
 #endif // G_LLVM_H_CZUMSHFW
diff --git a/src/main.c b/src/main.c
index 42e698c..3d25b5b 100644
--- a/src/main.c
+++ b/src/main.c
@@ -78,7 +78,9 @@ static void compile(const char * file, char *src, UINT len) {
 	analyse(a);
 	if (Pflag) { ast_print(a, 0); goto end; }
 
-	llvm(a);
+	if (bflag) { llvm_ir(a); }
+	else if (Bflag) { llvm_bitcode(a); }
+	else { llvm(a); }
 
 	end:; return;
 }
diff --git a/src/parse.c b/src/parse.c
index b373212..df43318 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -29,7 +29,9 @@ char *ast_ks[] = {
 
 	"AK_ASSIGN", "AK_AS_ADD", "AK_AS_SUB", "AK_AS_MUL", "AK_AS_DIV", "AK_AS_MOD",
 
-	"AK_ID_VAR", "AK_CALL", "AK_INT", "AK_FLT"
+	"AK_ID_VAR", "AK_CALL", "AK_INT", "AK_FLT",
+	
+	"AK_HASH_SYSCALL"
 };
 
 static ast *parse_stmt(lex *l, syt *st);
@@ -305,7 +307,7 @@ static ast *parse_expr(lex *l, syt *st, bool arg) {
 	case TK_LPAREN: { tok_a_push(&ts, lex_next(l));      } break;
 	case TK_RPAREN: {
 		for (tok t = tok_a_pop(&ts);; t = tok_a_pop(&ts)) {
-			if (t.k == TK_NULL) {
+			if (t.k == NULL_TK) {
 				if (arg) { goto eox; }
 				note(l->n, T.ln, T.cl, -1, "expected left parenthesis");
 			}
@@ -314,12 +316,34 @@ static ast *parse_expr(lex *l, syt *st, bool arg) {
 			shunt(&as, t, op_lookup(t.k, false));
 		}
 
-		/* TODO handle procedure calls */
-		
 		lex_next(l);
 	} break;
+	case TK_HASH: {
+		ast *a = ast_init(); tok t = lex_kind(l, TK_HASH);
+		a->ln = t.ln; a->cl = t.cl; bool needs_args = false;
+		
+		if (strcmp(t.s, "#syscall") == 0) { a->k = AK_HASH_SYSCALL; needs_args = true; }
+		else { note("TODO", t.ln, t.cl, 0, "%s: unrecognised hash procedure", t.s); }
+		
+		if (needs_args) {
+			lex_kind(l, TK_LPAREN);
+			
+			if (T.k != TK_RPAREN) for (;;) {
+				ast_push(a, parse_expr(l, st, true) /* FIXME THIS IS NULL WHEN STRING OR ANY UNHANDLED EXPRESSION */);
+				if (T.k != TK_COMMA) { break; }
+				if (a->k == AK_CAST) { note(l->n, T.ln, T.cl, 0, "type casts must have only a single argument"); }
+				lex_kind(l, TK_COMMA);
+			}
+			
+			lex_kind(l, TK_RPAREN);
+		}
+		
+		if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); }
+		
+		ast_a_push(&as, a);
+	} break;
 	default: /* Handle operators */ {
-		op o1, o2; if ((o1 = op_lookup(T.k, false)).tk == TK_NULL) { goto eox; }
+		op o1, o2; if ((o1 = op_lookup(T.k, false)).tk == NULL_TK) { goto eox; }
 
 		/*
 			If there is an operator at the top of the operator stack that is not
@@ -327,7 +351,7 @@ static ast *parse_expr(lex *l, syt *st, bool arg) {
 			same precedence as o1 and o1 is left-associative, then pop it from
 			the stack onto the output.
 		*/
-		for (o2 = op_lookup(tok_a_peek(&ts).k, false); o2.tk != TK_NULL; o2 = op_lookup(tok_a_peek(&ts).k, false)) {
+		for (o2 = op_lookup(tok_a_peek(&ts).k, false); o2.tk != NULL_TK; o2 = op_lookup(tok_a_peek(&ts).k, false)) {
 			if (o2.tk == TK_LPAREN || (o1.o < o2.o && (o1.o != o2.o || o1.as == true))) { break; }
 
 			shunt(&as, tok_a_pop(&ts), o2);
@@ -338,7 +362,7 @@ static ast *parse_expr(lex *l, syt *st, bool arg) {
 	} eox:;
 
 	/* Pop any remaining operators from the operator stack */
-	for (tok t = tok_a_pop(&ts); t.k != TK_NULL; t = tok_a_pop(&ts)) {
+	for (tok t = tok_a_pop(&ts); t.k != NULL_TK; t = tok_a_pop(&ts)) {
 		if (t.k == TK_LPAREN) { error(1, "LPAREN: TODO"); }
 		if (t.k == TK_RPAREN) { error(1, "RPAREN: TODO"); }
 
diff --git a/src/parse.h b/src/parse.h
index 0642c50..ad88305 100644
--- a/src/parse.h
+++ b/src/parse.h
@@ -25,7 +25,9 @@ typedef enum {
 
 	AK_ASSIGN, AK_AS_ADD, AK_AS_SUB, AK_AS_MUL, AK_AS_DIV, AK_AS_MOD,
 
-	AK_ID_VAR, AK_CALL, AK_INT, AK_FLT
+	AK_ID_VAR, AK_CALL, AK_INT, AK_FLT,
+	
+	AK_HASH_SYSCALL
 } ast_k;
 
 /*