Author | Jakob Wakeling <[email protected]> |
Date | 2023-05-24 02:16:18 |
Commit | af77441bb0a4dd397f9bcad216f5f892557b938b |
Parent | c4932f6022fd80cd96b1141df6839f9ae78ef23c |
Implement system calls
Diffstat
M | README.md | | | 12 | +++++++----- |
M | examples/main.g | | | 12 | ++++-------- |
M | src/lex.c | | | 51 | +++++++++++++++++++++++++++++++++++++++++++-------- |
M | src/lex.h | | | 4 | ++-- |
M | src/llvm.c | | | 98 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- |
M | src/llvm.h | | | 2 | ++ |
M | src/main.c | | | 4 | +++- |
M | src/parse.c | | | 44 | ++++++++++++++++++++++++++++++++++++-------- |
M | src/parse.h | | | 4 | +++- |
9 files changed, 194 insertions, 37 deletions
diff --git a/README.md b/README.md index 85f9d99..ca1b7f5 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # The G Programming Language A modern alternative to **C** intended to be fast, simple, and pleasant. -Influenced by **C**, **C++**, **Odin**, **Rust**, and **Zig**. +Influenced by **C**, **C++**, **Odin**, and others. Note that at present, **G** is highly unstable and will certainly change. @@ -60,19 +60,21 @@ command. The second command will output an executable file, *a.out* by default. - [x] Implement procedure declarations - [x] Implement procedure calls - [ ] Implement procedure arguments -- [x] Implement variable declarations -- [x] Implement variable assignments +- [x] Implement variables +- [ ] Implement booleans - [x] Implement integers - [x] Implement reals - [ ] Implement arrays -- [x] Implement expression parsing -- [x] Implement expression code generation +- [x] Implement expressions - [x] Implement type casting - [ ] Implement the *type* type - [ ] Implement *defer* - [ ] Implement *errdefer* - [ ] Implement *if* and *else* - [ ] Implement *for* +- [ ] Implement first class strings +- [x] Implement syscalls +- [ ] Implement foreign code calling - [ ] Implement generics of some kind - [ ] Implement module definition - [ ] Implement module use diff --git a/examples/main.g b/examples/main.g index a6de101..1d5cfa9 100644 --- a/examples/main.g +++ b/examples/main.g @@ -1,9 +1,5 @@ -var := 42; - -test :: proc() -> u64 { - return u64(var); -} - main :: proc() -> u64 { - return test(); + #syscall(u64(60), u64(42)); + var : u64 = 69; + return var; } diff --git a/src/lex.c b/src/lex.c index b6e421c..81843f0 100644 --- a/src/lex.c +++ b/src/lex.c @@ -17,9 +17,9 @@ #include <stdio.h> char *tok_ks[] = { - "TK_NULL", "TK_EOF", "TK_ID", "TK_NUM", "TK_STR", + "NULL_TK", "TK_EOF", "TK_ID", "TK_NUM", "TK_STR", "TK_HASH", - "TK_RETURN", "TK_IF", "TK_ELSE", "TK_FOR", "TK_PROC", + "TK_NULL", "TK_TRUE", "TK_FALSE", "TK_RETURN", "TK_IF", "TK_ELSE", "TK_FOR", "TK_PROC", "TK_LPAREN", "TK_RPAREN", "TK_LBRACK", "TK_RBRACK", "TK_LBRACE", "TK_RBRACE", "TK_COLON", "TK_SCOLON", "TK_COMMA", "TK_PERIOD", "TK_RARROW", "TK_QMARK", @@ -68,12 +68,12 @@ tok lex_peek(lex *l) { return T; } /* Lex the next token, and return the current one. */ tok lex_next(lex *l) { - if (T.k == TK_EOF) { return T; } + redo:; if (T.k == TK_EOF) { return T; } tok t = T; T = (tok){ 0 }; /* Skip null characters and whitespace */ skip:; for (; P != Q && (!C || isspace(C)); P += 1) switch (C) { - case '\0': { /* TODO warn user of null character */ } break; + case '\0': { note(l->n, l->ln, l->cl, 1, "Null character ignored"); } break; case '\n': { LN += 1; CL = 0; } break; default: { CL += 1; } break; } @@ -107,7 +107,10 @@ tok lex_next(lex *l) { for (P += 1; isalpha(C) || isdigit(C) || C == '_'; P += 1); sl = P - s; CL += sl; T.h = syt_hash(s, sl); - if (strncmp(s, "return", sl) == 0) { T.k = TK_RETURN; } + if (strncmp(s, "null", sl) == 0) { T.k = TK_NULL; } + else if (strncmp(s, "true", sl) == 0) { T.k = TK_TRUE; } + else if (strncmp(s, "false", sl) == 0) { T.k = TK_FALSE; } + else if (strncmp(s, "return", sl) == 0) { T.k = TK_RETURN; } else if (strncmp(s, "if", sl) == 0) { T.k = TK_IF; } else if (strncmp(s, "else", sl) == 0) { T.k = TK_ELSE; } else if (strncmp(s, "for", sl) == 0) { T.k = TK_FOR; } @@ -127,6 +130,18 @@ tok lex_next(lex *l) { T.k = TK_NUM; if (!(T.s = strndup(s, sl))) { error(1, SERR); } } + /* Handle hash procedures */ + else if (C == '#') { + char *s = P; UINT sl; + + for (P += 1; isalpha(C) || isdigit(C) || C == '_'; P += 1); + sl = P - s; CL += sl; + + if (sl <= 1) { note(l->n, T.ln, T.cl, 0, "A hash must be followed by an identifier"); goto redo; } + + T.k = TK_HASH; T.h = syt_hash(s, sl); if (!(T.s = strndup(s, sl))) { error(1, SERR); } + } + /* Handle punctuators and operators */ else switch (C) { case '(': { T.k = TK_LPAREN; P += 1; CL += 1; } break; @@ -196,12 +211,31 @@ tok lex_next(lex *l) { case '=': { T.k = TK_AS_XOR; P += 2; CL += 2; } break; } break; - case '\'': { /* TODO */ } break; - case '\"': { /* TODO */ } break; + /* TODO implement character escapes */ + /* TODO implement multi line strings */ + // case '\'': { + // char *s = P; UINT sl; + + // for (P += 1; C != '\''; P += 1); + // sl = P - s; CL += sl; + + // T.k = TK_NUM; + // } break; + case '\"': { + char *s = P += 1; UINT sl; + + for (; C != '\"' && C != '\n'; P += 1); + sl = P - s; CL += sl; T.h = syt_hash(s, sl); + + if (C != '\"') { note(l->n, T.ln, T.cl, 0, "Missing closing quote"); } + else { P += 1; } + + T.k = TK_STR; if (!(T.s = strndup(s, sl))) { error(1, SERR); } + } break; /* Handle unknown characters */ default: { - note("TODO", LN, CL, 1, "Unknown character: %X '%c'", C, C); + note(l->n, LN, CL, 1, "Unknown character: %X '%c'", C, C); P += 1; CL += 1; } break; } @@ -212,7 +246,7 @@ tok lex_next(lex *l) { /* Lex the next token if the current is of a specific type. */ tok lex_kind(lex *l, tok_k k) { if (T.k != k) { - note("TODO", T.ln, T.cl, 0, "Unexpected: \"%s\", was expecting: \"%s\"", tok_ks[T.k], tok_ks[k]); + note(l->n, T.ln, T.cl, 0, "Unexpected: \"%s\", was expecting: \"%s\"", tok_ks[T.k], tok_ks[k]); } return lex_next(l); diff --git a/src/lex.h b/src/lex.h index 849c729..2ce4303 100644 --- a/src/lex.h +++ b/src/lex.h @@ -10,9 +10,9 @@ /* Remember to update tok_ks in lex.c */ typedef enum { - TK_NULL, TK_EOF, TK_ID, TK_NUM, TK_STR, + NULL_TK, TK_EOF, TK_ID, TK_NUM, TK_STR, TK_HASH, - TK_RETURN, TK_IF, TK_ELSE, TK_FOR, TK_PROC, + TK_NULL, TK_TRUE, TK_FALSE, TK_RETURN, TK_IF, TK_ELSE, TK_FOR, TK_PROC, TK_LPAREN, TK_RPAREN, TK_LBRACK, TK_RBRACK, TK_LBRACE, TK_RBRACE, TK_COLON, TK_SCOLON, TK_COMMA, TK_PERIOD, TK_RARROW, TK_QMARK, diff --git a/src/llvm.c b/src/llvm.c index cf80459..db97832 100644 --- a/src/llvm.c +++ b/src/llvm.c @@ -19,6 +19,8 @@ #include <llvm-c/Types.h> #include <stdio.h> +#include <stdlib.h> +#include <string.h> static LLVMContextRef llvm_context = NULL; static LLVMModuleRef llvm_module = NULL; @@ -40,6 +42,8 @@ static LLVMValueRef llvm_expr_cast(ast *a, syt *st); static LLVMValueRef llvm_int(ast *a); static LLVMValueRef llvm_flt(ast *a); +static LLVMValueRef llvm_hash(ast *a, syt *st); + static inline void llvm_init(void); static inline void llvm_free(void); static LLVMTypeRef llvm_type(type *t); @@ -55,10 +59,6 @@ void llvm(ast *a) { /* Generate IR for all child nodes */ for (UINT i = 0; i < a->c.al; i += 1) { llvm_stmt_decl(a->c.a[i], &a->st); } - if (LLVMWriteBitcodeToFile(llvm_module, "llvm.bc")) { - error(2, "LLVMWriteBitcodeToFile failure"); - } - char *err; LLVMVerifyModule(llvm_module, LLVMAbortProcessAction, &err); LLVMDisposeMessage(err); @@ -85,6 +85,35 @@ void llvm(ast *a) { LLVMDisposeTargetMachine(machine); llvm_free(); } +void llvm_bitcode(ast *a) { + llvm_init(); + + /* Generate IR for all child nodes */ + for (UINT i = 0; i < a->c.al; i += 1) { llvm_stmt_decl(a->c.a[i], &a->st); } + + if (LLVMWriteBitcodeToFile(llvm_module, "llvm.bc")) { + error(2, "LLVMWriteBitcodeToFile failure"); + } + + char *err; + LLVMVerifyModule(llvm_module, LLVMAbortProcessAction, &err); + LLVMDisposeMessage(err); llvm_free(); +} + +void llvm_ir(ast *a) { + llvm_init(); + + /* Generate IR for all child nodes */ + for (UINT i = 0; i < a->c.al; i += 1) { llvm_stmt_decl(a->c.a[i], &a->st); } + + /* TODO */ + + + char *err; + LLVMVerifyModule(llvm_module, LLVMAbortProcessAction, &err); + LLVMDisposeMessage(err); llvm_free(); +} + /* Generate IR for a statement. */ static LLVMValueRef llvm_stmt(ast *a, syt *st) { switch (a->k) { @@ -95,6 +124,8 @@ static LLVMValueRef llvm_stmt(ast *a, syt *st) { case AK_RETURN: { return llvm_stmt_return(a, st); } break; case AK_IF: { return llvm_stmt_if(a, st); } break; case AK_FOR: { return llvm_stmt_for(a, st); } break; + case AK_HASH_SYSCALL: + { return llvm_hash(a, st); } break; default: { error(2, "llvm_stmt: Unhandled AST kind %s", ast_ks[a->k]); } break; } } @@ -295,6 +326,48 @@ static LLVMValueRef llvm_flt(ast *a) { return LLVMConstReal(llvm_type(a->t), a->v.v_flt); } +/* Generate IR for a hash procedure. */ +static LLVMValueRef llvm_hash(ast *a, syt *st) { + assert(a->k == AK_HASH_SYSCALL); + + UINT arg_count = a->c.al; + LLVMValueRef *args = calloc(arg_count, sizeof (LLVMValueRef)); + for (UINT i = 0; i < arg_count; i += 1) { + args[i] = llvm_expr(a->c.a[i], st); + } + + LLVMTypeRef unsigned_integer_type = llvm_type(&TYPE(TY_UINT)); + LLVMTypeRef *typs = calloc(arg_count, sizeof (LLVMTypeRef)); + for (UINT i = 0; i < arg_count; i += 1) { + typs[i] = unsigned_integer_type; + } + + LLVMTypeRef func_type = LLVMFunctionType(llvm_type(&TYPE(TY_UINT)), typs, arg_count, false); + LLVMValueRef inline_asm = NULL; + + /* TODO check architecture */ + { /* x86-64 */ + assert(arg_count <= 7); + + char constraints[128] = "={rax}"; + + char const *registers[] = { "rax", "rdi", "rsi", "rdx", "r10", "r8", "r9" }; + for (UINT i = 0; i < arg_count; i += 1) { + strcat(constraints, ",{"); + strcat(constraints, registers[i]); + strcat(constraints, "}"); + } + + strcat(constraints, ",~{rcx},~{r11},~{memory}"); + + inline_asm = LLVMGetInlineAsm( + func_type, "syscall", 7, constraints, strlen(constraints), true, false, LLVMInlineAsmDialectATT, false + ); + } + + return LLVMBuildCall2(llvm_builder, func_type, inline_asm, args, arg_count, ""); +} + /* Initialise LLVM. */ static inline void llvm_init(void) { llvm_context = LLVMGetGlobalContext(); @@ -312,9 +385,15 @@ static inline void llvm_free(void) { /* Return the appropriate LLVMTypeRef for a G type. */ static LLVMTypeRef llvm_type(type *t) { switch (t->k) { + case TY_B8: { return LLVMIntType(8); } break; + case TY_B16: { return LLVMIntType(16); } break; + case TY_B32: { return LLVMIntType(32); } break; + case TY_B64: { return LLVMIntType(64); } break; case TY_UINT: case TY_SINT: { return LLVMIntType(64); } break; + case TY_BYTE: case TY_CHAR: case TY_U8: case TY_S8: { return LLVMIntType(8); } break; case TY_U16: case TY_S16: { return LLVMIntType(16); } break; + case TY_RUNE: case TY_U32: case TY_S32: { return LLVMIntType(32); } break; case TY_U64: case TY_S64: { return LLVMIntType(64); } break; case TY_U128: case TY_S128: { return LLVMIntType(128); } break; @@ -329,9 +408,15 @@ static LLVMTypeRef llvm_type(type *t) { /* Return the default value for a G type. */ static LLVMValueRef llvm_ival(type *t) { switch (t->k) { + case TY_B8: { return LLVMConstInt(LLVMIntType(8), 0, false); } break; + case TY_B16: { return LLVMConstInt(LLVMIntType(16), 0, false); } break; + case TY_B32: { return LLVMConstInt(LLVMIntType(32), 0, false); } break; + case TY_B64: { return LLVMConstInt(LLVMIntType(64), 0, false); } break; case TY_UINT: case TY_SINT: { return LLVMConstInt(LLVMIntType(64), 0, false); } break; + case TY_BYTE: case TY_CHAR: case TY_U8: case TY_S8: { return LLVMConstInt(LLVMIntType(8), 0, false); } break; case TY_U16: case TY_S16: { return LLVMConstInt(LLVMIntType(16), 0, false); } break; + case TY_RUNE: case TY_U32: case TY_S32: { return LLVMConstInt(LLVMIntType(32), 0, false); } break; case TY_U64: case TY_S64: { return LLVMConstInt(LLVMIntType(64), 0, false); } break; case TY_U128: case TY_S128: { return LLVMConstInt(LLVMIntType(128), 0, false); } break; diff --git a/src/llvm.h b/src/llvm.h index 5e1e248..c35ad06 100644 --- a/src/llvm.h +++ b/src/llvm.h @@ -9,5 +9,7 @@ #include "parse.h" extern void llvm(ast *a); +extern void llvm_bitcode(ast *a); +extern void llvm_ir(ast *a); #endif // G_LLVM_H_CZUMSHFW diff --git a/src/main.c b/src/main.c index 42e698c..3d25b5b 100644 --- a/src/main.c +++ b/src/main.c @@ -78,7 +78,9 @@ static void compile(const char * file, char *src, UINT len) { analyse(a); if (Pflag) { ast_print(a, 0); goto end; } - llvm(a); + if (bflag) { llvm_ir(a); } + else if (Bflag) { llvm_bitcode(a); } + else { llvm(a); } end:; return; } diff --git a/src/parse.c b/src/parse.c index b373212..df43318 100644 --- a/src/parse.c +++ b/src/parse.c @@ -29,7 +29,9 @@ char *ast_ks[] = { "AK_ASSIGN", "AK_AS_ADD", "AK_AS_SUB", "AK_AS_MUL", "AK_AS_DIV", "AK_AS_MOD", - "AK_ID_VAR", "AK_CALL", "AK_INT", "AK_FLT" + "AK_ID_VAR", "AK_CALL", "AK_INT", "AK_FLT", + + "AK_HASH_SYSCALL" }; static ast *parse_stmt(lex *l, syt *st); @@ -305,7 +307,7 @@ static ast *parse_expr(lex *l, syt *st, bool arg) { case TK_LPAREN: { tok_a_push(&ts, lex_next(l)); } break; case TK_RPAREN: { for (tok t = tok_a_pop(&ts);; t = tok_a_pop(&ts)) { - if (t.k == TK_NULL) { + if (t.k == NULL_TK) { if (arg) { goto eox; } note(l->n, T.ln, T.cl, -1, "expected left parenthesis"); } @@ -314,12 +316,34 @@ static ast *parse_expr(lex *l, syt *st, bool arg) { shunt(&as, t, op_lookup(t.k, false)); } - /* TODO handle procedure calls */ - lex_next(l); } break; + case TK_HASH: { + ast *a = ast_init(); tok t = lex_kind(l, TK_HASH); + a->ln = t.ln; a->cl = t.cl; bool needs_args = false; + + if (strcmp(t.s, "#syscall") == 0) { a->k = AK_HASH_SYSCALL; needs_args = true; } + else { note("TODO", t.ln, t.cl, 0, "%s: unrecognised hash procedure", t.s); } + + if (needs_args) { + lex_kind(l, TK_LPAREN); + + if (T.k != TK_RPAREN) for (;;) { + ast_push(a, parse_expr(l, st, true) /* FIXME THIS IS NULL WHEN STRING OR ANY UNHANDLED EXPRESSION */); + if (T.k != TK_COMMA) { break; } + if (a->k == AK_CAST) { note(l->n, T.ln, T.cl, 0, "type casts must have only a single argument"); } + lex_kind(l, TK_COMMA); + } + + lex_kind(l, TK_RPAREN); + } + + if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); } + + ast_a_push(&as, a); + } break; default: /* Handle operators */ { - op o1, o2; if ((o1 = op_lookup(T.k, false)).tk == TK_NULL) { goto eox; } + op o1, o2; if ((o1 = op_lookup(T.k, false)).tk == NULL_TK) { goto eox; } /* If there is an operator at the top of the operator stack that is not @@ -327,7 +351,7 @@ static ast *parse_expr(lex *l, syt *st, bool arg) { same precedence as o1 and o1 is left-associative, then pop it from the stack onto the output. */ - for (o2 = op_lookup(tok_a_peek(&ts).k, false); o2.tk != TK_NULL; o2 = op_lookup(tok_a_peek(&ts).k, false)) { + for (o2 = op_lookup(tok_a_peek(&ts).k, false); o2.tk != NULL_TK; o2 = op_lookup(tok_a_peek(&ts).k, false)) { if (o2.tk == TK_LPAREN || (o1.o < o2.o && (o1.o != o2.o || o1.as == true))) { break; } shunt(&as, tok_a_pop(&ts), o2); @@ -338,7 +362,7 @@ static ast *parse_expr(lex *l, syt *st, bool arg) { } eox:; /* Pop any remaining operators from the operator stack */ - for (tok t = tok_a_pop(&ts); t.k != TK_NULL; t = tok_a_pop(&ts)) { + for (tok t = tok_a_pop(&ts); t.k != NULL_TK; t = tok_a_pop(&ts)) { if (t.k == TK_LPAREN) { error(1, "LPAREN: TODO"); } if (t.k == TK_RPAREN) { error(1, "RPAREN: TODO"); } diff --git a/src/parse.h b/src/parse.h index 0642c50..ad88305 100644 --- a/src/parse.h +++ b/src/parse.h @@ -25,7 +25,9 @@ typedef enum { AK_ASSIGN, AK_AS_ADD, AK_AS_SUB, AK_AS_MUL, AK_AS_DIV, AK_AS_MOD, - AK_ID_VAR, AK_CALL, AK_INT, AK_FLT + AK_ID_VAR, AK_CALL, AK_INT, AK_FLT, + + AK_HASH_SYSCALL } ast_k; /*