Browse Source

parse: Implement expression parsing

master
Jake Wakeling 4 months ago
parent
commit
4d44c44d6b
  1. 2
      CMakeLists.txt
  2. 8
      README.md
  3. 1
      examples/expr.g
  4. 16
      src/lex.c
  5. 6
      src/lex.h
  6. 83
      src/parse.c
  7. 9
      src/parse.h

2
CMakeLists.txt

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.12)
cmake_minimum_required(VERSION 3.14)
project(G VERSION 0.2.0 LANGUAGES C)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin)

8
README.md

@ -24,7 +24,7 @@ main :: proc() -> s32 {
### Dependencies
- CMake >= 3.12, to build
- CMake >= 3.14, to build
- LLVM, to build
- Clang, for linking compiled object files
@ -57,13 +57,15 @@ command. The second command will output an executable file, *a.out* by default.
> Not all todo items will necessarily be implemented
- [ ] Implement main entrypoint
- [ ] Implement procedure declarations
- [x] Implement procedure declarations
- [ ] Implement procedure calls
- [x] Implement variable declarations
- [ ] Implement variable assignments
- [x] Implement integers
- [ ] Implement reals
- [ ] Implement arrays
- [x] Implement expression parsing
- [ ] Implement expression code generation
- [ ] Implement the *type* type
- [ ] Implement multiple return values
- [ ] Implement *defer*

1
examples/expr.g

@ -0,0 +1 @@
main :: proc() -> s64 { return 2 + 3 - 1 * 4 / 2; }

16
src/lex.c

@ -32,6 +32,22 @@ char *tok_ks[] = {
"TK_AS_NOT", "TK_AS_AND", "TK_AS_OR", "TK_AS_XOR", "TK_AS_SHL", "TK_AS_SHR",
};
/* Push a token to a token array.
 *
 * Grows the array by one element and stores `t` in the new last slot.
 * If the reallocation fails the failure is reported through error() and the
 * array (pointer and length) is left exactly as it was.
 */
void tok_a_push(tok_a *a, tok t) {
	tok *ta = realloc(a->a, (a->al + 1) * sizeof (tok));
	/* SERR is passed as an argument, never as the format string itself */
	if (!ta) { error(1, "%s", SERR); return; }
	/* Only commit the new length once the storage really exists */
	a->a = ta; a->a[a->al] = t; a->al += 1;
}
/* Remove the last token from a token array and return it.
 * An empty array is left untouched and yields a zero-initialised token. */
tok tok_a_pop(tok_a *a) {
	if (a->al == 0) { return (tok){ 0 }; }
	a->al -= 1;
	return a->a[a->al];
}
/* Peek at the last token of a token array without removing it.
 * An empty array yields a zero-initialised token. */
tok tok_a_peek(tok_a *a) {
	if (a->al == 0) { return (tok){ 0 }; }
	return a->a[a->al - 1];
}
/* Initialise a lexer. */
lex lex_init(const char *file, char *src, UINT len) {
lex l = { file, src, src, src + len, 0, 0, 0 };

6
src/lex.h

@ -30,12 +30,18 @@ typedef struct {
union { u64 v_u64; s64 v_s64; f64 v_f64; };
} tok;
/* Growable array of tokens: `a` points at the elements, `al` is the number of
 * elements currently stored. */
typedef struct { tok *a; UINT al; } tok_a;
/* Lexer state. lex_init fills the fields in declaration order: `n` is the
 * file name, `s` the source buffer, `p` starts at the beginning of the source
 * and `q` is set to src + len; `ln`, `cl` and `t` start out zeroed.
 * NOTE(review): ln/cl look like line/column counters and t like the current
 * token -- confirm against lex.c. */
typedef struct {
const char *n; char *s, *p, *q; UINT ln, cl; tok t;
} lex;
/* Token kind names, indexed by token kind. */
extern char *tok_ks[];
/* Token array helpers: push appends a token, pop removes and returns the last
 * token, peek returns the last token without removing it. pop and peek return
 * a zero-initialised token when the array is empty. */
extern void tok_a_push(tok_a *a, tok t);
extern tok tok_a_pop(tok_a *a);
extern tok tok_a_peek(tok_a *a);
/* Initialise a lexer over `len` bytes of `src`, remembering `file` as its name. */
extern lex lex_init(const char *file, char *src, UINT len);
extern tok lex_peek(lex *l);
extern tok lex_next(lex *l);

83
src/parse.c

@ -16,11 +16,11 @@
#include <string.h>
char *ast_ks[] = {
"AK_NULL", "AK_PROG",
"AK_NULL", "AK_PROG", "AK_PROC",
"AK_STMT", "AK_COMP", "AK_DECL", "AK_RETURN", "AK_IF", "AK_FOR",
"AK_EXPR", "AK_PROC",
"AK_OP_ADD", "AK_OP_SUB", "AK_OP_MUL", "AK_OP_DIV", "AK_OP_MOD",
"AK_VAR", "AK_INT",
};
@ -53,6 +53,17 @@ void ast_push(ast *a, ast *c) {
c->p = a; a->c.a[a->c.al - 1] = c;
}
/* Push an AST to an AST array.
 *
 * Grows the array by one slot and stores the node pointer `a` in it.
 * If the reallocation fails the failure is reported through error() and the
 * array (pointer and length) is left exactly as it was.
 */
void ast_a_push(ast_a *aa, ast *a) {
	ast **ta = realloc(aa->a, (aa->al + 1) * sizeof (ast *));
	/* SERR is passed as an argument, never as the format string itself */
	if (!ta) { error(1, "%s", SERR); return; }
	/* Only commit the new length once the storage really exists */
	aa->a = ta; aa->a[aa->al] = a; aa->al += 1;
}
/* Remove the last AST node pointer from an AST array and return it.
 * Returns NULL when the array is empty. */
ast *ast_a_pop(ast_a *aa) {
	if (aa->al == 0) { return NULL; }
	aa->al -= 1;
	return aa->a[aa->al];
}
#define T (l->t) /* lex_peek equivalent */
/* Parse a program. */
@ -173,11 +184,38 @@ static ast *parse_stmt_for(lex *l, syt *st) {
ast_push(a, parse_stmt(l, st)); return a;
}
/* Binary operator descriptor.
 *   tk: token kind that spells the operator
 *   ak: AST node kind produced for it
 *   o:  precedence level compared by the expression parser (the
 *       multiplicative operators use 2, the additive ones 1)
 *   as: presumably an associativity flag -- false for every entry here;
 *       TODO confirm its exact meaning
 */
typedef struct { tok_k tk; ast_k ak; s32 o; bool as; } op;

/* Table of the binary operators known to the expression parser. */
static const op ops[] = {
	{ .tk = TK_OP_MUL, .ak = AK_OP_MUL, .o = 2, .as = false },
	{ .tk = TK_OP_DIV, .ak = AK_OP_DIV, .o = 2, .as = false },
	{ .tk = TK_OP_MOD, .ak = AK_OP_MOD, .o = 2, .as = false },
	{ .tk = TK_OP_ADD, .ak = AK_OP_ADD, .o = 1, .as = false },
	{ .tk = TK_OP_SUB, .ak = AK_OP_SUB, .o = 1, .as = false },
};
/* Find the operator table entry whose token kind is `k`.
 * Returns a zero-initialised op when `k` is not in the table. */
static inline op ops_lookup(tok_k k) {
	const op *end = ops + (sizeof (ops) / sizeof (*ops));
	for (const op *o = ops; o != end; ++o) {
		if (o->tk == k) { return *o; }
	}
	return (op){ 0 };
}
/* Build one binary operator node from the two topmost operands.
 *
 * Pops the right operand and then the left operand from `aa`, creates a node
 * of kind `o.ak` carrying the line/column of token `t`, attaches the operands
 * as children (left first) and pushes the new node back onto `aa`.
 *
 * ast_a_pop returns NULL on an empty array and ast_push dereferences its
 * child, so a missing operand is reported as an error here instead of being
 * passed on.
 */
static inline void shunt(ast_a *aa, tok t, op o) {
	/* The right operand was pushed last, so it comes off first */
	ast *r = ast_a_pop(aa); ast *l = ast_a_pop(aa);
	if (!l || !r) { error(1, "PARSE_EXPR %s: missing operand", tok_ks[t.k]); return; }

	ast *a = ast_init(); a->k = o.ak; a->ln = t.ln; a->cl = t.cl;
	ast_push(a, l); ast_push(a, r); ast_a_push(aa, a);
}
/* Parse an expression. */
static ast *parse_expr(lex *l, syt *st) {
if (T.k == TK_PROC) { return parse_expr_proc(l, st); }
if (T.k == TK_INT) { return parse_int(l, st); }
if (T.k == TK_ID) {
tok_a ts = { 0 }; ast_a as = { 0 };
/* Parse expressions with a shunting-yard algorithm */
for (;;) switch (T.k) {
case TK_ID: { /* TODO handle procedure calls */
ast *a = ast_init(); tok t = lex_kind(l, TK_ID);
a->k = AK_VAR; a->ln = t.ln; a->cl = t.cl;
@ -185,9 +223,33 @@ static ast *parse_expr(lex *l, syt *st) {
if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); }
return a;
ast_a_push(&as, a);
} break;
case TK_INT: { ast_a_push(&as, parse_int(l, st)); } break;
case TK_LPAREN: { tok_a_push(&ts, lex_next(l)); } break;
case TK_RPAREN: { /* TODO */ error(1, "RPAREN: TODO"); } break;
default: {
op o1; if ((o1 = ops_lookup(T.k)).tk == TK_NULL) { goto eox; }
for (op o2 = ops_lookup(tok_a_peek(&ts).k);; o2 = ops_lookup(tok_a_peek(&ts).k)) {
if (o2.tk == TK_LPAREN || (o1.o > o2.o && (o1.o != o2.o || o1.as == true))) { break; }
shunt(&as, tok_a_pop(&ts), o2);
}
tok_a_push(&ts, lex_next(l));
} break;
} eox:;
/* Pop any remaining operators from the operator stack */
for (tok t = tok_a_pop(&ts); t.k != TK_NULL; t = tok_a_pop(&ts)) {
if (t.k == TK_LPAREN) { error(1, "LPAREN: TODO"); }
if (t.k == TK_RPAREN) { error(1, "RPAREN: TODO"); }
shunt(&as, t, ops_lookup(t.k));
}
else { error(1, "PARSE_EXPR %s", tok_ks[T.k]); }
return ast_a_pop(&as);
}
/* Parse a procedure expression. */
@ -224,7 +286,7 @@ static ast *parse_int(lex *l, syt *st) {
ast *a = ast_init(); tok t = lex_kind(l, TK_INT);
a->k = AK_INT; a->ln = t.ln; a->cl = t.cl;
a->t = &types[TY_U64]; /* TODO properly determine type */
a->t = &TYPE(TY_S64); /* TODO properly determine type */
if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); }
if (!(a->v = val_parse_int(t.s)).k) { error(1, "%s: %s", t.s, SERR); }
@ -236,7 +298,12 @@ static ast *parse_int(lex *l, syt *st) {
void ast_print(ast *a, UINT i) {
for (UINT j = 0; j != i; ++j) { printf(" "); }
printf("%zu:%zu: %s: %s", a->ln + 1, a->cl + 1, ast_ks[a->k], a->s);
if (a->k == AK_PROC) { printf(" -> %s", a->t->s); }
switch (a->k) {
case AK_PROC: case AK_INT: { printf(" -> %s", a->t->s); } break;
default: {} break;
}
fputc('\n', stdout);
if (a->c.a != NULL) for (UINT ci = 0; ci != a->c.al; ci += 1) {

9
src/parse.h

@ -13,11 +13,11 @@
#include "value.h"
typedef enum {
AK_NULL, AK_PROG,
AK_NULL, AK_PROG, AK_PROC,
AK_STMT, AK_COMP, AK_DECL, AK_RETURN, AK_IF, AK_FOR,
AK_EXPR, AK_PROC,
AK_OP_ADD, AK_OP_SUB, AK_OP_MUL, AK_OP_DIV, AK_OP_MOD,
AK_VAR, AK_INT,
} ast_k;
@ -29,11 +29,16 @@ typedef struct ast_s {
struct ast_s *p; struct { struct ast_s **a; UINT al; } c;
} ast;
/* Growable array of AST node pointers: `a` points at the elements, `al` is the
 * number of elements currently stored. */
typedef struct { ast **a; UINT al; } ast_a;
/* AST kind names, indexed by ast_k. */
extern char *ast_ks[];
extern ast *ast_init(void);
/* Append `c` as the last child of `a` and set `c`'s parent pointer to `a`. */
extern void ast_push(ast *a, ast *c);
/* AST array helpers: push appends a node pointer; pop removes and returns the
 * last node pointer, or NULL when the array is empty. */
extern void ast_a_push(ast_a *aa, ast *a);
extern ast *ast_a_pop(ast_a *aa);
extern ast *parse(lex *l);
/* Print node `a` to stdout, preceded by `i` levels of indentation.
 * NOTE(review): appears to recurse over the children -- confirm in parse.c. */
extern void ast_print(ast *a, UINT i);

Loading…
Cancel
Save