Author | Jakob Wakeling <[email protected]> |
Date | 2022-03-27 00:40:27 |
Commit | 06863d51b24798c94fc08458c425880887a5a1e4 |
Parent | fe591f98568959e5f448b6f7fa2fcfb8f7667f41 |
lex: Change token kind prefix to TK
Diffstat
M | src/keyword.c | | | 4 | ++-- |
M | src/lex.c | | | 117 | ++++++++++++++++++++++++++++++++++++++++--------------------------------------- |
M | src/lex.h | | | 20 | ++++++++++---------- |
M | src/parse.c | | | 56 | +++++++++++++++++++++++++++++++------------------------- |
4 files changed, 102 insertions, 95 deletions
diff --git a/src/keyword.c b/src/keyword.c index c416042..8706fd8 100644 --- a/src/keyword.c +++ b/src/keyword.c @@ -12,8 +12,8 @@ typedef enum { KK_PROC, KK_RETURN, } kwd_k; static kwd kwds[] = { - { LK_PROC, 0x85729B0E3537BC61, "proc" }, - { LK_RETURN, 0xC5C7B983377CAD5F, "return" }, + { TK_PROC, 0x85729B0E3537BC61, "proc" }, + { TK_RETURN, 0xC5C7B983377CAD5F, "return" }, }; kwd *k_proc = &kwds[KK_PROC]; kwd *k_return = &kwds[KK_RETURN]; diff --git a/src/lex.c b/src/lex.c index c837c1c..849f57a 100644 --- a/src/lex.c +++ b/src/lex.c @@ -18,20 +18,20 @@ #include <stdio.h> char *tok_ks[] = { - "LK_NULL", "LK_EOF", "LK_ID", "LK_INT", "LK_FLT", "LK_STR", + "TK_NULL", "TK_EOF", "TK_ID", "TK_INT", "TK_FLT", "TK_STR", - "LK_RETURN", "LK_IF", "LK_ELSE", "LK_FOR", "LK_PROC", + "TK_RETURN", "TK_IF", "TK_ELSE", "TK_FOR", "TK_PROC", - "LK_LPAREN", "LK_RPAREN", "LK_LBRACK", "LK_RBRACK", "LK_LBRACE", "LK_RBRACE", - "LK_COLON", "LK_SCOLON", "LK_COMMA", "LK_PERIOD", "LK_RARROW", "LK_QMARK", + "TK_LPAREN", "TK_RPAREN", "TK_LBRACK", "TK_RBRACK", "TK_LBRACE", "TK_RBRACE", + "TK_COLON", "TK_SCOLON", "TK_COMMA", "TK_PERIOD", "TK_RARROW", "TK_QMARK", - "LK_OP_ADD", "LK_OP_SUB", "LK_OP_MUL", "LK_OP_DIV", "LK_OP_MOD", - "LK_OP_EQ", "LK_OP_NEQ", "LK_OP_GT", "LK_OP_LT", "LK_OP_GTE", "LK_OP_LTE", - "LK_LO_NOT", "LK_LO_AND", "LK_LO_OR", - "LK_BW_NOT", "LK_BW_AND", "LK_BW_OR", "LK_BW_XOR", "LK_BW_SHL", "LK_BW_SHR", + "TK_OP_ADD", "TK_OP_SUB", "TK_OP_MUL", "TK_OP_DIV", "TK_OP_MOD", + "TK_OP_EQ", "TK_OP_NEQ", "TK_OP_GT", "TK_OP_LT", "TK_OP_GTE", "TK_OP_LTE", + "TK_LO_NOT", "TK_LO_AND", "TK_LO_OR", + "TK_BW_NOT", "TK_BW_AND", "TK_BW_OR", "TK_BW_XOR", "TK_BW_SHL", "TK_BW_SHR", - "LK_ASSIGN", "LK_AS_ADD", "LK_AS_SUB", "LK_AS_MUL", "LK_AS_DIV", "LK_AS_MOD", - "LK_AS_NOT", "LK_AS_AND", "LK_AS_OR", "LK_AS_XOR", "LK_AS_SHL", "LK_AS_SHR", + "TK_ASSIGN", "TK_AS_ADD", "TK_AS_SUB", "TK_AS_MUL", "TK_AS_DIV", "TK_AS_MOD", + "TK_AS_NOT", "TK_AS_AND", "TK_AS_OR", "TK_AS_XOR", "TK_AS_SHL", "TK_AS_SHR", }; /* Initialise a lexer. */ @@ -53,8 +53,8 @@ tok lex_peek(lex *l) { return T; } /* Lex the next token, and return the current one. */ tok lex_next(lex *l) { - if (T.k == LK_EOF) { return T; } - tok t = T; T = (tok){ LK_NULL, 0, 0, 0 }; + if (T.k == TK_EOF) { return T; } + tok t = T; T = (tok){ TK_NULL, 0, 0, 0 }; skip:; /* Skip null characters and whitespace */ @@ -65,7 +65,7 @@ skip:; } /* Return the current token immediately if EOF is reached */ - if (P == Q) { T = (tok){ LK_EOF, LN, CL, 0 }; return t; } + if (P == Q) { T = (tok){ TK_EOF, LN, CL, 0 }; return t; } /* Skip single-line and (potentially nested) multi-line comments */ if (C == '/') switch (D) { @@ -95,7 +95,7 @@ skip:; T.s = strndup(s, sl); T.h = syt_hash(s, sl); kwd *k = kwd_find(T.h); - if (k) { T.k = k->k; } else { T.k = LK_ID; } + if (k) { T.k = k->k; } else { T.k = TK_ID; } } /* Handle number literals */ @@ -106,76 +106,76 @@ skip:; sl = P - s; CL += sl; T.s = strndup(s, sl); - T.k = LK_INT; + T.k = TK_INT; } /* Handle punctuators and operators */ else switch (C) { - case '(': { T.k = LK_LPAREN; P += 1; CL += 1; } break; - case ')': { T.k = LK_RPAREN; P += 1; CL += 1; } break; - case '[': { T.k = LK_LBRACK; P += 1; CL += 1; } break; - case ']': { T.k = LK_RBRACK; P += 1; CL += 1; } break; - case '{': { T.k = LK_LBRACE; P += 1; CL += 1; } break; - case '}': { T.k = LK_RBRACE; P += 1; CL += 1; } break; - case ':': { T.k = LK_COLON; P += 1; CL += 1; } break; - case ';': { T.k = LK_SCOLON; P += 1; CL += 1; } break; - case ',': { T.k = LK_COMMA; P += 1; CL += 1; } break; - case '.': { T.k = LK_PERIOD; P += 1; CL += 1; } break; - case '?': { T.k = LK_QMARK; P += 1; CL += 1; } break; + case '(': { T.k = TK_LPAREN; P += 1; CL += 1; } break; + case ')': { T.k = TK_RPAREN; P += 1; CL += 1; } break; + case '[': { T.k = TK_LBRACK; P += 1; CL += 1; } break; + case ']': { T.k = TK_RBRACK; P += 1; CL += 1; } break; + case '{': { T.k = TK_LBRACE; P += 1; CL += 1; } break; + case '}': { T.k = TK_RBRACE; P += 1; CL += 1; } break; + case ':': { T.k = TK_COLON; P += 1; CL += 1; } break; + case ';': { T.k = TK_SCOLON; P += 1; CL += 1; } break; + case ',': { T.k = TK_COMMA; P += 1; CL += 1; } break; + case '.': { T.k = TK_PERIOD; P += 1; CL += 1; } break; + case '?': { T.k = TK_QMARK; P += 1; CL += 1; } break; case '+': switch (D) { - default: { T.k = LK_OP_ADD; P += 1; CL += 1; } break; - case '=': { T.k = LK_AS_ADD; P += 2; CL += 2; } break; + default: { T.k = TK_OP_ADD; P += 1; CL += 1; } break; + case '=': { T.k = TK_AS_ADD; P += 2; CL += 2; } break; } break; case '-': switch (D) { - default: { T.k = LK_OP_SUB; P += 1; CL += 1; } break; - case '>': { T.k = LK_RARROW; P += 2; CL += 2; } break; - case '=': { T.k = LK_AS_SUB; P += 2; CL += 2; } break; + default: { T.k = TK_OP_SUB; P += 1; CL += 1; } break; + case '>': { T.k = TK_RARROW; P += 2; CL += 2; } break; + case '=': { T.k = TK_AS_SUB; P += 2; CL += 2; } break; } break; case '*': switch (D) { - default: { T.k = LK_OP_MUL; P += 1; CL += 1; } break; - case '=': { T.k = LK_AS_MUL; P += 2; CL += 2; } break; + default: { T.k = TK_OP_MUL; P += 1; CL += 1; } break; + case '=': { T.k = TK_AS_MUL; P += 2; CL += 2; } break; } break; case '/': switch (D) { - default: { T.k = LK_OP_DIV; P += 1; CL += 1; } break; - case '=': { T.k = LK_AS_DIV; P += 2; CL += 2; } break; + default: { T.k = TK_OP_DIV; P += 1; CL += 1; } break; + case '=': { T.k = TK_AS_DIV; P += 2; CL += 2; } break; } break; case '%': switch (D) { - default: { T.k = LK_OP_MOD; P += 1; CL += 1; } break; - case '=': { T.k = LK_AS_MOD; P += 2; CL += 2; } break; + default: { T.k = TK_OP_MOD; P += 1; CL += 1; } break; + case '=': { T.k = TK_AS_MOD; P += 2; CL += 2; } break; } break; case '=': switch (D) { - default: { T.k = LK_ASSIGN; P += 1; CL += 1; } break; - case '=': { T.k = LK_OP_EQ; P += 2; CL += 2; } break; + default: { T.k = TK_ASSIGN; P += 1; CL += 1; } break; + case '=': { T.k = TK_OP_EQ; P += 2; CL += 2; } break; } break; case '<': switch (D) { - default: { T.k = LK_OP_LT; P += 1; CL += 1; } break; - case '=': { T.k = LK_OP_LTE; P += 2; CL += 2; } break; + default: { T.k = TK_OP_LT; P += 1; CL += 1; } break; + case '=': { T.k = TK_OP_LTE; P += 2; CL += 2; } break; } break; case '>': switch (D) { - default: { T.k = LK_OP_GT; P += 1; CL += 1; } break; - case '=': { T.k = LK_OP_GTE; P += 2; CL += 2; } break; + default: { T.k = TK_OP_GT; P += 1; CL += 1; } break; + case '=': { T.k = TK_OP_GTE; P += 2; CL += 2; } break; } break; case '!': switch (D) { - default: { T.k = LK_LO_NOT; P += 1; CL += 1; } break; - case '=': { T.k = LK_OP_NEQ; P += 2; CL += 2; } break; + default: { T.k = TK_LO_NOT; P += 1; CL += 1; } break; + case '=': { T.k = TK_OP_NEQ; P += 2; CL += 2; } break; } break; case '&': switch (D) { - default: { T.k = LK_BW_AND; P += 1; CL += 1; } break; - case '&': { T.k = LK_LO_AND; P += 2; CL += 2; } break; - case '=': { T.k = LK_AS_AND; P += 2; CL += 2; } break; + default: { T.k = TK_BW_AND; P += 1; CL += 1; } break; + case '&': { T.k = TK_LO_AND; P += 2; CL += 2; } break; + case '=': { T.k = TK_AS_AND; P += 2; CL += 2; } break; } break; case '|': switch (D) { - default: { T.k = LK_BW_OR; P += 1; CL += 1; } break; - case '|': { T.k = LK_LO_OR; P += 2; CL += 2; } break; - case '=': { T.k = LK_AS_OR; P += 2; CL += 2; } break; + default: { T.k = TK_BW_OR; P += 1; CL += 1; } break; + case '|': { T.k = TK_LO_OR; P += 2; CL += 2; } break; + case '=': { T.k = TK_AS_OR; P += 2; CL += 2; } break; } break; case '~': switch (D) { - default: { T.k = LK_BW_NOT; P += 1; CL += 1; } break; - case '=': { T.k = LK_AS_NOT; P += 2; CL += 2; } break; + default: { T.k = TK_BW_NOT; P += 1; CL += 1; } break; + case '=': { T.k = TK_AS_NOT; P += 2; CL += 2; } break; } break; case '^': switch (D) { - default: { T.k = LK_BW_XOR; P += 1; CL += 1; } break; - case '=': { T.k = LK_AS_XOR; P += 2; CL += 2; } break; + default: { T.k = TK_BW_XOR; P += 1; CL += 1; } break; + case '=': { T.k = TK_AS_XOR; P += 2; CL += 2; } break; } break; case '\'': { /* TODO */ } break; @@ -203,7 +203,7 @@ tok lex_kind(lex *l, tok_k k) { /* Print lexer debug output and exit. */ void lex_debug(lex *l) { - for (tok t = lex_next(l); t.k != LK_EOF; free(t.s), t = lex_next(l)) { + for (tok t = lex_next(l); t.k != TK_EOF; free(t.s), t = lex_next(l)) { printf("%zu:%zu: %s \"%s\"\n", t.ln + 1, t.cl + 1, tok_ks[t.k], t.s); } } diff --git a/src/lex.h b/src/lex.h index 71d6f67..4c23e49 100644 --- a/src/lex.h +++ b/src/lex.h @@ -9,20 +9,20 @@ #include "util/util.h" typedef enum { - LK_NULL, LK_EOF, LK_ID, LK_INT, LK_FLT, LK_STR, + TK_NULL, TK_EOF, TK_ID, TK_INT, TK_FLT, TK_STR, - LK_RETURN, LK_IF, LK_ELSE, LK_FOR, LK_PROC, + TK_RETURN, TK_IF, TK_ELSE, TK_FOR, TK_PROC, - LK_LPAREN, LK_RPAREN, LK_LBRACK, LK_RBRACK, LK_LBRACE, LK_RBRACE, - LK_COLON, LK_SCOLON, LK_COMMA, LK_PERIOD, LK_RARROW, LK_QMARK, + TK_LPAREN, TK_RPAREN, TK_LBRACK, TK_RBRACK, TK_LBRACE, TK_RBRACE, + TK_COLON, TK_SCOLON, TK_COMMA, TK_PERIOD, TK_RARROW, TK_QMARK, - LK_OP_ADD, LK_OP_SUB, LK_OP_MUL, LK_OP_DIV, LK_OP_MOD, - LK_OP_EQ, LK_OP_NEQ, LK_OP_GT, LK_OP_LT, LK_OP_GTE, LK_OP_LTE, - LK_LO_NOT, LK_LO_AND, LK_LO_OR, - LK_BW_NOT, LK_BW_AND, LK_BW_OR, LK_BW_XOR, LK_BW_SHL, LK_BW_SHR, + TK_OP_ADD, TK_OP_SUB, TK_OP_MUL, TK_OP_DIV, TK_OP_MOD, + TK_OP_EQ, TK_OP_NEQ, TK_OP_GT, TK_OP_LT, TK_OP_GTE, TK_OP_LTE, + TK_LO_NOT, TK_LO_AND, TK_LO_OR, + TK_BW_NOT, TK_BW_AND, TK_BW_OR, TK_BW_XOR, TK_BW_SHL, TK_BW_SHR, - LK_ASSIGN, LK_AS_ADD, LK_AS_SUB, LK_AS_MUL, LK_AS_DIV, LK_AS_MOD, - LK_AS_NOT, LK_AS_AND, LK_AS_OR, LK_AS_XOR, LK_AS_SHL, LK_AS_SHR, + TK_ASSIGN, TK_AS_ADD, TK_AS_SUB, TK_AS_MUL, TK_AS_DIV, TK_AS_MOD, + TK_AS_NOT, TK_AS_AND, TK_AS_OR, TK_AS_XOR, TK_AS_SHL, TK_AS_SHR, } tok_k; typedef struct { diff --git a/src/parse.c b/src/parse.c index 6432f13..2219623 100644 --- a/src/parse.c +++ b/src/parse.c @@ -36,7 +36,7 @@ static ast *parse_stmt_for(lex *l, syt *st); static ast *parse_expr(lex *l, syt *st); static ast *parse_expr_proc(lex *l, syt *st); -static inline ast *parse_int(lex *l); +static ast *parse_int(lex *l); /* Initialise an AST node. */ ast *ast_init(void) { @@ -60,7 +60,7 @@ ast *parse(lex *l) { ast *a = ast_init(); a->k = AK_PROG; /* Parse and append all child nodes */ - for (ast *c; T.k != LK_EOF;) { + for (ast *c; T.k != TK_EOF;) { if ((c = parse_stmt_decl(l, &a->st))) { ast_push(a, c); } else { error(1, "NULL AST (parse:parse_stmt_decl)"); } } @@ -71,36 +71,36 @@ ast *parse(lex *l) { /* Parse a statement. */ static ast *parse_stmt(lex *l, syt *st) { switch (T.k) { - case LK_LBRACE: { return parse_stmt_compound(l, st); } break; - case LK_ID: { return parse_stmt_decl(l, st); } break; - case LK_RETURN: { return parse_stmt_return(l, st); } break; - case LK_IF: { return parse_stmt_if(l, st); } break; - case LK_FOR: { return parse_stmt_for(l, st); } break; + case TK_LBRACE: { return parse_stmt_compound(l, st); } break; + case TK_ID: { return parse_stmt_decl(l, st); } break; + case TK_RETURN: { return parse_stmt_return(l, st); } break; + case TK_IF: { return parse_stmt_if(l, st); } break; + case TK_FOR: { return parse_stmt_for(l, st); } break; default: { return parse_stmt_expr(l, st); } break; } } /* Parse a compound statement. */ static ast *parse_stmt_compound(lex *l, syt *st) { - lex_kind(l, LK_LBRACE); + lex_kind(l, TK_LBRACE); ast *a = ast_init(); a->k = AK_COMP; - for (; T.k != LK_EOF && T.k != LK_RBRACE;) { ast_push(a, parse_stmt(l, st)); } + for (; T.k != TK_EOF && T.k != TK_RBRACE;) { ast_push(a, parse_stmt(l, st)); } - lex_kind(l, LK_RBRACE); return a; + lex_kind(l, TK_RBRACE); return a; } /* Parse a declaration statement. */ static ast *parse_stmt_decl(lex *l, syt *st) { sym sm = { SK_NULL, T.ln, T.cl, T.h, T.s }; - lex_kind(l, LK_ID); lex_kind(l, LK_COLON); + lex_kind(l, TK_ID); lex_kind(l, TK_COLON); sm.a = ast_init(); sm.a->k = AK_DECL; if (!(sm.a->s = strdup(sm.s))) { error(1, SERR); } /* Store the declaration's type if one is specified */ /* TODO store type when one is specified */ - if (T.k == LK_ID) { + if (T.k == TK_ID) { sym s = syt_search_h(st, T.h, T.s); if (s.k == SK_NULL) { error( /* ERROR */ @@ -113,19 +113,19 @@ static ast *parse_stmt_decl(lex *l, syt *st) { ); } sm.t = s.t; lex_next(l); - if (T.k == LK_SCOLON) { lex_next(l); goto end; } + if (T.k == TK_SCOLON) { lex_next(l); goto end; } } - else if (T.k == LK_SCOLON) { error( + else if (T.k == TK_SCOLON) { error( 1, "%s:%zu:%zu: error: a declaration without a type is invalid", l->n, T.ln + 1, T.cl + 1 ); } /* Assign a constant or variable value */ - if (T.k == LK_COLON || T.k == LK_ASSIGN) { lex_next(l); ast_push(sm.a, parse_expr(l, st)); } + if (T.k == TK_COLON || T.k == TK_ASSIGN) { lex_next(l); ast_push(sm.a, parse_expr(l, st)); } else { error(1, "%s:%zu:%zu: error: expected ':' or '='", l->n, T.ln + 1, T.cl + 1); } /* Parse a semicolon if one is required */ - if (sm.a->c.a[0]->k != AK_PROC) { lex_kind(l, LK_SCOLON); } + if (sm.a->c.a[0]->k != AK_PROC) { lex_kind(l, TK_SCOLON); } /* Insert the new symbol and return */ end:; syt_insert_h(st, sm.h, sm.s, sm); return sm.a; @@ -133,18 +133,18 @@ static ast *parse_stmt_decl(lex *l, syt *st) { /* Parse an expression statement. */ static ast *parse_stmt_expr(lex *l, syt *st) { - ast *a; if (T.k != LK_SCOLON) { a = parse_expr(l, st); } - lex_kind(l, LK_SCOLON); return a; + ast *a; if (T.k != TK_SCOLON) { a = parse_expr(l, st); } + lex_kind(l, TK_SCOLON); return a; } /* Parse a return statement. */ static ast *parse_stmt_return(lex *l, syt *st) { - lex_kind(l, LK_RETURN); + lex_kind(l, TK_RETURN); ast *a = ast_init(); a->k = AK_RETURN; ast_push(a, parse_expr(l, st)); - lex_kind(l, LK_SCOLON); return a; + lex_kind(l, TK_SCOLON); return a; } /* Parse an if statement. */ @@ -155,23 +155,23 @@ static ast *parse_stmt_for(lex *l, syt *st) { /* TODO */ } /* Parse an expression. */ static ast *parse_expr(lex *l, syt *st) { - if (T.k == LK_PROC) { return parse_expr_proc(l, st); } - if (T.k == LK_INT) { return parse_int(l); } + if (T.k == TK_PROC) { return parse_expr_proc(l, st); } + if (T.k == TK_INT) { return parse_int(l); } else { error(1, "PARSE_EXPR %s", tok_ks[T.k]); } } /* Parse a procedure expression. */ static ast *parse_expr_proc(lex *l, syt *st) { - lex_kind(l, LK_PROC); lex_kind(l, LK_LPAREN); + lex_kind(l, TK_PROC); lex_kind(l, TK_LPAREN); ast *a = ast_init(); a->k = AK_PROC; /* Parse optional procedure parameter(s) */ - /* TODO */ lex_kind(l, LK_RPAREN); + /* TODO */ lex_kind(l, TK_RPAREN); /* Parse optional procedure return type(s) */ /* TODO parse more than one return type */ - if (T.k == LK_RARROW) { - lex_next(l); tok t = lex_kind(l, LK_ID); + if (T.k == TK_RARROW) { + lex_next(l); tok t = lex_kind(l, TK_ID); sym s = syt_search_h(st, t.h, t.s); if (s.k == SK_NULL) { error( /* ERROR */ @@ -190,8 +190,8 @@ static ast *parse_expr_proc(lex *l, syt *st) { } /* Parse an interger. */ -static inline ast *parse_int(lex *l) { - ast *a = ast_init(); tok t = lex_kind(l, LK_INT); +static ast *parse_int(lex *l) { + ast *a = ast_init(); tok t = lex_kind(l, TK_INT); a->k = AK_INT; a->ln = t.ln; a->cl = t.cl; if (!(a->s = strdup(t.s))) { error(1, "%s", SERR); }