Author | Jakob Wakeling <[email protected]> |
Date | 2020-06-30 12:16:15 |
Commit | b7279ece23b36fa3b74c324082539b89d5fdccb4 |
Parent | f193bb40eef110b3908a1ff67ae5000fe30c96ef |
Add lexer
Diffstat
A | src/lex.c | | | 49 | +++++++++++++++++++++++++++++++++++++++++++++++++ |
A | src/lex.h | | | 31 | +++++++++++++++++++++++++++++++ |
2 files changed, 80 insertions, 0 deletions
// ---- src/lex.h (new file) ----------------------------------------------
// lex.h
// Lexer header file for OBFI
// Copyright (C) 2020, Jakob Wakeling
// All rights reserved.

#ifndef OMKOV_OBFI_LEX_H_AD07BU3C
#define OMKOV_OBFI_LEX_H_AD07BU3C

#include <stddef.h>
#include <stdio.h>

// Token types. END shares its value with EOF, so the end-of-input token can
// be tested against either name. ERR and SET are declared here but are not
// produced by lex() below.
enum {
    END = EOF, // EOF
    ERR = 0,   // ERROR
    RGT = 1,   // '>'
    LFT = 2,   // '<'
    ADD = 3,   // '+'
    SUB = 4,   // '-'
    PUT = 5,   // '.'
    GET = 6,   // ','
    OPN = 7,   // '['
    CLS = 8,   // ']'
    SET = 9,   // '[-]', '[+]'
};

// A single token.
//   typ  one of the token type constants above
//   num  number of source characters merged into the token (run length for
//        '>', '<', '+', '-'; always 1 for every other token)
//   ln   zero-based line on which the token starts
//   cl   zero-based column on which the token starts
typedef struct { int typ; size_t num, ln, cl; } tok_t;

tok_t lex(char *str);
tok_t *parse(char *str, size_t len);

#endif // OMKOV_OBFI_LEX_H_AD07BU3C

// ---- src/lex.c (new file) ----------------------------------------------
// lex.c
// Lexer source file for OBFI
// Copyright (C) 2020, Jakob Wakeling
// All rights reserved.

// NOTE: as a separate file, lex.c begins with `#include "lex.h"`; the
// declarations it needs are the ones listed in the lex.h section above.

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Stamp the local token `tok` with a type and the current source position.
// Relies on `tok`, `line` and `col` being in scope at the point of use.
#define TOKSET(type) \
    do { tok.typ = (type); tok.ln = line; tok.cl = col; } while (0)

// Return the next token of a NUL-terminated source string.
//
// Passing a non-NULL `str` starts lexing that string and resets the line and
// column counters to 0; passing NULL continues from where the previous call
// stopped. The cursor and counters are kept in static variables, so only one
// string can be lexed at a time.
//
// Consecutive identical '>', '<', '+' or '-' characters are merged into one
// token whose `num` is the run length. '.', ',', '[' and ']' each produce
// their own token with `num` 1. '\n' advances the line counter and resets the
// column; any other character is skipped but still advances the column so
// that reported columns match the source text.
//
// Returns a token with `typ == END` once the terminating NUL is reached, and
// also when lex(NULL) is called before any string has been supplied.
tok_t lex(char *str) {
    static char *p;
    static size_t line, col;

    if (str) { p = str; line = 0; col = 0; }

    tok_t tok = { .typ = END, .num = 1, .ln = line, .cl = col };
    if (!p) { return tok; } // nothing has been supplied to lex yet

    for (; *p; ++p) {
        int typ = ERR; // token type selected for the current character
        int run = 0;   // non-zero when repeats of the character are merged

        switch (*p) {
        case '>': typ = RGT; run = 1; break;
        case '<': typ = LFT; run = 1; break;
        case '+': typ = ADD; run = 1; break;
        case '-': typ = SUB; run = 1; break;
        case '.': typ = PUT; break;
        case ',': typ = GET; break;
        case '[': typ = OPN; break;
        case ']': typ = CLS; break;
        case '\n': ++line; col = 0; continue;
        default: ++col; continue; // comment character: skip, keep column
        }

        TOKSET(typ);

        // Consume the character itself and, for mergeable commands, every
        // immediately following repeat of it.
        char c = *p++;
        if (run) {
            while (*p == c) { ++tok.num; ++p; }
        }
        col += tok.num;
        return tok;
    }

    TOKSET(END);
    return tok;
}

// Lex an entire NUL-terminated source string into a heap-allocated array.
//
//   str  source text handed to lex()
//   len  maximum number of tokens to store; presumably the length of `str`,
//        which is always sufficient because every token consumes at least
//        one character (confirm against callers)
//
// The array holds at most `len` tokens followed by one token with
// `typ == END`, so `len + 1` elements are allocated. If the input would
// produce more than `len` tokens the surplus is dropped and the END token is
// written in its place.
//
// Returns NULL if the allocation fails or `len + 1` is not representable.
// The caller owns the returned array and releases it with free().
tok_t *parse(char *str, size_t len) {
    if (len == (size_t)-1) { return NULL; } // len + 1 would wrap to 0

    // calloc rejects an element count whose byte size overflows size_t.
    tok_t *toks = calloc(len + 1, sizeof *toks);
    if (!toks) { return NULL; }

    size_t n = 0;
    tok_t tok = lex(str);
    while (tok.typ != END && n < len) {
        toks[n++] = tok;
        tok = lex(NULL);
    }

    // No-op when the loop stopped on END; otherwise turns the first dropped
    // token into the terminator.
    tok.typ = END;
    toks[n] = tok;
    return toks;
}