Author | Jakob Wakeling <[email protected]> |
Date | 2023-07-13 04:57:41 |
Commit | 1544347804b7a223b8dc012a622ed61ae77eb068 |
Parent | 8afca84b873628ea66a00a16be614d515f23f10c |
Inline integer and float lexing
Diffstat
M | README.md | | | 15 | +++++++++++++++ |
M | doc/spec.md | | | 2 | +- |
M | src/lex.c | | | 106 | ++++++++++++++++++++++++++++++++++++++----------------------------------------- |
M | src/main.c | | | 35 | ++++++++++++++++++++++------------- |
4 files changed, 89 insertions, 69 deletions
diff --git a/README.md b/README.md index ab9df5a..90b5072 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,20 @@ Influenced by **C**, **Odin**, **Zig**, and others. Note that at present, **G** is highly unstable and will certainly change. +## Example + +```g +print :: proc(s: string) { + #syscall(uint(1), sint(1), cstring(s), len(s)); + return; +} + +main :: proc() -> u8 { + print("Hello, World!\n"); + return 0; +} +``` + ## Usage **G** is being developed on x86-64 Linux, and is untested elsewhere. diff --git a/doc/spec.md b/doc/spec.md index 392abaf..3c465ac 100644 --- a/doc/spec.md +++ b/doc/spec.md @@ -61,7 +61,7 @@ An underscore character `_` may appear between digits in each part; these underscores are solely for readability and do not change the literal's value. ```ebnf -literal_flt = integer_seq, ".", integer_seq, [ "e", "+" | "-", integer_seq ] ; +literal_flt = integer_seq, [ ".", integer_seq ], [ "e", "+" | "-", integer_seq ] ; integer_seq = digit_dec, { "_", digit_dec } ; ``` diff --git a/src/lex.c b/src/lex.c index c240889..6254c20 100644 --- a/src/lex.c +++ b/src/lex.c @@ -40,9 +40,6 @@ char *tok_ks[] = { #define is_digit_doz(c) ((c >= '0' && c <= '9') || (c == 'A' || c == 'B')) #define is_digit_hex(c) ((c >= '0' && c <= '9') || (c >= 'A' || c == 'F')) -static inline u64 parse_int(char *s); -static inline f128 parse_flt(char *s); - /* Initialise a lexer. */ lex lex_init(const char *file, char *src, UINT len) { lex l = { file, src, src, src + len, 0, 0, 0, 0 }; @@ -106,24 +103,56 @@ tok lex_next(lex *l) { else if (strncmp(s, "for", 3) == 0) { T.k = TK_FOR; } else if (strncmp(s, "break", 5) == 0) { T.k = TK_BREAK; } else if (strncmp(s, "continue", 8) == 0) { T.k = TK_CONTINUE; } - else { T.k = TK_ID; if (!(T.s = strndup(s, sl))) { error(1, SERR); }} + else { T.k = TK_ID; if (!(T.s = strndup(s, sl))) { panic(SERR); }} } /* Handle number literals */ else if (is_digit_dec(C)) { - T.k = TK_INT; char *s = P; - - for (P += 1; is_alpha(C) || is_digit_dec(C); P += 1); - if (C == '.') { T.k = TK_FLT; P += 1; for (P += 1; is_digit_dec(C); P += 1); } - if (C == 'e') { P += (C == '+' || C == '-') ? 2 : 1; for (P += 1; is_digit_dec(C); P += 1); } + T.k = TK_INT; char *start = P; - UINT sl = P - s; CL += sl; + for (P += 1; is_alpha(C) || is_digit_dec(C) || C == '.'; P += 1) { + if (C == '.') { T.k = TK_FLT; } + if (C == 'e' || C == 'E') { T.k = TK_FLT; P += P[1] == '+' || P[1] == '-'; } + } - if (!(T.s = strndup(s, sl))) { error(1, SERR); } + UINT sl = P - start; CL += sl; + if (!(T.s = strndup(start, sl))) { panic(SERR); } switch (T.k) { - case TK_INT: { T.v_int = parse_int(T.s); } break; - case TK_FLT: { T.v_flt = parse_flt(T.s); } break; + case TK_INT: { + char *s = T.s; register u64 v = 0; u64 c; + register UINT b = 10; char *b_s; + + if (s[0] == '0') switch (s[1]) { + case 'b': { s += 2; b = 2; b_s = "binary"; } break; + case 'o': { s += 2; b = 8; b_s = "octal"; } break; + case 'd': { s += 2; b = 10; b_s = "decimal"; } break; + case 'z': { s += 2; b = 12; b_s = "duodecimal"; } break; + case 'x': { s += 2; b = 16; b_s = "hexadecimal"; } break; + default: { s += 1; } break; + } + + for (; s[0]; ++s) { + if (s[0] >= '0' && s[0] <= '9') { c = *s - '0'; } + else if (s[0] >= 'A' && s[0] <= 'F') { c = *s - ('A' - 10); } + + if (c >= b) { note(l->n, T.ln, T.cl, 0, "Invalid digit \'%c\' in %s constant", s[0], b_s); } + if (v > (U64_MAX - c) / b) { note(l->n, T.ln, T.cl, 0, "Integer literal cannot be represented"); } + + v = v * b + c; + } + + T.v_int = v; + } break; + case TK_FLT: { + /* TODO reimplement */ + char *s = T.s, *end; register f128 v = 0; + + v = strtold(s, &end); + if (*end != '\0') { note(l->n, T.ln, T.cl, 0, "Invalid digit \'%c\' in floating-point constant", *end); } + + T.v_flt = v; + } break; default: { /* Unreachable */ } break; } } @@ -243,15 +272,12 @@ tok lex_next(lex *l) { else if (P != Q) { P += 1; CL += 1; } if (quote == '\'') { - T.k = TK_INT; if (!(T.s = strndup(s, sl))) { error(1, SERR); } + T.k = TK_INT; if (!(T.s = strndup(s, sl))) { panic(SERR); } - /* - Numerical value of character literals is calculated and - stored in T.v_int - */ + /* Numerical value of character literals is calculated and stored in T.v_int */ for (UINT i = 0; i < sl; i += 1) { if (T.v_int > (U32_MAX - s[i]) / 256) { - note(l->n, T.ln, T.cl, 1, "Character constant exceeds maximum size"); break; + note(l->n, T.ln, T.cl, 0, "Character literal cannot be represented"); break; } T.v_int = T.v_int * 256 + s[i]; @@ -259,7 +285,7 @@ tok lex_next(lex *l) { } else if (quote == '\"') { T.k = TK_STR; T.h = syt_hash(s, sl); - if (!(T.s = strndup(s, sl))) { error(1, SERR); } + if (!(T.s = strndup(s, sl))) { panic(SERR); } } } break; @@ -283,39 +309,3 @@ void lex_debug(lex *l) { printf("%zu:%zu: %s \"%s\"\n", t.ln + 1, t.cl + 1, tok_ks[t.k], t.s); } } - -/* Parse an integer string into a value. */ -static inline u64 parse_int(char *s) { - /* TODO lex exponent part of numbers */ - register u64 v = 0; u64 c; register UINT b = 10; - - if (s[0] == '0') switch (s[1]) { - case 'b': { s += 2; b = 2; } break; case 'o': { s += 2; b = 8; } break; - case 'd': { s += 2; b = 10; } break; case 'z': { s += 2; b = 12; } break; - case 'x': { s += 2; b = 16; } break; default: { s += 1; } break; - } - - for (; s[0]; ++s) { - if (s[0] >= '0' && s[0] <= '9') { c = *s - '0'; } - else if (s[0] >= 'A' && s[0] <= 'F') { c = *s - ('A' - 10); } - - /* TODO better error handling */ - if (c >= b) { errno = EDOM; return 0; } - if (v > (U64_MAX - c) / b) { errno = ERANGE; return 0; } - - v = v * b + c; - } - - return v; -} - -static inline f128 parse_flt(char *s) { - /* TODO lex exponent part of numbers */ - register f128 v = 0; u64 c; char *endptr; - - v = strtold(s, &endptr); - /* TODO better error handling */ - if (*endptr != '\0') { return 0; } - - return v; -} diff --git a/src/main.c b/src/main.c index fde1694..d51477b 100644 --- a/src/main.c +++ b/src/main.c @@ -26,6 +26,7 @@ static bool bflag = false, Bflag = false, cflag = false; static bool Eflag = false, pflag = false, Pflag = false; static bool qflag = false, Sflag = false; +static char *output = NULL; static int verbosity = 0; static void compile(const char *file, char *src, UINT len); @@ -41,18 +42,19 @@ static void ver(void); int main(int ac, char *av[]) { A0 = av[0]; struct opt opt = OPTGET_INIT; opt.str = "bBcEf:O:pPqSvW:"; opt.lops = lops; for (int o; (o = optget(&opt, av, 1)) != -1;) switch (o) { - case 'b': { bflag = true; } break; /* Output LLVM IR files */ - case 'B': { Bflag = true; } break; /* Output LLVM bitcode files */ - case 'c': { cflag = true; } break; /* Output object files */ - case 'E': { Eflag = true; } break; /* Output lexer tokens */ - case 'f': { opt_f(opt.arg); } break; /* Configure formatting */ - case 'O': { opt_O(opt.arg); } break; /* Configure optimisation */ - case 'p': { pflag = true; } break; /* Output parser AST */ - case 'P': { Pflag = true; } break; /* Output analyser AST */ - case 'q': { qflag = true; } break; /* Silence certain outputs (for benchmarking) */ - case 'S': { Sflag = true; } break; /* Output assembly files */ - case 'v': { verbosity += 1; } break; /* Increase verbosity */ - case 'W': { opt_W(opt.arg); } break; /* Configure warnings and errors */ + case 'b': { bflag = true; } break; /* Output LLVM IR files */ + case 'B': { Bflag = true; } break; /* Output LLVM bitcode files */ + case 'c': { cflag = true; } break; /* Output object files */ + case 'E': { Eflag = true; } break; /* Output lexer tokens */ + case 'f': { opt_f(opt.arg); } break; /* Configure formatting */ + case 'o': { output = opt.arg; } break; /* Specify output file */ + case 'O': { opt_O(opt.arg); } break; /* Configure optimisation */ + case 'p': { pflag = true; } break; /* Output parser AST */ + case 'P': { Pflag = true; } break; /* Output analyser AST */ + case 'q': { qflag = true; } break; /* Silence certain outputs (for benchmarking) */ + case 'S': { Sflag = true; } break; /* Output assembly files */ + case 'v': { verbosity += 1; } break; /* Increase verbosity */ + case 'W': { opt_W(opt.arg); } break; /* Configure warnings and errors */ case 256: { hlp(); } return 0; case 257: { ver(); } return 0; default: {} return 1; @@ -118,14 +120,21 @@ static void opt_W(const char *arg) { /* Print help information. */ static void hlp(void) { puts("G - G Programming Language\n"); - puts("Usage: g\n"); + puts("Usage: g file...\n"); puts("Options:"); puts(" -b, Output LLVM IR files"); puts(" -B, Output LLVM bitcode files"); puts(" -c, Output object files"); puts(" -E, Output lexer tokens"); + // puts(" -f, Configure formatting"); + // puts(" -o file, Specify output file"); + // puts(" -O, Configure optimisation"); puts(" -p, Output parser AST"); puts(" -P, Output analyser AST"); + puts(" -q, Silence certain outputs (for benchmarking)"); + puts(" -S, Output assembly files"); + // puts(" -v, Increase verbosity "); + // puts(" -W, Configure warnings and errors"); puts(" --help Display help information"); puts(" --version Display version information"); }