ESH

Executive Shell
git clone http://git.omkov.net/ESH
Log | Tree | Refs | README | Download

ESH/src/lex.c (90 lines, 2.4 KiB) -rw-r--r-- blame download

01234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
// Copyright (C) 2023, Jakob Wakeling
// All rights reserved.

#include "lex.h"
#include "util/log.h"
#include "util/stack.h"
#include "util/util.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Printable names of the token kinds, looked up by a token's kind value. */
char *tok_ks[] = {
	"TK_VOID",
	"TK_EOF",
	"TK_WORD",
	"TK_END",
	"TK_PIPE",
	"TK_RIN",
	"TK_ROUT",
	"TK_RAPP",
};

/*
	Test whether c is a whitespace character: space, form feed, newline,
	carriage return, horizontal tab, or vertical tab.

	The argument is parenthesised so that any expression may be passed, but it
	is still evaluated more than once and so must be free of side effects.
*/
#define is_space(c) ( \
	(c) == ' '  || (c) == '\f' || (c) == '\n' || \
	(c) == '\r' || (c) == '\t' || (c) == '\v' \
)

/*
	Initialise a lexer over the len bytes beginning at str.

	The first token is lexed straight away, so the returned lexer already
	holds it as its current token.
*/
lex lex_init(char *str, u64 len) {
	lex l = {
		.s = str,
		.p = str,
		.q = str + len,
	};

	/* Prime the current token; the placeholder handed back is not needed */
	lex_next(&l);

	return l;
}

/* Shorthand for the lexer state; each expects a `lex *l` to be in scope. */
#define P (l->p) /* Pointer to the current character */
#define Q (l->q) /* Pointer one past the last character, i.e. end of input */
#define T (l->t) /* Current token, lexed ahead of being returned */

/* Return the current token, leaving the lexer where it is. */
tok lex_peek(lex *l) {
	return l->t;
}

/*
	Lex the next token, and return the current one.

	The lexer holds one token in l->t. Each call returns the token that was
	current on entry and scans the input for its successor. Once TK_EOF is
	current it is returned on every subsequent call without scanning.

	A TK_WORD token carries a heap-allocated copy of its text made with
	strndup(), which must eventually be released with free(). All other tokens
	have a null string.

	If a single quote is left unclosed, a warning is logged, the partial word
	is discarded, and the new current token is left as TK_VOID; the input is
	exhausted at that point, so the following call produces TK_EOF.
*/
tok lex_next(lex *l) {
	if (T.k == TK_EOF) { return T; }
	tok t = T; T = (tok){ .k = TK_VOID };

	/* Skip null characters and whitespace */
	skip:; for (; P != Q && (!P[0] || is_space(P[0])); P += 1);

	/* Return the current token immediately if EOF or END is reached */
	if (P == Q) { T.k = TK_EOF; return t; }

	/*
		NOTE(review): the skip above already consumes '\n' as whitespace, so
		in practice only ';' reaches this test. Confirm whether a newline is
		meant to produce TK_END before changing either side.
	*/
	if (P[0] == '\n' || P[0] == ';') { P += 1; T.k = TK_END; return t; }

	/* Skip comments, which run to the end of the line or the end of input */
	if (P[0] == '#') {
		for (P += 1; P != Q && P[0] != '\n'; P += 1);

		/* Only look at P[0] when the comment did not run into EOF */
		if (P != Q && P[0] == '\n') { P += 1; }
		goto skip;
	}

	switch (P[0]) {
	case '|': { T.k = TK_PIPE; P += 1; } break;
	case '<': { T.k = TK_RIN;  P += 1; } break;
	case '>': {
		/* Look ahead for ">>" only when a second character exists */
		if (P + 1 != Q && P[1] == '>') { T.k = TK_RAPP; P += 2; }
		else { T.k = TK_ROUT; P += 1; }
	} break;

	/* Handle words, TODO review quotes and substitutions */
	default: {
		stack s = stack_init(sizeof (char), NULL);

		/*
			A word ends at any whitespace, not just a space, so that it agrees
			with the whitespace skipped between tokens above.
		*/
		for (; P != Q && P[0] != ';' && !is_space(P[0]); P += 1) {
			if (P[0] == '|' || P[0] == '<' || P[0] == '>') { break; }

			/* Handle single quotes, copying everything up to the closing ' */
			if (P[0] == '\'') {
				for (P += 1; P != Q && P[0] != '\''; P += 1) {
					stack_push(&s, P[0]);
				}

				/* Release the partial word rather than leaking it */
				if (P == Q) {
					log_warn("Missing closing \'");
					stack_free(&s); return t;
				}

				/* P rests on the closing ', which the loop step moves past */
			}

			/* Handle all other characters */
			else { stack_push(&s, P[0]); }
		}

		T.s = strndup(s.a, s.al); T.k = TK_WORD; stack_free(&s);
	} break;
	}

	return t;
}

/*
	Print lexer debug output.

	Consumes every remaining token from the lexer and prints one line per
	token to standard output, giving the name of its kind followed by its
	string in double quotes. Each token's string is freed after printing, so
	the lexer is exhausted when this returns.
*/
void lex_debug(lex *l) {
	for (tok t = lex_next(l); t.k != TK_EOF; free(t.s), t = lex_next(l)) {
		/*
			Only words carry a string. Passing a null pointer for %s is
			undefined, so print the "(null)" text glibc would show explicitly.
		*/
		printf("%s \"%s\"\n", tok_ks[t.k], t.s ? t.s : "(null)");
	}
}