G

G Programming Language
git clone http://git.omkov.net/G
Log | Tree | Refs | README | Download

Author: Jakob Wakeling <[email protected]>
Date: 2022-01-06 02:41:12
Commit: 58704bd2f2be6db8a7cc5fcecf9ec8c3f14232b3
Parent: 6d119aeef9f375133743b8b148efd4fde90934cb

symbol: Add specific symbol table hashmap

Diffstat

A src/symbol.c | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/symbol.h | 33 +++++++++++++++++++++++++++++++++

2 files changed, 193 insertions, 0 deletions

diff --git a/src/symbol.c b/src/symbol.c
new file mode 100644
index 0000000..6b4b21a
--- /dev/null
+++ b/src/symbol.c
@@ -0,0 +1,160 @@
+// symbol.c
+// Symbol source file for G
+// Copyright (C) 2021, Jakob Wakeling
+// All rights reserved.
+
+
+
+/*
+	This file uses the currently non-standard 'typeof' operator. Its use is
+	considered acceptable because it is supported by both GCC and Clang, and
+	POSIX extensions are used here anyway. Additionally, it is expected that the
+	'typeof' operator will become standard in C23.
+*/
+
+#include "symbol.h"
+#include "util/alloc.h"
+#include "util/util.h"
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Number of buckets allocated the first time an empty map is resized. */
+#define INITIAL_CAPACITY 64
+/* Fraction of occupied buckets at or above which an insert grows the map. */
+#define LOAD_FACTOR 0.90
+
+/* Allocates the bucket array of an empty map, or doubles and rehashes it. */
+static void map_resize(syt *m);
+
+/*
+	Initialise a map. The result has no bucket array and zero length and
+	capacity. The free argument is accepted but is not used by this function.
+*/
+syt syt_init(void (*free)(void *)) {
+	syt m = { .a = NULL, .al = 0, .ac = 0 };
+	
+	(void)free;
+	return m;
+}
+
+/*
+	Uninitialise a map. Frees the key of every occupied bucket and then the
+	bucket array itself, and resets the map to its initial empty state.
+*/
+void syt_free(syt *m) {
+	for (UINT i = 0; i != m->ac; ++i) {
+		/* A stored hash of 0 marks an unused bucket, which holds no key */
+		if (m->a[i].h != 0) { free(m->a[i].k); }
+	}
+	
+	free(m->a);
+	m->a = NULL; m->al = 0; m->ac = 0;
+}
+
+/*
+	Compute the 64-bit FNV-1a hash of the len bytes starting at dat. Will not
+	return 0; a computed hash of 0 is mapped to 1.
+*/
+u64 syt_hash(const char *dat, UINT len) {
+	u64 fnv = 0xCBF29CE484222325; /* FNV 64-bit offset basis */
+	
+	for (; len; len -= 1, dat += 1) {
+		/*
+			Read each byte as unsigned char. Plain char may be signed, in which
+			case bytes >= 0x80 would be sign-extended and corrupt the upper
+			bits of the hash, making the result differ between platforms.
+		*/
+		fnv ^= (unsigned char)*dat;
+		fnv *= 0x00000100000001B3; /* FNV 64-bit prime */
+	}
+	
+	fnv |= fnv == 0; return fnv;
+}
+
+#define SWAP(a, b) { register typeof (a) t = a; a = b; b = t; }
+#define DIB(i) ((i + m->ac - (m->a[i].h % m->ac)) % m->ac)
+
+/*
+	Insert a key-value pair into a map. The key is duplicated with strdup and
+	the copy is stored. An existing entry with an equal key is not searched
+	for, so it is neither replaced nor removed.
+*/
+void syt_insert(syt *m, char *k, sym v) {
+	/*
+		Grow first. Keeping the fill below LOAD_FACTOR means the probe loop
+		below always reaches an empty bucket.
+	*/
+	if (m->ac == 0 || m->al >= ((f64)m->ac * LOAD_FACTOR)) { map_resize(m); }
+	
+	/*
+		NOTE(review): syt_hash returns u64 but the result is held in a UINT. If
+		UINT is narrower than 64 bits the truncated hash could be 0, which the
+		table treats as an empty bucket. Confirm the width of UINT.
+	*/
+	UINT h = syt_hash(k, strlen(k));
+	UINT i = h % m->ac;
+	UINT dist = 0;
+	
+	k = strdup(k);
+	
+	/* Probe forwards until an empty bucket is found */
+	while (m->a[i].h != 0) {
+		/* tsid is the DIB of the item at the current index */
+		UINT tsid = (i + m->ac - (m->a[i].h % m->ac)) % m->ac;
+		
+		/*
+			If the resident item is closer to its ideal bucket than the item
+			being placed, they trade places and probing continues with the
+			displaced item.
+		*/
+		if (tsid < dist) {
+			SWAP(m->a[i].h, h);
+			SWAP(m->a[i].k, k);
+			SWAP(m->a[i].v, v);
+			
+			dist = tsid;
+		}
+		
+		i = (i + 1) % m->ac;
+		dist += 1;
+	}
+	
+	m->a[i] = (typeof (*m->a)){ h, k, v };
+	m->al += 1;
+}
+
+/*
+	Lookup the value associated with a key from a map. Returns a zeroed sym if
+	the key is not present, including when the map has no buckets allocated.
+*/
+sym syt_lookup(syt *m, char *k) {
+	/*
+		A map that has not been inserted into, or that has been freed, has
+		ac == 0. Computing h % m->ac would then divide by zero.
+	*/
+	if (m->ac == 0) { return (sym){ 0 }; }
+	
+	UINT h = syt_hash(k, strlen(k)), i = h % m->ac;
+	
+	for (UINT dist = 0;; i = (i + 1) % m->ac, dist += 1) {
+		/* An empty bucket ends the probe sequence */
+		if (m->a[i].h == 0) { break; }
+		
+		/*
+			Insertion displaces any item closer to its ideal bucket than the
+			item being placed, so finding such an item means k is absent.
+		*/
+		if (dist > DIB(i)) { break; }
+		
+		if ((m->a[i].h == h) && (strcmp(m->a[i].k, k) == 0)) {
+			return m->a[i].v;
+		}
+	}
+	
+	return (sym){ 0 };
+}
+
+/*
+	Remove a key-value pair from a map. Does nothing if the key is not present
+	or the map has no buckets allocated. The stored copy of the key is freed.
+*/
+void syt_remove(syt *m, char *k) {
+	/*
+		A map that has not been inserted into, or that has been freed, has
+		ac == 0. Computing h % m->ac would then divide by zero.
+	*/
+	if (m->ac == 0) { return; }
+	
+	UINT h = syt_hash(k, strlen(k)), i = h % m->ac;
+	
+	for (UINT dist = 0;; i = (i + 1) % m->ac, dist += 1) {
+		/* An empty bucket ends the probe sequence; the key is absent */
+		if (m->a[i].h == 0) { return; }
+		
+		/* An item closer to its ideal bucket than k would be means k is absent */
+		if (dist > DIB(i)) { return; }
+		if ((m->a[i].h == h) && (strcmp(m->a[i].k, k) == 0)) {
+			/* If the element to be removed is found, then deallocate it */
+			free(m->a[i].k);
+			m->a[i] = (typeof (*m->a)){ 0, NULL, { 0 } }; m->al -= 1;
+			
+			/*
+				Shift the following items back by one bucket each, stopping at
+				an empty bucket or at an item already in its ideal bucket, so
+				that no gap is left in the middle of a probe sequence.
+			*/
+			for (UINT j = (i + 1) % m->ac;; i = j, j = (j + 1) % m->ac) {
+				if (m->a[j].h == 0 || DIB(j) == 0) { break; }
+				
+				SWAP(m->a[i].h, m->a[j].h);
+				SWAP(m->a[i].k, m->a[j].k);
+				SWAP(m->a[i].v, m->a[j].v);
+			}
+			
+			/*
+				TODO I am unsure if I want to have this procedure return the
+				removed value or simply an acknowledgement of its removal
+			*/
+			return;
+		}
+	}
+}
+
+/*
+	Print a basic representation of a map to stdout, one line per occupied
+	bucket in bucket order. The value is currently printed as "TODO".
+*/
+void syt_print(syt *m) {
+	for (UINT i = 0; i < m->ac; i += 1) {
+		/* Skip unused buckets, which are marked by a stored hash of 0 */
+		if (m->a[i].h == 0) { continue; }
+		
+		printf("%s -> %s\n", m->a[i].k, "TODO");
+	}
+}
+
+/*
+	Print a debug representation of a map to stdout. Every bucket is printed
+	with its index and stored hash; occupied buckets also show the key and the
+	entry's DIB. The value is currently printed as "TODO".
+*/
+void syt_debug(syt *m) {
+	for (UINT i = 0; i < m->ac; i += 1) {
+		/*
+			u64 and UINT are declared outside this file, so the arguments are
+			cast to types whose printf conversions are fixed by the C standard
+			rather than assuming they match %lu and %zu.
+		*/
+		if (m->a[i].h == 0) {
+			printf("[%zu] %llu\n", (size_t)i, (unsigned long long)m->a[i].h);
+		}
+		else printf(
+			"[%zu] %llu, %s -> %s, DIB: %zu\n",
+			(size_t)i, (unsigned long long)m->a[i].h, m->a[i].k, "TODO",
+			(size_t)DIB(i)
+		);
+	}
+}
+
+/*
+	Grow a map. An empty map is given INITIAL_CAPACITY buckets; otherwise the
+	number of buckets is doubled and every entry is reinserted.
+*/
+static void map_resize(syt *m) {
+	if (m->ac != 0) {
+		/* Keep the old table aside and rehash its entries into a new one */
+		syt old = *m;
+		
+		m->ac *= 2;
+		m->a = xcalloc(m->ac, sizeof (*m->a));
+		m->al = 0;
+		
+		for (UINT i = 0; i < old.ac; i += 1) {
+			if (old.a[i].h != 0) {
+				/* syt_insert duplicates the key, so the old copy is freed */
+				syt_insert(m, old.a[i].k, old.a[i].v);
+				free(old.a[i].k);
+			}
+		}
+		
+		free(old.a);
+		return;
+	}
+	
+	/* Nothing to rehash in an empty map; just allocate the first table */
+	m->ac = INITIAL_CAPACITY;
+	m->a = xcalloc(m->ac, sizeof (*m->a));
+}
diff --git a/src/symbol.h b/src/symbol.h
new file mode 100644
index 0000000..0b26eac
--- /dev/null
+++ b/src/symbol.h
@@ -0,0 +1,33 @@
+// symbol.h
+// Symbol header file for G
+// Copyright (C) 2021, Jakob Wakeling
+// All rights reserved.
+
+
+
+#ifndef UTIL_SYMBOL_H_Q1VLFKFE
+#define UTIL_SYMBOL_H_Q1VLFKFE
+
+#include "util/util.h"
+
+/*
+	Value stored against each key in a symbol table. Currently holds a single
+	UINT; the meaning of the field is not documented here (TODO).
+*/
+typedef struct {
+	UINT a;
+} sym;
+
+/*
+	Symbol table: a hashmap from string keys to sym values.
+	a  is the bucket array; each bucket holds a hash h, a key k and a value v.
+	   A hash of 0 marks an unused bucket.
+	al is the number of occupied buckets.
+	ac is the number of buckets allocated.
+*/
+typedef struct {
+	struct { u64 h; char *k; sym v; } *a;
+	UINT al, ac;
+} syt;
+
+/* Return an empty map. The free argument is currently unused. */
+extern syt  syt_init(void (*free)(void *));
+/* Free every stored key and the bucket array, and reset the map to empty. */
+extern void syt_free(syt *m);
+/* Hash l bytes starting at s. Never returns 0. */
+extern u64  syt_hash(const char *s, UINT l);
+
+/* Insert a key-value pair; the key string is duplicated. */
+extern void syt_insert(syt *m, char *k, sym v);
+/* Return the value stored for k, or a zeroed sym if k is not found. */
+extern sym  syt_lookup(syt *m, char *k);
+/* Remove the entry for k, if any, and free its stored key. */
+extern void syt_remove(syt *m, char *k);
+
+/* Print each occupied bucket's key to stdout. */
+extern void syt_print(syt *m);
+/* Print every bucket, occupied or not, to stdout for debugging. */
+extern void syt_debug(syt *m);
+
+#endif // UTIL_SYMBOL_H_Q1VLFKFE