G

G Programming Language
git clone http://git.omkov.net/G
Log | Tree | Refs | README | Download

Author: Jakob Wakeling <[email protected]>
Date: 2022-01-05 11:48:22
Commit: 869f0cfe4f33532abff7792d05c8e20a507e25f2
Parent: 1979766c850eeb32fc925556ae2cd2826de26c21

util: Update util/map

Diffstat

M src/util/map.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
M src/util/map.h | 22 ++++++++++++++--------

2 files changed, 165 insertions, 50 deletions

diff --git a/src/util/map.c b/src/util/map.c
index 901346a..d51273a 100644
--- a/src/util/map.c
+++ b/src/util/map.c
@@ -1,115 +1,163 @@
-// util/map.c, version 0.0.0
+// util/map.c, version 0.1.0
 // Map utility source file from libutil
 // Copyright (C) 2021, Jakob Wakeling
 // All rights reserved.
 

 
+/*
+	This file uses the currently non-standard 'typeof' operator. Its use is
+	considered acceptable because it is supported by both GCC and Clang, and
+	POSIX extensions are used here anyway. Additionally, it is expected that the
+	'typeof' operator will become standard in C23.
+*/
+
 #include "alloc.h"
-#include "fnv.h"
 #include "map.h"
 #include "util.h"
 
-#include <malloc.h>
-
+#include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
-#define LOAD_FACTOR 0.75
+#define LOAD_FACTOR 0.90
+
+UINT map_initial_capacity = 64;
 
 static void map_resize(map *m);
+static u64  hash(const char *s, UINT l);
 
 /* Initialise a map. */
-map map_init(UINT el, void (*free)(void *)) {
-	return (map){ NULL, NULL, 0, 0, el, free };
+map map_init(void (*free)(void *)) {
+	return (map){ NULL, 0, 0, free };
 }
 
-/* Unititialise a map. */
+/* Uninitialise a map. */
 void map_free(map *m) {
-	for (map_ent *e = m->n, *n; e; e = n) {
-		n = e->n; if (m->free) { m->free(e->v); } free(e);
+	for (UINT i = 0; i < m->ac; i += 1) {
+		if (m->a[i].h == 0) { continue; } free(m->a[i].k);
+		if (m->free != NULL) { m->free(m->a[i].v); }
 	}
 
-	free(m->a);
+	free(m->a); *m = (map){ NULL, 0, 0, NULL };
 }
 
-/* Insert a pointer into a map. */
-void *map_insert(map *m, const char *k, void *v) {
+#define SWAP(a, b) { register typeof (a) t = a; a = b; b = t; }
+#define DIB(i) ((i + m->ac - (m->a[i].h % m->ac)) % m->ac)
+
+/* Insert a key-value pair into a map. The key is duplicated. */
+void map_insert(map *m, char *k, void *v) {
 	if (m->ac == 0 || m->al >= ((f64)m->ac * LOAD_FACTOR)) { map_resize(m); }
-	UINT index = fnv1a64(k, strlen(k)) % m->ac; void *old = map_remove(m, k);
+	UINT h = hash(k, strlen(k)), i = h % m->ac; k = strdup(k);
 
-	/* Allocate and define the entry */
-	map_ent *e = xcalloc(1, sizeof (*e));
-	// map_debug(m);
-	e->k = (char *)k; e->v = v; e->n = m->n; m->n = e;
-	
-	/* Insert the entry at begining of the buckets chain */
-	// printf("m->a: %p\n", m->a);
-	// printf("%zu\n", malloc_usable_size(m->a + (index * m->el)));
-	memcpy(&e->c, m->a + (index * m->el), m->el);
-	memcpy(m->a + (index * m->el), &e, m->el); m->al += 1;
-	
-	return old;
+	for (UINT dist = 0;; i = (i + 1) % m->ac, dist += 1) {
+		if (m->a[i].h == 0) {
+			/* If an empty bucket is found, insert here */
+			m->a[i] = (typeof (*m->a)){ h, k, v };
+			m->al += 1; return;
+		}
+		
+		/* Calculate tsid, the DIB of the item at the current index */
+		UINT tsid = (i + m->ac - (m->a[i].h % m->ac)) % m->ac;
+		
+		if (dist > tsid) {
+			SWAP(m->a[i].h, h);
+			SWAP(m->a[i].k, k);
+			SWAP(m->a[i].v, v);
+			
+			dist = tsid;
+		}
+	}
 }
 
-/* Lookup a pointer from a map. */
-void *map_lookup(map *m, const char *k) {
-	UINT index = fnv1a64(k, strlen(k)) % m->ac;
-	
-	map_ent *e; memcpy(&e, m->a + (index * m->el), m->el);
-	for (; e; e = e->c) { if (!strcmp(e->k, k)) { return e->v; } }
+/* Lookup the value associated with a key from a map. */
+void *map_lookup(map *m, char *k) {
+	UINT h = hash(k, strlen(k)), i = h % m->ac;
 
-	return NULL;
+	for (UINT dist = 0;; i = (i + 1) % m->ac, dist += 1) {
+		if (m->a[i].h == 0) { return NULL; }
+		
+		if (dist > DIB(i)) { return NULL; }
+		if ((m->a[i].h == h) && (strcmp(m->a[i].k, k) == 0)) {
+			return m->a[i].v;
+		}
+	}
 }
 
-/* Remove a pointer from the top of a map. */
-void *map_remove(map *m, const char *k) {
-	return NULL; /* TODO */
+/* Remove a key-value pair from a map. */
+void *map_remove(map *m, char *k) {
+	UINT h = hash(k, strlen(k)), i = h % m->ac;
+	
+	for (UINT dist = 0;; i = (i + 1) % m->ac, dist += 1) {
+		if (m->a[i].h == 0) { return NULL; }
+		
+		if (dist > DIB(i)) { return NULL; }
+		if ((m->a[i].h == h) && (strcmp(m->a[i].k, k) == 0)) {
+			/* If the element to be removed is found, then deallocate it */
+			if (m->free != NULL) { m->free(m->a[i].v); } free(m->a[i].k);
+			m->a[i] = (typeof (*m->a)){ 0, NULL, NULL }; m->al -= 1;
+			
+			/*  */
+			for (UINT j = (i + 1) % m->ac;; i = j, j = (j + 1) % m->ac) {
+				if (m->a[j].h == 0 || DIB(j) == 0) { break; }
+				
+				SWAP(m->a[i].h, m->a[j].h);
+				SWAP(m->a[i].k, m->a[j].k);
+				SWAP(m->a[i].v, m->a[j].v);
+			}
+			
+			/*
+				TODO I am unsure if I want to have this procedure return the
+				removed value or simply an acknowledgement of its removal
+			*/
+			return (void *)true;
+		}
+	}
 }
 
-/* Print a basic representation of the map to stdout. */
+/* Print a basic representation of a map to stdout. */
 void map_print(map *m) {
-	for (map_ent *e = m->n; e; e = e->n) {
-		printf("%s -> %s\n", e->k, (char *)e->v);
+	for (UINT i = 0; i < m->ac; i += 1) if (m->a[i].h != 0) {
+		printf("%s -> %s\n", m->a[i].k, (char *)m->a[i].v);
 	}
 }
 
-/* Print a debug representation of the map to stdout. */
-/* FIXME for some reason printf leaves junk in my calloc (m->a[33]) */
+/* Print a debug representation of a map to stdout. */
 void map_debug(map *m) {
-	for (UINT i = 0; i != m->ac; i += 1) {
-		map_ent *e; memcpy(&e, m->a + (i * m->el), m->el);
-		
-		// printf("%zu: ", i);
-		for (; e; e = e->c) { printf("%s -> %s, ", e->k, (char *)e->v); }
-		// printf("\n");
-	} printf("\n");
+	for (UINT i = 0; i < m->ac; i += 1) {
+		if (m->a[i].h == 0) { printf("[%zu] %lu\n", i, m->a[i].h); }
+		else printf(
+			"[%zu] %lu, %s -> %s, DIB: %zu\n",
+			i, m->a[i].h, m->a[i].k, (char *)m->a[i].v, DIB(i)
+		);
+	}
 }
 
 /* Double the number of buckets in a map. */
 static void map_resize(map *m) {
-	if (m->ac == 0) { m->ac = 256; } else { m->ac *= 2; }
-	
-	/* If the map is empty, simply resize it without rehashing */
-	if (m->al == 0) {
-		// printf("%zu * %zu = %zu; %zu\n", m->ac, m->el, m->ac * m->el, m->ac * 8 * m->el); fflush(stdout);
-		free(m->a); m->a = xcalloc(m->ac * 8, m->el);
-		// printf("m->a: %p\n", m->a);
-		// printf("%p: %zu\n", m->a, malloc_usable_size(m->a));
-		// printf("%p: %zu\n", m->a + 1696, malloc_usable_size(m->a + 1696));
-		// printf("%zu\n", ((m->a + 1696) - m->a));
-		fflush(stdout);
+	/* If the map is empty, simply allocate it without rehashing */
+	if (m->ac == 0) {
+		m->ac = map_initial_capacity;
+		m->a = xcalloc(m->ac, sizeof (*m->a)); return;
 	}
+	
 	/* Otherwise rehash every element into a new resized map */
-	else {
-		// printf("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n"); fflush(stdout);
-		
-		map old = *m;
-		
-		m->a = xcalloc(m->ac * 8, m->el); m->n = NULL; m->al = 0;
-		for (map_ent *e = old.n; e; e = e->n) { map_insert(m, e->k, e->v); }
+	map old = *m; m->ac *= 2; m->a = xcalloc(m->ac, sizeof (*m->a)); m->al = 0;
+	
+	for (UINT i = 0; i < old.ac; i += 1) {
+		if (old.a[i].h == 0) { continue; }
 
-		map_free(&old);
+		map_insert(m, old.a[i].k, old.a[i].v);
+		free(old.a[i].k);
 	}
+	
+	free(old.a);
+}
+
+/* Compute the hash of some data. Will not return 0. */
+static u64 hash(const char *dat, UINT len) {
+	register u64 fnv = 0xCBF29CE484222325;
+	for (; len; len -= 1, dat += 1) { fnv ^= *dat; fnv *= 0x00000100000001B3; }
+	fnv |= fnv == 0; return fnv;
 }
diff --git a/src/util/map.h b/src/util/map.h
index d4b4401..63b0784 100644
--- a/src/util/map.h
+++ b/src/util/map.h
@@ -1,4 +1,4 @@
-// util/map.h, version 0.0.0
+// util/map.h, version 0.1.0
 // Map utility header file from libutil
 // Copyright (C) 2021, Jakob Wakeling
 // All rights reserved.
@@ -10,17 +10,23 @@
 
 #include "util.h"
 
-#define MAP_INSERT(m, k, v) map_insert(m, k, (void *)(UINT)(e))
+typedef struct {
+	struct { u64 h; char *k; void *v; } *a;
+	UINT al, ac; void (*free)(void *);
+} map;
 
-typedef struct map_ent { char *k; void *v; struct map_ent *c, *n; } map_ent;
-typedef struct { map_ent **a, *n; UINT al, ac, el; void (*free)(void *); } map;
+// #define MAP_INIT(type, free) map_init(sizeof (type), (free))
+// #define MAP_INSERT(m, k, v) map_insert(m, k, (void *)(UINT)(e))
 
-extern map  map_init(UINT el, void (*free)(void *));
+/* This should be used for debugging or testing only. */
+extern UINT map_initial_capacity;
+
+extern map  map_init(void (*free)(void *));
 extern void map_free(map *m);
 
-extern void *map_insert(map *m, const char *k, void *v);
-extern void *map_lookup(map *m, const char *k);
-extern void *map_remove(map *m, const char *k);
+extern void  map_insert(map *m, char *k, void *v);
+extern void *map_lookup(map *m, char *k);
+extern void *map_remove(map *m, char *k);
 
 extern void map_print(map *m);
 extern void map_debug(map *m);