Author | Jakob Wakeling <[email protected]> |
Date | 2022-01-06 02:41:12 |
Commit | 58704bd2f2be6db8a7cc5fcecf9ec8c3f14232b3 |
Parent | 6d119aeef9f375133743b8b148efd4fde90934cb |
symbol: Add specific symbol table hashmap
Diffstat
A | src/symbol.c | | | 160 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | src/symbol.h | | | 33 | +++++++++++++++++++++++++++++++++ |
2 files changed, 193 insertions, 0 deletions
diff --git a/src/symbol.c b/src/symbol.c new file mode 100644 index 0000000..6b4b21a --- /dev/null +++ b/src/symbol.c @@ -0,0 +1,160 @@ +// symbol.c +// Symbol source file for G +// Copyright (C) 2021, Jakob Wakeling +// All rights reserved. + + + +/* + This file uses the currently non-standard 'typeof' operator. Its use is + considered acceptable because it is supported by both GCC and Clang, and + POSIX extensions are used here anyway. Additionally, it is expected that the + 'typeof' operator will become standard in C23. +*/ + +#include "symbol.h" +#include "util/alloc.h" +#include "util/util.h" + +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define INITIAL_CAPACITY 64 +#define LOAD_FACTOR 0.90 + +static void map_resize(syt *m); + +/* Initialise a map. */ +syt syt_init(void (*free)(void *)) { + return (syt){ NULL, 0, 0 }; +} + +/* Uninitialise a map. */ +void syt_free(syt *m) { + for (UINT i = 0; i < m->ac; i += 1) { + if (m->a[i].h == 0) { continue; } free(m->a[i].k); + } + + free(m->a); *m = (syt){ NULL, 0, 0 }; +} + +/* Compute the hash of some data. Will not return 0. */ +u64 syt_hash(const char *dat, UINT len) { + register u64 fnv = 0xCBF29CE484222325; + for (; len; len -= 1, dat += 1) { fnv ^= *dat; fnv *= 0x00000100000001B3; } + fnv |= fnv == 0; return fnv; +} + +#define SWAP(a, b) { register typeof (a) t = a; a = b; b = t; } +#define DIB(i) ((i + m->ac - (m->a[i].h % m->ac)) % m->ac) + +/* Insert a key-value pair into a map. The key is duplicated. */ +void syt_insert(syt *m, char *k, sym v) { + if (m->ac == 0 || m->al >= ((f64)m->ac * LOAD_FACTOR)) { map_resize(m); } + UINT h = syt_hash(k, strlen(k)), i = h % m->ac; k = strdup(k); + + for (UINT dist = 0;; i = (i + 1) % m->ac, dist += 1) { + if (m->a[i].h == 0) { + /* If an empty bucket is found, insert here */ + m->a[i] = (typeof (*m->a)){ h, k, v }; + m->al += 1; return; + } + + /* Calculate tsid, the DIB of the item at the current index */ + UINT tsid = (i + m->ac - (m->a[i].h % m->ac)) % m->ac; + + if (dist > tsid) { + SWAP(m->a[i].h, h); + SWAP(m->a[i].k, k); + SWAP(m->a[i].v, v); + + dist = tsid; + } + } +} + +/* Lookup the value associated with a key from a map. */ +sym syt_lookup(syt *m, char *k) { + UINT h = syt_hash(k, strlen(k)), i = h % m->ac; + + for (UINT dist = 0;; i = (i + 1) % m->ac, dist += 1) { + if (m->a[i].h == 0) { return (sym){}; } + + if (dist > DIB(i)) { return (sym){}; } + if ((m->a[i].h == h) && (strcmp(m->a[i].k, k) == 0)) { + return m->a[i].v; + } + } +} + +/* Remove a key-value pair from a map. */ +void syt_remove(syt *m, char *k) { + UINT h = syt_hash(k, strlen(k)), i = h % m->ac; + + for (UINT dist = 0;; i = (i + 1) % m->ac, dist += 1) { + if (m->a[i].h == 0) { return; } + + if (dist > DIB(i)) { return; } + if ((m->a[i].h == h) && (strcmp(m->a[i].k, k) == 0)) { + /* If the element to be removed is found, then deallocate it */ + free(m->a[i].k); + m->a[i] = (typeof (*m->a)){ 0, NULL, {} }; m->al -= 1; + + /* */ + for (UINT j = (i + 1) % m->ac;; i = j, j = (j + 1) % m->ac) { + if (m->a[j].h == 0 || DIB(j) == 0) { break; } + + SWAP(m->a[i].h, m->a[j].h); + SWAP(m->a[i].k, m->a[j].k); + SWAP(m->a[i].v, m->a[j].v); + } + + /* + TODO I am unsure if I want to have this procedure return the + removed value or simply an acknowledgement of its removal + */ + return; + } + } +} + +/* Print a basic representation of a map to stdout. */ +void syt_print(syt *m) { + for (UINT i = 0; i < m->ac; i += 1) if (m->a[i].h != 0) { + printf("%s -> %s\n", m->a[i].k, "TODO"); + } +} + +/* Print a debug representation of a map to stdout. */ +void syt_debug(syt *m) { + for (UINT i = 0; i < m->ac; i += 1) { + if (m->a[i].h == 0) { printf("[%zu] %lu\n", i, m->a[i].h); } + else printf( + "[%zu] %lu, %s -> %s, DIB: %zu\n", + i, m->a[i].h, m->a[i].k, "TODO", DIB(i) + ); + } +} + +/* Double the number of buckets in a map. */ +static void map_resize(syt *m) { + /* If the map is empty, simply allocate it without rehashing */ + if (m->ac == 0) { + m->ac = INITIAL_CAPACITY; + m->a = xcalloc(m->ac, sizeof (*m->a)); return; + } + + /* Otherwise rehash every element into a new resized map */ + syt old = *m; m->ac *= 2; m->a = xcalloc(m->ac, sizeof (*m->a)); m->al = 0; + + for (UINT i = 0; i < old.ac; i += 1) { + if (old.a[i].h == 0) { continue; } + + syt_insert(m, old.a[i].k, old.a[i].v); + free(old.a[i].k); + } + + free(old.a); +} diff --git a/src/symbol.h b/src/symbol.h new file mode 100644 index 0000000..0b26eac --- /dev/null +++ b/src/symbol.h @@ -0,0 +1,33 @@ +// symbol.h +// Symbol header file for G +// Copyright (C) 2021, Jakob Wakeling +// All rights reserved. + + + +#ifndef UTIL_SYMBOL_H_Q1VLFKFE +#define UTIL_SYMBOL_H_Q1VLFKFE + +#include "util/util.h" + +typedef struct { + UINT a; +} sym; + +typedef struct { + struct { u64 h; char *k; sym v; } *a; + UINT al, ac; +} syt; + +extern syt syt_init(void (*free)(void *)); +extern void syt_free(syt *m); +extern u64 syt_hash(const char *s, UINT l); + +extern void syt_insert(syt *m, char *k, sym v); +extern sym syt_lookup(syt *m, char *k); +extern void syt_remove(syt *m, char *k); + +extern void syt_print(syt *m); +extern void syt_debug(syt *m); + +#endif // UTIL_SYMBOL_H_Q1VLFKFE