From: Dale Weiler Date: Tue, 1 Jan 2013 07:59:04 +0000 (+0000) Subject: Implemented a optimized hash-set that can be used in various parts of the compiler... X-Git-Tag: before-library~398 X-Git-Url: https://git.rm.cloudns.org/?a=commitdiff_plain;h=686394654fff6ecfc4f6b9a76bec599585d56795;p=xonotic%2Fgmqcc.git Implemented a optimized hash-set that can be used in various parts of the compiler (to get a little more speed). I intend this to replace the hackery that is code_genstring, and code_util_str_htgeth. --- diff --git a/gmqcc.h b/gmqcc.h index 52ae6c7..02b9bfe 100644 --- a/gmqcc.h +++ b/gmqcc.h @@ -320,6 +320,14 @@ typedef struct hash_table_t { struct hash_node_t **table; } hash_table_t, *ht; +typedef struct hash_set_t { + size_t bits; + size_t mask; + size_t capacity; + size_t *items; + size_t total; +} hash_set_t, *hs; + /* * hashtable implementation: * @@ -358,6 +366,45 @@ void util_htseth(hash_table_t *ht, const char *key, size_t hash, void * void *util_htget (hash_table_t *ht, const char *key); void *util_htgeth(hash_table_t *ht, const char *key, size_t hash); + +/* + * hashset implementation: + * This was designed for pointers: you manage the life of the object yourself + * if you do use this for non-pointers please be warned that the object may not + * be valid if the duration of it exceeds (i.e on stack). So you need to allocate + * yourself, or put those in global scope to ensure duration is for the whole + * runtime. + * + * util_hsnew() -- to make a new hashset + * util_hsadd(set, key) -- to add something in the set + * util_hshas(set, key) -- to check if something is in the set + * util_hsrem(set, key) -- to remove something in the set + * util_hsdel(set) -- to delete the set + * + * example of use: + * + * hs foo = util_hsnew(); + * char *bar = "hello blub\n"; + * char *baz = "hello dale\n"; + * + * util_hsadd(foo, bar); + * util_hsadd(foo, baz); + * util_hsrem(foo, baz); + * + * printf("bar %d | baz %d\n", + * util_hshas(foo, bar), + * util_hshad(foo, baz) + * ); + * + * util_hsdel(foo); + */ + +hash_set_t *util_hsnew(void); +int util_hsadd(hash_set_t *, void *); +int util_hshas(hash_set_t *, void *); +int util_hsrem(hash_set_t *, void *); +void util_hsdel(hash_set_t *); + /*===================================================================*/ /*============================ file.c ===============================*/ /*===================================================================*/ diff --git a/util.c b/util.c index ddf7805..5d0d683 100644 --- a/util.c +++ b/util.c @@ -573,6 +573,146 @@ void util_htdel(hash_table_t *ht) { mem_d(ht); } +/* + * A basic implementation of a hash-set. Unlike a hashtable, a hash + * set doesn't maintain key-value pairs. It simply maintains a key + * that can be set, removed, and checked for. + * + * See EXPOSED interface comment below + */ +#define GMQCC_HASHSET_PRIME0 0x0049 +#define GMQCC_HASHSET_PRIME1 0x1391 + +static int util_hsput(hash_set_t *set, void *item) { + size_t hash = (size_t)item; /* shouldn't drop the bits */ + size_t iter; + + /* a == 0 || a == 1 */ + if (hash >> 1) + return -1; + + iter = set->mask & (GMQCC_HASHSET_PRIME0 * hash); + + /* while (set->items[iter] != 0 && set->items[iter] != 1) */ + while (!(set->items[iter] >> 1)) { + if (set->items[iter] == hash) + return 0; + + iter = set->mask & (iter + GMQCC_HASHSET_PRIME1); + } + + set->total ++; + set->items[iter] = hash; + + return 1; +} + +static void util_hsupdate(hash_set_t *set) { + size_t *old; + size_t end; + size_t itr; + + /* time to rehash? */ + if ((float)set->total >= (size_t)((double)set->capacity * 0.85)) { + old = set->items; + end = set->capacity; + + set->bits ++; + set->capacity = (size_t)(1 << set->bits); + set->mask = set->capacity - 1; + set->items = mem_a(set->capacity * sizeof(size_t)); + set->total = 0; + + /*assert(set->items);*/ + + /* + * this shouldn't be slow? if so unroll it a little perhaps + * (shouldn't be though) + */ + for (itr = 0; itr < end; itr++) + util_hsput(set, (void*)old[itr]); + + mem_d(old); + } +} + +/* + * EXPOSED interface: all of these functions are exposed to the outside + * for use. The stuff above is static because it's the "internal" mechanics + * for syncronizing the set for updating, and putting data into the set. + */ +int util_hsadd(hash_set_t *set, void *item) { + int run = util_hsput(set, item); /* inlined */ + util_hsupdate(set); + + return run; +} + +/* remove item in set */ +int util_hsrem(hash_set_t *set, void *item) { + size_t hash = (size_t)item; + size_t iter = set->mask & (GMQCC_HASHSET_PRIME0 * hash); + + while (set->items[iter]) { + if (set->items[iter] == hash) { + set->items[iter] = 1; + set->total --; + + return 1; + } + iter = set->mask & (iter + GMQCC_HASHSET_PRIME1); + } + + return 0; +} + +/* check if item is set */ +int util_hshas(hash_set_t *set, void *item) { + size_t hash = (size_t)item; + size_t iter = set->mask & (GMQCC_HASHSET_PRIME0 * hash); + + while (set->items[iter]) { + if (set->items[iter] == hash) + return 1; + + iter = set->mask & (iter + GMQCC_HASHSET_PRIME1); + } + + return 0; +} + +hash_set_t *util_hsnew(void) { + hash_set_t *set; + + if (!(set = mem_a(sizeof(hash_set_t)))) + return NULL; + + set->bits = 3; + set->total = 0; + set->capacity = (size_t)(1 << set->bits); + set->mask = set->capacity - 1; + set->items = mem_a(set->capacity * sizeof(size_t)); + + if (!set->items) { + util_hsdel(set); + return NULL; + } + + return set; +} + +void util_hsdel(hash_set_t *set) { + if (!set) return; + + if (set->items) + mem_d(set->items); + + mem_d(set); +} +#undef GMQCC_HASHSET_PRIME0 +#undef GMQCC_HASHSET_PRIME1 + + /* * Portable implementation of vasprintf/asprintf. Assumes vsnprintf * exists, otherwise compiler error.