From: Wolfgang Bumiller Date: Tue, 25 Dec 2012 19:38:05 +0000 (+0100) Subject: The very aggressive -Ooverlap-strings X-Git-Tag: before-library~500 X-Git-Url: https://git.rm.cloudns.org/?a=commitdiff_plain;h=35ba2dcaf99f4352869ad8519c35fefb1c53dd93;p=xonotic%2Fgmqcc.git The very aggressive -Ooverlap-strings --- diff --git a/code.c b/code.c index 1edaa47..e6cd6a3 100644 --- a/code.c +++ b/code.c @@ -59,7 +59,8 @@ void code_init() { int i = 0; code_entfields = 0; - code_string_cache = util_htnew(1024); + + code_string_cache = util_htnew(OPTS_OPTIMIZATION(OPTIM_OVERLAP_STRINGS) ? 0x100 : 1024); /* * The way progs.dat is suppose to work is odd, there needs to be @@ -75,6 +76,7 @@ void code_init() { vec_push(code_fields, empty_def); } +void *code_util_str_htgeth(hash_table_t *ht, const char *key, size_t bin); uint32_t code_genstring(const char *str) { uint32_t off; @@ -92,8 +94,14 @@ uint32_t code_genstring(const char *str) return code_string_cached_empty; } - hash = util_hthash(code_string_cache, str); - existing = util_htgeth(code_string_cache, str, hash); + if (OPTS_OPTIMIZATION(OPTIM_OVERLAP_STRINGS)) { + hash = ((unsigned char*)str)[strlen(str)-1]; + existing = code_util_str_htgeth(code_string_cache, str, hash); + } else { + hash = util_hthash(code_string_cache, str); + existing = util_htgeth(code_string_cache, str, hash); + } + if (existing) return HASH_ENTRY_TO_QCINT(existing); @@ -101,7 +109,6 @@ uint32_t code_genstring(const char *str) vec_upload(code_chars, str, strlen(str)+1); util_htseth(code_string_cache, str, hash, QCINT_TO_HASH_ENTRY(off)); - existing = util_htgeth(code_string_cache, str, hash); return off; } diff --git a/doc/gmqcc.1 b/doc/gmqcc.1 index 3f3983a..66fa92f 100644 --- a/doc/gmqcc.1 +++ b/doc/gmqcc.1 @@ -389,6 +389,24 @@ possible. Don't generate defs for immediate values or even declared constants. Meaning variables which are implicitly constant or qualified as such using the 'const' keyword. 
+.TP +.B -Ooverlap-strings +Aggressively reuse strings in the string section. When a string should +be added which is the trailing substring of an already existing +string, the existing string's tail will be returned instead of the new +string being added. + +For example the following code will only generate 1 string: + +.in +4 +.nf +print("Hello you!\\n"); +print("you!\\n"); // trailing substring of "Hello you!\\n" +.fi +.in +There's however one limitation. Strings are still processed in order, +so if the above print statements were reversed, this optimization +would not happen. .SH CONFIG The configuration file is similar to regular .ini files. Comments start with hashtags or semicolons, sections are written in square diff --git a/opts.def b/opts.def index 111700d..f39336a 100644 --- a/opts.def +++ b/opts.def @@ -85,6 +85,7 @@ GMQCC_DEFINE_FLAG(TAIL_CALLS, 2) GMQCC_DEFINE_FLAG(OVERLAP_LOCALS, 3) GMQCC_DEFINE_FLAG(STRIP_CONSTANT_NAMES, 1) + GMQCC_DEFINE_FLAG(OVERLAP_STRINGS, 2) #endif /* some cleanup so we don't have to */ diff --git a/util.c b/util.c index a1d6776..b2b092d 100644 --- a/util.c +++ b/util.c @@ -514,6 +514,40 @@ void *util_htget(hash_table_t *ht, const char *key) { return util_htgeth(ht, key, util_hthash(ht, key)); } +void *code_util_str_htgeth(hash_table_t *ht, const char *key, size_t bin) { + hash_node_t *pair; + size_t len, keylen; + int cmp; + + keylen = strlen(key); + + pair = ht->table[bin]; + while (pair && pair->key) { + len = strlen(pair->key); + if (len < keylen) { + pair = pair->next; + continue; + } + if (keylen == len) { + cmp = strcmp(key, pair->key); + if (cmp == 0) + return pair->value; + if (cmp < 0) + return NULL; + pair = pair->next; + continue; + } + cmp = strcmp(key, pair->key + len - keylen); + if (cmp == 0) { + uintptr_t up = (uintptr_t)pair->value; + up += len - keylen; + return (void*)up; + } + pair = pair->next; + } + return NULL; +} + /* * Free all allocated data in a hashtable, this is quite the amount * of work.