summaryrefslogtreecommitdiffstats
path: root/hashmap.c
diff options
context:
space:
mode:
author: Karsten Blees <karsten.blees@gmail.com> 2014-07-03 00:22:54 +0200
committer: Junio C Hamano <gitster@pobox.com> 2014-07-07 22:56:38 +0200
commit: 7b64d42d22206d9995a8f0cb3b515e623cac4702 (patch)
tree: 26895c5fde113b1d84f52409f951feb37f451412 /hashmap.c
parent: hashmap: add simplified hashmap_get_from_hash() API (diff)
download: git-7b64d42d22206d9995a8f0cb3b515e623cac4702.tar.xz
git-7b64d42d22206d9995a8f0cb3b515e623cac4702.zip
hashmap: add string interning API
Interning short strings with high probability of duplicates can reduce the memory footprint and speed up comparisons. Add strintern() and memintern() APIs that use a hashmap to manage the pool of unique, interned strings. Note: strintern(getenv()) could be used to sanitize git's use of getenv(), in case we ever encounter a platform where a call to getenv() invalidates previous getenv() results (which is allowed by POSIX). Signed-off-by: Karsten Blees <blees@dcon.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'hashmap.c')
-rw-r--r--hashmap.c38
1 files changed, 38 insertions, 0 deletions
diff --git a/hashmap.c b/hashmap.c
index d1b8056d8d..f693839cb4 100644
--- a/hashmap.c
+++ b/hashmap.c
@@ -226,3 +226,41 @@ void *hashmap_iter_next(struct hashmap_iter *iter)
current = iter->map->table[iter->tablepos++];
}
}
+
+struct pool_entry { /* One interned byte sequence held in memintern()'s pool.
+ * memintern() passes pointers to this struct straight to
+ * hashmap_entry_init(), hashmap_get() and hashmap_add().
+ * NOTE(review): ent presumably has to stay the first member for
+ * that to work - confirm against the hashmap API. */
+ struct hashmap_entry ent; /* hashmap linkage; initialized with the memhash() of the bytes */
+ size_t len; /* number of bytes stored in data */
+ unsigned char data[FLEX_ARRAY]; /* the pool's own copy of the bytes, allocated together with the struct */
+};
+
+static int pool_entry_cmp(const struct pool_entry *e1,
+ const struct pool_entry *e2,
+ const unsigned char *keydata)
+{ /*
+ * Comparison callback that memintern() installs on the pool's hashmap.
+ * Returns 0 when e1 holds the bytes at keydata, non-zero otherwise.
+ *
+ * Only e2->len is read from e2; the bytes to compare come from keydata.
+ * memintern() looks up with a stack key that has just its hash and len
+ * filled in, so e2->data cannot be relied on here.
+ */
+ return e1->data != keydata && /* same pointer: equal without comparing bytes */
+ (e1->len != e2->len || memcmp(e1->data, keydata, e1->len)); /* otherwise differ if lengths or bytes differ */
+}
+
+const void *memintern(const void *data, size_t len)
+{ /*
+ * Return a pointer to the pool's copy of the len bytes at data, creating
+ * and registering that copy when the lookup finds no matching entry.
+ * The lookup compares through pool_entry_cmp(), so calls with equal
+ * content are expected to get the same pointer back.
+ * Nothing in this function removes or frees pool entries.
+ */
+ static struct hashmap map; /* the pool; static, so zero-filled at start and kept across calls */
+ struct pool_entry key, *e; /* key is a lookup-only stack entry: just its hash and len get set */
+
+ /* initialize string pool hashmap
+ * (tablesize is 0 in the zero-filled static map before the first call;
+ * NOTE(review): relies on hashmap_init() leaving it non-zero - confirm) */
+ if (!map.tablesize)
+ hashmap_init(&map, (hashmap_cmp_fn) pool_entry_cmp, 0);
+
+ /* lookup interned string in pool
+ * (data is handed over as keydata, which is what pool_entry_cmp()
+ * compares the stored bytes against) */
+ hashmap_entry_init(&key, memhash(data, len));
+ key.len = len;
+ e = hashmap_get(&map, &key, data);
+ if (!e) {
+ /* not found: create it
+ * (room for the struct plus len payload bytes;
+ * NOTE(review): xmallocz() presumably adds a trailing NUL byte and
+ * dies on allocation failure - confirm against its definition) */
+ e = xmallocz(sizeof(struct pool_entry) + len);
+ hashmap_entry_init(e, key.ent.hash); /* reuse the hash already computed for the key */
+ e->len = len;
+ memcpy(e->data, data, len);
+ hashmap_add(&map, e);
+ }
+ return e->data; /* points into the pool entry, never at the caller's buffer */
+}