diff options
author | Karsten Blees <karsten.blees@gmail.com> | 2013-11-14 20:17:54 +0100 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2013-11-18 22:03:51 +0100 |
commit | 6a364ced497e407ab3ffb2554d4ef2c78f801832 (patch) | |
tree | d3f6ee1a0ae10dc4e6cbca185fbc99ca7ac70930 /test-hashmap.c | |
parent | submodule: don't access the .gitmodules cache entry after removing it (diff) | |
download | git-6a364ced497e407ab3ffb2554d4ef2c78f801832.tar.xz git-6a364ced497e407ab3ffb2554d4ef2c78f801832.zip |
add a hashtable implementation that supports O(1) removal
The existing hashtable implementation (in hash.[ch]) uses open addressing
(i.e. resolve hash collisions by distributing entries across the table).
Thus, removal is difficult to implement with less than O(n) complexity.
Resolving collisions of entries with identical hashes (e.g. via chaining)
is left to the client code.
Add a hashtable implementation that supports O(1) removal and is slightly
easier to use due to builtin entry chaining.
Supports all basic operations init, free, get, add, remove and iteration.
Also includes ready-to-use hash functions based on the public domain FNV-1
algorithm (http://www.isthe.com/chongo/tech/comp/fnv).
The per-entry data structure (hashmap_entry) is piggybacked in front of
the client's data structure to save memory. See test-hashmap.c for usage
examples.
The hashtable is resized by a factor of four when 80% full. With these
settings, average memory consumption is about 2/3 of hash.[ch], and
insertion is about twice as fast due to less frequent resizing.
Lookups are also slightly faster, because entries are strictly confined to
their bucket (i.e. no data of other buckets needs to be traversed).
Signed-off-by: Karsten Blees <blees@dcon.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'test-hashmap.c')
-rw-r--r-- | test-hashmap.c | 340 |
1 files changed, 340 insertions, 0 deletions
diff --git a/test-hashmap.c b/test-hashmap.c new file mode 100644 index 0000000000..581d2964e4 --- /dev/null +++ b/test-hashmap.c @@ -0,0 +1,340 @@ +#include "cache.h" +#include "hashmap.h" +#include <stdio.h> + +struct test_entry +{ + struct hashmap_entry ent; + /* key and value as two \0-terminated strings */ + char key[FLEX_ARRAY]; +}; + +static const char *get_value(const struct test_entry *e) +{ + return e->key + strlen(e->key) + 1; +} + +static int test_entry_cmp(const struct test_entry *e1, + const struct test_entry *e2, const char* key) +{ + return strcmp(e1->key, key ? key : e2->key); +} + +static int test_entry_cmp_icase(const struct test_entry *e1, + const struct test_entry *e2, const char* key) +{ + return strcasecmp(e1->key, key ? key : e2->key); +} + +static struct test_entry *alloc_test_entry(int hash, char *key, int klen, + char *value, int vlen) +{ + struct test_entry *entry = malloc(sizeof(struct test_entry) + klen + + vlen + 2); + hashmap_entry_init(entry, hash); + memcpy(entry->key, key, klen + 1); + memcpy(entry->key + klen + 1, value, vlen + 1); + return entry; +} + +#define HASH_METHOD_FNV 0 +#define HASH_METHOD_I 1 +#define HASH_METHOD_IDIV10 2 +#define HASH_METHOD_0 3 +#define HASH_METHOD_X2 4 +#define TEST_SPARSE 8 +#define TEST_ADD 16 +#define TEST_SIZE 100000 + +static unsigned int hash(unsigned int method, unsigned int i, const char *key) +{ + unsigned int hash; + switch (method & 3) + { + case HASH_METHOD_FNV: + hash = strhash(key); + break; + case HASH_METHOD_I: + hash = i; + break; + case HASH_METHOD_IDIV10: + hash = i / 10; + break; + case HASH_METHOD_0: + hash = 0; + break; + } + + if (method & HASH_METHOD_X2) + hash = 2 * hash; + return hash; +} + +/* + * Test performance of hashmap.[ch] + * Usage: time echo "perfhashmap method rounds" | test-hashmap + */ +static void perf_hashmap(unsigned int method, unsigned int rounds) +{ + struct hashmap map; + char buf[16]; + struct test_entry **entries; + unsigned int *hashes; + unsigned int i, j; + + entries = malloc(TEST_SIZE * sizeof(struct test_entry *)); + hashes = malloc(TEST_SIZE * sizeof(int)); + for (i = 0; i < TEST_SIZE; i++) { + snprintf(buf, sizeof(buf), "%i", i); + entries[i] = alloc_test_entry(0, buf, strlen(buf), "", 0); + hashes[i] = hash(method, i, entries[i]->key); + } + + if (method & TEST_ADD) { + /* test adding to the map */ + for (j = 0; j < rounds; j++) { + hashmap_init(&map, (hashmap_cmp_fn) test_entry_cmp, 0); + + /* add entries */ + for (i = 0; i < TEST_SIZE; i++) { + hashmap_entry_init(entries[i], hashes[i]); + hashmap_add(&map, entries[i]); + } + + hashmap_free(&map, 0); + } + } else { + /* test map lookups */ + hashmap_init(&map, (hashmap_cmp_fn) test_entry_cmp, 0); + + /* fill the map (sparsely if specified) */ + j = (method & TEST_SPARSE) ? TEST_SIZE / 10 : TEST_SIZE; + for (i = 0; i < j; i++) { + hashmap_entry_init(entries[i], hashes[i]); + hashmap_add(&map, entries[i]); + } + + for (j = 0; j < rounds; j++) { + for (i = 0; i < TEST_SIZE; i++) { + struct hashmap_entry key; + hashmap_entry_init(&key, hashes[i]); + hashmap_get(&map, &key, entries[i]->key); + } + } + + hashmap_free(&map, 0); + } +} + +struct hash_entry +{ + struct hash_entry *next; + char key[FLEX_ARRAY]; +}; + +/* + * Test performance of hash.[ch] + * Usage: time echo "perfhash method rounds" | test-hashmap + */ +static void perf_hash(unsigned int method, unsigned int rounds) +{ + struct hash_table map; + char buf[16]; + struct hash_entry **entries, **res, *entry; + unsigned int *hashes; + unsigned int i, j; + + entries = malloc(TEST_SIZE * sizeof(struct hash_entry *)); + hashes = malloc(TEST_SIZE * sizeof(int)); + for (i = 0; i < TEST_SIZE; i++) { + snprintf(buf, sizeof(buf), "%i", i); + entries[i] = malloc(sizeof(struct hash_entry) + strlen(buf) + 1); + strcpy(entries[i]->key, buf); + hashes[i] = hash(method, i, entries[i]->key); + } + + if (method & TEST_ADD) { + /* test adding to the map */ + for (j = 0; j < rounds; j++) { + init_hash(&map); + + /* add entries */ + for (i = 0; i < TEST_SIZE; i++) { + res = (struct hash_entry **) insert_hash( + hashes[i], entries[i], &map); + if (res) { + entries[i]->next = *res; + *res = entries[i]; + } else { + entries[i]->next = NULL; + } + } + + free_hash(&map); + } + } else { + /* test map lookups */ + init_hash(&map); + + /* fill the map (sparsely if specified) */ + j = (method & TEST_SPARSE) ? TEST_SIZE / 10 : TEST_SIZE; + for (i = 0; i < j; i++) { + res = (struct hash_entry **) insert_hash(hashes[i], + entries[i], &map); + if (res) { + entries[i]->next = *res; + *res = entries[i]; + } else { + entries[i]->next = NULL; + } + } + + for (j = 0; j < rounds; j++) { + for (i = 0; i < TEST_SIZE; i++) { + entry = lookup_hash(hashes[i], &map); + while (entry) { + if (!strcmp(entries[i]->key, entry->key)) + break; + entry = entry->next; + } + } + } + + free_hash(&map); + + } +} + +#define DELIM " \t\r\n" + +/* + * Read stdin line by line and print result of commands to stdout: + * + * hash key -> strhash(key) memhash(key) strihash(key) memihash(key) + * put key value -> NULL / old value + * get key -> NULL / value + * remove key -> NULL / old value + * iterate -> key1 value1\nkey2 value2\n... + * size -> tablesize numentries + * + * perfhashmap method rounds -> test hashmap.[ch] performance + * perfhash method rounds -> test hash.[ch] performance + */ +int main(int argc, char *argv[]) +{ + char line[1024]; + struct hashmap map; + int icase; + + /* init hash map */ + icase = argc > 1 && !strcmp("ignorecase", argv[1]); + hashmap_init(&map, (hashmap_cmp_fn) (icase ? test_entry_cmp_icase + : test_entry_cmp), 0); + + /* process commands from stdin */ + while (fgets(line, sizeof(line), stdin)) { + char *cmd, *p1 = NULL, *p2 = NULL; + int l1 = 0, l2 = 0, hash = 0; + struct test_entry *entry; + + /* break line into command and up to two parameters */ + cmd = strtok(line, DELIM); + /* ignore empty lines */ + if (!cmd || *cmd == '#') + continue; + + p1 = strtok(NULL, DELIM); + if (p1) { + l1 = strlen(p1); + hash = icase ? strihash(p1) : strhash(p1); + p2 = strtok(NULL, DELIM); + if (p2) + l2 = strlen(p2); + } + + if (!strcmp("hash", cmd) && l1) { + + /* print results of different hash functions */ + printf("%u %u %u %u\n", strhash(p1), memhash(p1, l1), + strihash(p1), memihash(p1, l1)); + + } else if (!strcmp("add", cmd) && l1 && l2) { + + /* create entry with key = p1, value = p2 */ + entry = alloc_test_entry(hash, p1, l1, p2, l2); + + /* add to hashmap */ + hashmap_add(&map, entry); + + } else if (!strcmp("put", cmd) && l1 && l2) { + + /* create entry with key = p1, value = p2 */ + entry = alloc_test_entry(hash, p1, l1, p2, l2); + + /* add / replace entry */ + entry = hashmap_put(&map, entry); + + /* print and free replaced entry, if any */ + puts(entry ? get_value(entry) : "NULL"); + free(entry); + + } else if (!strcmp("get", cmd) && l1) { + + /* setup static key */ + struct hashmap_entry key; + hashmap_entry_init(&key, hash); + + /* lookup entry in hashmap */ + entry = hashmap_get(&map, &key, p1); + + /* print result */ + if (!entry) + puts("NULL"); + while (entry) { + puts(get_value(entry)); + entry = hashmap_get_next(&map, entry); + } + + } else if (!strcmp("remove", cmd) && l1) { + + /* setup static key */ + struct hashmap_entry key; + hashmap_entry_init(&key, hash); + + /* remove entry from hashmap */ + entry = hashmap_remove(&map, &key, p1); + + /* print result and free entry*/ + puts(entry ? get_value(entry) : "NULL"); + free(entry); + + } else if (!strcmp("iterate", cmd)) { + + struct hashmap_iter iter; + hashmap_iter_init(&map, &iter); + while ((entry = hashmap_iter_next(&iter))) + printf("%s %s\n", entry->key, get_value(entry)); + + } else if (!strcmp("size", cmd)) { + + /* print table sizes */ + printf("%u %u\n", map.tablesize, map.size); + + } else if (!strcmp("perfhashmap", cmd) && l1 && l2) { + + perf_hashmap(atoi(p1), atoi(p2)); + + } else if (!strcmp("perfhash", cmd) && l1 && l2) { + + perf_hash(atoi(p1), atoi(p2)); + + } else { + + printf("Unknown command %s\n", cmd); + + } + } + + hashmap_free(&map, 1); + return 0; +} |