diff options
author | David Kastrup <dak@gnu.org> | 2007-09-08 23:25:55 +0200 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2007-09-10 02:16:49 +0200 |
commit | 02e665ce491296245f474dafdc02d47a6c8afa86 (patch) | |
tree | a47e82077da6f80abc52ccc898e5f03f883c3417 /diff-delta.c | |
parent | diff-delta.c: pack the index structure (diff) | |
download | git-02e665ce491296245f474dafdc02d47a6c8afa86.tar.xz git-02e665ce491296245f474dafdc02d47a6c8afa86.zip |
diff-delta.c: Rationalize culling of hash buckets
The previous hash bucket culling resulted in a somewhat unpredictable
number of hash bucket entries in the order of magnitude of HASH_LIMIT.
Replace this with a Bresenham-like algorithm leaving us with exactly
HASH_LIMIT entries by uniform culling.
Signed-off-by: David Kastrup <dak@gnu.org>
Acked-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'diff-delta.c')
-rw-r--r-- | diff-delta.c | 41 |
1 files changed, 31 insertions, 10 deletions
diff --git a/diff-delta.c b/diff-delta.c index 7f8a60de18..9e440a9299 100644 --- a/diff-delta.c +++ b/diff-delta.c @@ -207,19 +207,40 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize) * the reference buffer. */ for (i = 0; i < hsize; i++) { - if (hash_count[i] < HASH_LIMIT) + int acc; + + if (hash_count[i] <= HASH_LIMIT) continue; + + entries -= hash_count[i] - HASH_LIMIT; + /* We leave exactly HASH_LIMIT entries in the bucket */ + entry = hash[i]; + acc = 0; do { - struct unpacked_index_entry *keep = entry; - int skip = hash_count[i] / HASH_LIMIT; - do { - --entries; - entry = entry->next; - } while(--skip && entry); - ++entries; - keep->next = entry; - } while(entry); + acc += hash_count[i] - HASH_LIMIT; + if (acc > 0) { + struct unpacked_index_entry *keep = entry; + do { + entry = entry->next; + acc -= HASH_LIMIT; + } while (acc > 0); + keep->next = entry->next; + } + entry = entry->next; + } while (entry); + + /* Assume that this loop is gone through exactly + * HASH_LIMIT times and is entered and left with + * acc==0. So the first statement in the loop + * contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT + * to the accumulator, and the inner loop consequently + * is run (hash_count[i]-HASH_LIMIT) times, removing + * one element from the list each time. Since acc + * balances out to 0 at the final run, the inner loop + * body can't be left with entry==NULL. So we indeed + * encounter entry==NULL in the outer loop only. + */ } free(hash_count); |