summary | refs | log | tree | commit | diff | stats
path: root/packfile.c
diff options
context:
space:
mode:
author: Derrick Stolee <stolee@gmail.com> 2018-07-12 21:39:29 +0200
committer: Junio C Hamano <gitster@pobox.com> 2018-07-20 20:27:28 +0200
commit: fe1ed56f5e482507b54a4fb491273f122c5fd9ea (patch)
tree: 2f1e2521f84234aaae3eba17b459fe5d26155e56 /packfile.c
parent: midx: read pack names into array (diff)
download: git-fe1ed56f5e482507b54a4fb491273f122c5fd9ea.tar.xz
git-fe1ed56f5e482507b54a4fb491273f122c5fd9ea.zip
midx: sort and deduplicate objects from packfiles
Before writing a list of objects and their offsets to a multi-pack-index, we need to collect the list of objects contained in the packfiles. There may be multiple copies of some objects, so this list must be deduplicated.

It is possible to artificially get into a state where there are many duplicate copies of objects. That can create high memory pressure if we are to create a list of all objects before de-duplication. To reduce this memory pressure without a significant performance drop, automatically group objects by the first byte of their object id. Use the IDX fanout tables to group the data, copy to a local array, then sort.

Copy only the de-duplicated entries. Select the duplicate based on the most-recent modified time of a packfile containing the object.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'packfile.c')
-rw-r--r-- packfile.c 17
1 files changed, 17 insertions, 0 deletions
diff --git a/packfile.c b/packfile.c
index ee1ab9b804..3d652212c6 100644
--- a/packfile.c
+++ b/packfile.c
@@ -196,6 +196,23 @@ int open_pack_index(struct packed_git *p)
return ret;
}
+/*
+ * Look up entry `value` of the fanout table in the index of pack `p` and
+ * return it converted from network to host byte order.
+ *
+ * If the index has not been loaded yet (p->index_data is NULL), it is
+ * opened with open_pack_index() first; when that call reports failure
+ * (nonzero), 0 is returned instead.
+ *
+ * `value` is used as an array subscript without any range check here, so
+ * the caller is responsible for keeping it within the table.
+ */
+uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
+{
+	const uint32_t *fanout;
+
+	/* Load the index on demand; give up with 0 if it cannot be opened. */
+	if (!p->index_data && open_pack_index(p))
+		return 0;
+
+	fanout = p->index_data;
+
+	/*
+	 * Index versions above 1 place two 32-bit words ahead of the table
+	 * (presumably a file header -- confirm against the pack index format).
+	 */
+	if (p->index_version > 1)
+		fanout += 2;
+
+	return ntohl(fanout[value]);
+}
+
static struct packed_git *alloc_packed_git(int extra)
{
struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));