From 9208e318f548d28c677203a222f02a31a15c5d0e Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:25 -0400 Subject: packfile: generalize pack directory list In anticipation of sharing the pack directory listing with the multi-pack-index, generalize prepare_packed_git_one() into for_each_file_in_pack_dir(). Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- packfile.c | 101 ++++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 63 insertions(+), 38 deletions(-) (limited to 'packfile.c') diff --git a/packfile.c b/packfile.c index 7cd45aa4b2..ee1ab9b804 100644 --- a/packfile.c +++ b/packfile.c @@ -738,13 +738,14 @@ static void report_pack_garbage(struct string_list *list) report_helper(list, seen_bits, first, list->nr); } -static void prepare_packed_git_one(struct repository *r, char *objdir, int local) +void for_each_file_in_pack_dir(const char *objdir, + each_file_in_pack_dir_fn fn, + void *data) { struct strbuf path = STRBUF_INIT; size_t dirnamelen; DIR *dir; struct dirent *de; - struct string_list garbage = STRING_LIST_INIT_DUP; strbuf_addstr(&path, objdir); strbuf_addstr(&path, "/pack"); @@ -759,53 +760,77 @@ static void prepare_packed_git_one(struct repository *r, char *objdir, int local strbuf_addch(&path, '/'); dirnamelen = path.len; while ((de = readdir(dir)) != NULL) { - struct packed_git *p; - size_t base_len; - if (is_dot_or_dotdot(de->d_name)) continue; strbuf_setlen(&path, dirnamelen); strbuf_addstr(&path, de->d_name); - base_len = path.len; - if (strip_suffix_mem(path.buf, &base_len, ".idx")) { - /* Don't reopen a pack we already have. */ - for (p = r->objects->packed_git; p; - p = p->next) { - size_t len; - if (strip_suffix(p->pack_name, ".pack", &len) && - len == base_len && - !memcmp(p->pack_name, path.buf, len)) - break; - } - if (p == NULL && - /* - * See if it really is a valid .idx file with - * corresponding .pack file that we can map. - */ - (p = add_packed_git(path.buf, path.len, local)) != NULL) - install_packed_git(r, p); - } - - if (!report_garbage) - continue; - - if (ends_with(de->d_name, ".idx") || - ends_with(de->d_name, ".pack") || - ends_with(de->d_name, ".bitmap") || - ends_with(de->d_name, ".keep") || - ends_with(de->d_name, ".promisor")) - string_list_append(&garbage, path.buf); - else - report_garbage(PACKDIR_FILE_GARBAGE, path.buf); + fn(path.buf, path.len, de->d_name, data); } + closedir(dir); - report_pack_garbage(&garbage); - string_list_clear(&garbage, 0); strbuf_release(&path); } +struct prepare_pack_data { + struct repository *r; + struct string_list *garbage; + int local; +}; + +static void prepare_pack(const char *full_name, size_t full_name_len, + const char *file_name, void *_data) +{ + struct prepare_pack_data *data = (struct prepare_pack_data *)_data; + struct packed_git *p; + size_t base_len = full_name_len; + + if (strip_suffix_mem(full_name, &base_len, ".idx")) { + /* Don't reopen a pack we already have. */ + for (p = data->r->objects->packed_git; p; p = p->next) { + size_t len; + if (strip_suffix(p->pack_name, ".pack", &len) && + len == base_len && + !memcmp(p->pack_name, full_name, len)) + break; + } + + if (!p) { + p = add_packed_git(full_name, full_name_len, data->local); + if (p) + install_packed_git(data->r, p); + } + } + + if (!report_garbage) + return; + + if (ends_with(file_name, ".idx") || + ends_with(file_name, ".pack") || + ends_with(file_name, ".bitmap") || + ends_with(file_name, ".keep") || + ends_with(file_name, ".promisor")) + string_list_append(data->garbage, full_name); + else + report_garbage(PACKDIR_FILE_GARBAGE, full_name); +} + +static void prepare_packed_git_one(struct repository *r, char *objdir, int local) +{ + struct prepare_pack_data data; + struct string_list garbage = STRING_LIST_INIT_DUP; + + data.r = r; + data.garbage = &garbage; + data.local = local; + + for_each_file_in_pack_dir(objdir, prepare_pack, &data); + + report_pack_garbage(data.garbage); + string_list_clear(data.garbage, 0); +} + static void prepare_packed_git(struct repository *r); /* * Give a fast, rough count of the number of objects in the repository. This -- cgit v1.2.3 From fe1ed56f5e482507b54a4fb491273f122c5fd9ea Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:29 -0400 Subject: midx: sort and deduplicate objects from packfiles Before writing a list of objects and their offsets to a multi-pack-index, we need to collect the list of objects contained in the packfiles. There may be multiple copies of some objects, so this list must be deduplicated. It is possible to artificially get into a state where there are many duplicate copies of objects. That can create high memory pressure if we are to create a list of all objects before de-duplication. To reduce this memory pressure without a significant performance drop, automatically group objects by the first byte of their object id. Use the IDX fanout tables to group the data, copy to a local array, then sort. Copy only the de-duplicated entries. Select the duplicate based on the most-recent modified time of a packfile containing the object. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ packfile.c | 17 ++++++++ packfile.h | 2 + 3 files changed, 147 insertions(+) (limited to 'packfile.c') diff --git a/midx.c b/midx.c index fcdf6553ce..29f8de5ee6 100644 --- a/midx.c +++ b/midx.c @@ -4,6 +4,7 @@ #include "lockfile.h" #include "packfile.h" #include "object-store.h" +#include "packfile.h" #include "midx.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ @@ -182,12 +183,21 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len, packs->list[packs->nr] = add_packed_git(full_path, full_path_len, 0); + if (!packs->list[packs->nr]) { warning(_("failed to add packfile '%s'"), full_path); return; } + if (open_pack_index(packs->list[packs->nr])) { + warning(_("failed to open pack-index '%s'"), + full_path); + close_pack(packs->list[packs->nr]); + FREE_AND_NULL(packs->list[packs->nr]); + return; + } + packs->names[packs->nr] = xstrdup(file_name); packs->pack_name_concat_len += strlen(file_name) + 1; packs->nr++; @@ -228,6 +238,119 @@ static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *p free(pairs); } +struct pack_midx_entry { + struct object_id oid; + uint32_t pack_int_id; + time_t pack_mtime; + uint64_t offset; +}; + +static int midx_oid_compare(const void *_a, const void *_b) +{ + const struct pack_midx_entry *a = (const struct pack_midx_entry *)_a; + const struct pack_midx_entry *b = (const struct pack_midx_entry *)_b; + int cmp = oidcmp(&a->oid, &b->oid); + + if (cmp) + return cmp; + + if (a->pack_mtime > b->pack_mtime) + return -1; + else if (a->pack_mtime < b->pack_mtime) + return 1; + + return a->pack_int_id - b->pack_int_id; +} + +static void fill_pack_entry(uint32_t pack_int_id, + struct packed_git *p, + uint32_t cur_object, + struct pack_midx_entry *entry) +{ + if (!nth_packed_object_oid(&entry->oid, p, cur_object)) + die(_("failed to locate object %d in packfile"), cur_object); + + entry->pack_int_id = pack_int_id; + entry->pack_mtime = p->mtime; + + entry->offset = nth_packed_object_offset(p, cur_object); +} + +/* + * It is possible to artificially get into a state where there are many + * duplicate copies of objects. That can create high memory pressure if + * we are to create a list of all objects before de-duplication. To reduce + * this memory pressure without a significant performance drop, automatically + * group objects by the first byte of their object id. Use the IDX fanout + * tables to group the data, copy to a local array, then sort. + * + * Copy only the de-duplicated entries (selected by most-recent modified time + * of a packfile containing the object). + */ +static struct pack_midx_entry *get_sorted_entries(struct packed_git **p, + uint32_t *perm, + uint32_t nr_packs, + uint32_t *nr_objects) +{ + uint32_t cur_fanout, cur_pack, cur_object; + uint32_t alloc_fanout, alloc_objects, total_objects = 0; + struct pack_midx_entry *entries_by_fanout = NULL; + struct pack_midx_entry *deduplicated_entries = NULL; + + for (cur_pack = 0; cur_pack < nr_packs; cur_pack++) + total_objects += p[cur_pack]->num_objects; + + /* + * As we de-duplicate by fanout value, we expect the fanout + * slices to be evenly distributed, with some noise. Hence, + * allocate slightly more than one 256th. + */ + alloc_objects = alloc_fanout = total_objects > 3200 ? total_objects / 200 : 16; + + ALLOC_ARRAY(entries_by_fanout, alloc_fanout); + ALLOC_ARRAY(deduplicated_entries, alloc_objects); + *nr_objects = 0; + + for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) { + uint32_t nr_fanout = 0; + + for (cur_pack = 0; cur_pack < nr_packs; cur_pack++) { + uint32_t start = 0, end; + + if (cur_fanout) + start = get_pack_fanout(p[cur_pack], cur_fanout - 1); + end = get_pack_fanout(p[cur_pack], cur_fanout); + + for (cur_object = start; cur_object < end; cur_object++) { + ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout); + fill_pack_entry(perm[cur_pack], p[cur_pack], cur_object, &entries_by_fanout[nr_fanout]); + nr_fanout++; + } + } + + QSORT(entries_by_fanout, nr_fanout, midx_oid_compare); + + /* + * The batch is now sorted by OID and then mtime (descending). + * Take only the first duplicate. + */ + for (cur_object = 0; cur_object < nr_fanout; cur_object++) { + if (cur_object && !oidcmp(&entries_by_fanout[cur_object - 1].oid, + &entries_by_fanout[cur_object].oid)) + continue; + + ALLOC_GROW(deduplicated_entries, *nr_objects + 1, alloc_objects); + memcpy(&deduplicated_entries[*nr_objects], + &entries_by_fanout[cur_object], + sizeof(struct pack_midx_entry)); + (*nr_objects)++; + } + } + + free(entries_by_fanout); + return deduplicated_entries; +} + static size_t write_midx_pack_names(struct hashfile *f, char **pack_names, uint32_t num_packs) @@ -271,6 +394,8 @@ int write_midx_file(const char *object_dir) uint64_t written = 0; uint32_t chunk_ids[MIDX_MAX_CHUNKS + 1]; uint64_t chunk_offsets[MIDX_MAX_CHUNKS + 1]; + uint32_t nr_entries; + struct pack_midx_entry *entries = NULL; midx_name = get_midx_filename(object_dir); if (safe_create_leading_directories(midx_name)) { @@ -296,6 +421,8 @@ int write_midx_file(const char *object_dir) ALLOC_ARRAY(pack_perm, packs.nr); sort_packs_by_name(packs.names, packs.nr, pack_perm); + entries = get_sorted_entries(packs.list, pack_perm, packs.nr, &nr_entries); + hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR); f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); FREE_AND_NULL(midx_name); @@ -365,5 +492,6 @@ int write_midx_file(const char *object_dir) free(packs.list); free(packs.names); + free(entries); return 0; } diff --git a/packfile.c b/packfile.c index ee1ab9b804..3d652212c6 100644 --- a/packfile.c +++ b/packfile.c @@ -196,6 +196,23 @@ int open_pack_index(struct packed_git *p) return ret; } +uint32_t get_pack_fanout(struct packed_git *p, uint32_t value) +{ + const uint32_t *level1_ofs = p->index_data; + + if (!level1_ofs) { + if (open_pack_index(p)) + return 0; + level1_ofs = p->index_data; + } + + if (p->index_version > 1) { + level1_ofs += 2; + } + + return ntohl(level1_ofs[value]); +} + static struct packed_git *alloc_packed_git(int extra) { struct packed_git *p = xmalloc(st_add(sizeof(*p), extra)); diff --git a/packfile.h b/packfile.h index d2ad30300a..b0eed44c0b 100644 --- a/packfile.h +++ b/packfile.h @@ -69,6 +69,8 @@ extern int open_pack_index(struct packed_git *); */ extern void close_pack_index(struct packed_git *); +extern uint32_t get_pack_fanout(struct packed_git *p, uint32_t value); + extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *); extern void close_pack_windows(struct packed_git *); extern void close_pack(struct packed_git *); -- cgit v1.2.3 From c4d25228ebb22a60f1fcb267e19c503bab708cdc Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:33 -0400 Subject: config: create core.multiPackIndex setting The core.multiPackIndex config setting controls the multi-pack- index (MIDX) feature. If false, the setting will disable all reads from the multi-pack-index file. Read this config setting in the new prepare_multi_pack_index_one() which is called during prepare_packed_git(). This check is run once per repository. Add comparison commands in t5319-multi-pack-index.sh to check typical Git behavior remains the same as the config setting is turned on and off. This currently includes 'git rev-list' and 'git log' commands to trigger several object database reads. Currently, these would only catch an error in the prepare_multi_pack_index_one(), but with later commits will catch errors in object lookups, abbreviations, and approximate object counts. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/config.txt | 5 +++++ midx.c | 25 ++++++++++++++++++++++ midx.h | 5 +++++ object-store.h | 7 +++++++ packfile.c | 6 +++++- t/t5319-multi-pack-index.sh | 51 +++++++++++++++++++++++++++++++++------------ 6 files changed, 85 insertions(+), 14 deletions(-) (limited to 'packfile.c') diff --git a/Documentation/config.txt b/Documentation/config.txt index ab641bf5a9..25f817ca42 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -908,6 +908,11 @@ core.commitGraph:: Enable git commit graph feature. Allows reading from the commit-graph file. +core.multiPackIndex:: + Use the multi-pack-index file to track multiple packfiles using a + single index. See link:technical/multi-pack-index.html[the + multi-pack-index design document]. + core.sparseCheckout:: Enable "sparse checkout" feature. See section "Sparse checkout" in linkgit:git-read-tree[1] for more information. diff --git a/midx.c b/midx.c index e83110ae92..4090cf4ca4 100644 --- a/midx.c +++ b/midx.c @@ -1,4 +1,5 @@ #include "cache.h" +#include "config.h" #include "csum-file.h" #include "dir.h" #include "lockfile.h" @@ -177,6 +178,30 @@ cleanup_fail: return NULL; } +int prepare_multi_pack_index_one(struct repository *r, const char *object_dir) +{ + struct multi_pack_index *m = r->objects->multi_pack_index; + struct multi_pack_index *m_search; + int config_value; + + if (repo_config_get_bool(r, "core.multipackindex", &config_value) || + !config_value) + return 0; + + for (m_search = m; m_search; m_search = m_search->next) + if (!strcmp(object_dir, m_search->object_dir)) + return 1; + + r->objects->multi_pack_index = load_multi_pack_index(object_dir); + + if (r->objects->multi_pack_index) { + r->objects->multi_pack_index->next = m; + return 1; + } + + return 0; +} + static size_t write_midx_header(struct hashfile *f, unsigned char num_chunks, uint32_t num_packs) diff --git a/midx.h b/midx.h index e15966272f..9bcfc82d2e 100644 --- a/midx.h +++ b/midx.h @@ -1,7 +1,11 @@ #ifndef __MIDX_H__ #define __MIDX_H__ +#include "repository.h" + struct multi_pack_index { + struct multi_pack_index *next; + int fd; const unsigned char *data; @@ -25,6 +29,7 @@ struct multi_pack_index { }; struct multi_pack_index *load_multi_pack_index(const char *object_dir); +int prepare_multi_pack_index_one(struct repository *r, const char *object_dir); int write_midx_file(const char *object_dir); diff --git a/object-store.h b/object-store.h index 13a766aea8..c2b162489a 100644 --- a/object-store.h +++ b/object-store.h @@ -105,6 +105,13 @@ struct raw_object_store { */ struct oidmap *replace_map; + /* + * private data + * + * should only be accessed directly by packfile.c and midx.c + */ + struct multi_pack_index *multi_pack_index; + /* * private data * diff --git a/packfile.c b/packfile.c index 3d652212c6..5d4493dbf4 100644 --- a/packfile.c +++ b/packfile.c @@ -15,6 +15,7 @@ #include "tree-walk.h" #include "tree.h" #include "object-store.h" +#include "midx.h" char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, @@ -935,10 +936,13 @@ static void prepare_packed_git(struct repository *r) if (r->objects->packed_git_initialized) return; + prepare_multi_pack_index_one(r, r->objects->objectdir); prepare_packed_git_one(r, r->objects->objectdir, 1); prepare_alt_odb(r); - for (alt = r->objects->alt_odb_list; alt; alt = alt->next) + for (alt = r->objects->alt_odb_list; alt; alt = alt->next) { + prepare_multi_pack_index_one(r, alt->path); prepare_packed_git_one(r, alt->path, 0); + } rearrange_packed_git(r); prepare_packed_git_mru(r); r->objects->packed_git_initialized = 1; diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 4a4fa26f7a..b9661c7c66 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -3,6 +3,8 @@ test_description='multi-pack-indexes' . ./test-lib.sh +objdir=.git/objects + midx_read_expect () { NUM_PACKS=$1 NUM_OBJECTS=$2 @@ -76,18 +78,35 @@ test_expect_success 'create objects' ' ' test_expect_success 'write midx with one v1 pack' ' - pack=$(git pack-objects --index-version=1 pack/test expect && + git -c core.multiPackIndex=true $1 >actual && + test_cmp expect actual +} + +compare_results_with_midx () { + MSG=$1 + test_expect_success "check normal git operations: $MSG" ' + midx_git_two_modes "rev-list --objects --all" && + midx_git_two_modes "log --raw" + ' +} + test_expect_success 'write midx with one v2 pack' ' - git pack-objects --index-version=2,0x40 pack/test [] corrupt_data () { file=$1 -- cgit v1.2.3 From 3715a6335c37367b4240b6bfa842dc64dedee34d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:34 -0400 Subject: midx: read objects from multi-pack-index Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- midx.h | 3 +++ packfile.c | 8 +++++- 3 files changed, 100 insertions(+), 2 deletions(-) (limited to 'packfile.c') diff --git a/midx.c b/midx.c index 4090cf4ca4..182535933c 100644 --- a/midx.c +++ b/midx.c @@ -5,7 +5,7 @@ #include "lockfile.h" #include "packfile.h" #include "object-store.h" -#include "packfile.h" +#include "sha1-lookup.h" #include "midx.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ @@ -151,6 +151,7 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir) m->num_objects = ntohl(m->chunk_oid_fanout[255]); m->pack_names = xcalloc(m->num_packs, sizeof(*m->pack_names)); + m->packs = xcalloc(m->num_packs, sizeof(*m->packs)); cur_pack_name = (const char *)m->chunk_pack_names; for (i = 0; i < m->num_packs; i++) { @@ -178,6 +179,94 @@ cleanup_fail: return NULL; } +static int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id) +{ + struct strbuf pack_name = STRBUF_INIT; + + if (pack_int_id >= m->num_packs) + BUG("bad pack-int-id"); + + if (m->packs[pack_int_id]) + return 0; + + strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir, + m->pack_names[pack_int_id]); + + m->packs[pack_int_id] = add_packed_git(pack_name.buf, pack_name.len, 1); + strbuf_release(&pack_name); + return !m->packs[pack_int_id]; +} + +int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result) +{ + return bsearch_hash(oid->hash, m->chunk_oid_fanout, m->chunk_oid_lookup, + MIDX_HASH_LEN, result); +} + +static off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos) +{ + const unsigned char *offset_data; + uint32_t offset32; + + offset_data = m->chunk_object_offsets + pos * MIDX_CHUNK_OFFSET_WIDTH; + offset32 = get_be32(offset_data + sizeof(uint32_t)); + + if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) { + if (sizeof(offset32) < sizeof(uint64_t)) + die(_("multi-pack-index stores a 64-bit offset, but off_t is too small")); + + offset32 ^= MIDX_LARGE_OFFSET_NEEDED; + return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32); + } + + return offset32; +} + +static uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos) +{ + return get_be32(m->chunk_object_offsets + pos * MIDX_CHUNK_OFFSET_WIDTH); +} + +static int nth_midxed_pack_entry(struct multi_pack_index *m, struct pack_entry *e, uint32_t pos) +{ + uint32_t pack_int_id; + struct packed_git *p; + + if (pos >= m->num_objects) + return 0; + + pack_int_id = nth_midxed_pack_int_id(m, pos); + + if (prepare_midx_pack(m, pack_int_id)) + die(_("error preparing packfile from multi-pack-index")); + p = m->packs[pack_int_id]; + + /* + * We are about to tell the caller where they can locate the + * requested object. We better make sure the packfile is + * still here and can be accessed before supplying that + * answer, as it may have been deleted since the MIDX was + * loaded! + */ + if (!is_pack_valid(p)) + return 0; + + e->offset = nth_midxed_offset(m, pos); + e->p = p; + + return 1; +} + +int fill_midx_entry(const struct object_id *oid, struct pack_entry *e, struct multi_pack_index *m) +{ + uint32_t pos; + + if (!bsearch_midx(oid, m, &pos)) + return 0; + + return nth_midxed_pack_entry(m, e, pos); +} + int prepare_multi_pack_index_one(struct repository *r, const char *object_dir) { struct multi_pack_index *m = r->objects->multi_pack_index; diff --git a/midx.h b/midx.h index 9bcfc82d2e..377838c9ca 100644 --- a/midx.h +++ b/midx.h @@ -25,10 +25,13 @@ struct multi_pack_index { const unsigned char *chunk_large_offsets; const char **pack_names; + struct packed_git **packs; char object_dir[FLEX_ARRAY]; }; struct multi_pack_index *load_multi_pack_index(const char *object_dir); +int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result); +int fill_midx_entry(const struct object_id *oid, struct pack_entry *e, struct multi_pack_index *m); int prepare_multi_pack_index_one(struct repository *r, const char *object_dir); int write_midx_file(const char *object_dir); diff --git a/packfile.c b/packfile.c index 5d4493dbf4..bc763d91b9 100644 --- a/packfile.c +++ b/packfile.c @@ -1902,11 +1902,17 @@ static int fill_pack_entry(const struct object_id *oid, int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) { struct list_head *pos; + struct multi_pack_index *m; prepare_packed_git(r); - if (!r->objects->packed_git) + if (!r->objects->packed_git && !r->objects->multi_pack_index) return 0; + for (m = r->objects->multi_pack_index; m; m = m->next) { + if (fill_midx_entry(oid, e, m)) + return 1; + } + list_for_each(pos, &r->objects->packed_git_mru) { struct packed_git *p = list_entry(pos, struct packed_git, mru); if (fill_pack_entry(oid, e, p)) { -- cgit v1.2.3 From 8aac67a174061a0744557a3984a433f926bf5cb3 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:35 -0400 Subject: midx: use midx in abbreviation calculations Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 11 ++++++++++ midx.h | 3 +++ packfile.c | 6 ++++++ packfile.h | 1 + sha1-name.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 91 insertions(+) (limited to 'packfile.c') diff --git a/midx.c b/midx.c index 182535933c..4e014ff6e3 100644 --- a/midx.c +++ b/midx.c @@ -203,6 +203,17 @@ int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32 MIDX_HASH_LEN, result); } +struct object_id *nth_midxed_object_oid(struct object_id *oid, + struct multi_pack_index *m, + uint32_t n) +{ + if (n >= m->num_objects) + return NULL; + + hashcpy(oid->hash, m->chunk_oid_lookup + m->hash_len * n); + return oid; +} + static off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos) { const unsigned char *offset_data; diff --git a/midx.h b/midx.h index 377838c9ca..1b976df873 100644 --- a/midx.h +++ b/midx.h @@ -31,6 +31,9 @@ struct multi_pack_index { struct multi_pack_index *load_multi_pack_index(const char *object_dir); int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result); +struct object_id *nth_midxed_object_oid(struct object_id *oid, + struct multi_pack_index *m, + uint32_t n); int fill_midx_entry(const struct object_id *oid, struct pack_entry *e, struct multi_pack_index *m); int prepare_multi_pack_index_one(struct repository *r, const char *object_dir); diff --git a/packfile.c b/packfile.c index bc763d91b9..c0eb5ac885 100644 --- a/packfile.c +++ b/packfile.c @@ -961,6 +961,12 @@ struct packed_git *get_packed_git(struct repository *r) return r->objects->packed_git; } +struct multi_pack_index *get_multi_pack_index(struct repository *r) +{ + prepare_packed_git(r); + return r->objects->multi_pack_index; +} + struct list_head *get_packed_git_mru(struct repository *r) { prepare_packed_git(r); diff --git a/packfile.h b/packfile.h index b0eed44c0b..046280caf3 100644 --- a/packfile.h +++ b/packfile.h @@ -45,6 +45,7 @@ extern void install_packed_git(struct repository *r, struct packed_git *pack); struct packed_git *get_packed_git(struct repository *r); struct list_head *get_packed_git_mru(struct repository *r); +struct multi_pack_index *get_multi_pack_index(struct repository *r); /* * Give a rough count of objects in the repository. This sacrifices accuracy diff --git a/sha1-name.c b/sha1-name.c index 60d9ef3c7e..7dc71201e6 100644 --- a/sha1-name.c +++ b/sha1-name.c @@ -12,6 +12,7 @@ #include "packfile.h" #include "object-store.h" #include "repository.h" +#include "midx.h" static int get_oid_oneline(const char *, struct object_id *, struct commit_list *); @@ -149,6 +150,32 @@ static int match_sha(unsigned len, const unsigned char *a, const unsigned char * return 1; } +static void unique_in_midx(struct multi_pack_index *m, + struct disambiguate_state *ds) +{ + uint32_t num, i, first = 0; + const struct object_id *current = NULL; + num = m->num_objects; + + if (!num) + return; + + bsearch_midx(&ds->bin_pfx, m, &first); + + /* + * At this point, "first" is the location of the lowest object + * with an object name that could match "bin_pfx". See if we have + * 0, 1 or more objects that actually match(es). + */ + for (i = first; i < num && !ds->ambiguous; i++) { + struct object_id oid; + current = nth_midxed_object_oid(&oid, m, i); + if (!match_sha(ds->len, ds->bin_pfx.hash, current->hash)) + break; + update_candidates(ds, current); + } +} + static void unique_in_pack(struct packed_git *p, struct disambiguate_state *ds) { @@ -177,8 +204,12 @@ static void unique_in_pack(struct packed_git *p, static void find_short_packed_object(struct disambiguate_state *ds) { + struct multi_pack_index *m; struct packed_git *p; + for (m = get_multi_pack_index(the_repository); m && !ds->ambiguous; + m = m->next) + unique_in_midx(m, ds); for (p = get_packed_git(the_repository); p && !ds->ambiguous; p = p->next) unique_in_pack(p, ds); @@ -527,6 +558,42 @@ static int extend_abbrev_len(const struct object_id *oid, void *cb_data) return 0; } +static void find_abbrev_len_for_midx(struct multi_pack_index *m, + struct min_abbrev_data *mad) +{ + int match = 0; + uint32_t num, first = 0; + struct object_id oid; + const struct object_id *mad_oid; + + if (!m->num_objects) + return; + + num = m->num_objects; + mad_oid = mad->oid; + match = bsearch_midx(mad_oid, m, &first); + + /* + * first is now the position in the packfile where we would insert + * mad->hash if it does not exist (or the position of mad->hash if + * it does exist). Hence, we consider a maximum of two objects + * nearby for the abbreviation length. + */ + mad->init_len = 0; + if (!match) { + if (nth_midxed_object_oid(&oid, m, first)) + extend_abbrev_len(&oid, mad); + } else if (first < num - 1) { + if (nth_midxed_object_oid(&oid, m, first + 1)) + extend_abbrev_len(&oid, mad); + } + if (first > 0) { + if (nth_midxed_object_oid(&oid, m, first - 1)) + extend_abbrev_len(&oid, mad); + } + mad->init_len = mad->cur_len; +} + static void find_abbrev_len_for_pack(struct packed_git *p, struct min_abbrev_data *mad) { @@ -565,8 +632,11 @@ static void find_abbrev_len_for_pack(struct packed_git *p, static void find_abbrev_len_packed(struct min_abbrev_data *mad) { + struct multi_pack_index *m; struct packed_git *p; + for (m = get_multi_pack_index(the_repository); m; m = m->next) + find_abbrev_len_for_midx(m, mad); for (p = get_packed_git(the_repository); p; p = p->next) find_abbrev_len_for_pack(p, mad); } -- cgit v1.2.3 From b8990fbfedf7cd9fc92a5208b0fbbd7dad79be6d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:37 -0400 Subject: midx: use midx in approximate_object_count Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- packfile.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'packfile.c') diff --git a/packfile.c b/packfile.c index c0eb5ac885..97e7812b6b 100644 --- a/packfile.c +++ b/packfile.c @@ -861,10 +861,13 @@ unsigned long approximate_object_count(void) { if (!the_repository->objects->approximate_object_count_valid) { unsigned long count; + struct multi_pack_index *m; struct packed_git *p; prepare_packed_git(the_repository); count = 0; + for (m = get_multi_pack_index(the_repository); m; m = m->next) + count += m->num_objects; for (p = the_repository->objects->packed_git; p; p = p->next) { if (open_pack_index(p)) continue; -- cgit v1.2.3 From f3a002bd84790e89399c3a18f1e7101b850ed6f8 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:38 -0400 Subject: midx: prevent duplicate packfile loads The multi-pack-index, when present, tracks the existence of objects and their offsets within a list of packfiles. This allows us to use the multi-pack-index for object lookups, abbreviations, and object counts. When the multi-pack-index tracks a packfile, then we do not need to add that packfile to the packed_git linked list or the MRU list. We still need to load the packfiles that are not tracked by the multi-pack-index. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- packfile.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'packfile.c') diff --git a/packfile.c b/packfile.c index 97e7812b6b..2c819a0ad8 100644 --- a/packfile.c +++ b/packfile.c @@ -795,6 +795,7 @@ struct prepare_pack_data { struct repository *r; struct string_list *garbage; int local; + struct multi_pack_index *m; }; static void prepare_pack(const char *full_name, size_t full_name_len, @@ -805,6 +806,8 @@ static void prepare_pack(const char *full_name, size_t full_name_len, size_t base_len = full_name_len; if (strip_suffix_mem(full_name, &base_len, ".idx")) { + if (data->m && midx_contains_pack(data->m, file_name)) + return; /* Don't reopen a pack we already have. */ for (p = data->r->objects->packed_git; p; p = p->next) { size_t len; @@ -839,6 +842,12 @@ static void prepare_packed_git_one(struct repository *r, char *objdir, int local struct prepare_pack_data data; struct string_list garbage = STRING_LIST_INIT_DUP; + data.m = r->objects->multi_pack_index; + + /* look for the multi-pack-index for this object directory */ + while (data.m && strcmp(data.m->object_dir, objdir)) + data.m = data.m->next; + data.r = r; data.garbage = &garbage; data.local = local; -- cgit v1.2.3 From 17c35c89698c1b9e130ae9a3dc9c016b353308d8 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:39 -0400 Subject: packfile: skip loading index if in multi-pack-index Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- packfile.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'packfile.c') diff --git a/packfile.c b/packfile.c index 2c819a0ad8..e6ecf12ab5 100644 --- a/packfile.c +++ b/packfile.c @@ -469,8 +469,19 @@ static int open_packed_git_1(struct packed_git *p) ssize_t read_result; const unsigned hashsz = the_hash_algo->rawsz; - if (!p->index_data && open_pack_index(p)) - return error("packfile %s index unavailable", p->pack_name); + if (!p->index_data) { + struct multi_pack_index *m; + const char *pack_name = strrchr(p->pack_name, '/'); + + for (m = the_repository->objects->multi_pack_index; + m; m = m->next) { + if (midx_contains_pack(m, pack_name)) + break; + } + + if (!m && open_pack_index(p)) + return error("packfile %s index unavailable", p->pack_name); + } if (!pack_max_fds) { unsigned int max_fds = get_max_fd_limit(); @@ -521,6 +532,10 @@ static int open_packed_git_1(struct packed_git *p) " supported (try upgrading GIT to a newer version)", p->pack_name, ntohl(hdr.hdr_version)); + /* Skip index checking if in multi-pack-index */ + if (!p->index_data) + return 0; + /* Verify the pack matches its index. */ if (p->num_objects != ntohl(hdr.hdr_entries)) return error("packfile %s claims to have %"PRIu32" objects" -- cgit v1.2.3