summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Taylor Blau <me@ttaylorr.com> 2024-08-06 17:37:30 +0200
committer: Junio C Hamano <gitster@pobox.com> 2024-08-06 21:01:36 +0200
commit: 3f5f1cff92dfe64bbbfa9f2fb4ed810125810b1b (patch)
tree: 4bc0015be7250d54ab341a986dab37208e5cda25
parent: midx: teach `nth_bitmapped_pack()` about incremental MIDXs (diff)
download: git-3f5f1cff92dfe64bbbfa9f2fb4ed810125810b1b.tar.xz
git-3f5f1cff92dfe64bbbfa9f2fb4ed810125810b1b.zip
midx: introduce `bsearch_one_midx()`
The `bsearch_midx()` function will be extended in a following commit to search for the location of a given object ID across all MIDXs in a chain (or the single non-chain MIDX if no chain is available). While most callers will naturally want to use the updated `bsearch_midx()` function, there are a handful of special cases that will want finer control and will only want to search through a single MIDX. For instance, the object abbreviation code cares about object IDs near to where we'd expect to find a match in a MIDX. In that case, we want to look at the nearby matches in each layer of the MIDX chain, not just a single one. Split the more fine-grained control out into a separate function called `bsearch_one_midx()` which searches only a single MIDX. At present both `bsearch_midx()` and `bsearch_one_midx()` have identical behavior, but the following commit will rewrite the former to be aware of incremental MIDXs for the remaining non-special case callers. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r-- midx.c 17
-rw-r--r-- midx.h 5
-rw-r--r-- object-name.c 99
3 files changed, 71 insertions, 50 deletions
diff --git a/midx.c b/midx.c
index 25350152f1..bd6e3f26c9 100644
--- a/midx.c
+++ b/midx.c
@@ -330,10 +330,21 @@ int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m,
return 0;
}
-int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result)
+int bsearch_one_midx(const struct object_id *oid, struct multi_pack_index *m,
+ uint32_t *result)
{
- return bsearch_hash(oid->hash, m->chunk_oid_fanout, m->chunk_oid_lookup,
- the_hash_algo->rawsz, result);
+ int ret = bsearch_hash(oid->hash, m->chunk_oid_fanout,
+ m->chunk_oid_lookup, the_hash_algo->rawsz,
+ result);
+ if (result)
+ *result += m->num_objects_in_base;
+ return ret;
+}
+
+int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m,
+ uint32_t *result)
+{
+ return bsearch_one_midx(oid, m, result);
}
struct object_id *nth_midxed_object_oid(struct object_id *oid,
diff --git a/midx.h b/midx.h
index 020e49f77c..46c53d69ff 100644
--- a/midx.h
+++ b/midx.h
@@ -90,7 +90,10 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local
int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id);
int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m,
struct bitmapped_pack *bp, uint32_t pack_int_id);
-int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result);
+int bsearch_one_midx(const struct object_id *oid, struct multi_pack_index *m,
+ uint32_t *result);
+int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m,
+ uint32_t *result);
off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos);
uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos);
struct object_id *nth_midxed_object_oid(struct object_id *oid,
diff --git a/object-name.c b/object-name.c
index 527b853ac4..739d46f9cf 100644
--- a/object-name.c
+++ b/object-name.c
@@ -134,28 +134,32 @@ static int match_hash(unsigned len, const unsigned char *a, const unsigned char
static void unique_in_midx(struct multi_pack_index *m,
struct disambiguate_state *ds)
{
- uint32_t num, i, first = 0;
- const struct object_id *current = NULL;
- int len = ds->len > ds->repo->hash_algo->hexsz ?
- ds->repo->hash_algo->hexsz : ds->len;
- num = m->num_objects;
+ for (; m; m = m->base_midx) {
+ uint32_t num, i, first = 0;
+ const struct object_id *current = NULL;
+ int len = ds->len > ds->repo->hash_algo->hexsz ?
+ ds->repo->hash_algo->hexsz : ds->len;
- if (!num)
- return;
+ if (!m->num_objects)
+ continue;
- bsearch_midx(&ds->bin_pfx, m, &first);
+ num = m->num_objects + m->num_objects_in_base;
- /*
- * At this point, "first" is the location of the lowest object
- * with an object name that could match "bin_pfx". See if we have
- * 0, 1 or more objects that actually match(es).
- */
- for (i = first; i < num && !ds->ambiguous; i++) {
- struct object_id oid;
- current = nth_midxed_object_oid(&oid, m, i);
- if (!match_hash(len, ds->bin_pfx.hash, current->hash))
- break;
- update_candidates(ds, current);
+ bsearch_one_midx(&ds->bin_pfx, m, &first);
+
+ /*
+ * At this point, "first" is the location of the lowest
+ * object with an object name that could match
+ * "bin_pfx". See if we have 0, 1 or more objects that
+ * actually match(es).
+ */
+ for (i = first; i < num && !ds->ambiguous; i++) {
+ struct object_id oid;
+ current = nth_midxed_object_oid(&oid, m, i);
+ if (!match_hash(len, ds->bin_pfx.hash, current->hash))
+ break;
+ update_candidates(ds, current);
+ }
}
}
@@ -708,37 +712,40 @@ static int repo_extend_abbrev_len(struct repository *r UNUSED,
static void find_abbrev_len_for_midx(struct multi_pack_index *m,
struct min_abbrev_data *mad)
{
- int match = 0;
- uint32_t num, first = 0;
- struct object_id oid;
- const struct object_id *mad_oid;
+ for (; m; m = m->base_midx) {
+ int match = 0;
+ uint32_t num, first = 0;
+ struct object_id oid;
+ const struct object_id *mad_oid;
- if (!m->num_objects)
- return;
+ if (!m->num_objects)
+ continue;
- num = m->num_objects;
- mad_oid = mad->oid;
- match = bsearch_midx(mad_oid, m, &first);
+ num = m->num_objects + m->num_objects_in_base;
+ mad_oid = mad->oid;
+ match = bsearch_one_midx(mad_oid, m, &first);
- /*
- * first is now the position in the packfile where we would insert
- * mad->hash if it does not exist (or the position of mad->hash if
- * it does exist). Hence, we consider a maximum of two objects
- * nearby for the abbreviation length.
- */
- mad->init_len = 0;
- if (!match) {
- if (nth_midxed_object_oid(&oid, m, first))
- extend_abbrev_len(&oid, mad);
- } else if (first < num - 1) {
- if (nth_midxed_object_oid(&oid, m, first + 1))
- extend_abbrev_len(&oid, mad);
- }
- if (first > 0) {
- if (nth_midxed_object_oid(&oid, m, first - 1))
- extend_abbrev_len(&oid, mad);
+ /*
+ * first is now the position in the packfile where we
+ * would insert mad->hash if it does not exist (or the
+ * position of mad->hash if it does exist). Hence, we
+ * consider a maximum of two objects nearby for the
+ * abbreviation length.
+ */
+ mad->init_len = 0;
+ if (!match) {
+ if (nth_midxed_object_oid(&oid, m, first))
+ extend_abbrev_len(&oid, mad);
+ } else if (first < num - 1) {
+ if (nth_midxed_object_oid(&oid, m, first + 1))
+ extend_abbrev_len(&oid, mad);
+ }
+ if (first > 0) {
+ if (nth_midxed_object_oid(&oid, m, first - 1))
+ extend_abbrev_len(&oid, mad);
+ }
+ mad->init_len = mad->cur_len;
}
- mad->init_len = mad->cur_len;
}
static void find_abbrev_len_for_pack(struct packed_git *p,