summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2023-06-23 01:29:05 +0200
committerJunio C Hamano <gitster@pobox.com>2023-06-23 01:29:05 +0200
commitf2ffc7418685f75e43e2919426276141fd62c656 (patch)
tree44571a732337464a3a2a6ac20259d7923bd6ff8a
parentMerge branch 'ja/worktree-orphan' (diff)
parentpack-bitmap.c: use commit boundary during bitmap traversal (diff)
downloadgit-f2ffc7418685f75e43e2919426276141fd62c656.tar.xz
git-f2ffc7418685f75e43e2919426276141fd62c656.zip
Merge branch 'tb/pack-bitmap-traversal-with-boundary'
The object traversal using reachability bitmap done by "pack-object" has been tweaked to take advantage of the fact that using "boundary" commits as representative of all the uninteresting ones can save quite a lot of object enumeration. * tb/pack-bitmap-traversal-with-boundary: pack-bitmap.c: use commit boundary during bitmap traversal pack-bitmap.c: extract `fill_in_bitmap()` object: add object_array initializer helper function
-rw-r--r--Documentation/config/feature.txt3
-rw-r--r--Documentation/config/pack.txt17
-rwxr-xr-xci/run-build-and-tests.sh1
-rw-r--r--object.c6
-rw-r--r--object.h2
-rw-r--r--pack-bitmap.c241
-rw-r--r--pack-bitmap.h4
-rw-r--r--repo-settings.c7
-rw-r--r--repository.h1
-rw-r--r--t/README4
-rwxr-xr-xt/t5310-pack-bitmaps.sh38
11 files changed, 284 insertions, 40 deletions
diff --git a/Documentation/config/feature.txt b/Documentation/config/feature.txt
index 17b4d39f89..bf9546fca4 100644
--- a/Documentation/config/feature.txt
+++ b/Documentation/config/feature.txt
@@ -14,6 +14,9 @@ feature.experimental::
+
* `fetch.negotiationAlgorithm=skipping` may improve fetch negotiation times by
skipping more commits at a time, reducing the number of round trips.
++
+* `pack.useBitmapBoundaryTraversal=true` may improve bitmap traversal times by
+walking fewer objects.
feature.manyFiles::
Enable config options that optimize for repos with many files in the
diff --git a/Documentation/config/pack.txt b/Documentation/config/pack.txt
index d4c7c9d4e4..3748136d14 100644
--- a/Documentation/config/pack.txt
+++ b/Documentation/config/pack.txt
@@ -123,6 +123,23 @@ pack.useBitmaps::
true. You should not generally need to turn this off unless
you are debugging pack bitmaps.
+pack.useBitmapBoundaryTraversal::
+ When true, Git will use an experimental algorithm for computing
+ reachability queries with bitmaps. Instead of building up
+ complete bitmaps for all of the negated tips and then OR-ing
+ them together, consider negated tips with existing bitmaps as
+ additive (i.e. OR-ing them into the result if they exist,
+ ignoring them otherwise), and build up a bitmap at the boundary
+ instead.
++
+When using this algorithm, Git may include too many objects as a result
+of not opening up trees belonging to certain UNINTERESTING commits. This
+inexactness matches the non-bitmap traversal algorithm.
++
+In many cases, this can provide a speed-up over the exact algorithm,
+particularly when there is poor bitmap coverage of the negated side of
+the query.
+
pack.useSparse::
When true, git will default to using the '--sparse' option in
'git pack-objects' when the '--revs' option is present. This
diff --git a/ci/run-build-and-tests.sh b/ci/run-build-and-tests.sh
index a18b13a41d..2528f25e31 100755
--- a/ci/run-build-and-tests.sh
+++ b/ci/run-build-and-tests.sh
@@ -29,6 +29,7 @@ linux-TEST-vars)
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=master
export GIT_TEST_NO_WRITE_REV_INDEX=1
export GIT_TEST_CHECKOUT_WORKERS=2
+ export GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1
;;
linux-clang)
export GIT_TEST_DEFAULT_HASH=sha1
diff --git a/object.c b/object.c
index 6d4ef1524d..f1adb458b6 100644
--- a/object.c
+++ b/object.c
@@ -356,6 +356,12 @@ void object_list_free(struct object_list **list)
*/
static char object_array_slopbuf[1];
+void object_array_init(struct object_array *array)
+{
+ struct object_array blank = OBJECT_ARRAY_INIT;
+ memcpy(array, &blank, sizeof(*array));
+}
+
void add_object_array_with_path(struct object *obj, const char *name,
struct object_array *array,
unsigned mode, const char *path)
diff --git a/object.h b/object.h
index 5871615fee..114d45954d 100644
--- a/object.h
+++ b/object.h
@@ -58,6 +58,8 @@ struct object_array {
#define OBJECT_ARRAY_INIT { 0 }
+void object_array_init(struct object_array *array);
+
/*
* object flag allocation:
* revision.h: 0---------10 15 23------27
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 999f962602..894bff02c5 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1043,6 +1043,160 @@ static int add_commit_to_bitmap(struct bitmap_index *bitmap_git,
return 1;
}
+static struct bitmap *fill_in_bitmap(struct bitmap_index *bitmap_git,
+ struct rev_info *revs,
+ struct bitmap *base,
+ struct bitmap *seen)
+{
+ struct include_data incdata;
+ struct bitmap_show_data show_data;
+
+ if (!base)
+ base = bitmap_new();
+
+ incdata.bitmap_git = bitmap_git;
+ incdata.base = base;
+ incdata.seen = seen;
+
+ revs->include_check = should_include;
+ revs->include_check_obj = should_include_obj;
+ revs->include_check_data = &incdata;
+
+ if (prepare_revision_walk(revs))
+ die(_("revision walk setup failed"));
+
+ show_data.bitmap_git = bitmap_git;
+ show_data.base = base;
+
+ traverse_commit_list(revs, show_commit, show_object, &show_data);
+
+ revs->include_check = NULL;
+ revs->include_check_obj = NULL;
+ revs->include_check_data = NULL;
+
+ return base;
+}
+
+struct bitmap_boundary_cb {
+ struct bitmap_index *bitmap_git;
+ struct bitmap *base;
+
+ struct object_array boundary;
+};
+
+static void show_boundary_commit(struct commit *commit, void *_data)
+{
+ struct bitmap_boundary_cb *data = _data;
+
+ if (commit->object.flags & BOUNDARY)
+ add_object_array(&commit->object, "", &data->boundary);
+
+ if (commit->object.flags & UNINTERESTING) {
+ if (bitmap_walk_contains(data->bitmap_git, data->base,
+ &commit->object.oid))
+ return;
+
+ add_commit_to_bitmap(data->bitmap_git, &data->base, commit);
+ }
+}
+
+static void show_boundary_object(struct object *object,
+ const char *name, void *data)
+{
+ BUG("should not be called");
+}
+
+static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
+ struct rev_info *revs,
+ struct object_list *roots)
+{
+ struct bitmap_boundary_cb cb;
+ struct object_list *root;
+ unsigned int i;
+ unsigned int tmp_blobs, tmp_trees, tmp_tags;
+ int any_missing = 0;
+
+ cb.bitmap_git = bitmap_git;
+ cb.base = bitmap_new();
+ object_array_init(&cb.boundary);
+
+ revs->ignore_missing_links = 1;
+
+ /*
+ * OR in any existing reachability bitmaps among `roots` into
+ * `cb.base`.
+ */
+ for (root = roots; root; root = root->next) {
+ struct object *object = root->item;
+ if (object->type != OBJ_COMMIT ||
+ bitmap_walk_contains(bitmap_git, cb.base, &object->oid))
+ continue;
+
+ if (add_commit_to_bitmap(bitmap_git, &cb.base,
+ (struct commit *)object))
+ continue;
+
+ any_missing = 1;
+ }
+
+ if (!any_missing)
+ goto cleanup;
+
+ tmp_blobs = revs->blob_objects;
+ tmp_trees = revs->tree_objects;
+ tmp_tags = revs->blob_objects;
+ revs->blob_objects = 0;
+ revs->tree_objects = 0;
+ revs->tag_objects = 0;
+
+ /*
+ * We didn't have complete coverage of the roots. First setup a
+ * revision walk to (a) OR in any bitmaps that are UNINTERESTING
+ * between the tips and boundary, and (b) record the boundary.
+ */
+ trace2_region_enter("pack-bitmap", "boundary-prepare", the_repository);
+ if (prepare_revision_walk(revs))
+ die("revision walk setup failed");
+ trace2_region_leave("pack-bitmap", "boundary-prepare", the_repository);
+
+ trace2_region_enter("pack-bitmap", "boundary-traverse", the_repository);
+ revs->boundary = 1;
+ traverse_commit_list_filtered(revs,
+ show_boundary_commit,
+ show_boundary_object,
+ &cb, NULL);
+ revs->boundary = 0;
+ trace2_region_leave("pack-bitmap", "boundary-traverse", the_repository);
+
+ revs->blob_objects = tmp_blobs;
+ revs->tree_objects = tmp_trees;
+ revs->tag_objects = tmp_tags;
+
+ reset_revision_walk();
+ clear_object_flags(UNINTERESTING);
+
+ /*
+ * Then add the boundary commit(s) as fill-in traversal tips.
+ */
+ trace2_region_enter("pack-bitmap", "boundary-fill-in", the_repository);
+ for (i = 0; i < cb.boundary.nr; i++) {
+ struct object *obj = cb.boundary.objects[i].item;
+ if (bitmap_walk_contains(bitmap_git, cb.base, &obj->oid))
+ obj->flags |= SEEN;
+ else
+ add_pending_object(revs, obj, "");
+ }
+ if (revs->pending.nr)
+ cb.base = fill_in_bitmap(bitmap_git, revs, cb.base, NULL);
+ trace2_region_leave("pack-bitmap", "boundary-fill-in", the_repository);
+
+cleanup:
+ object_array_clear(&cb.boundary);
+ revs->ignore_missing_links = 0;
+
+ return cb.base;
+}
+
static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
struct rev_info *revs,
struct object_list *roots,
@@ -1109,33 +1263,19 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
}
if (needs_walk) {
- struct include_data incdata;
- struct bitmap_show_data show_data;
-
- if (!base)
- base = bitmap_new();
-
- incdata.bitmap_git = bitmap_git;
- incdata.base = base;
- incdata.seen = seen;
-
- revs->include_check = should_include;
- revs->include_check_obj = should_include_obj;
- revs->include_check_data = &incdata;
-
- if (prepare_revision_walk(revs))
- die(_("revision walk setup failed"));
-
- show_data.bitmap_git = bitmap_git;
- show_data.base = base;
-
- traverse_commit_list(revs,
- show_commit, show_object,
- &show_data);
-
- revs->include_check = NULL;
- revs->include_check_obj = NULL;
- revs->include_check_data = NULL;
+ /*
+ * This fill-in traversal may walk over some objects
+ * again, since we have already traversed in order to
+ * find the boundary.
+ *
+ * But this extra walk should be extremely cheap, since
+ * all commit objects are loaded into memory, and
+ * because we skip walking to parents that are
+ * UNINTERESTING, since it will be marked in the haves
+ * bitmap already (or it has an on-disk bitmap, since
+ * OR-ing it in covers all of its ancestors).
+ */
+ base = fill_in_bitmap(bitmap_git, revs, base, seen);
}
return base;
@@ -1528,6 +1668,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
int filter_provided_objects)
{
unsigned int i;
+ int use_boundary_traversal;
struct object_list *wants = NULL;
struct object_list *haves = NULL;
@@ -1578,13 +1719,21 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
object_list_insert(object, &wants);
}
- /*
- * if we have a HAVES list, but none of those haves is contained
- * in the packfile that has a bitmap, we don't have anything to
- * optimize here
- */
- if (haves && !in_bitmapped_pack(bitmap_git, haves))
- goto cleanup;
+ use_boundary_traversal = git_env_bool(GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL, -1);
+ if (use_boundary_traversal < 0) {
+ prepare_repo_settings(revs->repo);
+ use_boundary_traversal = revs->repo->settings.pack_use_bitmap_boundary_traversal;
+ }
+
+ if (!use_boundary_traversal) {
+ /*
+ * if we have a HAVES list, but none of those haves is contained
+ * in the packfile that has a bitmap, we don't have anything to
+ * optimize here
+ */
+ if (haves && !in_bitmapped_pack(bitmap_git, haves))
+ goto cleanup;
+ }
/* if we don't want anything, we're done here */
if (!wants)
@@ -1598,18 +1747,32 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
if (load_bitmap(revs->repo, bitmap_git) < 0)
goto cleanup;
- object_array_clear(&revs->pending);
+ if (!use_boundary_traversal)
+ object_array_clear(&revs->pending);
if (haves) {
- revs->ignore_missing_links = 1;
- haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
- reset_revision_walk();
- revs->ignore_missing_links = 0;
+ if (use_boundary_traversal) {
+ trace2_region_enter("pack-bitmap", "haves/boundary", the_repository);
+ haves_bitmap = find_boundary_objects(bitmap_git, revs, haves);
+ trace2_region_leave("pack-bitmap", "haves/boundary", the_repository);
+ } else {
+ trace2_region_enter("pack-bitmap", "haves/classic", the_repository);
+ revs->ignore_missing_links = 1;
+ haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
+ reset_revision_walk();
+ revs->ignore_missing_links = 0;
+ trace2_region_leave("pack-bitmap", "haves/classic", the_repository);
+ }
if (!haves_bitmap)
BUG("failed to perform bitmap walk");
}
+ if (use_boundary_traversal) {
+ object_array_clear(&revs->pending);
+ reset_revision_walk();
+ }
+
wants_bitmap = find_objects(bitmap_git, revs, wants, haves_bitmap);
if (!wants_bitmap)
diff --git a/pack-bitmap.h b/pack-bitmap.h
index 84591f041b..5273a6a019 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -62,6 +62,10 @@ void traverse_bitmap_commit_list(struct bitmap_index *,
void test_bitmap_walk(struct rev_info *revs);
int test_bitmap_commits(struct repository *r);
int test_bitmap_hashes(struct repository *r);
+
+#define GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL \
+ "GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL"
+
struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
int filter_provided_objects);
uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git);
diff --git a/repo-settings.c b/repo-settings.c
index d220c5dd9f..7b566d729d 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -41,8 +41,10 @@ void prepare_repo_settings(struct repository *r)
repo_cfg_bool(r, "feature.experimental", &experimental, 0);
/* Defaults modified by feature.* */
- if (experimental)
+ if (experimental) {
r->settings.fetch_negotiation_algorithm = FETCH_NEGOTIATION_SKIPPING;
+ r->settings.pack_use_bitmap_boundary_traversal = 1;
+ }
if (manyfiles) {
r->settings.index_version = 4;
r->settings.index_skip_hash = 1;
@@ -62,6 +64,9 @@ void prepare_repo_settings(struct repository *r)
repo_cfg_bool(r, "index.sparse", &r->settings.sparse_index, 0);
repo_cfg_bool(r, "index.skiphash", &r->settings.index_skip_hash, r->settings.index_skip_hash);
repo_cfg_bool(r, "pack.readreverseindex", &r->settings.pack_read_reverse_index, 1);
+ repo_cfg_bool(r, "pack.usebitmapboundarytraversal",
+ &r->settings.pack_use_bitmap_boundary_traversal,
+ r->settings.pack_use_bitmap_boundary_traversal);
/*
* The GIT_TEST_MULTI_PACK_INDEX variable is special in that
diff --git a/repository.h b/repository.h
index 74ae26635a..586086783f 100644
--- a/repository.h
+++ b/repository.h
@@ -37,6 +37,7 @@ struct repo_settings {
int command_requires_full_index;
int sparse_index;
int pack_read_reverse_index;
+ int pack_use_bitmap_boundary_traversal;
struct fsmonitor_settings *fsmonitor; /* lazily loaded */
diff --git a/t/README b/t/README
index bdfac4cceb..b71a065e4a 100644
--- a/t/README
+++ b/t/README
@@ -442,6 +442,10 @@ GIT_TEST_INDEX_VERSION=<n> exercises the index read/write code path
for the index version specified. Can be set to any valid version
(currently 2, 3, or 4).
+GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=<boolean> if enabled will
+use the boundary-based bitmap traversal algorithm. See the documentation
+of `pack.useBitmapBoundaryTraversal` for more details.
+
GIT_TEST_PACK_SPARSE=<boolean> if disabled will default the pack-objects
builtin to use the non-sparse object walk. This can still be overridden by
the --sparse command-line argument.
diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh
index 526a5a506e..78c1c6c923 100755
--- a/t/t5310-pack-bitmaps.sh
+++ b/t/t5310-pack-bitmaps.sh
@@ -9,6 +9,10 @@ test_description='exercise basic bitmap functionality'
# their place.
GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0
+# Likewise, allow individual tests to control whether or not they use
+# the boundary-based traversal.
+sane_unset GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL
+
objpath () {
echo ".git/objects/$(echo "$1" | sed -e 's|\(..\)|\1/|')"
}
@@ -457,6 +461,13 @@ test_bitmap_cases () {
test_bitmap_cases
+GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1
+export GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL
+
+test_bitmap_cases
+
+sane_unset GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL
+
test_expect_success 'incremental repack fails when bitmaps are requested' '
test_commit more-1 &&
test_must_fail git repack -d 2>err &&
@@ -468,6 +479,33 @@ test_expect_success 'incremental repack can disable bitmaps' '
git repack -d --no-write-bitmap-index
'
+test_expect_success 'boundary-based traversal is used when requested' '
+ git repack -a -d --write-bitmap-index &&
+
+ for argv in \
+ "git -c pack.useBitmapBoundaryTraversal=true" \
+ "git -c feature.experimental=true" \
+ "GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1 git"
+ do
+ eval "GIT_TRACE2_EVENT=1 $argv rev-list --objects \
+ --use-bitmap-index second..other 2>perf" &&
+ grep "\"region_enter\".*\"label\":\"haves/boundary\"" perf ||
+ return 1
+ done &&
+
+ for argv in \
+ "git -c pack.useBitmapBoundaryTraversal=false" \
+ "git -c feature.experimental=true -c pack.useBitmapBoundaryTraversal=false" \
+ "GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=0 git -c pack.useBitmapBoundaryTraversal=true" \
+ "GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=0 git -c feature.experimental=true"
+ do
+ eval "GIT_TRACE2_EVENT=1 $argv rev-list --objects \
+ --use-bitmap-index second..other 2>perf" &&
+ grep "\"region_enter\".*\"label\":\"haves/classic\"" perf ||
+ return 1
+ done
+'
+
test_bitmap_cases "pack.writeBitmapLookupTable"
test_expect_success 'verify writing bitmap lookup table when enabled' '