diff options
author | Junio C Hamano <gitster@pobox.com> | 2023-06-23 01:29:05 +0200 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2023-06-23 01:29:05 +0200 |
commit | f2ffc7418685f75e43e2919426276141fd62c656 (patch) | |
tree | 44571a732337464a3a2a6ac20259d7923bd6ff8a | |
parent | Merge branch 'ja/worktree-orphan' (diff) | |
parent | pack-bitmap.c: use commit boundary during bitmap traversal (diff) | |
download | git-f2ffc7418685f75e43e2919426276141fd62c656.tar.xz git-f2ffc7418685f75e43e2919426276141fd62c656.zip |
Merge branch 'tb/pack-bitmap-traversal-with-boundary'
The object traversal using reachability bitmap done by
"pack-object" has been tweaked to take advantage of the fact that
using "boundary" commits as representative of all the uninteresting
ones can save quite a lot of object enumeration.
* tb/pack-bitmap-traversal-with-boundary:
pack-bitmap.c: use commit boundary during bitmap traversal
pack-bitmap.c: extract `fill_in_bitmap()`
object: add object_array initializer helper function
-rw-r--r-- | Documentation/config/feature.txt | 3 | ||||
-rw-r--r-- | Documentation/config/pack.txt | 17 | ||||
-rwxr-xr-x | ci/run-build-and-tests.sh | 1 | ||||
-rw-r--r-- | object.c | 6 | ||||
-rw-r--r-- | object.h | 2 | ||||
-rw-r--r-- | pack-bitmap.c | 241 | ||||
-rw-r--r-- | pack-bitmap.h | 4 | ||||
-rw-r--r-- | repo-settings.c | 7 | ||||
-rw-r--r-- | repository.h | 1 | ||||
-rw-r--r-- | t/README | 4 | ||||
-rwxr-xr-x | t/t5310-pack-bitmaps.sh | 38 |
11 files changed, 284 insertions, 40 deletions
diff --git a/Documentation/config/feature.txt b/Documentation/config/feature.txt index 17b4d39f89..bf9546fca4 100644 --- a/Documentation/config/feature.txt +++ b/Documentation/config/feature.txt @@ -14,6 +14,9 @@ feature.experimental:: + * `fetch.negotiationAlgorithm=skipping` may improve fetch negotiation times by skipping more commits at a time, reducing the number of round trips. ++ +* `pack.useBitmapBoundaryTraversal=true` may improve bitmap traversal times by +walking fewer objects. feature.manyFiles:: Enable config options that optimize for repos with many files in the diff --git a/Documentation/config/pack.txt b/Documentation/config/pack.txt index d4c7c9d4e4..3748136d14 100644 --- a/Documentation/config/pack.txt +++ b/Documentation/config/pack.txt @@ -123,6 +123,23 @@ pack.useBitmaps:: true. You should not generally need to turn this off unless you are debugging pack bitmaps. +pack.useBitmapBoundaryTraversal:: + When true, Git will use an experimental algorithm for computing + reachability queries with bitmaps. Instead of building up + complete bitmaps for all of the negated tips and then OR-ing + them together, consider negated tips with existing bitmaps as + additive (i.e. OR-ing them into the result if they exist, + ignoring them otherwise), and build up a bitmap at the boundary + instead. ++ +When using this algorithm, Git may include too many objects as a result +of not opening up trees belonging to certain UNINTERESTING commits. This +inexactness matches the non-bitmap traversal algorithm. ++ +In many cases, this can provide a speed-up over the exact algorithm, +particularly when there is poor bitmap coverage of the negated side of +the query. + pack.useSparse:: When true, git will default to using the '--sparse' option in 'git pack-objects' when the '--revs' option is present. This diff --git a/ci/run-build-and-tests.sh b/ci/run-build-and-tests.sh index a18b13a41d..2528f25e31 100755 --- a/ci/run-build-and-tests.sh +++ b/ci/run-build-and-tests.sh @@ -29,6 +29,7 @@ linux-TEST-vars) export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=master export GIT_TEST_NO_WRITE_REV_INDEX=1 export GIT_TEST_CHECKOUT_WORKERS=2 + export GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1 ;; linux-clang) export GIT_TEST_DEFAULT_HASH=sha1 @@ -356,6 +356,12 @@ void object_list_free(struct object_list **list) */ static char object_array_slopbuf[1]; +void object_array_init(struct object_array *array) +{ + struct object_array blank = OBJECT_ARRAY_INIT; + memcpy(array, &blank, sizeof(*array)); +} + void add_object_array_with_path(struct object *obj, const char *name, struct object_array *array, unsigned mode, const char *path) @@ -58,6 +58,8 @@ struct object_array { #define OBJECT_ARRAY_INIT { 0 } +void object_array_init(struct object_array *array); + /* * object flag allocation: * revision.h: 0---------10 15 23------27 diff --git a/pack-bitmap.c b/pack-bitmap.c index 999f962602..894bff02c5 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1043,6 +1043,160 @@ static int add_commit_to_bitmap(struct bitmap_index *bitmap_git, return 1; } +static struct bitmap *fill_in_bitmap(struct bitmap_index *bitmap_git, + struct rev_info *revs, + struct bitmap *base, + struct bitmap *seen) +{ + struct include_data incdata; + struct bitmap_show_data show_data; + + if (!base) + base = bitmap_new(); + + incdata.bitmap_git = bitmap_git; + incdata.base = base; + incdata.seen = seen; + + revs->include_check = should_include; + revs->include_check_obj = should_include_obj; + revs->include_check_data = &incdata; + + if (prepare_revision_walk(revs)) + die(_("revision walk setup failed")); + + show_data.bitmap_git = bitmap_git; + show_data.base = base; + + traverse_commit_list(revs, show_commit, show_object, &show_data); + + revs->include_check = NULL; + revs->include_check_obj = NULL; + revs->include_check_data = NULL; + + return base; +} + +struct bitmap_boundary_cb { + struct bitmap_index *bitmap_git; + struct bitmap *base; + + struct object_array boundary; +}; + +static void show_boundary_commit(struct commit *commit, void *_data) +{ + struct bitmap_boundary_cb *data = _data; + + if (commit->object.flags & BOUNDARY) + add_object_array(&commit->object, "", &data->boundary); + + if (commit->object.flags & UNINTERESTING) { + if (bitmap_walk_contains(data->bitmap_git, data->base, + &commit->object.oid)) + return; + + add_commit_to_bitmap(data->bitmap_git, &data->base, commit); + } +} + +static void show_boundary_object(struct object *object, + const char *name, void *data) +{ + BUG("should not be called"); +} + +static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git, + struct rev_info *revs, + struct object_list *roots) +{ + struct bitmap_boundary_cb cb; + struct object_list *root; + unsigned int i; + unsigned int tmp_blobs, tmp_trees, tmp_tags; + int any_missing = 0; + + cb.bitmap_git = bitmap_git; + cb.base = bitmap_new(); + object_array_init(&cb.boundary); + + revs->ignore_missing_links = 1; + + /* + * OR in any existing reachability bitmaps among `roots` into + * `cb.base`. + */ + for (root = roots; root; root = root->next) { + struct object *object = root->item; + if (object->type != OBJ_COMMIT || + bitmap_walk_contains(bitmap_git, cb.base, &object->oid)) + continue; + + if (add_commit_to_bitmap(bitmap_git, &cb.base, + (struct commit *)object)) + continue; + + any_missing = 1; + } + + if (!any_missing) + goto cleanup; + + tmp_blobs = revs->blob_objects; + tmp_trees = revs->tree_objects; + tmp_tags = revs->blob_objects; + revs->blob_objects = 0; + revs->tree_objects = 0; + revs->tag_objects = 0; + + /* + * We didn't have complete coverage of the roots. First setup a + * revision walk to (a) OR in any bitmaps that are UNINTERESTING + * between the tips and boundary, and (b) record the boundary. + */ + trace2_region_enter("pack-bitmap", "boundary-prepare", the_repository); + if (prepare_revision_walk(revs)) + die("revision walk setup failed"); + trace2_region_leave("pack-bitmap", "boundary-prepare", the_repository); + + trace2_region_enter("pack-bitmap", "boundary-traverse", the_repository); + revs->boundary = 1; + traverse_commit_list_filtered(revs, + show_boundary_commit, + show_boundary_object, + &cb, NULL); + revs->boundary = 0; + trace2_region_leave("pack-bitmap", "boundary-traverse", the_repository); + + revs->blob_objects = tmp_blobs; + revs->tree_objects = tmp_trees; + revs->tag_objects = tmp_tags; + + reset_revision_walk(); + clear_object_flags(UNINTERESTING); + + /* + * Then add the boundary commit(s) as fill-in traversal tips. + */ + trace2_region_enter("pack-bitmap", "boundary-fill-in", the_repository); + for (i = 0; i < cb.boundary.nr; i++) { + struct object *obj = cb.boundary.objects[i].item; + if (bitmap_walk_contains(bitmap_git, cb.base, &obj->oid)) + obj->flags |= SEEN; + else + add_pending_object(revs, obj, ""); + } + if (revs->pending.nr) + cb.base = fill_in_bitmap(bitmap_git, revs, cb.base, NULL); + trace2_region_leave("pack-bitmap", "boundary-fill-in", the_repository); + +cleanup: + object_array_clear(&cb.boundary); + revs->ignore_missing_links = 0; + + return cb.base; +} + static struct bitmap *find_objects(struct bitmap_index *bitmap_git, struct rev_info *revs, struct object_list *roots, @@ -1109,33 +1263,19 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, } if (needs_walk) { - struct include_data incdata; - struct bitmap_show_data show_data; - - if (!base) - base = bitmap_new(); - - incdata.bitmap_git = bitmap_git; - incdata.base = base; - incdata.seen = seen; - - revs->include_check = should_include; - revs->include_check_obj = should_include_obj; - revs->include_check_data = &incdata; - - if (prepare_revision_walk(revs)) - die(_("revision walk setup failed")); - - show_data.bitmap_git = bitmap_git; - show_data.base = base; - - traverse_commit_list(revs, - show_commit, show_object, - &show_data); - - revs->include_check = NULL; - revs->include_check_obj = NULL; - revs->include_check_data = NULL; + /* + * This fill-in traversal may walk over some objects + * again, since we have already traversed in order to + * find the boundary. + * + * But this extra walk should be extremely cheap, since + * all commit objects are loaded into memory, and + * because we skip walking to parents that are + * UNINTERESTING, since it will be marked in the haves + * bitmap already (or it has an on-disk bitmap, since + * OR-ing it in covers all of its ancestors). + */ + base = fill_in_bitmap(bitmap_git, revs, base, seen); } return base; @@ -1528,6 +1668,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, int filter_provided_objects) { unsigned int i; + int use_boundary_traversal; struct object_list *wants = NULL; struct object_list *haves = NULL; @@ -1578,13 +1719,21 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, object_list_insert(object, &wants); } - /* - * if we have a HAVES list, but none of those haves is contained - * in the packfile that has a bitmap, we don't have anything to - * optimize here - */ - if (haves && !in_bitmapped_pack(bitmap_git, haves)) - goto cleanup; + use_boundary_traversal = git_env_bool(GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL, -1); + if (use_boundary_traversal < 0) { + prepare_repo_settings(revs->repo); + use_boundary_traversal = revs->repo->settings.pack_use_bitmap_boundary_traversal; + } + + if (!use_boundary_traversal) { + /* + * if we have a HAVES list, but none of those haves is contained + * in the packfile that has a bitmap, we don't have anything to + * optimize here + */ + if (haves && !in_bitmapped_pack(bitmap_git, haves)) + goto cleanup; + } /* if we don't want anything, we're done here */ if (!wants) @@ -1598,18 +1747,32 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, if (load_bitmap(revs->repo, bitmap_git) < 0) goto cleanup; - object_array_clear(&revs->pending); + if (!use_boundary_traversal) + object_array_clear(&revs->pending); if (haves) { - revs->ignore_missing_links = 1; - haves_bitmap = find_objects(bitmap_git, revs, haves, NULL); - reset_revision_walk(); - revs->ignore_missing_links = 0; + if (use_boundary_traversal) { + trace2_region_enter("pack-bitmap", "haves/boundary", the_repository); + haves_bitmap = find_boundary_objects(bitmap_git, revs, haves); + trace2_region_leave("pack-bitmap", "haves/boundary", the_repository); + } else { + trace2_region_enter("pack-bitmap", "haves/classic", the_repository); + revs->ignore_missing_links = 1; + haves_bitmap = find_objects(bitmap_git, revs, haves, NULL); + reset_revision_walk(); + revs->ignore_missing_links = 0; + trace2_region_leave("pack-bitmap", "haves/classic", the_repository); + } if (!haves_bitmap) BUG("failed to perform bitmap walk"); } + if (use_boundary_traversal) { + object_array_clear(&revs->pending); + reset_revision_walk(); + } + wants_bitmap = find_objects(bitmap_git, revs, wants, haves_bitmap); if (!wants_bitmap) diff --git a/pack-bitmap.h b/pack-bitmap.h index 84591f041b..5273a6a019 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -62,6 +62,10 @@ void traverse_bitmap_commit_list(struct bitmap_index *, void test_bitmap_walk(struct rev_info *revs); int test_bitmap_commits(struct repository *r); int test_bitmap_hashes(struct repository *r); + +#define GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL \ + "GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL" + struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, int filter_provided_objects); uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git); diff --git a/repo-settings.c b/repo-settings.c index d220c5dd9f..7b566d729d 100644 --- a/repo-settings.c +++ b/repo-settings.c @@ -41,8 +41,10 @@ void prepare_repo_settings(struct repository *r) repo_cfg_bool(r, "feature.experimental", &experimental, 0); /* Defaults modified by feature.* */ - if (experimental) + if (experimental) { r->settings.fetch_negotiation_algorithm = FETCH_NEGOTIATION_SKIPPING; + r->settings.pack_use_bitmap_boundary_traversal = 1; + } if (manyfiles) { r->settings.index_version = 4; r->settings.index_skip_hash = 1; @@ -62,6 +64,9 @@ void prepare_repo_settings(struct repository *r) repo_cfg_bool(r, "index.sparse", &r->settings.sparse_index, 0); repo_cfg_bool(r, "index.skiphash", &r->settings.index_skip_hash, r->settings.index_skip_hash); repo_cfg_bool(r, "pack.readreverseindex", &r->settings.pack_read_reverse_index, 1); + repo_cfg_bool(r, "pack.usebitmapboundarytraversal", + &r->settings.pack_use_bitmap_boundary_traversal, + r->settings.pack_use_bitmap_boundary_traversal); /* * The GIT_TEST_MULTI_PACK_INDEX variable is special in that diff --git a/repository.h b/repository.h index 74ae26635a..586086783f 100644 --- a/repository.h +++ b/repository.h @@ -37,6 +37,7 @@ struct repo_settings { int command_requires_full_index; int sparse_index; int pack_read_reverse_index; + int pack_use_bitmap_boundary_traversal; struct fsmonitor_settings *fsmonitor; /* lazily loaded */ @@ -442,6 +442,10 @@ GIT_TEST_INDEX_VERSION=<n> exercises the index read/write code path for the index version specified. Can be set to any valid version (currently 2, 3, or 4). +GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=<boolean> if enabled will +use the boundary-based bitmap traversal algorithm. See the documentation +of `pack.useBitmapBoundaryTraversal` for more details. + GIT_TEST_PACK_SPARSE=<boolean> if disabled will default the pack-objects builtin to use the non-sparse object walk. This can still be overridden by the --sparse command-line argument. diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh index 526a5a506e..78c1c6c923 100755 --- a/t/t5310-pack-bitmaps.sh +++ b/t/t5310-pack-bitmaps.sh @@ -9,6 +9,10 @@ test_description='exercise basic bitmap functionality' # their place. GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 +# Likewise, allow individual tests to control whether or not they use +# the boundary-based traversal. +sane_unset GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL + objpath () { echo ".git/objects/$(echo "$1" | sed -e 's|\(..\)|\1/|')" } @@ -457,6 +461,13 @@ test_bitmap_cases () { test_bitmap_cases +GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1 +export GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL + +test_bitmap_cases + +sane_unset GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL + test_expect_success 'incremental repack fails when bitmaps are requested' ' test_commit more-1 && test_must_fail git repack -d 2>err && @@ -468,6 +479,33 @@ test_expect_success 'incremental repack can disable bitmaps' ' git repack -d --no-write-bitmap-index ' +test_expect_success 'boundary-based traversal is used when requested' ' + git repack -a -d --write-bitmap-index && + + for argv in \ + "git -c pack.useBitmapBoundaryTraversal=true" \ + "git -c feature.experimental=true" \ + "GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1 git" + do + eval "GIT_TRACE2_EVENT=1 $argv rev-list --objects \ + --use-bitmap-index second..other 2>perf" && + grep "\"region_enter\".*\"label\":\"haves/boundary\"" perf || + return 1 + done && + + for argv in \ + "git -c pack.useBitmapBoundaryTraversal=false" \ + "git -c feature.experimental=true -c pack.useBitmapBoundaryTraversal=false" \ + "GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=0 git -c pack.useBitmapBoundaryTraversal=true" \ + "GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=0 git -c feature.experimental=true" + do + eval "GIT_TRACE2_EVENT=1 $argv rev-list --objects \ + --use-bitmap-index second..other 2>perf" && + grep "\"region_enter\".*\"label\":\"haves/classic\"" perf || + return 1 + done +' + test_bitmap_cases "pack.writeBitmapLookupTable" test_expect_success 'verify writing bitmap lookup table when enabled' ' |