diff options
author | Junio C Hamano <gitster@pobox.com> | 2024-09-12 20:47:23 +0200 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2024-09-12 20:47:23 +0200 |
commit | 143682ec43d5772ee9327ed84eb0cdc007b1f489 (patch) | |
tree | f31ec1d5710d11b704afef67f270152226e8b537 | |
parent | Merge branch 'tb/multi-pack-reuse-fix' (diff) | |
parent | refs/files: use heuristic to decide whether to repack with `--auto` (diff) | |
download | git-143682ec43d5772ee9327ed84eb0cdc007b1f489.tar.xz git-143682ec43d5772ee9327ed84eb0cdc007b1f489.zip |
Merge branch 'ps/pack-refs-auto-heuristics'
"git pack-refs --auto" for the files backend was too aggressive,
which has been a bit tamed.
* ps/pack-refs-auto-heuristics:
refs/files: use heuristic to decide whether to repack with `--auto`
t0601: merge tests for auto-packing of refs
wrapper: introduce `log2u()`
-rw-r--r-- | bisect.c | 12 | ||||
-rw-r--r-- | refs/files-backend.c | 65 | ||||
-rw-r--r-- | refs/packed-backend.c | 18 | ||||
-rw-r--r-- | refs/packed-backend.h | 7 | ||||
-rwxr-xr-x | t/t0601-reffiles-pack-refs.sh | 101 | ||||
-rw-r--r-- | wrapper.h | 18 |
6 files changed, 194 insertions, 27 deletions
@@ -1130,16 +1130,6 @@ cleanup: return res; } -static inline int log2i(int n) -{ - int log2 = 0; - - for (; n > 1; n >>= 1) - log2++; - - return log2; -} - static inline int exp2i(int n) { return 1 << n; @@ -1162,7 +1152,7 @@ int estimate_bisect_steps(int all) if (all < 3) return 0; - n = log2i(all); + n = log2u(all); e = exp2i(n); x = all - e; diff --git a/refs/files-backend.c b/refs/files-backend.c index 4c5573b19c..c7f3f4e591 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1311,6 +1311,68 @@ static int should_pack_ref(struct files_ref_store *refs, return 0; } +static int should_pack_refs(struct files_ref_store *refs, + struct pack_refs_opts *opts) +{ + struct ref_iterator *iter; + size_t packed_size; + size_t refcount = 0; + size_t limit; + int ret; + + if (!(opts->flags & PACK_REFS_AUTO)) + return 1; + + ret = packed_refs_size(refs->packed_ref_store, &packed_size); + if (ret < 0) + die("cannot determine packed-refs size"); + + /* + * Packing loose references into the packed-refs file scales with the + * number of references we're about to write. We thus decide whether we + * repack refs by weighing the current size of the packed-refs file + * against the number of loose references. This is done such that we do + * not repack too often on repositories with a huge number of + * references, where we can expect a lot of churn in the number of + * references. + * + * As a heuristic, we repack if the number of loose references in the + * repository exceeds `log2(nr_packed_refs) * 5`, where we estimate + * `nr_packed_refs = packed_size / 100`, which scales as following: + * + * - 1kB ~ 10 packed refs: 16 refs + * - 10kB ~ 100 packed refs: 33 refs + * - 100kB ~ 1k packed refs: 49 refs + * - 1MB ~ 10k packed refs: 66 refs + * - 10MB ~ 100k packed refs: 82 refs + * - 100MB ~ 1m packed refs: 99 refs + * + * We thus allow roughly 16 additional loose refs per factor of ten of + * packed refs. This heuristic may be tweaked in the future, but should + * serve as a sufficiently good first iteration. + */ + limit = log2u(packed_size / 100) * 5; + if (limit < 16) + limit = 16; + + iter = cache_ref_iterator_begin(get_loose_ref_cache(refs, 0), NULL, + refs->base.repo, 0); + while ((ret = ref_iterator_advance(iter)) == ITER_OK) { + if (should_pack_ref(refs, iter->refname, iter->oid, + iter->flags, opts)) + refcount++; + if (refcount >= limit) { + ref_iterator_abort(iter); + return 1; + } + } + + if (ret != ITER_DONE) + die("error while iterating over references"); + + return 0; +} + static int files_pack_refs(struct ref_store *ref_store, struct pack_refs_opts *opts) { @@ -1323,6 +1385,9 @@ static int files_pack_refs(struct ref_store *ref_store, struct strbuf err = STRBUF_INIT; struct ref_transaction *transaction; + if (!should_pack_refs(refs, opts)) + return 0; + transaction = ref_store_transaction_begin(refs->packed_ref_store, &err); if (!transaction) return -1; diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 7a0a695ca2..07c57fd541 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1250,6 +1250,24 @@ int packed_refs_is_locked(struct ref_store *ref_store) return is_lock_file_locked(&refs->lock); } +int packed_refs_size(struct ref_store *ref_store, + size_t *out) +{ + struct packed_ref_store *refs = packed_downcast(ref_store, REF_STORE_READ, + "packed_refs_size"); + struct stat st; + + if (stat(refs->path, &st) < 0) { + if (errno != ENOENT) + return -1; + *out = 0; + return 0; + } + + *out = st.st_size; + return 0; +} + /* * The packed-refs header line that we write out. Perhaps other traits * will be added later. diff --git a/refs/packed-backend.h b/refs/packed-backend.h index 09437ad13b..9481d5e7c2 100644 --- a/refs/packed-backend.h +++ b/refs/packed-backend.h @@ -28,6 +28,13 @@ void packed_refs_unlock(struct ref_store *ref_store); int packed_refs_is_locked(struct ref_store *ref_store); /* + * Obtain the size of the `packed-refs` file. Reports `0` as size in case there + * is no packed-refs file. Returns 0 on success, negative otherwise. + */ +int packed_refs_size(struct ref_store *ref_store, + size_t *out); + +/* * Return true if `transaction` really needs to be carried out against * the specified packed_ref_store, or false if it can be skipped * (i.e., because it is an obvious NOOP). `ref_store` must be locked diff --git a/t/t0601-reffiles-pack-refs.sh b/t/t0601-reffiles-pack-refs.sh index 60a544b8ee..d8cbd3f202 100755 --- a/t/t0601-reffiles-pack-refs.sh +++ b/t/t0601-reffiles-pack-refs.sh @@ -161,13 +161,6 @@ test_expect_success 'test --exclude takes precedence over --include' ' git pack-refs --include "refs/heads/pack*" --exclude "refs/heads/pack*" && test -f .git/refs/heads/dont_pack5' -test_expect_success '--auto packs and prunes refs as usual' ' - git branch auto && - test_path_is_file .git/refs/heads/auto && - git pack-refs --auto --all && - test_path_is_missing .git/refs/heads/auto -' - test_expect_success 'see if up-to-date packed refs are preserved' ' git branch q && git pack-refs --all --prune && @@ -367,14 +360,90 @@ test_expect_success 'pack-refs does not drop broken refs during deletion' ' test_cmp expect actual ' -test_expect_success 'maintenance --auto unconditionally packs loose refs' ' - git update-ref refs/heads/something HEAD && - test_path_is_file .git/refs/heads/something && - git rev-parse refs/heads/something >expect && - git maintenance run --task=pack-refs --auto && - test_path_is_missing .git/refs/heads/something && - git rev-parse refs/heads/something >actual && - test_cmp expect actual -' +for command in "git pack-refs --all --auto" "git maintenance run --task=pack-refs --auto" +do + test_expect_success "$command does not repack below 16 refs without packed-refs" ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + git config set maintenance.auto false && + git commit --allow-empty --message "initial" && + + # Create 14 additional references, which brings us to + # 15 together with the default branch. + printf "create refs/heads/loose-%d HEAD\n" $(test_seq 14) >stdin && + git update-ref --stdin <stdin && + test_path_is_missing .git/packed-refs && + git pack-refs --auto --all && + test_path_is_missing .git/packed-refs && + + # Create the 16th reference, which should cause us to repack. + git update-ref refs/heads/loose-15 HEAD && + git pack-refs --auto --all && + test_path_is_file .git/packed-refs + ) + ' + + test_expect_success "$command does not repack below 16 refs with small packed-refs" ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + git config set maintenance.auto false && + git commit --allow-empty --message "initial" && + + git pack-refs --all && + test_line_count = 2 .git/packed-refs && + + # Create 15 loose references. + printf "create refs/heads/loose-%d HEAD\n" $(test_seq 15) >stdin && + git update-ref --stdin <stdin && + git pack-refs --auto --all && + test_line_count = 2 .git/packed-refs && + + # Create the 16th loose reference, which should cause us to repack. + git update-ref refs/heads/loose-17 HEAD && + git pack-refs --auto --all && + test_line_count = 18 .git/packed-refs + ) + ' + + test_expect_success "$command scales with size of packed-refs" ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + git config set maintenance.auto false && + git commit --allow-empty --message "initial" && + + # Create 99 packed refs. This should cause the heuristic + # to require more than the minimum amount of loose refs. + test_seq 99 | + while read i + do + printf "create refs/heads/packed-%d HEAD\n" $i || return 1 + done >stdin && + git update-ref --stdin <stdin && + git pack-refs --all && + test_line_count = 101 .git/packed-refs && + + # Create 24 loose refs, which should not yet cause us to repack. + printf "create refs/heads/loose-%d HEAD\n" $(test_seq 24) >stdin && + git update-ref --stdin <stdin && + git pack-refs --auto --all && + test_line_count = 101 .git/packed-refs && + + # Create another handful of refs to cross the border. + # Note that we explicitly do not check for strict + # boundaries here, as this also depends on the size of + # the object hash. + printf "create refs/heads/addn-%d HEAD\n" $(test_seq 10) >stdin && + git update-ref --stdin <stdin && + git pack-refs --auto --all && + test_line_count = 135 .git/packed-refs + ) + ' +done test_done @@ -140,4 +140,22 @@ int csprng_bytes(void *buf, size_t len); */ uint32_t git_rand(void); +/* Provide log2 of the given `size_t`. */ +static inline unsigned log2u(uintmax_t sz) +{ + unsigned l = 0; + + /* + * Technically this isn't required, but it helps the compiler optimize + * this to a `bsr` instruction. + */ + if (!sz) + return 0; + + for (; sz; sz >>= 1) + l++; + + return l - 1; +} + #endif /* WRAPPER_H */ |