summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2024-09-12 20:47:23 +0200
committerJunio C Hamano <gitster@pobox.com>2024-09-12 20:47:23 +0200
commit143682ec43d5772ee9327ed84eb0cdc007b1f489 (patch)
treef31ec1d5710d11b704afef67f270152226e8b537
parentMerge branch 'tb/multi-pack-reuse-fix' (diff)
parentrefs/files: use heuristic to decide whether to repack with `--auto` (diff)
downloadgit-143682ec43d5772ee9327ed84eb0cdc007b1f489.tar.xz
git-143682ec43d5772ee9327ed84eb0cdc007b1f489.zip
Merge branch 'ps/pack-refs-auto-heuristics'
"git pack-refs --auto" for the files backend was too aggressive, which has been a bit tamed. * ps/pack-refs-auto-heuristics: refs/files: use heuristic to decide whether to repack with `--auto` t0601: merge tests for auto-packing of refs wrapper: introduce `log2u()`
-rw-r--r--bisect.c12
-rw-r--r--refs/files-backend.c65
-rw-r--r--refs/packed-backend.c18
-rw-r--r--refs/packed-backend.h7
-rwxr-xr-xt/t0601-reffiles-pack-refs.sh101
-rw-r--r--wrapper.h18
6 files changed, 194 insertions, 27 deletions
diff --git a/bisect.c b/bisect.c
index 4406fc44cf..4713226fad 100644
--- a/bisect.c
+++ b/bisect.c
@@ -1130,16 +1130,6 @@ cleanup:
return res;
}
-static inline int log2i(int n)
-{
- int log2 = 0;
-
- for (; n > 1; n >>= 1)
- log2++;
-
- return log2;
-}
-
static inline int exp2i(int n)
{
return 1 << n;
@@ -1162,7 +1152,7 @@ int estimate_bisect_steps(int all)
if (all < 3)
return 0;
- n = log2i(all);
+ n = log2u(all);
e = exp2i(n);
x = all - e;
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 4c5573b19c..c7f3f4e591 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1311,6 +1311,68 @@ static int should_pack_ref(struct files_ref_store *refs,
return 0;
}
+static int should_pack_refs(struct files_ref_store *refs,
+ struct pack_refs_opts *opts)
+{
+ struct ref_iterator *iter;
+ size_t packed_size;
+ size_t refcount = 0;
+ size_t limit;
+ int ret;
+
+ if (!(opts->flags & PACK_REFS_AUTO))
+ return 1;
+
+ ret = packed_refs_size(refs->packed_ref_store, &packed_size);
+ if (ret < 0)
+ die("cannot determine packed-refs size");
+
+ /*
+ * Packing loose references into the packed-refs file scales with the
+ * number of references we're about to write. We thus decide whether we
+ * repack refs by weighing the current size of the packed-refs file
+ * against the number of loose references. This is done such that we do
+ * not repack too often on repositories with a huge number of
+ * references, where we can expect a lot of churn in the number of
+ * references.
+ *
+ * As a heuristic, we repack if the number of loose references in the
+ * repository exceeds `log2(nr_packed_refs) * 5`, where we estimate
+ * `nr_packed_refs = packed_size / 100`, which scales as following:
+ *
+ * - 1kB ~ 10 packed refs: 16 refs
+ * - 10kB ~ 100 packed refs: 33 refs
+ * - 100kB ~ 1k packed refs: 49 refs
+ * - 1MB ~ 10k packed refs: 66 refs
+ * - 10MB ~ 100k packed refs: 82 refs
+ * - 100MB ~ 1m packed refs: 99 refs
+ *
+ * We thus allow roughly 16 additional loose refs per factor of ten of
+ * packed refs. This heuristic may be tweaked in the future, but should
+ * serve as a sufficiently good first iteration.
+ */
+ limit = log2u(packed_size / 100) * 5;
+ if (limit < 16)
+ limit = 16;
+
+ iter = cache_ref_iterator_begin(get_loose_ref_cache(refs, 0), NULL,
+ refs->base.repo, 0);
+ while ((ret = ref_iterator_advance(iter)) == ITER_OK) {
+ if (should_pack_ref(refs, iter->refname, iter->oid,
+ iter->flags, opts))
+ refcount++;
+ if (refcount >= limit) {
+ ref_iterator_abort(iter);
+ return 1;
+ }
+ }
+
+ if (ret != ITER_DONE)
+ die("error while iterating over references");
+
+ return 0;
+}
+
static int files_pack_refs(struct ref_store *ref_store,
struct pack_refs_opts *opts)
{
@@ -1323,6 +1385,9 @@ static int files_pack_refs(struct ref_store *ref_store,
struct strbuf err = STRBUF_INIT;
struct ref_transaction *transaction;
+ if (!should_pack_refs(refs, opts))
+ return 0;
+
transaction = ref_store_transaction_begin(refs->packed_ref_store, &err);
if (!transaction)
return -1;
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index 7a0a695ca2..07c57fd541 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -1250,6 +1250,24 @@ int packed_refs_is_locked(struct ref_store *ref_store)
return is_lock_file_locked(&refs->lock);
}
+int packed_refs_size(struct ref_store *ref_store,
+ size_t *out)
+{
+ struct packed_ref_store *refs = packed_downcast(ref_store, REF_STORE_READ,
+ "packed_refs_size");
+ struct stat st;
+
+ if (stat(refs->path, &st) < 0) {
+ if (errno != ENOENT)
+ return -1;
+ *out = 0;
+ return 0;
+ }
+
+ *out = st.st_size;
+ return 0;
+}
+
/*
* The packed-refs header line that we write out. Perhaps other traits
* will be added later.
diff --git a/refs/packed-backend.h b/refs/packed-backend.h
index 09437ad13b..9481d5e7c2 100644
--- a/refs/packed-backend.h
+++ b/refs/packed-backend.h
@@ -28,6 +28,13 @@ void packed_refs_unlock(struct ref_store *ref_store);
int packed_refs_is_locked(struct ref_store *ref_store);
/*
+ * Obtain the size of the `packed-refs` file. Reports `0` as size in case there
+ * is no packed-refs file. Returns 0 on success, negative otherwise.
+ */
+int packed_refs_size(struct ref_store *ref_store,
+ size_t *out);
+
+/*
* Return true if `transaction` really needs to be carried out against
* the specified packed_ref_store, or false if it can be skipped
* (i.e., because it is an obvious NOOP). `ref_store` must be locked
diff --git a/t/t0601-reffiles-pack-refs.sh b/t/t0601-reffiles-pack-refs.sh
index 60a544b8ee..d8cbd3f202 100755
--- a/t/t0601-reffiles-pack-refs.sh
+++ b/t/t0601-reffiles-pack-refs.sh
@@ -161,13 +161,6 @@ test_expect_success 'test --exclude takes precedence over --include' '
git pack-refs --include "refs/heads/pack*" --exclude "refs/heads/pack*" &&
test -f .git/refs/heads/dont_pack5'
-test_expect_success '--auto packs and prunes refs as usual' '
- git branch auto &&
- test_path_is_file .git/refs/heads/auto &&
- git pack-refs --auto --all &&
- test_path_is_missing .git/refs/heads/auto
-'
-
test_expect_success 'see if up-to-date packed refs are preserved' '
git branch q &&
git pack-refs --all --prune &&
@@ -367,14 +360,90 @@ test_expect_success 'pack-refs does not drop broken refs during deletion' '
test_cmp expect actual
'
-test_expect_success 'maintenance --auto unconditionally packs loose refs' '
- git update-ref refs/heads/something HEAD &&
- test_path_is_file .git/refs/heads/something &&
- git rev-parse refs/heads/something >expect &&
- git maintenance run --task=pack-refs --auto &&
- test_path_is_missing .git/refs/heads/something &&
- git rev-parse refs/heads/something >actual &&
- test_cmp expect actual
-'
+for command in "git pack-refs --all --auto" "git maintenance run --task=pack-refs --auto"
+do
+ test_expect_success "$command does not repack below 16 refs without packed-refs" '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ (
+ cd repo &&
+ git config set maintenance.auto false &&
+ git commit --allow-empty --message "initial" &&
+
+ # Create 14 additional references, which brings us to
+ # 15 together with the default branch.
+ printf "create refs/heads/loose-%d HEAD\n" $(test_seq 14) >stdin &&
+ git update-ref --stdin <stdin &&
+ test_path_is_missing .git/packed-refs &&
+ git pack-refs --auto --all &&
+ test_path_is_missing .git/packed-refs &&
+
+ # Create the 16th reference, which should cause us to repack.
+ git update-ref refs/heads/loose-15 HEAD &&
+ git pack-refs --auto --all &&
+ test_path_is_file .git/packed-refs
+ )
+ '
+
+ test_expect_success "$command does not repack below 16 refs with small packed-refs" '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ (
+ cd repo &&
+ git config set maintenance.auto false &&
+ git commit --allow-empty --message "initial" &&
+
+ git pack-refs --all &&
+ test_line_count = 2 .git/packed-refs &&
+
+ # Create 15 loose references.
+ printf "create refs/heads/loose-%d HEAD\n" $(test_seq 15) >stdin &&
+ git update-ref --stdin <stdin &&
+ git pack-refs --auto --all &&
+ test_line_count = 2 .git/packed-refs &&
+
+ # Create the 16th loose reference, which should cause us to repack.
+ git update-ref refs/heads/loose-17 HEAD &&
+ git pack-refs --auto --all &&
+ test_line_count = 18 .git/packed-refs
+ )
+ '
+
+ test_expect_success "$command scales with size of packed-refs" '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ (
+ cd repo &&
+ git config set maintenance.auto false &&
+ git commit --allow-empty --message "initial" &&
+
+ # Create 99 packed refs. This should cause the heuristic
+ # to require more than the minimum amount of loose refs.
+ test_seq 99 |
+ while read i
+ do
+ printf "create refs/heads/packed-%d HEAD\n" $i || return 1
+ done >stdin &&
+ git update-ref --stdin <stdin &&
+ git pack-refs --all &&
+ test_line_count = 101 .git/packed-refs &&
+
+ # Create 24 loose refs, which should not yet cause us to repack.
+ printf "create refs/heads/loose-%d HEAD\n" $(test_seq 24) >stdin &&
+ git update-ref --stdin <stdin &&
+ git pack-refs --auto --all &&
+ test_line_count = 101 .git/packed-refs &&
+
+ # Create another handful of refs to cross the border.
+ # Note that we explicitly do not check for strict
+ # boundaries here, as this also depends on the size of
+ # the object hash.
+ printf "create refs/heads/addn-%d HEAD\n" $(test_seq 10) >stdin &&
+ git update-ref --stdin <stdin &&
+ git pack-refs --auto --all &&
+ test_line_count = 135 .git/packed-refs
+ )
+ '
+done
test_done
diff --git a/wrapper.h b/wrapper.h
index 1b2b047ea0..a6b3e1f09e 100644
--- a/wrapper.h
+++ b/wrapper.h
@@ -140,4 +140,22 @@ int csprng_bytes(void *buf, size_t len);
*/
uint32_t git_rand(void);
+/* Provide log2 of the given `size_t`. */
+static inline unsigned log2u(uintmax_t sz)
+{
+ unsigned l = 0;
+
+ /*
+ * Technically this isn't required, but it helps the compiler optimize
+ * this to a `bsr` instruction.
+ */
+ if (!sz)
+ return 0;
+
+ for (; sz; sz >>= 1)
+ l++;
+
+ return l - 1;
+}
+
#endif /* WRAPPER_H */