summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2017-12-06 18:23:42 +0100
committerJunio C Hamano <gitster@pobox.com>2017-12-06 18:23:42 +0100
commit79bafd23a82a883ab054c9fcaf1e3d3bea57e4b9 (patch)
tree6ce68f30cbf2b36bdf62ef2177deca278e1a378f
parentMerge branch 'tg/deprecate-stash-save' (diff)
parentsha1_file: fast-path null sha1 as a missing object (diff)
downloadgit-79bafd23a82a883ab054c9fcaf1e3d3bea57e4b9.tar.xz
git-79bafd23a82a883ab054c9fcaf1e3d3bea57e4b9.zip
Merge branch 'jk/fewer-pack-rescan'
Internaly we use 0{40} as a placeholder object name to signal the codepath that there is no such object (e.g. the fast-forward check while "git fetch" stores a new remote-tracking ref says "we know there is no 'old' thing pointed at by the ref, as we are creating it anew" by passing 0{40} for the 'old' side), and expect that a codepath to locate an in-core object to return NULL as a sign that the object does not exist. A look-up for an object that does not exist however is quite costly with a repository with large number of packfiles. This access pattern has been optimized. * jk/fewer-pack-rescan: sha1_file: fast-path null sha1 as a missing object everything_local: use "quick" object existence check p5551: add a script to test fetch pack-dir rescans t/perf/lib-pack: use fast-import checkpoint to create packs p5550: factor out nonsense-pack creation
-rw-r--r--fetch-pack.c3
-rw-r--r--sha1_file.c3
-rw-r--r--t/perf/lib-pack.sh25
-rwxr-xr-xt/perf/p5550-fetch-tags.sh25
-rwxr-xr-xt/perf/p5551-fetch-rescan.sh55
5 files changed, 87 insertions, 24 deletions
diff --git a/fetch-pack.c b/fetch-pack.c
index 008b25d3db..9f6b07ad91 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -716,7 +716,8 @@ static int everything_local(struct fetch_pack_args *args,
for (ref = *refs; ref; ref = ref->next) {
struct object *o;
- if (!has_object_file(&ref->old_oid))
+ if (!has_object_file_with_flags(&ref->old_oid,
+ OBJECT_INFO_QUICK))
continue;
o = parse_object(&ref->old_oid);
diff --git a/sha1_file.c b/sha1_file.c
index 8ae6cb6285..afe4b90f6e 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1164,6 +1164,9 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
lookup_replace_object(sha1) :
sha1;
+ if (is_null_sha1(real))
+ return -1;
+
if (!oi)
oi = &blank_oi;
diff --git a/t/perf/lib-pack.sh b/t/perf/lib-pack.sh
new file mode 100644
index 0000000000..d3865db286
--- /dev/null
+++ b/t/perf/lib-pack.sh
@@ -0,0 +1,25 @@
+# Helpers for dealing with large numbers of packs.
+
+# create $1 nonsense packs, each with a single blob
+create_packs () {
+ perl -le '
+ my ($n) = @ARGV;
+ for (1..$n) {
+ print "blob";
+ print "data <<EOF";
+ print "$_";
+ print "EOF";
+ print "checkpoint"
+ }
+ ' "$@" |
+ git fast-import
+}
+
+# create a large number of packs, disabling any gc which might
+# cause us to repack them
+setup_many_packs () {
+ git config gc.auto 0 &&
+ git config gc.autopacklimit 0 &&
+ git config fastimport.unpacklimit 0 &&
+ create_packs 500
+}
diff --git a/t/perf/p5550-fetch-tags.sh b/t/perf/p5550-fetch-tags.sh
index a5dc39f86a..d0e0e019ea 100755
--- a/t/perf/p5550-fetch-tags.sh
+++ b/t/perf/p5550-fetch-tags.sh
@@ -20,6 +20,7 @@ start to show a noticeable performance problem on my machine, but without
taking too long to set up and run the tests.
'
. ./perf-lib.sh
+. "$TEST_DIRECTORY/perf/lib-pack.sh"
# make a long nonsense history on branch $1, consisting of $2 commits, each
# with a unique file pointing to the blob at $2.
@@ -44,26 +45,6 @@ create_tags () {
git update-ref --stdin
}
-# create $1 nonsense packs, each with a single blob
-create_packs () {
- perl -le '
- my ($n) = @ARGV;
- for (1..$n) {
- print "blob";
- print "data <<EOF";
- print "$_";
- print "EOF";
- }
- ' "$@" |
- git fast-import &&
-
- git cat-file --batch-all-objects --batch-check='%(objectname)' |
- while read sha1
- do
- echo $sha1 | git pack-objects .git/objects/pack/pack
- done
-}
-
test_expect_success 'create parent and child' '
git init parent &&
git -C parent commit --allow-empty -m base &&
@@ -84,9 +65,7 @@ test_expect_success 'populate parent tags' '
test_expect_success 'create child packs' '
(
cd child &&
- git config gc.auto 0 &&
- git config gc.autopacklimit 0 &&
- create_packs 500
+ setup_many_packs
)
'
diff --git a/t/perf/p5551-fetch-rescan.sh b/t/perf/p5551-fetch-rescan.sh
new file mode 100755
index 0000000000..b99dc23e32
--- /dev/null
+++ b/t/perf/p5551-fetch-rescan.sh
@@ -0,0 +1,55 @@
+#!/bin/sh
+
+test_description='fetch performance with many packs
+
+It is common for fetch to consider objects that we might not have, and it is an
+easy mistake for the code to use a function like `parse_object` that might
+give the correct _answer_ on such an object, but do so slowly (due to
+re-scanning the pack directory for lookup failures).
+
+The resulting performance drop can be hard to notice in a real repository, but
+becomes quite large in a repository with a large number of packs. So this
+test creates a more pathological case, since any mistakes would produce a more
+noticeable slowdown.
+'
+. ./perf-lib.sh
+. "$TEST_DIRECTORY"/perf/lib-pack.sh
+
+test_expect_success 'create parent and child' '
+ git init parent &&
+ git clone parent child
+'
+
+
+test_expect_success 'create refs in the parent' '
+ (
+ cd parent &&
+ git commit --allow-empty -m foo &&
+ head=$(git rev-parse HEAD) &&
+ test_seq 1000 |
+ sed "s,.*,update refs/heads/& $head," |
+ $MODERN_GIT update-ref --stdin
+ )
+'
+
+test_expect_success 'create many packs in the child' '
+ (
+ cd child &&
+ setup_many_packs
+ )
+'
+
+test_perf 'fetch' '
+ # start at the same state for each iteration
+ obj=$($MODERN_GIT -C parent rev-parse HEAD) &&
+ (
+ cd child &&
+ $MODERN_GIT for-each-ref --format="delete %(refname)" refs/remotes |
+ $MODERN_GIT update-ref --stdin &&
+ rm -vf .git/objects/$(echo $obj | sed "s|^..|&/|") &&
+
+ git fetch
+ )
+'
+
+test_done