diff options
author | Junio C Hamano <gitster@pobox.com> | 2017-12-06 18:23:42 +0100 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2017-12-06 18:23:42 +0100 |
commit | 79bafd23a82a883ab054c9fcaf1e3d3bea57e4b9 (patch) | |
tree | 6ce68f30cbf2b36bdf62ef2177deca278e1a378f | |
parent | Merge branch 'tg/deprecate-stash-save' (diff) | |
parent | sha1_file: fast-path null sha1 as a missing object (diff) | |
download | git-79bafd23a82a883ab054c9fcaf1e3d3bea57e4b9.tar.xz git-79bafd23a82a883ab054c9fcaf1e3d3bea57e4b9.zip |
Merge branch 'jk/fewer-pack-rescan'
Internaly we use 0{40} as a placeholder object name to signal the
codepath that there is no such object (e.g. the fast-forward check
while "git fetch" stores a new remote-tracking ref says "we know
there is no 'old' thing pointed at by the ref, as we are creating
it anew" by passing 0{40} for the 'old' side), and expect that a
codepath to locate an in-core object to return NULL as a sign that
the object does not exist. A look-up for an object that does not
exist however is quite costly with a repository with large number
of packfiles. This access pattern has been optimized.
* jk/fewer-pack-rescan:
sha1_file: fast-path null sha1 as a missing object
everything_local: use "quick" object existence check
p5551: add a script to test fetch pack-dir rescans
t/perf/lib-pack: use fast-import checkpoint to create packs
p5550: factor out nonsense-pack creation
-rw-r--r-- | fetch-pack.c | 3 | ||||
-rw-r--r-- | sha1_file.c | 3 | ||||
-rw-r--r-- | t/perf/lib-pack.sh | 25 | ||||
-rwxr-xr-x | t/perf/p5550-fetch-tags.sh | 25 | ||||
-rwxr-xr-x | t/perf/p5551-fetch-rescan.sh | 55 |
5 files changed, 87 insertions, 24 deletions
diff --git a/fetch-pack.c b/fetch-pack.c index 008b25d3db..9f6b07ad91 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -716,7 +716,8 @@ static int everything_local(struct fetch_pack_args *args, for (ref = *refs; ref; ref = ref->next) { struct object *o; - if (!has_object_file(&ref->old_oid)) + if (!has_object_file_with_flags(&ref->old_oid, + OBJECT_INFO_QUICK)) continue; o = parse_object(&ref->old_oid); diff --git a/sha1_file.c b/sha1_file.c index 8ae6cb6285..afe4b90f6e 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1164,6 +1164,9 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, lookup_replace_object(sha1) : sha1; + if (is_null_sha1(real)) + return -1; + if (!oi) oi = &blank_oi; diff --git a/t/perf/lib-pack.sh b/t/perf/lib-pack.sh new file mode 100644 index 0000000000..d3865db286 --- /dev/null +++ b/t/perf/lib-pack.sh @@ -0,0 +1,25 @@ +# Helpers for dealing with large numbers of packs. + +# create $1 nonsense packs, each with a single blob +create_packs () { + perl -le ' + my ($n) = @ARGV; + for (1..$n) { + print "blob"; + print "data <<EOF"; + print "$_"; + print "EOF"; + print "checkpoint" + } + ' "$@" | + git fast-import +} + +# create a large number of packs, disabling any gc which might +# cause us to repack them +setup_many_packs () { + git config gc.auto 0 && + git config gc.autopacklimit 0 && + git config fastimport.unpacklimit 0 && + create_packs 500 +} diff --git a/t/perf/p5550-fetch-tags.sh b/t/perf/p5550-fetch-tags.sh index a5dc39f86a..d0e0e019ea 100755 --- a/t/perf/p5550-fetch-tags.sh +++ b/t/perf/p5550-fetch-tags.sh @@ -20,6 +20,7 @@ start to show a noticeable performance problem on my machine, but without taking too long to set up and run the tests. ' . ./perf-lib.sh +. "$TEST_DIRECTORY/perf/lib-pack.sh" # make a long nonsense history on branch $1, consisting of $2 commits, each # with a unique file pointing to the blob at $2. @@ -44,26 +45,6 @@ create_tags () { git update-ref --stdin } -# create $1 nonsense packs, each with a single blob -create_packs () { - perl -le ' - my ($n) = @ARGV; - for (1..$n) { - print "blob"; - print "data <<EOF"; - print "$_"; - print "EOF"; - } - ' "$@" | - git fast-import && - - git cat-file --batch-all-objects --batch-check='%(objectname)' | - while read sha1 - do - echo $sha1 | git pack-objects .git/objects/pack/pack - done -} - test_expect_success 'create parent and child' ' git init parent && git -C parent commit --allow-empty -m base && @@ -84,9 +65,7 @@ test_expect_success 'populate parent tags' ' test_expect_success 'create child packs' ' ( cd child && - git config gc.auto 0 && - git config gc.autopacklimit 0 && - create_packs 500 + setup_many_packs ) ' diff --git a/t/perf/p5551-fetch-rescan.sh b/t/perf/p5551-fetch-rescan.sh new file mode 100755 index 0000000000..b99dc23e32 --- /dev/null +++ b/t/perf/p5551-fetch-rescan.sh @@ -0,0 +1,55 @@ +#!/bin/sh + +test_description='fetch performance with many packs + +It is common for fetch to consider objects that we might not have, and it is an +easy mistake for the code to use a function like `parse_object` that might +give the correct _answer_ on such an object, but do so slowly (due to +re-scanning the pack directory for lookup failures). + +The resulting performance drop can be hard to notice in a real repository, but +becomes quite large in a repository with a large number of packs. So this +test creates a more pathological case, since any mistakes would produce a more +noticeable slowdown. +' +. ./perf-lib.sh +. "$TEST_DIRECTORY"/perf/lib-pack.sh + +test_expect_success 'create parent and child' ' + git init parent && + git clone parent child +' + + +test_expect_success 'create refs in the parent' ' + ( + cd parent && + git commit --allow-empty -m foo && + head=$(git rev-parse HEAD) && + test_seq 1000 | + sed "s,.*,update refs/heads/& $head," | + $MODERN_GIT update-ref --stdin + ) +' + +test_expect_success 'create many packs in the child' ' + ( + cd child && + setup_many_packs + ) +' + +test_perf 'fetch' ' + # start at the same state for each iteration + obj=$($MODERN_GIT -C parent rev-parse HEAD) && + ( + cd child && + $MODERN_GIT for-each-ref --format="delete %(refname)" refs/remotes | + $MODERN_GIT update-ref --stdin && + rm -vf .git/objects/$(echo $obj | sed "s|^..|&/|") && + + git fetch + ) +' + +test_done |