summaryrefslogtreecommitdiffstats
path: root/diff.c
diff options
context:
space:
mode:
authorJonathan Tan <jonathantanmy@google.com>2020-04-08 00:11:43 +0200
committerJunio C Hamano <gitster@pobox.com>2020-04-08 01:09:29 +0200
commit95acf11a3dc3d18ec999f4913ec6c6a54545c6b7 (patch)
tree3448ee7b27c3a42dfc07931e7eee15441b1b57c8 /diff.c
parentdiff: refactor object read (diff)
downloadgit-95acf11a3dc3d18ec999f4913ec6c6a54545c6b7.tar.xz
git-95acf11a3dc3d18ec999f4913ec6c6a54545c6b7.zip
diff: restrict when prefetching occurs
Commit 7fbbcb21b1 ("diff: batch fetching of missing blobs", 2019-04-08) optimized "diff" by prefetching blobs in a partial clone, but there are some cases wherein blobs do not need to be prefetched. In these cases, any command that uses the diff machinery will unnecessarily fetch blobs. diffcore_std() may read blobs when it calls the following functions: (1) diffcore_skip_stat_unmatch() (controlled by the config variable diff.autorefreshindex) (2) diffcore_break() and diffcore_merge_broken() (for break-rewrite detection) (3) diffcore_rename() (for rename detection) (4) diffcore_pickaxe() (for detecting addition/deletion of specified string) Instead of always prefetching blobs, teach diffcore_skip_stat_unmatch(), diffcore_break(), and diffcore_rename() to prefetch blobs upon the first read of a missing object. This covers (1), (2), and (3): to cover the rest, teach diffcore_std() to prefetch if the output type is one that includes blob data (and hence blob data will be required later anyway), or if it knows that (4) will be run. Helped-by: Jeff King <peff@peff.net> Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'diff.c')
-rw-r--r--diff.c73
1 files changed, 51 insertions, 22 deletions
diff --git a/diff.c b/diff.c
index 61ce05d219..b061f5bc70 100644
--- a/diff.c
+++ b/diff.c
@@ -4034,10 +4034,18 @@ int diff_populate_filespec(struct repository *r,
*/
info.contentp = &s->data;
+ if (options && options->missing_object_cb) {
+ if (!oid_object_info_extended(r, &s->oid, &info,
+ OBJECT_INFO_LOOKUP_REPLACE |
+ OBJECT_INFO_SKIP_FETCH_OBJECT))
+ goto object_read;
+ options->missing_object_cb(options->missing_object_data);
+ }
if (oid_object_info_extended(r, &s->oid, &info,
OBJECT_INFO_LOOKUP_REPLACE))
die("unable to read %s", oid_to_hex(&s->oid));
+object_read:
if (size_only || check_binary) {
if (size_only)
return 0;
@@ -6444,6 +6452,8 @@ static int diff_filespec_check_stat_unmatch(struct repository *r,
{
struct diff_populate_filespec_options dpf_options = {
.check_size_only = 1,
+ .missing_object_cb = diff_queued_diff_prefetch,
+ .missing_object_data = r,
};
if (p->done_skip_stat_unmatch)
@@ -6520,9 +6530,9 @@ void diffcore_fix_diff_index(void)
QSORT(q->queue, q->nr, diffnamecmp);
}
-static void add_if_missing(struct repository *r,
- struct oid_array *to_fetch,
- const struct diff_filespec *filespec)
+void diff_add_if_missing(struct repository *r,
+ struct oid_array *to_fetch,
+ const struct diff_filespec *filespec)
{
if (filespec && filespec->oid_valid &&
!S_ISGITLINK(filespec->mode) &&
@@ -6531,29 +6541,48 @@ static void add_if_missing(struct repository *r,
oid_array_append(to_fetch, &filespec->oid);
}
-void diffcore_std(struct diff_options *options)
+void diff_queued_diff_prefetch(void *repository)
{
- if (options->repo == the_repository && has_promisor_remote()) {
- /*
- * Prefetch the diff pairs that are about to be flushed.
- */
- int i;
- struct diff_queue_struct *q = &diff_queued_diff;
- struct oid_array to_fetch = OID_ARRAY_INIT;
+ struct repository *repo = repository;
+ int i;
+ struct diff_queue_struct *q = &diff_queued_diff;
+ struct oid_array to_fetch = OID_ARRAY_INIT;
- for (i = 0; i < q->nr; i++) {
- struct diff_filepair *p = q->queue[i];
- add_if_missing(options->repo, &to_fetch, p->one);
- add_if_missing(options->repo, &to_fetch, p->two);
- }
- /*
- * NEEDSWORK: Consider deduplicating the OIDs sent.
- */
- promisor_remote_get_direct(options->repo,
- to_fetch.oid, to_fetch.nr);
- oid_array_clear(&to_fetch);
+ for (i = 0; i < q->nr; i++) {
+ struct diff_filepair *p = q->queue[i];
+ diff_add_if_missing(repo, &to_fetch, p->one);
+ diff_add_if_missing(repo, &to_fetch, p->two);
}
+ /*
+ * NEEDSWORK: Consider deduplicating the OIDs sent.
+ */
+ promisor_remote_get_direct(repo, to_fetch.oid, to_fetch.nr);
+
+ oid_array_clear(&to_fetch);
+}
+
+void diffcore_std(struct diff_options *options)
+{
+ int output_formats_to_prefetch = DIFF_FORMAT_DIFFSTAT |
+ DIFF_FORMAT_NUMSTAT |
+ DIFF_FORMAT_PATCH |
+ DIFF_FORMAT_SHORTSTAT |
+ DIFF_FORMAT_DIRSTAT;
+
+ /*
+ * Check if the user requested a blob-data-requiring diff output and/or
+ * break-rewrite detection (which requires blob data). If yes, prefetch
+ * the diff pairs.
+ *
+ * If no prefetching occurs, diffcore_rename() will prefetch if it
+ * decides that it needs inexact rename detection.
+ */
+ if (options->repo == the_repository && has_promisor_remote() &&
+ (options->output_format & output_formats_to_prefetch ||
+ options->pickaxe_opts & DIFF_PICKAXE_KINDS_MASK))
+ diff_queued_diff_prefetch(options->repo);
+
/* NOTE please keep the following in sync with diff_tree_combined() */
if (options->skip_stat_unmatch)
diffcore_skip_stat_unmatch(options);