summaryrefslogtreecommitdiffstats
path: root/diffcore-rename.c
diff options
context:
space:
mode:
authorJunio C Hamano <junkio@cox.net>2005-05-28 00:56:38 +0200
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-05-29 20:17:44 +0200
commitf0c6b2a2fd98b51f1f2655ea69ace9763da28e79 (patch)
tree29668f3fbfb0e4871ac17f72b144eb289be5e490 /diffcore-rename.c
parent[PATCH] Move pathspec to the beginning of the diffcore chain. (diff)
downloadgit-f0c6b2a2fd98b51f1f2655ea69ace9763da28e79.tar.xz
git-f0c6b2a2fd98b51f1f2655ea69ace9763da28e79.zip
[PATCH] Optimize diff-tree -[CM] --stdin
This attempts to optimize "diff-tree -[CM] --stdin", which compares successible tree pairs. This optimization does not make much sense for other commands in the diff-* brothers. When reading from --stdin and using rename/copy detection, the patch makes diff-tree to read the current index file first. This is done to reuse the optimization used by diff-cache in the non-cached case. Similarity estimator can avoid expanding a blob if the index says what is in the work tree has an exact copy of that blob already expanded. Another optimization the patch makes is to check only file sizes first to terminate similarity estimation early. In order for this to work, it needs a way to tell the size of the blob without expanding it. Since an obvious way of doing it, which is to keep all the blobs previously used in the memory, is too costly, it does so by keeping the filesize for each object it has already seen in memory. Signed-off-by: Junio C Hamano <junkio@cox.net> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'diffcore-rename.c')
-rw-r--r--diffcore-rename.c19
1 files changed, 14 insertions, 5 deletions
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 6389dedbf9..035d4ebb85 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -99,8 +99,11 @@ static int is_exact_match(struct diff_filespec *src, struct diff_filespec *dst)
if (src->sha1_valid && dst->sha1_valid &&
!memcmp(src->sha1, dst->sha1, 20))
return 1;
- if (diff_populate_filespec(src) || diff_populate_filespec(dst))
- /* this is an error but will be caught downstream */
+ if (diff_populate_filespec(src, 1) || diff_populate_filespec(dst, 1))
+ return 0;
+ if (src->size != dst->size)
+ return 0;
+ if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
return 0;
if (src->size == dst->size &&
!memcmp(src->data, dst->data, src->size))
@@ -125,9 +128,11 @@ static int estimate_similarity(struct diff_filespec *src,
* dst, and then some edit has been applied to dst.
*
* Compare them and return how similar they are, representing
- * the score as an integer between 0 and 10000, except
- * where they match exactly it is considered better than anything
- * else.
+ * the score as an integer between 0 and MAX_SCORE.
+ *
+ * When there is an exact match, it is considered a better
+ * match than anything else; the destination does not even
+ * call into this function in that case.
*/
void *delta;
unsigned long delta_size, base_size;
@@ -147,6 +152,7 @@ static int estimate_similarity(struct diff_filespec *src,
/* We would not consider edits that change the file size so
* drastically. delta_size must be smaller than
* (MAX_SCORE-minimum_score)/MAX_SCORE * min(src->size, dst->size).
+ *
* Note that base_size == 0 case is handled here already
* and the final score computation below would not have a
* divide-by-zero issue.
@@ -154,6 +160,9 @@ static int estimate_similarity(struct diff_filespec *src,
if (base_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
return 0;
+ if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
+ return 0; /* error but caught downstream */
+
delta = diff_delta(src->data, src->size,
dst->data, dst->size,
&delta_size);