diff options
author | Junio C Hamano <junkio@cox.net> | 2005-05-24 10:10:48 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-05-24 10:26:26 +0200 |
commit | 25d5ea410fd528c13e5f56f7836ee8a3b839b168 (patch) | |
tree | 06e98a3d7cfef4675801dacaffc8aad0d251398a /diffcore-rename.c | |
parent | git-apply: more consistency checks on gitdiff filenames (diff) | |
download | git-25d5ea410fd528c13e5f56f7836ee8a3b839b168.tar.xz git-25d5ea410fd528c13e5f56f7836ee8a3b839b168.zip |
[PATCH] Redo rename/copy detection logic.
Earlier implementation had a major screw-up in the memory
management area. Rename/copy logic sometimes borrowed a pointer
to a structure without any provision for downstream to determine
which pointer is shared and which is not. This caused the
later clean-up code to sometimes double-free such a structure,
resulting in a segfault. This made -M and -C useless.
Another problem the earlier implementation had was that it
reordered the patches, and forced the logic to differentiate
renames and copies to depend on that particular order. This
problem was fixed by teaching the rename/copy detection logic not to
do any reordering, and the rename/copy differentiator not to depend
on the order of the patches. The diffs will leave the rename/copy
detector in the same destination path order as the patch that
was fed into it. Some test vectors have been reordered to
accommodate this change.
It also adds sanity-check logic to the human-readable diff-raw
output to detect paths with embedded TAB and LF characters,
which cannot be expressed with that format. This idea came up
during a discussion with Chris Wedgwood.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'diffcore-rename.c')
-rw-r--r-- | diffcore-rename.c | 385 |
1 files changed, 181 insertions, 204 deletions
diff --git a/diffcore-rename.c b/diffcore-rename.c index f40ab78bd1..34e83dac8d 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -6,29 +6,92 @@ #include "diffcore.h" #include "delta.h" -struct diff_rename_pool { - struct diff_filespec **s; - int nr, alloc; -}; +/* Table of rename/copy destinations */ + +static struct diff_rename_dst { + struct diff_filespec *two; + struct diff_filepair *pair; +} *rename_dst; +static int rename_dst_nr, rename_dst_alloc; -static void diff_rename_pool_clear(struct diff_rename_pool *pool) +static struct diff_rename_dst *locate_rename_dst(struct diff_filespec *two, + int insert_ok) { - pool->s = NULL; pool->nr = pool->alloc = 0; + int first, last; + + first = 0; + last = rename_dst_nr; + while (last > first) { + int next = (last + first) >> 1; + struct diff_rename_dst *dst = &(rename_dst[next]); + int cmp = strcmp(two->path, dst->two->path); + if (!cmp) + return dst; + if (cmp < 0) { + last = next; + continue; + } + first = next+1; + } + /* not found */ + if (!insert_ok) + return NULL; + /* insert to make it at "first" */ + if (rename_dst_alloc <= rename_dst_nr) { + rename_dst_alloc = alloc_nr(rename_dst_alloc); + rename_dst = xrealloc(rename_dst, + rename_dst_alloc * sizeof(*rename_dst)); + } + rename_dst_nr++; + if (first < rename_dst_nr) + memmove(rename_dst + first + 1, rename_dst + first, + (rename_dst_nr - first - 1) * sizeof(*rename_dst)); + rename_dst[first].two = two; + rename_dst[first].pair = NULL; + return &(rename_dst[first]); } -static void diff_rename_pool_add(struct diff_rename_pool *pool, - struct diff_filespec *s) -{ - if (S_ISDIR(s->mode)) - return; /* no trees, please */ +static struct diff_rename_src { + struct diff_filespec *one; + unsigned src_used : 1; +} *rename_src; +static int rename_src_nr, rename_src_alloc; - if (pool->alloc <= pool->nr) { - pool->alloc = alloc_nr(pool->alloc); - pool->s = xrealloc(pool->s, - sizeof(*(pool->s)) * pool->alloc); +static struct diff_rename_src *locate_rename_src(struct 
diff_filespec *one, + int insert_ok) +{ + int first, last; + + first = 0; + last = rename_src_nr; + while (last > first) { + int next = (last + first) >> 1; + struct diff_rename_src *src = &(rename_src[next]); + int cmp = strcmp(one->path, src->one->path); + if (!cmp) + return src; + if (cmp < 0) { + last = next; + continue; + } + first = next+1; + } + /* not found */ + if (!insert_ok) + return NULL; + /* insert to make it at "first" */ + if (rename_src_alloc <= rename_src_nr) { + rename_src_alloc = alloc_nr(rename_src_alloc); + rename_src = xrealloc(rename_src, + rename_src_alloc * sizeof(*rename_src)); } - pool->s[pool->nr] = s; - pool->nr++; + rename_src_nr++; + if (first < rename_src_nr) + memmove(rename_src + first + 1, rename_src + first, + (rename_src_nr - first - 1) * sizeof(*rename_src)); + rename_src[first].one = one; + rename_src[first].src_used = 0; + return &(rename_src[first]); } static int is_exact_match(struct diff_filespec *src, struct diff_filespec *dst) @@ -46,8 +109,8 @@ static int is_exact_match(struct diff_filespec *src, struct diff_filespec *dst) } struct diff_score { - struct diff_filespec *src; - struct diff_filespec *dst; + int src; /* index in rename_src */ + int dst; /* index in rename_dst */ int score; int rank; }; @@ -113,92 +176,28 @@ static int estimate_similarity(struct diff_filespec *src, return score; } -static void record_rename_pair(struct diff_queue_struct *outq, - struct diff_filespec *src, - struct diff_filespec *dst, - int rank, - int score) +static void record_rename_pair(struct diff_queue_struct *renq, + int dst_index, int src_index, int score) { - /* - * These ranks are used to sort the final output, because there - * are certain dependencies: - * - * 1. rename/copy that depends on deleted ones. - * 2. deletions in the original. - * 3. rename/copy that depends on the pre-edit image of kept files. - * 4. additions, modifications and no-modifications in the original. - * 5. 
rename/copy that depends on the post-edit image of kept files - * (note that we currently do not detect such rename/copy). - * - * The downstream diffcore transformers are free to reorder - * the entries as long as they keep file pairs that has the - * same p->one->path in earlier rename_rank to appear before - * later ones. - * - * To the final output routine, and in the diff-raw format - * output, a rename/copy that is based on a path that has a - * later entry that shares the same p->one->path and is not a - * deletion is a copy. Otherwise it is a rename. - */ + struct diff_filespec *one, *two, *src, *dst; + struct diff_filepair *dp; - struct diff_filepair *dp = diff_queue(outq, src, dst); - dp->rename_rank = rank * 2 + 1; - dp->score = score; - dst->xfrm_flags |= RENAME_DST_MATCHED; -} + if (rename_dst[dst_index].pair) + die("internal error: dst already matched."); -#if 0 -static void debug_filespec(struct diff_filespec *s, int x, const char *one) -{ - fprintf(stderr, "queue[%d] %s (%s) %s %06o %s\n", - x, one, - s->path, - DIFF_FILE_VALID(s) ? "valid" : "invalid", - s->mode, - s->sha1_valid ? 
sha1_to_hex(s->sha1) : ""); - fprintf(stderr, "queue[%d] %s size %lu flags %d\n", - x, one, - s->size, s->xfrm_flags); -} + src = rename_src[src_index].one; + one = alloc_filespec(src->path); + fill_filespec(one, src->sha1, src->mode); -static void debug_filepair(const struct diff_filepair *p, int i) -{ - debug_filespec(p->one, i, "one"); - debug_filespec(p->two, i, "two"); - fprintf(stderr, "pair rank %d, orig order %d, score %d\n", - p->rename_rank, p->orig_order, p->score); -} + dst = rename_dst[dst_index].two; + two = alloc_filespec(dst->path); + fill_filespec(two, dst->sha1, dst->mode); -static void debug_queue(const char *msg, struct diff_queue_struct *q) -{ - int i; - if (msg) - fprintf(stderr, "%s\n", msg); - fprintf(stderr, "q->nr = %d\n", q->nr); - for (i = 0; i < q->nr; i++) { - struct diff_filepair *p = q->queue[i]; - debug_filepair(p, i); - } -} -#else -#define debug_queue(a,b) do { ; /*nothing*/ } while(0) -#endif + dp = diff_queue(renq, one, two); + dp->score = score; -/* - * We sort the outstanding diff entries according to the rank (see - * comment at the beginning of record_rename_pair) and tiebreak with - * the order in the original input. 
- */ -static int rank_compare(const void *a_, const void *b_) -{ - const struct diff_filepair *a = *(const struct diff_filepair **)a_; - const struct diff_filepair *b = *(const struct diff_filepair **)b_; - int a_rank = a->rename_rank; - int b_rank = b->rename_rank; - - if (a_rank != b_rank) - return a_rank - b_rank; - return a->orig_order - b->orig_order; + rename_src[src_index].src_used = 1; + rename_dst[dst_index].pair = dp; } /* @@ -232,24 +231,15 @@ int diff_scoreopt_parse(const char *opt) void diffcore_rename(int detect_rename, int minimum_score) { struct diff_queue_struct *q = &diff_queued_diff; - struct diff_queue_struct outq; - struct diff_rename_pool created, deleted, stay; - struct diff_rename_pool *(srcs[2]); + struct diff_queue_struct renq, outq; struct diff_score *mx; - int h, i, j; - int num_create, num_src, dst_cnt, src_cnt; + int i, j; + int num_create, num_src, dst_cnt; if (!minimum_score) minimum_score = DEFAULT_MINIMUM_SCORE; - outq.queue = NULL; - outq.nr = outq.alloc = 0; - - diff_rename_pool_clear(&created); - diff_rename_pool_clear(&deleted); - diff_rename_pool_clear(&stay); - - srcs[0] = &deleted; - srcs[1] = &stay; + renq.queue = NULL; + renq.nr = renq.alloc = 0; for (i = 0; i < q->nr; i++) { struct diff_filepair *p = q->queue[i]; @@ -257,76 +247,70 @@ void diffcore_rename(int detect_rename, int minimum_score) if (!DIFF_FILE_VALID(p->two)) continue; /* unmerged */ else - diff_rename_pool_add(&created, p->two); + locate_rename_dst(p->two, 1); else if (!DIFF_FILE_VALID(p->two)) - diff_rename_pool_add(&deleted, p->one); + locate_rename_src(p->one, 1); else if (1 < detect_rename) /* find copy, too */ - diff_rename_pool_add(&stay, p->one); + locate_rename_src(p->one, 1); } - if (created.nr == 0) + if (rename_dst_nr == 0) goto cleanup; /* nothing to do */ /* We really want to cull the candidates list early * with cheap tests in order to avoid doing deltas. 
*/ - for (i = 0; i < created.nr; i++) { - for (h = 0; h < sizeof(srcs)/sizeof(srcs[0]); h++) { - struct diff_rename_pool *p = srcs[h]; - for (j = 0; j < p->nr; j++) { - if (!is_exact_match(p->s[j], created.s[i])) - continue; - record_rename_pair(&outq, - p->s[j], created.s[i], h, - MAX_SCORE); - break; /* we are done with this entry */ - } + for (i = 0; i < rename_dst_nr; i++) { + struct diff_filespec *two = rename_dst[i].two; + for (j = 0; j < rename_src_nr; j++) { + struct diff_filespec *one = rename_src[j].one; + if (!is_exact_match(one, two)) + continue; + record_rename_pair(&renq, i, j, MAX_SCORE); + break; /* we are done with this entry */ } } - debug_queue("done detecting exact", &outq); + diff_debug_queue("done detecting exact", &renq); /* Have we run out the created file pool? If so we can avoid * doing the delta matrix altogether. */ - if (outq.nr == created.nr) + if (renq.nr == rename_dst_nr) goto flush_rest; - num_create = (created.nr - outq.nr); - num_src = deleted.nr + stay.nr; + num_create = (rename_dst_nr - renq.nr); + num_src = rename_src_nr; mx = xmalloc(sizeof(*mx) * num_create * num_src); - for (dst_cnt = i = 0; i < created.nr; i++) { + for (dst_cnt = i = 0; i < rename_dst_nr; i++) { int base = dst_cnt * num_src; - if (created.s[i]->xfrm_flags & RENAME_DST_MATCHED) + struct diff_filespec *two = rename_dst[i].two; + if (rename_dst[i].pair) continue; /* dealt with exact match already. 
*/ - for (src_cnt = h = 0; h < sizeof(srcs)/sizeof(srcs[0]); h++) { - struct diff_rename_pool *p = srcs[h]; - for (j = 0; j < p->nr; j++, src_cnt++) { - struct diff_score *m = &mx[base + src_cnt]; - m->src = p->s[j]; - m->dst = created.s[i]; - m->score = estimate_similarity(m->src, m->dst, - minimum_score); - m->rank = h; - } + for (j = 0; j < rename_src_nr; j++) { + struct diff_filespec *one = rename_src[j].one; + struct diff_score *m = &mx[base+j]; + m->src = j; + m->dst = i; + m->score = estimate_similarity(one, two, + minimum_score); } dst_cnt++; } /* cost matrix sorted by most to least similar pair */ qsort(mx, num_create * num_src, sizeof(*mx), score_compare); for (i = 0; i < num_create * num_src; i++) { - if (mx[i].dst->xfrm_flags & RENAME_DST_MATCHED) - continue; /* alreayd done, either exact or fuzzy. */ + struct diff_rename_dst *dst = &rename_dst[mx[i].dst]; + if (dst->pair) + continue; /* already done, either exact or fuzzy. */ if (mx[i].score < minimum_score) break; /* there is not any more diffs applicable. */ - record_rename_pair(&outq, - mx[i].src, mx[i].dst, mx[i].rank, - mx[i].score); + record_rename_pair(&renq, mx[i].dst, mx[i].src, mx[i].score); } free(mx); - debug_queue("done detecting fuzzy", &outq); + diff_debug_queue("done detecting fuzzy", &renq); flush_rest: /* At this point, we have found some renames and copies and they - * are kept in outq. The original list is still in *q. + * are kept in renq. The original list is still in *q. * * Scan the original list and move them into the outq; we will sort * outq and swap it into the queue supplied to pass that to @@ -335,68 +319,61 @@ void diffcore_rename(int detect_rename, int minimum_score) * See comments at the top of record_rename_pair for numbers used * to assign rename_rank. 
*/ + outq.queue = NULL; + outq.nr = outq.alloc = 0; for (i = 0; i < q->nr; i++) { - struct diff_filepair *dp, *p = q->queue[i]; - if (!DIFF_FILE_VALID(p->one)) { - /* creation or unmerged entries */ - dp = diff_queue(&outq, p->one, p->two); - dp->rename_rank = 4; - } - else if (!DIFF_FILE_VALID(p->two)) { - /* deletion */ - dp = diff_queue(&outq, p->one, p->two); - dp->rename_rank = 2; + struct diff_filepair *p = q->queue[i]; + struct diff_rename_src *src = locate_rename_src(p->one, 0); + struct diff_rename_dst *dst = locate_rename_dst(p->two, 0); + struct diff_filepair *pair_to_free = NULL; + + if (dst) { + /* creation */ + if (dst->pair) { + /* renq has rename/copy already to produce + * this file, so we do not emit the creation + * record in the output. + */ + diff_q(&outq, dst->pair); + pair_to_free = p; + } + else + /* no matching rename/copy source, so record + * this as a creation. + */ + diff_q(&outq, p); } + else if (!diff_unmodified_pair(p)) + /* all the other cases need to be recorded as is */ + diff_q(&outq, p); else { - /* modification, or stay as is */ - dp = diff_queue(&outq, p->one, p->two); - dp->rename_rank = 4; - } - free(p); - } - debug_queue("done copying original", &outq); - - /* Sort outq */ - qsort(outq.queue, outq.nr, sizeof(outq.queue[0]), rank_compare); - - debug_queue("done sorting", &outq); - - free(q->queue); - q->nr = q->alloc = 0; - q->queue = NULL; - - /* Copy it out to q, removing duplicates. 
*/ - for (i = 0; i < outq.nr; i++) { - struct diff_filepair *p = outq.queue[i]; - if (!DIFF_FILE_VALID(p->one)) { - /* created or unmerged */ - if (p->two->xfrm_flags & RENAME_DST_MATCHED) - ; /* rename/copy created it already */ + /* unmodified pair needs to be recorded only if + * it is used as the source of rename/copy + */ + if (src && src->src_used) + diff_q(&outq, p); else - diff_queue(q, p->one, p->two); - } - else if (!DIFF_FILE_VALID(p->two)) { - /* deleted */ - diff_queue(q, p->one, p->two); + pair_to_free = p; } - else if (strcmp(p->one->path, p->two->path)) { - /* rename or copy */ - struct diff_filepair *dp = - diff_queue(q, p->one, p->two); - dp->score = p->score; + if (pair_to_free) { + diff_free_filespec_data(pair_to_free->one); + diff_free_filespec_data(pair_to_free->two); + free(pair_to_free); } - else - /* otherwise it is a modified (or "stay") entry */ - diff_queue(q, p->one, p->two); - free(p); } + diff_debug_queue("done copying original", &outq); - free(outq.queue); - debug_queue("done collapsing", q); + free(renq.queue); + free(q->queue); + *q = outq; + diff_debug_queue("done collapsing", q); cleanup: - free(created.s); - free(deleted.s); - free(stay.s); + free(rename_dst); + rename_dst = NULL; + rename_dst_nr = rename_dst_alloc = 0; + free(rename_src); + rename_src = NULL; + rename_src_nr = rename_src_alloc = 0; return; } |