diff options
author | Junio C Hamano <gitster@pobox.com> | 2020-05-01 22:39:53 +0200 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2020-05-01 22:39:53 +0200 |
commit | 9b6606f43d55bbf33b9924d16e02e60e1c09660a (patch) | |
tree | 0082094df8d1e99873fa1e72628fa4e02ec3a282 /revision.c | |
parent | Merge branch 'tb/commit-graph-fd-exhaustion-fix' (diff) | |
parent | bloom: ignore renames when computing changed paths (diff) | |
download | git-9b6606f43d55bbf33b9924d16e02e60e1c09660a.tar.xz git-9b6606f43d55bbf33b9924d16e02e60e1c09660a.zip |
Merge branch 'gs/commit-graph-path-filter'
Introduce an extension to the commit-graph to make it efficient to
check for the paths that were modified at each commit using Bloom
filters.
* gs/commit-graph-path-filter:
bloom: ignore renames when computing changed paths
commit-graph: add GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS test flag
t4216: add end to end tests for git log with Bloom filters
revision.c: add trace2 stats around Bloom filter usage
revision.c: use Bloom filters to speed up path based revision walks
commit-graph: add --changed-paths option to write subcommand
commit-graph: reuse existing Bloom filters during write
commit-graph: write Bloom filters to commit graph file
commit-graph: examine commits by generation number
commit-graph: examine changed-path objects in pack order
commit-graph: compute Bloom filters for changed paths
diff: halt tree-diff early after max_changes
bloom.c: core Bloom filter implementation for changed paths.
bloom.c: introduce core Bloom filter constructs
bloom.c: add the murmur3 hash implementation
commit-graph: define and use MAX_NUM_CHUNKS
Diffstat (limited to 'revision.c')
-rw-r--r-- | revision.c | 126 |
1 files changed, 124 insertions, 2 deletions
diff --git a/revision.c b/revision.c index 5bc96444b6..43857823bd 100644 --- a/revision.c +++ b/revision.c @@ -29,6 +29,8 @@ #include "prio-queue.h" #include "hashmap.h" #include "utf8.h" +#include "bloom.h" +#include "json-writer.h" volatile show_early_output_fn_t show_early_output; @@ -624,11 +626,116 @@ static void file_change(struct diff_options *options, options->flags.has_changes = 1; } +static int bloom_filter_atexit_registered; +static unsigned int count_bloom_filter_maybe; +static unsigned int count_bloom_filter_definitely_not; +static unsigned int count_bloom_filter_false_positive; +static unsigned int count_bloom_filter_not_present; +static unsigned int count_bloom_filter_length_zero; + +static void trace2_bloom_filter_statistics_atexit(void) +{ + struct json_writer jw = JSON_WRITER_INIT; + + jw_object_begin(&jw, 0); + jw_object_intmax(&jw, "filter_not_present", count_bloom_filter_not_present); + jw_object_intmax(&jw, "zero_length_filter", count_bloom_filter_length_zero); + jw_object_intmax(&jw, "maybe", count_bloom_filter_maybe); + jw_object_intmax(&jw, "definitely_not", count_bloom_filter_definitely_not); + jw_object_intmax(&jw, "false_positive", count_bloom_filter_false_positive); + jw_end(&jw); + + trace2_data_json("bloom", the_repository, "statistics", &jw); + + jw_release(&jw); +} + +static void prepare_to_use_bloom_filter(struct rev_info *revs) +{ + struct pathspec_item *pi; + char *path_alloc = NULL; + const char *path; + int last_index; + int len; + + if (!revs->commits) + return; + + repo_parse_commit(revs->repo, revs->commits->item); + + if (!revs->repo->objects->commit_graph) + return; + + revs->bloom_filter_settings = revs->repo->objects->commit_graph->bloom_filter_settings; + if (!revs->bloom_filter_settings) + return; + + pi = &revs->pruning.pathspec.items[0]; + last_index = pi->len - 1; + + /* remove single trailing slash from path, if needed */ + if (pi->match[last_index] == '/') { + path_alloc = xstrdup(pi->match); + path_alloc[last_index] = '\0'; + path = path_alloc; + } else + path = pi->match; + + len = strlen(path); + + revs->bloom_key = xmalloc(sizeof(struct bloom_key)); + fill_bloom_key(path, len, revs->bloom_key, revs->bloom_filter_settings); + + if (trace2_is_enabled() && !bloom_filter_atexit_registered) { + atexit(trace2_bloom_filter_statistics_atexit); + bloom_filter_atexit_registered = 1; + } + + free(path_alloc); +} + +static int check_maybe_different_in_bloom_filter(struct rev_info *revs, + struct commit *commit) +{ + struct bloom_filter *filter; + int result; + + if (!revs->repo->objects->commit_graph) + return -1; + + if (commit->generation == GENERATION_NUMBER_INFINITY) + return -1; + + filter = get_bloom_filter(revs->repo, commit, 0); + + if (!filter) { + count_bloom_filter_not_present++; + return -1; + } + + if (!filter->len) { + count_bloom_filter_length_zero++; + return -1; + } + + result = bloom_filter_contains(filter, + revs->bloom_key, + revs->bloom_filter_settings); + + if (result) + count_bloom_filter_maybe++; + else + count_bloom_filter_definitely_not++; + + return result; +} + static int rev_compare_tree(struct rev_info *revs, - struct commit *parent, struct commit *commit) + struct commit *parent, struct commit *commit, int nth_parent) { struct tree *t1 = get_commit_tree(parent); struct tree *t2 = get_commit_tree(commit); + int bloom_ret = 1; if (!t1) return REV_TREE_NEW; @@ -653,11 +760,23 @@ static int rev_compare_tree(struct rev_info *revs, return REV_TREE_SAME; } + if (revs->bloom_key && !nth_parent) { + bloom_ret = check_maybe_different_in_bloom_filter(revs, commit); + + if (bloom_ret == 0) + return REV_TREE_SAME; + } + tree_difference = REV_TREE_SAME; revs->pruning.flags.has_changes = 0; if (diff_tree_oid(&t1->object.oid, &t2->object.oid, "", &revs->pruning) < 0) return REV_TREE_DIFFERENT; + + if (!nth_parent) + if (bloom_ret == 1 && tree_difference == REV_TREE_SAME) + count_bloom_filter_false_positive++; + return tree_difference; } @@ -855,7 +974,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit) die("cannot simplify commit %s (because of %s)", oid_to_hex(&commit->object.oid), oid_to_hex(&p->object.oid)); - switch (rev_compare_tree(revs, p, commit)) { + switch (rev_compare_tree(revs, p, commit, nth_parent)) { case REV_TREE_SAME: if (!revs->simplify_history || !relevant_commit(p)) { /* Even if a merge with an uninteresting @@ -3385,6 +3504,8 @@ int prepare_revision_walk(struct rev_info *revs) FOR_EACH_OBJECT_PROMISOR_ONLY); } + if (revs->pruning.pathspec.nr == 1 && !revs->reflog_info) + prepare_to_use_bloom_filter(revs); if (revs->no_walk != REVISION_WALK_NO_WALK_UNSORTED) commit_list_sort_by_date(&revs->commits); if (revs->no_walk) @@ -3402,6 +3523,7 @@ int prepare_revision_walk(struct rev_info *revs) simplify_merges(revs); if (revs->children.name) set_children(revs); + return 0; } |