From 83073cc994cc3cd364f3f213478b9162476e8e44 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 25 Apr 2018 14:37:55 +0000 Subject: commit: add generation number to struct commit The generation number of a commit is defined recursively as follows: * If a commit A has no parents, then the generation number of A is one. * If a commit A has parents, then the generation number of A is one more than the maximum generation number among the parents of A. Add a uint32_t generation field to struct commit so we can pass this information to revision walks. We use three special values to signal that the generation number is invalid: GENERATION_NUMBER_INFINITY 0xFFFFFFFF GENERATION_NUMBER_MAX 0x3FFFFFFF GENERATION_NUMBER_ZERO 0 The first (_INFINITY) means the generation number has not been loaded or computed. The second (_MAX) means the generation number is too large to store in the commit-graph file. The third (_ZERO) means the generation number was loaded from a commit-graph file that was written by a version of git that did not support generation numbers. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'commit.h') diff --git a/commit.h b/commit.h index 23a3f364ed..aac3b8c56f 100644 --- a/commit.h +++ b/commit.h @@ -10,6 +10,9 @@ #include "pretty.h" #define COMMIT_NOT_FROM_GRAPH 0xFFFFFFFF +#define GENERATION_NUMBER_INFINITY 0xFFFFFFFF +#define GENERATION_NUMBER_MAX 0x3FFFFFFF +#define GENERATION_NUMBER_ZERO 0 struct commit_list { struct commit *item; @@ -30,6 +33,7 @@ struct commit { */ struct tree *maybe_tree; uint32_t graph_pos; + uint32_t generation; }; extern int save_commit_buffer; -- cgit v1.2.3 From 3afc679b3c13d99e4f02bceb686f11d51576d3ae Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 1 May 2018 12:47:11 +0000 Subject: commit: use generations in paint_down_to_common() Define compare_commits_by_gen_then_commit_date(), which uses generation numbers as a primary comparison and commit date to break ties (or as a comparison when both commits do not have computed generation numbers). Since the commit-graph file is closed under reachability, we know that all commits in the file have generation at most GENERATION_NUMBER_MAX which is less than GENERATION_NUMBER_INFINITY. This change does not affect the number of commits that are walked during the execution of paint_down_to_common(), only the order that those commits are inspected. In the case that commit dates violate topological order (i.e. a parent is "newer" than a child), the previous code could walk a commit twice: if a commit is reached with the PARENT1 bit, but later is re-visited with the PARENT2 bit, then that PARENT2 bit must be propagated to its parents. Using generation numbers avoids this extra effort, even if it is somewhat rare. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit.c | 20 +++++++++++++++++++- commit.h | 1 + 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'commit.h') diff --git a/commit.c b/commit.c index 711f674c18..4d00b0a1d6 100644 --- a/commit.c +++ b/commit.c @@ -640,6 +640,24 @@ static int compare_commits_by_author_date(const void *a_, const void *b_, return 0; } +int compare_commits_by_gen_then_commit_date(const void *a_, const void *b_, void *unused) +{ + const struct commit *a = a_, *b = b_; + + /* newer commits first */ + if (a->generation < b->generation) + return 1; + else if (a->generation > b->generation) + return -1; + + /* use date as a heuristic when generations are equal */ + if (a->date < b->date) + return 1; + else if (a->date > b->date) + return -1; + return 0; +} + int compare_commits_by_commit_date(const void *a_, const void *b_, void *unused) { const struct commit *a = a_, *b = b_; @@ -789,7 +807,7 @@ static int queue_has_nonstale(struct prio_queue *queue) /* all input commits in one and twos[] must have been parsed! 
*/ static struct commit_list *paint_down_to_common(struct commit *one, int n, struct commit **twos) { - struct prio_queue queue = { compare_commits_by_commit_date }; + struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; struct commit_list *result = NULL; int i; diff --git a/commit.h b/commit.h index aac3b8c56f..64436ff44e 100644 --- a/commit.h +++ b/commit.h @@ -341,6 +341,7 @@ extern int remove_signature(struct strbuf *buf); extern int check_commit_signature(const struct commit *commit, struct signature_check *sigc); int compare_commits_by_commit_date(const void *a_, const void *b_, void *unused); +int compare_commits_by_gen_then_commit_date(const void *a_, const void *b_, void *unused); LAST_ARG_MUST_BE_NULL extern int run_commit_hook(int editor_is_used, const char *index_file, const char *name, ...); -- cgit v1.2.3 From e2838d85b6d35592ff5851d67f0232a78083ada7 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 1 May 2018 12:47:13 +0000 Subject: commit-graph: always load commit-graph information Most code paths load commits using lookup_commit() and then parse_commit(). In some cases, including some branch lookups, the commit is parsed using parse_object_buffer() which side-steps parse_commit() in favor of parse_commit_buffer(). With generation numbers in the commit-graph, we need to ensure that any commit that exists in the commit-graph file has its generation number loaded. Create new load_commit_graph_info() method to fill in the information for a commit that exists only in the commit-graph file. Call it from parse_commit_buffer() after loading the other commit information from the given buffer. Only fill this information when specified by the 'check_graph' parameter. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 46 +++++++++++++++++++++++++++++++--------------- commit-graph.h | 8 ++++++++ commit.c | 7 +++++-- commit.h | 2 +- object.c | 2 +- sha1_file.c | 2 +- 6 files changed, 47 insertions(+), 20 deletions(-) (limited to 'commit.h') diff --git a/commit-graph.c b/commit-graph.c index 36d765e10a..a8c337dd77 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -245,6 +245,13 @@ static struct commit_list **insert_parent_or_die(struct commit_graph *g, return &commit_list_insert(c, pptr)->next; } +static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, uint32_t pos) +{ + const unsigned char *commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * pos; + item->graph_pos = pos; + item->generation = get_be32(commit_data + g->hash_len + 8) >> 2; +} + static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t pos) { uint32_t edge_value; @@ -292,31 +299,40 @@ static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uin return 1; } +static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos) +{ + if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) { + *pos = item->graph_pos; + return 1; + } else { + return bsearch_graph(g, &(item->object.oid), pos); + } +} + int parse_commit_in_graph(struct commit *item) { + uint32_t pos; + if (!core_commit_graph) return 0; if (item->object.parsed) return 1; - prepare_commit_graph(); - if (commit_graph) { - uint32_t pos; - int found; - if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) { - pos = item->graph_pos; - found = 1; - } else { - found = bsearch_graph(commit_graph, &(item->object.oid), &pos); - } - - if (found) - return fill_commit_in_graph(item, commit_graph, pos); - } - + if (commit_graph && find_commit_in_graph(item, commit_graph, &pos)) + return fill_commit_in_graph(item, commit_graph, pos); return 0; } +void load_commit_graph_info(struct commit *item) +{ + uint32_t pos; 
+ if (!core_commit_graph) + return; + prepare_commit_graph(); + if (commit_graph && find_commit_in_graph(item, commit_graph, &pos)) + fill_commit_graph_info(item, commit_graph, pos); +} + static struct tree *load_tree_for_commit(struct commit_graph *g, struct commit *c) { struct object_id oid; diff --git a/commit-graph.h b/commit-graph.h index 260a468e73..96cccb10f3 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -17,6 +17,14 @@ char *get_commit_graph_filename(const char *obj_dir); */ int parse_commit_in_graph(struct commit *item); +/* + * It is possible that we loaded commit contents from the commit buffer, + * but we also want to ensure the commit-graph content is correctly + * checked and filled. Fill the graph_pos and generation members of + * the given commit. + */ +void load_commit_graph_info(struct commit *item); + struct tree *get_commit_tree_in_graph(const struct commit *c); struct commit_graph { diff --git a/commit.c b/commit.c index 4d00b0a1d6..39a3749abd 100644 --- a/commit.c +++ b/commit.c @@ -331,7 +331,7 @@ const void *detach_commit_buffer(struct commit *commit, unsigned long *sizep) return ret; } -int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size) +int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size, int check_graph) { const char *tail = buffer; const char *bufptr = buffer; @@ -386,6 +386,9 @@ int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long s } item->date = parse_commit_date(bufptr, tail); + if (check_graph) + load_commit_graph_info(item); + return 0; } @@ -412,7 +415,7 @@ int parse_commit_gently(struct commit *item, int quiet_on_missing) return error("Object %s not a commit", oid_to_hex(&item->object.oid)); } - ret = parse_commit_buffer(item, buffer, size); + ret = parse_commit_buffer(item, buffer, size, 0); if (save_commit_buffer && !ret) { set_commit_buffer(item, buffer, size); return 0; diff --git a/commit.h b/commit.h index 
64436ff44e..b5afde1ae9 100644 --- a/commit.h +++ b/commit.h @@ -72,7 +72,7 @@ struct commit *lookup_commit_reference_by_name(const char *name); */ struct commit *lookup_commit_or_die(const struct object_id *oid, const char *ref_name); -int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size); +int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size, int check_graph); int parse_commit_gently(struct commit *item, int quiet_on_missing); static inline int parse_commit(struct commit *item) { diff --git a/object.c b/object.c index e6ad3f61f0..efe4871325 100644 --- a/object.c +++ b/object.c @@ -207,7 +207,7 @@ struct object *parse_object_buffer(const struct object_id *oid, enum object_type } else if (type == OBJ_COMMIT) { struct commit *commit = lookup_commit(oid); if (commit) { - if (parse_commit_buffer(commit, buffer, size)) + if (parse_commit_buffer(commit, buffer, size, 1)) return NULL; if (!get_cached_commit_buffer(commit, NULL)) { set_commit_buffer(commit, buffer, size); diff --git a/sha1_file.c b/sha1_file.c index 1b94f39c4c..0fd4f0b8b6 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1755,7 +1755,7 @@ static void check_commit(const void *buf, size_t size) { struct commit c; memset(&c, 0, sizeof(c)); - if (parse_commit_buffer(&c, buf, size)) + if (parse_commit_buffer(&c, buf, size, 0)) die("corrupt commit"); } -- cgit v1.2.3