diff options
author | Taylor Blau <me@ttaylorr.com> | 2024-05-23 23:26:42 +0200 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2024-05-24 20:40:42 +0200 |
commit | faf558b23ef55b18f395d1f7a7c2714ccc1320e2 (patch) | |
tree | 4b18d29919b4f980c71cd3f71a7b6efef669f8f6 /pseudo-merge.c | |
parent | config: introduce `git_config_double()` (diff) | |
download | git-faf558b23ef55b18f395d1f7a7c2714ccc1320e2.tar.xz git-faf558b23ef55b18f395d1f7a7c2714ccc1320e2.zip |
pseudo-merge: implement support for selecting pseudo-merge commits
Teach the new pseudo-merge machinery how to select non-bitmapped commits
for inclusion in different pseudo-merge group(s) based on a handful of
criteria.
Note that the selected pseudo-merge commits aren't actually used or
written anywhere yet. This will be done in the following commit.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'pseudo-merge.c')
-rw-r--r-- | pseudo-merge.c | 454 |
1 files changed, 454 insertions, 0 deletions
diff --git a/pseudo-merge.c b/pseudo-merge.c index 37e037ba27..0f6854c753 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -1,2 +1,456 @@ #include "git-compat-util.h" #include "pseudo-merge.h" +#include "date.h" +#include "oid-array.h" +#include "strbuf.h" +#include "config.h" +#include "string-list.h" +#include "refs.h" +#include "pack-bitmap.h" +#include "commit.h" +#include "alloc.h" +#include "progress.h" + +#define DEFAULT_PSEUDO_MERGE_DECAY 1.0 +#define DEFAULT_PSEUDO_MERGE_MAX_MERGES 64 +#define DEFAULT_PSEUDO_MERGE_SAMPLE_RATE 1 +#define DEFAULT_PSEUDO_MERGE_THRESHOLD approxidate("1.week.ago") +#define DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD approxidate("1.month.ago") +#define DEFAULT_PSEUDO_MERGE_STABLE_SIZE 512 + +static double gitexp(double base, int exp) +{ + double result = 1; + while (1) { + if (exp % 2) + result *= base; + exp >>= 1; + if (!exp) + break; + base *= base; + } + return result; +} + +static uint32_t pseudo_merge_group_size(const struct pseudo_merge_group *group, + const struct pseudo_merge_matches *matches, + uint32_t i) +{ + double C = 0.0f; + uint32_t n; + + /* + * The size of pseudo-merge groups decays according to a power series, + * which looks like: + * + * f(n) = C * n^-k + * + * , where 'n' is the n-th pseudo-merge group, 'f(n)' is its size, 'k' + * is the decay rate, and 'C' is a scaling value. + * + * The value of C depends on the number of groups, decay rate, and total + * number of commits. It is computed such that if there are M and N + * total groups and commits, respectively, that: + * + * N = f(0) + f(1) + ... f(M-1) + * + * Rearranging to isolate C, we get: + * + * N = \sum_{n=1}^M C / n^k + * + * N / C = \sum_{n=1}^M n^-k + * + * C = N / \sum_{n=1}^M n^-k + * + * For example, if we have a decay rate of 'k' being equal to 1.5, 'N' + * total commits equal to 10,000, and 'M' being equal to 6 groups, then + * the (rounded) group sizes are: + * + * { 5469, 1934, 1053, 684, 489, 372 } + * + * increasing the number of total groups, say to 10, scales the group + * sizes appropriately: + * + * { 5012, 1772, 964, 626, 448, 341, 271, 221, 186, 158 } + */ + for (n = 0; n < group->max_merges; n++) + C += 1.0 / gitexp(n + 1, group->decay); + C = matches->unstable_nr / C; + + return (uint32_t)((C / gitexp(i + 1, group->decay)) + 0.5); +} + +static void pseudo_merge_group_init(struct pseudo_merge_group *group) +{ + memset(group, 0, sizeof(struct pseudo_merge_group)); + + strmap_init_with_options(&group->matches, NULL, 0); + + group->decay = DEFAULT_PSEUDO_MERGE_DECAY; + group->max_merges = DEFAULT_PSEUDO_MERGE_MAX_MERGES; + group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE; + group->threshold = DEFAULT_PSEUDO_MERGE_THRESHOLD; + group->stable_threshold = DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD; + group->stable_size = DEFAULT_PSEUDO_MERGE_STABLE_SIZE; +} + +static int pseudo_merge_config(const char *var, const char *value, + const struct config_context *ctx, + void *cb_data) +{ + struct string_list *list = cb_data; + struct string_list_item *item; + struct pseudo_merge_group *group; + struct strbuf buf = STRBUF_INIT; + const char *sub, *key; + size_t sub_len; + int ret = 0; + + if (parse_config_key(var, "bitmappseudomerge", &sub, &sub_len, &key)) + goto done; + + if (!sub_len) + goto done; + + strbuf_add(&buf, sub, sub_len); + + item = string_list_lookup(list, buf.buf); + if (!item) { + item = string_list_insert(list, buf.buf); + + item->util = xmalloc(sizeof(struct pseudo_merge_group)); + pseudo_merge_group_init(item->util); + } + + group = item->util; + + if (!strcmp(key, "pattern")) { + struct strbuf re = STRBUF_INIT; + + free(group->pattern); + if (*value != '^') + strbuf_addch(&re, '^'); + strbuf_addstr(&re, value); + + group->pattern = xcalloc(1, sizeof(regex_t)); + if (regcomp(group->pattern, re.buf, REG_EXTENDED)) + die(_("failed to load pseudo-merge regex for %s: '%s'"), + sub, re.buf); + + strbuf_release(&re); + } else if (!strcmp(key, "decay")) { + group->decay = git_config_double(var, value, ctx->kvi); + if (group->decay < 0) { + warning(_("%s must be non-negative, using default"), var); + group->decay = DEFAULT_PSEUDO_MERGE_DECAY; + } + } else if (!strcmp(key, "samplerate")) { + group->sample_rate = git_config_double(var, value, ctx->kvi); + if (!(0 <= group->sample_rate && group->sample_rate <= 1)) { + warning(_("%s must be between 0 and 1, using default"), var); + group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE; + } + } else if (!strcmp(key, "threshold")) { + if (git_config_expiry_date(&group->threshold, var, value)) { + ret = -1; + goto done; + } + } else if (!strcmp(key, "maxmerges")) { + group->max_merges = git_config_int(var, value, ctx->kvi); + if (group->max_merges < 0) { + warning(_("%s must be non-negative, using default"), var); + group->max_merges = DEFAULT_PSEUDO_MERGE_MAX_MERGES; + } + } else if (!strcmp(key, "stablethreshold")) { + if (git_config_expiry_date(&group->stable_threshold, var, value)) { + ret = -1; + goto done; + } + } else if (!strcmp(key, "stablesize")) { + group->stable_size = git_config_int(var, value, ctx->kvi); + if (group->stable_size <= 0) { + warning(_("%s must be positive, using default"), var); + group->stable_size = DEFAULT_PSEUDO_MERGE_STABLE_SIZE; + } + } + +done: + strbuf_release(&buf); + + return ret; +} + +void load_pseudo_merges_from_config(struct string_list *list) +{ + struct string_list_item *item; + + git_config(pseudo_merge_config, list); + + for_each_string_list_item(item, list) { + struct pseudo_merge_group *group = item->util; + if (!group->pattern) + die(_("pseudo-merge group '%s' missing required pattern"), + item->string); + if (group->threshold < group->stable_threshold) + die(_("pseudo-merge group '%s' has unstable threshold " + "before stable one"), item->string); + } +} + +static int find_pseudo_merge_group_for_ref(const char *refname, + const struct object_id *oid, + int flags UNUSED, + void *_data) +{ + struct bitmap_writer *writer = _data; + struct object_id peeled; + struct commit *c; + uint32_t i; + int has_bitmap; + + if (!peel_iterated_oid(oid, &peeled)) + oid = &peeled; + + c = lookup_commit(the_repository, oid); + if (!c) + return 0; + + has_bitmap = bitmap_writer_has_bitmapped_object_id(writer, oid); + + for (i = 0; i < writer->pseudo_merge_groups.nr; i++) { + struct pseudo_merge_group *group; + struct pseudo_merge_matches *matches; + struct strbuf group_name = STRBUF_INIT; + regmatch_t captures[16]; + size_t j; + + group = writer->pseudo_merge_groups.items[i].util; + if (regexec(group->pattern, refname, ARRAY_SIZE(captures), + captures, 0)) + continue; + + if (captures[ARRAY_SIZE(captures) - 1].rm_so != -1) + warning(_("pseudo-merge regex from config has too many capture " + "groups (max=%"PRIuMAX")"), + (uintmax_t)ARRAY_SIZE(captures) - 2); + + for (j = !!group->pattern->re_nsub; j < ARRAY_SIZE(captures); j++) { + regmatch_t *match = &captures[j]; + if (match->rm_so == -1) + continue; + + if (group_name.len) + strbuf_addch(&group_name, '-'); + + strbuf_add(&group_name, refname + match->rm_so, + match->rm_eo - match->rm_so); + } + + matches = strmap_get(&group->matches, group_name.buf); + if (!matches) { + matches = xcalloc(1, sizeof(*matches)); + strmap_put(&group->matches, strbuf_detach(&group_name, NULL), + matches); + } + + if (c->date <= group->stable_threshold) { + ALLOC_GROW(matches->stable, matches->stable_nr + 1, + matches->stable_alloc); + matches->stable[matches->stable_nr++] = c; + } else if (c->date <= group->threshold && !has_bitmap) { + ALLOC_GROW(matches->unstable, matches->unstable_nr + 1, + matches->unstable_alloc); + matches->unstable[matches->unstable_nr++] = c; + } + + strbuf_release(&group_name); + } + + return 0; +} + +static struct commit *push_pseudo_merge(struct pseudo_merge_group *group) +{ + struct commit *merge; + + ALLOC_GROW(group->merges, group->merges_nr + 1, group->merges_alloc); + + merge = alloc_commit_node(the_repository); + merge->object.parsed = 1; + merge->object.flags |= BITMAP_PSEUDO_MERGE; + + group->merges[group->merges_nr++] = merge; + + return merge; +} + +static struct pseudo_merge_commit_idx *pseudo_merge_idx(kh_oid_map_t *pseudo_merge_commits, + const struct object_id *oid) + +{ + struct pseudo_merge_commit_idx *pmc; + int hash_ret; + khiter_t hash_pos = kh_put_oid_map(pseudo_merge_commits, *oid, + &hash_ret); + + if (hash_ret) { + CALLOC_ARRAY(pmc, 1); + kh_value(pseudo_merge_commits, hash_pos) = pmc; + } else { + pmc = kh_value(pseudo_merge_commits, hash_pos); + } + + return pmc; +} + +#define MIN_PSEUDO_MERGE_SIZE 8 + +static void select_pseudo_merges_1(struct bitmap_writer *writer, + struct pseudo_merge_group *group, + struct pseudo_merge_matches *matches) +{ + uint32_t i, j; + uint32_t stable_merges_nr; + + if (!matches->stable_nr && !matches->unstable_nr) + return; /* all tips in this group already have bitmaps */ + + stable_merges_nr = matches->stable_nr / group->stable_size; + if (matches->stable_nr % group->stable_size) + stable_merges_nr++; + + /* make stable_merges_nr pseudo merges for stable commits */ + for (i = 0, j = 0; i < stable_merges_nr; i++) { + struct commit *merge; + struct commit_list **p; + + merge = push_pseudo_merge(group); + p = &merge->parents; + + /* + * For each pseudo-merge created above, add parents to the + * allocated commit node from the stable set of commits + * (un-bitmapped, newer than the stable threshold). + */ + do { + struct commit *c; + struct pseudo_merge_commit_idx *pmc; + + if (j >= matches->stable_nr) + break; + + c = matches->stable[j++]; + /* + * Here and below, make sure that we keep our mapping of + * commits -> pseudo-merge(s) which include the key'd + * commit up-to-date. + */ + pmc = pseudo_merge_idx(writer->pseudo_merge_commits, + &c->object.oid); + + ALLOC_GROW(pmc->pseudo_merge, pmc->nr + 1, pmc->alloc); + + pmc->pseudo_merge[pmc->nr++] = writer->pseudo_merges_nr; + p = commit_list_append(c, p); + } while (j % group->stable_size); + + bitmap_writer_push_commit(writer, merge, 1); + writer->pseudo_merges_nr++; + } + + /* make up to group->max_merges pseudo merges for unstable commits */ + for (i = 0, j = 0; i < group->max_merges; i++) { + struct commit *merge; + struct commit_list **p; + uint32_t size, end; + + merge = push_pseudo_merge(group); + p = &merge->parents; + + size = pseudo_merge_group_size(group, matches, i); + end = size < MIN_PSEUDO_MERGE_SIZE ? matches->unstable_nr : j + size; + + /* + * For each pseudo-merge commit created above, add parents to + * the allocated commit node from the unstable set of commits + * (newer than the stable threshold). + * + * Account for the sample rate, since not every candidate from + * the set of stable commits will be included as a pseudo-merge + * parent. + */ + for (; j < end && j < matches->unstable_nr; j++) { + struct commit *c = matches->unstable[j]; + struct pseudo_merge_commit_idx *pmc; + + if (j % (uint32_t)(1.0 / group->sample_rate)) + continue; + + pmc = pseudo_merge_idx(writer->pseudo_merge_commits, + &c->object.oid); + + ALLOC_GROW(pmc->pseudo_merge, pmc->nr + 1, pmc->alloc); + + pmc->pseudo_merge[pmc->nr++] = writer->pseudo_merges_nr; + p = commit_list_append(c, p); + } + + bitmap_writer_push_commit(writer, merge, 1); + writer->pseudo_merges_nr++; + if (end >= matches->unstable_nr) + break; + } +} + +static int commit_date_cmp(const void *va, const void *vb) +{ + timestamp_t a = (*(const struct commit **)va)->date; + timestamp_t b = (*(const struct commit **)vb)->date; + + if (a < b) + return -1; + else if (a > b) + return 1; + return 0; +} + +static void sort_pseudo_merge_matches(struct pseudo_merge_matches *matches) +{ + QSORT(matches->stable, matches->stable_nr, commit_date_cmp); + QSORT(matches->unstable, matches->unstable_nr, commit_date_cmp); +} + +void select_pseudo_merges(struct bitmap_writer *writer, + struct commit **commits, size_t commits_nr) +{ + struct progress *progress = NULL; + uint32_t i; + + if (!writer->pseudo_merge_groups.nr) + return; + + if (writer->show_progress) + progress = start_progress("Selecting pseudo-merge commits", + writer->pseudo_merge_groups.nr); + + for_each_ref(find_pseudo_merge_group_for_ref, writer); + + for (i = 0; i < writer->pseudo_merge_groups.nr; i++) { + struct pseudo_merge_group *group; + struct hashmap_iter iter; + struct strmap_entry *e; + + group = writer->pseudo_merge_groups.items[i].util; + strmap_for_each_entry(&group->matches, &iter, e) { + struct pseudo_merge_matches *matches = e->value; + + sort_pseudo_merge_matches(matches); + + select_pseudo_merges_1(writer, group, matches); + } + + display_progress(progress, i + 1); + } + + stop_progress(&progress); +} |