summaryrefslogtreecommitdiffstats
path: root/builtin/shortlog.c
diff options
context:
space:
mode:
author Jeff King <peff@peff.net> 2020-09-27 10:40:07 +0200
committer Junio C Hamano <gitster@pobox.com> 2020-09-27 21:21:05 +0200
commit f17b0b99bf2dc2fcd74544ce35d058e558e6b056 (patch)
tree 9bdcbc31e0f5d80caaa399b0ec1a5e32444b5d13 /builtin/shortlog.c
parent shortlog: match commit trailers with --group (diff)
download git-f17b0b99bf2dc2fcd74544ce35d058e558e6b056.tar.xz
git-f17b0b99bf2dc2fcd74544ce35d058e558e6b056.zip
shortlog: de-duplicate trailer values
The current documentation is vague about what happens with --group=trailer:signed-off-by when we see a commit with:

  Signed-off-by: One
  Signed-off-by: Two
  Signed-off-by: One

We clearly should credit both "One" and "Two", but should "One" get credited twice? The current code does so, but mostly because that was the easiest thing to do. It's probably more useful to count each commit at most once. This will become especially important when we allow values from multiple sources in a future patch.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'builtin/shortlog.c')
-rw-r--r-- builtin/shortlog.c 58
1 files changed, 58 insertions, 0 deletions
diff --git a/builtin/shortlog.c b/builtin/shortlog.c
index e1d9ee909f..d2d8103dd3 100644
--- a/builtin/shortlog.c
+++ b/builtin/shortlog.c
@@ -166,6 +166,60 @@ static void read_from_stdin(struct shortlog *log)
strbuf_release(&oneline);
}
+struct strset_item { /* one member of a strset */
+ struct hashmap_entry ent; /* hashmap linkage; container_of() maps it back to the item */
+ char value[FLEX_ARRAY]; /* the member string, filled in by FLEX_ALLOC_STR() */
+};
+
+struct strset { /* set of strings kept as strset_item entries in a hashmap */
+ struct hashmap map; /* initialized on the first strset_check_and_add() call */
+};
+
+#define STRSET_INIT { { NULL } } /* empty set: map starts all-zero, so map.table is NULL */
+
+static int strset_item_hashcmp(const void *hash_data, /* comparison callback given to hashmap_init(); hash_data is unused */
+ const struct hashmap_entry *entry,
+ const struct hashmap_entry *entry_or_key,
+ const void *keydata) /* optional bare string to compare against */
+{
+ const struct strset_item *a, *b;
+
+ a = container_of(entry, const struct strset_item, ent);
+ if (keydata) /* a bare string was supplied: compare the stored value with it */
+ return strcmp(a->value, keydata); /* 0 when the strings are equal */
+
+ b = container_of(entry_or_key, const struct strset_item, ent); /* no keydata: treat the second entry as an item too */
+ return strcmp(a->value, b->value); /* 0 when both items hold the same string */
+}
+
+/*
+ * Adds "str" to the set if it was not already present; returns 1 if it was
+ * already there and 0 if it had to be added.
+ */
+static int strset_check_and_add(struct strset *ss, const char *str)
+{
+ unsigned int hash = strhash(str);
+ struct strset_item *item;
+
+ if (!ss->map.table) /* first use: the map has not been set up yet */
+ hashmap_init(&ss->map, strset_item_hashcmp, NULL, 0);
+
+ if (hashmap_get_from_hash(&ss->map, hash, str)) /* found: str is already a member */
+ return 1;
+
+ FLEX_ALLOC_STR(item, value, str); /* new item carrying str in value[] */
+ hashmap_entry_init(&item->ent, hash); /* reuse the hash computed above */
+ hashmap_add(&ss->map, &item->ent);
+ return 0;
+}
+
+static void strset_clear(struct strset *ss) /* empties the set; safe on a never-used set */
+{
+ if (!ss->map.table) /* hashmap_init() never ran, so there is nothing to free */
+ return;
+ hashmap_free_entries(&ss->map, struct strset_item, ent);
+}
+
static void insert_records_from_trailers(struct shortlog *log,
struct commit *commit,
struct pretty_print_context *ctx,
@@ -173,6 +227,7 @@ static void insert_records_from_trailers(struct shortlog *log,
{
struct trailer_iterator iter;
const char *commit_buffer, *body;
+ struct strset dups = STRSET_INIT;
/*
* Using format_commit_message("%B") would be simpler here, but
@@ -190,10 +245,13 @@ static void insert_records_from_trailers(struct shortlog *log,
if (strcasecmp(iter.key.buf, log->trailer))
continue;
+ if (strset_check_and_add(&dups, value))
+ continue;
insert_one_record(log, value, oneline);
}
trailer_iterator_release(&iter);
+ strset_clear(&dups);
unuse_commit_buffer(commit, commit_buffer);
}