summaryrefslogtreecommitdiffstats
path: root/sha1_name.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-10-01 02:19:37 +0200
committerJunio C Hamano <gitster@pobox.com>2016-10-03 21:54:29 +0200
commite6c587c733b4634030b353f4024794b08bc86892 (patch)
tree6ff539da43f3c8030efc7c59d9fb35e9ed167a59 /sha1_name.c
parentabbrev: prepare for new world order (diff)
downloadgit-e6c587c733b4634030b353f4024794b08bc86892.tar.xz
git-e6c587c733b4634030b353f4024794b08bc86892.zip
abbrev: auto size the default abbreviation
In fairly early days we somehow decided to abbreviate object names down to 7 hexdigits, but as projects grow, it is becoming more and more likely that such short object names, made in earlier days and recorded in log messages, are no longer unique. Currently the Linux kernel project needs 11 to 12 hexdigits, while Git itself needs 10 hexdigits to uniquely identify the objects they have, while many smaller projects may still be fine with the original 7-hexdigit default. One size does not fit all projects. Introduce a mechanism where we estimate the number of objects in the repository upon the first request to abbreviate an object name with the default setting and come up with a sane default for the repository. Based on the expectation that we would see a collision in a repository with 2^(2N) objects when using object names shortened to the first N hexdigits, use a sufficient number of hexdigits to cover the number of objects in the repository. Each hexdigit (4 bits) we add to the shortened name allows us to have four times (2 bits) as many objects in the repository. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'sha1_name.c')
-rw-r--r--sha1_name.c28
1 files changed, 27 insertions, 1 deletions
diff --git a/sha1_name.c b/sha1_name.c
index 3b647fd7cf..beb7ab588b 100644
--- a/sha1_name.c
+++ b/sha1_name.c
@@ -15,6 +15,7 @@ typedef int (*disambiguate_hint_fn)(const unsigned char *, void *);
struct disambiguate_state {
int len; /* length of prefix in hex chars */
+ unsigned int nrobjects;
char hex_pfx[GIT_SHA1_HEXSZ + 1];
unsigned char bin_pfx[GIT_SHA1_RAWSZ];
@@ -118,6 +119,14 @@ static void find_short_object_filename(struct disambiguate_state *ds)
if (strlen(de->d_name) != 38)
continue;
+
+ /*
+ * We only look at the one subdirectory, and we assume
+ * each subdirectory is roughly similar, so each
+ * object we find probably has 255 other objects in
+ * the other fan-out directories.
+ */
+ ds->nrobjects += 256;
if (memcmp(de->d_name, ds->hex_pfx + 2, ds->len - 2))
continue;
memcpy(hex + 2, de->d_name, 38);
@@ -151,6 +160,7 @@ static void unique_in_pack(struct packed_git *p,
open_pack_index(p);
num = p->num_objects;
+ ds->nrobjects += num;
last = num;
while (first < last) {
uint32_t mid = (first + last) / 2;
@@ -380,6 +390,9 @@ static int show_ambiguous_object(const unsigned char *sha1, void *data)
return 0;
}
+/* start from our historical default before the automatic abbreviation */
+static int default_automatic_abbrev = FALLBACK_DEFAULT_ABBREV;
+
static int get_short_sha1(const char *name, int len, unsigned char *sha1,
unsigned flags)
{
@@ -426,6 +439,14 @@ static int get_short_sha1(const char *name, int len, unsigned char *sha1,
for_each_abbrev(ds.hex_pfx, show_ambiguous_object, &ds);
}
+ if (len < 16 && !status && (flags & GET_SHA1_AUTOMATIC)) {
+ unsigned int expect_collision = 1 << (len * 2);
+ if (ds.nrobjects > expect_collision) {
+ default_automatic_abbrev = len+1;
+ return SHORT_NAME_AMBIGUOUS;
+ }
+ }
+
return status;
}
@@ -458,14 +479,19 @@ int for_each_abbrev(const char *prefix, each_abbrev_fn fn, void *cb_data)
int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
{
int status, exists;
+ int flags = GET_SHA1_QUIETLY;
+ if (len < 0) {
+ flags |= GET_SHA1_AUTOMATIC;
+ len = default_automatic_abbrev;
+ }
sha1_to_hex_r(hex, sha1);
if (len == 40 || !len)
return 40;
exists = has_sha1_file(sha1);
while (len < 40) {
unsigned char sha1_ret[20];
- status = get_short_sha1(hex, len, sha1_ret, GET_SHA1_QUIETLY);
+ status = get_short_sha1(hex, len, sha1_ret, flags);
if (exists
? !status
: status == SHORT_NAME_NOT_FOUND) {