summaryrefslogtreecommitdiffstats
path: root/midx.c
diff options
context:
space:
mode:
author: Junio C Hamano <gitster@pobox.com> 2021-04-08 22:23:25 +0200
committer: Junio C Hamano <gitster@pobox.com> 2021-04-08 22:23:25 +0200
commit: e6b971fcf5d85db821636f2d887cfaf204b32bda (patch)
tree: 8d94e5501218bd7614729a363637e3746568a814 /midx.c
parentThe seventh batch (diff)
parentmidx.c: improve cache locality in midx_pack_order_cmp() (diff)
downloadgit-e6b971fcf5d85db821636f2d887cfaf204b32bda.tar.xz
git-e6b971fcf5d85db821636f2d887cfaf204b32bda.zip
Merge branch 'tb/reverse-midx'
An on-disk reverse-index to map the in-pack location of an object back to its object name across multiple packfiles is introduced.

* tb/reverse-midx:
  midx.c: improve cache locality in midx_pack_order_cmp()
  pack-revindex: write multi-pack reverse indexes
  pack-write.c: extract 'write_rev_file_order'
  pack-revindex: read multi-pack reverse indexes
  Documentation/technical: describe multi-pack reverse indexes
  midx: make some functions non-static
  midx: keep track of the checksum
  midx: don't free midx_name early
  midx: allow marking a pack as preferred
  t/helper/test-read-midx.c: add '--show-objects'
  builtin/multi-pack-index.c: display usage on unrecognized command
  builtin/multi-pack-index.c: don't enter bogus cmd_mode
  builtin/multi-pack-index.c: split sub-commands
  builtin/multi-pack-index.c: define common usage with a macro
  builtin/multi-pack-index.c: don't handle 'progress' separately
  builtin/multi-pack-index.c: inline 'flags' with options
Diffstat (limited to 'midx.c')
-rw-r--r-- midx.c 219
1 files changed, 206 insertions, 13 deletions
diff --git a/midx.c b/midx.c
index becfafe65e..9e86583172 100644
--- a/midx.c
+++ b/midx.c
@@ -12,6 +12,7 @@
#include "run-command.h"
#include "repository.h"
#include "chunk-format.h"
+#include "pack.h"
#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
#define MIDX_VERSION 1
@@ -47,11 +48,22 @@ static uint8_t oid_version(void)
}
}
+static const unsigned char *get_midx_checksum(struct multi_pack_index *m)
+{
+ return m->data + m->data_len - the_hash_algo->rawsz;
+}
+
static char *get_midx_filename(const char *object_dir)
{
return xstrfmt("%s/pack/multi-pack-index", object_dir);
}
+char *get_midx_rev_filename(struct multi_pack_index *m)
+{
+ return xstrfmt("%s/pack/multi-pack-index-%s.rev",
+ m->object_dir, hash_to_hex(get_midx_checksum(m)));
+}
+
static int midx_read_oid_fanout(const unsigned char *chunk_start,
size_t chunk_size, void *data)
{
@@ -239,7 +251,7 @@ struct object_id *nth_midxed_object_oid(struct object_id *oid,
return oid;
}
-static off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos)
+off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos)
{
const unsigned char *offset_data;
uint32_t offset32;
@@ -258,7 +270,7 @@ static off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos)
return offset32;
}
-static uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos)
+uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos)
{
return get_be32(m->chunk_object_offsets +
(off_t)pos * MIDX_CHUNK_OFFSET_WIDTH);
@@ -431,6 +443,14 @@ static int pack_info_compare(const void *_a, const void *_b)
return strcmp(a->pack_name, b->pack_name);
}
+static int idx_or_pack_name_cmp(const void *_va, const void *_vb)
+{
+ const char *pack_name = _va;
+ const struct pack_info *compar = _vb;
+
+ return cmp_idx_or_pack_name(pack_name, compar->pack_name);
+}
+
struct write_midx_context {
struct pack_info *info;
uint32_t nr;
@@ -443,8 +463,11 @@ struct write_midx_context {
uint32_t entries_nr;
uint32_t *pack_perm;
+ uint32_t *pack_order;
unsigned large_offsets_needed:1;
uint32_t num_large_offsets;
+
+ int preferred_pack_idx;
};
static void add_pack_to_midx(const char *full_path, size_t full_path_len,
@@ -489,6 +512,7 @@ struct pack_midx_entry {
uint32_t pack_int_id;
time_t pack_mtime;
uint64_t offset;
+ unsigned preferred : 1;
};
static int midx_oid_compare(const void *_a, const void *_b)
@@ -500,6 +524,12 @@ static int midx_oid_compare(const void *_a, const void *_b)
if (cmp)
return cmp;
+ /* Sort objects in a preferred pack first when multiple copies exist. */
+ if (a->preferred > b->preferred)
+ return -1;
+ if (a->preferred < b->preferred)
+ return 1;
+
if (a->pack_mtime > b->pack_mtime)
return -1;
else if (a->pack_mtime < b->pack_mtime)
@@ -527,7 +557,8 @@ static int nth_midxed_pack_midx_entry(struct multi_pack_index *m,
static void fill_pack_entry(uint32_t pack_int_id,
struct packed_git *p,
uint32_t cur_object,
- struct pack_midx_entry *entry)
+ struct pack_midx_entry *entry,
+ int preferred)
{
if (nth_packed_object_id(&entry->oid, p, cur_object) < 0)
die(_("failed to locate object %d in packfile"), cur_object);
@@ -536,6 +567,7 @@ static void fill_pack_entry(uint32_t pack_int_id,
entry->pack_mtime = p->mtime;
entry->offset = nth_packed_object_offset(p, cur_object);
+ entry->preferred = !!preferred;
}
/*
@@ -552,7 +584,8 @@ static void fill_pack_entry(uint32_t pack_int_id,
static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
struct pack_info *info,
uint32_t nr_packs,
- uint32_t *nr_objects)
+ uint32_t *nr_objects,
+ int preferred_pack)
{
uint32_t cur_fanout, cur_pack, cur_object;
uint32_t alloc_fanout, alloc_objects, total_objects = 0;
@@ -589,12 +622,17 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
nth_midxed_pack_midx_entry(m,
&entries_by_fanout[nr_fanout],
cur_object);
+ if (nth_midxed_pack_int_id(m, cur_object) == preferred_pack)
+ entries_by_fanout[nr_fanout].preferred = 1;
+ else
+ entries_by_fanout[nr_fanout].preferred = 0;
nr_fanout++;
}
}
for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) {
uint32_t start = 0, end;
+ int preferred = cur_pack == preferred_pack;
if (cur_fanout)
start = get_pack_fanout(info[cur_pack].p, cur_fanout - 1);
@@ -602,7 +640,11 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
for (cur_object = start; cur_object < end; cur_object++) {
ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout);
- fill_pack_entry(cur_pack, info[cur_pack].p, cur_object, &entries_by_fanout[nr_fanout]);
+ fill_pack_entry(cur_pack,
+ info[cur_pack].p,
+ cur_object,
+ &entries_by_fanout[nr_fanout],
+ preferred);
nr_fanout++;
}
}
@@ -776,10 +818,80 @@ static int write_midx_large_offsets(struct hashfile *f,
return 0;
}
+struct midx_pack_order_data {
+ uint32_t nr;
+ uint32_t pack;
+ off_t offset;
+};
+
+static int midx_pack_order_cmp(const void *va, const void *vb)
+{
+ const struct midx_pack_order_data *a = va, *b = vb;
+ if (a->pack < b->pack)
+ return -1;
+ else if (a->pack > b->pack)
+ return 1;
+ else if (a->offset < b->offset)
+ return -1;
+ else if (a->offset > b->offset)
+ return 1;
+ else
+ return 0;
+}
+
+static uint32_t *midx_pack_order(struct write_midx_context *ctx)
+{
+ struct midx_pack_order_data *data;
+ uint32_t *pack_order;
+ uint32_t i;
+
+ ALLOC_ARRAY(data, ctx->entries_nr);
+ for (i = 0; i < ctx->entries_nr; i++) {
+ struct pack_midx_entry *e = &ctx->entries[i];
+ data[i].nr = i;
+ data[i].pack = ctx->pack_perm[e->pack_int_id];
+ if (!e->preferred)
+ data[i].pack |= (1U << 31);
+ data[i].offset = e->offset;
+ }
+
+ QSORT(data, ctx->entries_nr, midx_pack_order_cmp);
+
+ ALLOC_ARRAY(pack_order, ctx->entries_nr);
+ for (i = 0; i < ctx->entries_nr; i++)
+ pack_order[i] = data[i].nr;
+ free(data);
+
+ return pack_order;
+}
+
+static void write_midx_reverse_index(char *midx_name, unsigned char *midx_hash,
+ struct write_midx_context *ctx)
+{
+ struct strbuf buf = STRBUF_INIT;
+ const char *tmp_file;
+
+ strbuf_addf(&buf, "%s-%s.rev", midx_name, hash_to_hex(midx_hash));
+
+ tmp_file = write_rev_file_order(NULL, ctx->pack_order, ctx->entries_nr,
+ midx_hash, WRITE_REV);
+
+ if (finalize_object_file(tmp_file, buf.buf))
+ die(_("cannot store reverse index file"));
+
+ strbuf_release(&buf);
+}
+
+static void clear_midx_files_ext(struct repository *r, const char *ext,
+ unsigned char *keep_hash);
+
static int write_midx_internal(const char *object_dir, struct multi_pack_index *m,
- struct string_list *packs_to_drop, unsigned flags)
+ struct string_list *packs_to_drop,
+ const char *preferred_pack_name,
+ unsigned flags)
{
char *midx_name;
+ unsigned char midx_hash[GIT_MAX_RAWSZ];
uint32_t i;
struct hashfile *f = NULL;
struct lock_file lk;
@@ -828,7 +940,19 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
if (ctx.m && ctx.nr == ctx.m->num_packs && !packs_to_drop)
goto cleanup;
- ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr);
+ ctx.preferred_pack_idx = -1;
+ if (preferred_pack_name) {
+ for (i = 0; i < ctx.nr; i++) {
+ if (!cmp_idx_or_pack_name(preferred_pack_name,
+ ctx.info[i].pack_name)) {
+ ctx.preferred_pack_idx = i;
+ break;
+ }
+ }
+ }
+
+ ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr,
+ ctx.preferred_pack_idx);
ctx.large_offsets_needed = 0;
for (i = 0; i < ctx.entries_nr; i++) {
@@ -889,13 +1013,30 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
pack_name_concat_len += strlen(ctx.info[i].pack_name) + 1;
}
+ /* Check that the preferred pack wasn't expired (if given). */
+ if (preferred_pack_name) {
+ struct pack_info *preferred = bsearch(preferred_pack_name,
+ ctx.info, ctx.nr,
+ sizeof(*ctx.info),
+ idx_or_pack_name_cmp);
+
+ if (!preferred)
+ warning(_("unknown preferred pack: '%s'"),
+ preferred_pack_name);
+ else {
+ uint32_t perm = ctx.pack_perm[preferred->orig_pack_int_id];
+ if (perm == PACK_EXPIRED)
+ warning(_("preferred pack '%s' is expired"),
+ preferred_pack_name);
+ }
+ }
+
if (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT)
pack_name_concat_len += MIDX_CHUNK_ALIGNMENT -
(pack_name_concat_len % MIDX_CHUNK_ALIGNMENT);
hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR);
f = hashfd(get_lock_file_fd(&lk), get_lock_file_path(&lk));
- FREE_AND_NULL(midx_name);
if (ctx.m)
close_midx(ctx.m);
@@ -927,8 +1068,16 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
write_midx_header(f, get_num_chunks(cf), ctx.nr - dropped_packs);
write_chunkfile(cf, &ctx);
- finalize_hashfile(f, NULL, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
+ finalize_hashfile(f, midx_hash, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
free_chunkfile(cf);
+
+ if (flags & MIDX_WRITE_REV_INDEX)
+ ctx.pack_order = midx_pack_order(&ctx);
+
+ if (flags & MIDX_WRITE_REV_INDEX)
+ write_midx_reverse_index(midx_name, midx_hash, &ctx);
+ clear_midx_files_ext(the_repository, ".rev", midx_hash);
+
commit_lock_file(&lk);
cleanup:
@@ -943,13 +1092,55 @@ cleanup:
free(ctx.info);
free(ctx.entries);
free(ctx.pack_perm);
+ free(ctx.pack_order);
free(midx_name);
return result;
}
-int write_midx_file(const char *object_dir, unsigned flags)
+int write_midx_file(const char *object_dir,
+ const char *preferred_pack_name,
+ unsigned flags)
{
- return write_midx_internal(object_dir, NULL, NULL, flags);
+ return write_midx_internal(object_dir, NULL, NULL, preferred_pack_name,
+ flags);
+}
+
+struct clear_midx_data {
+ char *keep;
+ const char *ext;
+};
+
+static void clear_midx_file_ext(const char *full_path, size_t full_path_len,
+ const char *file_name, void *_data)
+{
+ struct clear_midx_data *data = _data;
+
+ if (!(starts_with(file_name, "multi-pack-index-") &&
+ ends_with(file_name, data->ext)))
+ return;
+ if (data->keep && !strcmp(data->keep, file_name))
+ return;
+
+ if (unlink(full_path))
+ die_errno(_("failed to remove %s"), full_path);
+}
+
+static void clear_midx_files_ext(struct repository *r, const char *ext,
+ unsigned char *keep_hash)
+{
+ struct clear_midx_data data;
+ memset(&data, 0, sizeof(struct clear_midx_data));
+
+ if (keep_hash)
+ data.keep = xstrfmt("multi-pack-index-%s%s",
+ hash_to_hex(keep_hash), ext);
+ data.ext = ext;
+
+ for_each_file_in_pack_dir(r->objects->odb->path,
+ clear_midx_file_ext,
+ &data);
+
+ free(data.keep);
}
void clear_midx_file(struct repository *r)
@@ -964,6 +1155,8 @@ void clear_midx_file(struct repository *r)
if (remove_path(midx))
die(_("failed to clear multi-pack-index at %s"), midx);
+ clear_midx_files_ext(r, ".rev", NULL);
+
free(midx);
}
@@ -1184,7 +1377,7 @@ int expire_midx_packs(struct repository *r, const char *object_dir, unsigned fla
free(count);
if (packs_to_drop.nr)
- result = write_midx_internal(object_dir, m, &packs_to_drop, flags);
+ result = write_midx_internal(object_dir, m, &packs_to_drop, NULL, flags);
string_list_clear(&packs_to_drop, 0);
return result;
@@ -1373,7 +1566,7 @@ int midx_repack(struct repository *r, const char *object_dir, size_t batch_size,
goto cleanup;
}
- result = write_midx_internal(object_dir, m, NULL, flags);
+ result = write_midx_internal(object_dir, m, NULL, NULL, flags);
m = NULL;
cleanup: