summary | refs | log | tree | commit | diff | stats
path: root/midx-write.c
diff options
context:
space:
mode:
author: Junio C Hamano <gitster@pobox.com> 2024-08-19 20:07:37 +0200
committer: Junio C Hamano <gitster@pobox.com> 2024-08-19 20:07:37 +0200
commit: b9497848df63272aa505cdd6d3d5de487b845527 (patch)
tree: 0fff8fe44b9fe32b2dfbfab170e291edcc84e10a /midx-write.c
parent: Merge branch 'jc/tests-no-useless-tee' (diff)
parent: midx: implement support for writing incremental MIDX chains (diff)
download: git-b9497848df63272aa505cdd6d3d5de487b845527.tar.xz
download: git-b9497848df63272aa505cdd6d3d5de487b845527.zip
Merge branch 'tb/incremental-midx-part-1'
Incremental updates of multi-pack index files.

* tb/incremental-midx-part-1:
  midx: implement support for writing incremental MIDX chains
  t/t5313-pack-bounds-checks.sh: prepare for sub-directories
  t: retire 'GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP'
  midx: implement verification support for incremental MIDXs
  midx: support reading incremental MIDX chains
  midx: teach `midx_fanout_add_midx_fanout()` about incremental MIDXs
  midx: teach `midx_preferred_pack()` about incremental MIDXs
  midx: teach `midx_contains_pack()` about incremental MIDXs
  midx: remove unused `midx_locate_pack()`
  midx: teach `fill_midx_entry()` about incremental MIDXs
  midx: teach `nth_midxed_offset()` about incremental MIDXs
  midx: teach `bsearch_midx()` about incremental MIDXs
  midx: introduce `bsearch_one_midx()`
  midx: teach `nth_bitmapped_pack()` about incremental MIDXs
  midx: teach `nth_midxed_object_oid()` about incremental MIDXs
  midx: teach `prepare_midx_pack()` about incremental MIDXs
  midx: teach `nth_midxed_pack_int_id()` about incremental MIDXs
  midx: add new fields for incremental MIDX chains
  Documentation: describe incremental MIDX format
Diffstat (limited to 'midx-write.c')
-rw-r--r-- midx-write.c 324
1 files changed, 275 insertions, 49 deletions
diff --git a/midx-write.c b/midx-write.c
index 99c3110a43..e3fa33203f 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -17,6 +17,8 @@
#include "refs.h"
#include "revision.h"
#include "list-objects.h"
+#include "path.h"
+#include "pack-revindex.h"
#define PACK_EXPIRED UINT_MAX
#define BITMAP_POS_UNKNOWN (~((uint32_t)0))
@@ -25,7 +27,11 @@
extern int midx_checksum_valid(struct multi_pack_index *m);
extern void clear_midx_files_ext(const char *object_dir, const char *ext,
- unsigned char *keep_hash);
+ const char *keep_hash);
+extern void clear_incremental_midx_files_ext(const char *object_dir,
+ const char *ext,
+ const char **keep_hashes,
+ uint32_t hashes_nr);
extern int cmp_idx_or_pack_name(const char *idx_or_pack_name,
const char *idx_name);
@@ -86,6 +92,7 @@ struct write_midx_context {
size_t nr;
size_t alloc;
struct multi_pack_index *m;
+ struct multi_pack_index *base_midx;
struct progress *progress;
unsigned pack_paths_checked;
@@ -99,6 +106,9 @@ struct write_midx_context {
int preferred_pack_idx;
+ int incremental;
+ uint32_t num_multi_pack_indexes_before;
+
struct string_list *to_include;
};
@@ -122,6 +132,9 @@ static int should_include_pack(const struct write_midx_context *ctx,
*/
if (ctx->m && midx_contains_pack(ctx->m, file_name))
return 0;
+ else if (ctx->base_midx && midx_contains_pack(ctx->base_midx,
+ file_name))
+ return 0;
else if (ctx->to_include &&
!string_list_has_string(ctx->to_include, file_name))
return 0;
@@ -196,7 +209,7 @@ static int nth_midxed_pack_midx_entry(struct multi_pack_index *m,
struct pack_midx_entry *e,
uint32_t pos)
{
- if (pos >= m->num_objects)
+ if (pos >= m->num_objects + m->num_objects_in_base)
return 1;
nth_midxed_object_oid(&e->oid, m, pos);
@@ -247,12 +260,16 @@ static void midx_fanout_add_midx_fanout(struct midx_fanout *fanout,
uint32_t cur_fanout,
int preferred_pack)
{
- uint32_t start = 0, end;
+ uint32_t start = m->num_objects_in_base, end;
uint32_t cur_object;
+ if (m->base_midx)
+ midx_fanout_add_midx_fanout(fanout, m->base_midx, cur_fanout,
+ preferred_pack);
+
if (cur_fanout)
- start = ntohl(m->chunk_oid_fanout[cur_fanout - 1]);
- end = ntohl(m->chunk_oid_fanout[cur_fanout]);
+ start += ntohl(m->chunk_oid_fanout[cur_fanout - 1]);
+ end = m->num_objects_in_base + ntohl(m->chunk_oid_fanout[cur_fanout]);
for (cur_object = start; cur_object < end; cur_object++) {
if ((preferred_pack > -1) &&
@@ -334,7 +351,7 @@ static void compute_sorted_entries(struct write_midx_context *ctx,
for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) {
fanout.nr = 0;
- if (ctx->m)
+ if (ctx->m && !ctx->incremental)
midx_fanout_add_midx_fanout(&fanout, ctx->m, cur_fanout,
ctx->preferred_pack_idx);
@@ -360,6 +377,10 @@ static void compute_sorted_entries(struct write_midx_context *ctx,
if (cur_object && oideq(&fanout.entries[cur_object - 1].oid,
&fanout.entries[cur_object].oid))
continue;
+ if (ctx->incremental && ctx->base_midx &&
+ midx_has_oid(ctx->base_midx,
+ &fanout.entries[cur_object].oid))
+ continue;
ALLOC_GROW(ctx->entries, st_add(ctx->entries_nr, 1),
alloc_objects);
@@ -543,10 +564,16 @@ static int write_midx_revindex(struct hashfile *f,
void *data)
{
struct write_midx_context *ctx = data;
- uint32_t i;
+ uint32_t i, nr_base;
+
+ if (ctx->incremental && ctx->base_midx)
+ nr_base = ctx->base_midx->num_objects +
+ ctx->base_midx->num_objects_in_base;
+ else
+ nr_base = 0;
for (i = 0; i < ctx->entries_nr; i++)
- hashwrite_be32(f, ctx->pack_order[i]);
+ hashwrite_be32(f, ctx->pack_order[i] + nr_base);
return 0;
}
@@ -575,12 +602,18 @@ static int midx_pack_order_cmp(const void *va, const void *vb)
static uint32_t *midx_pack_order(struct write_midx_context *ctx)
{
struct midx_pack_order_data *data;
- uint32_t *pack_order;
+ uint32_t *pack_order, base_objects = 0;
uint32_t i;
trace2_region_enter("midx", "midx_pack_order", the_repository);
+ if (ctx->incremental && ctx->base_midx)
+ base_objects = ctx->base_midx->num_objects +
+ ctx->base_midx->num_objects_in_base;
+
+ ALLOC_ARRAY(pack_order, ctx->entries_nr);
ALLOC_ARRAY(data, ctx->entries_nr);
+
for (i = 0; i < ctx->entries_nr; i++) {
struct pack_midx_entry *e = &ctx->entries[i];
data[i].nr = i;
@@ -592,12 +625,11 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx)
QSORT(data, ctx->entries_nr, midx_pack_order_cmp);
- ALLOC_ARRAY(pack_order, ctx->entries_nr);
for (i = 0; i < ctx->entries_nr; i++) {
struct pack_midx_entry *e = &ctx->entries[data[i].nr];
struct pack_info *pack = &ctx->info[ctx->pack_perm[e->pack_int_id]];
if (pack->bitmap_pos == BITMAP_POS_UNKNOWN)
- pack->bitmap_pos = i;
+ pack->bitmap_pos = i + base_objects;
pack->bitmap_nr++;
pack_order[i] = data[i].nr;
}
@@ -645,7 +677,8 @@ static void prepare_midx_packing_data(struct packing_data *pdata,
prepare_packing_data(the_repository, pdata);
for (i = 0; i < ctx->entries_nr; i++) {
- struct pack_midx_entry *from = &ctx->entries[ctx->pack_order[i]];
+ uint32_t pos = ctx->pack_order[i];
+ struct pack_midx_entry *from = &ctx->entries[pos];
struct object_entry *to = packlist_alloc(pdata, &from->oid);
oe_set_in_pack(pdata, to,
@@ -893,35 +926,128 @@ cleanup:
static int fill_packs_from_midx(struct write_midx_context *ctx,
const char *preferred_pack_name, uint32_t flags)
{
- uint32_t i;
+ struct multi_pack_index *m;
- for (i = 0; i < ctx->m->num_packs; i++) {
- ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
+ for (m = ctx->m; m; m = m->base_midx) {
+ uint32_t i;
+
+ for (i = 0; i < m->num_packs; i++) {
+ ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
- if (flags & MIDX_WRITE_REV_INDEX || preferred_pack_name) {
/*
* If generating a reverse index, need to have
* packed_git's loaded to compare their
* mtimes and object count.
*
- *
* If a preferred pack is specified, need to
* have packed_git's loaded to ensure the chosen
* preferred pack has a non-zero object count.
*/
- if (prepare_midx_pack(the_repository, ctx->m, i))
- return error(_("could not load pack"));
+ if (flags & MIDX_WRITE_REV_INDEX ||
+ preferred_pack_name) {
+ if (prepare_midx_pack(the_repository, m,
+ m->num_packs_in_base + i)) {
+ error(_("could not load pack"));
+ return 1;
+ }
+
+ if (open_pack_index(m->packs[i]))
+ die(_("could not open index for %s"),
+ m->packs[i]->pack_name);
+ }
+
+ fill_pack_info(&ctx->info[ctx->nr++], m->packs[i],
+ m->pack_names[i],
+ m->num_packs_in_base + i);
+ }
+ }
+ return 0;
+}
+
+static struct {
+ const char *non_split;
+ const char *split;
+} midx_exts[] = {
+ {NULL, MIDX_EXT_MIDX},
+ {MIDX_EXT_BITMAP, MIDX_EXT_BITMAP},
+ {MIDX_EXT_REV, MIDX_EXT_REV},
+};
+
+static int link_midx_to_chain(struct multi_pack_index *m)
+{
+ struct strbuf from = STRBUF_INIT;
+ struct strbuf to = STRBUF_INIT;
+ int ret = 0;
+ size_t i;
+
+ if (!m || m->has_chain) {
+ /*
+ * Either no MIDX previously existed, or it was already
+ * part of a MIDX chain. In both cases, we have nothing
+ * to link, so return early.
+ */
+ goto done;
+ }
- if (open_pack_index(ctx->m->packs[i]))
- die(_("could not open index for %s"),
- ctx->m->packs[i]->pack_name);
+ for (i = 0; i < ARRAY_SIZE(midx_exts); i++) {
+ const unsigned char *hash = get_midx_checksum(m);
+
+ get_midx_filename_ext(&from, m->object_dir, hash,
+ midx_exts[i].non_split);
+ get_split_midx_filename_ext(&to, m->object_dir, hash,
+ midx_exts[i].split);
+
+ if (link(from.buf, to.buf) < 0 && errno != ENOENT) {
+ ret = error_errno(_("unable to link '%s' to '%s'"),
+ from.buf, to.buf);
+ goto done;
}
- fill_pack_info(&ctx->info[ctx->nr++], ctx->m->packs[i],
- ctx->m->pack_names[i], i);
+ strbuf_reset(&from);
+ strbuf_reset(&to);
}
- return 0;
+done:
+ strbuf_release(&from);
+ strbuf_release(&to);
+ return ret;
+}
+
+static void clear_midx_files(const char *object_dir,
+ const char **hashes,
+ uint32_t hashes_nr,
+ unsigned incremental)
+{
+ /*
+ * if incremental:
+ * - remove all non-incremental MIDX files
+ * - remove any incremental MIDX files not in the current one
+ *
+ * if non-incremental:
+ * - remove all incremental MIDX files
+ * - remove any non-incremental MIDX files not matching the current
+ * hash
+ */
+ struct strbuf buf = STRBUF_INIT;
+ const char *exts[] = { MIDX_EXT_BITMAP, MIDX_EXT_REV, MIDX_EXT_MIDX };
+ uint32_t i, j;
+
+ for (i = 0; i < ARRAY_SIZE(exts); i++) {
+ clear_incremental_midx_files_ext(object_dir, exts[i],
+ hashes, hashes_nr);
+ for (j = 0; j < hashes_nr; j++)
+ clear_midx_files_ext(object_dir, exts[i], hashes[j]);
+ }
+
+ if (incremental)
+ get_midx_filename(&buf, object_dir);
+ else
+ get_midx_chain_filename(&buf, object_dir);
+
+ if (unlink(buf.buf) && errno != ENOENT)
+ die_errno(_("failed to clear multi-pack-index at %s"), buf.buf);
+
+ strbuf_release(&buf);
}
static int write_midx_internal(const char *object_dir,
@@ -936,42 +1062,66 @@ static int write_midx_internal(const char *object_dir,
uint32_t i, start_pack;
struct hashfile *f = NULL;
struct lock_file lk;
+ struct tempfile *incr;
struct write_midx_context ctx = { 0 };
int bitmapped_packs_concat_len = 0;
int pack_name_concat_len = 0;
int dropped_packs = 0;
int result = 0;
+ const char **keep_hashes = NULL;
struct chunkfile *cf;
trace2_region_enter("midx", "write_midx_internal", the_repository);
- get_midx_filename(&midx_name, object_dir);
+ ctx.incremental = !!(flags & MIDX_WRITE_INCREMENTAL);
+ if (ctx.incremental && (flags & MIDX_WRITE_BITMAP))
+ die(_("cannot write incremental MIDX with bitmap"));
+
+ if (ctx.incremental)
+ strbuf_addf(&midx_name,
+ "%s/pack/multi-pack-index.d/tmp_midx_XXXXXX",
+ object_dir);
+ else
+ get_midx_filename(&midx_name, object_dir);
if (safe_create_leading_directories(midx_name.buf))
die_errno(_("unable to create leading directories of %s"),
midx_name.buf);
- if (!packs_to_include) {
- /*
- * Only reference an existing MIDX when not filtering which
- * packs to include, since all packs and objects are copied
- * blindly from an existing MIDX if one is present.
- */
- ctx.m = lookup_multi_pack_index(the_repository, object_dir);
- }
+ if (!packs_to_include || ctx.incremental) {
+ struct multi_pack_index *m = lookup_multi_pack_index(the_repository,
+ object_dir);
+ if (m && !midx_checksum_valid(m)) {
+ warning(_("ignoring existing multi-pack-index; checksum mismatch"));
+ m = NULL;
+ }
- if (ctx.m && !midx_checksum_valid(ctx.m)) {
- warning(_("ignoring existing multi-pack-index; checksum mismatch"));
- ctx.m = NULL;
+ if (m) {
+ /*
+ * Only reference an existing MIDX when not filtering
+ * which packs to include, since all packs and objects
+ * are copied blindly from an existing MIDX if one is
+ * present.
+ */
+ if (ctx.incremental)
+ ctx.base_midx = m;
+ else if (!packs_to_include)
+ ctx.m = m;
+ }
}
ctx.nr = 0;
- ctx.alloc = ctx.m ? ctx.m->num_packs : 16;
+ ctx.alloc = ctx.m ? ctx.m->num_packs + ctx.m->num_packs_in_base : 16;
ctx.info = NULL;
ALLOC_ARRAY(ctx.info, ctx.alloc);
- if (ctx.m && fill_packs_from_midx(&ctx, preferred_pack_name,
- flags) < 0) {
- result = 1;
+ if (ctx.incremental) {
+ struct multi_pack_index *m = ctx.base_midx;
+ while (m) {
+ ctx.num_multi_pack_indexes_before++;
+ m = m->base_midx;
+ }
+ } else if (ctx.m && fill_packs_from_midx(&ctx, preferred_pack_name,
+ flags) < 0) {
goto cleanup;
}
@@ -988,7 +1138,8 @@ static int write_midx_internal(const char *object_dir,
for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &ctx);
stop_progress(&ctx.progress);
- if ((ctx.m && ctx.nr == ctx.m->num_packs) &&
+ if ((ctx.m && ctx.nr == ctx.m->num_packs + ctx.m->num_packs_in_base) &&
+ !ctx.incremental &&
!(packs_to_include || packs_to_drop)) {
struct bitmap_index *bitmap_git;
int bitmap_exists;
@@ -1004,12 +1155,14 @@ static int write_midx_internal(const char *object_dir,
* corresponding bitmap (or one wasn't requested).
*/
if (!want_bitmap)
- clear_midx_files_ext(object_dir, ".bitmap",
- NULL);
+ clear_midx_files_ext(object_dir, "bitmap", NULL);
goto cleanup;
}
}
+ if (ctx.incremental && !ctx.nr)
+ goto cleanup; /* nothing to do */
+
if (preferred_pack_name) {
ctx.preferred_pack_idx = -1;
@@ -1155,8 +1308,30 @@ static int write_midx_internal(const char *object_dir,
pack_name_concat_len += MIDX_CHUNK_ALIGNMENT -
(pack_name_concat_len % MIDX_CHUNK_ALIGNMENT);
- hold_lock_file_for_update(&lk, midx_name.buf, LOCK_DIE_ON_ERROR);
- f = hashfd(get_lock_file_fd(&lk), get_lock_file_path(&lk));
+ if (ctx.incremental) {
+ struct strbuf lock_name = STRBUF_INIT;
+
+ get_midx_chain_filename(&lock_name, object_dir);
+ hold_lock_file_for_update(&lk, lock_name.buf, LOCK_DIE_ON_ERROR);
+ strbuf_release(&lock_name);
+
+ incr = mks_tempfile_m(midx_name.buf, 0444);
+ if (!incr) {
+ error(_("unable to create temporary MIDX layer"));
+ return -1;
+ }
+
+ if (adjust_shared_perm(get_tempfile_path(incr))) {
+ error(_("unable to adjust shared permissions for '%s'"),
+ get_tempfile_path(incr));
+ return -1;
+ }
+
+ f = hashfd(get_tempfile_fd(incr), get_tempfile_path(incr));
+ } else {
+ hold_lock_file_for_update(&lk, midx_name.buf, LOCK_DIE_ON_ERROR);
+ f = hashfd(get_lock_file_fd(&lk), get_lock_file_path(&lk));
+ }
if (ctx.nr - dropped_packs == 0) {
error(_("no pack files to index."));
@@ -1249,14 +1424,55 @@ static int write_midx_internal(const char *object_dir,
* have been freed in the previous if block.
*/
- if (ctx.m)
+ CALLOC_ARRAY(keep_hashes, ctx.num_multi_pack_indexes_before + 1);
+
+ if (ctx.incremental) {
+ FILE *chainf = fdopen_lock_file(&lk, "w");
+ struct strbuf final_midx_name = STRBUF_INIT;
+ struct multi_pack_index *m = ctx.base_midx;
+
+ if (!chainf) {
+ error_errno(_("unable to open multi-pack-index chain file"));
+ return -1;
+ }
+
+ if (link_midx_to_chain(ctx.base_midx) < 0)
+ return -1;
+
+ get_split_midx_filename_ext(&final_midx_name, object_dir,
+ midx_hash, MIDX_EXT_MIDX);
+
+ if (rename_tempfile(&incr, final_midx_name.buf) < 0) {
+ error_errno(_("unable to rename new multi-pack-index layer"));
+ return -1;
+ }
+
+ keep_hashes[ctx.num_multi_pack_indexes_before] =
+ xstrdup(hash_to_hex(midx_hash));
+
+ for (i = 0; i < ctx.num_multi_pack_indexes_before; i++) {
+ uint32_t j = ctx.num_multi_pack_indexes_before - i - 1;
+
+ keep_hashes[j] = xstrdup(hash_to_hex(get_midx_checksum(m)));
+ m = m->base_midx;
+ }
+
+ for (i = 0; i < ctx.num_multi_pack_indexes_before + 1; i++)
+ fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes[i]);
+ } else {
+ keep_hashes[ctx.num_multi_pack_indexes_before] =
+ xstrdup(hash_to_hex(midx_hash));
+ }
+
+ if (ctx.m || ctx.base_midx)
close_object_store(the_repository->objects);
if (commit_lock_file(&lk) < 0)
die_errno(_("could not write multi-pack-index"));
- clear_midx_files_ext(object_dir, ".bitmap", midx_hash);
- clear_midx_files_ext(object_dir, ".rev", midx_hash);
+ clear_midx_files(object_dir, keep_hashes,
+ ctx.num_multi_pack_indexes_before + 1,
+ ctx.incremental);
cleanup:
for (i = 0; i < ctx.nr; i++) {
@@ -1271,6 +1487,11 @@ cleanup:
free(ctx.entries);
free(ctx.pack_perm);
free(ctx.pack_order);
+ if (keep_hashes) {
+ for (i = 0; i < ctx.num_multi_pack_indexes_before + 1; i++)
+ free((char *)keep_hashes[i]);
+ free(keep_hashes);
+ }
strbuf_release(&midx_name);
trace2_region_leave("midx", "write_midx_internal", the_repository);
@@ -1307,6 +1528,9 @@ int expire_midx_packs(struct repository *r, const char *object_dir, unsigned fla
if (!m)
return 0;
+ if (m->base_midx)
+ die(_("cannot expire packs from an incremental multi-pack-index"));
+
CALLOC_ARRAY(count, m->num_packs);
if (flags & MIDX_PROGRESS)
@@ -1481,6 +1705,8 @@ int midx_repack(struct repository *r, const char *object_dir, size_t batch_size,
if (!m)
return 0;
+ if (m->base_midx)
+ die(_("cannot repack an incremental multi-pack-index"));
CALLOC_ARRAY(include_pack, m->num_packs);