summaryrefslogtreecommitdiffstats
path: root/pack-bitmap.c
diff options
context:
space:
mode:
authorTaylor Blau <me@ttaylorr.com>2021-08-31 22:52:21 +0200
committerJunio C Hamano <gitster@pobox.com>2021-09-01 22:56:43 +0200
commit0f533c728418fd3ef6ebcae5240e8df566cdaa72 (patch)
tree3b05aacecd6a5041b6f9bc165e7dd438830f393f /pack-bitmap.c
parentpack-bitmap.c: avoid redundant calls to try_partial_reuse (diff)
downloadgit-0f533c728418fd3ef6ebcae5240e8df566cdaa72.tar.xz
git-0f533c728418fd3ef6ebcae5240e8df566cdaa72.zip
pack-bitmap: read multi-pack bitmaps
This prepares the code in pack-bitmap to interpret the new multi-pack bitmaps described in Documentation/technical/bitmap-format.txt, which mostly involves converting bit positions to accommodate looking them up in a MIDX. Note that there are currently no writers who write multi-pack bitmaps, and that this will be implemented in the subsequent commit. Note also that get_midx_checksum() and get_midx_filename() are made non-static so they can be called from pack-bitmap.c. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'pack-bitmap.c')
-rw-r--r--pack-bitmap.c357
1 files changed, 319 insertions, 38 deletions
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 4e37f5d574..fa69ed7a6d 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -13,6 +13,7 @@
#include "repository.h"
#include "object-store.h"
#include "list-objects-filter-options.h"
+#include "midx.h"
#include "config.h"
/*
@@ -35,8 +36,15 @@ struct stored_bitmap {
* the active bitmap index is the largest one.
*/
struct bitmap_index {
- /* Packfile to which this bitmap index belongs to */
+ /*
+ * The pack or multi-pack index (MIDX) that this bitmap index belongs
+ * to.
+ *
+ * Exactly one of these must be non-NULL; this specifies the object
+ * order used to interpret this bitmap.
+ */
struct packed_git *pack;
+ struct multi_pack_index *midx;
/*
* Mark the first `reuse_objects` in the packfile as reused:
@@ -71,6 +79,9 @@ struct bitmap_index {
/* If not NULL, this is a name-hash cache pointing into map. */
uint32_t *hashes;
+ /* The checksum of the packfile or MIDX; points into map. */
+ const unsigned char *checksum;
+
/*
* Extended index.
*
@@ -138,6 +149,8 @@ static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index)
+/*
+ * Number of objects in the object order this bitmap is interpreted
+ * against: the MIDX's object count when a MIDX is attached, otherwise
+ * the object count of the single pack.
+ */
static uint32_t bitmap_num_objects(struct bitmap_index *index)
{
+ if (index->midx)
+ return index->midx->num_objects;
return index->pack->num_objects;
}
@@ -175,6 +188,7 @@ static int load_bitmap_header(struct bitmap_index *index)
}
index->entry_count = ntohl(header->entry_count);
+ index->checksum = header->checksum;
index->map_pos += header_size;
return 0;
}
@@ -227,6 +241,8 @@ static int nth_bitmap_object_oid(struct bitmap_index *index,
struct object_id *oid,
uint32_t n)
{
+ if (index->midx)
+ return nth_midxed_object_oid(oid, index->midx, n) ? 0 : -1;
return nth_packed_object_id(oid, index->pack, n);
}
@@ -274,7 +290,14 @@ static int load_bitmap_entries_v1(struct bitmap_index *index)
return 0;
}
-static char *pack_bitmap_filename(struct packed_git *p)
+/*
+ * Build the path of the multi-pack bitmap that accompanies `midx`: the
+ * MIDX's own filename, a dash, the hex form of the MIDX checksum and a
+ * ".bitmap" suffix.
+ *
+ * The result is allocated by xstrfmt(); the caller must free() it.
+ */
+char *midx_bitmap_filename(struct multi_pack_index *midx)
+{
+	/*
+	 * get_midx_filename() returns an allocated string (see midx.c).
+	 * Keep a handle on it so it can be released once it has been
+	 * formatted into the result, rather than leaking it on every call.
+	 */
+	char *midx_name = get_midx_filename(midx->object_dir);
+	char *bitmap_name = xstrfmt("%s-%s.bitmap", midx_name,
+				    hash_to_hex(get_midx_checksum(midx)));
+
+	free(midx_name);
+	return bitmap_name;
+}
+
+char *pack_bitmap_filename(struct packed_git *p)
{
size_t len;
@@ -283,6 +306,57 @@ static char *pack_bitmap_filename(struct packed_git *p)
return xstrfmt("%.*s.bitmap", (int)len, p->pack_name);
}
+/*
+ * Try to open and map the bitmap file belonging to `midx`, attaching it
+ * to `bitmap_git`.
+ *
+ * Returns 0 on success. Returns -1 when the bitmap file cannot be opened
+ * or stat'ed, when `bitmap_git` already has a pack or MIDX attached, when
+ * the bitmap header cannot be loaded, when the checksum recorded in the
+ * bitmap does not match the MIDX checksum, or when the MIDX reverse
+ * index cannot be loaded. On any failure after mapping, the mapping is
+ * released and `bitmap_git` is detached from `midx` again, so the caller
+ * may try another bitmap with the same `bitmap_git`.
+ */
+static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
+			      struct multi_pack_index *midx)
+{
+	struct stat st;
+	char *idx_name = midx_bitmap_filename(midx);
+	int fd = git_open(idx_name);
+
+	free(idx_name);
+
+	if (fd < 0)
+		return -1;
+
+	if (fstat(fd, &st)) {
+		close(fd);
+		return -1;
+	}
+
+	if (bitmap_git->pack || bitmap_git->midx) {
+		/* ignore extra bitmap file; we can only handle one */
+		char *midx_name = get_midx_filename(midx->object_dir);
+
+		warning("ignoring extra bitmap file: %s", midx_name);
+		/* get_midx_filename() allocates; do not leak the name */
+		free(midx_name);
+		close(fd);
+		return -1;
+	}
+
+	bitmap_git->midx = midx;
+	bitmap_git->map_size = xsize_t(st.st_size);
+	bitmap_git->map_pos = 0;
+	bitmap_git->map = xmmap(NULL, bitmap_git->map_size, PROT_READ,
+				MAP_PRIVATE, fd, 0);
+	/* the mapping stays valid after the descriptor is closed */
+	close(fd);
+
+	if (load_bitmap_header(bitmap_git) < 0)
+		goto cleanup;
+
+	if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum))
+		goto cleanup;
+
+	if (load_midx_revindex(bitmap_git->midx) < 0) {
+		warning(_("multi-pack bitmap is missing required reverse index"));
+		goto cleanup;
+	}
+	return 0;
+
+cleanup:
+	munmap(bitmap_git->map, bitmap_git->map_size);
+	bitmap_git->map_size = 0;
+	bitmap_git->map_pos = 0;
+	bitmap_git->map = NULL;
+	/*
+	 * Detach the MIDX again. Leaving it set would make
+	 * bitmap_is_midx() report true for an index that has no map, would
+	 * make the fallback open_pack_bitmap_1() reject every pack bitmap
+	 * as an "extra bitmap file", and would have free_bitmap_index()
+	 * close the reverse index of a MIDX we never finished opening.
+	 */
+	bitmap_git->midx = NULL;
+	return -1;
+}
+
static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git *packfile)
{
int fd;
@@ -304,7 +378,8 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
return -1;
}
- if (bitmap_git->pack) {
+ if (bitmap_git->pack || bitmap_git->midx) {
+ /* ignore extra bitmap file; we can only handle one */
warning("ignoring extra bitmap file: %s", packfile->pack_name);
close(fd);
return -1;
@@ -331,13 +406,39 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
return 0;
}
-static int load_pack_bitmap(struct bitmap_index *bitmap_git)
+/*
+ * Load the reverse-index data needed to interpret `bitmap_git`.
+ *
+ * For a multi-pack bitmap, every pack of the MIDX is prepared (dying if
+ * one cannot be opened) and its reverse index loaded; the first non-zero
+ * result of load_pack_revindex() is returned. For a single-pack bitmap,
+ * the result of load_pack_revindex() on that pack is returned. Returns 0
+ * when everything was loaded.
+ */
+static int load_reverse_index(struct bitmap_index *bitmap_git)
+{
+ if (bitmap_is_midx(bitmap_git)) {
+ uint32_t i;
+ int ret;
+
+ /*
+ * The multi-pack-index's .rev file is already loaded via
+ * open_midx_bitmap_1().
+ *
+ * But we still need to open the individual pack .rev files,
+ * since we will need to make use of them in pack-objects.
+ */
+ for (i = 0; i < bitmap_git->midx->num_packs; i++) {
+ if (prepare_midx_pack(the_repository, bitmap_git->midx, i))
+ die(_("load_reverse_index: could not open pack"));
+ ret = load_pack_revindex(bitmap_git->midx->packs[i]);
+ if (ret)
+ return ret;
+ }
+ return 0;
+ }
+ return load_pack_revindex(bitmap_git->pack);
+}
+
+static int load_bitmap(struct bitmap_index *bitmap_git)
{
assert(bitmap_git->map);
bitmap_git->bitmaps = kh_init_oid_map();
bitmap_git->ext_index.positions = kh_init_oid_pos();
- if (load_pack_revindex(bitmap_git->pack))
+
+ if (load_reverse_index(bitmap_git))
goto failed;
if (!(bitmap_git->commits = read_bitmap_1(bitmap_git)) ||
@@ -381,11 +482,47 @@ static int open_pack_bitmap(struct repository *r,
return ret;
}
+/*
+ * Walk the repository's list of multi-pack indexes and attach to
+ * `bitmap_git` the first one whose bitmap open_midx_bitmap_1() accepts.
+ *
+ * Returns 0 as soon as one bitmap was opened, -1 if none could be.
+ * `bitmap_git` must not have a map yet (asserted).
+ */
+static int open_midx_bitmap(struct repository *r,
+ struct bitmap_index *bitmap_git)
+{
+ struct multi_pack_index *midx;
+
+ assert(!bitmap_git->map);
+
+ for (midx = get_multi_pack_index(r); midx; midx = midx->next) {
+ if (!open_midx_bitmap_1(bitmap_git, midx))
+ return 0;
+ }
+ return -1;
+}
+
+/*
+ * Open a bitmap for repository `r` into `bitmap_git`, preferring a
+ * multi-pack bitmap and falling back to a single-pack bitmap when no
+ * multi-pack bitmap could be opened.
+ *
+ * Returns 0 if a multi-pack bitmap was opened, otherwise whatever
+ * open_pack_bitmap() returns. `bitmap_git` must not have a map yet
+ * (asserted).
+ */
+static int open_bitmap(struct repository *r,
+ struct bitmap_index *bitmap_git)
+{
+ assert(!bitmap_git->map);
+
+ if (!open_midx_bitmap(r, bitmap_git))
+ return 0;
+ return open_pack_bitmap(r, bitmap_git);
+}
+
+/*
+ * Allocate a bitmap_index for repository `r`, then open and load a
+ * bitmap into it. Returns the index when both steps succeed; otherwise
+ * the index is released with free_bitmap_index() and NULL is returned.
+ */
struct bitmap_index *prepare_bitmap_git(struct repository *r)
{
struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
- if (!open_pack_bitmap(r, bitmap_git) && !load_pack_bitmap(bitmap_git))
+ if (!open_bitmap(r, bitmap_git) && !load_bitmap(bitmap_git))
+ return bitmap_git;
+
+ free_bitmap_index(bitmap_git);
+ return NULL;
+}
+
+struct bitmap_index *prepare_midx_bitmap_git(struct repository *r,
+ struct multi_pack_index *midx)
+{
+ struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
+
+ if (!open_midx_bitmap_1(bitmap_git, midx) && !load_bitmap(bitmap_git))
return bitmap_git;
free_bitmap_index(bitmap_git);
@@ -435,10 +572,26 @@ static inline int bitmap_position_packfile(struct bitmap_index *bitmap_git,
return pos;
}
+/*
+ * Find the position of `oid` for a multi-pack bitmap.
+ *
+ * The object is first looked up in the MIDX with bsearch_midx(); the
+ * resulting MIDX position is then converted with midx_to_pack_pos().
+ * Returns the converted position, or -1 when the object is not in the
+ * MIDX or the conversion fails.
+ *
+ * NOTE(review): the uint32_t position is returned through an int;
+ * presumably positions never exceed INT_MAX -- confirm.
+ */
+static int bitmap_position_midx(struct bitmap_index *bitmap_git,
+ const struct object_id *oid)
+{
+ uint32_t want, got;
+ if (!bsearch_midx(oid, bitmap_git->midx, &want))
+ return -1;
+
+ if (midx_to_pack_pos(bitmap_git->midx, want, &got) < 0)
+ return -1;
+ return got;
+}
+
+/*
+ * Return the position of `oid` in the bitmap. The lookup goes through
+ * the MIDX for a multi-pack bitmap and through the packfile otherwise;
+ * when that yields a negative result, the extended index is consulted
+ * instead.
+ */
static int bitmap_position(struct bitmap_index *bitmap_git,
const struct object_id *oid)
{
- int pos = bitmap_position_packfile(bitmap_git, oid);
+ int pos;
+ if (bitmap_is_midx(bitmap_git))
+ pos = bitmap_position_midx(bitmap_git, oid);
+ else
+ pos = bitmap_position_packfile(bitmap_git, oid);
return (pos >= 0) ? pos : bitmap_position_extended(bitmap_git, oid);
}
@@ -749,6 +902,7 @@ static void show_objects_for_type(
continue;
for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
+ struct packed_git *pack;
struct object_id oid;
uint32_t hash = 0, index_pos;
off_t ofs;
@@ -758,14 +912,28 @@ static void show_objects_for_type(
offset += ewah_bit_ctz64(word >> offset);
- index_pos = pack_pos_to_index(bitmap_git->pack, pos + offset);
- ofs = pack_pos_to_offset(bitmap_git->pack, pos + offset);
- nth_packed_object_id(&oid, bitmap_git->pack, index_pos);
+ if (bitmap_is_midx(bitmap_git)) {
+ struct multi_pack_index *m = bitmap_git->midx;
+ uint32_t pack_id;
+
+ index_pos = pack_pos_to_midx(m, pos + offset);
+ ofs = nth_midxed_offset(m, index_pos);
+ nth_midxed_object_oid(&oid, m, index_pos);
+
+ pack_id = nth_midxed_pack_int_id(m, index_pos);
+ pack = bitmap_git->midx->packs[pack_id];
+ } else {
+ index_pos = pack_pos_to_index(bitmap_git->pack, pos + offset);
+ ofs = pack_pos_to_offset(bitmap_git->pack, pos + offset);
+ nth_bitmap_object_oid(bitmap_git, &oid, index_pos);
+
+ pack = bitmap_git->pack;
+ }
if (bitmap_git->hashes)
hash = get_be32(bitmap_git->hashes + index_pos);
- show_reach(&oid, object_type, 0, hash, bitmap_git->pack, ofs);
+ show_reach(&oid, object_type, 0, hash, pack, ofs);
}
}
}
@@ -777,8 +945,13 @@ static int in_bitmapped_pack(struct bitmap_index *bitmap_git,
struct object *object = roots->item;
roots = roots->next;
- if (find_pack_entry_one(object->oid.hash, bitmap_git->pack) > 0)
- return 1;
+ if (bitmap_is_midx(bitmap_git)) {
+ if (bsearch_midx(&object->oid, bitmap_git->midx, NULL))
+ return 1;
+ } else {
+ if (find_pack_entry_one(object->oid.hash, bitmap_git->pack) > 0)
+ return 1;
+ }
}
return 0;
@@ -865,14 +1038,26 @@ static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git,
static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
uint32_t pos)
{
- struct packed_git *pack = bitmap_git->pack;
unsigned long size;
struct object_info oi = OBJECT_INFO_INIT;
oi.sizep = &size;
if (pos < bitmap_num_objects(bitmap_git)) {
- off_t ofs = pack_pos_to_offset(pack, pos);
+ struct packed_git *pack;
+ off_t ofs;
+
+ if (bitmap_is_midx(bitmap_git)) {
+ uint32_t midx_pos = pack_pos_to_midx(bitmap_git->midx, pos);
+ uint32_t pack_id = nth_midxed_pack_int_id(bitmap_git->midx, midx_pos);
+
+ pack = bitmap_git->midx->packs[pack_id];
+ ofs = nth_midxed_offset(bitmap_git->midx, midx_pos);
+ } else {
+ pack = bitmap_git->pack;
+ ofs = pack_pos_to_offset(pack, pos);
+ }
+
if (packed_object_info(the_repository, pack, ofs, &oi) < 0) {
struct object_id oid;
nth_bitmap_object_oid(bitmap_git, &oid,
@@ -1053,7 +1238,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
/* try to open a bitmapped pack, but don't parse it yet
* because we may not need to use it */
CALLOC_ARRAY(bitmap_git, 1);
- if (open_pack_bitmap(revs->repo, bitmap_git) < 0)
+ if (open_bitmap(revs->repo, bitmap_git) < 0)
goto cleanup;
for (i = 0; i < revs->pending.nr; ++i) {
@@ -1097,7 +1282,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
* from disk. this is the point of no return; after this the rev_list
* becomes invalidated and we must perform the revwalk through bitmaps
*/
- if (load_pack_bitmap(bitmap_git) < 0)
+ if (load_bitmap(bitmap_git) < 0)
goto cleanup;
object_array_clear(&revs->pending);
@@ -1145,19 +1330,43 @@ cleanup:
* reused, but you can keep feeding bits.
*/
static int try_partial_reuse(struct bitmap_index *bitmap_git,
+ struct packed_git *pack,
size_t pos,
struct bitmap *reuse,
struct pack_window **w_curs)
{
- off_t offset, header;
+ off_t offset, delta_obj_offset;
enum object_type type;
unsigned long size;
- if (pos >= bitmap_num_objects(bitmap_git))
- return -1; /* not actually in the pack or MIDX */
+ /*
+ * try_partial_reuse() is called either on (a) objects in the
+ * bitmapped pack (in the case of a single-pack bitmap) or (b)
+ * objects in the preferred pack of a multi-pack bitmap.
+ * Importantly, the latter can pretend as if only a single pack
+ * exists because:
+ *
+ * - The first pack->num_objects bits of a MIDX bitmap are
+ * reserved for the preferred pack, and
+ *
+ * - Ties due to duplicate objects are always resolved in
+ * favor of the preferred pack.
+ *
+ * Therefore we do not need to ever ask the MIDX for its copy of
+ * an object by OID, since it will always select it from the
+ * preferred pack. Likewise, the selected copy of the base
+ * object for any deltas will reside in the same pack.
+ *
+ * This means that we can reuse pos when looking up the bit in
+ * the reuse bitmap, too, since bits corresponding to the
+ * preferred pack precede all bits from other packs.
+ */
+
+ if (pos >= pack->num_objects)
+ return -1; /* not actually in the pack or MIDX preferred pack */
- offset = header = pack_pos_to_offset(bitmap_git->pack, pos);
- type = unpack_object_header(bitmap_git->pack, w_curs, &offset, &size);
+ offset = delta_obj_offset = pack_pos_to_offset(pack, pos);
+ type = unpack_object_header(pack, w_curs, &offset, &size);
if (type < 0)
return -1; /* broken packfile, punt */
@@ -1173,11 +1382,11 @@ static int try_partial_reuse(struct bitmap_index *bitmap_git,
* and the normal slow path will complain about it in
* more detail.
*/
- base_offset = get_delta_base(bitmap_git->pack, w_curs,
- &offset, type, header);
+ base_offset = get_delta_base(pack, w_curs, &offset, type,
+ delta_obj_offset);
if (!base_offset)
return 0;
- if (offset_to_pack_pos(bitmap_git->pack, base_offset, &base_pos) < 0)
+ if (offset_to_pack_pos(pack, base_offset, &base_pos) < 0)
return 0;
/*
@@ -1211,24 +1420,48 @@ static int try_partial_reuse(struct bitmap_index *bitmap_git,
return 0;
}
+/*
+ * Return the pack id (as given by nth_midxed_pack_int_id()) of the pack
+ * that holds the object at position 0 of the multi-pack bitmap's object
+ * order. The caller uses it as an index into midx->packs to pick the
+ * pack considered for reuse.
+ *
+ * Calling this on a bitmap without a MIDX is a BUG().
+ */
+static uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git)
+{
+ struct multi_pack_index *m = bitmap_git->midx;
+ if (!m)
+ BUG("midx_preferred_pack: requires non-empty MIDX");
+ return nth_midxed_pack_int_id(m, pack_pos_to_midx(bitmap_git->midx, 0));
+}
+
int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
struct packed_git **packfile_out,
uint32_t *entries,
struct bitmap **reuse_out)
{
+ struct packed_git *pack;
struct bitmap *result = bitmap_git->result;
struct bitmap *reuse;
struct pack_window *w_curs = NULL;
size_t i = 0;
uint32_t offset;
- uint32_t objects_nr = bitmap_num_objects(bitmap_git);
+ uint32_t objects_nr;
assert(result);
+ load_reverse_index(bitmap_git);
+
+ if (bitmap_is_midx(bitmap_git))
+ pack = bitmap_git->midx->packs[midx_preferred_pack(bitmap_git)];
+ else
+ pack = bitmap_git->pack;
+ objects_nr = pack->num_objects;
+
while (i < result->word_alloc && result->words[i] == (eword_t)~0)
i++;
- /* Don't mark objects not in the packfile */
+ /*
+ * Don't mark objects not in the packfile or preferred pack. This bitmap
+ * marks objects eligible for reuse, but the pack-reuse code only
+ * understands how to reuse a single pack. Since the preferred pack is
+ * guaranteed to have all bases for its deltas (in a multi-pack bitmap),
+ * we use it instead of another pack. In single-pack bitmaps, the choice
+ * is made for us.
+ */
if (i > objects_nr / BITS_IN_EWORD)
i = objects_nr / BITS_IN_EWORD;
@@ -1244,8 +1477,8 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
break;
offset += ewah_bit_ctz64(word >> offset);
- if (try_partial_reuse(bitmap_git, pos + offset, reuse,
- &w_curs) < 0) {
+ if (try_partial_reuse(bitmap_git, pack, pos + offset,
+ reuse, &w_curs) < 0) {
/*
* try_partial_reuse indicated we couldn't reuse
* any bits, so there is no point in trying more
@@ -1274,7 +1507,7 @@ done:
* need to be handled separately.
*/
bitmap_and_not(result, reuse);
- *packfile_out = bitmap_git->pack;
+ *packfile_out = pack;
*reuse_out = reuse;
return 0;
}
@@ -1548,6 +1781,12 @@ uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
uint32_t i, num_objects;
uint32_t *reposition;
+ if (!bitmap_is_midx(bitmap_git))
+ load_reverse_index(bitmap_git);
+ else if (load_midx_revindex(bitmap_git->midx) < 0)
+ BUG("rebuild_existing_bitmaps: missing required rev-cache "
+ "extension");
+
num_objects = bitmap_num_objects(bitmap_git);
CALLOC_ARRAY(reposition, num_objects);
@@ -1555,8 +1794,13 @@ uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
struct object_id oid;
struct object_entry *oe;
- nth_packed_object_id(&oid, bitmap_git->pack,
- pack_pos_to_index(bitmap_git->pack, i));
+ if (bitmap_is_midx(bitmap_git))
+ nth_midxed_object_oid(&oid,
+ bitmap_git->midx,
+ pack_pos_to_midx(bitmap_git->midx, i));
+ else
+ nth_packed_object_id(&oid, bitmap_git->pack,
+ pack_pos_to_index(bitmap_git->pack, i));
oe = packlist_find(mapping, &oid);
if (oe)
@@ -1582,6 +1826,19 @@ void free_bitmap_index(struct bitmap_index *b)
free(b->ext_index.hashes);
bitmap_free(b->result);
bitmap_free(b->haves);
+ if (bitmap_is_midx(b)) {
+ /*
+ * Multi-pack bitmaps need to have resources associated with
+ * their on-disk reverse indexes unmapped so that stale .rev and
+ * .bitmap files can be removed.
+ *
+ * Unlike pack-based bitmaps, multi-pack bitmaps can be read and
+ * written in the same 'git multi-pack-index write --bitmap'
+ * process. Close resources so they can be removed safely on
+ * platforms like Windows.
+ */
+ close_midx_revindex(b->midx);
+ }
free(b);
}
@@ -1596,7 +1853,6 @@ static off_t get_disk_usage_for_type(struct bitmap_index *bitmap_git,
enum object_type object_type)
{
struct bitmap *result = bitmap_git->result;
- struct packed_git *pack = bitmap_git->pack;
off_t total = 0;
struct ewah_iterator it;
eword_t filter;
@@ -1613,15 +1869,35 @@ static off_t get_disk_usage_for_type(struct bitmap_index *bitmap_git,
continue;
for (offset = 0; offset < BITS_IN_EWORD; offset++) {
- size_t pos;
-
if ((word >> offset) == 0)
break;
offset += ewah_bit_ctz64(word >> offset);
- pos = base + offset;
- total += pack_pos_to_offset(pack, pos + 1) -
- pack_pos_to_offset(pack, pos);
+
+ if (bitmap_is_midx(bitmap_git)) {
+ uint32_t pack_pos;
+ uint32_t midx_pos = pack_pos_to_midx(bitmap_git->midx, base + offset);
+ off_t offset = nth_midxed_offset(bitmap_git->midx, midx_pos);
+
+ uint32_t pack_id = nth_midxed_pack_int_id(bitmap_git->midx, midx_pos);
+ struct packed_git *pack = bitmap_git->midx->packs[pack_id];
+
+ if (offset_to_pack_pos(pack, offset, &pack_pos) < 0) {
+ struct object_id oid;
+ nth_midxed_object_oid(&oid, bitmap_git->midx, midx_pos);
+
+ die(_("could not find %s in pack %s at offset %"PRIuMAX),
+ oid_to_hex(&oid),
+ pack->pack_name,
+ (uintmax_t)offset);
+ }
+
+ total += pack_pos_to_offset(pack, pack_pos + 1) - offset;
+ } else {
+ size_t pos = base + offset;
+ total += pack_pos_to_offset(bitmap_git->pack, pos + 1) -
+ pack_pos_to_offset(bitmap_git->pack, pos);
+ }
}
}
@@ -1672,6 +1948,11 @@ off_t get_disk_usage_from_bitmap(struct bitmap_index *bitmap_git,
return total;
}
+/*
+ * Return 1 if `bitmap_git` has a multi-pack index attached (i.e. it is a
+ * multi-pack bitmap), 0 otherwise.
+ */
+int bitmap_is_midx(struct bitmap_index *bitmap_git)
+{
+ return !!bitmap_git->midx;
+}
+
const struct string_list *bitmap_preferred_tips(struct repository *r)
{
return repo_config_get_value_multi(r, "pack.preferbitmaptips");