diff options
Diffstat (limited to 'pack-bitmap.c')
-rw-r--r-- | pack-bitmap.c | 172 |
1 files changed, 106 insertions, 66 deletions
diff --git a/pack-bitmap.c b/pack-bitmap.c index 1682f99596..242a5908f7 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1841,8 +1841,10 @@ cleanup: * -1 means "stop trying further objects"; 0 means we may or may not have * reused, but you can keep feeding bits. */ -static int try_partial_reuse(struct bitmapped_pack *pack, - size_t pos, +static int try_partial_reuse(struct bitmap_index *bitmap_git, + struct bitmapped_pack *pack, + size_t bitmap_pos, + uint32_t pack_pos, struct bitmap *reuse, struct pack_window **w_curs) { @@ -1850,33 +1852,10 @@ static int try_partial_reuse(struct bitmapped_pack *pack, enum object_type type; unsigned long size; - /* - * try_partial_reuse() is called either on (a) objects in the - * bitmapped pack (in the case of a single-pack bitmap) or (b) - * objects in the preferred pack of a multi-pack bitmap. - * Importantly, the latter can pretend as if only a single pack - * exists because: - * - * - The first pack->num_objects bits of a MIDX bitmap are - * reserved for the preferred pack, and - * - * - Ties due to duplicate objects are always resolved in - * favor of the preferred pack. - * - * Therefore we do not need to ever ask the MIDX for its copy of - * an object by OID, since it will always select it from the - * preferred pack. Likewise, the selected copy of the base - * object for any deltas will reside in the same pack. - * - * This means that we can reuse pos when looking up the bit in - * the reuse bitmap, too, since bits corresponding to the - * preferred pack precede all bits from other packs. - */ + if (pack_pos >= pack->p->num_objects) + return -1; /* not actually in the pack */ - if (pos >= pack->p->num_objects) - return -1; /* not actually in the pack or MIDX preferred pack */ - - offset = delta_obj_offset = pack_pos_to_offset(pack->p, pos); + offset = delta_obj_offset = pack_pos_to_offset(pack->p, pack_pos); type = unpack_object_header(pack->p, w_curs, &offset, &size); if (type < 0) return -1; /* broken packfile, punt */ @@ -1884,6 +1863,7 @@ static int try_partial_reuse(struct bitmapped_pack *pack, if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) { off_t base_offset; uint32_t base_pos; + uint32_t base_bitmap_pos; /* * Find the position of the base object so we can look it up @@ -1897,20 +1877,44 @@ static int try_partial_reuse(struct bitmapped_pack *pack, delta_obj_offset); if (!base_offset) return 0; - if (offset_to_pack_pos(pack->p, base_offset, &base_pos) < 0) - return 0; - /* - * We assume delta dependencies always point backwards. This - * lets us do a single pass, and is basically always true - * due to the way OFS_DELTAs work. You would not typically - * find REF_DELTA in a bitmapped pack, since we only bitmap - * packs we write fresh, and OFS_DELTA is the default). But - * let's double check to make sure the pack wasn't written with - * odd parameters. - */ - if (base_pos >= pos) - return 0; + offset_to_pack_pos(pack->p, base_offset, &base_pos); + + if (bitmap_is_midx(bitmap_git)) { + /* + * Cross-pack deltas are rejected for now, but could + * theoretically be supported in the future. + * + * We would need to ensure that we're sending both + * halves of the delta/base pair, regardless of whether + * or not the two cross a pack boundary. If they do, + * then we must convert the delta to an REF_DELTA to + * refer back to the base in the other pack. + * */ + if (midx_pair_to_pack_pos(bitmap_git->midx, + pack->pack_int_id, + base_offset, + &base_bitmap_pos) < 0) { + return 0; + } + } else { + if (offset_to_pack_pos(pack->p, base_offset, + &base_pos) < 0) + return 0; + /* + * We assume delta dependencies always point backwards. + * This lets us do a single pass, and is basically + * always true due to the way OFS_DELTAs work. You would + * not typically find REF_DELTA in a bitmapped pack, + * since we only bitmap packs we write fresh, and + * OFS_DELTA is the default). But let's double check to + * make sure the pack wasn't written with odd + * parameters. + */ + if (base_pos >= pack_pos) + return 0; + base_bitmap_pos = pack->bitmap_pos + base_pos; + } /* * And finally, if we're not sending the base as part of our @@ -1920,14 +1924,14 @@ static int try_partial_reuse(struct bitmapped_pack *pack, * to REF_DELTA on the fly. Better to just let the normal * object_entry code path handle it. */ - if (!bitmap_get(reuse, pack->bitmap_pos + base_pos)) + if (!bitmap_get(reuse, base_bitmap_pos)) return 0; } /* * If we got here, then the object is OK to reuse. Mark it. */ - bitmap_set(reuse, pack->bitmap_pos + pos); + bitmap_set(reuse, bitmap_pos); return 0; } @@ -1937,36 +1941,72 @@ static void reuse_partial_packfile_from_bitmap_1(struct bitmap_index *bitmap_git { struct bitmap *result = bitmap_git->result; struct pack_window *w_curs = NULL; - size_t i = 0; - - while (i < result->word_alloc && result->words[i] == (eword_t)~0) - i++; - - /* - * Don't mark objects not in the packfile or preferred pack. This bitmap - * marks objects eligible for reuse, but the pack-reuse code only - * understands how to reuse a single pack. Since the preferred pack is - * guaranteed to have all bases for its deltas (in a multi-pack bitmap), - * we use it instead of another pack. In single-pack bitmaps, the choice - * is made for us. - */ - if (i > pack->p->num_objects / BITS_IN_EWORD) - i = pack->p->num_objects / BITS_IN_EWORD; + size_t pos = pack->bitmap_pos / BITS_IN_EWORD; - memset(reuse->words, 0xFF, i * sizeof(eword_t)); + if (!pack->bitmap_pos) { + /* + * If we're processing the first (in the case of a MIDX, the + * preferred pack) or the only (in the case of single-pack + * bitmaps) pack, then we can reuse whole words at a time. + * + * This is because we know that any deltas in this range *must* + * have their bases chosen from the same pack, since: + * + * - In the single pack case, there is no other pack to choose + * them from. + * + * - In the MIDX case, the first pack is the preferred pack, so + * all ties are broken in favor of that pack (i.e. the one + * we're currently processing). So any duplicate bases will be + * resolved in favor of the pack we're processing. + */ + while (pos < result->word_alloc && + pos < pack->bitmap_nr / BITS_IN_EWORD && + result->words[pos] == (eword_t)~0) + pos++; + memset(reuse->words, 0xFF, pos * sizeof(eword_t)); + } - for (; i < result->word_alloc; ++i) { - eword_t word = result->words[i]; - size_t pos = (i * BITS_IN_EWORD); + for (; pos < result->word_alloc; pos++) { + eword_t word = result->words[pos]; size_t offset; - for (offset = 0; offset < BITS_IN_EWORD; ++offset) { - if ((word >> offset) == 0) + for (offset = 0; offset < BITS_IN_EWORD; offset++) { + size_t bit_pos; + uint32_t pack_pos; + + if (word >> offset == 0) break; offset += ewah_bit_ctz64(word >> offset); - if (try_partial_reuse(pack, pos + offset, - reuse, &w_curs) < 0) { + + bit_pos = pos * BITS_IN_EWORD + offset; + if (bit_pos < pack->bitmap_pos) + continue; + if (bit_pos >= pack->bitmap_pos + pack->bitmap_nr) + goto done; + + if (bitmap_is_midx(bitmap_git)) { + uint32_t midx_pos; + off_t ofs; + + midx_pos = pack_pos_to_midx(bitmap_git->midx, bit_pos); + ofs = nth_midxed_offset(bitmap_git->midx, midx_pos); + + if (offset_to_pack_pos(pack->p, ofs, &pack_pos) < 0) + BUG("could not find object in pack %s " + "at offset %"PRIuMAX" in MIDX", + pack_basename(pack->p), (uintmax_t)ofs); + } else { + pack_pos = cast_size_t_to_uint32_t(st_sub(bit_pos, pack->bitmap_pos)); + if (pack_pos >= pack->p->num_objects) + BUG("advanced beyond the end of pack %s (%"PRIuMAX" > %"PRIu32")", + pack_basename(pack->p), (uintmax_t)pack_pos, + pack->p->num_objects); + } + + if (try_partial_reuse(bitmap_git, pack, bit_pos, + pack_pos, reuse, &w_curs) < 0) { /* * try_partial_reuse indicated we couldn't reuse * any bits, so there is no point in trying more |