diff options
-rw-r--r-- | bloom.c | 34 | ||||
-rw-r--r-- | chunk-format.c | 24 | ||||
-rw-r--r-- | chunk-format.h | 9 | ||||
-rw-r--r-- | commit-graph.c | 119 | ||||
-rw-r--r-- | commit-graph.h | 4 | ||||
-rw-r--r-- | midx.c | 68 | ||||
-rw-r--r-- | midx.h | 3 | ||||
-rw-r--r-- | pack-revindex.c | 13 | ||||
-rw-r--r-- | t/lib-chunk.sh | 17 | ||||
-rw-r--r-- | t/lib-chunk/corrupt-chunk-file.pl | 66 | ||||
-rwxr-xr-x | t/t4216-log-bloom.sh | 50 | ||||
-rwxr-xr-x | t/t5318-commit-graph.sh | 76 | ||||
-rwxr-xr-x | t/t5319-multi-pack-index.sh | 104 | ||||
-rwxr-xr-x | t/t5324-split-commit-graph.sh | 20 | ||||
-rwxr-xr-x | t/t5328-commit-graph-64bit-time.sh | 10 |
15 files changed, 570 insertions, 47 deletions
@@ -29,6 +29,26 @@ static inline unsigned char get_bitmask(uint32_t pos) return ((unsigned char)1) << (pos & (BITS_PER_WORD - 1)); } +static int check_bloom_offset(struct commit_graph *g, uint32_t pos, + uint32_t offset) +{ + /* + * Note that we allow offsets equal to the data size, which would set + * our pointers at one past the end of the chunk memory. This is + * necessary because the on-disk index points to the end of the + * entries (so we can compute size by comparing adjacent ones). And + * naturally the final entry's end is one-past-the-end of the chunk. + */ + if (offset <= g->chunk_bloom_data_size - BLOOMDATA_CHUNK_HEADER_SIZE) + return 0; + + warning("ignoring out-of-range offset (%"PRIuMAX") for changed-path" + " filter at pos %"PRIuMAX" of %s (chunk size: %"PRIuMAX")", + (uintmax_t)offset, (uintmax_t)pos, + g->filename, (uintmax_t)g->chunk_bloom_data_size); + return -1; +} + static int load_bloom_filter_from_graph(struct commit_graph *g, struct bloom_filter *filter, uint32_t graph_pos) @@ -51,6 +71,20 @@ static int load_bloom_filter_from_graph(struct commit_graph *g, else start_index = 0; + if (check_bloom_offset(g, lex_pos, end_index) < 0 || + check_bloom_offset(g, lex_pos - 1, start_index) < 0) + return 0; + + if (end_index < start_index) { + warning("ignoring decreasing changed-path index offsets" + " (%"PRIuMAX" > %"PRIuMAX") for positions" + " %"PRIuMAX" and %"PRIuMAX" of %s", + (uintmax_t)start_index, (uintmax_t)end_index, + (uintmax_t)(lex_pos-1), (uintmax_t)lex_pos, + g->filename); + return 0; + } + filter->len = end_index - start_index; filter->data = (unsigned char *)(g->chunk_bloom_data + sizeof(unsigned char) * start_index + diff --git a/chunk-format.c b/chunk-format.c index 140dfa0dcc..cdc7f39b70 100644 --- a/chunk-format.c +++ b/chunk-format.c @@ -102,7 +102,8 @@ int read_table_of_contents(struct chunkfile *cf, const unsigned char *mfile, size_t mfile_size, uint64_t toc_offset, - int toc_length) + int toc_length, + unsigned expected_alignment) { int i; uint32_t chunk_id; @@ -120,6 +121,11 @@ int read_table_of_contents(struct chunkfile *cf, error(_("terminating chunk id appears earlier than expected")); return 1; } + if (chunk_offset % expected_alignment != 0) { + error(_("chunk id %"PRIx32" not %d-byte aligned"), + chunk_id, expected_alignment); + return 1; + } table_of_contents += CHUNK_TOC_ENTRY_SIZE; next_chunk_offset = get_be64(table_of_contents + 4); @@ -154,20 +160,28 @@ int read_table_of_contents(struct chunkfile *cf, return 0; } +struct pair_chunk_data { + const unsigned char **p; + size_t *size; +}; + static int pair_chunk_fn(const unsigned char *chunk_start, size_t chunk_size, void *data) { - const unsigned char **p = data; - *p = chunk_start; + struct pair_chunk_data *pcd = data; + *pcd->p = chunk_start; + *pcd->size = chunk_size; return 0; } int pair_chunk(struct chunkfile *cf, uint32_t chunk_id, - const unsigned char **p) + const unsigned char **p, + size_t *size) { - return read_chunk(cf, chunk_id, pair_chunk_fn, p); + struct pair_chunk_data pcd = { .p = p, .size = size }; + return read_chunk(cf, chunk_id, pair_chunk_fn, &pcd); } int read_chunk(struct chunkfile *cf, diff --git a/chunk-format.h b/chunk-format.h index c7794e84ad..14b76180ef 100644 --- a/chunk-format.h +++ b/chunk-format.h @@ -36,20 +36,23 @@ int read_table_of_contents(struct chunkfile *cf, const unsigned char *mfile, size_t mfile_size, uint64_t toc_offset, - int toc_length); + int toc_length, + unsigned expected_alignment); #define CHUNK_NOT_FOUND (-2) /* * Find 'chunk_id' in the given chunkfile and assign the * given pointer to the position in the mmap'd file where - * that chunk begins. + * that chunk begins. Likewise the "size" parameter is filled + * with the size of the chunk. * * Returns CHUNK_NOT_FOUND if the chunk does not exist. */ int pair_chunk(struct chunkfile *cf, uint32_t chunk_id, - const unsigned char **p); + const unsigned char **p, + size_t *size); typedef int (*chunk_read_fn)(const unsigned char *chunk_start, size_t chunk_size, void *data); diff --git a/commit-graph.c b/commit-graph.c index fd2f700b2e..c2b782af3b 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -277,6 +277,8 @@ struct commit_graph *load_commit_graph_one_fd_st(struct repository *r, static int verify_commit_graph_lite(struct commit_graph *g) { + int i; + /* * Basic validation shared between parse_commit_graph() * which'll be called every time the graph is used, and the @@ -302,6 +304,30 @@ static int verify_commit_graph_lite(struct commit_graph *g) return 1; } + for (i = 0; i < 255; i++) { + uint32_t oid_fanout1 = ntohl(g->chunk_oid_fanout[i]); + uint32_t oid_fanout2 = ntohl(g->chunk_oid_fanout[i + 1]); + + if (oid_fanout1 > oid_fanout2) { + error("commit-graph fanout values out of order"); + return 1; + } + } + if (ntohl(g->chunk_oid_fanout[255]) != g->num_commits) { + error("commit-graph oid table and fanout disagree on size"); + return 1; + } + + return 0; +} + +static int graph_read_oid_fanout(const unsigned char *chunk_start, + size_t chunk_size, void *data) +{ + struct commit_graph *g = data; + if (chunk_size != 256 * sizeof(uint32_t)) + return error("commit-graph oid fanout chunk is wrong size"); + g->chunk_oid_fanout = (const uint32_t *)chunk_start; return 0; } @@ -314,12 +340,54 @@ static int graph_read_oid_lookup(const unsigned char *chunk_start, return 0; } +static int graph_read_commit_data(const unsigned char *chunk_start, + size_t chunk_size, void *data) +{ + struct commit_graph *g = data; + if (chunk_size != g->num_commits * GRAPH_DATA_WIDTH) + return error("commit-graph commit data chunk is wrong size"); + g->chunk_commit_data = chunk_start; + return 0; +} + +static int graph_read_generation_data(const unsigned char *chunk_start, + size_t chunk_size, void *data) +{ + struct commit_graph *g = data; + if (chunk_size != g->num_commits * sizeof(uint32_t)) + return error("commit-graph generations chunk is wrong size"); + g->chunk_generation_data = chunk_start; + return 0; +} + +static int graph_read_bloom_index(const unsigned char *chunk_start, + size_t chunk_size, void *data) +{ + struct commit_graph *g = data; + if (chunk_size != g->num_commits * 4) { + warning("commit-graph changed-path index chunk is too small"); + return -1; + } + g->chunk_bloom_indexes = chunk_start; + return 0; +} + static int graph_read_bloom_data(const unsigned char *chunk_start, size_t chunk_size, void *data) { struct commit_graph *g = data; uint32_t hash_version; + + if (chunk_size < BLOOMDATA_CHUNK_HEADER_SIZE) { + warning("ignoring too-small changed-path chunk" + " (%"PRIuMAX" < %"PRIuMAX") in commit-graph file", + (uintmax_t)chunk_size, + (uintmax_t)BLOOMDATA_CHUNK_HEADER_SIZE); + return -1; + } + g->chunk_bloom_data = chunk_start; + g->chunk_bloom_data_size = chunk_size; hash_version = get_be32(chunk_start); if (hash_version != 1) @@ -391,29 +459,31 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s, cf = init_chunkfile(NULL); if (read_table_of_contents(cf, graph->data, graph_size, - GRAPH_HEADER_SIZE, graph->num_chunks)) + GRAPH_HEADER_SIZE, graph->num_chunks, 1)) goto free_and_return; - pair_chunk(cf, GRAPH_CHUNKID_OIDFANOUT, - (const unsigned char **)&graph->chunk_oid_fanout); + read_chunk(cf, GRAPH_CHUNKID_OIDFANOUT, graph_read_oid_fanout, graph); read_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, graph_read_oid_lookup, graph); - pair_chunk(cf, GRAPH_CHUNKID_DATA, &graph->chunk_commit_data); - pair_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges); - pair_chunk(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs); + read_chunk(cf, GRAPH_CHUNKID_DATA, graph_read_commit_data, graph); + pair_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges, + &graph->chunk_extra_edges_size); + pair_chunk(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs, + &graph->chunk_base_graphs_size); if (s->commit_graph_generation_version >= 2) { - pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA, - &graph->chunk_generation_data); + read_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA, + graph_read_generation_data, graph); pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW, - &graph->chunk_generation_data_overflow); + &graph->chunk_generation_data_overflow, + &graph->chunk_generation_data_overflow_size); if (graph->chunk_generation_data) graph->read_generation_data = 1; } if (s->commit_graph_read_changed_paths) { - pair_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES, - &graph->chunk_bloom_indexes); + read_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES, + graph_read_bloom_index, graph); read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA, graph_read_bloom_data, graph); } @@ -510,6 +580,11 @@ static int add_graph_to_chain(struct commit_graph *g, return 0; } + if (g->chunk_base_graphs_size / g->hash_len < n) { + warning(_("commit-graph base graphs chunk is too small")); + return 0; + } + while (n) { n--; @@ -837,7 +912,10 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, die(_("commit-graph requires overflow generation data but has none")); offset_pos = offset ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW; - graph_data->generation = item->date + get_be64(g->chunk_generation_data_overflow + st_mult(8, offset_pos)); + if (g->chunk_generation_data_overflow_size / sizeof(uint64_t) <= offset_pos) + die(_("commit-graph overflow generation data is too small")); + graph_data->generation = item->date + + get_be64(g->chunk_generation_data_overflow + sizeof(uint64_t) * offset_pos); } else graph_data->generation = item->date + offset; } else @@ -857,7 +935,7 @@ static int fill_commit_in_graph(struct repository *r, struct commit_graph *g, uint32_t pos) { uint32_t edge_value; - uint32_t *parent_data_ptr; + uint32_t parent_data_pos; struct commit_list **pptr; const unsigned char *commit_data; uint32_t lex_index; @@ -889,14 +967,21 @@ static int fill_commit_in_graph(struct repository *r, return 1; } - parent_data_ptr = (uint32_t*)(g->chunk_extra_edges + - st_mult(4, edge_value & GRAPH_EDGE_LAST_MASK)); + parent_data_pos = edge_value & GRAPH_EDGE_LAST_MASK; do { - edge_value = get_be32(parent_data_ptr); + if (g->chunk_extra_edges_size / sizeof(uint32_t) <= parent_data_pos) { + error("commit-graph extra-edges pointer out of bounds"); + free_commit_list(item->parents); + item->parents = NULL; + item->object.parsed = 0; + return 0; + } + edge_value = get_be32(g->chunk_extra_edges + + sizeof(uint32_t) * parent_data_pos); pptr = insert_parent_or_die(r, g, edge_value & GRAPH_EDGE_LAST_MASK, pptr); - parent_data_ptr++; + parent_data_pos++; } while (!(edge_value & GRAPH_LAST_EDGE)); return 1; diff --git a/commit-graph.h b/commit-graph.h index 20ada7e891..c6870274c5 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -94,10 +94,14 @@ struct commit_graph { const unsigned char *chunk_commit_data; const unsigned char *chunk_generation_data; const unsigned char *chunk_generation_data_overflow; + size_t chunk_generation_data_overflow_size; const unsigned char *chunk_extra_edges; + size_t chunk_extra_edges_size; const unsigned char *chunk_base_graphs; + size_t chunk_base_graphs_size; const unsigned char *chunk_bloom_indexes; const unsigned char *chunk_bloom_data; + size_t chunk_bloom_data_size; struct topo_level_slab *topo_levels; struct bloom_filter_settings *bloom_filter_settings; @@ -71,6 +71,33 @@ static int midx_read_oid_fanout(const unsigned char *chunk_start, error(_("multi-pack-index OID fanout is of the wrong size")); return 1; } + m->num_objects = ntohl(m->chunk_oid_fanout[255]); + return 0; +} + +static int midx_read_oid_lookup(const unsigned char *chunk_start, + size_t chunk_size, void *data) +{ + struct multi_pack_index *m = data; + m->chunk_oid_lookup = chunk_start; + + if (chunk_size != st_mult(m->hash_len, m->num_objects)) { + error(_("multi-pack-index OID lookup chunk is the wrong size")); + return 1; + } + return 0; +} + +static int midx_read_object_offsets(const unsigned char *chunk_start, + size_t chunk_size, void *data) +{ + struct multi_pack_index *m = data; + m->chunk_object_offsets = chunk_start; + + if (chunk_size != st_mult(m->num_objects, MIDX_CHUNK_OFFSET_WIDTH)) { + error(_("multi-pack-index object offset chunk is the wrong size")); + return 1; + } return 0; } @@ -140,33 +167,41 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local cf = init_chunkfile(NULL); if (read_table_of_contents(cf, m->data, midx_size, - MIDX_HEADER_SIZE, m->num_chunks)) + MIDX_HEADER_SIZE, m->num_chunks, + MIDX_CHUNK_ALIGNMENT)) goto cleanup_fail; - if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names) == CHUNK_NOT_FOUND) - die(_("multi-pack-index missing required pack-name chunk")); - if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m) == CHUNK_NOT_FOUND) - die(_("multi-pack-index missing required OID fanout chunk")); - if (pair_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, &m->chunk_oid_lookup) == CHUNK_NOT_FOUND) - die(_("multi-pack-index missing required OID lookup chunk")); - if (pair_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, &m->chunk_object_offsets) == CHUNK_NOT_FOUND) - die(_("multi-pack-index missing required object offsets chunk")); + if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names, &m->chunk_pack_names_len)) + die(_("multi-pack-index required pack-name chunk missing or corrupted")); + if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m)) + die(_("multi-pack-index required OID fanout chunk missing or corrupted")); + if (read_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, midx_read_oid_lookup, m)) + die(_("multi-pack-index required OID lookup chunk missing or corrupted")); + if (read_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, midx_read_object_offsets, m)) + die(_("multi-pack-index required object offsets chunk missing or corrupted")); - pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets); + pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets, + &m->chunk_large_offsets_len); if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1)) - pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex); - - m->num_objects = ntohl(m->chunk_oid_fanout[255]); + pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex, + &m->chunk_revindex_len); CALLOC_ARRAY(m->pack_names, m->num_packs); CALLOC_ARRAY(m->packs, m->num_packs); cur_pack_name = (const char *)m->chunk_pack_names; for (i = 0; i < m->num_packs; i++) { + const char *end; + size_t avail = m->chunk_pack_names_len - + (cur_pack_name - (const char *)m->chunk_pack_names); + m->pack_names[i] = cur_pack_name; - cur_pack_name += strlen(cur_pack_name) + 1; + end = memchr(cur_pack_name, '\0', avail); + if (!end) + die(_("multi-pack-index pack-name chunk is too short")); + cur_pack_name = end + 1; if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) die(_("multi-pack-index pack names out of order: '%s' before '%s'"), @@ -270,8 +305,9 @@ off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos) die(_("multi-pack-index stores a 64-bit offset, but off_t is too small")); offset32 ^= MIDX_LARGE_OFFSET_NEEDED; - return get_be64(m->chunk_large_offsets + - st_mult(sizeof(uint64_t), offset32)); + if (offset32 >= m->chunk_large_offsets_len / sizeof(uint64_t)) + die(_("multi-pack-index large offset out of bounds")); + return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32); } return offset32; @@ -32,11 +32,14 @@ struct multi_pack_index { int local; const unsigned char *chunk_pack_names; + size_t chunk_pack_names_len; const uint32_t *chunk_oid_fanout; const unsigned char *chunk_oid_lookup; const unsigned char *chunk_object_offsets; const unsigned char *chunk_large_offsets; + size_t chunk_large_offsets_len; const unsigned char *chunk_revindex; + size_t chunk_revindex_len; const char **pack_names; struct packed_git **packs; diff --git a/pack-revindex.c b/pack-revindex.c index a01a2a4640..acf1dd9786 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -343,6 +343,17 @@ int verify_pack_revindex(struct packed_git *p) return res; } +static int can_use_midx_ridx_chunk(struct multi_pack_index *m) +{ + if (!m->chunk_revindex) + return 0; + if (m->chunk_revindex_len != st_mult(sizeof(uint32_t), m->num_objects)) { + error(_("multi-pack-index reverse-index chunk is the wrong size")); + return 0; + } + return 1; +} + int load_midx_revindex(struct multi_pack_index *m) { struct strbuf revindex_name = STRBUF_INIT; @@ -351,7 +362,7 @@ int load_midx_revindex(struct multi_pack_index *m) if (m->revindex_data) return 0; - if (m->chunk_revindex) { + if (can_use_midx_ridx_chunk(m)) { /* * If the MIDX `m` has a `RIDX` chunk, then use its contents for * the reverse index instead of trying to load a separate `.rev` diff --git a/t/lib-chunk.sh b/t/lib-chunk.sh new file mode 100644 index 0000000000..a7cd9c3c6d --- /dev/null +++ b/t/lib-chunk.sh @@ -0,0 +1,17 @@ +# Shell library for working with "chunk" files (commit-graph, midx, etc). + +# corrupt_chunk_file <fn> <chunk> <offset> <bytes> +# +# Corrupt a chunk-based file (like a commit-graph) by overwriting the bytes +# found in the chunk specified by the 4-byte <chunk> identifier. If <offset> is +# "clear", replace the chunk entirely. Otherwise, overwrite data <offset> bytes +# into the chunk. +# +# The <bytes> are interpreted as pairs of hex digits (so "000000FE" would be +# big-endian 254). +corrupt_chunk_file () { + fn=$1; shift + perl "$TEST_DIRECTORY"/lib-chunk/corrupt-chunk-file.pl \ + "$@" <"$fn" >"$fn.tmp" && + mv "$fn.tmp" "$fn" +} diff --git a/t/lib-chunk/corrupt-chunk-file.pl b/t/lib-chunk/corrupt-chunk-file.pl new file mode 100644 index 0000000000..cd6d386fef --- /dev/null +++ b/t/lib-chunk/corrupt-chunk-file.pl @@ -0,0 +1,66 @@ +#!/usr/bin/perl + +my ($chunk, $seek, $bytes) = @ARGV; +$bytes =~ s/../chr(hex($&))/ge; + +binmode STDIN; +binmode STDOUT; + +# A few helpers to read bytes, or read and copy them to the +# output. +sub get { + my $n = shift; + return unless $n; + read(STDIN, my $buf, $n) + or die "read error or eof: $!\n"; + return $buf; +} +sub copy { + my $buf = get(@_); + print $buf; + return $buf; +} + +# read until we find table-of-contents entry for chunk; +# note that we cheat a bit by assuming 4-byte alignment and +# that no ToC entry will accidentally look like a header. +# +# If we don't find the entry, copy() will hit EOF and exit +# (which should cause the caller to fail the test). +while (copy(4) ne $chunk) { } +my $offset = unpack("Q>", copy(8)); + +# In clear mode, our length will change. So figure out +# the length by comparing to the offset of the next chunk, and +# then adjust that offset (and all subsequent) ones. +my $len; +if ($seek eq "clear") { + my $id; + do { + $id = copy(4); + my $next = unpack("Q>", get(8)); + if (!defined $len) { + $len = $next - $offset; + } + print pack("Q>", $next - $len + length($bytes)); + } while (unpack("N", $id)); +} + +# and now copy up to our existing chunk data +copy($offset - tell(STDIN)); +if ($seek eq "clear") { + # if clearing, skip past existing data + get($len); +} else { + # otherwise, copy up to the requested offset, + # and skip past the overwritten bytes + copy($seek); + get(length($bytes)); +} + +# now write out the requested bytes, along +# with any other remaining data +print $bytes; +while (read(STDIN, my $buf, 4096)) { + print $buf; +} diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh index fa9d32facf..2ba0324a69 100755 --- a/t/t4216-log-bloom.sh +++ b/t/t4216-log-bloom.sh @@ -5,6 +5,7 @@ GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME . ./test-lib.sh +. "$TEST_DIRECTORY"/lib-chunk.sh GIT_TEST_COMMIT_GRAPH=0 GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0 @@ -404,4 +405,53 @@ test_expect_success 'Bloom generation backfills empty commits' ' ) ' +corrupt_graph () { + graph=.git/objects/info/commit-graph && + test_when_finished "rm -rf $graph" && + git commit-graph write --reachable --changed-paths && + corrupt_chunk_file $graph "$@" +} + +check_corrupt_graph () { + corrupt_graph "$@" && + git -c core.commitGraph=false log -- A/B/file2 >expect.out && + git -c core.commitGraph=true log -- A/B/file2 >out 2>err && + test_cmp expect.out out +} + +test_expect_success 'Bloom reader notices too-small data chunk' ' + check_corrupt_graph BDAT clear 00000000 && + echo "warning: ignoring too-small changed-path chunk" \ + "(4 < 12) in commit-graph file" >expect.err && + test_cmp expect.err err +' + +test_expect_success 'Bloom reader notices out-of-bounds filter offsets' ' + check_corrupt_graph BIDX 12 FFFFFFFF && + # use grep to avoid depending on exact chunk size + grep "warning: ignoring out-of-range offset (4294967295) for changed-path filter at pos 3 of .git/objects/info/commit-graph" err +' + +test_expect_success 'Bloom reader notices too-small index chunk' ' + # replace the index with a single entry, making most + # lookups out-of-bounds + check_corrupt_graph BIDX clear 00000000 && + echo "warning: commit-graph changed-path index chunk" \ + "is too small" >expect.err && + test_cmp expect.err err +' + +test_expect_success 'Bloom reader notices out-of-order index offsets' ' + # we do not know any real offsets, but we can pick + # something plausible; we should not get to the point of + # actually reading from the bogus offsets anyway. + corrupt_graph BIDX 4 0000000c00000005 && + echo "warning: ignoring decreasing changed-path index offsets" \ + "(12 > 5) for positions 1 and 2 of .git/objects/info/commit-graph" >expect.err && + git -c core.commitGraph=false log -- A/B/file2 >expect.out && + git -c core.commitGraph=true log -- A/B/file2 >out 2>err && + test_cmp expect.out out && + test_cmp expect.err err +' + test_done diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index ba65f17dd9..6505ff595a 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -2,6 +2,7 @@ test_description='commit graph' . ./test-lib.sh +. "$TEST_DIRECTORY"/lib-chunk.sh GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0 @@ -559,7 +560,7 @@ test_expect_success 'detect incorrect fanout' ' test_expect_success 'detect incorrect fanout final value' ' corrupt_graph_and_verify $GRAPH_BYTE_FANOUT2 "\01" \ - "fanout value" + "oid table and fanout disagree on size" ' test_expect_success 'detect incorrect OID order' ' @@ -821,4 +822,77 @@ test_expect_success 'overflow during generation version upgrade' ' ) ' +corrupt_chunk () { + graph=full/.git/objects/info/commit-graph && + test_when_finished "rm -rf $graph" && + git -C full commit-graph write --reachable && + corrupt_chunk_file $graph "$@" +} + +check_corrupt_chunk () { + corrupt_chunk "$@" && + git -C full -c core.commitGraph=false log >expect.out && + git -C full -c core.commitGraph=true log >out 2>err && + test_cmp expect.out out +} + +test_expect_success 'reader notices too-small oid fanout chunk' ' + # make it big enough that the graph file is plausible, + # otherwise we hit an earlier check + check_corrupt_chunk OIDF clear $(printf "000000%02x" $(test_seq 250)) && + cat >expect.err <<-\EOF && + error: commit-graph oid fanout chunk is wrong size + error: commit-graph is missing the OID Fanout chunk + EOF + test_cmp expect.err err +' + +test_expect_success 'reader notices fanout/lookup table mismatch' ' + check_corrupt_chunk OIDF 1020 "FFFFFFFF" && + cat >expect.err <<-\EOF && + error: commit-graph oid table and fanout disagree on size + EOF + test_cmp expect.err err +' + +test_expect_success 'reader notices out-of-bounds fanout' ' + # Rather than try to corrupt a specific hash, we will just + # wreck them all. But we cannot just set them all to 0xFFFFFFFF or + # similar, as they are used for hi/lo starts in a binary search (so if + # they are identical, that indicates that the search should abort + # immediately). Instead, we will give them high values that differ by + # 2^24, ensuring that any that are used would cause an out-of-bounds + # read. + check_corrupt_chunk OIDF 0 $(printf "%02x000000" $(test_seq 0 254)) && + cat >expect.err <<-\EOF && + error: commit-graph fanout values out of order + EOF + test_cmp expect.err err +' + +test_expect_success 'reader notices too-small commit data chunk' ' + check_corrupt_chunk CDAT clear 00000000 && + cat >expect.err <<-\EOF && + error: commit-graph commit data chunk is wrong size + error: commit-graph is missing the Commit Data chunk + EOF + test_cmp expect.err err +' + +test_expect_success 'reader notices out-of-bounds extra edge' ' + check_corrupt_chunk EDGE clear && + cat >expect.err <<-\EOF && + error: commit-graph extra-edges pointer out of bounds + EOF + test_cmp expect.err err +' + +test_expect_success 'reader notices too-small generations chunk' ' + check_corrupt_chunk GDA2 clear 00000000 && + cat >expect.err <<-\EOF && + error: commit-graph generations chunk is wrong size + EOF + test_cmp expect.err err +' + test_done diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 1bcc02004d..d3c9e97feb 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -2,6 +2,7 @@ test_description='multi-pack-indexes' . ./test-lib.sh +. "$TEST_DIRECTORY"/lib-chunk.sh GIT_TEST_MULTI_PACK_INDEX=0 objdir=.git/objects @@ -438,7 +439,7 @@ test_expect_success 'verify extended chunk count' ' test_expect_success 'verify missing required chunk' ' corrupt_midx_and_verify $MIDX_BYTE_CHUNK_ID "\01" $objdir \ - "missing required" + "required pack-name chunk missing" ' test_expect_success 'verify invalid chunk offset' ' @@ -1055,4 +1056,105 @@ test_expect_success 'repack with delta islands' ' ) ' +corrupt_chunk () { + midx=.git/objects/pack/multi-pack-index && + test_when_finished "rm -rf $midx" && + git repack -ad --write-midx && + corrupt_chunk_file $midx "$@" +} + +test_expect_success 'reader notices too-small oid fanout chunk' ' + corrupt_chunk OIDF clear 00000000 && + test_must_fail git log 2>err && + cat >expect <<-\EOF && + error: multi-pack-index OID fanout is of the wrong size + fatal: multi-pack-index required OID fanout chunk missing or corrupted + EOF + test_cmp expect err +' + +test_expect_success 'reader notices too-small oid lookup chunk' ' + corrupt_chunk OIDL clear 00000000 && + test_must_fail git log 2>err && + cat >expect <<-\EOF && + error: multi-pack-index OID lookup chunk is the wrong size + fatal: multi-pack-index required OID lookup chunk missing or corrupted + EOF + test_cmp expect err +' + +test_expect_success 'reader notices too-small pack names chunk' ' + # There is no NUL to terminate the name here, so the + # chunk is too short. + corrupt_chunk PNAM clear 70656666 && + test_must_fail git log 2>err && + cat >expect <<-\EOF && + fatal: multi-pack-index pack-name chunk is too short + EOF + test_cmp expect err +' + +test_expect_success 'reader handles unaligned chunks' ' + # A 9-byte PNAM means all of the subsequent chunks + # will no longer be 4-byte aligned, but it is still + # a valid one-pack chunk on its own (it is "foo.pack\0"). + corrupt_chunk PNAM clear 666f6f2e7061636b00 && + git -c core.multipackindex=false log >expect.out && + git -c core.multipackindex=true log >out 2>err && + test_cmp expect.out out && + cat >expect.err <<-\EOF && + error: chunk id 4f494446 not 4-byte aligned + EOF + test_cmp expect.err err +' + +test_expect_success 'reader notices too-small object offset chunk' ' + corrupt_chunk OOFF clear 00000000 && + test_must_fail git log 2>err && + cat >expect <<-\EOF && + error: multi-pack-index object offset chunk is the wrong size + fatal: multi-pack-index required object offsets chunk missing or corrupted + EOF + test_cmp expect err +' + +test_expect_success 'reader bounds-checks large offset table' ' + # re-use the objects64 dir here to cheaply get access to a midx + # with large offsets. + git init repo && + test_when_finished "rm -rf repo" && + ( + cd repo && + (cd ../objects64 && pwd) >.git/objects/info/alternates && + git multi-pack-index --object-dir=../objects64 write && + midx=../objects64/pack/multi-pack-index && + corrupt_chunk_file $midx LOFF clear && + # using only %(objectsize) is important here; see the commit + # message for more details + test_must_fail git cat-file --batch-all-objects \ + --batch-check="%(objectsize)" 2>err && + cat >expect <<-\EOF && + fatal: multi-pack-index large offset out of bounds + EOF + test_cmp expect err + ) +' + +test_expect_success 'reader notices too-small revindex chunk' ' + # We only get a revindex with bitmaps (and likewise only + # load it when they are asked for). + test_config repack.writeBitmaps true && + corrupt_chunk RIDX clear 00000000 && + git -c core.multipackIndex=false rev-list \ + --all --use-bitmap-index >expect.out && + git -c core.multipackIndex=true rev-list \ + --all --use-bitmap-index >out 2>err && + test_cmp expect.out out && + cat >expect.err <<-\EOF && + error: multi-pack-index reverse-index chunk is the wrong size + warning: multi-pack bitmap is missing required reverse index + EOF + test_cmp expect.err err +' + test_done diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 8a9720dcb0..97eb6d2e72 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -4,6 +4,7 @@ test_description='split commit graph' TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh +. "$TEST_DIRECTORY"/lib-chunk.sh GIT_TEST_COMMIT_GRAPH=0 GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0 @@ -319,7 +320,7 @@ test_expect_success 'verify --shallow does not check base contents' ' cd verify-shallow && git commit-graph verify && base_file=$graphdir/graph-$(head -n 1 $graphdir/commit-graph-chain).graph && - corrupt_file "$base_file" 1000 "\01" && + corrupt_file "$base_file" 1500 "\01" && git commit-graph verify --shallow && test_must_fail git commit-graph verify 2>test_err && grep -v "^+" test_err >err && @@ -393,10 +394,23 @@ test_expect_success 'verify across alternates' ' test_commit extra && git commit-graph write --reachable --split && tip_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph && - corrupt_file "$tip_file" 100 "\01" && + corrupt_file "$tip_file" 1500 "\01" && test_must_fail git commit-graph verify --shallow 2>test_err && grep -v "^+" test_err >err && - test_i18ngrep "commit-graph has incorrect fanout value" err + test_i18ngrep "incorrect checksum" err + ) +' + +test_expect_success 'reader bounds-checks base-graph chunk' ' + git clone --no-hardlinks . corrupt-base-chunk && + ( + cd corrupt-base-chunk && + tip_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph && + corrupt_chunk_file "$tip_file" BASE clear 01020304 && + git -c core.commitGraph=false log >expect.out && + git -c core.commitGraph=true log >out 2>err && + test_cmp expect.out out && + grep "commit-graph base graphs chunk is too small" err ) ' diff --git a/t/t5328-commit-graph-64bit-time.sh b/t/t5328-commit-graph-64bit-time.sh index ca476e80a0..fc6a242b56 100755 --- a/t/t5328-commit-graph-64bit-time.sh +++ b/t/t5328-commit-graph-64bit-time.sh @@ -12,6 +12,7 @@ then fi . "$TEST_DIRECTORY"/lib-commit-graph.sh +. "$TEST_DIRECTORY/lib-chunk.sh" UNIX_EPOCH_ZERO="@0 +0000" FUTURE_DATE="@4147483646 +0000" @@ -74,4 +75,13 @@ test_expect_success 'single commit with generation data exceeding UINT32_MAX' ' git -C repo-uint32-max commit-graph verify ' +test_expect_success 'reader notices out-of-bounds generation overflow' ' + graph=.git/objects/info/commit-graph && + test_when_finished "rm -rf $graph" && + git commit-graph write --reachable && + corrupt_chunk_file $graph GDO2 clear && + test_must_fail git log 2>err && + grep "commit-graph overflow generation data is too small" err +' + test_done |