From eb32d236df0c16b936b04f0c5402addb61cdb311 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 21 Sep 2006 00:06:49 -0400 Subject: introduce delta objects with offset to base This adds a new object, namely OBJ_OFS_DELTA, renames OBJ_DELTA to OBJ_REF_DELTA to better make the distinction between those two delta objects, and adds support for the handling of those new delta objects in sha1_file.c only. The OBJ_OFS_DELTA contains a relative offset from the delta object's position in a pack instead of the 20-byte SHA1 reference to identify the base object. Since the base is likely to be not so far away, the relative offset is more likely to have a smaller encoding on average than an absolute offset. And for those delta objects the base must always be stored first because there is no way to know the distance of later objects when streaming a pack. Hence this relative offset is always meant to be negative. The offset encoding is slightly denser than the one used for object size -- credits to (whoever this is) for bringing it to my attention. This allows for pack size reduction between 3.2% (Linux-2.6) to over 5% (linux-historic). Runtime pack access should be faster too since delta replay does skip a search in the pack index for each delta in a chain. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 16 ++++---- builtin-unpack-objects.c | 2 +- cache.h | 5 ++- index-pack.c | 8 ++-- sha1_file.c | 96 +++++++++++++++++++++++++++++++++--------------- 5 files changed, 82 insertions(+), 45 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 96c069a81d..c62734a2a6 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -232,7 +232,7 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha int n = 1; unsigned char c; - if (type < OBJ_COMMIT || type > OBJ_DELTA) + if (type < OBJ_COMMIT || type > OBJ_REF_DELTA) die("bad type %d", type); c = (type << 4) | (size & 15); @@ -297,7 +297,7 @@ static int revalidate_pack_entry(struct object_entry *entry, unsigned char *data used = unpack_object_header_gently(data, len, &type, &size); if (!used) return -1; - if (type == OBJ_DELTA) + if (type == OBJ_REF_DELTA) used += 20; /* skip base object name */ data += used; len -= used; @@ -340,7 +340,7 @@ static unsigned long write_object(struct sha1file *f, obj_type = entry->type; if (! entry->in_pack) to_reuse = 0; /* can't reuse what we don't have */ - else if (obj_type == OBJ_DELTA) + else if (obj_type == OBJ_REF_DELTA) to_reuse = 1; /* check_object() decided it for us */ else if (obj_type != entry->in_pack_type) to_reuse = 0; /* pack has delta which is unusable */ @@ -380,7 +380,7 @@ static unsigned long write_object(struct sha1file *f, if (entry->delta) { buf = delta_against(buf, size, entry); size = entry->delta_size; - obj_type = OBJ_DELTA; + obj_type = OBJ_REF_DELTA; } /* * The object header is a byte of 'type' followed by zero or @@ -409,11 +409,11 @@ static unsigned long write_object(struct sha1file *f, sha1write(f, buf, datalen); unuse_packed_git(p); hdrlen = 0; /* not really */ - if (obj_type == OBJ_DELTA) + if (obj_type == OBJ_REF_DELTA) reused_delta++; reused++; } - if (obj_type == OBJ_DELTA) + if (obj_type == OBJ_REF_DELTA) written_delta++; written++; return hdrlen + datalen; @@ -916,7 +916,7 @@ static void check_object(struct object_entry *entry) * delta. */ if (!no_reuse_delta && - entry->in_pack_type == OBJ_DELTA && + entry->in_pack_type == OBJ_REF_DELTA && (base_entry = locate_object_entry(base)) && (!base_entry->preferred_base)) { @@ -929,7 +929,7 @@ static void check_object(struct object_entry *entry) /* uncompressed size of the delta data */ entry->size = entry->delta_size = size; entry->delta = base_entry; - entry->type = OBJ_DELTA; + entry->type = OBJ_REF_DELTA; entry->delta_sibling = base_entry->delta_child; base_entry->delta_child = entry; diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c index 4f96bcae32..c6c6368956 100644 --- a/builtin-unpack-objects.c +++ b/builtin-unpack-objects.c @@ -241,7 +241,7 @@ static void unpack_one(unsigned nr, unsigned total) case OBJ_TAG: unpack_non_delta_entry(type, size); return; - case OBJ_DELTA: + case OBJ_REF_DELTA: unpack_delta_entry(size); return; default: diff --git a/cache.h b/cache.h index 97debd03c5..3c5415e77d 100644 --- a/cache.h +++ b/cache.h @@ -274,8 +274,9 @@ enum object_type { OBJ_TREE = 2, OBJ_BLOB = 3, OBJ_TAG = 4, - /* 5/6 for future expansion */ - OBJ_DELTA = 7, + /* 5 for future expansion */ + OBJ_OFS_DELTA = 6, + OBJ_REF_DELTA = 7, OBJ_BAD, }; diff --git a/index-pack.c b/index-pack.c index 80bc6cb45b..aef7f0a32e 100644 --- a/index-pack.c +++ b/index-pack.c @@ -158,7 +158,7 @@ static void *unpack_raw_entry(unsigned long offset, } switch (type) { - case OBJ_DELTA: + case OBJ_REF_DELTA: if (pos + 20 >= pack_limit) bad_object(offset, "object extends past end of pack"); hashcpy(delta_base, pack_base + pos); @@ -301,7 +301,7 @@ static void parse_pack_objects(void) data = unpack_raw_entry(offset, &obj->type, &data_size, base_sha1, &offset); obj->real_type = obj->type; - if (obj->type == OBJ_DELTA) { + if (obj->type == OBJ_REF_DELTA) { struct delta_entry *delta = &deltas[nr_deltas++]; delta->obj = obj; hashcpy(delta->base_sha1, base_sha1); @@ -328,7 +328,7 @@ static void parse_pack_objects(void) struct object_entry *obj = &objects[i]; int j, first, last; - if (obj->type == OBJ_DELTA) + if (obj->type == OBJ_REF_DELTA) continue; if (find_deltas_based_on_sha1(obj->sha1, &first, &last)) continue; @@ -341,7 +341,7 @@ static void parse_pack_objects(void) /* Check for unresolved deltas */ for (i = 0; i < nr_deltas; i++) { - if (deltas[i].obj->real_type == OBJ_DELTA) + if (deltas[i].obj->real_type == OBJ_REF_DELTA) die("packfile '%s' has unresolved deltas", pack_name); } } diff --git a/sha1_file.c b/sha1_file.c index 27b1ebb720..fdb4588280 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -883,26 +883,61 @@ void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned l return unpack_sha1_rest(&stream, hdr, *size); } +static unsigned long get_delta_base(struct packed_git *p, + unsigned long offset, + enum object_type kind, + unsigned long delta_obj_offset, + unsigned long *base_obj_offset) +{ + unsigned char *base_info = (unsigned char *) p->pack_base + offset; + unsigned long base_offset; + + /* there must be at least 20 bytes left regardless of delta type */ + if (p->pack_size <= offset + 20) + die("truncated pack file"); + + if (kind == OBJ_OFS_DELTA) { + unsigned used = 0; + unsigned char c = base_info[used++]; + base_offset = c & 127; + while (c & 128) { + base_offset += 1; + if (!base_offset || base_offset & ~(~0UL >> 7)) + die("offset value overflow for delta base object"); + c = base_info[used++]; + base_offset = (base_offset << 7) + (c & 127); + } + base_offset = delta_obj_offset - base_offset; + if (base_offset >= delta_obj_offset) + die("delta base offset out of bound"); + offset += used; + } else if (kind == OBJ_REF_DELTA) { + /* The base entry _must_ be in the same pack */ + base_offset = find_pack_entry_one(base_info, p); + if (!base_offset) + die("failed to find delta-pack base object %s", + sha1_to_hex(base_info)); + offset += 20; + } else + die("I am totally screwed"); + *base_obj_offset = base_offset; + return offset; +} + /* forward declaration for a mutually recursive function */ static int packed_object_info(struct packed_git *p, unsigned long offset, char *type, unsigned long *sizep); static int packed_delta_info(struct packed_git *p, unsigned long offset, + enum object_type kind, + unsigned long obj_offset, char *type, unsigned long *sizep) { unsigned long base_offset; - unsigned char *base_sha1 = (unsigned char *) p->pack_base + offset; - if (p->pack_size < offset + 20) - die("truncated pack file"); - /* The base entry _must_ be in the same pack */ - base_offset = find_pack_entry_one(base_sha1, p); - if (!base_offset) - die("failed to find delta-pack base object %s", - sha1_to_hex(base_sha1)); - offset += 20; + offset = get_delta_base(p, offset, kind, obj_offset, &base_offset); /* We choose to only get the type of the base object and * ignore potentially corrupt pack file that expects the delta @@ -975,7 +1010,7 @@ int check_reuse_pack_delta(struct packed_git *p, unsigned long offset, use_packed_git(p); ptr = offset; ptr = unpack_object_header(p, ptr, kindp, sizep); - if (*kindp != OBJ_DELTA) + if (*kindp != OBJ_REF_DELTA) goto done; hashcpy(base, (unsigned char *) p->pack_base + ptr); status = 0; @@ -992,11 +1027,12 @@ void packed_object_info_detail(struct packed_git *p, unsigned int *delta_chain_length, unsigned char *base_sha1) { - unsigned long val; + unsigned long obj_offset, val; unsigned char *next_sha1; enum object_type kind; *delta_chain_length = 0; + obj_offset = offset; offset = unpack_object_header(p, offset, &kind, size); for (;;) { @@ -1011,7 +1047,13 @@ void packed_object_info_detail(struct packed_git *p, strcpy(type, type_names[kind]); *store_size = 0; /* notyet */ return; - case OBJ_DELTA: + case OBJ_OFS_DELTA: + get_delta_base(p, offset, kind, obj_offset, &offset); + if (*delta_chain_length == 0) { + /* TODO: find base_sha1 as pointed by offset */ + } + break; + case OBJ_REF_DELTA: if (p->pack_size <= offset + 20) die("pack file %s records an incomplete delta base", p->pack_name); @@ -1021,6 +1063,7 @@ void packed_object_info_detail(struct packed_git *p, offset = find_pack_entry_one(next_sha1, p); break; } + obj_offset = offset; offset = unpack_object_header(p, offset, &kind, &val); (*delta_chain_length)++; } @@ -1029,15 +1072,15 @@ void packed_object_info_detail(struct packed_git *p, static int packed_object_info(struct packed_git *p, unsigned long offset, char *type, unsigned long *sizep) { - unsigned long size; + unsigned long size, obj_offset = offset; enum object_type kind; offset = unpack_object_header(p, offset, &kind, &size); - if (kind == OBJ_DELTA) - return packed_delta_info(p, offset, type, sizep); - switch (kind) { + case OBJ_OFS_DELTA: + case OBJ_REF_DELTA: + return packed_delta_info(p, offset, kind, obj_offset, type, sizep); case OBJ_COMMIT: case OBJ_TREE: case OBJ_BLOB: @@ -1083,23 +1126,15 @@ static void *unpack_compressed_entry(struct packed_git *p, static void *unpack_delta_entry(struct packed_git *p, unsigned long offset, unsigned long delta_size, + enum object_type kind, + unsigned long obj_offset, char *type, unsigned long *sizep) { void *delta_data, *result, *base; unsigned long result_size, base_size, base_offset; - unsigned char *base_sha1; - - if (p->pack_size < offset + 20) - die("truncated pack file"); - /* The base entry _must_ be in the same pack */ - base_sha1 = (unsigned char*)p->pack_base + offset; - base_offset = find_pack_entry_one(base_sha1, p); - if (!base_offset) - die("failed to find delta-pack base object %s", - sha1_to_hex(base_sha1)); - offset += 20; + offset = get_delta_base(p, offset, kind, obj_offset, &base_offset); base = unpack_entry_gently(p, base_offset, type, &base_size); if (!base) die("failed to read delta base object at %lu from %s", @@ -1136,13 +1171,14 @@ static void *unpack_entry(struct pack_entry *entry, void *unpack_entry_gently(struct packed_git *p, unsigned long offset, char *type, unsigned long *sizep) { - unsigned long size; + unsigned long size, obj_offset = offset; enum object_type kind; offset = unpack_object_header(p, offset, &kind, &size); switch (kind) { - case OBJ_DELTA: - return unpack_delta_entry(p, offset, size, type, sizep); + case OBJ_OFS_DELTA: + case OBJ_REF_DELTA: + return unpack_delta_entry(p, offset, size, kind, obj_offset, type, sizep); case OBJ_COMMIT: case OBJ_TREE: case OBJ_BLOB: -- cgit v1.2.3 From 209c554ab45701fb78de77fdcec803fef2c8fd39 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 21 Sep 2006 00:07:39 -0400 Subject: teach git-unpack-objects about deltas with offset to base For delta resolution to be possible, a list of sha1/offset tuple must be constructed in memory in order to load the appropriate base object. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-unpack-objects.c | 138 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 106 insertions(+), 32 deletions(-) diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c index c6c6368956..e70a71163d 100644 --- a/builtin-unpack-objects.c +++ b/builtin-unpack-objects.c @@ -15,7 +15,7 @@ static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-fil /* We always read in 4kB chunks. */ static unsigned char buffer[4096]; -static unsigned long offset, len; +static unsigned long offset, len, consumed_bytes; static SHA_CTX ctx; /* @@ -51,6 +51,7 @@ static void use(int bytes) die("used more bytes than were available"); len -= bytes; offset += bytes; + consumed_bytes += bytes; } static void *get_data(unsigned long size) @@ -89,35 +90,49 @@ static void *get_data(unsigned long size) struct delta_info { unsigned char base_sha1[20]; + unsigned long base_offset; unsigned long size; void *delta; + unsigned nr; struct delta_info *next; }; static struct delta_info *delta_list; -static void add_delta_to_list(unsigned char *base_sha1, void *delta, unsigned long size) +static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1, + unsigned long base_offset, + void *delta, unsigned long size) { struct delta_info *info = xmalloc(sizeof(*info)); hashcpy(info->base_sha1, base_sha1); + info->base_offset = base_offset; info->size = size; info->delta = delta; + info->nr = nr; info->next = delta_list; delta_list = info; } -static void added_object(unsigned char *sha1, const char *type, void *data, unsigned long size); +struct obj_info { + unsigned long offset; + unsigned char sha1[20]; +}; + +static struct obj_info *obj_list; -static void write_object(void *buf, unsigned long size, const char *type) +static void added_object(unsigned nr, const char *type, void *data, + unsigned long size); + +static void write_object(unsigned nr, void *buf, unsigned long size, + const char *type) { - unsigned char sha1[20]; - if (write_sha1_file(buf, size, type, sha1) < 0) + if (write_sha1_file(buf, size, type, obj_list[nr].sha1) < 0) die("failed to write object"); - added_object(sha1, type, buf, size); + added_object(nr, type, buf, size); } -static void resolve_delta(const char *type, +static void resolve_delta(unsigned nr, const char *type, void *base, unsigned long base_size, void *delta, unsigned long delta_size) { @@ -130,20 +145,23 @@ static void resolve_delta(const char *type, if (!result) die("failed to apply delta"); free(delta); - write_object(result, result_size, type); + write_object(nr, result, result_size, type); free(result); } -static void added_object(unsigned char *sha1, const char *type, void *data, unsigned long size) +static void added_object(unsigned nr, const char *type, void *data, + unsigned long size) { struct delta_info **p = &delta_list; struct delta_info *info; while ((info = *p) != NULL) { - if (!hashcmp(info->base_sha1, sha1)) { + if (!hashcmp(info->base_sha1, obj_list[nr].sha1) || + info->base_offset == obj_list[nr].offset) { *p = info->next; p = &delta_list; - resolve_delta(type, data, size, info->delta, info->size); + resolve_delta(info->nr, type, data, size, + info->delta, info->size); free(info); continue; } @@ -151,7 +169,8 @@ static void added_object(unsigned char *sha1, const char *type, void *data, unsi } } -static void unpack_non_delta_entry(enum object_type kind, unsigned long size) +static void unpack_non_delta_entry(enum object_type kind, unsigned long size, + unsigned nr) { void *buf = get_data(size); const char *type; @@ -164,30 +183,80 @@ static void unpack_non_delta_entry(enum object_type kind, unsigned long size) default: die("bad type %d", kind); } if (!dry_run && buf) - write_object(buf, size, type); + write_object(nr, buf, size, type); free(buf); } -static void unpack_delta_entry(unsigned long delta_size) +static void unpack_delta_entry(enum object_type kind, unsigned long delta_size, + unsigned nr) { void *delta_data, *base; unsigned long base_size; char type[20]; unsigned char base_sha1[20]; - hashcpy(base_sha1, fill(20)); - use(20); + if (kind == OBJ_REF_DELTA) { + hashcpy(base_sha1, fill(20)); + use(20); + delta_data = get_data(delta_size); + if (dry_run || !delta_data) { + free(delta_data); + return; + } + if (!has_sha1_file(base_sha1)) { + hashcpy(obj_list[nr].sha1, null_sha1); + add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size); + return; + } + } else { + unsigned base_found = 0; + unsigned char *pack, c; + unsigned long base_offset; + unsigned lo, mid, hi; - delta_data = get_data(delta_size); - if (dry_run || !delta_data) { - free(delta_data); - return; - } + pack = fill(1); + c = *pack; + use(1); + base_offset = c & 127; + while (c & 128) { + base_offset += 1; + if (!base_offset || base_offset & ~(~0UL >> 7)) + die("offset value overflow for delta base object"); + pack = fill(1); + c = *pack; + use(1); + base_offset = (base_offset << 7) + (c & 127); + } + base_offset = obj_list[nr].offset - base_offset; - if (!has_sha1_file(base_sha1)) { - add_delta_to_list(base_sha1, delta_data, delta_size); - return; + delta_data = get_data(delta_size); + if (dry_run || !delta_data) { + free(delta_data); + return; + } + lo = 0; + hi = nr; + while (lo < hi) { + mid = (lo + hi)/2; + if (base_offset < obj_list[mid].offset) { + hi = mid; + } else if (base_offset > obj_list[mid].offset) { + lo = mid + 1; + } else { + hashcpy(base_sha1, obj_list[mid].sha1); + base_found = !is_null_sha1(base_sha1); + break; + } + } + if (!base_found) { + /* The delta base object is itself a delta that + has not been resolved yet. */ + hashcpy(obj_list[nr].sha1, null_sha1); + add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size); + return; + } } + base = read_sha1_file(base_sha1, type, &base_size); if (!base) { error("failed to read delta-pack base object %s", @@ -197,7 +266,7 @@ static void unpack_delta_entry(unsigned long delta_size) has_errors = 1; return; } - resolve_delta(type, base, base_size, delta_data, delta_size); + resolve_delta(nr, type, base, base_size, delta_data, delta_size); free(base); } @@ -208,6 +277,8 @@ static void unpack_one(unsigned nr, unsigned total) unsigned long size; enum object_type type; + obj_list[nr].offset = consumed_bytes; + pack = fill(1); c = *pack; use(1); @@ -216,7 +287,7 @@ static void unpack_one(unsigned nr, unsigned total) shift = 4; while (c & 0x80) { pack = fill(1); - c = *pack++; + c = *pack; use(1); size += (c & 0x7f) << shift; shift += 7; @@ -225,13 +296,14 @@ static void unpack_one(unsigned nr, unsigned total) static unsigned long last_sec; static unsigned last_percent; struct timeval now; - unsigned percentage = (nr * 100) / total; + unsigned percentage = ((nr+1) * 100) / total; gettimeofday(&now, NULL); if (percentage != last_percent || now.tv_sec != last_sec) { last_sec = now.tv_sec; last_percent = percentage; - fprintf(stderr, "%4u%% (%u/%u) done\r", percentage, nr, total); + fprintf(stderr, "%4u%% (%u/%u) done\r", + percentage, (nr+1), total); } } switch (type) { @@ -239,10 +311,11 @@ static void unpack_one(unsigned nr, unsigned total) case OBJ_TREE: case OBJ_BLOB: case OBJ_TAG: - unpack_non_delta_entry(type, size); + unpack_non_delta_entry(type, size, nr); return; case OBJ_REF_DELTA: - unpack_delta_entry(size); + case OBJ_OFS_DELTA: + unpack_delta_entry(type, size, nr); return; default: error("bad object type %d", type); @@ -265,9 +338,10 @@ static void unpack_all(void) die("unknown pack file version %d", ntohl(hdr->hdr_version)); fprintf(stderr, "Unpacking %d objects\n", nr_objects); + obj_list = xmalloc(nr_objects * sizeof(*obj_list)); use(sizeof(struct pack_header)); for (i = 0; i < nr_objects; i++) - unpack_one(i+1, nr_objects); + unpack_one(i, nr_objects); if (delta_list) die("unresolved deltas left after unpacking"); } -- cgit v1.2.3 From 53dda6ff6263a3f350514d9edae600468c946ed4 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 21 Sep 2006 00:08:33 -0400 Subject: teach git-index-pack about deltas with offset to base Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- index-pack.c | 111 ++++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 80 insertions(+), 31 deletions(-) diff --git a/index-pack.c b/index-pack.c index aef7f0a32e..fffddd25c9 100644 --- a/index-pack.c +++ b/index-pack.c @@ -18,10 +18,15 @@ struct object_entry unsigned char sha1[20]; }; +union delta_base { + unsigned char sha1[20]; + unsigned long offset; +}; + struct delta_entry { struct object_entry *obj; - unsigned char base_sha1[20]; + union delta_base base; }; static const char *pack_name; @@ -134,13 +139,13 @@ static void *unpack_entry_data(unsigned long offset, static void *unpack_raw_entry(unsigned long offset, enum object_type *obj_type, unsigned long *obj_size, - unsigned char *delta_base, + union delta_base *delta_base, unsigned long *next_obj_offset) { unsigned long pack_limit = pack_size - 20; unsigned long pos = offset; unsigned char c; - unsigned long size; + unsigned long size, base_offset; unsigned shift; enum object_type type; void *data; @@ -161,26 +166,43 @@ static void *unpack_raw_entry(unsigned long offset, case OBJ_REF_DELTA: if (pos + 20 >= pack_limit) bad_object(offset, "object extends past end of pack"); - hashcpy(delta_base, pack_base + pos); + hashcpy(delta_base->sha1, pack_base + pos); pos += 20; - /* fallthru */ + break; + case OBJ_OFS_DELTA: + memset(delta_base, 0, sizeof(*delta_base)); + c = pack_base[pos++]; + base_offset = c & 127; + while (c & 128) { + base_offset += 1; + if (!base_offset || base_offset & ~(~0UL >> 7)) + bad_object(offset, "offset value overflow for delta base object"); + if (pos >= pack_limit) + bad_object(offset, "object extends past end of pack"); + c = pack_base[pos++]; + base_offset = (base_offset << 7) + (c & 127); + } + delta_base->offset = offset - base_offset; + if (delta_base->offset >= offset) + bad_object(offset, "delta base offset is out of bound"); + break; case OBJ_COMMIT: case OBJ_TREE: case OBJ_BLOB: case OBJ_TAG: - data = unpack_entry_data(offset, &pos, size); break; default: bad_object(offset, "bad object type %d", type); } + data = unpack_entry_data(offset, &pos, size); *obj_type = type; *obj_size = size; *next_obj_offset = pos; return data; } -static int find_delta(const unsigned char *base_sha1) +static int find_delta(const union delta_base *base) { int first = 0, last = nr_deltas; @@ -189,7 +211,7 @@ static int find_delta(const unsigned char *base_sha1) struct delta_entry *delta = &deltas[next]; int cmp; - cmp = hashcmp(base_sha1, delta->base_sha1); + cmp = memcmp(base, &delta->base, sizeof(*base)); if (!cmp) return next; if (cmp < 0) { @@ -201,18 +223,18 @@ static int find_delta(const unsigned char *base_sha1) return -first-1; } -static int find_deltas_based_on_sha1(const unsigned char *base_sha1, - int *first_index, int *last_index) +static int find_delta_childs(const union delta_base *base, + int *first_index, int *last_index) { - int first = find_delta(base_sha1); + int first = find_delta(base); int last = first; int end = nr_deltas - 1; if (first < 0) return -1; - while (first > 0 && !hashcmp(deltas[first - 1].base_sha1, base_sha1)) + while (first > 0 && !memcmp(&deltas[first - 1].base, base, sizeof(*base))) --first; - while (last < end && !hashcmp(deltas[last + 1].base_sha1, base_sha1)) + while (last < end && !memcmp(&deltas[last + 1].base, base, sizeof(*base))) ++last; *first_index = first; *last_index = last; @@ -253,13 +275,13 @@ static void resolve_delta(struct delta_entry *delta, void *base_data, void *result; unsigned long result_size; enum object_type delta_type; - unsigned char base_sha1[20]; + union delta_base delta_base; unsigned long next_obj_offset; int j, first, last; obj->real_type = type; delta_data = unpack_raw_entry(obj->offset, &delta_type, - &delta_size, base_sha1, + &delta_size, &delta_base, &next_obj_offset); result = patch_delta(base_data, base_size, delta_data, delta_size, &result_size); @@ -267,10 +289,22 @@ static void resolve_delta(struct delta_entry *delta, void *base_data, if (!result) bad_object(obj->offset, "failed to apply delta"); sha1_object(result, result_size, type, obj->sha1); - if (!find_deltas_based_on_sha1(obj->sha1, &first, &last)) { + + hashcpy(delta_base.sha1, obj->sha1); + if (!find_delta_childs(&delta_base, &first, &last)) { for (j = first; j <= last; j++) - resolve_delta(&deltas[j], result, result_size, type); + if (deltas[j].obj->type == OBJ_REF_DELTA) + resolve_delta(&deltas[j], result, result_size, type); } + + memset(&delta_base, 0, sizeof(delta_base)); + delta_base.offset = obj->offset; + if (!find_delta_childs(&delta_base, &first, &last)) { + for (j = first; j <= last; j++) + if (deltas[j].obj->type == OBJ_OFS_DELTA) + resolve_delta(&deltas[j], result, result_size, type); + } + free(result); } @@ -278,14 +312,14 @@ static int compare_delta_entry(const void *a, const void *b) { const struct delta_entry *delta_a = a; const struct delta_entry *delta_b = b; - return hashcmp(delta_a->base_sha1, delta_b->base_sha1); + return memcmp(&delta_a->base, &delta_b->base, sizeof(union delta_base)); } static void parse_pack_objects(void) { int i; unsigned long offset = sizeof(struct pack_header); - unsigned char base_sha1[20]; + struct delta_entry *delta = deltas; void *data; unsigned long data_size; @@ -299,12 +333,12 @@ static void parse_pack_objects(void) struct object_entry *obj = &objects[i]; obj->offset = offset; data = unpack_raw_entry(offset, &obj->type, &data_size, - base_sha1, &offset); + &delta->base, &offset); obj->real_type = obj->type; - if (obj->type == OBJ_REF_DELTA) { - struct delta_entry *delta = &deltas[nr_deltas++]; + if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) { + nr_deltas++; delta->obj = obj; - hashcpy(delta->base_sha1, base_sha1); + delta++; } else sha1_object(data, data_size, obj->type, obj->sha1); free(data); @@ -312,7 +346,7 @@ static void parse_pack_objects(void) if (offset != pack_size - 20) die("packfile '%s' has junk at the end", pack_name); - /* Sort deltas by base SHA1 for fast searching */ + /* Sort deltas by base SHA1/offset for fast searching */ qsort(deltas, nr_deltas, sizeof(struct delta_entry), compare_delta_entry); @@ -326,22 +360,37 @@ static void parse_pack_objects(void) */ for (i = 0; i < nr_objects; i++) { struct object_entry *obj = &objects[i]; - int j, first, last; + union delta_base base; + int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last; - if (obj->type == OBJ_REF_DELTA) + if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) continue; - if (find_deltas_based_on_sha1(obj->sha1, &first, &last)) + hashcpy(base.sha1, obj->sha1); + ref = !find_delta_childs(&base, &ref_first, &ref_last); + memset(&base, 0, sizeof(base)); + base.offset = obj->offset; + ofs = !find_delta_childs(&base, &ofs_first, &ofs_last); + if (!ref && !ofs) continue; data = unpack_raw_entry(obj->offset, &obj->type, &data_size, - base_sha1, &offset); - for (j = first; j <= last; j++) - resolve_delta(&deltas[j], data, data_size, obj->type); + &base, &offset); + if (ref) + for (j = ref_first; j <= ref_last; j++) + if (deltas[j].obj->type == OBJ_REF_DELTA) + resolve_delta(&deltas[j], data, + data_size, obj->type); + if (ofs) + for (j = ofs_first; j <= ofs_last; j++) + if (deltas[j].obj->type == OBJ_OFS_DELTA) + resolve_delta(&deltas[j], data, + data_size, obj->type); free(data); } /* Check for unresolved deltas */ for (i = 0; i < nr_deltas; i++) { - if (deltas[i].obj->real_type == OBJ_REF_DELTA) + if (deltas[i].obj->real_type == OBJ_REF_DELTA || + deltas[i].obj->real_type == OBJ_OFS_DELTA) die("packfile '%s' has unresolved deltas", pack_name); } } -- cgit v1.2.3 From be6b19145f64f62790049c06320c35011f7312a7 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 21 Sep 2006 00:09:44 -0400 Subject: make git-pack-objects able to create deltas with offset to base This is enabled with --delta-base-offset only, and doesn't work with pack data reuse yet. The idea is to allow for the fetch protocol to use an extension flag to notify the remote end that --delta-base-offset can be used with git-pack-objects. Eventually git-repack will always provide this flag. With this, all delta base objects are now pushed before deltas that depend on them. This is a requirements for OBJ_OFS_DELTA. This is not a requirement for OBJ_REF_DELTA but always doing so makes the code simpler. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 47 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index c62734a2a6..221264916d 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -60,6 +60,8 @@ static int non_empty; static int no_reuse_delta; static int local; static int incremental; +static int allow_ofs_delta; + static struct object_entry **sorted_by_sha, **sorted_by_type; static struct object_entry *objects; static int nr_objects, nr_alloc, nr_result; @@ -334,9 +336,6 @@ static unsigned long write_object(struct sha1file *f, enum object_type obj_type; int to_reuse = 0; - if (entry->preferred_base) - return 0; - obj_type = entry->type; if (! entry->in_pack) to_reuse = 0; /* can't reuse what we don't have */ @@ -380,18 +379,35 @@ static unsigned long write_object(struct sha1file *f, if (entry->delta) { buf = delta_against(buf, size, entry); size = entry->delta_size; - obj_type = OBJ_REF_DELTA; + obj_type = (allow_ofs_delta && entry->delta->offset) ? + OBJ_OFS_DELTA : OBJ_REF_DELTA; } /* * The object header is a byte of 'type' followed by zero or - * more bytes of length. For deltas, the 20 bytes of delta - * sha1 follows that. + * more bytes of length. */ hdrlen = encode_header(obj_type, size, header); sha1write(f, header, hdrlen); - if (entry->delta) { - sha1write(f, entry->delta, 20); + if (obj_type == OBJ_OFS_DELTA) { + /* + * Deltas with relative base contain an additional + * encoding of the relative offset for the delta + * base from this object's position in the pack. + */ + unsigned long ofs = entry->offset - entry->delta->offset; + unsigned pos = sizeof(header) - 1; + header[pos] = ofs & 127; + while (ofs >>= 7) + header[--pos] = 128 | (--ofs & 127); + sha1write(f, header + pos, sizeof(header) - pos); + hdrlen += sizeof(header) - pos; + } else if (obj_type == OBJ_REF_DELTA) { + /* + * Deltas with a base reference contain + * an additional 20 bytes for the base sha1. + */ + sha1write(f, entry->delta->sha1, 20); hdrlen += 20; } datalen = sha1write_compressed(f, buf, size); @@ -413,7 +429,7 @@ static unsigned long write_object(struct sha1file *f, reused_delta++; reused++; } - if (obj_type == OBJ_REF_DELTA) + if (entry->delta) written_delta++; written++; return hdrlen + datalen; @@ -423,17 +439,16 @@ static unsigned long write_one(struct sha1file *f, struct object_entry *e, unsigned long offset) { - if (e->offset) + if (e->offset || e->preferred_base) /* offset starts from header size and cannot be zero * if it is written already. */ return offset; - e->offset = offset; - offset += write_object(f, e); - /* if we are deltified, write out its base object. */ + /* if we are deltified, write out its base object first. */ if (e->delta) offset = write_one(f, e->delta, offset); - return offset; + e->offset = offset; + return offset + write_object(f, e); } static void write_pack_file(void) @@ -1484,6 +1499,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) no_reuse_delta = 1; continue; } + if (!strcmp("--delta-base-offset", arg)) { + allow_ofs_delta = no_reuse_delta = 1; + continue; + } if (!strcmp("--stdout", arg)) { pack_to_stdout = 1; continue; -- cgit v1.2.3 From 780e6e735be189097dad4b223d8edeb18cce1928 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 22 Sep 2006 21:25:04 -0400 Subject: make pack data reuse compatible with both delta types This is the missing part to git-pack-objects allowing it to reuse delta data to/from any of the two delta types. It can reuse delta from any type, and it outputs base offsets when --allow-delta-base-offset is provided and the base is also included in the pack. Otherwise it outputs base sha1 references just like it always did. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 204 +++++++++++++++++++++++++++++++------------------ pack.h | 3 - sha1_file.c | 19 ----- 3 files changed, 130 insertions(+), 96 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 221264916d..6db97b685f 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -29,6 +29,7 @@ struct object_entry { enum object_type type; enum object_type in_pack_type; /* could be delta */ unsigned long delta_size; /* delta data size (uncompressed) */ +#define in_pack_header_size delta_size /* only when reusing pack data */ struct object_entry *delta; /* delta base object */ struct packed_git *in_pack; /* already in pack */ unsigned int in_pack_offset; @@ -86,17 +87,25 @@ static int object_ix_hashsz; * Pack index for existing packs give us easy access to the offsets into * corresponding pack file where each object's data starts, but the entries * do not store the size of the compressed representation (uncompressed - * size is easily available by examining the pack entry header). We build - * a hashtable of existing packs (pack_revindex), and keep reverse index - * here -- pack index file is sorted by object name mapping to offset; this - * pack_revindex[].revindex array is an ordered list of offsets, so if you - * know the offset of an object, next offset is where its packed - * representation ends. + * size is easily available by examining the pack entry header). It is + * also rather expensive to find the sha1 for an object given its offset. + * + * We build a hashtable of existing packs (pack_revindex), and keep reverse + * index here -- pack index file is sorted by object name mapping to offset; + * this pack_revindex[].revindex array is a list of offset/index_nr pairs + * ordered by offset, so if you know the offset of an object, next offset + * is where its packed representation ends and the index_nr can be used to + * get the object sha1 from the main index. */ +struct revindex_entry { + unsigned int offset; + unsigned int nr; +}; struct pack_revindex { struct packed_git *p; - unsigned long *revindex; -} *pack_revindex = NULL; + struct revindex_entry *revindex; +}; +static struct pack_revindex *pack_revindex; static int pack_revindex_hashsz; /* @@ -143,14 +152,9 @@ static void prepare_pack_ix(void) static int cmp_offset(const void *a_, const void *b_) { - unsigned long a = *(unsigned long *) a_; - unsigned long b = *(unsigned long *) b_; - if (a < b) - return -1; - else if (a == b) - return 0; - else - return 1; + const struct revindex_entry *a = a_; + const struct revindex_entry *b = b_; + return (a->offset < b->offset) ? -1 : (a->offset > b->offset) ? 1 : 0; } /* @@ -163,25 +167,27 @@ static void prepare_pack_revindex(struct pack_revindex *rix) int i; void *index = p->index_base + 256; - rix->revindex = xmalloc(sizeof(unsigned long) * (num_ent + 1)); + rix->revindex = xmalloc(sizeof(*rix->revindex) * (num_ent + 1)); for (i = 0; i < num_ent; i++) { unsigned int hl = *((unsigned int *)((char *) index + 24*i)); - rix->revindex[i] = ntohl(hl); + rix->revindex[i].offset = ntohl(hl); + rix->revindex[i].nr = i; } /* This knows the pack format -- the 20-byte trailer * follows immediately after the last object data. */ - rix->revindex[num_ent] = p->pack_size - 20; - qsort(rix->revindex, num_ent, sizeof(unsigned long), cmp_offset); + rix->revindex[num_ent].offset = p->pack_size - 20; + rix->revindex[num_ent].nr = -1; + qsort(rix->revindex, num_ent, sizeof(*rix->revindex), cmp_offset); } -static unsigned long find_packed_object_size(struct packed_git *p, - unsigned long ofs) +static struct revindex_entry * find_packed_object(struct packed_git *p, + unsigned int ofs) { int num; int lo, hi; struct pack_revindex *rix; - unsigned long *revindex; + struct revindex_entry *revindex; num = pack_revindex_ix(p); if (num < 0) die("internal error: pack revindex uninitialized"); @@ -193,10 +199,10 @@ static unsigned long find_packed_object_size(struct packed_git *p, hi = num_packed_objects(p) + 1; do { int mi = (lo + hi) / 2; - if (revindex[mi] == ofs) { - return revindex[mi+1] - ofs; + if (revindex[mi].offset == ofs) { + return revindex + mi; } - else if (ofs < revindex[mi]) + else if (ofs < revindex[mi].offset) hi = mi; else lo = mi + 1; @@ -204,6 +210,20 @@ static unsigned long find_packed_object_size(struct packed_git *p, die("internal error: pack revindex corrupt"); } +static unsigned long find_packed_object_size(struct packed_git *p, + unsigned long ofs) +{ + struct revindex_entry *entry = find_packed_object(p, ofs); + return entry[1].offset - ofs; +} + +static unsigned char *find_packed_object_name(struct packed_git *p, + unsigned long ofs) +{ + struct revindex_entry *entry = find_packed_object(p, ofs); + return (unsigned char *)(p->index_base + 256) + 24 * entry->nr + 4; +} + static void *delta_against(void *buf, unsigned long size, struct object_entry *entry) { unsigned long othersize, delta_size; @@ -249,6 +269,10 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha return n; } +/* + * we are going to reuse the existing object data as is. make + * sure it is not corrupt. + */ static int check_inflate(unsigned char *data, unsigned long len, unsigned long expect) { z_stream stream; @@ -280,32 +304,6 @@ static int check_inflate(unsigned char *data, unsigned long len, unsigned long e return st; } -/* - * we are going to reuse the existing pack entry data. make - * sure it is not corrupt. - */ -static int revalidate_pack_entry(struct object_entry *entry, unsigned char *data, unsigned long len) -{ - enum object_type type; - unsigned long size, used; - - if (pack_to_stdout) - return 0; - - /* the caller has already called use_packed_git() for us, - * so it is safe to access the pack data from mmapped location. - * make sure the entry inflates correctly. - */ - used = unpack_object_header_gently(data, len, &type, &size); - if (!used) - return -1; - if (type == OBJ_REF_DELTA) - used += 20; /* skip base object name */ - data += used; - len -= used; - return check_inflate(data, len, entry->size); -} - static int revalidate_loose_object(struct object_entry *entry, unsigned char *map, unsigned long mapsize) @@ -339,7 +337,7 @@ static unsigned long write_object(struct sha1file *f, obj_type = entry->type; if (! entry->in_pack) to_reuse = 0; /* can't reuse what we don't have */ - else if (obj_type == OBJ_REF_DELTA) + else if (obj_type == OBJ_REF_DELTA || obj_type == OBJ_OFS_DELTA) to_reuse = 1; /* check_object() decided it for us */ else if (obj_type != entry->in_pack_type) to_reuse = 0; /* pack has delta which is unusable */ @@ -415,18 +413,38 @@ static unsigned long write_object(struct sha1file *f, } else { struct packed_git *p = entry->in_pack; - use_packed_git(p); - datalen = find_packed_object_size(p, entry->in_pack_offset); - buf = (char *) p->pack_base + entry->in_pack_offset; + if (entry->delta) { + obj_type = (allow_ofs_delta && entry->delta->offset) ? + OBJ_OFS_DELTA : OBJ_REF_DELTA; + reused_delta++; + } + hdrlen = encode_header(obj_type, entry->size, header); + sha1write(f, header, hdrlen); + if (obj_type == OBJ_OFS_DELTA) { + unsigned long ofs = entry->offset - entry->delta->offset; + unsigned pos = sizeof(header) - 1; + header[pos] = ofs & 127; + while (ofs >>= 7) + header[--pos] = 128 | (--ofs & 127); + sha1write(f, header + pos, sizeof(header) - pos); + hdrlen += sizeof(header) - pos; + } else if (obj_type == OBJ_REF_DELTA) { + sha1write(f, entry->delta->sha1, 20); + hdrlen += 20; + } - if (revalidate_pack_entry(entry, buf, datalen)) + use_packed_git(p); + buf = (char *) p->pack_base + + entry->in_pack_offset + + entry->in_pack_header_size; + datalen = find_packed_object_size(p, entry->in_pack_offset) + - entry->in_pack_header_size; +//fprintf(stderr, "reusing %d at %d header %d size %d\n", obj_type, entry->in_pack_offset, entry->in_pack_header_size, datalen); + if (!pack_to_stdout && check_inflate(buf, datalen, entry->size)) die("corrupt delta in pack %s", sha1_to_hex(entry->sha1)); sha1write(f, buf, datalen); unuse_packed_git(p); - hdrlen = 0; /* not really */ - if (obj_type == OBJ_REF_DELTA) - reused_delta++; reused++; } if (entry->delta) @@ -914,26 +932,64 @@ static void check_object(struct object_entry *entry) char type[20]; if (entry->in_pack && !entry->preferred_base) { - unsigned char base[20]; - unsigned long size; - struct object_entry *base_entry; + struct packed_git *p = entry->in_pack; + unsigned long left = p->pack_size - entry->in_pack_offset; + unsigned long size, used; + unsigned char *buf; + struct object_entry *base_entry = NULL; + + use_packed_git(p); + buf = p->pack_base; + buf += entry->in_pack_offset; /* We want in_pack_type even if we do not reuse delta. * There is no point not reusing non-delta representations. */ - check_reuse_pack_delta(entry->in_pack, - entry->in_pack_offset, - base, &size, - &entry->in_pack_type); + used = unpack_object_header_gently(buf, left, + &entry->in_pack_type, &size); + if (!used || left - used <= 20) + die("corrupt pack for %s", sha1_to_hex(entry->sha1)); /* Check if it is delta, and the base is also an object * we are going to pack. If so we will reuse the existing * delta. */ - if (!no_reuse_delta && - entry->in_pack_type == OBJ_REF_DELTA && - (base_entry = locate_object_entry(base)) && - (!base_entry->preferred_base)) { + if (!no_reuse_delta) { + unsigned char c, *base_name; + unsigned long ofs; + /* there is at least 20 bytes left in the pack */ + switch (entry->in_pack_type) { + case OBJ_REF_DELTA: + base_name = buf + used; + used += 20; + break; + case OBJ_OFS_DELTA: + c = buf[used++]; + ofs = c & 127; + while (c & 128) { + ofs += 1; + if (!ofs || ofs & ~(~0UL >> 7)) + die("delta base offset overflow in pack for %s", + sha1_to_hex(entry->sha1)); + c = buf[used++]; + ofs = (ofs << 7) + (c & 127); + } + if (ofs >= entry->in_pack_offset) + die("delta base offset out of bound for %s", + sha1_to_hex(entry->sha1)); + ofs = entry->in_pack_offset - ofs; + base_name = find_packed_object_name(p, ofs); + break; + default: + base_name = NULL; + } + if (base_name) + base_entry = locate_object_entry(base_name); + } + unuse_packed_git(p); + entry->in_pack_header_size = used; + + if (base_entry && !base_entry->preferred_base) { /* Depth value does not matter - find_deltas() * will never consider reused delta as the @@ -942,9 +998,9 @@ static void check_object(struct object_entry *entry) */ /* uncompressed size of the delta data */ - entry->size = entry->delta_size = size; + entry->size = size; entry->delta = base_entry; - entry->type = OBJ_REF_DELTA; + entry->type = entry->in_pack_type; entry->delta_sibling = base_entry->delta_child; base_entry->delta_child = entry; @@ -1500,7 +1556,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) continue; } if (!strcmp("--delta-base-offset", arg)) { - allow_ofs_delta = no_reuse_delta = 1; + allow_ofs_delta = 1; continue; } if (!strcmp("--stdout", arg)) { diff --git a/pack.h b/pack.h index 05557da152..4c9bddd4f2 100644 --- a/pack.h +++ b/pack.h @@ -16,7 +16,4 @@ struct pack_header { }; extern int verify_pack(struct packed_git *, int); -extern int check_reuse_pack_delta(struct packed_git *, unsigned long, - unsigned char *, unsigned long *, - enum object_type *); #endif diff --git a/sha1_file.c b/sha1_file.c index fdb4588280..18c2f88112 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1000,25 +1000,6 @@ static unsigned long unpack_object_header(struct packed_git *p, unsigned long of return offset + used; } -int check_reuse_pack_delta(struct packed_git *p, unsigned long offset, - unsigned char *base, unsigned long *sizep, - enum object_type *kindp) -{ - unsigned long ptr; - int status = -1; - - use_packed_git(p); - ptr = offset; - ptr = unpack_object_header(p, ptr, kindp, sizep); - if (*kindp != OBJ_REF_DELTA) - goto done; - hashcpy(base, (unsigned char *) p->pack_base + ptr); - status = 0; - done: - unuse_packed_git(p); - return status; -} - void packed_object_info_detail(struct packed_git *p, unsigned long offset, char *type, -- cgit v1.2.3 From e4fe4b8ef7cdde842a9e5e2594d0fba1367d9dd3 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 26 Sep 2006 11:27:39 -0400 Subject: let the GIT native protocol use offsets to delta base when possible There is no reason not to always do this when both ends agree. Therefore a client that can accept offsets to delta base always sends the "ofs-delta" flag. The server will stream a pack with or without offset to delta base depending on whether that flag is provided or not with no additional cost. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- fetch-pack.c | 5 +++-- upload-pack.c | 10 +++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/fetch-pack.c b/fetch-pack.c index e8708aa802..474d54520e 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -166,12 +166,13 @@ static int find_common(int fd[2], unsigned char *result_sha1, } if (!fetching) - packet_write(fd[1], "want %s%s%s%s%s\n", + packet_write(fd[1], "want %s%s%s%s%s%s\n", sha1_to_hex(remote), (multi_ack ? " multi_ack" : ""), (use_sideband == 2 ? " side-band-64k" : ""), (use_sideband == 1 ? " side-band" : ""), - (use_thin_pack ? " thin-pack" : "")); + (use_thin_pack ? " thin-pack" : ""), + " ofs-delta"); else packet_write(fd[1], "want %s\n", sha1_to_hex(remote)); fetching++; diff --git a/upload-pack.c b/upload-pack.c index 189b239cc0..9ec3775049 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -16,7 +16,7 @@ static const char upload_pack_usage[] = "git-upload-pack [--strict] [--timeout=n #define OUR_REF (1U << 1) #define WANTED (1U << 2) static int multi_ack, nr_our_refs; -static int use_thin_pack; +static int use_thin_pack, use_ofs_delta; static struct object_array have_obj; static struct object_array want_obj; static unsigned int timeout; @@ -137,7 +137,9 @@ static void create_pack_file(void) close(pu_pipe[1]); close(pe_pipe[0]); close(pe_pipe[1]); - execl_git_cmd("pack-objects", "--stdout", "--progress", NULL); + execl_git_cmd("pack-objects", "--stdout", "--progress", + use_ofs_delta ? "--delta-base-offset" : NULL, + NULL); kill(pid_rev_list, SIGKILL); die("git-upload-pack: unable to exec git-pack-objects"); } @@ -393,6 +395,8 @@ static void receive_needs(void) multi_ack = 1; if (strstr(line+45, "thin-pack")) use_thin_pack = 1; + if (strstr(line+45, "ofs-delta")) + use_ofs_delta = 1; if (strstr(line+45, "side-band-64k")) use_sideband = LARGE_PACKET_MAX; else if (strstr(line+45, "side-band")) @@ -418,7 +422,7 @@ static void receive_needs(void) static int send_ref(const char *refname, const unsigned char *sha1) { - static const char *capabilities = "multi_ack thin-pack side-band side-band-64k"; + static const char *capabilities = "multi_ack thin-pack side-band side-band-64k ofs-delta"; struct object *o = parse_object(sha1); if (!o) -- cgit v1.2.3 From f130446920b550a69716346fb9a9947c04fc7f90 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 27 Sep 2006 15:30:21 -0400 Subject: zap a debug remnant Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 1 - 1 file changed, 1 deletion(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 6db97b685f..16e98f3f3f 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -440,7 +440,6 @@ static unsigned long write_object(struct sha1file *f, + entry->in_pack_header_size; datalen = find_packed_object_size(p, entry->in_pack_offset) - entry->in_pack_header_size; -//fprintf(stderr, "reusing %d at %d header %d size %d\n", obj_type, entry->in_pack_offset, entry->in_pack_header_size, datalen); if (!pack_to_stdout && check_inflate(buf, datalen, entry->size)) die("corrupt delta in pack %s", sha1_to_hex(entry->sha1)); sha1write(f, buf, datalen); -- cgit v1.2.3 From a2700696995651322796e04092bf4a4bfed31b88 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 27 Sep 2006 15:42:16 -0400 Subject: allow delta data reuse even if base object is a preferred base Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 16e98f3f3f..ee5f031bc2 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -988,7 +988,7 @@ static void check_object(struct object_entry *entry) unuse_packed_git(p); entry->in_pack_header_size = used; - if (base_entry && !base_entry->preferred_base) { + if (base_entry) { /* Depth value does not matter - find_deltas() * will never consider reused delta as the -- cgit v1.2.3 From 63fba759bc1d9405f362e73918096815bf8e2a15 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 10 Oct 2006 01:06:20 -0700 Subject: pack-objects: document --delta-base-offset option Signed-off-by: Junio C Hamano --- Documentation/git-pack-objects.txt | 13 ++++++++++++- builtin-pack-objects.c | 2 +- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt index d4661ddc2f..5788709710 100644 --- a/Documentation/git-pack-objects.txt +++ b/Documentation/git-pack-objects.txt @@ -9,7 +9,7 @@ git-pack-objects - Create a packed archive of objects SYNOPSIS -------- [verse] -'git-pack-objects' [-q] [--no-reuse-delta] [--non-empty] +'git-pack-objects' [-q] [--no-reuse-delta] [--delta-base-offset] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] [--revs [--unpacked | --all]*] [--stdout | base-name] < object-list @@ -110,6 +110,17 @@ base-name:: This flag tells the command not to reuse existing deltas but compute them from scratch. +--delta-base-offset:: + A packed archive can express base object of a delta as + either 20-byte object name or as an offset in the + stream, but older version of git does not understand the + latter. By default, git-pack-objects only uses the + former format for better compatibility. This option + allows the command to use the latter format for + compactness. Depending on the average delta chain + length, this option typically shrinks the resulting + packfile by 3-5 per-cent. + Author ------ diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index ee5f031bc2..41e1e74533 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -15,7 +15,7 @@ #include #include -static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] [--revs [--unpacked | --all]*] [--stdout | base-name] Date: Fri, 13 Oct 2006 21:28:58 -0700 Subject: git-repack: repo.usedeltabaseoffset When configuration variable `repack.UseDeltaBaseOffset` is set for the repository, the command passes `--delta-base-offset` option to `git-pack-objects`; this typically results in slightly smaller packs, but the generated packs are incompatible with versions of git older than (and including) v1.4.3. We will make it default to true sometime in the future, but not for a while. Signed-off-by: Junio C Hamano --- Documentation/config.txt | 4 ++++ Documentation/git-repack.txt | 14 ++++++++++++++ git-repack.sh | 11 ++++++++++- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index 98c1f3e2e3..88e0bf00c2 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -224,6 +224,10 @@ pull.octopus:: pull.twohead:: The default merge strategy to use when pulling a single branch. +repack.usedeltabaseoffset:: + Allow gitlink:git-repack[1] to create packs that uses + delta-base offset. Defaults to false. + show.difftree:: The default gitlink:git-diff-tree[1] arguments to be used for gitlink:git-show[1]. diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt index 49f7e0a4a4..4e6631a27f 100644 --- a/Documentation/git-repack.txt +++ b/Documentation/git-repack.txt @@ -66,6 +66,20 @@ OPTIONS to be applied that many times to get to the necessary object. +Configuration +------------- + +When configuration variable `repack.UseDeltaBaseOffset` is set +for the repository, the command passes `--delta-base-offset` +option to `git-pack-objects`; this typically results in slightly +smaller packs, but the generated packs are incompatible with +versions of git older than (and including) v1.4.3; do not set +the variable in a repository that older version of git needs to +be able to read (this includes repositories from which packs can +be copied out over http or rsync, and people who obtained packs +that way can try to use older git with it). + + Author ------ Written by Linus Torvalds diff --git a/git-repack.sh b/git-repack.sh index b525fc5dfd..2a214891a8 100755 --- a/git-repack.sh +++ b/git-repack.sh @@ -3,7 +3,7 @@ # Copyright (c) 2005 Linus Torvalds # -USAGE='[-a] [-d] [-f] [-l] [-n] [-q]' +USAGE='[-a] [-d] [-f] [-l] [-n] [-q] [--window=N] [--depth=N]' . git-sh-setup no_update_info= all_into_one= remove_redundant= @@ -24,6 +24,15 @@ do shift done +# Later we will default repack.UseDeltaBaseOffset to true +default_dbo=false + +case "`git repo-config --bool repack.usedeltabaseoffset || + echo $default_dbo`" in +true) + extra="$extra --delta-base-offset" ;; +esac + PACKDIR="$GIT_OBJECT_DIRECTORY/pack" PACKTMP="$GIT_DIR/.tmp-$$-pack" rm -f "$PACKTMP"-* -- cgit v1.2.3 From 3c552873c698117689af4e5159c7e491fe3a89a3 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 17 Oct 2006 16:23:26 -0400 Subject: index-pack: compare only the first 20-bytes of the key. The "union delta_base" is a strange beast. It is a 20-byte binary blob key to search a binary searchable deltas[] array, each element of which uses it to represent its base object with either a full 20-byte SHA-1 or an offset in the pack. Which representation is used is determined by another field of the deltas[] array element, obj->type, so there is no room for confusion, as long as we make sure we compare the keys for the same type only with appropriate length. The code compared the full union with memcmp(). When storing the in-pack offset, the union was first cleared before storing an unsigned long, so comparison worked fine. On 64-bit architectures, however, the union typically is 24-byte long; the code did not clear the remaining 4-byte alignment padding when storing a full 20-byte SHA-1 representation. Using memcmp() to compare the whole union was wrong. This fixes the comparison to look at the first 20-bytes of the union, regardless of the architecture. As long as ulong is smaller than 20-bytes this works fine. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- index-pack.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/index-pack.c b/index-pack.c index fffddd25c9..56c590e3fa 100644 --- a/index-pack.c +++ b/index-pack.c @@ -23,6 +23,12 @@ union delta_base { unsigned long offset; }; +/* + * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want + * to memcmp() only the first 20 bytes. + */ +#define UNION_BASE_SZ 20 + struct delta_entry { struct object_entry *obj; @@ -211,7 +217,7 @@ static int find_delta(const union delta_base *base) struct delta_entry *delta = &deltas[next]; int cmp; - cmp = memcmp(base, &delta->base, sizeof(*base)); + cmp = memcmp(base, &delta->base, UNION_BASE_SZ); if (!cmp) return next; if (cmp < 0) { @@ -232,9 +238,9 @@ static int find_delta_childs(const union delta_base *base, if (first < 0) return -1; - while (first > 0 && !memcmp(&deltas[first - 1].base, base, sizeof(*base))) + while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ)) --first; - while (last < end && !memcmp(&deltas[last + 1].base, base, sizeof(*base))) + while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ)) ++last; *first_index = first; *last_index = last; @@ -312,7 +318,7 @@ static int compare_delta_entry(const void *a, const void *b) { const struct delta_entry *delta_a = a; const struct delta_entry *delta_b = b; - return memcmp(&delta_a->base, &delta_b->base, sizeof(union delta_base)); + return memcmp(&delta_a->base, &delta_b->base, UNION_BASE_SZ); } static void parse_pack_objects(void) -- cgit v1.2.3 From 2d477051ef260aad352d63fc7d9c07e4ebb4359b Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 20 Oct 2006 14:45:21 -0400 Subject: add the capability for index-pack to read from a stream This patch only adds the streaming capability to index-pack. Although the code is different it has the exact same functionality as before to make sure nothing broke. This is in preparation for receiving packs over the net, parse them on the fly, fix them up if they are "thin" packs, and keep the resulting pack instead of exploding it into loose objects. But such functionality should come separately. One immediate advantage of this patch is that index-pack can now deal with packs up to 4GB in size even on 32-bit architectures since the pack is not entirely mmap()'d all at once anymore. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- index-pack.c | 244 ++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 143 insertions(+), 101 deletions(-) diff --git a/index-pack.c b/index-pack.c index 56c590e3fa..e33f60524f 100644 --- a/index-pack.c +++ b/index-pack.c @@ -13,6 +13,8 @@ static const char index_pack_usage[] = struct object_entry { unsigned long offset; + unsigned long size; + unsigned int hdr_size; enum object_type type; enum object_type real_type; unsigned char sha1[20]; @@ -36,51 +38,68 @@ struct delta_entry }; static const char *pack_name; -static unsigned char *pack_base; -static unsigned long pack_size; static struct object_entry *objects; static struct delta_entry *deltas; static int nr_objects; static int nr_deltas; -static void open_pack_file(void) +/* We always read in 4kB chunks. */ +static unsigned char input_buffer[4096]; +static unsigned long input_offset, input_len, consumed_bytes; +static SHA_CTX input_ctx; +static int input_fd; + +/* + * Make sure at least "min" bytes are available in the buffer, and + * return the pointer to the buffer. + */ +static void * fill(int min) { - int fd; - struct stat st; + if (min <= input_len) + return input_buffer + input_offset; + if (min > sizeof(input_buffer)) + die("cannot fill %d bytes", min); + if (input_offset) { + SHA1_Update(&input_ctx, input_buffer, input_offset); + memcpy(input_buffer, input_buffer + input_offset, input_len); + input_offset = 0; + } + do { + int ret = xread(input_fd, input_buffer + input_len, + sizeof(input_buffer) - input_len); + if (ret <= 0) { + if (!ret) + die("early EOF"); + die("read error on input: %s", strerror(errno)); + } + input_len += ret; + } while (input_len < min); + return input_buffer; +} + +static void use(int bytes) +{ + if (bytes > input_len) + die("used more bytes than were available"); + input_len -= bytes; + input_offset += bytes; + consumed_bytes += bytes; +} - fd = open(pack_name, O_RDONLY); - if (fd < 0) +static void open_pack_file(void) +{ + input_fd = open(pack_name, O_RDONLY); + if (input_fd < 0) die("cannot open packfile '%s': %s", pack_name, strerror(errno)); - if (fstat(fd, &st)) { - int err = errno; - close(fd); - die("cannot fstat packfile '%s': %s", pack_name, - strerror(err)); - } - pack_size = st.st_size; - pack_base = mmap(NULL, pack_size, PROT_READ, MAP_PRIVATE, fd, 0); - if (pack_base == MAP_FAILED) { - int err = errno; - close(fd); - die("cannot mmap packfile '%s': %s", pack_name, - strerror(err)); - } - close(fd); + SHA1_Init(&input_ctx); } static void parse_pack_header(void) { - const struct pack_header *hdr; - unsigned char sha1[20]; - SHA_CTX ctx; - - /* Ensure there are enough bytes for the header and final SHA1 */ - if (pack_size < sizeof(struct pack_header) + 20) - die("packfile '%s' is too small", pack_name); + struct pack_header *hdr = fill(sizeof(struct pack_header)); /* Header consistency check */ - hdr = (void *)pack_base; if (hdr->hdr_signature != htonl(PACK_SIGNATURE)) die("packfile '%s' signature mismatch", pack_name); if (!pack_version_ok(hdr->hdr_version)) @@ -88,13 +107,8 @@ static void parse_pack_header(void) pack_name, ntohl(hdr->hdr_version)); nr_objects = ntohl(hdr->hdr_entries); - - /* Check packfile integrity */ - SHA1_Init(&ctx); - SHA1_Update(&ctx, pack_base, pack_size - 20); - SHA1_Final(sha1, &ctx); - if (hashcmp(sha1, pack_base + pack_size - 20)) - die("packfile '%s' SHA1 mismatch", pack_name); + use(sizeof(struct pack_header)); + /*fprintf(stderr, "Indexing %d objects\n", nr_objects);*/ } static void bad_object(unsigned long offset, const char *format, @@ -112,85 +126,78 @@ static void bad_object(unsigned long offset, const char *format, ...) pack_name, offset, buf); } -static void *unpack_entry_data(unsigned long offset, - unsigned long *current_pos, unsigned long size) +static void *unpack_entry_data(unsigned long offset, unsigned long size) { - unsigned long pack_limit = pack_size - 20; - unsigned long pos = *current_pos; z_stream stream; void *buf = xmalloc(size); memset(&stream, 0, sizeof(stream)); stream.next_out = buf; stream.avail_out = size; - stream.next_in = pack_base + pos; - stream.avail_in = pack_limit - pos; + stream.next_in = fill(1); + stream.avail_in = input_len; inflateInit(&stream); for (;;) { int ret = inflate(&stream, 0); - if (ret == Z_STREAM_END) + use(input_len - stream.avail_in); + if (stream.total_out == size && ret == Z_STREAM_END) break; if (ret != Z_OK) bad_object(offset, "inflate returned %d", ret); + stream.next_in = fill(1); + stream.avail_in = input_len; } inflateEnd(&stream); - if (stream.total_out != size) - bad_object(offset, "size mismatch (expected %lu, got %lu)", - size, stream.total_out); - *current_pos = pack_limit - stream.avail_in; return buf; } -static void *unpack_raw_entry(unsigned long offset, - enum object_type *obj_type, - unsigned long *obj_size, - union delta_base *delta_base, - unsigned long *next_obj_offset) +static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_base) { - unsigned long pack_limit = pack_size - 20; - unsigned long pos = offset; - unsigned char c; + unsigned char *p, c; unsigned long size, base_offset; unsigned shift; - enum object_type type; - void *data; - c = pack_base[pos++]; - type = (c >> 4) & 7; + obj->offset = consumed_bytes; + + p = fill(1); + c = *p; + use(1); + obj->type = (c >> 4) & 7; size = (c & 15); shift = 4; while (c & 0x80) { - if (pos >= pack_limit) - bad_object(offset, "object extends past end of pack"); - c = pack_base[pos++]; + p = fill(1); + c = *p; + use(1); size += (c & 0x7fUL) << shift; shift += 7; } + obj->size = size; - switch (type) { + switch (obj->type) { case OBJ_REF_DELTA: - if (pos + 20 >= pack_limit) - bad_object(offset, "object extends past end of pack"); - hashcpy(delta_base->sha1, pack_base + pos); - pos += 20; + hashcpy(delta_base->sha1, fill(20)); + use(20); break; case OBJ_OFS_DELTA: memset(delta_base, 0, sizeof(*delta_base)); - c = pack_base[pos++]; + p = fill(1); + c = *p; + use(1); base_offset = c & 127; while (c & 128) { base_offset += 1; if (!base_offset || base_offset & ~(~0UL >> 7)) - bad_object(offset, "offset value overflow for delta base object"); - if (pos >= pack_limit) - bad_object(offset, "object extends past end of pack"); - c = pack_base[pos++]; + bad_object(obj->offset, "offset value overflow for delta base object"); + p = fill(1); + c = *p; + use(1); base_offset = (base_offset << 7) + (c & 127); } - delta_base->offset = offset - base_offset; - if (delta_base->offset >= offset) - bad_object(offset, "delta base offset is out of bound"); + delta_base->offset = obj->offset - base_offset; + if (delta_base->offset >= obj->offset) + bad_object(obj->offset, "delta base offset is out of bound"); break; case OBJ_COMMIT: case OBJ_TREE: @@ -198,13 +205,38 @@ static void *unpack_raw_entry(unsigned long offset, case OBJ_TAG: break; default: - bad_object(offset, "bad object type %d", type); + bad_object(obj->offset, "bad object type %d", obj->type); } + obj->hdr_size = consumed_bytes - obj->offset; + + return unpack_entry_data(obj->offset, obj->size); +} + +static void * get_data_from_pack(struct object_entry *obj) +{ + unsigned long from = obj[0].offset + obj[0].hdr_size; + unsigned long len = obj[1].offset - from; + unsigned pg_offset = from % getpagesize(); + unsigned char *map, *data; + z_stream stream; + int st; - data = unpack_entry_data(offset, &pos, size); - *obj_type = type; - *obj_size = size; - *next_obj_offset = pos; + map = mmap(NULL, len + pg_offset, PROT_READ, MAP_PRIVATE, + input_fd, from - pg_offset); + if (map == MAP_FAILED) + die("cannot mmap packfile '%s': %s", pack_name, strerror(errno)); + data = xmalloc(obj->size); + memset(&stream, 0, sizeof(stream)); + stream.next_out = data; + stream.avail_out = obj->size; + stream.next_in = map + pg_offset; + stream.avail_in = len; + inflateInit(&stream); + while ((st = inflate(&stream, Z_FINISH)) == Z_OK); + inflateEnd(&stream); + if (st != Z_STREAM_END || stream.total_out != obj->size) + die("serious inflate inconsistency"); + munmap(map, len + pg_offset); return data; } @@ -280,15 +312,12 @@ static void resolve_delta(struct delta_entry *delta, void *base_data, unsigned long delta_size; void *result; unsigned long result_size; - enum object_type delta_type; union delta_base delta_base; - unsigned long next_obj_offset; int j, first, last; obj->real_type = type; - delta_data = unpack_raw_entry(obj->offset, &delta_type, - &delta_size, &delta_base, - &next_obj_offset); + delta_data = get_data_from_pack(obj); + delta_size = obj->size; result = patch_delta(base_data, base_size, delta_data, delta_size, &result_size); free(delta_data); @@ -321,13 +350,13 @@ static int compare_delta_entry(const void *a, const void *b) return memcmp(&delta_a->base, &delta_b->base, UNION_BASE_SZ); } -static void parse_pack_objects(void) +/* Parse all objects and return the pack content SHA1 hash */ +static void parse_pack_objects(unsigned char *sha1) { int i; - unsigned long offset = sizeof(struct pack_header); struct delta_entry *delta = deltas; void *data; - unsigned long data_size; + struct stat st; /* * First pass: @@ -337,19 +366,29 @@ static void parse_pack_objects(void) */ for (i = 0; i < nr_objects; i++) { struct object_entry *obj = &objects[i]; - obj->offset = offset; - data = unpack_raw_entry(offset, &obj->type, &data_size, - &delta->base, &offset); + data = unpack_raw_entry(obj, &delta->base); obj->real_type = obj->type; if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) { nr_deltas++; delta->obj = obj; delta++; } else - sha1_object(data, data_size, obj->type, obj->sha1); + sha1_object(data, obj->size, obj->type, obj->sha1); free(data); } - if (offset != pack_size - 20) + objects[i].offset = consumed_bytes; + + /* Check pack integrity */ + SHA1_Update(&input_ctx, input_buffer, input_offset); + SHA1_Final(sha1, &input_ctx); + if (hashcmp(fill(20), sha1)) + die("packfile '%s' SHA1 mismatch", pack_name); + use(20); + + /* If input_fd is a file, we should have reached its end now. */ + if (fstat(input_fd, &st)) + die("cannot fstat packfile '%s': %s", pack_name, strerror(errno)); + if (S_ISREG(st.st_mode) && st.st_size != consumed_bytes) die("packfile '%s' has junk at the end", pack_name); /* Sort deltas by base SHA1/offset for fast searching */ @@ -378,18 +417,17 @@ static void parse_pack_objects(void) ofs = !find_delta_childs(&base, &ofs_first, &ofs_last); if (!ref && !ofs) continue; - data = unpack_raw_entry(obj->offset, &obj->type, &data_size, - &base, &offset); + data = get_data_from_pack(obj); if (ref) for (j = ref_first; j <= ref_last; j++) if (deltas[j].obj->type == OBJ_REF_DELTA) resolve_delta(&deltas[j], data, - data_size, obj->type); + obj->size, obj->type); if (ofs) for (j = ofs_first; j <= ofs_last; j++) if (deltas[j].obj->type == OBJ_OFS_DELTA) resolve_delta(&deltas[j], data, - data_size, obj->type); + obj->size, obj->type); free(data); } @@ -408,6 +446,10 @@ static int sha1_compare(const void *_a, const void *_b) return hashcmp(a->sha1, b->sha1); } +/* + * On entry *sha1 contains the pack content SHA1 hash, on exit it is + * the SHA1 hash of sorted object names. + */ static void write_index_file(const char *index_name, unsigned char *sha1) { struct sha1file *f; @@ -467,7 +509,7 @@ static void write_index_file(const char *index_name, unsigned char *sha1) sha1write(f, obj->sha1, 20); SHA1_Update(&ctx, obj->sha1, 20); } - sha1write(f, pack_base + pack_size - 20, 20); + sha1write(f, sha1, 20); sha1close(f, NULL, 1); free(sorted_by_sha); SHA1_Final(sha1, &ctx); @@ -513,9 +555,9 @@ int main(int argc, char **argv) open_pack_file(); parse_pack_header(); - objects = xcalloc(nr_objects, sizeof(struct object_entry)); + objects = xcalloc(nr_objects + 1, sizeof(struct object_entry)); deltas = xcalloc(nr_objects, sizeof(struct delta_entry)); - parse_pack_objects(); + parse_pack_objects(sha1); free(deltas); write_index_file(index_name, sha1); free(objects); -- cgit v1.2.3