summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJunio C Hamano <junkio@cox.net>2006-10-23 07:51:42 +0200
committerJunio C Hamano <junkio@cox.net>2006-10-23 07:51:42 +0200
commit05eb811aa1546b696c6c4d55593cfd4fbef0dce5 (patch)
treebad8266ba08310f00a1998063c545dd3707fdc3c
parentMerge branch 'maint' (diff)
parentadd the capability for index-pack to read from a stream (diff)
downloadgit-05eb811aa1546b696c6c4d55593cfd4fbef0dce5.tar.xz
git-05eb811aa1546b696c6c4d55593cfd4fbef0dce5.zip
Merge branch 'np/pack'
* np/pack:
  add the capability for index-pack to read from a stream
  index-pack: compare only the first 20-bytes of the key.
  git-repack: repo.usedeltabaseoffset
  pack-objects: document --delta-base-offset option
  allow delta data reuse even if base object is a preferred base
  zap a debug remnant
  let the GIT native protocol use offsets to delta base when possible
  make pack data reuse compatible with both delta types
  make git-pack-objects able to create deltas with offset to base
  teach git-index-pack about deltas with offset to base
  teach git-unpack-objects about deltas with offset to base
  introduce delta objects with offset to base
-rw-r--r--Documentation/config.txt4
-rw-r--r--Documentation/git-pack-objects.txt13
-rw-r--r--Documentation/git-repack.txt14
-rw-r--r--builtin-pack-objects.c252
-rw-r--r--builtin-unpack-objects.c140
-rw-r--r--cache.h5
-rw-r--r--fetch-pack.c5
-rwxr-xr-xgit-repack.sh11
-rw-r--r--index-pack.c331
-rw-r--r--pack.h3
-rw-r--r--sha1_file.c113
-rw-r--r--upload-pack.c10
12 files changed, 602 insertions, 299 deletions
diff --git a/Documentation/config.txt b/Documentation/config.txt
index 84e38911ee..05d657444f 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -230,6 +230,10 @@ pull.octopus::
pull.twohead::
The default merge strategy to use when pulling a single branch.
+repack.usedeltabaseoffset::
+ Allow gitlink:git-repack[1] to create packs that use
+ delta-base offset. Defaults to false.
+
show.difftree::
The default gitlink:git-diff-tree[1] arguments to be used
for gitlink:git-show[1].
diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt
index f52e8fa8bf..a1e55054bd 100644
--- a/Documentation/git-pack-objects.txt
+++ b/Documentation/git-pack-objects.txt
@@ -9,7 +9,7 @@ git-pack-objects - Create a packed archive of objects
SYNOPSIS
--------
[verse]
-'git-pack-objects' [-q] [--no-reuse-delta] [--non-empty]
+'git-pack-objects' [-q] [--no-reuse-delta] [--delta-base-offset] [--non-empty]
[--local] [--incremental] [--window=N] [--depth=N]
[--revs [--unpacked | --all]*] [--stdout | base-name] < object-list
@@ -111,6 +111,17 @@ base-name::
This flag tells the command not to reuse existing deltas
but compute them from scratch.
+--delta-base-offset::
+ A packed archive can express the base object of a delta as
+ either a 20-byte object name or as an offset in the
+ stream, but older versions of git do not understand the
+ latter. By default, git-pack-objects only uses the
+ former format for better compatibility. This option
+ allows the command to use the latter format for
+ compactness. Depending on the average delta chain
+ length, this option typically shrinks the resulting
+ packfile by 3-5 percent.
+
Author
------
diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt
index d2eaa0995d..0fa47e3b01 100644
--- a/Documentation/git-repack.txt
+++ b/Documentation/git-repack.txt
@@ -67,6 +67,20 @@ OPTIONS
The default value for both --window and --depth is 10.
+Configuration
+-------------
+
+When configuration variable `repack.UseDeltaBaseOffset` is set
+for the repository, the command passes `--delta-base-offset`
+option to `git-pack-objects`; this typically results in slightly
+smaller packs, but the generated packs are incompatible with
+versions of git older than (and including) v1.4.3; do not set
+the variable in a repository that older versions of git need to
+be able to read (this includes repositories from which packs can
+be copied out over http or rsync, and people who obtained packs
+that way can try to use older git with them).
+
+
Author
------
Written by Linus Torvalds <torvalds@osdl.org>
diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 96c069a81d..41e1e74533 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -15,7 +15,7 @@
#include <sys/time.h>
#include <signal.h>
-static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] [--revs [--unpacked | --all]*] [--stdout | base-name] <ref-list | <object-list]";
+static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--delta-base-offset] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] [--revs [--unpacked | --all]*] [--stdout | base-name] <ref-list | <object-list]";
struct object_entry {
unsigned char sha1[20];
@@ -29,6 +29,7 @@ struct object_entry {
enum object_type type;
enum object_type in_pack_type; /* could be delta */
unsigned long delta_size; /* delta data size (uncompressed) */
+#define in_pack_header_size delta_size /* only when reusing pack data */
struct object_entry *delta; /* delta base object */
struct packed_git *in_pack; /* already in pack */
unsigned int in_pack_offset;
@@ -60,6 +61,8 @@ static int non_empty;
static int no_reuse_delta;
static int local;
static int incremental;
+static int allow_ofs_delta;
+
static struct object_entry **sorted_by_sha, **sorted_by_type;
static struct object_entry *objects;
static int nr_objects, nr_alloc, nr_result;
@@ -84,17 +87,25 @@ static int object_ix_hashsz;
* Pack index for existing packs give us easy access to the offsets into
* corresponding pack file where each object's data starts, but the entries
* do not store the size of the compressed representation (uncompressed
- * size is easily available by examining the pack entry header). We build
- * a hashtable of existing packs (pack_revindex), and keep reverse index
- * here -- pack index file is sorted by object name mapping to offset; this
- * pack_revindex[].revindex array is an ordered list of offsets, so if you
- * know the offset of an object, next offset is where its packed
- * representation ends.
+ * size is easily available by examining the pack entry header). It is
+ * also rather expensive to find the sha1 for an object given its offset.
+ *
+ * We build a hashtable of existing packs (pack_revindex), and keep reverse
+ * index here -- pack index file is sorted by object name mapping to offset;
+ * this pack_revindex[].revindex array is a list of offset/index_nr pairs
+ * ordered by offset, so if you know the offset of an object, next offset
+ * is where its packed representation ends and the index_nr can be used to
+ * get the object sha1 from the main index.
*/
+struct revindex_entry {
+ unsigned int offset;
+ unsigned int nr;
+};
struct pack_revindex {
struct packed_git *p;
- unsigned long *revindex;
-} *pack_revindex = NULL;
+ struct revindex_entry *revindex;
+};
+static struct pack_revindex *pack_revindex;
static int pack_revindex_hashsz;
/*
@@ -141,14 +152,9 @@ static void prepare_pack_ix(void)
static int cmp_offset(const void *a_, const void *b_)
{
- unsigned long a = *(unsigned long *) a_;
- unsigned long b = *(unsigned long *) b_;
- if (a < b)
- return -1;
- else if (a == b)
- return 0;
- else
- return 1;
+ const struct revindex_entry *a = a_;
+ const struct revindex_entry *b = b_;
+ return (a->offset < b->offset) ? -1 : (a->offset > b->offset) ? 1 : 0;
}
/*
@@ -161,25 +167,27 @@ static void prepare_pack_revindex(struct pack_revindex *rix)
int i;
void *index = p->index_base + 256;
- rix->revindex = xmalloc(sizeof(unsigned long) * (num_ent + 1));
+ rix->revindex = xmalloc(sizeof(*rix->revindex) * (num_ent + 1));
for (i = 0; i < num_ent; i++) {
unsigned int hl = *((unsigned int *)((char *) index + 24*i));
- rix->revindex[i] = ntohl(hl);
+ rix->revindex[i].offset = ntohl(hl);
+ rix->revindex[i].nr = i;
}
/* This knows the pack format -- the 20-byte trailer
* follows immediately after the last object data.
*/
- rix->revindex[num_ent] = p->pack_size - 20;
- qsort(rix->revindex, num_ent, sizeof(unsigned long), cmp_offset);
+ rix->revindex[num_ent].offset = p->pack_size - 20;
+ rix->revindex[num_ent].nr = -1;
+ qsort(rix->revindex, num_ent, sizeof(*rix->revindex), cmp_offset);
}
-static unsigned long find_packed_object_size(struct packed_git *p,
- unsigned long ofs)
+static struct revindex_entry * find_packed_object(struct packed_git *p,
+ unsigned int ofs)
{
int num;
int lo, hi;
struct pack_revindex *rix;
- unsigned long *revindex;
+ struct revindex_entry *revindex;
num = pack_revindex_ix(p);
if (num < 0)
die("internal error: pack revindex uninitialized");
@@ -191,10 +199,10 @@ static unsigned long find_packed_object_size(struct packed_git *p,
hi = num_packed_objects(p) + 1;
do {
int mi = (lo + hi) / 2;
- if (revindex[mi] == ofs) {
- return revindex[mi+1] - ofs;
+ if (revindex[mi].offset == ofs) {
+ return revindex + mi;
}
- else if (ofs < revindex[mi])
+ else if (ofs < revindex[mi].offset)
hi = mi;
else
lo = mi + 1;
@@ -202,6 +210,20 @@ static unsigned long find_packed_object_size(struct packed_git *p,
die("internal error: pack revindex corrupt");
}
+static unsigned long find_packed_object_size(struct packed_git *p,
+ unsigned long ofs)
+{
+ struct revindex_entry *entry = find_packed_object(p, ofs);
+ return entry[1].offset - ofs;
+}
+
+static unsigned char *find_packed_object_name(struct packed_git *p,
+ unsigned long ofs)
+{
+ struct revindex_entry *entry = find_packed_object(p, ofs);
+ return (unsigned char *)(p->index_base + 256) + 24 * entry->nr + 4;
+}
+
static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
{
unsigned long othersize, delta_size;
@@ -232,7 +254,7 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha
int n = 1;
unsigned char c;
- if (type < OBJ_COMMIT || type > OBJ_DELTA)
+ if (type < OBJ_COMMIT || type > OBJ_REF_DELTA)
die("bad type %d", type);
c = (type << 4) | (size & 15);
@@ -247,6 +269,10 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha
return n;
}
+/*
+ * we are going to reuse the existing object data as is. make
+ * sure it is not corrupt.
+ */
static int check_inflate(unsigned char *data, unsigned long len, unsigned long expect)
{
z_stream stream;
@@ -278,32 +304,6 @@ static int check_inflate(unsigned char *data, unsigned long len, unsigned long e
return st;
}
-/*
- * we are going to reuse the existing pack entry data. make
- * sure it is not corrupt.
- */
-static int revalidate_pack_entry(struct object_entry *entry, unsigned char *data, unsigned long len)
-{
- enum object_type type;
- unsigned long size, used;
-
- if (pack_to_stdout)
- return 0;
-
- /* the caller has already called use_packed_git() for us,
- * so it is safe to access the pack data from mmapped location.
- * make sure the entry inflates correctly.
- */
- used = unpack_object_header_gently(data, len, &type, &size);
- if (!used)
- return -1;
- if (type == OBJ_DELTA)
- used += 20; /* skip base object name */
- data += used;
- len -= used;
- return check_inflate(data, len, entry->size);
-}
-
static int revalidate_loose_object(struct object_entry *entry,
unsigned char *map,
unsigned long mapsize)
@@ -334,13 +334,10 @@ static unsigned long write_object(struct sha1file *f,
enum object_type obj_type;
int to_reuse = 0;
- if (entry->preferred_base)
- return 0;
-
obj_type = entry->type;
if (! entry->in_pack)
to_reuse = 0; /* can't reuse what we don't have */
- else if (obj_type == OBJ_DELTA)
+ else if (obj_type == OBJ_REF_DELTA || obj_type == OBJ_OFS_DELTA)
to_reuse = 1; /* check_object() decided it for us */
else if (obj_type != entry->in_pack_type)
to_reuse = 0; /* pack has delta which is unusable */
@@ -380,18 +377,35 @@ static unsigned long write_object(struct sha1file *f,
if (entry->delta) {
buf = delta_against(buf, size, entry);
size = entry->delta_size;
- obj_type = OBJ_DELTA;
+ obj_type = (allow_ofs_delta && entry->delta->offset) ?
+ OBJ_OFS_DELTA : OBJ_REF_DELTA;
}
/*
* The object header is a byte of 'type' followed by zero or
- * more bytes of length. For deltas, the 20 bytes of delta
- * sha1 follows that.
+ * more bytes of length.
*/
hdrlen = encode_header(obj_type, size, header);
sha1write(f, header, hdrlen);
- if (entry->delta) {
- sha1write(f, entry->delta, 20);
+ if (obj_type == OBJ_OFS_DELTA) {
+ /*
+ * Deltas with relative base contain an additional
+ * encoding of the relative offset for the delta
+ * base from this object's position in the pack.
+ */
+ unsigned long ofs = entry->offset - entry->delta->offset;
+ unsigned pos = sizeof(header) - 1;
+ header[pos] = ofs & 127;
+ while (ofs >>= 7)
+ header[--pos] = 128 | (--ofs & 127);
+ sha1write(f, header + pos, sizeof(header) - pos);
+ hdrlen += sizeof(header) - pos;
+ } else if (obj_type == OBJ_REF_DELTA) {
+ /*
+ * Deltas with a base reference contain
+ * an additional 20 bytes for the base sha1.
+ */
+ sha1write(f, entry->delta->sha1, 20);
hdrlen += 20;
}
datalen = sha1write_compressed(f, buf, size);
@@ -399,21 +413,40 @@ static unsigned long write_object(struct sha1file *f,
}
else {
struct packed_git *p = entry->in_pack;
- use_packed_git(p);
- datalen = find_packed_object_size(p, entry->in_pack_offset);
- buf = (char *) p->pack_base + entry->in_pack_offset;
+ if (entry->delta) {
+ obj_type = (allow_ofs_delta && entry->delta->offset) ?
+ OBJ_OFS_DELTA : OBJ_REF_DELTA;
+ reused_delta++;
+ }
+ hdrlen = encode_header(obj_type, entry->size, header);
+ sha1write(f, header, hdrlen);
+ if (obj_type == OBJ_OFS_DELTA) {
+ unsigned long ofs = entry->offset - entry->delta->offset;
+ unsigned pos = sizeof(header) - 1;
+ header[pos] = ofs & 127;
+ while (ofs >>= 7)
+ header[--pos] = 128 | (--ofs & 127);
+ sha1write(f, header + pos, sizeof(header) - pos);
+ hdrlen += sizeof(header) - pos;
+ } else if (obj_type == OBJ_REF_DELTA) {
+ sha1write(f, entry->delta->sha1, 20);
+ hdrlen += 20;
+ }
- if (revalidate_pack_entry(entry, buf, datalen))
+ use_packed_git(p);
+ buf = (char *) p->pack_base
+ + entry->in_pack_offset
+ + entry->in_pack_header_size;
+ datalen = find_packed_object_size(p, entry->in_pack_offset)
+ - entry->in_pack_header_size;
+ if (!pack_to_stdout && check_inflate(buf, datalen, entry->size))
die("corrupt delta in pack %s", sha1_to_hex(entry->sha1));
sha1write(f, buf, datalen);
unuse_packed_git(p);
- hdrlen = 0; /* not really */
- if (obj_type == OBJ_DELTA)
- reused_delta++;
reused++;
}
- if (obj_type == OBJ_DELTA)
+ if (entry->delta)
written_delta++;
written++;
return hdrlen + datalen;
@@ -423,17 +456,16 @@ static unsigned long write_one(struct sha1file *f,
struct object_entry *e,
unsigned long offset)
{
- if (e->offset)
+ if (e->offset || e->preferred_base)
/* offset starts from header size and cannot be zero
* if it is written already.
*/
return offset;
- e->offset = offset;
- offset += write_object(f, e);
- /* if we are deltified, write out its base object. */
+ /* if we are deltified, write out its base object first. */
if (e->delta)
offset = write_one(f, e->delta, offset);
- return offset;
+ e->offset = offset;
+ return offset + write_object(f, e);
}
static void write_pack_file(void)
@@ -899,26 +931,64 @@ static void check_object(struct object_entry *entry)
char type[20];
if (entry->in_pack && !entry->preferred_base) {
- unsigned char base[20];
- unsigned long size;
- struct object_entry *base_entry;
+ struct packed_git *p = entry->in_pack;
+ unsigned long left = p->pack_size - entry->in_pack_offset;
+ unsigned long size, used;
+ unsigned char *buf;
+ struct object_entry *base_entry = NULL;
+
+ use_packed_git(p);
+ buf = p->pack_base;
+ buf += entry->in_pack_offset;
/* We want in_pack_type even if we do not reuse delta.
* There is no point not reusing non-delta representations.
*/
- check_reuse_pack_delta(entry->in_pack,
- entry->in_pack_offset,
- base, &size,
- &entry->in_pack_type);
+ used = unpack_object_header_gently(buf, left,
+ &entry->in_pack_type, &size);
+ if (!used || left - used <= 20)
+ die("corrupt pack for %s", sha1_to_hex(entry->sha1));
/* Check if it is delta, and the base is also an object
* we are going to pack. If so we will reuse the existing
* delta.
*/
- if (!no_reuse_delta &&
- entry->in_pack_type == OBJ_DELTA &&
- (base_entry = locate_object_entry(base)) &&
- (!base_entry->preferred_base)) {
+ if (!no_reuse_delta) {
+ unsigned char c, *base_name;
+ unsigned long ofs;
+ /* there is at least 20 bytes left in the pack */
+ switch (entry->in_pack_type) {
+ case OBJ_REF_DELTA:
+ base_name = buf + used;
+ used += 20;
+ break;
+ case OBJ_OFS_DELTA:
+ c = buf[used++];
+ ofs = c & 127;
+ while (c & 128) {
+ ofs += 1;
+ if (!ofs || ofs & ~(~0UL >> 7))
+ die("delta base offset overflow in pack for %s",
+ sha1_to_hex(entry->sha1));
+ c = buf[used++];
+ ofs = (ofs << 7) + (c & 127);
+ }
+ if (ofs >= entry->in_pack_offset)
+ die("delta base offset out of bound for %s",
+ sha1_to_hex(entry->sha1));
+ ofs = entry->in_pack_offset - ofs;
+ base_name = find_packed_object_name(p, ofs);
+ break;
+ default:
+ base_name = NULL;
+ }
+ if (base_name)
+ base_entry = locate_object_entry(base_name);
+ }
+ unuse_packed_git(p);
+ entry->in_pack_header_size = used;
+
+ if (base_entry) {
/* Depth value does not matter - find_deltas()
* will never consider reused delta as the
@@ -927,9 +997,9 @@ static void check_object(struct object_entry *entry)
*/
/* uncompressed size of the delta data */
- entry->size = entry->delta_size = size;
+ entry->size = size;
entry->delta = base_entry;
- entry->type = OBJ_DELTA;
+ entry->type = entry->in_pack_type;
entry->delta_sibling = base_entry->delta_child;
base_entry->delta_child = entry;
@@ -1484,6 +1554,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
no_reuse_delta = 1;
continue;
}
+ if (!strcmp("--delta-base-offset", arg)) {
+ allow_ofs_delta = 1;
+ continue;
+ }
if (!strcmp("--stdout", arg)) {
pack_to_stdout = 1;
continue;
diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c
index 4f96bcae32..e70a71163d 100644
--- a/builtin-unpack-objects.c
+++ b/builtin-unpack-objects.c
@@ -15,7 +15,7 @@ static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-fil
/* We always read in 4kB chunks. */
static unsigned char buffer[4096];
-static unsigned long offset, len;
+static unsigned long offset, len, consumed_bytes;
static SHA_CTX ctx;
/*
@@ -51,6 +51,7 @@ static void use(int bytes)
die("used more bytes than were available");
len -= bytes;
offset += bytes;
+ consumed_bytes += bytes;
}
static void *get_data(unsigned long size)
@@ -89,35 +90,49 @@ static void *get_data(unsigned long size)
struct delta_info {
unsigned char base_sha1[20];
+ unsigned long base_offset;
unsigned long size;
void *delta;
+ unsigned nr;
struct delta_info *next;
};
static struct delta_info *delta_list;
-static void add_delta_to_list(unsigned char *base_sha1, void *delta, unsigned long size)
+static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1,
+ unsigned long base_offset,
+ void *delta, unsigned long size)
{
struct delta_info *info = xmalloc(sizeof(*info));
hashcpy(info->base_sha1, base_sha1);
+ info->base_offset = base_offset;
info->size = size;
info->delta = delta;
+ info->nr = nr;
info->next = delta_list;
delta_list = info;
}
-static void added_object(unsigned char *sha1, const char *type, void *data, unsigned long size);
+struct obj_info {
+ unsigned long offset;
+ unsigned char sha1[20];
+};
+
+static struct obj_info *obj_list;
-static void write_object(void *buf, unsigned long size, const char *type)
+static void added_object(unsigned nr, const char *type, void *data,
+ unsigned long size);
+
+static void write_object(unsigned nr, void *buf, unsigned long size,
+ const char *type)
{
- unsigned char sha1[20];
- if (write_sha1_file(buf, size, type, sha1) < 0)
+ if (write_sha1_file(buf, size, type, obj_list[nr].sha1) < 0)
die("failed to write object");
- added_object(sha1, type, buf, size);
+ added_object(nr, type, buf, size);
}
-static void resolve_delta(const char *type,
+static void resolve_delta(unsigned nr, const char *type,
void *base, unsigned long base_size,
void *delta, unsigned long delta_size)
{
@@ -130,20 +145,23 @@ static void resolve_delta(const char *type,
if (!result)
die("failed to apply delta");
free(delta);
- write_object(result, result_size, type);
+ write_object(nr, result, result_size, type);
free(result);
}
-static void added_object(unsigned char *sha1, const char *type, void *data, unsigned long size)
+static void added_object(unsigned nr, const char *type, void *data,
+ unsigned long size)
{
struct delta_info **p = &delta_list;
struct delta_info *info;
while ((info = *p) != NULL) {
- if (!hashcmp(info->base_sha1, sha1)) {
+ if (!hashcmp(info->base_sha1, obj_list[nr].sha1) ||
+ info->base_offset == obj_list[nr].offset) {
*p = info->next;
p = &delta_list;
- resolve_delta(type, data, size, info->delta, info->size);
+ resolve_delta(info->nr, type, data, size,
+ info->delta, info->size);
free(info);
continue;
}
@@ -151,7 +169,8 @@ static void added_object(unsigned char *sha1, const char *type, void *data, unsi
}
}
-static void unpack_non_delta_entry(enum object_type kind, unsigned long size)
+static void unpack_non_delta_entry(enum object_type kind, unsigned long size,
+ unsigned nr)
{
void *buf = get_data(size);
const char *type;
@@ -164,30 +183,80 @@ static void unpack_non_delta_entry(enum object_type kind, unsigned long size)
default: die("bad type %d", kind);
}
if (!dry_run && buf)
- write_object(buf, size, type);
+ write_object(nr, buf, size, type);
free(buf);
}
-static void unpack_delta_entry(unsigned long delta_size)
+static void unpack_delta_entry(enum object_type kind, unsigned long delta_size,
+ unsigned nr)
{
void *delta_data, *base;
unsigned long base_size;
char type[20];
unsigned char base_sha1[20];
- hashcpy(base_sha1, fill(20));
- use(20);
+ if (kind == OBJ_REF_DELTA) {
+ hashcpy(base_sha1, fill(20));
+ use(20);
+ delta_data = get_data(delta_size);
+ if (dry_run || !delta_data) {
+ free(delta_data);
+ return;
+ }
+ if (!has_sha1_file(base_sha1)) {
+ hashcpy(obj_list[nr].sha1, null_sha1);
+ add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size);
+ return;
+ }
+ } else {
+ unsigned base_found = 0;
+ unsigned char *pack, c;
+ unsigned long base_offset;
+ unsigned lo, mid, hi;
- delta_data = get_data(delta_size);
- if (dry_run || !delta_data) {
- free(delta_data);
- return;
- }
+ pack = fill(1);
+ c = *pack;
+ use(1);
+ base_offset = c & 127;
+ while (c & 128) {
+ base_offset += 1;
+ if (!base_offset || base_offset & ~(~0UL >> 7))
+ die("offset value overflow for delta base object");
+ pack = fill(1);
+ c = *pack;
+ use(1);
+ base_offset = (base_offset << 7) + (c & 127);
+ }
+ base_offset = obj_list[nr].offset - base_offset;
- if (!has_sha1_file(base_sha1)) {
- add_delta_to_list(base_sha1, delta_data, delta_size);
- return;
+ delta_data = get_data(delta_size);
+ if (dry_run || !delta_data) {
+ free(delta_data);
+ return;
+ }
+ lo = 0;
+ hi = nr;
+ while (lo < hi) {
+ mid = (lo + hi)/2;
+ if (base_offset < obj_list[mid].offset) {
+ hi = mid;
+ } else if (base_offset > obj_list[mid].offset) {
+ lo = mid + 1;
+ } else {
+ hashcpy(base_sha1, obj_list[mid].sha1);
+ base_found = !is_null_sha1(base_sha1);
+ break;
+ }
+ }
+ if (!base_found) {
+ /* The delta base object is itself a delta that
+ has not been resolved yet. */
+ hashcpy(obj_list[nr].sha1, null_sha1);
+ add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size);
+ return;
+ }
}
+
base = read_sha1_file(base_sha1, type, &base_size);
if (!base) {
error("failed to read delta-pack base object %s",
@@ -197,7 +266,7 @@ static void unpack_delta_entry(unsigned long delta_size)
has_errors = 1;
return;
}
- resolve_delta(type, base, base_size, delta_data, delta_size);
+ resolve_delta(nr, type, base, base_size, delta_data, delta_size);
free(base);
}
@@ -208,6 +277,8 @@ static void unpack_one(unsigned nr, unsigned total)
unsigned long size;
enum object_type type;
+ obj_list[nr].offset = consumed_bytes;
+
pack = fill(1);
c = *pack;
use(1);
@@ -216,7 +287,7 @@ static void unpack_one(unsigned nr, unsigned total)
shift = 4;
while (c & 0x80) {
pack = fill(1);
- c = *pack++;
+ c = *pack;
use(1);
size += (c & 0x7f) << shift;
shift += 7;
@@ -225,13 +296,14 @@ static void unpack_one(unsigned nr, unsigned total)
static unsigned long last_sec;
static unsigned last_percent;
struct timeval now;
- unsigned percentage = (nr * 100) / total;
+ unsigned percentage = ((nr+1) * 100) / total;
gettimeofday(&now, NULL);
if (percentage != last_percent || now.tv_sec != last_sec) {
last_sec = now.tv_sec;
last_percent = percentage;
- fprintf(stderr, "%4u%% (%u/%u) done\r", percentage, nr, total);
+ fprintf(stderr, "%4u%% (%u/%u) done\r",
+ percentage, (nr+1), total);
}
}
switch (type) {
@@ -239,10 +311,11 @@ static void unpack_one(unsigned nr, unsigned total)
case OBJ_TREE:
case OBJ_BLOB:
case OBJ_TAG:
- unpack_non_delta_entry(type, size);
+ unpack_non_delta_entry(type, size, nr);
return;
- case OBJ_DELTA:
- unpack_delta_entry(size);
+ case OBJ_REF_DELTA:
+ case OBJ_OFS_DELTA:
+ unpack_delta_entry(type, size, nr);
return;
default:
error("bad object type %d", type);
@@ -265,9 +338,10 @@ static void unpack_all(void)
die("unknown pack file version %d", ntohl(hdr->hdr_version));
fprintf(stderr, "Unpacking %d objects\n", nr_objects);
+ obj_list = xmalloc(nr_objects * sizeof(*obj_list));
use(sizeof(struct pack_header));
for (i = 0; i < nr_objects; i++)
- unpack_one(i+1, nr_objects);
+ unpack_one(i, nr_objects);
if (delta_list)
die("unresolved deltas left after unpacking");
}
diff --git a/cache.h b/cache.h
index c35470107d..d0a1657292 100644
--- a/cache.h
+++ b/cache.h
@@ -269,8 +269,9 @@ enum object_type {
OBJ_TREE = 2,
OBJ_BLOB = 3,
OBJ_TAG = 4,
- /* 5/6 for future expansion */
- OBJ_DELTA = 7,
+ /* 5 for future expansion */
+ OBJ_OFS_DELTA = 6,
+ OBJ_REF_DELTA = 7,
OBJ_BAD,
};
diff --git a/fetch-pack.c b/fetch-pack.c
index e8708aa802..474d54520e 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -166,12 +166,13 @@ static int find_common(int fd[2], unsigned char *result_sha1,
}
if (!fetching)
- packet_write(fd[1], "want %s%s%s%s%s\n",
+ packet_write(fd[1], "want %s%s%s%s%s%s\n",
sha1_to_hex(remote),
(multi_ack ? " multi_ack" : ""),
(use_sideband == 2 ? " side-band-64k" : ""),
(use_sideband == 1 ? " side-band" : ""),
- (use_thin_pack ? " thin-pack" : ""));
+ (use_thin_pack ? " thin-pack" : ""),
+ " ofs-delta");
else
packet_write(fd[1], "want %s\n", sha1_to_hex(remote));
fetching++;
diff --git a/git-repack.sh b/git-repack.sh
index f2c9071d11..17e24526c2 100755
--- a/git-repack.sh
+++ b/git-repack.sh
@@ -3,7 +3,7 @@
# Copyright (c) 2005 Linus Torvalds
#
-USAGE='[-a] [-d] [-f] [-l] [-n] [-q]'
+USAGE='[-a] [-d] [-f] [-l] [-n] [-q] [--window=N] [--depth=N]'
SUBDIRECTORY_OK='Yes'
. git-sh-setup
@@ -25,6 +25,15 @@ do
shift
done
+# Later we will default repack.UseDeltaBaseOffset to true
+default_dbo=false
+
+case "`git repo-config --bool repack.usedeltabaseoffset ||
+ echo $default_dbo`" in
+true)
+ extra="$extra --delta-base-offset" ;;
+esac
+
PACKDIR="$GIT_OBJECT_DIRECTORY/pack"
PACKTMP="$GIT_DIR/.tmp-$$-pack"
rm -f "$PACKTMP"-*
diff --git a/index-pack.c b/index-pack.c
index 80bc6cb45b..e33f60524f 100644
--- a/index-pack.c
+++ b/index-pack.c
@@ -13,63 +13,93 @@ static const char index_pack_usage[] =
struct object_entry
{
unsigned long offset;
+ unsigned long size;
+ unsigned int hdr_size;
enum object_type type;
enum object_type real_type;
unsigned char sha1[20];
};
+union delta_base {
+ unsigned char sha1[20];
+ unsigned long offset;
+};
+
+/*
+ * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
+ * to memcmp() only the first 20 bytes.
+ */
+#define UNION_BASE_SZ 20
+
struct delta_entry
{
struct object_entry *obj;
- unsigned char base_sha1[20];
+ union delta_base base;
};
static const char *pack_name;
-static unsigned char *pack_base;
-static unsigned long pack_size;
static struct object_entry *objects;
static struct delta_entry *deltas;
static int nr_objects;
static int nr_deltas;
-static void open_pack_file(void)
+/* We always read in 4kB chunks. */
+static unsigned char input_buffer[4096];
+static unsigned long input_offset, input_len, consumed_bytes;
+static SHA_CTX input_ctx;
+static int input_fd;
+
+/*
+ * Make sure at least "min" bytes are available in the buffer, and
+ * return the pointer to the buffer.
+ */
+static void * fill(int min)
{
- int fd;
- struct stat st;
+ if (min <= input_len)
+ return input_buffer + input_offset;
+ if (min > sizeof(input_buffer))
+ die("cannot fill %d bytes", min);
+ if (input_offset) {
+ SHA1_Update(&input_ctx, input_buffer, input_offset);
+ memcpy(input_buffer, input_buffer + input_offset, input_len);
+ input_offset = 0;
+ }
+ do {
+ int ret = xread(input_fd, input_buffer + input_len,
+ sizeof(input_buffer) - input_len);
+ if (ret <= 0) {
+ if (!ret)
+ die("early EOF");
+ die("read error on input: %s", strerror(errno));
+ }
+ input_len += ret;
+ } while (input_len < min);
+ return input_buffer;
+}
+
+static void use(int bytes)
+{
+ if (bytes > input_len)
+ die("used more bytes than were available");
+ input_len -= bytes;
+ input_offset += bytes;
+ consumed_bytes += bytes;
+}
- fd = open(pack_name, O_RDONLY);
- if (fd < 0)
+static void open_pack_file(void)
+{
+ input_fd = open(pack_name, O_RDONLY);
+ if (input_fd < 0)
die("cannot open packfile '%s': %s", pack_name,
strerror(errno));
- if (fstat(fd, &st)) {
- int err = errno;
- close(fd);
- die("cannot fstat packfile '%s': %s", pack_name,
- strerror(err));
- }
- pack_size = st.st_size;
- pack_base = mmap(NULL, pack_size, PROT_READ, MAP_PRIVATE, fd, 0);
- if (pack_base == MAP_FAILED) {
- int err = errno;
- close(fd);
- die("cannot mmap packfile '%s': %s", pack_name,
- strerror(err));
- }
- close(fd);
+ SHA1_Init(&input_ctx);
}
static void parse_pack_header(void)
{
- const struct pack_header *hdr;
- unsigned char sha1[20];
- SHA_CTX ctx;
-
- /* Ensure there are enough bytes for the header and final SHA1 */
- if (pack_size < sizeof(struct pack_header) + 20)
- die("packfile '%s' is too small", pack_name);
+ struct pack_header *hdr = fill(sizeof(struct pack_header));
/* Header consistency check */
- hdr = (void *)pack_base;
if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
die("packfile '%s' signature mismatch", pack_name);
if (!pack_version_ok(hdr->hdr_version))
@@ -77,13 +107,8 @@ static void parse_pack_header(void)
pack_name, ntohl(hdr->hdr_version));
nr_objects = ntohl(hdr->hdr_entries);
-
- /* Check packfile integrity */
- SHA1_Init(&ctx);
- SHA1_Update(&ctx, pack_base, pack_size - 20);
- SHA1_Final(sha1, &ctx);
- if (hashcmp(sha1, pack_base + pack_size - 20))
- die("packfile '%s' SHA1 mismatch", pack_name);
+ use(sizeof(struct pack_header));
+ /*fprintf(stderr, "Indexing %d objects\n", nr_objects);*/
}
static void bad_object(unsigned long offset, const char *format,
@@ -101,86 +126,121 @@ static void bad_object(unsigned long offset, const char *format, ...)
pack_name, offset, buf);
}
-static void *unpack_entry_data(unsigned long offset,
- unsigned long *current_pos, unsigned long size)
+static void *unpack_entry_data(unsigned long offset, unsigned long size)
{
- unsigned long pack_limit = pack_size - 20;
- unsigned long pos = *current_pos;
z_stream stream;
void *buf = xmalloc(size);
memset(&stream, 0, sizeof(stream));
stream.next_out = buf;
stream.avail_out = size;
- stream.next_in = pack_base + pos;
- stream.avail_in = pack_limit - pos;
+ stream.next_in = fill(1);
+ stream.avail_in = input_len;
inflateInit(&stream);
for (;;) {
int ret = inflate(&stream, 0);
- if (ret == Z_STREAM_END)
+ use(input_len - stream.avail_in);
+ if (stream.total_out == size && ret == Z_STREAM_END)
break;
if (ret != Z_OK)
bad_object(offset, "inflate returned %d", ret);
+ stream.next_in = fill(1);
+ stream.avail_in = input_len;
}
inflateEnd(&stream);
- if (stream.total_out != size)
- bad_object(offset, "size mismatch (expected %lu, got %lu)",
- size, stream.total_out);
- *current_pos = pack_limit - stream.avail_in;
return buf;
}
-static void *unpack_raw_entry(unsigned long offset,
- enum object_type *obj_type,
- unsigned long *obj_size,
- unsigned char *delta_base,
- unsigned long *next_obj_offset)
+static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_base)
{
- unsigned long pack_limit = pack_size - 20;
- unsigned long pos = offset;
- unsigned char c;
- unsigned long size;
+ unsigned char *p, c;
+ unsigned long size, base_offset;
unsigned shift;
- enum object_type type;
- void *data;
- c = pack_base[pos++];
- type = (c >> 4) & 7;
+ obj->offset = consumed_bytes;
+
+ p = fill(1);
+ c = *p;
+ use(1);
+ obj->type = (c >> 4) & 7;
size = (c & 15);
shift = 4;
while (c & 0x80) {
- if (pos >= pack_limit)
- bad_object(offset, "object extends past end of pack");
- c = pack_base[pos++];
+ p = fill(1);
+ c = *p;
+ use(1);
size += (c & 0x7fUL) << shift;
shift += 7;
}
+ obj->size = size;
- switch (type) {
- case OBJ_DELTA:
- if (pos + 20 >= pack_limit)
- bad_object(offset, "object extends past end of pack");
- hashcpy(delta_base, pack_base + pos);
- pos += 20;
- /* fallthru */
+ switch (obj->type) {
+ case OBJ_REF_DELTA:
+ hashcpy(delta_base->sha1, fill(20));
+ use(20);
+ break;
+ case OBJ_OFS_DELTA:
+ memset(delta_base, 0, sizeof(*delta_base));
+ p = fill(1);
+ c = *p;
+ use(1);
+ base_offset = c & 127;
+ while (c & 128) {
+ base_offset += 1;
+ if (!base_offset || base_offset & ~(~0UL >> 7))
+ bad_object(obj->offset, "offset value overflow for delta base object");
+ p = fill(1);
+ c = *p;
+ use(1);
+ base_offset = (base_offset << 7) + (c & 127);
+ }
+ delta_base->offset = obj->offset - base_offset;
+ if (delta_base->offset >= obj->offset)
+ bad_object(obj->offset, "delta base offset is out of bound");
+ break;
case OBJ_COMMIT:
case OBJ_TREE:
case OBJ_BLOB:
case OBJ_TAG:
- data = unpack_entry_data(offset, &pos, size);
break;
default:
- bad_object(offset, "bad object type %d", type);
+ bad_object(obj->offset, "bad object type %d", obj->type);
}
+ obj->hdr_size = consumed_bytes - obj->offset;
- *obj_type = type;
- *obj_size = size;
- *next_obj_offset = pos;
+ return unpack_entry_data(obj->offset, obj->size);
+}
+
+static void * get_data_from_pack(struct object_entry *obj)
+{
+ unsigned long from = obj[0].offset + obj[0].hdr_size;
+ unsigned long len = obj[1].offset - from;
+ unsigned pg_offset = from % getpagesize();
+ unsigned char *map, *data;
+ z_stream stream;
+ int st;
+
+ map = mmap(NULL, len + pg_offset, PROT_READ, MAP_PRIVATE,
+ input_fd, from - pg_offset);
+ if (map == MAP_FAILED)
+ die("cannot mmap packfile '%s': %s", pack_name, strerror(errno));
+ data = xmalloc(obj->size);
+ memset(&stream, 0, sizeof(stream));
+ stream.next_out = data;
+ stream.avail_out = obj->size;
+ stream.next_in = map + pg_offset;
+ stream.avail_in = len;
+ inflateInit(&stream);
+ while ((st = inflate(&stream, Z_FINISH)) == Z_OK);
+ inflateEnd(&stream);
+ if (st != Z_STREAM_END || stream.total_out != obj->size)
+ die("serious inflate inconsistency");
+ munmap(map, len + pg_offset);
return data;
}
-static int find_delta(const unsigned char *base_sha1)
+static int find_delta(const union delta_base *base)
{
int first = 0, last = nr_deltas;
@@ -189,7 +249,7 @@ static int find_delta(const unsigned char *base_sha1)
struct delta_entry *delta = &deltas[next];
int cmp;
- cmp = hashcmp(base_sha1, delta->base_sha1);
+ cmp = memcmp(base, &delta->base, UNION_BASE_SZ);
if (!cmp)
return next;
if (cmp < 0) {
@@ -201,18 +261,18 @@ static int find_delta(const unsigned char *base_sha1)
return -first-1;
}
-static int find_deltas_based_on_sha1(const unsigned char *base_sha1,
- int *first_index, int *last_index)
+static int find_delta_childs(const union delta_base *base,
+ int *first_index, int *last_index)
{
- int first = find_delta(base_sha1);
+ int first = find_delta(base);
int last = first;
int end = nr_deltas - 1;
if (first < 0)
return -1;
- while (first > 0 && !hashcmp(deltas[first - 1].base_sha1, base_sha1))
+ while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ))
--first;
- while (last < end && !hashcmp(deltas[last + 1].base_sha1, base_sha1))
+ while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ))
++last;
*first_index = first;
*last_index = last;
@@ -252,25 +312,34 @@ static void resolve_delta(struct delta_entry *delta, void *base_data,
unsigned long delta_size;
void *result;
unsigned long result_size;
- enum object_type delta_type;
- unsigned char base_sha1[20];
- unsigned long next_obj_offset;
+ union delta_base delta_base;
int j, first, last;
obj->real_type = type;
- delta_data = unpack_raw_entry(obj->offset, &delta_type,
- &delta_size, base_sha1,
- &next_obj_offset);
+ delta_data = get_data_from_pack(obj);
+ delta_size = obj->size;
result = patch_delta(base_data, base_size, delta_data, delta_size,
&result_size);
free(delta_data);
if (!result)
bad_object(obj->offset, "failed to apply delta");
sha1_object(result, result_size, type, obj->sha1);
- if (!find_deltas_based_on_sha1(obj->sha1, &first, &last)) {
+
+ hashcpy(delta_base.sha1, obj->sha1);
+ if (!find_delta_childs(&delta_base, &first, &last)) {
+ for (j = first; j <= last; j++)
+ if (deltas[j].obj->type == OBJ_REF_DELTA)
+ resolve_delta(&deltas[j], result, result_size, type);
+ }
+
+ memset(&delta_base, 0, sizeof(delta_base));
+ delta_base.offset = obj->offset;
+ if (!find_delta_childs(&delta_base, &first, &last)) {
for (j = first; j <= last; j++)
- resolve_delta(&deltas[j], result, result_size, type);
+ if (deltas[j].obj->type == OBJ_OFS_DELTA)
+ resolve_delta(&deltas[j], result, result_size, type);
}
+
free(result);
}
@@ -278,16 +347,16 @@ static int compare_delta_entry(const void *a, const void *b)
{
const struct delta_entry *delta_a = a;
const struct delta_entry *delta_b = b;
- return hashcmp(delta_a->base_sha1, delta_b->base_sha1);
+ return memcmp(&delta_a->base, &delta_b->base, UNION_BASE_SZ);
}
-static void parse_pack_objects(void)
+/* Parse all objects and return the pack content SHA1 hash */
+static void parse_pack_objects(unsigned char *sha1)
{
int i;
- unsigned long offset = sizeof(struct pack_header);
- unsigned char base_sha1[20];
+ struct delta_entry *delta = deltas;
void *data;
- unsigned long data_size;
+ struct stat st;
/*
* First pass:
@@ -297,22 +366,32 @@ static void parse_pack_objects(void)
*/
for (i = 0; i < nr_objects; i++) {
struct object_entry *obj = &objects[i];
- obj->offset = offset;
- data = unpack_raw_entry(offset, &obj->type, &data_size,
- base_sha1, &offset);
+ data = unpack_raw_entry(obj, &delta->base);
obj->real_type = obj->type;
- if (obj->type == OBJ_DELTA) {
- struct delta_entry *delta = &deltas[nr_deltas++];
+ if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) {
+ nr_deltas++;
delta->obj = obj;
- hashcpy(delta->base_sha1, base_sha1);
+ delta++;
} else
- sha1_object(data, data_size, obj->type, obj->sha1);
+ sha1_object(data, obj->size, obj->type, obj->sha1);
free(data);
}
- if (offset != pack_size - 20)
+ objects[i].offset = consumed_bytes;
+
+ /* Check pack integrity */
+ SHA1_Update(&input_ctx, input_buffer, input_offset);
+ SHA1_Final(sha1, &input_ctx);
+ if (hashcmp(fill(20), sha1))
+ die("packfile '%s' SHA1 mismatch", pack_name);
+ use(20);
+
+ /* If input_fd is a file, we should have reached its end now. */
+ if (fstat(input_fd, &st))
+ die("cannot fstat packfile '%s': %s", pack_name, strerror(errno));
+ if (S_ISREG(st.st_mode) && st.st_size != consumed_bytes)
die("packfile '%s' has junk at the end", pack_name);
- /* Sort deltas by base SHA1 for fast searching */
+ /* Sort deltas by base SHA1/offset for fast searching */
qsort(deltas, nr_deltas, sizeof(struct delta_entry),
compare_delta_entry);
@@ -326,22 +405,36 @@ static void parse_pack_objects(void)
*/
for (i = 0; i < nr_objects; i++) {
struct object_entry *obj = &objects[i];
- int j, first, last;
+ union delta_base base;
+ int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last;
- if (obj->type == OBJ_DELTA)
+ if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA)
continue;
- if (find_deltas_based_on_sha1(obj->sha1, &first, &last))
+ hashcpy(base.sha1, obj->sha1);
+ ref = !find_delta_childs(&base, &ref_first, &ref_last);
+ memset(&base, 0, sizeof(base));
+ base.offset = obj->offset;
+ ofs = !find_delta_childs(&base, &ofs_first, &ofs_last);
+ if (!ref && !ofs)
continue;
- data = unpack_raw_entry(obj->offset, &obj->type, &data_size,
- base_sha1, &offset);
- for (j = first; j <= last; j++)
- resolve_delta(&deltas[j], data, data_size, obj->type);
+ data = get_data_from_pack(obj);
+ if (ref)
+ for (j = ref_first; j <= ref_last; j++)
+ if (deltas[j].obj->type == OBJ_REF_DELTA)
+ resolve_delta(&deltas[j], data,
+ obj->size, obj->type);
+ if (ofs)
+ for (j = ofs_first; j <= ofs_last; j++)
+ if (deltas[j].obj->type == OBJ_OFS_DELTA)
+ resolve_delta(&deltas[j], data,
+ obj->size, obj->type);
free(data);
}
/* Check for unresolved deltas */
for (i = 0; i < nr_deltas; i++) {
- if (deltas[i].obj->real_type == OBJ_DELTA)
+ if (deltas[i].obj->real_type == OBJ_REF_DELTA ||
+ deltas[i].obj->real_type == OBJ_OFS_DELTA)
die("packfile '%s' has unresolved deltas", pack_name);
}
}
@@ -353,6 +446,10 @@ static int sha1_compare(const void *_a, const void *_b)
return hashcmp(a->sha1, b->sha1);
}
+/*
+ * On entry *sha1 contains the pack content SHA1 hash, on exit it is
+ * the SHA1 hash of sorted object names.
+ */
static void write_index_file(const char *index_name, unsigned char *sha1)
{
struct sha1file *f;
@@ -412,7 +509,7 @@ static void write_index_file(const char *index_name, unsigned char *sha1)
sha1write(f, obj->sha1, 20);
SHA1_Update(&ctx, obj->sha1, 20);
}
- sha1write(f, pack_base + pack_size - 20, 20);
+ sha1write(f, sha1, 20);
sha1close(f, NULL, 1);
free(sorted_by_sha);
SHA1_Final(sha1, &ctx);
@@ -458,9 +555,9 @@ int main(int argc, char **argv)
open_pack_file();
parse_pack_header();
- objects = xcalloc(nr_objects, sizeof(struct object_entry));
+ objects = xcalloc(nr_objects + 1, sizeof(struct object_entry));
deltas = xcalloc(nr_objects, sizeof(struct delta_entry));
- parse_pack_objects();
+ parse_pack_objects(sha1);
free(deltas);
write_index_file(index_name, sha1);
free(objects);
diff --git a/pack.h b/pack.h
index eb07b033ae..4814800f28 100644
--- a/pack.h
+++ b/pack.h
@@ -16,7 +16,4 @@ struct pack_header {
};
extern int verify_pack(struct packed_git *, int);
-extern int check_reuse_pack_delta(struct packed_git *, unsigned long,
- unsigned char *, unsigned long *,
- enum object_type *);
#endif
diff --git a/sha1_file.c b/sha1_file.c
index 47e2a29abd..e89d24c015 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -877,26 +877,61 @@ void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned l
return unpack_sha1_rest(&stream, hdr, *size);
}
+static unsigned long get_delta_base(struct packed_git *p,
+ unsigned long offset,
+ enum object_type kind,
+ unsigned long delta_obj_offset,
+ unsigned long *base_obj_offset)
+{
+ unsigned char *base_info = (unsigned char *) p->pack_base + offset;
+ unsigned long base_offset;
+
+ /* there must be at least 20 bytes left regardless of delta type */
+ if (p->pack_size <= offset + 20)
+ die("truncated pack file");
+
+ if (kind == OBJ_OFS_DELTA) {
+ unsigned used = 0;
+ unsigned char c = base_info[used++];
+ base_offset = c & 127;
+ while (c & 128) {
+ base_offset += 1;
+ if (!base_offset || base_offset & ~(~0UL >> 7))
+ die("offset value overflow for delta base object");
+ c = base_info[used++];
+ base_offset = (base_offset << 7) + (c & 127);
+ }
+ base_offset = delta_obj_offset - base_offset;
+ if (base_offset >= delta_obj_offset)
+ die("delta base offset out of bound");
+ offset += used;
+ } else if (kind == OBJ_REF_DELTA) {
+ /* The base entry _must_ be in the same pack */
+ base_offset = find_pack_entry_one(base_info, p);
+ if (!base_offset)
+ die("failed to find delta-pack base object %s",
+ sha1_to_hex(base_info));
+ offset += 20;
+ } else
+ die("I am totally screwed");
+ *base_obj_offset = base_offset;
+ return offset;
+}
+
/* forward declaration for a mutually recursive function */
static int packed_object_info(struct packed_git *p, unsigned long offset,
char *type, unsigned long *sizep);
static int packed_delta_info(struct packed_git *p,
unsigned long offset,
+ enum object_type kind,
+ unsigned long obj_offset,
char *type,
unsigned long *sizep)
{
unsigned long base_offset;
- unsigned char *base_sha1 = (unsigned char *) p->pack_base + offset;
- if (p->pack_size < offset + 20)
- die("truncated pack file");
- /* The base entry _must_ be in the same pack */
- base_offset = find_pack_entry_one(base_sha1, p);
- if (!base_offset)
- die("failed to find delta-pack base object %s",
- sha1_to_hex(base_sha1));
- offset += 20;
+ offset = get_delta_base(p, offset, kind, obj_offset, &base_offset);
/* We choose to only get the type of the base object and
* ignore potentially corrupt pack file that expects the delta
@@ -959,25 +994,6 @@ static unsigned long unpack_object_header(struct packed_git *p, unsigned long of
return offset + used;
}
-int check_reuse_pack_delta(struct packed_git *p, unsigned long offset,
- unsigned char *base, unsigned long *sizep,
- enum object_type *kindp)
-{
- unsigned long ptr;
- int status = -1;
-
- use_packed_git(p);
- ptr = offset;
- ptr = unpack_object_header(p, ptr, kindp, sizep);
- if (*kindp != OBJ_DELTA)
- goto done;
- hashcpy(base, (unsigned char *) p->pack_base + ptr);
- status = 0;
- done:
- unuse_packed_git(p);
- return status;
-}
-
void packed_object_info_detail(struct packed_git *p,
unsigned long offset,
char *type,
@@ -986,11 +1002,12 @@ void packed_object_info_detail(struct packed_git *p,
unsigned int *delta_chain_length,
unsigned char *base_sha1)
{
- unsigned long val;
+ unsigned long obj_offset, val;
unsigned char *next_sha1;
enum object_type kind;
*delta_chain_length = 0;
+ obj_offset = offset;
offset = unpack_object_header(p, offset, &kind, size);
for (;;) {
@@ -1005,7 +1022,13 @@ void packed_object_info_detail(struct packed_git *p,
strcpy(type, type_names[kind]);
*store_size = 0; /* notyet */
return;
- case OBJ_DELTA:
+ case OBJ_OFS_DELTA:
+ get_delta_base(p, offset, kind, obj_offset, &offset);
+ if (*delta_chain_length == 0) {
+ /* TODO: find base_sha1 as pointed by offset */
+ }
+ break;
+ case OBJ_REF_DELTA:
if (p->pack_size <= offset + 20)
die("pack file %s records an incomplete delta base",
p->pack_name);
@@ -1015,6 +1038,7 @@ void packed_object_info_detail(struct packed_git *p,
offset = find_pack_entry_one(next_sha1, p);
break;
}
+ obj_offset = offset;
offset = unpack_object_header(p, offset, &kind, &val);
(*delta_chain_length)++;
}
@@ -1023,15 +1047,15 @@ void packed_object_info_detail(struct packed_git *p,
static int packed_object_info(struct packed_git *p, unsigned long offset,
char *type, unsigned long *sizep)
{
- unsigned long size;
+ unsigned long size, obj_offset = offset;
enum object_type kind;
offset = unpack_object_header(p, offset, &kind, &size);
- if (kind == OBJ_DELTA)
- return packed_delta_info(p, offset, type, sizep);
-
switch (kind) {
+ case OBJ_OFS_DELTA:
+ case OBJ_REF_DELTA:
+ return packed_delta_info(p, offset, kind, obj_offset, type, sizep);
case OBJ_COMMIT:
case OBJ_TREE:
case OBJ_BLOB:
@@ -1077,23 +1101,15 @@ static void *unpack_compressed_entry(struct packed_git *p,
static void *unpack_delta_entry(struct packed_git *p,
unsigned long offset,
unsigned long delta_size,
+ enum object_type kind,
+ unsigned long obj_offset,
char *type,
unsigned long *sizep)
{
void *delta_data, *result, *base;
unsigned long result_size, base_size, base_offset;
- unsigned char *base_sha1;
-
- if (p->pack_size < offset + 20)
- die("truncated pack file");
- /* The base entry _must_ be in the same pack */
- base_sha1 = (unsigned char*)p->pack_base + offset;
- base_offset = find_pack_entry_one(base_sha1, p);
- if (!base_offset)
- die("failed to find delta-pack base object %s",
- sha1_to_hex(base_sha1));
- offset += 20;
+ offset = get_delta_base(p, offset, kind, obj_offset, &base_offset);
base = unpack_entry_gently(p, base_offset, type, &base_size);
if (!base)
die("failed to read delta base object at %lu from %s",
@@ -1130,13 +1146,14 @@ static void *unpack_entry(struct pack_entry *entry,
void *unpack_entry_gently(struct packed_git *p, unsigned long offset,
char *type, unsigned long *sizep)
{
- unsigned long size;
+ unsigned long size, obj_offset = offset;
enum object_type kind;
offset = unpack_object_header(p, offset, &kind, &size);
switch (kind) {
- case OBJ_DELTA:
- return unpack_delta_entry(p, offset, size, type, sizep);
+ case OBJ_OFS_DELTA:
+ case OBJ_REF_DELTA:
+ return unpack_delta_entry(p, offset, size, kind, obj_offset, type, sizep);
case OBJ_COMMIT:
case OBJ_TREE:
case OBJ_BLOB:
diff --git a/upload-pack.c b/upload-pack.c
index 189b239cc0..9ec3775049 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -16,7 +16,7 @@ static const char upload_pack_usage[] = "git-upload-pack [--strict] [--timeout=n
#define OUR_REF (1U << 1)
#define WANTED (1U << 2)
static int multi_ack, nr_our_refs;
-static int use_thin_pack;
+static int use_thin_pack, use_ofs_delta;
static struct object_array have_obj;
static struct object_array want_obj;
static unsigned int timeout;
@@ -137,7 +137,9 @@ static void create_pack_file(void)
close(pu_pipe[1]);
close(pe_pipe[0]);
close(pe_pipe[1]);
- execl_git_cmd("pack-objects", "--stdout", "--progress", NULL);
+ execl_git_cmd("pack-objects", "--stdout", "--progress",
+ use_ofs_delta ? "--delta-base-offset" : NULL,
+ NULL);
kill(pid_rev_list, SIGKILL);
die("git-upload-pack: unable to exec git-pack-objects");
}
@@ -393,6 +395,8 @@ static void receive_needs(void)
multi_ack = 1;
if (strstr(line+45, "thin-pack"))
use_thin_pack = 1;
+ if (strstr(line+45, "ofs-delta"))
+ use_ofs_delta = 1;
if (strstr(line+45, "side-band-64k"))
use_sideband = LARGE_PACKET_MAX;
else if (strstr(line+45, "side-band"))
@@ -418,7 +422,7 @@ static void receive_needs(void)
static int send_ref(const char *refname, const unsigned char *sha1)
{
- static const char *capabilities = "multi_ack thin-pack side-band side-band-64k";
+ static const char *capabilities = "multi_ack thin-pack side-band side-band-64k ofs-delta";
struct object *o = parse_object(sha1);
if (!o)