summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Junio C Hamano <gitster@pobox.com> 2014-10-29 18:07:56 +0100
committer: Junio C Hamano <gitster@pobox.com> 2014-10-29 18:07:56 +0100
commit: d70e331c0e8eaeb0bd75ae3020c3be71de075ff7 (patch)
tree: 645e25685926321704394063059a928d7492a9e8
parent: Merge branch 'bc/asciidoctor' (diff)
parent: drop add_object_array_with_mode (diff)
download: git-d70e331c0e8eaeb0bd75ae3020c3be71de075ff7.tar.xz
download: git-d70e331c0e8eaeb0bd75ae3020c3be71de075ff7.zip
Merge branch 'jk/prune-mtime'

Tighten the logic to decide that an unreachable cruft is
sufficiently old by covering corner cases such as an ancient object
becoming reachable and then going unreachable again, in which case
its retention period should be prolonged.

* jk/prune-mtime: (28 commits)
  drop add_object_array_with_mode
  revision: remove definition of unused 'add_object' function
  pack-objects: double-check options before discarding objects
  repack: pack objects mentioned by the index
  pack-objects: use argv_array
  reachable: use revision machinery's --indexed-objects code
  rev-list: add --indexed-objects option
  rev-list: document --reflog option
  t5516: test pushing a tag of an otherwise unreferenced blob
  traverse_commit_list: support pending blobs/trees with paths
  make add_object_array_with_context interface more sane
  write_sha1_file: freshen existing objects
  pack-objects: match prune logic for discarding objects
  pack-objects: refactor unpack-unreachable expiration check
  prune: keep objects reachable from recent objects
  sha1_file: add for_each iterators for loose and packed objects
  count-objects: use for_each_loose_file_in_objdir
  count-objects: do not use xsize_t when counting object size
  prune-packed: use for_each_loose_file_in_objdir
  reachable: mark index blobs as SEEN
  ...

-rw-r--r--Documentation/rev-list-options.txt9
-rw-r--r--builtin/count-objects.c101
-rw-r--r--builtin/grep.c8
-rw-r--r--builtin/pack-objects.c86
-rw-r--r--builtin/prune-packed.c69
-rw-r--r--builtin/prune.c89
-rw-r--r--builtin/reflog.c2
-rw-r--r--builtin/repack.c1
-rw-r--r--cache.h46
-rw-r--r--git-compat-util.h2
-rw-r--r--list-objects.c14
-rw-r--r--object.c48
-rw-r--r--object.h11
-rw-r--r--reachable.c263
-rw-r--r--reachable.h5
-rw-r--r--revision.c113
-rw-r--r--revision.h7
-rw-r--r--sha1_file.c209
-rwxr-xr-xt/t5516-fetch-push.sh13
-rwxr-xr-xt/t6000-rev-list-misc.sh23
-rwxr-xr-xt/t6501-freshen-objects.sh132
-rwxr-xr-xt/t7701-repack-unpack-unreachable.sh13
-rw-r--r--urlmatch.c8
23 files changed, 836 insertions, 436 deletions
diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index 5d311b8d46..3301fdebf0 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -168,6 +168,15 @@ respectively, and they must begin with `refs/` when applied to `--glob`
or `--all`. If a trailing '/{asterisk}' is intended, it must be given
explicitly.
+--reflog::
+ Pretend as if all objects mentioned by reflogs are listed on the
+ command line as `<commit>`.
+
+--indexed-objects::
+ Pretend as if all trees and blobs used by the index are listed
+ on the command line. Note that you probably want to use
+ `--objects`, too.
+
--ignore-missing::
Upon seeing an invalid object name in the input, pretend as if
the bad input was not given.
diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index a7f70cb858..e47ef0b1af 100644
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c
@@ -11,6 +11,9 @@
static unsigned long garbage;
static off_t size_garbage;
+static int verbose;
+static unsigned long loose, packed, packed_loose;
+static off_t loose_size;
static void real_report_garbage(const char *desc, const char *path)
{
@@ -21,61 +24,31 @@ static void real_report_garbage(const char *desc, const char *path)
garbage++;
}
-static void count_objects(DIR *d, char *path, int len, int verbose,
- unsigned long *loose,
- off_t *loose_size,
- unsigned long *packed_loose)
+static void loose_garbage(const char *path)
{
- struct dirent *ent;
- while ((ent = readdir(d)) != NULL) {
- char hex[41];
- unsigned char sha1[20];
- const char *cp;
- int bad = 0;
+ if (verbose)
+ report_garbage("garbage found", path);
+}
- if (is_dot_or_dotdot(ent->d_name))
- continue;
- for (cp = ent->d_name; *cp; cp++) {
- int ch = *cp;
- if (('0' <= ch && ch <= '9') ||
- ('a' <= ch && ch <= 'f'))
- continue;
- bad = 1;
- break;
- }
- if (cp - ent->d_name != 38)
- bad = 1;
- else {
- struct stat st;
- memcpy(path + len + 3, ent->d_name, 38);
- path[len + 2] = '/';
- path[len + 41] = 0;
- if (lstat(path, &st) || !S_ISREG(st.st_mode))
- bad = 1;
- else
- (*loose_size) += xsize_t(on_disk_bytes(st));
- }
- if (bad) {
- if (verbose) {
- struct strbuf sb = STRBUF_INIT;
- strbuf_addf(&sb, "%.*s/%s",
- len + 2, path, ent->d_name);
- report_garbage("garbage found", sb.buf);
- strbuf_release(&sb);
- }
- continue;
- }
- (*loose)++;
- if (!verbose)
- continue;
- memcpy(hex, path+len, 2);
- memcpy(hex+2, ent->d_name, 38);
- hex[40] = 0;
- if (get_sha1_hex(hex, sha1))
- die("internal error");
- if (has_sha1_pack(sha1))
- (*packed_loose)++;
+static int count_loose(const unsigned char *sha1, const char *path, void *data)
+{
+ struct stat st;
+
+ if (lstat(path, &st) || !S_ISREG(st.st_mode))
+ loose_garbage(path);
+ else {
+ loose_size += on_disk_bytes(st);
+ loose++;
+ if (verbose && has_sha1_pack(sha1))
+ packed_loose++;
}
+ return 0;
+}
+
+static int count_cruft(const char *basename, const char *path, void *data)
+{
+ loose_garbage(path);
+ return 0;
}
static char const * const count_objects_usage[] = {
@@ -85,12 +58,7 @@ static char const * const count_objects_usage[] = {
int cmd_count_objects(int argc, const char **argv, const char *prefix)
{
- int i, verbose = 0, human_readable = 0;
- const char *objdir = get_object_directory();
- int len = strlen(objdir);
- char *path = xmalloc(len + 50);
- unsigned long loose = 0, packed = 0, packed_loose = 0;
- off_t loose_size = 0;
+ int human_readable = 0;
struct option opts[] = {
OPT__VERBOSE(&verbose, N_("be verbose")),
OPT_BOOL('H', "human-readable", &human_readable,
@@ -104,19 +72,10 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
usage_with_options(count_objects_usage, opts);
if (verbose)
report_garbage = real_report_garbage;
- memcpy(path, objdir, len);
- if (len && objdir[len-1] != '/')
- path[len++] = '/';
- for (i = 0; i < 256; i++) {
- DIR *d;
- sprintf(path + len, "%02x", i);
- d = opendir(path);
- if (!d)
- continue;
- count_objects(d, path, len, verbose,
- &loose, &loose_size, &packed_loose);
- closedir(d);
- }
+
+ for_each_loose_file_in_objdir(get_object_directory(),
+ count_loose, count_cruft, NULL, NULL);
+
if (verbose) {
struct packed_git *p;
unsigned long num_pack = 0;
diff --git a/builtin/grep.c b/builtin/grep.c
index c86a142f30..4063882f06 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -456,10 +456,10 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
}
static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec,
- struct object *obj, const char *name, struct object_context *oc)
+ struct object *obj, const char *name, const char *path)
{
if (obj->type == OBJ_BLOB)
- return grep_sha1(opt, obj->sha1, name, 0, oc ? oc->path : NULL);
+ return grep_sha1(opt, obj->sha1, name, 0, path);
if (obj->type == OBJ_COMMIT || obj->type == OBJ_TREE) {
struct tree_desc tree;
void *data;
@@ -501,7 +501,7 @@ static int grep_objects(struct grep_opt *opt, const struct pathspec *pathspec,
for (i = 0; i < nr; i++) {
struct object *real_obj;
real_obj = deref_tag(list->objects[i].item, NULL, 0);
- if (grep_object(opt, pathspec, real_obj, list->objects[i].name, list->objects[i].context)) {
+ if (grep_object(opt, pathspec, real_obj, list->objects[i].name, list->objects[i].path)) {
hit = 1;
if (opt->status_only)
break;
@@ -821,7 +821,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
struct object *object = parse_object_or_die(sha1, arg);
if (!seen_dashdash)
verify_non_filename(prefix, arg);
- add_object_array_with_context(object, arg, &list, xmemdupz(&oc, sizeof(struct object_context)));
+ add_object_array_with_path(object, arg, &list, oc.mode, oc.path);
continue;
}
if (!strcmp(arg, "--")) {
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 78c659a6b4..3f9f5c7760 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -20,6 +20,9 @@
#include "streaming.h"
#include "thread-utils.h"
#include "pack-bitmap.h"
+#include "reachable.h"
+#include "sha1-array.h"
+#include "argv-array.h"
static const char *pack_usage[] = {
N_("git pack-objects --stdout [options...] [< ref-list | < object-list]"),
@@ -2406,6 +2409,27 @@ static int has_sha1_pack_kept_or_nonlocal(const unsigned char *sha1)
return 0;
}
+/*
+ * Store a list of sha1s that should not be discarded
+ * because they are either written too recently, or are
+ * reachable from another object that was.
+ *
+ * This is filled by get_object_list.
+ */
+static struct sha1_array recent_objects;
+
+static int loosened_object_can_be_discarded(const unsigned char *sha1,
+ unsigned long mtime)
+{
+ if (!unpack_unreachable_expiration)
+ return 0;
+ if (mtime > unpack_unreachable_expiration)
+ return 0;
+ if (sha1_array_lookup(&recent_objects, sha1) >= 0)
+ return 0;
+ return 1;
+}
+
static void loosen_unused_packed_objects(struct rev_info *revs)
{
struct packed_git *p;
@@ -2416,17 +2440,14 @@ static void loosen_unused_packed_objects(struct rev_info *revs)
if (!p->pack_local || p->pack_keep)
continue;
- if (unpack_unreachable_expiration &&
- p->mtime < unpack_unreachable_expiration)
- continue;
-
if (open_pack_index(p))
die("cannot open pack index");
for (i = 0; i < p->num_objects; i++) {
sha1 = nth_packed_object_sha1(p, i);
if (!packlist_find(&to_pack, sha1, NULL) &&
- !has_sha1_pack_kept_or_nonlocal(sha1))
+ !has_sha1_pack_kept_or_nonlocal(sha1) &&
+ !loosened_object_can_be_discarded(sha1, p->mtime))
if (force_object_loose(sha1, p->mtime))
die("unable to force loose object");
}
@@ -2462,6 +2483,19 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
return 0;
}
+static void record_recent_object(struct object *obj,
+ const struct name_path *path,
+ const char *last,
+ void *data)
+{
+ sha1_array_append(&recent_objects, obj->sha1);
+}
+
+static void record_recent_commit(struct commit *commit, void *data)
+{
+ sha1_array_append(&recent_objects, commit->object.sha1);
+}
+
static void get_object_list(int ac, const char **av)
{
struct rev_info revs;
@@ -2509,10 +2543,23 @@ static void get_object_list(int ac, const char **av)
mark_edges_uninteresting(&revs, show_edge);
traverse_commit_list(&revs, show_commit, show_object, NULL);
+ if (unpack_unreachable_expiration) {
+ revs.ignore_missing_links = 1;
+ if (add_unseen_recent_objects_to_traversal(&revs,
+ unpack_unreachable_expiration))
+ die("unable to add recent objects");
+ if (prepare_revision_walk(&revs))
+ die("revision walk setup failed");
+ traverse_commit_list(&revs, record_recent_commit,
+ record_recent_object, NULL);
+ }
+
if (keep_unreachable)
add_objects_in_unpacked_packs(&revs);
if (unpack_unreachable)
loosen_unused_packed_objects(&revs);
+
+ sha1_array_clear(&recent_objects);
}
static int option_parse_index_version(const struct option *opt,
@@ -2567,9 +2614,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
int use_internal_rev_list = 0;
int thin = 0;
int all_progress_implied = 0;
- const char *rp_av[6];
- int rp_ac = 0;
+ struct argv_array rp = ARGV_ARRAY_INIT;
int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
+ int rev_list_index = 0;
struct option pack_objects_options[] = {
OPT_SET_INT('q', "quiet", &progress,
N_("do not show progress meter"), 0),
@@ -2616,6 +2663,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
{ OPTION_SET_INT, 0, "reflog", &rev_list_reflog, NULL,
N_("include objects referred by reflog entries"),
PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
+ { OPTION_SET_INT, 0, "indexed-objects", &rev_list_index, NULL,
+ N_("include objects referred to by the index"),
+ PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
OPT_BOOL(0, "stdout", &pack_to_stdout,
N_("output pack to stdout")),
OPT_BOOL(0, "include-tag", &include_tag,
@@ -2658,24 +2708,28 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (pack_to_stdout != !base_name || argc)
usage_with_options(pack_usage, pack_objects_options);
- rp_av[rp_ac++] = "pack-objects";
+ argv_array_push(&rp, "pack-objects");
if (thin) {
use_internal_rev_list = 1;
- rp_av[rp_ac++] = "--objects-edge";
+ argv_array_push(&rp, "--objects-edge");
} else
- rp_av[rp_ac++] = "--objects";
+ argv_array_push(&rp, "--objects");
if (rev_list_all) {
use_internal_rev_list = 1;
- rp_av[rp_ac++] = "--all";
+ argv_array_push(&rp, "--all");
}
if (rev_list_reflog) {
use_internal_rev_list = 1;
- rp_av[rp_ac++] = "--reflog";
+ argv_array_push(&rp, "--reflog");
+ }
+ if (rev_list_index) {
+ use_internal_rev_list = 1;
+ argv_array_push(&rp, "--indexed-objects");
}
if (rev_list_unpacked) {
use_internal_rev_list = 1;
- rp_av[rp_ac++] = "--unpacked";
+ argv_array_push(&rp, "--unpacked");
}
if (!reuse_object)
@@ -2706,6 +2760,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (keep_unreachable && unpack_unreachable)
die("--keep-unreachable and --unpack-unreachable are incompatible.");
+ if (!rev_list_all || !rev_list_reflog || !rev_list_index)
+ unpack_unreachable_expiration = 0;
if (!use_internal_rev_list || !pack_to_stdout || is_repository_shallow())
use_bitmap_index = 0;
@@ -2723,8 +2779,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (!use_internal_rev_list)
read_object_list_from_stdin();
else {
- rp_av[rp_ac] = NULL;
- get_object_list(rp_ac, rp_av);
+ get_object_list(rp.argc, rp.argv);
+ argv_array_clear(&rp);
}
cleanup_preferred_base();
if (include_tag && nr_result)
diff --git a/builtin/prune-packed.c b/builtin/prune-packed.c
index d430731d70..f24a2c2bdc 100644
--- a/builtin/prune-packed.c
+++ b/builtin/prune-packed.c
@@ -10,65 +10,42 @@ static const char * const prune_packed_usage[] = {
static struct progress *progress;
-static void prune_dir(int i, DIR *dir, struct strbuf *pathname, int opts)
+static int prune_subdir(int nr, const char *path, void *data)
{
- struct dirent *de;
- char hex[40];
- int top_len = pathname->len;
+ int *opts = data;
+ display_progress(progress, nr + 1);
+ if (!(*opts & PRUNE_PACKED_DRY_RUN))
+ rmdir(path);
+ return 0;
+}
+
+static int prune_object(const unsigned char *sha1, const char *path,
+ void *data)
+{
+ int *opts = data;
- sprintf(hex, "%02x", i);
- while ((de = readdir(dir)) != NULL) {
- unsigned char sha1[20];
- if (strlen(de->d_name) != 38)
- continue;
- memcpy(hex + 2, de->d_name, 38);
- if (get_sha1_hex(hex, sha1))
- continue;
- if (!has_sha1_pack(sha1))
- continue;
+ if (!has_sha1_pack(sha1))
+ return 0;
- strbuf_add(pathname, de->d_name, 38);
- if (opts & PRUNE_PACKED_DRY_RUN)
- printf("rm -f %s\n", pathname->buf);
- else
- unlink_or_warn(pathname->buf);
- display_progress(progress, i + 1);
- strbuf_setlen(pathname, top_len);
- }
+ if (*opts & PRUNE_PACKED_DRY_RUN)
+ printf("rm -f %s\n", path);
+ else
+ unlink_or_warn(path);
+ return 0;
}
void prune_packed_objects(int opts)
{
- int i;
- const char *dir = get_object_directory();
- struct strbuf pathname = STRBUF_INIT;
- int top_len;
-
- strbuf_addstr(&pathname, dir);
if (opts & PRUNE_PACKED_VERBOSE)
progress = start_progress_delay(_("Removing duplicate objects"),
256, 95, 2);
- if (pathname.len && pathname.buf[pathname.len - 1] != '/')
- strbuf_addch(&pathname, '/');
-
- top_len = pathname.len;
- for (i = 0; i < 256; i++) {
- DIR *d;
+ for_each_loose_file_in_objdir(get_object_directory(),
+ prune_object, NULL, prune_subdir, &opts);
- display_progress(progress, i + 1);
- strbuf_setlen(&pathname, top_len);
- strbuf_addf(&pathname, "%02x/", i);
- d = opendir(pathname.buf);
- if (!d)
- continue;
- prune_dir(i, d, &pathname, opts);
- closedir(d);
- strbuf_setlen(&pathname, top_len + 2);
- rmdir(pathname.buf);
- }
+ /* Ensure we show 100% before finishing progress */
+ display_progress(progress, 256);
stop_progress(&progress);
- strbuf_release(&pathname);
}
int cmd_prune_packed(int argc, const char **argv, const char *prefix)
diff --git a/builtin/prune.c b/builtin/prune.c
index 144a3bdb33..04d3b12ae4 100644
--- a/builtin/prune.c
+++ b/builtin/prune.c
@@ -31,11 +31,23 @@ static int prune_tmp_file(const char *fullpath)
return 0;
}
-static int prune_object(const char *fullpath, const unsigned char *sha1)
+static int prune_object(const unsigned char *sha1, const char *fullpath,
+ void *data)
{
struct stat st;
- if (lstat(fullpath, &st))
- return error("Could not stat '%s'", fullpath);
+
+ /*
+ * Do we know about this object?
+ * It must have been reachable
+ */
+ if (lookup_object(sha1))
+ return 0;
+
+ if (lstat(fullpath, &st)) {
+ /* report errors, but do not stop pruning */
+ error("Could not stat '%s'", fullpath);
+ return 0;
+ }
if (st.st_mtime > expire)
return 0;
if (show_only || verbose) {
@@ -48,68 +60,20 @@ static int prune_object(const char *fullpath, const unsigned char *sha1)
return 0;
}
-static int prune_dir(int i, struct strbuf *path)
+static int prune_cruft(const char *basename, const char *path, void *data)
{
- size_t baselen = path->len;
- DIR *dir = opendir(path->buf);
- struct dirent *de;
-
- if (!dir)
- return 0;
-
- while ((de = readdir(dir)) != NULL) {
- char name[100];
- unsigned char sha1[20];
-
- if (is_dot_or_dotdot(de->d_name))
- continue;
- if (strlen(de->d_name) == 38) {
- sprintf(name, "%02x", i);
- memcpy(name+2, de->d_name, 39);
- if (get_sha1_hex(name, sha1) < 0)
- break;
-
- /*
- * Do we know about this object?
- * It must have been reachable
- */
- if (lookup_object(sha1))
- continue;
-
- strbuf_addf(path, "/%s", de->d_name);
- prune_object(path->buf, sha1);
- strbuf_setlen(path, baselen);
- continue;
- }
- if (starts_with(de->d_name, "tmp_obj_")) {
- strbuf_addf(path, "/%s", de->d_name);
- prune_tmp_file(path->buf);
- strbuf_setlen(path, baselen);
- continue;
- }
- fprintf(stderr, "bad sha1 file: %s/%s\n", path->buf, de->d_name);
- }
- closedir(dir);
- if (!show_only)
- rmdir(path->buf);
+ if (starts_with(basename, "tmp_obj_"))
+ prune_tmp_file(path);
+ else
+ fprintf(stderr, "bad sha1 file: %s\n", path);
return 0;
}
-static void prune_object_dir(const char *path)
+static int prune_subdir(int nr, const char *path, void *data)
{
- struct strbuf buf = STRBUF_INIT;
- size_t baselen;
- int i;
-
- strbuf_addstr(&buf, path);
- strbuf_addch(&buf, '/');
- baselen = buf.len;
-
- for (i = 0; i < 256; i++) {
- strbuf_addf(&buf, "%02x", i);
- prune_dir(i, &buf);
- strbuf_setlen(&buf, baselen);
- }
+ if (!show_only)
+ rmdir(path);
+ return 0;
}
/*
@@ -171,9 +135,10 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
if (show_progress)
progress = start_progress_delay(_("Checking connectivity"), 0, 0, 2);
- mark_reachable_objects(&revs, 1, progress);
+ mark_reachable_objects(&revs, 1, expire, progress);
stop_progress(&progress);
- prune_object_dir(get_object_directory());
+ for_each_loose_file_in_objdir(get_object_directory(), prune_object,
+ prune_cruft, prune_subdir, NULL);
prune_packed_objects(show_only ? PRUNE_PACKED_DRY_RUN : 0);
remove_temporary_files(get_object_directory());
diff --git a/builtin/reflog.c b/builtin/reflog.c
index b6388f75b0..2d85d260ca 100644
--- a/builtin/reflog.c
+++ b/builtin/reflog.c
@@ -649,7 +649,7 @@ static int cmd_reflog_expire(int argc, const char **argv, const char *prefix)
init_revisions(&cb.revs, prefix);
if (cb.verbose)
printf("Marking reachable objects...");
- mark_reachable_objects(&cb.revs, 0, NULL);
+ mark_reachable_objects(&cb.revs, 0, 0, NULL);
if (cb.verbose)
putchar('\n');
}
diff --git a/builtin/repack.c b/builtin/repack.c
index 2aae05d364..28456206c5 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -209,6 +209,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
argv_array_push(&cmd_args, "--non-empty");
argv_array_push(&cmd_args, "--all");
argv_array_push(&cmd_args, "--reflog");
+ argv_array_push(&cmd_args, "--indexed-objects");
if (window)
argv_array_pushf(&cmd_args, "--window=%s", window);
if (window_memory)
diff --git a/cache.h b/cache.h
index 0501f7dca8..99ed096aed 100644
--- a/cache.h
+++ b/cache.h
@@ -1145,7 +1145,7 @@ extern void prepare_alt_odb(void);
extern void read_info_alternates(const char * relative_base, int depth);
extern void add_to_alternates_file(const char *reference);
typedef int alt_odb_fn(struct alternate_object_database *, void *);
-extern void foreach_alt_odb(alt_odb_fn, void*);
+extern int foreach_alt_odb(alt_odb_fn, void*);
struct pack_window {
struct pack_window *next;
@@ -1241,6 +1241,50 @@ extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsig
extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *);
+/*
+ * Iterate over the files in the loose-object parts of the object
+ * directory "path", triggering the following callbacks:
+ *
+ * - loose_object is called for each loose object we find.
+ *
+ * - loose_cruft is called for any files that do not appear to be
+ * loose objects. Note that we only look in the loose object
+ * directories "objects/[0-9a-f]{2}/", so we will not report
+ * "objects/foobar" as cruft.
+ *
+ * - loose_subdir is called for each top-level hashed subdirectory
+ * of the object directory (e.g., "$OBJDIR/f0"). It is called
+ * after the objects in the directory are processed.
+ *
+ * Any callback that is NULL will be ignored. Callbacks returning non-zero
+ * will end the iteration.
+ */
+typedef int each_loose_object_fn(const unsigned char *sha1,
+ const char *path,
+ void *data);
+typedef int each_loose_cruft_fn(const char *basename,
+ const char *path,
+ void *data);
+typedef int each_loose_subdir_fn(int nr,
+ const char *path,
+ void *data);
+int for_each_loose_file_in_objdir(const char *path,
+ each_loose_object_fn obj_cb,
+ each_loose_cruft_fn cruft_cb,
+ each_loose_subdir_fn subdir_cb,
+ void *data);
+
+/*
+ * Iterate over loose and packed objects in both the local
+ * repository and any alternates repositories.
+ */
+typedef int each_packed_object_fn(const unsigned char *sha1,
+ struct packed_git *pack,
+ uint32_t pos,
+ void *data);
+extern int for_each_loose_object(each_loose_object_fn, void *);
+extern int for_each_packed_object(each_packed_object_fn, void *);
+
struct object_info {
/* Request */
enum object_type *typep;
diff --git a/git-compat-util.h b/git-compat-util.h
index 210712728d..fc83339bd7 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -684,7 +684,7 @@ extern const unsigned char sane_ctype[256];
#define iscntrl(x) (sane_istest(x,GIT_CNTRL))
#define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \
GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC)
-#define isxdigit(x) (hexval_table[x] != -1)
+#define isxdigit(x) (hexval_table[(unsigned char)(x)] != -1)
#define tolower(x) sane_case((unsigned char)(x), 0x20)
#define toupper(x) sane_case((unsigned char)(x), 0)
#define is_pathspec_magic(x) sane_istest(x,GIT_PATHSPEC_MAGIC)
diff --git a/list-objects.c b/list-objects.c
index 3595ee7a22..2910becd6c 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -208,6 +208,7 @@ void traverse_commit_list(struct rev_info *revs,
struct object_array_entry *pending = revs->pending.objects + i;
struct object *obj = pending->item;
const char *name = pending->name;
+ const char *path = pending->path;
if (obj->flags & (UNINTERESTING | SEEN))
continue;
if (obj->type == OBJ_TAG) {
@@ -215,24 +216,21 @@ void traverse_commit_list(struct rev_info *revs,
show_object(obj, NULL, name, data);
continue;
}
+ if (!path)
+ path = "";
if (obj->type == OBJ_TREE) {
process_tree(revs, (struct tree *)obj, show_object,
- NULL, &base, name, data);
+ NULL, &base, path, data);
continue;
}
if (obj->type == OBJ_BLOB) {
process_blob(revs, (struct blob *)obj, show_object,
- NULL, name, data);
+ NULL, path, data);
continue;
}
die("unknown pending object %s (%s)",
sha1_to_hex(obj->sha1), name);
}
- if (revs->pending.nr) {
- free(revs->pending.objects);
- revs->pending.nr = 0;
- revs->pending.alloc = 0;
- revs->pending.objects = NULL;
- }
+ object_array_clear(&revs->pending);
strbuf_release(&base);
}
diff --git a/object.c b/object.c
index ca9d790f4d..23d6c96719 100644
--- a/object.c
+++ b/object.c
@@ -307,10 +307,9 @@ int object_list_contains(struct object_list *list, struct object *obj)
*/
static char object_array_slopbuf[1];
-static void add_object_array_with_mode_context(struct object *obj, const char *name,
- struct object_array *array,
- unsigned mode,
- struct object_context *context)
+void add_object_array_with_path(struct object *obj, const char *name,
+ struct object_array *array,
+ unsigned mode, const char *path)
{
unsigned nr = array->nr;
unsigned alloc = array->alloc;
@@ -333,26 +332,27 @@ static void add_object_array_with_mode_context(struct object *obj, const char *n
else
entry->name = xstrdup(name);
entry->mode = mode;
- entry->context = context;
+ if (path)
+ entry->path = xstrdup(path);
+ else
+ entry->path = NULL;
array->nr = ++nr;
}
void add_object_array(struct object *obj, const char *name, struct object_array *array)
{
- add_object_array_with_mode(obj, name, array, S_IFINVALID);
+ add_object_array_with_path(obj, name, array, S_IFINVALID, NULL);
}
-void add_object_array_with_mode(struct object *obj, const char *name, struct object_array *array, unsigned mode)
+/*
+ * Free all memory associated with an entry; the result is
+ * in an unspecified state and should not be examined.
+ */
+static void object_array_release_entry(struct object_array_entry *ent)
{
- add_object_array_with_mode_context(obj, name, array, mode, NULL);
-}
-
-void add_object_array_with_context(struct object *obj, const char *name, struct object_array *array, struct object_context *context)
-{
- if (context)
- add_object_array_with_mode_context(obj, name, array, context->mode, context);
- else
- add_object_array_with_mode_context(obj, name, array, S_IFINVALID, context);
+ if (ent->name != object_array_slopbuf)
+ free(ent->name);
+ free(ent->path);
}
void object_array_filter(struct object_array *array,
@@ -367,13 +367,22 @@ void object_array_filter(struct object_array *array,
objects[dst] = objects[src];
dst++;
} else {
- if (objects[src].name != object_array_slopbuf)
- free(objects[src].name);
+ object_array_release_entry(&objects[src]);
}
}
array->nr = dst;
}
+void object_array_clear(struct object_array *array)
+{
+ int i;
+ for (i = 0; i < array->nr; i++)
+ object_array_release_entry(&array->objects[i]);
+ free(array->objects);
+ array->objects = NULL;
+ array->nr = array->alloc = 0;
+}
+
/*
* Return true iff array already contains an entry with name.
*/
@@ -400,8 +409,7 @@ void object_array_remove_duplicates(struct object_array *array)
objects[array->nr] = objects[src];
array->nr++;
} else {
- if (objects[src].name != object_array_slopbuf)
- free(objects[src].name);
+ object_array_release_entry(&objects[src]);
}
}
}
diff --git a/object.h b/object.h
index e028ced74c..6416247def 100644
--- a/object.h
+++ b/object.h
@@ -18,8 +18,8 @@ struct object_array {
* empty string.
*/
char *name;
+ char *path;
unsigned mode;
- struct object_context *context;
} *objects;
};
@@ -114,8 +114,7 @@ int object_list_contains(struct object_list *list, struct object *obj);
/* Object array handling .. */
void add_object_array(struct object *obj, const char *name, struct object_array *array);
-void add_object_array_with_mode(struct object *obj, const char *name, struct object_array *array, unsigned mode);
-void add_object_array_with_context(struct object *obj, const char *name, struct object_array *array, struct object_context *context);
+void add_object_array_with_path(struct object *obj, const char *name, struct object_array *array, unsigned mode, const char *path);
typedef int (*object_array_each_func_t)(struct object_array_entry *, void *);
@@ -133,6 +132,12 @@ void object_array_filter(struct object_array *array,
*/
void object_array_remove_duplicates(struct object_array *array);
+/*
+ * Remove any objects from the array, freeing all used memory; afterwards
+ * the array is ready to store more objects with add_object_array().
+ */
+void object_array_clear(struct object_array *array);
+
void clear_object_flags(unsigned flags);
#endif /* OBJECT_H */
diff --git a/reachable.c b/reachable.c
index 6f6835bf27..a647267ae9 100644
--- a/reachable.c
+++ b/reachable.c
@@ -8,6 +8,7 @@
#include "reachable.h"
#include "cache-tree.h"
#include "progress.h"
+#include "list-objects.h"
struct connectivity_progress {
struct progress *progress;
@@ -21,196 +22,134 @@ static void update_progress(struct connectivity_progress *cp)
display_progress(cp->progress, cp->count);
}
-static void process_blob(struct blob *blob,
- struct object_array *p,
- struct name_path *path,
- const char *name,
- struct connectivity_progress *cp)
+static int add_one_ref(const char *path, const unsigned char *sha1, int flag, void *cb_data)
{
- struct object *obj = &blob->object;
+ struct object *object = parse_object_or_die(sha1, path);
+ struct rev_info *revs = (struct rev_info *)cb_data;
- if (!blob)
- die("bad blob object");
- if (obj->flags & SEEN)
- return;
- obj->flags |= SEEN;
- update_progress(cp);
- /* Nothing to do, really .. The blob lookup was the important part */
+ add_pending_object(revs, object, "");
+
+ return 0;
}
-static void process_gitlink(const unsigned char *sha1,
- struct object_array *p,
- struct name_path *path,
- const char *name)
+/*
+ * The traversal will have already marked us as SEEN, so we
+ * only need to handle any progress reporting here.
+ */
+static void mark_object(struct object *obj, const struct name_path *path,
+ const char *name, void *data)
{
- /* I don't think we want to recurse into this, really. */
+ update_progress(data);
}
-static void process_tree(struct tree *tree,
- struct object_array *p,
- struct name_path *path,
- const char *name,
- struct connectivity_progress *cp)
+static void mark_commit(struct commit *c, void *data)
{
- struct object *obj = &tree->object;
- struct tree_desc desc;
- struct name_entry entry;
- struct name_path me;
-
- if (!tree)
- die("bad tree object");
- if (obj->flags & SEEN)
- return;
- obj->flags |= SEEN;
- update_progress(cp);
- if (parse_tree(tree) < 0)
- die("bad tree object %s", sha1_to_hex(obj->sha1));
- add_object(obj, p, path, name);
- me.up = path;
- me.elem = name;
- me.elem_len = strlen(name);
-
- init_tree_desc(&desc, tree->buffer, tree->size);
-
- while (tree_entry(&desc, &entry)) {
- if (S_ISDIR(entry.mode))
- process_tree(lookup_tree(entry.sha1), p, &me, entry.path, cp);
- else if (S_ISGITLINK(entry.mode))
- process_gitlink(entry.sha1, p, &me, entry.path);
- else
- process_blob(lookup_blob(entry.sha1), p, &me, entry.path, cp);
- }
- free_tree_buffer(tree);
+ mark_object(&c->object, NULL, NULL, data);
}
-static void process_tag(struct tag *tag, struct object_array *p,
- const char *name, struct connectivity_progress *cp)
+struct recent_data { /* state shared by the add_recent_* callbacks */
+ struct rev_info *revs; /* traversal that receives recent objects via add_pending_object() */
+ unsigned long timestamp; /* cutoff: objects with mtime <= this are not added */
+};
+
+static void add_recent_object(const unsigned char *sha1,
+ unsigned long mtime,
+ struct recent_data *data)
{
- struct object *obj = &tag->object;
+ struct object *obj;
+ enum object_type type;
- if (obj->flags & SEEN)
+ if (mtime <= data->timestamp)
return;
- obj->flags |= SEEN;
- update_progress(cp);
- if (parse_tag(tag) < 0)
- die("bad tag object %s", sha1_to_hex(obj->sha1));
- if (tag->tagged)
- add_object(tag->tagged, p, NULL, name);
-}
-
-static void walk_commit_list(struct rev_info *revs,
- struct connectivity_progress *cp)
-{
- int i;
- struct commit *commit;
- struct object_array objects = OBJECT_ARRAY_INIT;
-
- /* Walk all commits, process their trees */
- while ((commit = get_revision(revs)) != NULL) {
- process_tree(commit->tree, &objects, NULL, "", cp);
- update_progress(cp);
- }
-
- /* Then walk all the pending objects, recursively processing them too */
- for (i = 0; i < revs->pending.nr; i++) {
- struct object_array_entry *pending = revs->pending.objects + i;
- struct object *obj = pending->item;
- const char *name = pending->name;
- if (obj->type == OBJ_TAG) {
- process_tag((struct tag *) obj, &objects, name, cp);
- continue;
- }
- if (obj->type == OBJ_TREE) {
- process_tree((struct tree *)obj, &objects, NULL, name, cp);
- continue;
- }
- if (obj->type == OBJ_BLOB) {
- process_blob((struct blob *)obj, &objects, NULL, name, cp);
- continue;
- }
- die("unknown pending object %s (%s)", sha1_to_hex(obj->sha1), name);
+ /*
+ * We do not want to call parse_object here, because
+ * inflating blobs and trees could be very expensive.
+ * However, we do need to know the correct type for
+ * later processing, and the revision machinery expects
+ * commits and tags to have been parsed.
+ */
+ type = sha1_object_info(sha1, NULL);
+ if (type < 0)
+ die("unable to get object info for %s", sha1_to_hex(sha1));
+
+ switch (type) {
+ case OBJ_TAG:
+ case OBJ_COMMIT:
+ obj = parse_object_or_die(sha1, NULL);
+ break;
+ case OBJ_TREE:
+ obj = (struct object *)lookup_tree(sha1);
+ break;
+ case OBJ_BLOB:
+ obj = (struct object *)lookup_blob(sha1);
+ break;
+ default:
+ die("unknown object type for %s: %s",
+ sha1_to_hex(sha1), typename(type));
}
-}
-static int add_one_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
- const char *email, unsigned long timestamp, int tz,
- const char *message, void *cb_data)
-{
- struct object *object;
- struct rev_info *revs = (struct rev_info *)cb_data;
+ if (!obj)
+ die("unable to lookup %s", sha1_to_hex(sha1));
- object = parse_object(osha1);
- if (object)
- add_pending_object(revs, object, "");
- object = parse_object(nsha1);
- if (object)
- add_pending_object(revs, object, "");
- return 0;
+ add_pending_object(data->revs, obj, "");
}
-static int add_one_ref(const char *path, const unsigned char *sha1, int flag, void *cb_data)
+static int add_recent_loose(const unsigned char *sha1,
+ const char *path, void *data)
{
- struct object *object = parse_object_or_die(sha1, path);
- struct rev_info *revs = (struct rev_info *)cb_data;
+ struct stat st;
+ struct object *obj = lookup_object(sha1);
- add_pending_object(revs, object, "");
+ if (obj && obj->flags & SEEN)
+ return 0;
- return 0;
-}
+ if (stat(path, &st) < 0) {
+ /*
+ * It's OK if an object went away during our iteration; this
+ * could be due to a simultaneous repack. But anything else
+ * we should abort, since we might then fail to mark objects
+ * which should not be pruned.
+ */
+ if (errno == ENOENT)
+ return 0;
+ return error("unable to stat %s: %s",
+ sha1_to_hex(sha1), strerror(errno));
+ }
-static int add_one_reflog(const char *path, const unsigned char *sha1, int flag, void *cb_data)
-{
- for_each_reflog_ent(path, add_one_reflog_ent, cb_data);
+ add_recent_object(sha1, st.st_mtime, data);
return 0;
}
-static void add_one_tree(const unsigned char *sha1, struct rev_info *revs)
+static int add_recent_packed(const unsigned char *sha1,
+ struct packed_git *p, uint32_t pos,
+ void *data)
{
- struct tree *tree = lookup_tree(sha1);
- if (tree)
- add_pending_object(revs, &tree->object, "");
-}
+ struct object *obj = lookup_object(sha1);
-static void add_cache_tree(struct cache_tree *it, struct rev_info *revs)
-{
- int i;
-
- if (it->entry_count >= 0)
- add_one_tree(it->sha1, revs);
- for (i = 0; i < it->subtree_nr; i++)
- add_cache_tree(it->down[i]->cache_tree, revs);
+ if (obj && obj->flags & SEEN)
+ return 0;
+ add_recent_object(sha1, p->mtime, data);
+ return 0;
}
-static void add_cache_refs(struct rev_info *revs)
+int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
+ unsigned long timestamp)
{
- int i;
+ struct recent_data data;
+ int r;
- read_cache();
- for (i = 0; i < active_nr; i++) {
- /*
- * The index can contain blobs and GITLINKs, GITLINKs are hashes
- * that don't actually point to objects in the repository, it's
- * almost guaranteed that they are NOT blobs, so we don't call
- * lookup_blob() on them, to avoid populating the hash table
- * with invalid information
- */
- if (S_ISGITLINK(active_cache[i]->ce_mode))
- continue;
+ data.revs = revs;
+ data.timestamp = timestamp;
- lookup_blob(active_cache[i]->sha1);
- /*
- * We could add the blobs to the pending list, but quite
- * frankly, we don't care. Once we've looked them up, and
- * added them as objects, we've really done everything
- * there is to do for a blob
- */
- }
- if (active_cache_tree)
- add_cache_tree(active_cache_tree, revs);
+ r = for_each_loose_object(add_recent_loose, &data);
+ if (r)
+ return r;
+ return for_each_packed_object(add_recent_packed, &data);
}
void mark_reachable_objects(struct rev_info *revs, int mark_reflog,
+ unsigned long mark_recent,
struct progress *progress)
{
struct connectivity_progress cp;
@@ -224,7 +163,7 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog,
revs->tree_objects = 1;
/* Add all refs from the index file */
- add_cache_refs(revs);
+ add_index_objects_to_pending(revs, 0);
/* Add all external refs */
for_each_ref(add_one_ref, revs);
@@ -234,7 +173,7 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog,
/* Add all reflog info */
if (mark_reflog)
- for_each_reflog(add_one_reflog, revs);
+ add_reflogs_to_pending(revs, 0);
cp.progress = progress;
cp.count = 0;
@@ -245,6 +184,16 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog,
*/
if (prepare_revision_walk(revs))
die("revision walk setup failed");
- walk_commit_list(revs, &cp);
+ traverse_commit_list(revs, mark_commit, mark_object, &cp);
+
+ if (mark_recent) {
+ revs->ignore_missing_links = 1;
+ if (add_unseen_recent_objects_to_traversal(revs, mark_recent))
+ die("unable to mark recent objects");
+ if (prepare_revision_walk(revs))
+ die("revision walk setup failed");
+ traverse_commit_list(revs, mark_commit, mark_object, &cp);
+ }
+
display_progress(cp.progress, cp.count);
}
diff --git a/reachable.h b/reachable.h
index 5d082adfec..d23efc36ec 100644
--- a/reachable.h
+++ b/reachable.h
@@ -2,6 +2,9 @@
#define REACHEABLE_H
struct progress;
-extern void mark_reachable_objects(struct rev_info *revs, int mark_reflog, struct progress *);
+extern int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
+ unsigned long timestamp);
+extern void mark_reachable_objects(struct rev_info *revs, int mark_reflog,
+ unsigned long mark_recent, struct progress *);
#endif
diff --git a/revision.c b/revision.c
index e498b7c339..75dda928ea 100644
--- a/revision.c
+++ b/revision.c
@@ -17,6 +17,7 @@
#include "mailmap.h"
#include "commit-slab.h"
#include "dir.h"
+#include "cache-tree.h"
volatile show_early_output_fn_t show_early_output;
@@ -86,16 +87,6 @@ void show_object_with_name(FILE *out, struct object *obj,
fputc('\n', out);
}
-void add_object(struct object *obj,
- struct object_array *p,
- struct name_path *path,
- const char *name)
-{
- char *pn = path_name(path, name);
- add_object_array(obj, pn, p);
- free(pn);
-}
-
static void mark_blob_uninteresting(struct blob *blob)
{
if (!blob)
@@ -198,9 +189,10 @@ void mark_parents_uninteresting(struct commit *commit)
}
}
-static void add_pending_object_with_mode(struct rev_info *revs,
+static void add_pending_object_with_path(struct rev_info *revs,
struct object *obj,
- const char *name, unsigned mode)
+ const char *name, unsigned mode,
+ const char *path)
{
if (!obj)
return;
@@ -220,7 +212,14 @@ static void add_pending_object_with_mode(struct rev_info *revs,
if (st)
return;
}
- add_object_array_with_mode(obj, name, &revs->pending, mode);
+ add_object_array_with_path(obj, name, &revs->pending, mode, path);
+}
+
+/*
+ * Convenience wrapper for callers that have a mode but no path to
+ * record: forwards to add_pending_object_with_path() with a NULL path.
+ */
+static void add_pending_object_with_mode(struct rev_info *revs, struct object *obj,
+					 const char *name, unsigned mode)
+{
+	const char *no_path = NULL;
+
+	add_pending_object_with_path(revs, obj, name, mode, no_path);
+}
void add_pending_object(struct rev_info *revs,
@@ -265,8 +264,12 @@ void add_pending_sha1(struct rev_info *revs, const char *name,
}
static struct commit *handle_commit(struct rev_info *revs,
- struct object *object, const char *name)
+ struct object_array_entry *entry)
{
+ struct object *object = entry->item;
+ const char *name = entry->name;
+ const char *path = entry->path;
+ unsigned int mode = entry->mode;
unsigned long flags = object->flags;
/*
@@ -285,6 +288,14 @@ static struct commit *handle_commit(struct rev_info *revs,
die("bad object %s", sha1_to_hex(tag->tagged->sha1));
}
object->flags |= flags;
+ /*
+ * We'll handle the tagged object by looping or dropping
+ * through to the non-tag handlers below. Do not
+ * propagate data from the tag's pending entry.
+ */
+ name = "";
+ path = NULL;
+ mode = 0;
}
/*
@@ -300,7 +311,7 @@ static struct commit *handle_commit(struct rev_info *revs,
revs->limited = 1;
}
if (revs->show_source && !commit->util)
- commit->util = (void *) name;
+ commit->util = xstrdup(name);
return commit;
}
@@ -316,7 +327,7 @@ static struct commit *handle_commit(struct rev_info *revs,
mark_tree_contents_uninteresting(tree);
return NULL;
}
- add_pending_object(revs, object, "");
+ add_pending_object_with_path(revs, object, name, mode, path);
return NULL;
}
@@ -328,7 +339,7 @@ static struct commit *handle_commit(struct rev_info *revs,
return NULL;
if (flags & UNINTERESTING)
return NULL;
- add_pending_object(revs, object, "");
+ add_pending_object_with_path(revs, object, name, mode, path);
return NULL;
}
die("%s is unknown object", name);
@@ -1275,7 +1286,7 @@ static int handle_one_reflog(const char *path, const unsigned char *sha1, int fl
return 0;
}
-static void handle_reflog(struct rev_info *revs, unsigned flags)
+void add_reflogs_to_pending(struct rev_info *revs, unsigned flags)
{
struct all_refs_cb cb;
cb.all_revs = revs;
@@ -1283,6 +1294,53 @@ static void handle_reflog(struct rev_info *revs, unsigned flags)
for_each_reflog(handle_one_reflog, &cb);
}
+/*
+ * Queue the tree recorded in cache-tree node "it" (when its entry_count
+ * is non-negative) and then recurse into all of its subtrees.
+ *
+ * "path" holds the directory name of "it" (empty for the top node); it
+ * is extended for each subtree during the recursion and restored to its
+ * original length before returning.  "flags" is OR-ed into every tree
+ * that gets queued.
+ */
+static void add_cache_tree(struct cache_tree *it, struct rev_info *revs,
+			   struct strbuf *path, unsigned flags)
+{
+	size_t baselen = path->len;
+	int i;
+
+	if (it->entry_count >= 0) {
+		struct tree *tree = lookup_tree(it->sha1);
+
+		/*
+		 * Skip the node when the lookup fails rather than
+		 * forming &tree->object from a NULL pointer.
+		 */
+		if (tree) {
+			tree->object.flags |= flags;
+			add_pending_object_with_path(revs, &tree->object, "",
+						     040000, path->buf);
+		}
+	}
+
+	for (i = 0; i < it->subtree_nr; i++) {
+		struct cache_tree_sub *sub = it->down[i];
+
+		strbuf_addf(path, "%s%s", baselen ? "/" : "", sub->name);
+		add_cache_tree(sub->cache_tree, revs, path, flags);
+		strbuf_setlen(path, baselen);
+	}
+}
+
+/*
+ * Read the index and queue its objects on "revs": every non-gitlink
+ * entry as a blob (with the entry's mode and name as mode/path), plus
+ * the trees found in the cache-tree, if there is one.
+ *
+ * "flags" is OR-ed into each queued object's flags, so a caller that
+ * passes e.g. UNINTERESTING gets the index objects marked accordingly;
+ * passing 0 leaves the objects' flags untouched.
+ *
+ * Dies if an index entry's sha1 cannot be looked up as a blob.
+ */
+void add_index_objects_to_pending(struct rev_info *revs, unsigned flags)
+{
+	int i;
+
+	read_cache();
+	for (i = 0; i < active_nr; i++) {
+		struct cache_entry *ce = active_cache[i];
+		struct blob *blob;
+
+		/* Gitlink entries are not looked up as blobs; skip them. */
+		if (S_ISGITLINK(ce->ce_mode))
+			continue;
+
+		blob = lookup_blob(ce->sha1);
+		if (!blob)
+			die("unable to add index blob to traversal");
+		blob->object.flags |= flags;
+		add_pending_object_with_path(revs, &blob->object, "",
+					     ce->ce_mode, ce->name);
+	}
+
+	if (active_cache_tree) {
+		struct strbuf path = STRBUF_INIT;
+
+		add_cache_tree(active_cache_tree, revs, &path, flags);
+		strbuf_release(&path);
+	}
+}
+
static int add_parents_only(struct rev_info *revs, const char *arg_, int flags)
{
unsigned char sha1[20];
@@ -1633,6 +1691,7 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
!strcmp(arg, "--reflog") || !strcmp(arg, "--not") ||
!strcmp(arg, "--no-walk") || !strcmp(arg, "--do-walk") ||
!strcmp(arg, "--bisect") || starts_with(arg, "--glob=") ||
+ !strcmp(arg, "--indexed-objects") ||
starts_with(arg, "--exclude=") ||
starts_with(arg, "--branches=") || starts_with(arg, "--tags=") ||
starts_with(arg, "--remotes=") || starts_with(arg, "--no-walk="))
@@ -2061,7 +2120,9 @@ static int handle_revision_pseudo_opt(const char *submodule,
for_each_glob_ref_in(handle_one_ref, arg + 10, "refs/remotes/", &cb);
clear_ref_exclusion(&revs->ref_excludes);
} else if (!strcmp(arg, "--reflog")) {
- handle_reflog(revs, *flags);
+ add_reflogs_to_pending(revs, *flags);
+ } else if (!strcmp(arg, "--indexed-objects")) {
+ add_index_objects_to_pending(revs, *flags);
} else if (!strcmp(arg, "--not")) {
*flags ^= UNINTERESTING | BOTTOM;
} else if (!strcmp(arg, "--no-walk")) {
@@ -2656,26 +2717,26 @@ void reset_revision_walk(void)
int prepare_revision_walk(struct rev_info *revs)
{
- int nr = revs->pending.nr;
- struct object_array_entry *e, *list;
+ int i;
+ struct object_array old_pending;
struct commit_list **next = &revs->commits;
- e = list = revs->pending.objects;
+ memcpy(&old_pending, &revs->pending, sizeof(old_pending));
revs->pending.nr = 0;
revs->pending.alloc = 0;
revs->pending.objects = NULL;
- while (--nr >= 0) {
- struct commit *commit = handle_commit(revs, e->item, e->name);
+ for (i = 0; i < old_pending.nr; i++) {
+ struct object_array_entry *e = old_pending.objects + i;
+ struct commit *commit = handle_commit(revs, e);
if (commit) {
if (!(commit->object.flags & SEEN)) {
commit->object.flags |= SEEN;
next = commit_list_append(commit, next);
}
}
- e++;
}
if (!revs->leak_pending)
- free(list);
+ object_array_clear(&old_pending);
/* Signal whether we need per-parent treesame decoration */
if (revs->simplify_merges ||
diff --git a/revision.h b/revision.h
index a6205307cf..9cb5adc4ea 100644
--- a/revision.h
+++ b/revision.h
@@ -264,11 +264,6 @@ char *path_name(const struct name_path *path, const char *name);
extern void show_object_with_name(FILE *, struct object *,
const struct name_path *, const char *);
-extern void add_object(struct object *obj,
- struct object_array *p,
- struct name_path *path,
- const char *name);
-
extern void add_pending_object(struct rev_info *revs,
struct object *obj, const char *name);
extern void add_pending_sha1(struct rev_info *revs,
@@ -276,6 +271,8 @@ extern void add_pending_sha1(struct rev_info *revs,
unsigned int flags);
extern void add_head_to_pending(struct rev_info *);
+extern void add_reflogs_to_pending(struct rev_info *, unsigned int flags);
+extern void add_index_objects_to_pending(struct rev_info *, unsigned int flags);
enum commit_action {
commit_ignore,
diff --git a/sha1_file.c b/sha1_file.c
index 83f77f01b6..d7f1838c13 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -413,14 +413,18 @@ void add_to_alternates_file(const char *reference)
link_alt_odb_entries(alt, strlen(alt), '\n', NULL, 0);
}
-void foreach_alt_odb(alt_odb_fn fn, void *cb)
+int foreach_alt_odb(alt_odb_fn fn, void *cb)
{
struct alternate_object_database *ent;
+ int r = 0;
prepare_alt_odb();
- for (ent = alt_odb_list; ent; ent = ent->next)
- if (fn(ent, cb))
- return;
+ for (ent = alt_odb_list; ent; ent = ent->next) {
+ r = fn(ent, cb);
+ if (r)
+ break;
+ }
+ return r;
}
void prepare_alt_odb(void)
@@ -439,27 +443,53 @@ void prepare_alt_odb(void)
read_info_alternates(get_object_directory(), 0);
}
-static int has_loose_object_local(const unsigned char *sha1)
+static int freshen_file(const char *fn)
{
- return !access(sha1_file_name(sha1), F_OK);
+ struct utimbuf t;
+ t.actime = t.modtime = time(NULL);
+ return !utime(fn, &t);
}
-int has_loose_object_nonlocal(const unsigned char *sha1)
+/*
+ * Return 1 if the file "fn" exists and, when "freshen" is non-zero, its
+ * timestamps could also be bumped to the current time.  Return 0 if the
+ * file is missing or the requested freshening failed.
+ */
+static int check_and_freshen_file(const char *fn, int freshen)
+{
+	if (access(fn, F_OK))
+		return 0;
+	/*
+	 * freshen_file() returns non-zero on success.  When the update
+	 * fails, report the file as absent so that the caller falls back
+	 * to writing a new copy instead of relying on a stale mtime.
+	 */
+	if (freshen && !freshen_file(fn))
+		return 0;
+	return 1;
+}
+
+/*
+ * Check for (and optionally freshen) the loose object file at the path
+ * that sha1_file_name() yields for "sha1".
+ */
+static int check_and_freshen_local(const unsigned char *sha1, int freshen)
+{
+	const char *loose_path = sha1_file_name(sha1);
+
+	return check_and_freshen_file(loose_path, freshen);
+}
+
+static int check_and_freshen_nonlocal(const unsigned char *sha1, int freshen)
{
struct alternate_object_database *alt;
prepare_alt_odb();
for (alt = alt_odb_list; alt; alt = alt->next) {
fill_sha1_path(alt->name, sha1);
- if (!access(alt->base, F_OK))
+ if (check_and_freshen_file(alt->base, freshen))
return 1;
}
return 0;
}
+/*
+ * Look for the loose object locally first and only consult the
+ * alternates when it is not found (or could not be freshened) there.
+ * Returns 1 on a hit, 0 otherwise.
+ */
+static int check_and_freshen(const unsigned char *sha1, int freshen)
+{
+	if (check_and_freshen_local(sha1, freshen))
+		return 1;
+	return check_and_freshen_nonlocal(sha1, freshen) ? 1 : 0;
+}
+
+/* Pure existence check in the alternates; never touches timestamps. */
+int has_loose_object_nonlocal(const unsigned char *sha1)
+{
+	const int freshen = 0;
+
+	return check_and_freshen_nonlocal(sha1, freshen);
+}
+
static int has_loose_object(const unsigned char *sha1)
{
- return has_loose_object_local(sha1) ||
- has_loose_object_nonlocal(sha1);
+ return check_and_freshen(sha1, 0);
}
static unsigned int pack_used_ctr;
@@ -2962,6 +2992,17 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
return move_temp_to_file(tmp_file, filename);
}
+/*
+ * Same lookup as has_loose_object(), but with freshening requested, so
+ * a hit also means the file's timestamps were updated.
+ */
+static int freshen_loose_object(const unsigned char *sha1)
+{
+	const int freshen = 1;
+
+	return check_and_freshen(sha1, freshen);
+}
+
+/*
+ * If "sha1" is found in a pack, freshen that pack file.  Returns 1 when
+ * the object was found and the pack was freshened, 0 otherwise.
+ */
+static int freshen_packed_object(const unsigned char *sha1)
+{
+	struct pack_entry e;
+
+	if (!find_pack_entry(sha1, &e))
+		return 0;
+	return freshen_file(e.p->pack_name) != 0;
+}
+
int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *returnsha1)
{
unsigned char sha1[20];
@@ -2974,7 +3015,7 @@ int write_sha1_file(const void *buf, unsigned long len, const char *type, unsign
write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen);
if (returnsha1)
hashcpy(returnsha1, sha1);
- if (has_sha1_file(sha1))
+ if (freshen_loose_object(sha1) || freshen_packed_object(sha1))
return 0;
return write_loose_object(sha1, hdr, hdrlen, buf, len, 0);
}
@@ -3261,3 +3302,149 @@ void assert_sha1_type(const unsigned char *sha1, enum object_type expect)
die("%s is not a valid '%s' object", sha1_to_hex(sha1),
typename(expect));
}
+
+/*
+ * Iterate over the entries of one object subdirectory.
+ *
+ * "path" names the subdirectory on entry and is restored to that value
+ * before returning; "subdir_nr" is the number whose two-digit hex form
+ * is prepended to each entry name to build a candidate object name.
+ *
+ *  - obj_cb is called for each entry whose name is 38 characters long
+ *    and, combined with subdir_nr, parses as a hex sha1;
+ *  - cruft_cb is called for every other entry except "." and "..";
+ *  - subdir_cb is called once after the entries, unless a callback
+ *    already returned non-zero.
+ *
+ * Any callback may be NULL.  The first non-zero callback return stops
+ * the iteration and is returned.  A missing directory is not an error
+ * (returns 0); any other opendir() failure is reported via error().
+ */
+static int for_each_file_in_obj_subdir(int subdir_nr,
+				       struct strbuf *path,
+				       each_loose_object_fn obj_cb,
+				       each_loose_cruft_fn cruft_cb,
+				       each_loose_subdir_fn subdir_cb,
+				       void *data)
+{
+	size_t baselen = path->len;
+	DIR *dir = opendir(path->buf);
+	struct dirent *de;
+	int r = 0;
+
+	if (!dir) {
+		if (errno == ENOENT)
+			return 0;
+		return error("unable to open %s: %s", path->buf, strerror(errno));
+	}
+
+	while ((de = readdir(dir))) {
+		if (is_dot_or_dotdot(de->d_name))
+			continue;
+
+		strbuf_setlen(path, baselen);
+		strbuf_addf(path, "/%s", de->d_name);
+
+		if (strlen(de->d_name) == 38) {
+			char hex[41];
+			unsigned char sha1[20];
+
+			snprintf(hex, sizeof(hex), "%02x%s",
+				 subdir_nr, de->d_name);
+			if (!get_sha1_hex(hex, sha1)) {
+				if (obj_cb) {
+					r = obj_cb(sha1, path->buf, data);
+					if (r)
+						break;
+				}
+				continue;
+			}
+		}
+
+		/* Not a well-formed object name: hand it to the cruft callback. */
+		if (cruft_cb) {
+			r = cruft_cb(de->d_name, path->buf, data);
+			if (r)
+				break;
+		}
+	}
+	strbuf_setlen(path, baselen);
+
+	/*
+	 * Release our handle before the per-directory callback runs, so
+	 * that a callback which removes the directory is not hindered by
+	 * the directory still being open here.
+	 */
+	closedir(dir);
+
+	if (!r && subdir_cb)
+		r = subdir_cb(subdir_nr, path->buf, data);
+
+	return r;
+}
+
+/*
+ * Visit the 256 two-hex-digit subdirectories ("00" through "ff") of the
+ * object directory "path" in order, handing each one to
+ * for_each_file_in_obj_subdir() together with the given callbacks.
+ * Stops at, and returns, the first non-zero result; returns 0 if every
+ * subdirectory was processed.
+ */
+int for_each_loose_file_in_objdir(const char *path,
+			    each_loose_object_fn obj_cb,
+			    each_loose_cruft_fn cruft_cb,
+			    each_loose_subdir_fn subdir_cb,
+			    void *data)
+{
+	struct strbuf dirbuf = STRBUF_INIT;
+	size_t prefix_len;
+	int subdir_nr = 0;
+	int ret = 0;
+
+	strbuf_addf(&dirbuf, "%s/", path);
+	prefix_len = dirbuf.len;
+
+	while (!ret && subdir_nr < 256) {
+		strbuf_addf(&dirbuf, "%02x", subdir_nr);
+		ret = for_each_file_in_obj_subdir(subdir_nr, &dirbuf, obj_cb,
+						  cruft_cb, subdir_cb, data);
+		/* drop the two hex digits again for the next round */
+		strbuf_setlen(&dirbuf, prefix_len);
+		subdir_nr++;
+	}
+
+	strbuf_release(&dirbuf);
+	return ret;
+}
+
+struct loose_alt_odb_data { /* context handed to loose_from_alt_odb() through foreach_alt_odb() */
+ each_loose_object_fn *cb; /* per-object callback given to for_each_loose_object() */
+ void *data; /* caller's opaque pointer, passed through to cb */
+};
+
+/*
+ * foreach_alt_odb() callback: run the loose-object callback stored in
+ * "vdata" (a struct loose_alt_odb_data) over one alternate's object
+ * directory.
+ */
+static int loose_from_alt_odb(struct alternate_object_database *alt,
+			      void *vdata)
+{
+	struct loose_alt_odb_data *data = vdata;
+	struct strbuf objdir = STRBUF_INIT;
+	int r;
+
+	/*
+	 * alt->base is a scratch buffer: other code writes per-object
+	 * paths at alt->name, so the string at alt->base may carry a
+	 * leftover suffix.  Copy only the directory part, i.e. everything
+	 * before the separator that precedes alt->name.
+	 */
+	strbuf_add(&objdir, alt->base, alt->name - alt->base - 1);
+	r = for_each_loose_file_in_objdir(objdir.buf,
+					  data->cb, NULL, NULL,
+					  data->data);
+	strbuf_release(&objdir);
+	return r;
+}
+
+/*
+ * Call "cb" for each loose object, first in the directory returned by
+ * get_object_directory() and then in every alternate.  The first
+ * non-zero return from a callback ends the iteration and is returned.
+ */
+int for_each_loose_object(each_loose_object_fn cb, void *data)
+{
+	struct loose_alt_odb_data alt_ctx;
+	int ret = for_each_loose_file_in_objdir(get_object_directory(),
+						cb, NULL, NULL, data);
+
+	if (!ret) {
+		alt_ctx.cb = cb;
+		alt_ctx.data = data;
+		ret = foreach_alt_odb(loose_from_alt_odb, &alt_ctx);
+	}
+	return ret;
+}
+
+/*
+ * Call "cb" for positions 0 .. p->num_objects-1 of pack "p", passing
+ * the sha1 found at each position.  Returns the first non-zero callback
+ * result, an error if a sha1 cannot be retrieved, or 0 once all
+ * positions have been visited.
+ */
+static int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data)
+{
+	uint32_t pos = 0;
+
+	while (pos < p->num_objects) {
+		const unsigned char *sha1 = nth_packed_object_sha1(p, pos);
+		int ret;
+
+		if (!sha1)
+			return error("unable to get sha1 of object %u in %s",
+				     pos, p->pack_name);
+
+		ret = cb(sha1, p, pos, data);
+		if (ret)
+			return ret;
+		pos++;
+	}
+	return 0;
+}
+
+/*
+ * Call "cb" for every object in every pack on the packed_git list (as
+ * set up by prepare_packed_git()).
+ *
+ * Returns the first non-zero value produced while iterating a pack
+ * (which also stops the walk); otherwise non-zero if the index of some
+ * pack could not be opened; otherwise 0.
+ */
+int for_each_packed_object(each_packed_object_fn cb, void *data)
+{
+	struct packed_git *p;
+	int r = 0;
+	int pack_errors = 0;
+
+	prepare_packed_git();
+	for (p = packed_git; p; p = p->next) {
+		/*
+		 * The per-pack loop is bounded by p->num_objects, which
+		 * is only filled in once the pack index has been loaded.
+		 * Load it explicitly, and remember a failure rather than
+		 * silently treating the pack as empty.
+		 */
+		if (open_pack_index(p)) {
+			pack_errors = 1;
+			continue;
+		}
+		r = for_each_object_in_pack(p, cb, data);
+		if (r)
+			break;
+	}
+	return r ? r : pack_errors;
+}
diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh
index 67e0ab3462..7c8a769a90 100755
--- a/t/t5516-fetch-push.sh
+++ b/t/t5516-fetch-push.sh
@@ -1277,4 +1277,17 @@ EOF
git push --no-thin --receive-pack="$rcvpck" no-thin/.git refs/heads/master:refs/heads/foo
'
+test_expect_success 'pushing a tag pushes the tagged object' '
+ rm -rf dst.git &&
+ blob=$(echo unreferenced | git hash-object -w --stdin) &&
+ git tag -m foo tag-of-blob $blob &&
+ git init --bare dst.git &&
+ git push dst.git tag-of-blob &&
+ # the receiving index-pack should have noticed
+ # any problems, but we double check
+ echo unreferenced >expect &&
+ git --git-dir=dst.git cat-file blob tag-of-blob >actual &&
+ test_cmp expect actual
+'
+
test_done
diff --git a/t/t6000-rev-list-misc.sh b/t/t6000-rev-list-misc.sh
index 3794e4ceaf..2602086303 100755
--- a/t/t6000-rev-list-misc.sh
+++ b/t/t6000-rev-list-misc.sh
@@ -73,4 +73,27 @@ test_expect_success 'symleft flag bit is propagated down from tag' '
test_cmp expect actual
'
+test_expect_success 'rev-list can show index objects' '
+ # Of the blobs and trees in the index, note:
+ #
+ # - we do not show two/three, because it is the
+ # same blob as "one", and we show objects only once
+ #
+ # - we do show the tree "two", because it has a valid cache tree
+ # from the last commit
+ #
+ # - we do not show the root tree; since we updated the index, it
+ # does not have a valid cache tree
+ #
+ cat >expect <<-\EOF
+ 8e4020bb5a8d8c873b25de15933e75cc0fc275df one
+ d9d3a7417b9605cfd88ee6306b28dadc29e6ab08 only-in-index
+ 9200b628cf9dc883a85a7abc8d6e6730baee589c two
+ EOF
+ echo only-in-index >only-in-index &&
+ git add only-in-index &&
+ git rev-list --objects --indexed-objects >actual &&
+ test_cmp expect actual
+'
+
test_done
diff --git a/t/t6501-freshen-objects.sh b/t/t6501-freshen-objects.sh
new file mode 100755
index 0000000000..157f3f91db
--- /dev/null
+++ b/t/t6501-freshen-objects.sh
@@ -0,0 +1,132 @@
+#!/bin/sh
+#
+# This test covers the handling of objects which might have old
+# mtimes in the filesystem (because they were used previously)
+# and are just now becoming referenced again.
+#
+# We're going to do two things that are a little bit "fake" to
+# help make our simulation easier:
+#
+# 1. We'll turn off reflogs. You can still run into
+# problems with reflogs on, but your objects
+# don't get pruned until both the reflog expiration
+# has passed on their references, _and_ they are out
+# of prune's expiration period. Dropping reflogs
+# means we only have to deal with one variable in our tests,
+# but the results generalize.
+#
+# 2. We'll use a temporary index file to create our
+# works-in-progress. Most workflows would mention
+# referenced objects in the index, which prune takes
+# into account. However, many operations don't. For
+# example, a partial commit with "git commit foo"
+# will use a temporary index. Or they may not need
+# an index at all (e.g., creating a new commit
+# to refer to an existing tree).
+
+test_description='check pruning of dependent objects'
+. ./test-lib.sh
+
+# We care about reachability, so we do not want to use
+# the normal test_commit, which creates extra tags.
+add () { # write "$1" into a file named "$1" and stage that file
+ echo "$1" >"$1" &&
+ git add "$1"
+}
+commit () { # call test_tick, stage file "$1" via add, then commit with message "$1"
+ test_tick &&
+ add "$1" &&
+ git commit -m "$1"
+}
+
+# Run "git repack -ad", but only when $repack is non-empty; otherwise
+# succeed without doing anything.
+maybe_repack () {
+	test -z "$repack" ||
+	git repack -ad
+}
+
+# Run the whole scenario twice: first with $repack empty (maybe_repack
+# does nothing), then with repack=true (maybe_repack runs "git repack -ad").
+for repack in '' true; do
+ # title is "repack" when $repack is set, "loose" otherwise
+ title=${repack:+repack}
+ title=${title:-loose}
+
+ test_expect_success "make repo completely empty ($title)" '
+ rm -rf .git &&
+ git init
+ '
+
+ test_expect_success "disable reflogs ($title)" '
+ git config core.logallrefupdates false &&
+ rm -rf .git/logs
+ '
+
+ test_expect_success "setup basic history ($title)" '
+ commit base
+ '
+
+ test_expect_success "create and abandon some objects ($title)" '
+ git checkout -b experiment &&
+ commit abandon &&
+ maybe_repack &&
+ git checkout master &&
+ git branch -D experiment
+ '
+
+ test_expect_success "simulate time passing ($title)" '
+ find .git/objects -type f |
+ xargs test-chmtime -v -86400
+ '
+
+ test_expect_success "start writing new commit with old blob ($title)" '
+ tree=$(
+ GIT_INDEX_FILE=index.tmp &&
+ export GIT_INDEX_FILE &&
+ git read-tree HEAD &&
+ add unrelated &&
+ add abandon &&
+ git write-tree
+ )
+ '
+
+ test_expect_success "simultaneous gc ($title)" '
+ git gc --prune=12.hours.ago
+ '
+
+ test_expect_success "finish writing out commit ($title)" '
+ commit=$(echo foo | git commit-tree -p HEAD $tree) &&
+ git update-ref HEAD $commit
+ '
+
+ # "abandon" blob should have been rescued by reference from new tree
+ test_expect_success "repository passes fsck ($title)" '
+ git fsck
+ '
+
+ # Second round: drop the commit just created and age every object
+ # file again before writing the tree a second time.
+ test_expect_success "abandon objects again ($title)" '
+ git reset --hard HEAD^ &&
+ find .git/objects -type f |
+ xargs test-chmtime -v -86400
+ '
+
+ test_expect_success "start writing new commit with same tree ($title)" '
+ tree=$(
+ GIT_INDEX_FILE=index.tmp &&
+ export GIT_INDEX_FILE &&
+ git read-tree HEAD &&
+ add abandon &&
+ add unrelated &&
+ git write-tree
+ )
+ '
+
+ test_expect_success "simultaneous gc ($title)" '
+ git gc --prune=12.hours.ago
+ '
+
+ # tree should have been refreshed by write-tree
+ test_expect_success "finish writing out commit ($title)" '
+ commit=$(echo foo | git commit-tree -p HEAD $tree) &&
+ git update-ref HEAD $commit
+ '
+done
+
+test_done
diff --git a/t/t7701-repack-unpack-unreachable.sh b/t/t7701-repack-unpack-unreachable.sh
index b8d4cdea8c..aad8a9c64d 100755
--- a/t/t7701-repack-unpack-unreachable.sh
+++ b/t/t7701-repack-unpack-unreachable.sh
@@ -109,4 +109,17 @@ test_expect_success 'do not bother loosening old objects' '
test_must_fail git cat-file -p $obj2
'
+test_expect_success 'keep packed objects found only in index' '
+ echo my-unique-content >file &&
+ git add file &&
+ git commit -m "make it reachable" &&
+ git gc &&
+ git reset HEAD^ &&
+ git reflog expire --expire=now --all &&
+ git add file &&
+ test-chmtime =-86400 .git/objects/pack/* &&
+ git gc --prune=1.hour.ago &&
+ git cat-file blob :file
+'
+
test_done
diff --git a/urlmatch.c b/urlmatch.c
index 3d4c54b5cd..618d216491 100644
--- a/urlmatch.c
+++ b/urlmatch.c
@@ -43,11 +43,11 @@ static int append_normalized_escapes(struct strbuf *buf,
from_len--;
if (ch == '%') {
if (from_len < 2 ||
- !isxdigit((unsigned char)from[0]) ||
- !isxdigit((unsigned char)from[1]))
+ !isxdigit(from[0]) ||
+ !isxdigit(from[1]))
return 0;
- ch = hexval_table[(unsigned char)*from++] << 4;
- ch |= hexval_table[(unsigned char)*from++];
+ ch = hexval(*from++) << 4;
+ ch |= hexval(*from++);
from_len -= 2;
was_esc = 1;
}