diff options
author | Junio C Hamano <gitster@pobox.com> | 2017-12-13 22:28:54 +0100 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2017-12-13 22:28:54 +0100 |
commit | 721cc4314cb593e799213ad5f926a1e9fc5779b0 (patch) | |
tree | ffa71b39724a9eed8dc7c653cafb4e1211e7820d | |
parent | RelNotes: the eighth batch (diff) | |
parent | repository: fix a sparse 'using integer as NULL pointer' warning (diff) | |
download | git-721cc4314cb593e799213ad5f926a1e9fc5779b0.tar.xz git-721cc4314cb593e799213ad5f926a1e9fc5779b0.zip |
Merge branch 'bc/hash-algo'
An infrastructure to define what hash function is used in Git is
introduced, and an effort to plumb that throughout various
codepaths has been started.
* bc/hash-algo:
  repository: fix a sparse 'using integer as NULL pointer' warning
  Switch empty tree and blob lookups to use hash abstraction
  Integrate hash algorithm support with repo setup
  Add structure representing hash algorithm
  setup: expose enumerated repo info
-rw-r--r-- | builtin/am.c | 2 | ||||
-rw-r--r-- | builtin/checkout.c | 2 | ||||
-rw-r--r-- | builtin/diff.c | 2 | ||||
-rw-r--r-- | builtin/pull.c | 2 | ||||
-rw-r--r-- | cache.h | 12 | ||||
-rw-r--r-- | diff-lib.c | 2 | ||||
-rw-r--r-- | hash.h | 57 | ||||
-rw-r--r-- | merge-recursive.c | 2 | ||||
-rw-r--r-- | notes-merge.c | 2 | ||||
-rw-r--r-- | repository.c | 9 | ||||
-rw-r--r-- | repository.h | 5 | ||||
-rw-r--r-- | sequencer.c | 6 | ||||
-rw-r--r-- | setup.c | 49 | ||||
-rw-r--r-- | sha1_file.c | 58 | ||||
-rw-r--r-- | submodule.c | 2 |
15 files changed, 175 insertions, 37 deletions
diff --git a/builtin/am.c b/builtin/am.c index 02853b3e05..3d98e52085 100644 --- a/builtin/am.c +++ b/builtin/am.c @@ -1433,7 +1433,7 @@ static void write_index_patch(const struct am_state *state) if (!get_oid_tree("HEAD", &head)) tree = lookup_tree(&head); else - tree = lookup_tree(&empty_tree_oid); + tree = lookup_tree(the_hash_algo->empty_tree); fp = xfopen(am_path(state, "patch"), "w"); init_revisions(&rev_info, NULL); diff --git a/builtin/checkout.c b/builtin/checkout.c index 3faae382de..e1e157d205 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -514,7 +514,7 @@ static int merge_working_tree(const struct checkout_opts *opts, } tree = parse_tree_indirect(old->commit ? &old->commit->object.oid : - &empty_tree_oid); + the_hash_algo->empty_tree); init_tree_desc(&trees[0], tree->buffer, tree->size); tree = parse_tree_indirect(&new->commit->object.oid); init_tree_desc(&trees[1], tree->buffer, tree->size); diff --git a/builtin/diff.c b/builtin/diff.c index 9808d062a8..16bfb22f73 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -379,7 +379,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix) add_head_to_pending(&rev); if (!rev.pending.nr) { struct tree *tree; - tree = lookup_tree(&empty_tree_oid); + tree = lookup_tree(the_hash_algo->empty_tree); add_pending_object(&rev, &tree->object, "HEAD"); } break; diff --git a/builtin/pull.c b/builtin/pull.c index 166b777ed6..511dbbe0f6 100644 --- a/builtin/pull.c +++ b/builtin/pull.c @@ -557,7 +557,7 @@ static int pull_into_void(const struct object_id *merge_head, * index/worktree changes that the user already made on the unborn * branch. 
*/ - if (checkout_fast_forward(&empty_tree_oid, merge_head, 0)) + if (checkout_fast_forward(the_hash_algo->empty_tree, merge_head, 0)) return 1; if (update_ref("initial pull", "HEAD", merge_head, curr_head, 0, UPDATE_REFS_DIE_ON_ERR)) @@ -14,6 +14,7 @@ #include "hash.h" #include "path.h" #include "sha1-array.h" +#include "repository.h" #ifndef platform_SHA_CTX /* @@ -77,6 +78,8 @@ struct object_id { unsigned char hash[GIT_MAX_RAWSZ]; }; +#define the_hash_algo the_repository->hash_algo + #if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT) #define DTYPE(de) ((de)->d_type) #else @@ -907,6 +910,7 @@ struct repository_format { int version; int precious_objects; int is_bare; + int hash_algo; char *work_tree; struct string_list unknown_extensions; }; @@ -1039,22 +1043,22 @@ extern const struct object_id empty_blob_oid; static inline int is_empty_blob_sha1(const unsigned char *sha1) { - return !hashcmp(sha1, EMPTY_BLOB_SHA1_BIN); + return !hashcmp(sha1, the_hash_algo->empty_blob->hash); } static inline int is_empty_blob_oid(const struct object_id *oid) { - return !hashcmp(oid->hash, EMPTY_BLOB_SHA1_BIN); + return !oidcmp(oid, the_hash_algo->empty_blob); } static inline int is_empty_tree_sha1(const unsigned char *sha1) { - return !hashcmp(sha1, EMPTY_TREE_SHA1_BIN); + return !hashcmp(sha1, the_hash_algo->empty_tree->hash); } static inline int is_empty_tree_oid(const struct object_id *oid) { - return !hashcmp(oid->hash, EMPTY_TREE_SHA1_BIN); + return !oidcmp(oid, the_hash_algo->empty_tree); } /* set default permissions by passing mode arguments to open(2) */ diff --git a/diff-lib.c b/diff-lib.c index 5173023cd3..8104603a3b 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -218,7 +218,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) } else if (revs->diffopt.ita_invisible_in_index && ce_intent_to_add(ce)) { diff_addremove(&revs->diffopt, '+', ce->ce_mode, - &empty_tree_oid, 0, + the_hash_algo->empty_tree, 0, ce->name, 0); continue; } @@ -1,6 +1,8 @@ #ifndef 
HASH_H #define HASH_H +#include "git-compat-util.h" + #if defined(SHA1_PPC) #include "ppc/sha1.h" #elif defined(SHA1_APPLE) @@ -13,4 +15,59 @@ #include "block-sha1/sha1.h" #endif +/* + * Note that these constants are suitable for indexing the hash_algos array and + * comparing against each other, but are otherwise arbitrary, so they should not + * be exposed to the user or serialized to disk. To know whether a + * git_hash_algo struct points to some usable hash function, test the format_id + * field for being non-zero. Use the name field for user-visible situations and + * the format_id field for fixed-length fields on disk. + */ +/* An unknown hash function. */ +#define GIT_HASH_UNKNOWN 0 +/* SHA-1 */ +#define GIT_HASH_SHA1 1 +/* Number of algorithms supported (including unknown). */ +#define GIT_HASH_NALGOS (GIT_HASH_SHA1 + 1) + +typedef void (*git_hash_init_fn)(void *ctx); +typedef void (*git_hash_update_fn)(void *ctx, const void *in, size_t len); +typedef void (*git_hash_final_fn)(unsigned char *hash, void *ctx); + +struct git_hash_algo { + /* + * The name of the algorithm, as appears in the config file and in + * messages. + */ + const char *name; + + /* A four-byte version identifier, used in pack indices. */ + uint32_t format_id; + + /* The size of a hash context (e.g. git_SHA_CTX). */ + size_t ctxsz; + + /* The length of the hash in binary. */ + size_t rawsz; + + /* The length of the hash in hex characters. */ + size_t hexsz; + + /* The hash initialization function. */ + git_hash_init_fn init_fn; + + /* The hash update function. */ + git_hash_update_fn update_fn; + + /* The hash finalization function. */ + git_hash_final_fn final_fn; + + /* The OID of the empty tree. */ + const struct object_id *empty_tree; + + /* The OID of the empty blob. 
*/ + const struct object_id *empty_blob; +}; +extern const struct git_hash_algo hash_algos[GIT_HASH_NALGOS]; + #endif diff --git a/merge-recursive.c b/merge-recursive.c index d00b274381..a4c280dfc7 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -2082,7 +2082,7 @@ int merge_recursive(struct merge_options *o, /* if there is no common ancestor, use an empty tree */ struct tree *tree; - tree = lookup_tree(&empty_tree_oid); + tree = lookup_tree(the_hash_algo->empty_tree); merged_common_ancestors = make_virtual_commit(tree, "ancestor"); } diff --git a/notes-merge.c b/notes-merge.c index 4a83b0ebd5..0f6573cb17 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -595,7 +595,7 @@ int notes_merge(struct notes_merge_options *o, bases = get_merge_bases(local, remote); if (!bases) { base_oid = &null_oid; - base_tree_oid = &empty_tree_oid; + base_tree_oid = the_hash_algo->empty_tree; if (o->verbosity >= 4) printf("No merge base found; doing history-less merge\n"); } else if (!bases->next) { diff --git a/repository.c b/repository.c index bb2fae5446..998413b8bb 100644 --- a/repository.c +++ b/repository.c @@ -5,7 +5,7 @@ /* The main repository */ static struct repository the_repo = { - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &the_index, 0, 0 + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &the_index, NULL, 0, 0 }; struct repository *the_repository = &the_repo; @@ -64,6 +64,11 @@ void repo_set_gitdir(struct repository *repo, const char *path) free(old_gitdir); } +void repo_set_hash_algo(struct repository *repo, int hash_algo) +{ + repo->hash_algo = &hash_algos[hash_algo]; +} + /* * Attempt to resolve and set the provided 'gitdir' for repository 'repo'. * Return 0 upon success and a non-zero value upon failure. 
@@ -136,6 +141,8 @@ int repo_init(struct repository *repo, const char *gitdir, const char *worktree) if (read_and_verify_repository_format(&format, repo->commondir)) goto error; + repo_set_hash_algo(repo, format.hash_algo); + if (worktree) repo_set_worktree(repo, worktree); diff --git a/repository.h b/repository.h index 7f5e24a0a2..0329e40c7f 100644 --- a/repository.h +++ b/repository.h @@ -4,6 +4,7 @@ struct config_set; struct index_state; struct submodule_cache; +struct git_hash_algo; struct repository { /* Environment */ @@ -67,6 +68,9 @@ struct repository { */ struct index_state *index; + /* Repository's current hash algorithm, as serialized on disk. */ + const struct git_hash_algo *hash_algo; + /* Configurations */ /* * Bit used during initialization to indicate if repository state (like @@ -86,6 +90,7 @@ extern struct repository *the_repository; extern void repo_set_gitdir(struct repository *repo, const char *path); extern void repo_set_worktree(struct repository *repo, const char *path); +extern void repo_set_hash_algo(struct repository *repo, int algo); extern int repo_init(struct repository *repo, const char *gitdir, const char *worktree); extern int repo_submodule_init(struct repository *submodule, struct repository *superproject, diff --git a/sequencer.c b/sequencer.c index fa94ed652d..e90bc316bb 100644 --- a/sequencer.c +++ b/sequencer.c @@ -347,7 +347,7 @@ static int read_oneliner(struct strbuf *buf, static struct tree *empty_tree(void) { - return lookup_tree(&empty_tree_oid); + return lookup_tree(the_hash_algo->empty_tree); } static int error_dirty_index(struct replay_opts *opts) @@ -706,7 +706,7 @@ static int is_original_commit_empty(struct commit *commit) oid_to_hex(&parent->object.oid)); ptree_oid = &parent->tree->object.oid; } else { - ptree_oid = &empty_tree_oid; /* commit is root */ + ptree_oid = the_hash_algo->empty_tree; /* commit is root */ } return !oidcmp(ptree_oid, &commit->tree->object.oid); @@ -959,7 +959,7 @@ static int 
do_pick_commit(enum todo_command command, struct commit *commit, } else { unborn = get_oid("HEAD", &head); if (unborn) - oidcpy(&head, &empty_tree_oid); + oidcpy(&head, the_hash_algo->empty_tree); if (index_differs_from(unborn ? EMPTY_TREE_SHA1_HEX : "HEAD", NULL, 0)) return error_dirty_index(opts); @@ -434,16 +434,15 @@ static int check_repo_format(const char *var, const char *value, void *vdata) return 0; } -static int check_repository_format_gently(const char *gitdir, int *nongit_ok) +static int check_repository_format_gently(const char *gitdir, struct repository_format *candidate, int *nongit_ok) { struct strbuf sb = STRBUF_INIT; struct strbuf err = STRBUF_INIT; - struct repository_format candidate; int has_common; has_common = get_common_dir(&sb, gitdir); strbuf_addstr(&sb, "/config"); - read_repository_format(&candidate, sb.buf); + read_repository_format(candidate, sb.buf); strbuf_release(&sb); /* @@ -451,10 +450,10 @@ static int check_repository_format_gently(const char *gitdir, int *nongit_ok) * we treat a missing config as a silent "ok", even when nongit_ok * is unset. 
*/ - if (candidate.version < 0) + if (candidate->version < 0) return 0; - if (verify_repository_format(&candidate, &err) < 0) { + if (verify_repository_format(candidate, &err) < 0) { if (nongit_ok) { warning("%s", err.buf); strbuf_release(&err); @@ -464,21 +463,21 @@ static int check_repository_format_gently(const char *gitdir, int *nongit_ok) die("%s", err.buf); } - repository_format_precious_objects = candidate.precious_objects; - string_list_clear(&candidate.unknown_extensions, 0); + repository_format_precious_objects = candidate->precious_objects; + string_list_clear(&candidate->unknown_extensions, 0); if (!has_common) { - if (candidate.is_bare != -1) { - is_bare_repository_cfg = candidate.is_bare; + if (candidate->is_bare != -1) { + is_bare_repository_cfg = candidate->is_bare; if (is_bare_repository_cfg == 1) inside_work_tree = -1; } - if (candidate.work_tree) { + if (candidate->work_tree) { free(git_work_tree_cfg); - git_work_tree_cfg = candidate.work_tree; + git_work_tree_cfg = candidate->work_tree; inside_work_tree = -1; } } else { - free(candidate.work_tree); + free(candidate->work_tree); } return 0; @@ -489,6 +488,7 @@ int read_repository_format(struct repository_format *format, const char *path) memset(format, 0, sizeof(*format)); format->version = -1; format->is_bare = -1; + format->hash_algo = GIT_HASH_SHA1; string_list_init(&format->unknown_extensions, 1); git_config_from_file(check_repo_format, path, format); return format->version; @@ -625,6 +625,7 @@ cleanup_return: static const char *setup_explicit_git_dir(const char *gitdirenv, struct strbuf *cwd, + struct repository_format *repo_fmt, int *nongit_ok) { const char *work_tree_env = getenv(GIT_WORK_TREE_ENVIRONMENT); @@ -650,7 +651,7 @@ static const char *setup_explicit_git_dir(const char *gitdirenv, die("Not a git repository: '%s'", gitdirenv); } - if (check_repository_format_gently(gitdirenv, nongit_ok)) { + if (check_repository_format_gently(gitdirenv, repo_fmt, nongit_ok)) { free(gitfile); 
return NULL; } @@ -723,9 +724,10 @@ static const char *setup_explicit_git_dir(const char *gitdirenv, static const char *setup_discovered_git_dir(const char *gitdir, struct strbuf *cwd, int offset, + struct repository_format *repo_fmt, int *nongit_ok) { - if (check_repository_format_gently(gitdir, nongit_ok)) + if (check_repository_format_gently(gitdir, repo_fmt, nongit_ok)) return NULL; /* --work-tree is set without --git-dir; use discovered one */ @@ -737,7 +739,7 @@ static const char *setup_discovered_git_dir(const char *gitdir, gitdir = to_free = real_pathdup(gitdir, 1); if (chdir(cwd->buf)) die_errno("Could not come back to cwd"); - ret = setup_explicit_git_dir(gitdir, cwd, nongit_ok); + ret = setup_explicit_git_dir(gitdir, cwd, repo_fmt, nongit_ok); free(to_free); return ret; } @@ -769,11 +771,12 @@ static const char *setup_discovered_git_dir(const char *gitdir, /* #16.1, #17.1, #20.1, #21.1, #22.1 (see t1510) */ static const char *setup_bare_git_dir(struct strbuf *cwd, int offset, + struct repository_format *repo_fmt, int *nongit_ok) { int root_len; - if (check_repository_format_gently(".", nongit_ok)) + if (check_repository_format_gently(".", repo_fmt, nongit_ok)) return NULL; setenv(GIT_IMPLICIT_WORK_TREE_ENVIRONMENT, "0", 1); @@ -785,7 +788,7 @@ static const char *setup_bare_git_dir(struct strbuf *cwd, int offset, gitdir = offset == cwd->len ? "." 
: xmemdupz(cwd->buf, offset); if (chdir(cwd->buf)) die_errno("Could not come back to cwd"); - return setup_explicit_git_dir(gitdir, cwd, nongit_ok); + return setup_explicit_git_dir(gitdir, cwd, repo_fmt, nongit_ok); } inside_git_dir = 1; @@ -1026,6 +1029,7 @@ const char *setup_git_directory_gently(int *nongit_ok) static struct strbuf cwd = STRBUF_INIT; struct strbuf dir = STRBUF_INIT, gitdir = STRBUF_INIT; const char *prefix; + struct repository_format repo_fmt; /* * We may have read an incomplete configuration before @@ -1053,18 +1057,18 @@ const char *setup_git_directory_gently(int *nongit_ok) prefix = NULL; break; case GIT_DIR_EXPLICIT: - prefix = setup_explicit_git_dir(gitdir.buf, &cwd, nongit_ok); + prefix = setup_explicit_git_dir(gitdir.buf, &cwd, &repo_fmt, nongit_ok); break; case GIT_DIR_DISCOVERED: if (dir.len < cwd.len && chdir(dir.buf)) die(_("Cannot change to '%s'"), dir.buf); prefix = setup_discovered_git_dir(gitdir.buf, &cwd, dir.len, - nongit_ok); + &repo_fmt, nongit_ok); break; case GIT_DIR_BARE: if (dir.len < cwd.len && chdir(dir.buf)) die(_("Cannot change to '%s'"), dir.buf); - prefix = setup_bare_git_dir(&cwd, dir.len, nongit_ok); + prefix = setup_bare_git_dir(&cwd, dir.len, &repo_fmt, nongit_ok); break; case GIT_DIR_HIT_CEILING: prefix = setup_nongit(cwd.buf, nongit_ok); @@ -1110,6 +1114,8 @@ const char *setup_git_directory_gently(int *nongit_ok) repo_set_gitdir(the_repository, gitdir); setup_git_env(); } + if (startup_info->have_repository) + repo_set_hash_algo(the_repository, repo_fmt.hash_algo); } strbuf_release(&dir); @@ -1171,7 +1177,8 @@ int git_config_perm(const char *var, const char *value) void check_repository_format(void) { - check_repository_format_gently(get_git_dir(), NULL); + struct repository_format repo_fmt; + check_repository_format_gently(get_git_dir(), &repo_fmt, NULL); startup_info->have_repository = 1; } diff --git a/sha1_file.c b/sha1_file.c index afe4b90f6e..b44f5247ca 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -39,6 
+39,64 @@ const struct object_id empty_blob_oid = { EMPTY_BLOB_SHA1_BIN_LITERAL }; +static void git_hash_sha1_init(void *ctx) +{ + git_SHA1_Init((git_SHA_CTX *)ctx); +} + +static void git_hash_sha1_update(void *ctx, const void *data, size_t len) +{ + git_SHA1_Update((git_SHA_CTX *)ctx, data, len); +} + +static void git_hash_sha1_final(unsigned char *hash, void *ctx) +{ + git_SHA1_Final(hash, (git_SHA_CTX *)ctx); +} + +static void git_hash_unknown_init(void *ctx) +{ + die("trying to init unknown hash"); +} + +static void git_hash_unknown_update(void *ctx, const void *data, size_t len) +{ + die("trying to update unknown hash"); +} + +static void git_hash_unknown_final(unsigned char *hash, void *ctx) +{ + die("trying to finalize unknown hash"); +} + +const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { + { + NULL, + 0x00000000, + 0, + 0, + 0, + git_hash_unknown_init, + git_hash_unknown_update, + git_hash_unknown_final, + NULL, + NULL, + }, + { + "sha-1", + /* "sha1", big-endian */ + 0x73686131, + sizeof(git_SHA_CTX), + GIT_SHA1_RAWSZ, + GIT_SHA1_HEXSZ, + git_hash_sha1_init, + git_hash_sha1_update, + git_hash_sha1_final, + &empty_tree_oid, + &empty_blob_oid, + }, +}; + /* * This is meant to hold a *small* number of objects that you would * want read_sha1_file() to be able to return, but yet you do not want diff --git a/submodule.c b/submodule.c index 95e6aff2bb..fa25888783 100644 --- a/submodule.c +++ b/submodule.c @@ -587,7 +587,7 @@ void show_submodule_inline_diff(struct diff_options *o, const char *path, struct object_id *one, struct object_id *two, unsigned dirty_submodule) { - const struct object_id *old = &empty_tree_oid, *new = &empty_tree_oid; + const struct object_id *old = the_hash_algo->empty_tree, *new = the_hash_algo->empty_tree; struct commit *left = NULL, *right = NULL; struct commit_list *merge_bases = NULL; struct child_process cp = CHILD_PROCESS_INIT; |