diff options
author | Junio C Hamano <gitster@pobox.com> | 2020-06-25 21:27:47 +0200 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2020-06-25 21:27:47 +0200 |
commit | 34e849b05a454a2c6487f8fbfa68c39932d22730 (patch) | |
tree | 7a71a357e9f7e2c977849224ca18b2cb9a4461ea | |
parent | Merge branch 'ss/submodule-set-branch-in-c' (diff) | |
parent | upload-pack: fix a sparse '0 as NULL pointer' warning (diff) | |
download | git-34e849b05a454a2c6487f8fbfa68c39932d22730.tar.xz git-34e849b05a454a2c6487f8fbfa68c39932d22730.zip |
Merge branch 'jt/cdn-offload'
The "fetch/clone" protocol has been updated to allow the server to
instruct the clients to grab pre-packaged packfile(s) in addition
to the packed object data coming over the wire.
* jt/cdn-offload:
upload-pack: fix a sparse '0 as NULL pointer' warning
upload-pack: send part of packfile response as uri
fetch-pack: support more than one pack lockfile
upload-pack: refactor reading of pack-objects out
Documentation: add Packfile URIs design doc
Documentation: order protocol v2 sections
http-fetch: support fetching packfiles by URL
http-fetch: refactor into function
http: refactor finish_http_pack_request()
http: use --stdin when indexing dumb HTTP pack
-rw-r--r-- | Documentation/git-http-fetch.txt | 9 | ||||
-rw-r--r-- | Documentation/technical/packfile-uri.txt | 78 | ||||
-rw-r--r-- | Documentation/technical/protocol-v2.txt | 48 | ||||
-rw-r--r-- | builtin/fetch-pack.c | 17 | ||||
-rw-r--r-- | builtin/pack-objects.c | 76 | ||||
-rw-r--r-- | connected.c | 8 | ||||
-rw-r--r-- | fetch-pack.c | 137 | ||||
-rw-r--r-- | fetch-pack.h | 2 | ||||
-rw-r--r-- | http-fetch.c | 126 | ||||
-rw-r--r-- | http-push.c | 8 | ||||
-rw-r--r-- | http-walker.c | 5 | ||||
-rw-r--r-- | http.c | 82 | ||||
-rw-r--r-- | http.h | 24 | ||||
-rwxr-xr-x | t/t5550-http-fetch-dumb.sh | 30 | ||||
-rwxr-xr-x | t/t5702-protocol-v2.sh | 88 | ||||
-rw-r--r-- | transport-helper.c | 5 | ||||
-rw-r--r-- | transport.c | 12 | ||||
-rw-r--r-- | transport.h | 6 | ||||
-rw-r--r-- | upload-pack.c | 156 |
19 files changed, 750 insertions, 167 deletions
diff --git a/Documentation/git-http-fetch.txt b/Documentation/git-http-fetch.txt index 666b042679..4deb4893f5 100644 --- a/Documentation/git-http-fetch.txt +++ b/Documentation/git-http-fetch.txt @@ -9,7 +9,7 @@ git-http-fetch - Download from a remote Git repository via HTTP SYNOPSIS -------- [verse] -'git http-fetch' [-c] [-t] [-a] [-d] [-v] [-w filename] [--recover] [--stdin] <commit> <url> +'git http-fetch' [-c] [-t] [-a] [-d] [-v] [-w filename] [--recover] [--stdin | --packfile=<hash> | <commit>] <url> DESCRIPTION ----------- @@ -40,6 +40,13 @@ commit-id:: <commit-id>['\t'<filename-as-in--w>] +--packfile=<hash>:: + Instead of a commit id on the command line (which is not expected in + this case), 'git http-fetch' fetches the packfile directly at the given + URL and uses index-pack to generate corresponding .idx and .keep files. + The hash is used to determine the name of the temporary file and is + arbitrary. The output of index-pack is printed to stdout. + --recover:: Verify that everything reachable from target is fetched. Used after an earlier fetch is interrupted. diff --git a/Documentation/technical/packfile-uri.txt b/Documentation/technical/packfile-uri.txt new file mode 100644 index 0000000000..318713abc3 --- /dev/null +++ b/Documentation/technical/packfile-uri.txt @@ -0,0 +1,78 @@ +Packfile URIs +============= + +This feature allows servers to serve part of their packfile response as URIs. +This allows server designs that improve scalability in bandwidth and CPU usage +(for example, by serving some data through a CDN), and (in the future) provides +some measure of resumability to clients. + +This feature is available only in protocol version 2. + +Protocol +-------- + +The server advertises the `packfile-uris` capability. + +If the client then communicates which protocols (HTTPS, etc.) it supports with +a `packfile-uris` argument, the server MAY send a `packfile-uris` section +directly before the `packfile` section (right after `wanted-refs` if it is +sent) containing URIs of any of the given protocols. The URIs point to +packfiles that use only features that the client has declared that it supports +(e.g. ofs-delta and thin-pack). See protocol-v2.txt for the documentation of +this section. + +Clients should then download and index all the given URIs (in addition to +downloading and indexing the packfile given in the `packfile` section of the +response) before performing the connectivity check. + +Server design +------------- + +The server can be trivially made compatible with the proposed protocol by +having it advertise `packfile-uris`, tolerating the client sending +`packfile-uris`, and never sending any `packfile-uris` section. But we should +include some sort of non-trivial implementation in the Minimum Viable Product, +at least so that we can test the client. + +This is the implementation: a feature, marked experimental, that allows the +server to be configured by one or more `uploadpack.blobPackfileUri=<sha1> +<uri>` entries. Whenever the list of objects to be sent is assembled, all such +blobs are excluded, replaced with URIs. The client will download those URIs, +expecting them to each point to packfiles containing single blobs. + +Client design +------------- + +The client has a config variable `fetch.uriprotocols` that determines which +protocols the end user is willing to use. By default, this is empty. + +When the client downloads the given URIs, it should store them with "keep" +files, just like it does with the packfile in the `packfile` section. These +additional "keep" files can only be removed after the refs have been updated - +just like the "keep" file for the packfile in the `packfile` section. + +The division of work (initial fetch + additional URIs) introduces convenient +points for resumption of an interrupted clone - such resumption can be done +after the Minimum Viable Product (see "Future work"). + +Future work +----------- + +The protocol design allows some evolution of the server and client without any +need for protocol changes, so only a small-scoped design is included here to +form the MVP. For example, the following can be done: + + * On the server, more sophisticated means of excluding objects (e.g. by + specifying a commit to represent that commit and all objects that it + references). + * On the client, resumption of clone. If a clone is interrupted, information + could be recorded in the repository's config and a "clone-resume" command + can resume the clone in progress. (Resumption of subsequent fetches is more + difficult because that must deal with the user wanting to use the repository + even after the fetch was interrupted.) + +There are some possible features that will require a change in protocol: + + * Additional HTTP headers (e.g. authentication) + * Byte range support + * Different file formats referenced by URIs (e.g. raw object) diff --git a/Documentation/technical/protocol-v2.txt b/Documentation/technical/protocol-v2.txt index 3996d70891..5852f499a6 100644 --- a/Documentation/technical/protocol-v2.txt +++ b/Documentation/technical/protocol-v2.txt @@ -325,13 +325,26 @@ included in the client's request: indicating its sideband (1, 2, or 3), and the server may send "0005\2" (a PKT-LINE of sideband 2 with no payload) as a keepalive packet. +If the 'packfile-uris' feature is advertised, the following argument +can be included in the client's request as well as the potential +addition of the 'packfile-uris' section in the server's response as +explained below. + + packfile-uris <comma-separated list of protocols> + Indicates to the server that the client is willing to receive + URIs of any of the given protocols in place of objects in the + sent packfile. Before performing the connectivity check, the + client should download from all given URIs. Currently, the + protocols supported are "http" and "https". + The response of `fetch` is broken into a number of sections separated by delimiter packets (0001), with each section beginning with its section -header. +header. Most sections are sent only when the packfile is sent. - output = *section - section = (acknowledgments | shallow-info | wanted-refs | packfile) - (flush-pkt | delim-pkt) + output = acknowledgements flush-pkt | + [acknowledgments delim-pkt] [shallow-info delim-pkt] + [wanted-refs delim-pkt] [packfile-uris delim-pkt] + packfile flush-pkt acknowledgments = PKT-LINE("acknowledgments" LF) (nak | *ack) @@ -349,13 +362,17 @@ header. *PKT-LINE(wanted-ref LF) wanted-ref = obj-id SP refname + packfile-uris = PKT-LINE("packfile-uris" LF) *packfile-uri + packfile-uri = PKT-LINE(40*(HEXDIGIT) SP *%x20-ff LF) + packfile = PKT-LINE("packfile" LF) *PKT-LINE(%x01-03 *%x00-ff) acknowledgments section - * If the client determines that it is finished with negotiations - by sending a "done" line, the acknowledgments sections MUST be - omitted from the server's response. + * If the client determines that it is finished with negotiations by + sending a "done" line (thus requiring the server to send a packfile), + the acknowledgments sections MUST be omitted from the server's + response. * Always begins with the section header "acknowledgments" @@ -406,9 +423,6 @@ header. which the client has not indicated was shallow as a part of its request. - * This section is only included if a packfile section is also - included in the response. - wanted-refs section * This section is only included if the client has requested a ref using a 'want-ref' line and if a packfile section is also @@ -422,6 +436,20 @@ header. * The server MUST NOT send any refs which were not requested using 'want-ref' lines. + packfile-uris section + * This section is only included if the client sent + 'packfile-uris' and the server has at least one such URI to + send. + + * Always begins with the section header "packfile-uris". + + * For each URI the server sends, it sends a hash of the pack's + contents (as output by git index-pack) followed by the URI. + + * The hashes are 40 hex characters long. When Git upgrades to a new + hash algorithm, this might need to be updated. (It should match + whatever index-pack outputs after "pack\t" or "keep\t". + packfile section * This section is only included if the client has sent 'want' lines in its request and either requested that no more diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index 94b0c89b82..bbb5c96167 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -48,8 +48,8 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) struct ref **sought = NULL; int nr_sought = 0, alloc_sought = 0; int fd[2]; - char *pack_lockfile = NULL; - char **pack_lockfile_ptr = NULL; + struct string_list pack_lockfiles = STRING_LIST_INIT_DUP; + struct string_list *pack_lockfiles_ptr = NULL; struct child_process *conn; struct fetch_pack_args args; struct oid_array shallow = OID_ARRAY_INIT; @@ -134,7 +134,7 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) } if (!strcmp("--lock-pack", arg)) { args.lock_pack = 1; - pack_lockfile_ptr = &pack_lockfile; + pack_lockfiles_ptr = &pack_lockfiles; continue; } if (!strcmp("--check-self-contained-and-connected", arg)) { @@ -235,10 +235,15 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) } ref = fetch_pack(&args, fd, ref, sought, nr_sought, - &shallow, pack_lockfile_ptr, version); - if (pack_lockfile) { - printf("lock %s\n", pack_lockfile); + &shallow, pack_lockfiles_ptr, version); + if (pack_lockfiles.nr) { + int i; + + printf("lock %s\n", pack_lockfiles.items[0].string); fflush(stdout); + for (i = 1; i < pack_lockfiles.nr; i++) + warning(_("Lockfile created but not reported: %s"), + pack_lockfiles.items[i].string); } if (args.check_self_contained_and_connected && args.self_contained_and_connected) { diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index c5b433a23f..7016b28485 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -117,6 +117,8 @@ static unsigned long window_memory_limit = 0; static struct list_objects_filter_options filter_options; +static struct string_list uri_protocols = STRING_LIST_INIT_NODUP; + enum missing_action { MA_ERROR = 0, /* fail if any missing objects are encountered */ MA_ALLOW_ANY, /* silently allow ALL missing objects */ @@ -125,6 +127,15 @@ enum missing_action { static enum missing_action arg_missing_action; static show_object_fn fn_show_object; +struct configured_exclusion { + struct oidmap_entry e; + char *pack_hash_hex; + char *uri; +}; +static struct oidmap configured_exclusions; + +static struct oidset excluded_by_config; + /* * stats */ @@ -969,6 +980,25 @@ static void write_reused_pack(struct hashfile *f) unuse_pack(&w_curs); } +static void write_excluded_by_configs(void) +{ + struct oidset_iter iter; + const struct object_id *oid; + + oidset_iter_init(&excluded_by_config, &iter); + while ((oid = oidset_iter_next(&iter))) { + struct configured_exclusion *ex = + oidmap_get(&configured_exclusions, oid); + + if (!ex) + BUG("configured exclusion wasn't configured"); + write_in_full(1, ex->pack_hash_hex, strlen(ex->pack_hash_hex)); + write_in_full(1, " ", 1); + write_in_full(1, ex->uri, strlen(ex->uri)); + write_in_full(1, "\n", 1); + } +} + static const char no_split_warning[] = N_( "disabling bitmap writing, packs are split due to pack.packSizeLimit" ); @@ -1266,6 +1296,25 @@ static int want_object_in_pack(const struct object_id *oid, } } + if (uri_protocols.nr) { + struct configured_exclusion *ex = + oidmap_get(&configured_exclusions, oid); + int i; + const char *p; + + if (ex) { + for (i = 0; i < uri_protocols.nr; i++) { + if (skip_prefix(ex->uri, + uri_protocols.items[i].string, + &p) && + *p == ':') { + oidset_insert(&excluded_by_config, oid); + return 0; + } + } + } + } + return 1; } @@ -2864,6 +2913,29 @@ static int git_pack_config(const char *k, const char *v, void *cb) pack_idx_opts.version); return 0; } + if (!strcmp(k, "uploadpack.blobpackfileuri")) { + struct configured_exclusion *ex = xmalloc(sizeof(*ex)); + const char *oid_end, *pack_end; + /* + * Stores the pack hash. This is not a true object ID, but is + * of the same form. + */ + struct object_id pack_hash; + + if (parse_oid_hex(v, &ex->e.oid, &oid_end) || + *oid_end != ' ' || + parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) || + *pack_end != ' ') + die(_("value of uploadpack.blobpackfileuri must be " + "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v); + if (oidmap_get(&configured_exclusions, &ex->e.oid)) + die(_("object already configured in another " + "uploadpack.blobpackfileuri (got '%s')"), v); + ex->pack_hash_hex = xcalloc(1, pack_end - oid_end); + memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1); + ex->uri = xstrdup(pack_end + 1); + oidmap_put(&configured_exclusions, ex); + } return git_default_config(k, v, cb); } @@ -3462,6 +3534,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) N_("do not pack objects in promisor packfiles")), OPT_BOOL(0, "delta-islands", &use_delta_islands, N_("respect islands during delta compression")), + OPT_STRING_LIST(0, "uri-protocol", &uri_protocols, + N_("protocol"), + N_("exclude any configured uploadpack.blobpackfileuri with this protocol")), OPT_END(), }; @@ -3650,6 +3725,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) } trace2_region_enter("pack-objects", "write-pack-file", the_repository); + write_excluded_by_configs(); write_pack_file(); trace2_region_leave("pack-objects", "write-pack-file", the_repository); diff --git a/connected.c b/connected.c index 3135b71e19..937b4bae38 100644 --- a/connected.c +++ b/connected.c @@ -43,10 +43,12 @@ int check_connected(oid_iterate_fn fn, void *cb_data, if (transport && transport->smart_options && transport->smart_options->self_contained_and_connected && - transport->pack_lockfile && - strip_suffix(transport->pack_lockfile, ".keep", &base_len)) { + transport->pack_lockfiles.nr == 1 && + strip_suffix(transport->pack_lockfiles.items[0].string, + ".keep", &base_len)) { struct strbuf idx_file = STRBUF_INIT; - strbuf_add(&idx_file, transport->pack_lockfile, base_len); + strbuf_add(&idx_file, transport->pack_lockfiles.items[0].string, + base_len); strbuf_addstr(&idx_file, ".idx"); new_pack = add_packed_git(idx_file.buf, idx_file.len, 1); strbuf_release(&idx_file); diff --git a/fetch-pack.c b/fetch-pack.c index d8bbf45ee2..acd55ba6e8 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -38,6 +38,7 @@ static int server_supports_filtering; static struct shallow_lock shallow_lock; static const char *alternate_shallow_file; static struct strbuf fsck_msg_types = STRBUF_INIT; +static struct string_list uri_protocols = STRING_LIST_INIT_DUP; /* Remember to update object flag allocation in object.h */ #define COMPLETE (1U << 0) @@ -794,7 +795,8 @@ static void write_promisor_file(const char *keep_name, } static int get_pack(struct fetch_pack_args *args, - int xd[2], char **pack_lockfile, + int xd[2], struct string_list *pack_lockfiles, + int only_packfile, struct ref **sought, int nr_sought) { struct async demux; @@ -838,7 +840,7 @@ static int get_pack(struct fetch_pack_args *args, } if (do_keep || args->from_promisor) { - if (pack_lockfile) + if (pack_lockfiles) cmd.out = -1; cmd_name = "index-pack"; argv_array_push(&cmd.args, cmd_name); @@ -855,15 +857,22 @@ static int get_pack(struct fetch_pack_args *args, "--keep=fetch-pack %"PRIuMAX " on %s", (uintmax_t)getpid(), hostname); } - if (args->check_self_contained_and_connected) + if (only_packfile && args->check_self_contained_and_connected) argv_array_push(&cmd.args, "--check-self-contained-and-connected"); + else + /* + * We cannot perform any connectivity checks because + * not all packs have been downloaded; let the caller + * have this responsibility. + */ + args->check_self_contained_and_connected = 0; /* * If we're obtaining the filename of a lockfile, we'll use * that filename to write a .promisor file with more * information below. If not, we need index-pack to do it for * us. */ - if (!(do_keep && pack_lockfile) && args->from_promisor) + if (!(do_keep && pack_lockfiles) && args->from_promisor) argv_array_push(&cmd.args, "--promisor"); } else { @@ -899,8 +908,9 @@ static int get_pack(struct fetch_pack_args *args, cmd.git_cmd = 1; if (start_command(&cmd)) die(_("fetch-pack: unable to fork off %s"), cmd_name); - if (do_keep && pack_lockfile) { - *pack_lockfile = index_pack_lockfile(cmd.out); + if (do_keep && pack_lockfiles) { + string_list_append_nodup(pack_lockfiles, + index_pack_lockfile(cmd.out)); close(cmd.out); } @@ -922,8 +932,8 @@ static int get_pack(struct fetch_pack_args *args, * Now that index-pack has succeeded, write the promisor file using the * obtained .keep filename if necessary */ - if (do_keep && pack_lockfile && args->from_promisor) - write_promisor_file(*pack_lockfile, sought, nr_sought); + if (do_keep && pack_lockfiles && pack_lockfiles->nr && args->from_promisor) + write_promisor_file(pack_lockfiles->items[0].string, sought, nr_sought); return 0; } @@ -940,7 +950,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, const struct ref *orig_ref, struct ref **sought, int nr_sought, struct shallow_info *si, - char **pack_lockfile) + struct string_list *pack_lockfiles) { struct repository *r = the_repository; struct ref *ref = copy_ref_list(orig_ref); @@ -1067,7 +1077,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, alternate_shallow_file = setup_temporary_shallow(si->shallow); else alternate_shallow_file = NULL; - if (get_pack(args, fd, pack_lockfile, sought, nr_sought)) + if (get_pack(args, fd, pack_lockfiles, 1, sought, nr_sought)) die(_("git fetch-pack: fetch failed.")); all_done: @@ -1221,6 +1231,26 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, warning("filtering not recognized by server, ignoring"); } + if (server_supports_feature("fetch", "packfile-uris", 0)) { + int i; + struct strbuf to_send = STRBUF_INIT; + + for (i = 0; i < uri_protocols.nr; i++) { + const char *s = uri_protocols.items[i].string; + + if (!strcmp(s, "https") || !strcmp(s, "http")) { + if (to_send.len) + strbuf_addch(&to_send, ','); + strbuf_addstr(&to_send, s); + } + } + if (to_send.len) { + packet_buf_write(&req_buf, "packfile-uris %s", + to_send.buf); + strbuf_release(&to_send); + } + } + /* add wants */ add_wants(args->no_dependents, wants, &req_buf); @@ -1443,6 +1473,21 @@ static void receive_wanted_refs(struct packet_reader *reader, die(_("error processing wanted refs: %d"), reader->status); } +static void receive_packfile_uris(struct packet_reader *reader, + struct string_list *uris) +{ + process_section_header(reader, "packfile-uris", 0); + while (packet_reader_read(reader) == PACKET_READ_NORMAL) { + if (reader->pktlen < the_hash_algo->hexsz || + reader->line[the_hash_algo->hexsz] != ' ') + die("expected '<hash> <uri>', got: %s\n", reader->line); + + string_list_append(uris, reader->line); + } + if (reader->status != PACKET_READ_DELIM) + die("expected DELIM"); +} + enum fetch_state { FETCH_CHECK_LOCAL = 0, FETCH_SEND_REQUEST, @@ -1464,7 +1509,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, struct ref **sought, int nr_sought, struct oid_array *shallows, struct shallow_info *si, - char **pack_lockfile) + struct string_list *pack_lockfiles) { struct repository *r = the_repository; struct ref *ref = copy_ref_list(orig_ref); @@ -1476,6 +1521,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, struct fetch_negotiator negotiator_alloc; struct fetch_negotiator *negotiator; int seen_ack = 0; + struct string_list packfile_uris = STRING_LIST_INIT_DUP; + int i; if (args->no_dependents) { negotiator = NULL; @@ -1569,9 +1616,12 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, if (process_section_header(&reader, "wanted-refs", 1)) receive_wanted_refs(&reader, sought, nr_sought); - /* get the pack */ + /* get the pack(s) */ + if (process_section_header(&reader, "packfile-uris", 1)) + receive_packfile_uris(&reader, &packfile_uris); process_section_header(&reader, "packfile", 0); - if (get_pack(args, fd, pack_lockfile, sought, nr_sought)) + if (get_pack(args, fd, pack_lockfiles, + !packfile_uris.nr, sought, nr_sought)) die(_("git fetch-pack: fetch failed.")); do_check_stateless_delimiter(args, &reader); @@ -1582,8 +1632,55 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, } } + for (i = 0; i < packfile_uris.nr; i++) { + struct child_process cmd = CHILD_PROCESS_INIT; + char packname[GIT_MAX_HEXSZ + 1]; + const char *uri = packfile_uris.items[i].string + + the_hash_algo->hexsz + 1; + + argv_array_push(&cmd.args, "http-fetch"); + argv_array_pushf(&cmd.args, "--packfile=%.*s", + (int) the_hash_algo->hexsz, + packfile_uris.items[i].string); + argv_array_push(&cmd.args, uri); + cmd.git_cmd = 1; + cmd.no_stdin = 1; + cmd.out = -1; + if (start_command(&cmd)) + die("fetch-pack: unable to spawn http-fetch"); + + if (read_in_full(cmd.out, packname, 5) < 0 || + memcmp(packname, "keep\t", 5)) + die("fetch-pack: expected keep then TAB at start of http-fetch output"); + + if (read_in_full(cmd.out, packname, + the_hash_algo->hexsz + 1) < 0 || + packname[the_hash_algo->hexsz] != '\n') + die("fetch-pack: expected hash then LF at end of http-fetch output"); + + packname[the_hash_algo->hexsz] = '\0'; + + close(cmd.out); + + if (finish_command(&cmd)) + die("fetch-pack: unable to finish http-fetch"); + + if (memcmp(packfile_uris.items[i].string, packname, + the_hash_algo->hexsz)) + die("fetch-pack: pack downloaded from %s does not match expected hash %.*s", + uri, (int) the_hash_algo->hexsz, + packfile_uris.items[i].string); + + string_list_append_nodup(pack_lockfiles, + xstrfmt("%s/pack/pack-%s.keep", + get_object_directory(), + packname)); + } + string_list_clear(&packfile_uris, 0); + if (negotiator) negotiator->release(negotiator); + oidset_clear(&common); return ref; } @@ -1620,6 +1717,14 @@ static void fetch_pack_config(void) git_config_get_bool("repack.usedeltabaseoffset", &prefer_ofs_delta); git_config_get_bool("fetch.fsckobjects", &fetch_fsck_objects); git_config_get_bool("transfer.fsckobjects", &transfer_fsck_objects); + if (!uri_protocols.nr) { + char *str; + + if (!git_config_get_string("fetch.uriprotocols", &str) && str) { + string_list_split(&uri_protocols, str, ',', -1); + free(str); + } + } git_config(fetch_pack_config_cb, NULL); } @@ -1772,7 +1877,7 @@ struct ref *fetch_pack(struct fetch_pack_args *args, const struct ref *ref, struct ref **sought, int nr_sought, struct oid_array *shallow, - char **pack_lockfile, + struct string_list *pack_lockfiles, enum protocol_version version) { struct ref *ref_cpy; @@ -1807,11 +1912,11 @@ struct ref *fetch_pack(struct fetch_pack_args *args, memset(&si, 0, sizeof(si)); ref_cpy = do_fetch_pack_v2(args, fd, ref, sought, nr_sought, &shallows_scratch, &si, - pack_lockfile); + pack_lockfiles); } else { prepare_shallow_info(&si, shallow); ref_cpy = do_fetch_pack(args, fd, ref, sought, nr_sought, - &si, pack_lockfile); + &si, pack_lockfiles); } reprepare_packed_git(the_repository); diff --git a/fetch-pack.h b/fetch-pack.h index 67f684229a..85d1e39fe7 100644 --- a/fetch-pack.h +++ b/fetch-pack.h @@ -83,7 +83,7 @@ struct ref *fetch_pack(struct fetch_pack_args *args, struct ref **sought, int nr_sought, struct oid_array *shallow, - char **pack_lockfile, + struct string_list *pack_lockfiles, enum protocol_version version); /* diff --git a/http-fetch.c b/http-fetch.c index a32ac118d9..1df376e745 100644 --- a/http-fetch.c +++ b/http-fetch.c @@ -5,22 +5,90 @@ #include "walker.h" static const char http_fetch_usage[] = "git http-fetch " -"[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url"; +"[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin | --packfile=hash | commit-id] url"; -int cmd_main(int argc, const char **argv) +static int fetch_using_walker(const char *raw_url, int get_verbosely, + int get_recover, int commits, char **commit_id, + const char **write_ref, int commits_on_stdin) { + char *url = NULL; struct walker *walker; + int rc; + + str_end_url_with_slash(raw_url, &url); + + http_init(NULL, url, 0); + + walker = get_http_walker(url); + walker->get_verbosely = get_verbosely; + walker->get_recover = get_recover; + walker->get_progress = 0; + + rc = walker_fetch(walker, commits, commit_id, write_ref, url); + + if (commits_on_stdin) + walker_targets_free(commits, commit_id, write_ref); + + if (walker->corrupt_object_found) { + fprintf(stderr, +"Some loose object were found to be corrupt, but they might be just\n" +"a false '404 Not Found' error message sent with incorrect HTTP\n" +"status code. Suggest running 'git fsck'.\n"); + } + + walker_free(walker); + http_cleanup(); + free(url); + + return rc; +} + +static void fetch_single_packfile(struct object_id *packfile_hash, + const char *url) { + struct http_pack_request *preq; + struct slot_results results; + int ret; + + http_init(NULL, url, 0); + + preq = new_direct_http_pack_request(packfile_hash->hash, xstrdup(url)); + if (preq == NULL) + die("couldn't create http pack request"); + preq->slot->results = &results; + preq->generate_keep = 1; + + if (start_active_slot(preq->slot)) { + run_active_slot(preq->slot); + if (results.curl_result != CURLE_OK) { + die("Unable to get pack file %s\n%s", preq->url, + curl_errorstr); + } + } else { + die("Unable to start request"); + } + + if ((ret = finish_http_pack_request(preq))) + die("finish_http_pack_request gave result %d", ret); + + release_http_pack_request(preq); + http_cleanup(); +} + +int cmd_main(int argc, const char **argv) +{ int commits_on_stdin = 0; int commits; const char **write_ref = NULL; char **commit_id; - char *url = NULL; int arg = 1; - int rc = 0; int get_verbosely = 0; int get_recover = 0; + int packfile = 0; + struct object_id packfile_hash; while (arg < argc && argv[arg][0] == '-') { + const char *p; + if (argv[arg][1] == 't') { } else if (argv[arg][1] == 'c') { } else if (argv[arg][1] == 'a') { @@ -35,46 +103,34 @@ int cmd_main(int argc, const char **argv) get_recover = 1; } else if (!strcmp(argv[arg], "--stdin")) { commits_on_stdin = 1; + } else if (skip_prefix(argv[arg], "--packfile=", &p)) { + const char *end; + + packfile = 1; + if (parse_oid_hex(p, &packfile_hash, &end) || *end) + die(_("argument to --packfile must be a valid hash (got '%s')"), p); } arg++; } - if (argc != arg + 2 - commits_on_stdin) + if (argc != arg + 2 - (commits_on_stdin || packfile)) usage(http_fetch_usage); - if (commits_on_stdin) { - commits = walker_targets_stdin(&commit_id, &write_ref); - } else { - commit_id = (char **) &argv[arg++]; - commits = 1; - } - - if (argv[arg]) - str_end_url_with_slash(argv[arg], &url); setup_git_directory(); git_config(git_default_config, NULL); - http_init(NULL, url, 0); - walker = get_http_walker(url); - walker->get_verbosely = get_verbosely; - walker->get_recover = get_recover; - - rc = walker_fetch(walker, commits, commit_id, write_ref, url); - - if (commits_on_stdin) - walker_targets_free(commits, commit_id, write_ref); - - if (walker->corrupt_object_found) { - fprintf(stderr, -"Some loose object were found to be corrupt, but they might be just\n" -"a false '404 Not Found' error message sent with incorrect HTTP\n" -"status code. Suggest running 'git fsck'.\n"); + if (packfile) { + fetch_single_packfile(&packfile_hash, argv[arg]); + return 0; } - walker_free(walker); - http_cleanup(); - - free(url); - - return rc; + if (commits_on_stdin) { + commits = walker_targets_stdin(&commit_id, &write_ref); + } else { + commit_id = (char **) &argv[arg++]; + commits = 1; + } + return fetch_using_walker(argv[arg], get_verbosely, get_recover, + commits, commit_id, write_ref, + commits_on_stdin); } diff --git a/http-push.c b/http-push.c index 822f326599..ac7868ffee 100644 --- a/http-push.c +++ b/http-push.c @@ -117,6 +117,7 @@ enum transfer_state { struct transfer_request { struct object *obj; + struct packed_git *target; char *url; char *dest; struct remote_lock *lock; @@ -314,17 +315,18 @@ static void start_fetch_packed(struct transfer_request *request) release_request(request); return; } + close_pack_index(target); + request->target = target; fprintf(stderr, "Fetching pack %s\n", hash_to_hex(target->hash)); fprintf(stderr, " which contains %s\n", oid_to_hex(&request->obj->oid)); - preq = new_http_pack_request(target, repo->url); + preq = new_http_pack_request(target->hash, repo->url); if (preq == NULL) { repo->can_update_info_refs = 0; return; } - preq->lst = &repo->packs; /* Make sure there isn't another open request for this pack */ while (check_request) { @@ -597,6 +599,8 @@ static void finish_request(struct transfer_request *request) } if (fail) repo->can_update_info_refs = 0; + else + http_install_packfile(request->target, &repo->packs); release_request(request); } } diff --git a/http-walker.c b/http-walker.c index fe15e325fa..4fb1235cd4 100644 --- a/http-walker.c +++ b/http-walker.c @@ -439,6 +439,7 @@ static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigne target = find_sha1_pack(sha1, repo->packs); if (!target) return -1; + close_pack_index(target); if (walker->get_verbosely) { fprintf(stderr, "Getting pack %s\n", @@ -447,10 +448,9 @@ static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigne hash_to_hex(sha1)); } - preq = new_http_pack_request(target, repo->base); + preq = new_http_pack_request(target->hash, repo->base); if (preq == NULL) goto abort; - preq->lst = &repo->packs; preq->slot->results = &results; if (start_active_slot(preq->slot)) { @@ -469,6 +469,7 @@ static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigne release_http_pack_request(preq); if (ret) return ret; + http_install_packfile(target, &repo->packs); return 0; @@ -2261,70 +2261,74 @@ void release_http_pack_request(struct http_pack_request *preq) int finish_http_pack_request(struct http_pack_request *preq) { - struct packed_git **lst; - struct packed_git *p = preq->target; - char *tmp_idx; - size_t len; struct child_process ip = CHILD_PROCESS_INIT; - - close_pack_index(p); + int tmpfile_fd; + int ret = 0; fclose(preq->packfile); preq->packfile = NULL; - lst = preq->lst; - while (*lst != p) - lst = &((*lst)->next); - *lst = (*lst)->next; - - if (!strip_suffix(preq->tmpfile.buf, ".pack.temp", &len)) - BUG("pack tmpfile does not end in .pack.temp?"); - tmp_idx = xstrfmt("%.*s.idx.temp", (int)len, preq->tmpfile.buf); + tmpfile_fd = xopen(preq->tmpfile.buf, O_RDONLY); argv_array_push(&ip.args, "index-pack"); - argv_array_pushl(&ip.args, "-o", tmp_idx, NULL); - argv_array_push(&ip.args, preq->tmpfile.buf); + argv_array_push(&ip.args, "--stdin"); ip.git_cmd = 1; - ip.no_stdin = 1; - ip.no_stdout = 1; + ip.in = tmpfile_fd; + if (preq->generate_keep) { + argv_array_pushf(&ip.args, "--keep=git %"PRIuMAX, + (uintmax_t)getpid()); + ip.out = 0; + } else { + ip.no_stdout = 1; + } if (run_command(&ip)) { - unlink(preq->tmpfile.buf); - unlink(tmp_idx); - free(tmp_idx); - return -1; + ret = -1; + goto cleanup; } - unlink(sha1_pack_index_name(p->hash)); +cleanup: + close(tmpfile_fd); + unlink(preq->tmpfile.buf); + return ret; +} - if (finalize_object_file(preq->tmpfile.buf, sha1_pack_name(p->hash)) - || finalize_object_file(tmp_idx, sha1_pack_index_name(p->hash))) { - free(tmp_idx); - return -1; - } +void http_install_packfile(struct packed_git *p, + struct packed_git **list_to_remove_from) +{ + struct packed_git **lst = list_to_remove_from; + + while (*lst != p) + lst = &((*lst)->next); + *lst = (*lst)->next; install_packed_git(the_repository, p); - free(tmp_idx); - return 0; } struct http_pack_request *new_http_pack_request( - struct packed_git *target, const char *base_url) + const unsigned char *packed_git_hash, const char *base_url) { + + struct strbuf buf = STRBUF_INIT; + + end_url_with_slash(&buf, base_url); + strbuf_addf(&buf, "objects/pack/pack-%s.pack", + hash_to_hex(packed_git_hash)); + return new_direct_http_pack_request(packed_git_hash, + strbuf_detach(&buf, NULL)); +} + +struct http_pack_request *new_direct_http_pack_request( + const unsigned char *packed_git_hash, char *url) { off_t prev_posn = 0; - struct strbuf buf = STRBUF_INIT; struct http_pack_request *preq; preq = xcalloc(1, sizeof(*preq)); strbuf_init(&preq->tmpfile, 0); - preq->target = target; - end_url_with_slash(&buf, base_url); - strbuf_addf(&buf, "objects/pack/pack-%s.pack", - hash_to_hex(target->hash)); - preq->url = strbuf_detach(&buf, NULL); + preq->url = url; - strbuf_addf(&preq->tmpfile, "%s.temp", sha1_pack_name(target->hash)); + strbuf_addf(&preq->tmpfile, "%s.temp", sha1_pack_name(packed_git_hash)); preq->packfile = fopen(preq->tmpfile.buf, "a"); if (!preq->packfile) { error("Unable to open local file %s for pack", @@ -2348,7 +2352,7 @@ struct http_pack_request *new_http_pack_request( if (http_is_verbose) fprintf(stderr, "Resuming fetch of pack %s at byte %"PRIuMAX"\n", - hash_to_hex(target->hash), + hash_to_hex(packed_git_hash), (uintmax_t)prev_posn); http_opt_request_remainder(preq->slot->curl, prev_posn); } @@ -216,18 +216,36 @@ int http_get_info_packs(const char *base_url, struct http_pack_request { char *url; - struct packed_git *target; - struct packed_git **lst; + + /* + * If this is true, finish_http_pack_request() will pass "--keep" to + * index-pack, resulting in the creation of a keep file, and will not + * suppress its stdout (that is, the "keep\t<hash>\n" line will be + * printed to stdout). + */ + unsigned generate_keep : 1; + FILE *packfile; struct strbuf tmpfile; struct active_request_slot *slot; }; struct http_pack_request *new_http_pack_request( - struct packed_git *target, const char *base_url); + const unsigned char *packed_git_hash, const char *base_url); +struct http_pack_request *new_direct_http_pack_request( + const unsigned char *packed_git_hash, char *url); int finish_http_pack_request(struct http_pack_request *preq); void release_http_pack_request(struct http_pack_request *preq); +/* + * Remove p from the given list, and invoke install_packed_git() on it. + * + * This is a convenience function for users that have obtained a list of packs + * from http_get_info_packs() and have chosen a specific pack to fetch. + */ +void http_install_packfile(struct packed_git *p, + struct packed_git **list_to_remove_from); + /* Helpers for fetching object */ struct http_object_request { char *url; diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh index 50485300eb..ca2e8af022 100755 --- a/t/t5550-http-fetch-dumb.sh +++ b/t/t5550-http-fetch-dumb.sh @@ -199,6 +199,28 @@ test_expect_success 'fetch packed objects' ' git clone $HTTPD_URL/dumb/repo_pack.git ' +test_expect_success 'http-fetch --packfile' ' + # Arbitrary hash. Use rev-parse so that we get one of the correct + # length. + ARBITRARY=$(git -C "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git rev-parse HEAD) && + + git init packfileclient && + p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git && ls objects/pack/pack-*.pack) && + git -C packfileclient http-fetch --packfile=$ARBITRARY "$HTTPD_URL"/dumb/repo_pack.git/$p >out && + + grep "^keep.[0-9a-f]\{16,\}$" out && + cut -c6- out >packhash && + + # Ensure that the expected files are generated + test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).pack" && + test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).idx" && + test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).keep" && + + # Ensure that it has the HEAD of repo_pack, at least + HASH=$(git -C "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git rev-parse HEAD) && + git -C packfileclient cat-file -e "$HASH" +' + test_expect_success 'fetch notices corrupt pack' ' cp -R "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git && (cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git && @@ -214,6 +236,14 @@ test_expect_success 'fetch notices corrupt pack' ' ) ' +test_expect_success 'http-fetch --packfile with corrupt pack' ' + rm -rf packfileclient && + git init packfileclient && + p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git && ls objects/pack/pack-*.pack) && + test_must_fail git -C packfileclient http-fetch --packfile \ + "$HTTPD_URL"/dumb/repo_bad1.git/$p +' + test_expect_success 'fetch notices corrupt idx' ' cp -R "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad2.git && (cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad2.git && diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh index 8da65e60de..7ab0217fb3 100755 --- a/t/t5702-protocol-v2.sh +++ b/t/t5702-protocol-v2.sh @@ -795,6 +795,94 @@ test_expect_success 'when server does not send "ready", expect FLUSH' ' test_i18ngrep "expected no other sections to be sent after no .ready." err ' +configure_exclusion () { + git -C "$1" hash-object "$2" >objh && + git -C "$1" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh && + git -C "$1" config --add \ + "uploadpack.blobpackfileuri" \ + "$(cat objh) $(cat packh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" && + cat objh +} + +test_expect_success 'part of packfile response provided as URI' ' + P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + rm -rf "$P" http_child log && + + git init "$P" && + git -C "$P" config "uploadpack.allowsidebandall" "true" && + + echo my-blob >"$P/my-blob" && + git -C "$P" add my-blob && + echo other-blob >"$P/other-blob" && + git -C "$P" add other-blob && + git -C "$P" commit -m x && + + configure_exclusion "$P" my-blob >h && + configure_exclusion "$P" other-blob >h2 && + + GIT_TRACE=1 GIT_TRACE_PACKET="$(pwd)/log" GIT_TEST_SIDEBAND_ALL=1 \ + git -c protocol.version=2 \ + -c fetch.uriprotocols=http,https \ + clone "$HTTPD_URL/smart/http_parent" http_child && + + # Ensure that my-blob and other-blob are in separate packfiles. + for idx in http_child/.git/objects/pack/*.idx + do + git verify-pack --verbose $idx >out && + { + grep "^[0-9a-f]\{16,\} " out || : + } >out.objectlist && + if test_line_count = 1 out.objectlist + then + if grep $(cat h) out + then + >hfound + fi && + if grep $(cat h2) out + then + >h2found + fi + fi + done && + test -f hfound && + test -f h2found && + + # Ensure that there are exactly 6 files (3 .pack and 3 .idx). + ls http_child/.git/objects/pack/* >filelist && + test_line_count = 6 filelist +' + +test_expect_success 'fetching with valid packfile URI but invalid hash fails' ' + P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + rm -rf "$P" http_child log && + + git init "$P" && + git -C "$P" config "uploadpack.allowsidebandall" "true" && + + echo my-blob >"$P/my-blob" && + git -C "$P" add my-blob && + echo other-blob >"$P/other-blob" && + git -C "$P" add other-blob && + git -C "$P" commit -m x && + + configure_exclusion "$P" my-blob >h && + # Configure a URL for other-blob. Just reuse the hash of the object as + # the hash of the packfile, since the hash does not matter for this + # test as long as it is not the hash of the pack, and it is of the + # expected length. + git -C "$P" hash-object other-blob >objh && + git -C "$P" pack-objects "$HTTPD_DOCUMENT_ROOT_PATH/mypack" <objh >packh && + git -C "$P" config --add \ + "uploadpack.blobpackfileuri" \ + "$(cat objh) $(cat objh) $HTTPD_URL/dumb/mypack-$(cat packh).pack" && + + test_must_fail env GIT_TEST_SIDEBAND_ALL=1 \ + git -c protocol.version=2 \ + -c fetch.uriprotocols=http,https \ + clone "$HTTPD_URL/smart/http_parent" http_child 2>err && + test_i18ngrep "pack downloaded from.*does not match expected hash" err +' + # DO NOT add non-httpd-specific tests here, because the last part of this # test script is only executed when httpd is available and enabled. diff --git a/transport-helper.c b/transport-helper.c index a46afcb69d..93a6f50793 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -410,10 +410,11 @@ static int fetch_with_fetch(struct transport *transport, exit(128); if (skip_prefix(buf.buf, "lock ", &name)) { - if (transport->pack_lockfile) + if (transport->pack_lockfiles.nr) warning(_("%s also locked %s"), data->name, name); else - transport->pack_lockfile = xstrdup(name); + string_list_append(&transport->pack_lockfiles, + name); } else if (data->check_connectivity && data->transport_options.check_self_contained_and_connected && diff --git a/transport.c b/transport.c index 7d50c502ad..6ee6771f55 100644 --- a/transport.c +++ b/transport.c @@ -378,7 +378,7 @@ static int fetch_refs_via_pack(struct transport *transport, refs = fetch_pack(&args, data->fd, refs_tmp ? refs_tmp : transport->remote_refs, to_fetch, nr_heads, &data->shallow, - &transport->pack_lockfile, data->version); + &transport->pack_lockfiles, data->version); close(data->fd[0]); close(data->fd[1]); @@ -921,6 +921,7 @@ struct transport *transport_get(struct remote *remote, const char *url) struct transport *ret = xcalloc(1, sizeof(*ret)); ret->progress = isatty(2); + string_list_init(&ret->pack_lockfiles, 1); if (!remote) BUG("No remote provided to transport_get()"); @@ -1316,10 +1317,11 @@ int transport_fetch_refs(struct transport *transport, struct ref *refs) void transport_unlock_pack(struct transport *transport) { - if (transport->pack_lockfile) { - unlink_or_warn(transport->pack_lockfile); - FREE_AND_NULL(transport->pack_lockfile); - } + int i; + + for (i = 0; i < transport->pack_lockfiles.nr; i++) + unlink_or_warn(transport->pack_lockfiles.items[i].string); + string_list_clear(&transport->pack_lockfiles, 0); } int transport_connect(struct transport *transport, const char *name, diff --git a/transport.h b/transport.h index 4298c855be..05efa72db1 100644 --- a/transport.h +++ b/transport.h @@ -5,8 +5,7 @@ #include "run-command.h" #include "remote.h" #include "list-objects-filter-options.h" - -struct string_list; +#include "string-list.h" struct git_transport_options { unsigned thin : 1; @@ -98,7 +97,8 @@ struct transport { */ const struct string_list *server_options; - char *pack_lockfile; + struct string_list pack_lockfiles; + signed verbose : 3; /** * Transports should not set this directly, and should use this diff --git a/upload-pack.c b/upload-pack.c index f899fdf46a..39d0cf00be 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -84,6 +84,7 @@ struct upload_pack_data { /* 0 for no sideband, otherwise DEFAULT_PACKET_MAX or LARGE_PACKET_MAX */ int use_sideband; + struct string_list uri_protocols; enum allow_uor allow_uor; struct list_objects_filter_options filter_options; @@ -117,6 +118,7 @@ static void upload_pack_data_init(struct upload_pack_data *data) struct oid_array haves = OID_ARRAY_INIT; struct object_array shallows = OBJECT_ARRAY_INIT; struct string_list deepen_not = STRING_LIST_INIT_DUP; + struct string_list uri_protocols = STRING_LIST_INIT_DUP; struct object_array extra_edge_obj = OBJECT_ARRAY_INIT; memset(data, 0, sizeof(*data)); @@ -127,6 +129,7 @@ static void upload_pack_data_init(struct upload_pack_data *data) data->haves = haves; data->shallows = shallows; data->deepen_not = deepen_not; + data->uri_protocols = uri_protocols; data->extra_edge_obj = extra_edge_obj; packet_writer_init(&data->writer, 1); @@ -179,13 +182,86 @@ static int write_one_shallow(const struct commit_graft *graft, void *cb_data) return 0; } -static void create_pack_file(struct upload_pack_data *pack_data) +struct output_state { + char buffer[8193]; + int used; + unsigned packfile_uris_started : 1; + unsigned packfile_started : 1; +}; + +static int relay_pack_data(int pack_objects_out, struct output_state *os, + int use_sideband, int write_packfile_line) +{ + /* + * We keep the last byte to ourselves + * in case we detect broken rev-list, so that we + * can leave the stream corrupted. This is + * unfortunate -- unpack-objects would happily + * accept a valid packdata with trailing garbage, + * so appending garbage after we pass all the + * pack data is not good enough to signal + * breakage to downstream. + */ + ssize_t readsz; + + readsz = xread(pack_objects_out, os->buffer + os->used, + sizeof(os->buffer) - os->used); + if (readsz < 0) { + return readsz; + } + os->used += readsz; + + while (!os->packfile_started) { + char *p; + if (os->used >= 4 && !memcmp(os->buffer, "PACK", 4)) { + os->packfile_started = 1; + if (write_packfile_line) { + if (os->packfile_uris_started) + packet_delim(1); + packet_write_fmt(1, "\1packfile\n"); + } + break; + } + if ((p = memchr(os->buffer, '\n', os->used))) { + if (!os->packfile_uris_started) { + os->packfile_uris_started = 1; + if (!write_packfile_line) + BUG("packfile_uris requires sideband-all"); + packet_write_fmt(1, "\1packfile-uris\n"); + } + *p = '\0'; + packet_write_fmt(1, "\1%s\n", os->buffer); + + os->used -= p - os->buffer + 1; + memmove(os->buffer, p + 1, os->used); + } else { + /* + * Incomplete line. + */ + return readsz; + } + } + + if (os->used > 1) { + send_client_data(1, os->buffer, os->used - 1, use_sideband); + os->buffer[0] = os->buffer[os->used - 1]; + os->used = 1; + } else { + send_client_data(1, os->buffer, os->used, use_sideband); + os->used = 0; + } + + return readsz; +} + +static void create_pack_file(struct upload_pack_data *pack_data, + const struct string_list *uri_protocols) { struct child_process pack_objects = CHILD_PROCESS_INIT; - char data[8193], progress[128]; + struct output_state output_state = { { 0 } }; + char progress[128]; char abort_msg[] = "aborting due to possible repository " "corruption on the remote side."; - int buffered = -1; ssize_t sz; int i; FILE *pipe_fd; @@ -229,6 +305,11 @@ static void create_pack_file(struct upload_pack_data *pack_data) spec); } } + if (uri_protocols) { + for (i = 0; i < uri_protocols->nr; i++) + argv_array_pushf(&pack_objects.args, "--uri-protocol=%s", + uri_protocols->items[i].string); + } pack_objects.in = -1; pack_objects.out = -1; @@ -318,40 +399,17 @@ static void create_pack_file(struct upload_pack_data *pack_data) continue; } if (0 <= pu && (pfd[pu].revents & (POLLIN|POLLHUP))) { - /* Data ready; we keep the last byte to ourselves - * in case we detect broken rev-list, so that we - * can leave the stream corrupted. This is - * unfortunate -- unpack-objects would happily - * accept a valid packdata with trailing garbage, - * so appending garbage after we pass all the - * pack data is not good enough to signal - * breakage to downstream. - */ - char *cp = data; - ssize_t outsz = 0; - if (0 <= buffered) { - *cp++ = buffered; - outsz++; - } - sz = xread(pack_objects.out, cp, - sizeof(data) - outsz); - if (0 < sz) - ; - else if (sz == 0) { + int result = relay_pack_data(pack_objects.out, + &output_state, + pack_data->use_sideband, + !!uri_protocols); + + if (result == 0) { close(pack_objects.out); pack_objects.out = -1; - } - else + } else if (result < 0) { goto fail; - sz += outsz; - if (1 < sz) { - buffered = data[sz-1] & 0xFF; - sz--; } - else - buffered = -1; - send_client_data(1, data, sz, - pack_data->use_sideband); } /* @@ -376,9 +434,8 @@ static void create_pack_file(struct upload_pack_data *pack_data) } /* flush the data */ - if (0 <= buffered) { - data[0] = buffered; - send_client_data(1, data, 1, + if (output_state.used > 0) { + send_client_data(1, output_state.buffer, output_state.used, pack_data->use_sideband); fprintf(stderr, "flushed.\n"); } @@ -1188,7 +1245,7 @@ void upload_pack(struct upload_pack_options *options) receive_needs(&data, &reader); if (data.want_obj.nr) { get_common_commits(&data, &reader); - create_pack_file(&data); + create_pack_file(&data, NULL); } } @@ -1341,10 +1398,18 @@ static void process_args(struct packet_reader *request, continue; } + if (skip_prefix(arg, "packfile-uris ", &p)) { + string_list_split(&data->uri_protocols, p, ',', -1); + continue; + } + /* ignore unknown lines maybe? */ die("unexpected line: '%s'", arg); } + if (data->uri_protocols.nr && !data->writer.use_sideband) + string_list_clear(&data->uri_protocols, 0); + if (request->status != PACKET_READ_FLUSH) die(_("expected flush after fetch arguments")); } @@ -1502,8 +1567,12 @@ int upload_pack_v2(struct repository *r, struct argv_array *keys, send_wanted_ref_info(&data); send_shallow_info(&data); - packet_writer_write(&data.writer, "packfile\n"); - create_pack_file(&data); + if (data.uri_protocols.nr) { + create_pack_file(&data, &data.uri_protocols); + } else { + packet_writer_write(&data.writer, "packfile\n"); + create_pack_file(&data, NULL); + } state = FETCH_DONE; break; case FETCH_DONE: @@ -1522,6 +1591,7 @@ int upload_pack_advertise(struct repository *r, int allow_filter_value; int allow_ref_in_want; int allow_sideband_all_value; + char *str = NULL; strbuf_addstr(value, "shallow"); @@ -1543,6 +1613,14 @@ int upload_pack_advertise(struct repository *r, &allow_sideband_all_value) && allow_sideband_all_value)) strbuf_addstr(value, " sideband-all"); + + if (!repo_config_get_string(the_repository, + "uploadpack.blobpackfileuri", + &str) && + str) { + strbuf_addstr(value, " packfile-uris"); + free(str); + } } return 1; |