Web lists-archives.com

[RFC 1/4] server-endpoint: serve refs without advertisement




Introduce a new binary that can act as an endpoint to serve refs without
first sending the ref advertisement (a list of all ref names and
associated hashes that the server contains). For very large
repositories, including an internal Android repository with more than
700000 refs, this would save tens of megabytes of network bandwidth
during each fetch.

This endpoint handles ref namespaces and "uploadpack.hiderefs" by
itself, and handles other functionality by invoking upload-pack and
acting as an intermediary (therefore having to know the relatively
minute details of the fetch-pack/upload-pack protocol).

Note: There is still an issue with the handling of "deepen" lines. The
documentation for the pack protocol states that "deepen 0" is the same
as not specifying any depth, but upload-pack seems to not accept "deepen
0". I'm not sure if it's better to change the documentation or change
the code - I generally prefer to change the code in such cases, but
treating "deepen 0" (and similar things like "deepen 000") differently
from other "deepen"s requires multiple components to know about this
special case (upload-pack, fetch-pack, and now server-endpoint) so I'm
inclined to just forbid it (like in the current code).

Signed-off-by: Jonathan Tan <jonathantanmy@xxxxxxxxxx>
---
 .gitignore        |   1 +
 Makefile          |   2 +
 server-endpoint.c | 228 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 231 insertions(+)
 create mode 100644 server-endpoint.c

diff --git a/.gitignore b/.gitignore
index 833ef3b0b..761e06d2c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -140,6 +140,7 @@
 /git-rm
 /git-send-email
 /git-send-pack
+/git-server-endpoint
 /git-sh-i18n
 /git-sh-i18n--envsubst
 /git-sh-setup
diff --git a/Makefile b/Makefile
index c80fec292..0d3813772 100644
--- a/Makefile
+++ b/Makefile
@@ -603,6 +603,7 @@ PROGRAM_OBJS += shell.o
 PROGRAM_OBJS += show-index.o
 PROGRAM_OBJS += upload-pack.o
 PROGRAM_OBJS += remote-testsvn.o
+PROGRAM_OBJS += server-endpoint.o
 
 # Binary suffix, set to .exe for Windows builds
 X =
@@ -673,6 +674,7 @@ BINDIR_PROGRAMS_NEED_X += git-upload-pack
 BINDIR_PROGRAMS_NEED_X += git-receive-pack
 BINDIR_PROGRAMS_NEED_X += git-upload-archive
 BINDIR_PROGRAMS_NEED_X += git-shell
+BINDIR_PROGRAMS_NEED_X += git-server-endpoint
 
 BINDIR_PROGRAMS_NO_X += git-cvsserver
 
diff --git a/server-endpoint.c b/server-endpoint.c
new file mode 100644
index 000000000..a9c0c7c94
--- /dev/null
+++ b/server-endpoint.c
@@ -0,0 +1,228 @@
+#include "cache.h"
+#include "pkt-line.h"
+#include "refs.h"
+#include "revision.h"
+#include "run-command.h"
+
+static const char * const server_endpoint_usage[] = {
+	N_("git server-endpoint [<options>] <dir>"),
+	NULL
+};
+
+static const char *capabilities = "multi_ack_detailed side-band-64k shallow";
+
+struct handle_want_data {
+	int upload_pack_in_fd;
+	int capabilities_sent;
+	struct string_list sent_namespaced_names;
+};
+
+static int send_want(const char *namespaced_name, const struct object_id *oid,
+		     int flags, void *handle_want_data)
+{
+	struct handle_want_data *data = handle_want_data;
+
+	if (ref_is_hidden(strip_namespace(namespaced_name), namespaced_name))
+		return 0;
+	if (string_list_lookup(&data->sent_namespaced_names, namespaced_name))
+		return 0;
+
+	string_list_insert(&data->sent_namespaced_names, namespaced_name);
+
+	if (data->capabilities_sent) {
+		packet_write_fmt(data->upload_pack_in_fd, "want %s\n",
+				 oid_to_hex(oid));
+	} else {
+		packet_write_fmt(data->upload_pack_in_fd, "want %s%s\n",
+				 oid_to_hex(oid), capabilities);
+		data->capabilities_sent = 1;
+	}
+
+	return 0;
+}
+
+static void handle_want(const char *arg, struct handle_want_data *data) {
+	char *namespaced_name = xstrfmt("%s%s", get_git_namespace(), arg);
+	if (has_glob_specials(arg)) {
+		for_each_glob_ref(send_want, namespaced_name, data);
+	} else {
+		struct object_id oid;
+		if (!read_ref(namespaced_name, oid.hash))
+			send_want(namespaced_name, &oid, 0, data);
+	}
+	free(namespaced_name);
+}
+
+static int fetch_ref(int stateless_rpc)
+{
+	struct child_process cmd = CHILD_PROCESS_INIT;
+	static const char *argv[] = {
+		"upload-pack", ".", NULL, NULL
+	};
+	struct handle_want_data handle_want_data = {0, 0, STRING_LIST_INIT_DUP};
+
+	char *line;
+	int size;
+
+	int upload_pack_will_respond = 0;
+	int wanted_refs_sent = 0;
+
+	if (stateless_rpc)
+		argv[2] = "--stateless-rpc";
+	cmd.argv = argv;
+	cmd.git_cmd = 1;
+	cmd.in = -1;
+	cmd.out = -1;
+
+	if (start_command(&cmd))
+		goto error;
+
+	handle_want_data.upload_pack_in_fd = cmd.in;
+
+	if (!stateless_rpc) {
+		/* Drain the initial ref advertisement (until flush-pkt). */
+		while (packet_read_line(cmd.out, NULL))
+			;
+	}
+
+	/* Send the wants. Upload-pack will not respond to this unless a depth
+	 * request is made. */
+	while ((line = packet_read_line(0, NULL))) {
+		const char *arg;
+		if (skip_prefix(line, "want ", &arg)) {
+			handle_want(arg, &handle_want_data);
+		} else if (starts_with(line, "shallow ")) {
+			packet_write_fmt(cmd.in, "%s", line);
+		} else if (starts_with(line, "deepen ") ||
+			   starts_with(line, "deepen-since ") ||
+			   starts_with(line, "deepen-not ")) {
+			packet_write_fmt(cmd.in, "%s", line);
+			upload_pack_will_respond = 1;
+		}
+	}
+	packet_flush(cmd.in);
+
+	if (upload_pack_will_respond) {
+		while ((line = packet_read_line(cmd.out, NULL))) {
+			packet_write_fmt(1, "%s", line);
+		}
+		packet_flush(1);
+	}
+
+	/* Continue to copy the conversation. */
+	do {
+		char buffer[LARGE_PACKET_DATA_MAX];
+		char size_buffer[5]; /* 4 bytes + NUL */
+		int done_received = 0;
+		int ready_received = 0;
+		int options = PACKET_READ_CHOMP_NEWLINE;
+
+		while ((line = packet_read_line(0, NULL))) {
+			packet_write_fmt(cmd.in, "%s", line);
+			if (!strcmp(line, "done")) {
+				done_received = 1;
+				/* "done" also marks the end of the request. */
+				goto after_flush;
+			}
+		}
+		packet_flush(cmd.in);
+after_flush:
+		while ((size = packet_read(cmd.out, NULL, NULL, buffer,
+					   sizeof(buffer), options))) {
+			int send_wanted_refs = 0;
+			if (!wanted_refs_sent) {
+				if ((done_received || ready_received) &&
+				    size == strlen("ACK ") + GIT_SHA1_HEXSZ &&
+				    starts_with(buffer, "ACK "))
+					send_wanted_refs = 1;
+				else if (done_received && !strcmp(buffer, "NAK"))
+					send_wanted_refs = 1;
+				else if (size == strlen("ACK  ready") + GIT_SHA1_HEXSZ &&
+					 starts_with(buffer, "ACK ") &&
+					 !strcmp(buffer + strlen("ACK  ") + GIT_SHA1_HEXSZ, "ready"))
+					ready_received = 1;
+			}
+			if (send_wanted_refs) {
+				struct string_list_item *item;
+				for_each_string_list_item(item,
+							  &handle_want_data.sent_namespaced_names) {
+					struct object_id oid;
+					if (read_ref(item->string, oid.hash))
+						die("something happened");
+					packet_write_fmt(1, "wanted %s %s",
+							 oid_to_hex(&oid),
+							 strip_namespace(item->string));
+				}
+				wanted_refs_sent = 1;
+				/* Do not chomp any more characters because
+				 * binary data (packfile) is about to be sent.
+				 */
+				options = 0;
+			}
+			sprintf(size_buffer, "%04x", size + 4);
+			write_or_die(1, size_buffer, 4);
+			write_or_die(1, buffer, size);
+			if (!wanted_refs_sent && !strcmp(buffer, "NAK")) {
+				/* NAK before we send wanted refs marks the end
+				 * of the response. */
+				goto after_flush_2;
+			}
+		}
+		packet_flush(1);
+after_flush_2:
+		;
+	} while (!stateless_rpc && !wanted_refs_sent);
+
+	close(cmd.in);
+	cmd.in = -1;
+	close(cmd.out);
+	cmd.out = -1;
+
+	if (finish_command(&cmd))
+		return -1;
+
+	return 0;
+
+error:
+
+	if (cmd.in >= 0)
+		close(cmd.in);
+	if (cmd.out >= 0)
+		close(cmd.out);
+	return -1;
+}
+
+static int server_endpoint_config(const char *var, const char *value, void *unused)
+{
+	return parse_hide_refs_config(var, value, "uploadpack");
+}
+
+int cmd_main(int argc, const char **argv)
+{
+	int stateless_rpc = 0;
+
+	struct option options[] = {
+		OPT_BOOL(0, "stateless-rpc", &stateless_rpc,
+			 N_("quit after a single request/response exchange")),
+		OPT_END()
+	};
+
+	char *line;
+
+	packet_trace_identity("server-endpoint");
+	check_replace_refs = 0;
+
+	argc = parse_options(argc, argv, NULL, options, server_endpoint_usage, 0);
+
+	if (argc != 1)
+		die("must have 1 arg");
+
+	if (!enter_repo(argv[0], 0))
+		die("does not appear to be a git repository");
+	git_config(server_endpoint_config, NULL);
+
+	line = packet_read_line(0, NULL);
+	if (!strcmp(line, "fetch-refs"))
+		return fetch_ref(stateless_rpc);
+	die("only fetch-refs is supported");
+}
-- 
2.12.2.715.g7642488e1d-goog