Web lists-archives.com

[RFC 4/4] server-endpoint: serve blobs by hash




Upgrade server-endpoint to also serve blobs in a packfile given their
hashes.  Reachability checks are performed before the packfile is sent -
both an absent blob and an unreachable blob are reported to the user in
the same way ("not our blob").

Due to a bug in "rev-list" in the absence of bitmaps (discussed here
[1]), the server repositories in tests all have bitmaps.

[1] <20170309003547.6930-1-jonathantanmy@xxxxxxxxxx>

Signed-off-by: Jonathan Tan <jonathantanmy@xxxxxxxxxx>
---
 server-endpoint.c          | 121 ++++++++++++++++++++++++++++++++++++++++++++-
 t/t5573-server-endpoint.sh |  60 ++++++++++++++++++++++
 2 files changed, 180 insertions(+), 1 deletion(-)
 create mode 100644 t/t5573-server-endpoint.sh

diff --git a/server-endpoint.c b/server-endpoint.c
index a9c0c7c94..870b853a6 100644
--- a/server-endpoint.c
+++ b/server-endpoint.c
@@ -192,6 +192,123 @@ static int fetch_ref(int stateless_rpc)
 	return -1;
 }
 
+/*
+ * Returns 1 if all blobs are reachable from the refs. If not, returns 0 and
+ * stores the hash of one unreachable blob in unreachable. Dies on failure.
+ */
+static int are_all_reachable(const struct object_array *blobs, struct object_id *unreachable)
+{
+	struct child_process cmd = CHILD_PROCESS_INIT;
+	static const char *argv[] = {
+		"rev-list", "--objects", "--use-bitmap-index", "--stdin", "--not", "--all", "--not", NULL,
+	};
+	int i;
+	ssize_t len;
+	char buf[41] = {0};
+	char rest[256];
+
+	cmd.argv = argv;
+	cmd.git_cmd = 1;
+	cmd.in = -1;
+	cmd.out = -1;
+
+	if (start_command(&cmd))
+		goto error;
+
+	for (i = 0; i < blobs->nr; i++) {
+		if (write_in_full(cmd.in, sha1_to_hex(blobs->objects[i].item->oid.hash), 40) < 0 ||
+		    write_in_full(cmd.in, "\n", 1) < 0)
+			goto error;
+	}
+	close(cmd.in);
+	cmd.in = -1;
+
+	len = read_in_full(cmd.out, buf, 40);
+	while (len > 0 && read_in_full(cmd.out, rest, sizeof(rest)) > 0)
+		; /* drain so that rev-list is not killed by SIGPIPE */
+	close(cmd.out);
+	cmd.out = -1;
+
+	if (len < 0 || finish_command(&cmd))
+		goto error;
+	if (len && get_oid_hex(buf, unreachable))
+		goto error;
+	return !len; /* no output means that nothing was unreachable */
+
+error:
+	if (cmd.out >= 0)
+		close(cmd.out);
+	die("problem with running rev-list");
+}
+
+/* Sends a packfile of the given blobs to stdout; dies if pack-objects fails. */
+static void send_blobs(const struct object_array *blobs)
+{
+	struct child_process cmd = CHILD_PROCESS_INIT;
+	static const char *argv[] = {
+		"pack-objects", "--stdout", NULL
+	};
+	int i;
+
+	cmd.argv = argv;
+	cmd.git_cmd = 1;
+	cmd.in = -1;
+	cmd.out = 0; /* the pack goes straight to our own stdout */
+
+	if (start_command(&cmd))
+		goto error;
+
+	for (i = 0; i < blobs->nr; i++) {
+		if (write_in_full(cmd.in, sha1_to_hex(blobs->objects[i].item->oid.hash), 40) < 0 ||
+		    write_in_full(cmd.in, "\n", 1) < 0)
+			goto error;
+	}
+	close(cmd.in);
+	cmd.in = -1;
+
+	if (!finish_command(&cmd))
+		return;
+
+error:
+	die("problem with running pack-objects");
+}
+
+/* Handles "fetch-blobs": reads "want" lines, then replies ERR or ACK + pack. */
+static int fetch_blob(void)
+{
+	struct object_array requested = OBJECT_ARRAY_INIT;
+	struct object_id missing;
+	char *pkt;
+
+	for (pkt = packet_read_line(0, NULL); pkt; pkt = packet_read_line(0, NULL)) {
+		struct object_id want;
+		struct object *blob;
+		const char *hex;
+
+		if (!skip_prefix(pkt, "want ", &hex))
+			continue; /* lines without the "want " prefix are ignored */
+		if (get_oid_hex(hex, &want)) {
+			packet_write_fmt(1, "ERR invalid object ID <%s>", hex);
+			return 0;
+		}
+		blob = parse_object(want.hash);
+		if (!blob || blob->type != OBJ_BLOB) {
+			packet_write_fmt(1, "ERR not our blob <%s>", hex);
+			return 0;
+		}
+		add_object_array(blob, NULL, &requested);
+	}
+
+	if (are_all_reachable(&requested, &missing)) {
+		packet_write_fmt(1, "ACK\n");
+		send_blobs(&requested);
+	} else {
+		/* same message as for an absent blob, so the two look alike */
+		packet_write_fmt(1, "ERR not our blob <%s>", oid_to_hex(&missing));
+	}
+	return 0;
+}
+
 static int server_endpoint_config(const char *var, const char *value, void *unused)
 {
 	return parse_hide_refs_config(var, value, "uploadpack");
@@ -224,5 +341,7 @@ int cmd_main(int argc, const char **argv)
 	line = packet_read_line(0, NULL);
 	if (!strcmp(line, "fetch-refs"))
 		return fetch_ref(stateless_rpc);
-	die("only fetch-refs is supported");
+	if (!strcmp(line, "fetch-blobs"))
+		return fetch_blob();
+	die("only fetch-refs and fetch-blobs are supported");
 }
diff --git a/t/t5573-server-endpoint.sh b/t/t5573-server-endpoint.sh
new file mode 100644
index 000000000..48f052851
--- /dev/null
+++ b/t/t5573-server-endpoint.sh
@@ -0,0 +1,60 @@
+#!/bin/sh
+
+test_description='server-endpoint'
+
+. ./test-lib.sh
+
+test_expect_success 'fetch-blobs basic' '
+	rm -rf server client &&
+	git init server &&
+	(
+		cd server &&
+		test_commit 0 &&
+		test_commit 1 &&
+		git repack -a -d --write-bitmap-index
+	) &&
+	BLOB0=$(git hash-object server/0.t) &&
+	BLOB1=$(git hash-object server/1.t) &&
+	printf "000ffetch-blobs0031want %s0031want %s0000" "$BLOB0" "$BLOB1" | git server-endpoint server >out &&
+
+	test "$(head -n 1 out)" = "0008ACK" &&
+
+	git init client &&
+	sed 1d out | git -C client unpack-objects &&
+	git -C client cat-file -e "$BLOB0" &&
+	git -C client cat-file -e "$BLOB1"
+'
+
+test_expect_success 'fetch-blobs no such object' '
+	rm -rf server client &&
+	git init server &&
+	(
+		cd server &&
+		test_commit 0 &&
+		git repack -a -d --write-bitmap-index
+	) &&
+	BLOB0=$(git hash-object server/0.t) &&
+	echo myblob >myblob &&
+	MYBLOB=$(git hash-object myblob) &&
+	printf "000ffetch-blobs0031want %s0031want %s0000" "$BLOB0" "$MYBLOB" | git server-endpoint server >out &&
+
+	grep "ERR not our blob.*$MYBLOB" out
+'
+
+test_expect_success 'fetch-blobs unreachable' '
+	rm -rf server client &&
+	git init server &&
+	(
+		cd server &&
+		test_commit 0 &&
+		git repack -a -d --write-bitmap-index
+	) &&
+	BLOB0=$(git hash-object server/0.t) &&
+	echo myblob >myblob &&
+	MYBLOB=$(git -C server hash-object -w ../myblob) &&
+	printf "000ffetch-blobs0031want %s0031want %s0000" "$BLOB0" "$MYBLOB" | git server-endpoint server >out &&
+
+	grep "ERR not our blob.*$MYBLOB" out
+'
+
+test_done
-- 
2.12.2.715.g7642488e1d-goog