Web lists-archives.com

[PATCH] clone: do faster object check for partial clones




For partial clones, doing a full connectivity check is wasteful; we skip
promisor objects (which, for a partial clone, is all known objects), and
excluding them all from the connectivity check can take a significant
amount of time on large repos.

At most, we want to make sure that we get the objects referred to by any
wanted refs. For partial clones, just check that these objects were
transferred.

Signed-off-by: Josh Steadmon <steadmon@xxxxxxxxxx>
---
 builtin/clone.c |  6 ++++--
 connected.c     | 15 +++++++++++++++
 connected.h     |  8 ++++++++
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index 50bde99618..fdbbd8942a 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -657,7 +657,8 @@ static void update_remote_refs(const struct ref *refs,
 			       const char *branch_top,
 			       const char *msg,
 			       struct transport *transport,
-			       int check_connectivity)
+			       int check_connectivity,
+			       int check_refs_only)
 {
 	const struct ref *rm = mapped_refs;
 
@@ -666,6 +667,7 @@ static void update_remote_refs(const struct ref *refs,
 
 		opt.transport = transport;
 		opt.progress = transport->progress;
+		opt.check_refs_only = !!check_refs_only;
 
 		if (check_connected(iterate_ref_map, &rm, &opt))
 			die(_("remote did not send all necessary objects"));
@@ -1224,7 +1226,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
 
 	update_remote_refs(refs, mapped_refs, remote_head_points_at,
 			   branch_top.buf, reflog_msg.buf, transport,
-			   !is_local);
+			   !is_local, filter_options.choice);
 
 	update_head(our_head_points_at, remote_head, reflog_msg.buf);
 
diff --git a/connected.c b/connected.c
index 1bba888eff..c297cdc5ab 100644
--- a/connected.c
+++ b/connected.c
@@ -1,4 +1,5 @@
 #include "cache.h"
+#include "object-store.h"
 #include "run-command.h"
 #include "sigchain.h"
 #include "connected.h"
@@ -49,6 +50,20 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
 		strbuf_release(&idx_file);
 	}
 
+	if (opt->check_refs_only) {
+		/*
+		 * For partial clones, we don't want to walk the full commit
+		 * graph because we're skipping promisor objects anyway. We
+		 * should just check that objects referenced by wanted refs were
+		 * transferred.
+		 */
+		do {
+			if (!repo_has_object_file(the_repository, &oid))
+				return 1;
+		} while (!fn(cb_data, &oid));
+		return 0;
+	}
+
 	if (opt->shallow_file) {
 		argv_array_push(&rev_list.args, "--shallow-file");
 		argv_array_push(&rev_list.args, opt->shallow_file);
diff --git a/connected.h b/connected.h
index 8d5a6b3ad6..bb4afcb301 100644
--- a/connected.h
+++ b/connected.h
@@ -46,6 +46,14 @@ struct check_connected_options {
 	 * during a fetch.
 	 */
 	unsigned is_deepening_fetch : 1;
+
+	/*
+	 * If non-zero, only check the top-level objects referenced by the
+	 * wanted refs (passed in as cb_data). This is useful for partial
+	 * clones, where this can be much faster than excluding all promisor
+	 * objects prior to walking the commit graph.
+	 */
+	unsigned check_refs_only : 1;
 };
 
 #define CHECK_CONNECTED_INIT { 0 }
-- 
2.21.0.392.gf8f6787159e-goog