Web lists-archives.com

[PATCH 15/17] commit-graph: write split commit-graph files




From: Derrick Stolee <dstolee@xxxxxxxxxxxxx>

Signed-off-by: Derrick Stolee <dstolee@xxxxxxxxxxxxx>
---
 commit-graph.c          | 248 +++++++++++++++++++++++++++++++++++++++-
 commit-graph.h          |   1 +
 t/t5318-commit-graph.sh |   2 +-
 3 files changed, 244 insertions(+), 7 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 5f6193277a..44448aabe4 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -40,6 +40,8 @@
 #define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
 			+ GRAPH_FANOUT_SIZE + the_hash_algo->rawsz)
 
+#define MAX_SPLIT_COMMITS 64000
+
 char *get_commit_graph_filename(const char *obj_dir)
 {
 	return xstrfmt("%s/info/commit-graph", obj_dir);
@@ -619,9 +621,14 @@ struct write_commit_graph_context {
 	unsigned long approx_nr_objects;
 	struct progress *progress;
 	int progress_done;
+	int num_commit_graphs_before;
+	int num_commit_graphs_after;
+	uint32_t new_num_commits_in_base;
+	struct commit_graph *new_base_graph;
 	uint64_t progress_cnt;
 	unsigned append:1,
-		 report_progress:1;
+		 report_progress:1,
+		 split:1;
 };
 
 static void write_graph_chunk_fanout(struct hashfile *f,
@@ -691,6 +698,16 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
 					      ctx->commits.nr,
 					      commit_to_sha1);
 
+			if (edge_value >= 0)
+				edge_value += ctx->new_num_commits_in_base;
+			else {
+				uint32_t pos;
+				if (find_commit_in_graph(parent->item,
+							 ctx->new_base_graph,
+							 &pos))
+					edge_value = pos;
+			}
+
 			if (edge_value < 0)
 				BUG("missing parent %s for commit %s",
 				    oid_to_hex(&parent->item->object.oid),
@@ -711,6 +728,17 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
 					      ctx->commits.list,
 					      ctx->commits.nr,
 					      commit_to_sha1);
+
+			if (edge_value >= 0)
+				edge_value += ctx->new_num_commits_in_base;
+			else {
+				uint32_t pos;
+				if (find_commit_in_graph(parent->item,
+							 ctx->new_base_graph,
+							 &pos))
+					edge_value = pos;
+			}
+
 			if (edge_value < 0)
 				BUG("missing parent %s for commit %s",
 				    oid_to_hex(&parent->item->object.oid),
@@ -768,6 +796,16 @@ static void write_graph_chunk_extra_edges(struct hashfile *f,
 						  ctx->commits.nr,
 						  commit_to_sha1);
 
+			if (edge_value >= 0)
+				edge_value += ctx->new_num_commits_in_base;
+			else {
+				uint32_t pos;
+				if (find_commit_in_graph(parent->item,
+							 ctx->new_base_graph,
+							 &pos))
+					edge_value = pos;
+			}
+
 			if (edge_value < 0)
 				BUG("missing parent %s for commit %s",
 				    oid_to_hex(&parent->item->object.oid),
@@ -782,7 +820,7 @@ static void write_graph_chunk_extra_edges(struct hashfile *f,
 	}
 }
 
-static int commit_compare(const void *_a, const void *_b)
+static int oid_compare(const void *_a, const void *_b)
 {
 	const struct object_id *a = (const struct object_id *)_a;
 	const struct object_id *b = (const struct object_id *)_b;
@@ -859,7 +897,13 @@ static void close_reachable(struct write_commit_graph_context *ctx)
 		display_progress(ctx->progress, i + 1);
 		commit = lookup_commit(ctx->r, &ctx->oids.list[i]);
 
-		if (commit && !parse_commit_no_graph(commit))
+		if (!commit)
+			continue;
+		if (ctx->split) {
+			if (!parse_commit(commit) &&
+			    commit->graph_pos == COMMIT_NOT_FROM_GRAPH)
+				add_missing_parents(ctx, commit);
+		} else if (!parse_commit_no_graph(commit))
 			add_missing_parents(ctx, commit);
 	}
 	stop_progress(&ctx->progress);
@@ -1051,12 +1095,20 @@ static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx)
 			_("Counting distinct commits in commit graph"),
 			ctx->oids.nr);
 	display_progress(ctx->progress, 0); /* TODO: Measure QSORT() progress */
-	QSORT(ctx->oids.list, ctx->oids.nr, commit_compare);
+	QSORT(ctx->oids.list, ctx->oids.nr, oid_compare);
 
 	for (i = 1; i < ctx->oids.nr; i++) {
 		display_progress(ctx->progress, i + 1);
-		if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i]))
+		if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) {
+			if (ctx->split) {
+				struct commit *c = lookup_commit(ctx->r, &ctx->oids.list[i]);
+
+				if (!c || c->graph_pos != COMMIT_NOT_FROM_GRAPH)
+					continue;
+			}
+
 			count_distinct++;
+		}
 	}
 	stop_progress(&ctx->progress);
 
@@ -1079,7 +1131,13 @@ static void copy_oids_to_commits(struct write_commit_graph_context *ctx)
 		if (i > 0 && oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i]))
 			continue;
 
+		ALLOC_GROW(ctx->commits.list, ctx->commits.nr + 1, ctx->commits.alloc);
 		ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.list[i]);
+
+		if (ctx->split &&
+		    ctx->commits.list[ctx->commits.nr]->graph_pos != COMMIT_NOT_FROM_GRAPH)
+			continue;
+
 		parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]);
 
 		for (parent = ctx->commits.list[ctx->commits.nr]->parents;
@@ -1105,7 +1163,13 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
 	struct strbuf progress_title = STRBUF_INIT;
 	int num_chunks = ctx->num_extra_edges ? 4 : 3;
 
-	ctx->graph_name = get_commit_graph_filename(ctx->obj_dir);
+	if (ctx->num_commit_graphs_after > 1)
+		ctx->graph_name = get_split_graph_filename(
+					ctx->obj_dir,
+					ctx->num_commit_graphs_after - 1);
+	else
+		ctx->graph_name = get_commit_graph_filename(ctx->obj_dir);
+
 	if (safe_create_leading_directories(ctx->graph_name)) {
 		UNLEAK(ctx->graph_name);
 		error(_("unable to create leading directories of %s"),
@@ -1167,11 +1231,166 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
 
 	close_commit_graph(ctx->r);
 	finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
+
+	while (ctx->num_commit_graphs_before > ctx->num_commit_graphs_after) {
+		char *graph_name = get_split_graph_filename(
+					ctx->obj_dir,
+					--ctx->num_commit_graphs_before);
+		unlink(graph_name);
+		free(graph_name);
+	}
+
 	commit_lock_file(&lk);
 
 	return 0;
 }
 
+static size_t expected_commit_graph_size(size_t num_commits)
+{
+	return GRAPH_HEADER_SIZE + GRAPH_FANOUT_SIZE + 6 * GRAPH_CHUNKLOOKUP_WIDTH +
+		(num_commits + 1) * (GRAPH_DATA_WIDTH + the_hash_algo->rawsz);
+}
+
+static void split_graph_merge_strategy(struct write_commit_graph_context *ctx)
+{
+	struct commit_graph *g = ctx->r->objects->commit_graph;
+	uint32_t num_commits = ctx->commits.nr;
+	size_t expected_size = expected_commit_graph_size(num_commits);
+
+	ctx->num_commit_graphs_before = 0;
+	while (g) {
+		ctx->num_commit_graphs_before++;
+		g = g->base_graph;
+	}
+
+	g = ctx->r->objects->commit_graph;
+	ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1;
+
+	while (g && (g->data_len <= 2 * expected_size || num_commits > MAX_SPLIT_COMMITS)) {
+		num_commits += g->num_commits;
+		expected_size = expected_commit_graph_size(num_commits);
+		g = g->base_graph;
+		ctx->num_commit_graphs_after--;
+	}
+}
+
+static void merge_commit_graph(struct write_commit_graph_context *ctx,
+			       struct commit_graph *g)
+{
+	uint32_t i;
+	uint32_t offset = g->num_commits_in_base;
+
+	for (i = 0; i < g->num_commits; i++) {
+		struct object_id oid;
+		struct commit *result;
+
+		display_progress(ctx->progress, i + 1);
+
+		load_oid_from_graph(g, i + offset, &oid);
+
+		/* only add commits if they still exist in the repo */
+		result = lookup_commit_reference_gently(ctx->r, &oid, 1);
+
+		if (result) {
+			ALLOC_GROW(ctx->commits.list, ctx->commits.nr + 1, ctx->commits.alloc);
+			ctx->commits.list[ctx->commits.nr] = result;
+			ctx->commits.nr++;
+		}
+	}
+}
+
+static int commit_compare(const void *_a, const void *_b)
+{
+	const struct commit *a = *(const struct commit **)_a;
+	const struct commit *b = *(const struct commit **)_b;
+	return oidcmp(&a->object.oid, &b->object.oid);
+}
+
+static void deduplicate_commits(struct write_commit_graph_context *ctx)
+{
+	uint32_t i, num_parents, last_distinct = 0, duplicates = 0;
+	struct commit_list *parent;
+
+	if (ctx->report_progress)
+		ctx->progress = start_delayed_progress(
+					_("De-duplicating merged commits"),
+					ctx->commits.nr);
+
+	QSORT(ctx->commits.list, ctx->commits.nr, commit_compare);
+
+	ctx->num_extra_edges = 0;
+	for (i = 1; i < ctx->commits.nr; i++) {
+		display_progress(ctx->progress, i);
+
+		if (oideq(&ctx->commits.list[last_distinct]->object.oid,
+			  &ctx->commits.list[i]->object.oid)) {
+			duplicates++;
+		} else {
+			if (duplicates)
+				ctx->commits.list[last_distinct + 1] = ctx->commits.list[i];
+			last_distinct++;
+
+			num_parents = 0;
+			for (parent = ctx->commits.list[i]->parents; parent; parent = parent->next)
+				num_parents++;
+
+			if (num_parents > 2)
+				ctx->num_extra_edges += num_parents - 2;
+		}
+	}
+
+	ctx->commits.nr -= duplicates;
+	stop_progress(&ctx->progress);
+}
+
+static void merge_commit_graphs(struct write_commit_graph_context *ctx)
+{
+	struct commit_graph *g = ctx->r->objects->commit_graph;
+	uint32_t current_graph_number = ctx->num_commit_graphs_before;
+	struct strbuf progress_title = STRBUF_INIT;
+
+	while (g && current_graph_number >= ctx->num_commit_graphs_after) {
+		current_graph_number--;
+
+		if (ctx->report_progress) {
+			if (current_graph_number)
+				strbuf_addf(&progress_title,
+					    _("Merging commit-graph-%d"),
+					    current_graph_number);
+			else
+				strbuf_addstr(&progress_title,
+					      _("Merging commit-graph"));
+			ctx->progress = start_delayed_progress(progress_title.buf, 0);
+		}
+
+		merge_commit_graph(ctx, g);
+		stop_progress(&ctx->progress);
+		strbuf_release(&progress_title);
+
+		g = g->base_graph;
+	}
+
+	if (g) {
+		ctx->new_base_graph = g;
+		ctx->new_num_commits_in_base = g->num_commits + g->num_commits_in_base;
+	}
+
+	deduplicate_commits(ctx);
+}
+
+static void collapse_all_commit_graphs(struct write_commit_graph_context *ctx)
+{
+	struct commit_graph *g = ctx->r->objects->commit_graph;
+
+	ctx->num_commit_graphs_after = 1;
+	ctx->num_commit_graphs_before = 0;
+
+	while (g) {
+		ctx->num_commit_graphs_before++;
+		g = g->base_graph;
+	}
+}
+
 int write_commit_graph(const char *obj_dir,
 		       struct string_list *pack_indexes,
 		       struct string_list *commit_hex,
@@ -1189,10 +1408,17 @@ int write_commit_graph(const char *obj_dir,
 	ctx->obj_dir = obj_dir;
 	ctx->append = flags & COMMIT_GRAPH_APPEND ? 1 : 0;
 	ctx->report_progress = flags & COMMIT_GRAPH_PROGRESS ? 1 : 0;
+	ctx->split = flags & COMMIT_GRAPH_SPLIT ? 1 : 0;
+
+	if (ctx->split)
+		prepare_commit_graph(ctx->r);
 
 	ctx->approx_nr_objects = approximate_object_count();
 	ctx->oids.alloc = ctx->approx_nr_objects / 32;
 
+	if (ctx->split && ctx->oids.alloc > MAX_SPLIT_COMMITS)
+		ctx->oids.alloc = MAX_SPLIT_COMMITS;
+
 	if (ctx->append) {
 		prepare_commit_graph_one(ctx->r, ctx->obj_dir);
 		if (ctx->r->objects->commit_graph)
@@ -1243,6 +1469,16 @@ int write_commit_graph(const char *obj_dir,
 		goto cleanup;
 	}
 
+	if (!ctx->commits.nr)
+		goto cleanup;
+
+	if (ctx->split) {
+		split_graph_merge_strategy(ctx);
+
+		merge_commit_graphs(ctx);
+	} else
+		collapse_all_commit_graphs(ctx);
+
 	compute_generation_numbers(ctx);
 
 	res = write_commit_graph_file(ctx);
diff --git a/commit-graph.h b/commit-graph.h
index 170920720d..7a39ae2278 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -71,6 +71,7 @@ int generation_numbers_enabled(struct repository *r);
 
 #define COMMIT_GRAPH_APPEND     (1 << 0)
 #define COMMIT_GRAPH_PROGRESS   (1 << 1)
+#define COMMIT_GRAPH_SPLIT      (1 << 2)
 
 int write_commit_graph_reachable(const char *obj_dir, unsigned int flags);
 int write_commit_graph(const char *obj_dir,
diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh
index e80c1cac02..d621608500 100755
--- a/t/t5318-commit-graph.sh
+++ b/t/t5318-commit-graph.sh
@@ -20,7 +20,7 @@ test_expect_success 'verify graph with no graph file' '
 test_expect_success 'write graph with no packs' '
 	cd "$TRASH_DIRECTORY/full" &&
 	git commit-graph write --object-dir . &&
-	test_path_is_file info/commit-graph
+	test_path_is_missing info/commit-graph
 '
 
 test_expect_success 'create commits and repack' '
-- 
gitgitgadget