Web lists-archives.com

[PATCH v1] speed up refresh_index() by utilizing preload_index()




From: Ben Peart <benpeart@xxxxxxxxxxxxx>

Speed up refresh_index() by first calling preload_index(), which spreads
most of the work across multiple threads.  This works because most cache
entries will already be marked CE_UPTODATE afterwards, so refresh_cache_ent()
can bail out early for them when called from within refresh_index().

On a Windows repo with ~200K files, this drops refresh times from 6.64
seconds to 2.87 seconds for a savings of 57%.

Signed-off-by: Ben Peart <benpeart@xxxxxxxxxxxxx>
---

Notes:
    Base Ref: master
    Web-Diff: https://github.com/benpeart/git/commit/feee1054c2
    Checkout: git fetch https://github.com/benpeart/git refresh-index-multithread-preload-v1 && git checkout feee1054c2

 cache.h         | 3 +++
 preload-index.c | 8 ++++----
 read-cache.c    | 6 ++++++
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/cache.h b/cache.h
index f7fabdde8f..883099db08 100644
--- a/cache.h
+++ b/cache.h
@@ -659,6 +659,9 @@ extern int daemonize(void);
 /* Initialize and use the cache information */
 struct lock_file;
 extern int read_index(struct index_state *);
+extern void preload_index(struct index_state *index,
+			  const struct pathspec *pathspec,
+			  unsigned int refresh_flags);
 extern int read_index_preload(struct index_state *,
 			      const struct pathspec *pathspec,
 			      unsigned int refresh_flags);
diff --git a/preload-index.c b/preload-index.c
index 9e7152ab14..222792ccbc 100644
--- a/preload-index.c
+++ b/preload-index.c
@@ -9,7 +9,7 @@
 #include "progress.h"
 
 #ifdef NO_PTHREADS
-static void preload_index(struct index_state *index,
+void preload_index(struct index_state *index,
 			  const struct pathspec *pathspec,
 			  unsigned int refresh_flags)
 {
@@ -100,9 +100,9 @@ static void *preload_thread(void *_data)
 	return NULL;
 }
 
-static void preload_index(struct index_state *index,
-			  const struct pathspec *pathspec,
-			  unsigned int refresh_flags)
+void preload_index(struct index_state *index,
+		   const struct pathspec *pathspec,
+		   unsigned int refresh_flags)
 {
 	int threads, i, work, offset;
 	struct thread_data data[MAX_PARALLEL];
diff --git a/read-cache.c b/read-cache.c
index d57958233e..53733d651d 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1496,6 +1496,12 @@ int refresh_index(struct index_state *istate, unsigned int flags,
 	typechange_fmt = (in_porcelain ? "T\t%s\n" : "%s needs update\n");
 	added_fmt = (in_porcelain ? "A\t%s\n" : "%s needs update\n");
 	unmerged_fmt = (in_porcelain ? "U\t%s\n" : "%s: needs merge\n");
+	/*
+	 * Use the multi-threaded preload_index() to refresh most of the
+	 * cache entries quickly; the single-threaded loop below then only
+	 * has to handle the special cases that are left.
+	 */
+	preload_index(istate, pathspec, 0);
 	for (i = 0; i < istate->cache_nr; i++) {
 		struct cache_entry *ce, *new_entry;
 		int cache_errno = 0;

base-commit: c670b1f876521c9f7cd40184bf7ed05aad843433
-- 
2.9.2.gvfs.2.27918.g0990287eef