Web lists-archives.com

[PATCH] fetch-pack.c: use oidset to check existence of loose object




In repository having large number of refs, lstat for non-existing loose
objects makes `git fetch` slow.

This patch stores existing loose objects in hashmap beforehand and use
it to check existence instead of using lstat.

With this patch, the number of lstat calls in `git fetch` is reduced
from 411412 to 13794 for chromium repository.

I took time stat of `git fetch` disabling quickfetch for chromium
repository 3 time on linux with SSD.
* with this patch
8.105s
8.309s
7.640s
avg: 8.018s

* master
12.287s
11.175s
12.227s
avg: 11.896s

On my MacBook Air which has slower lstat.
* with this patch
14.501s

* master
1m16.027s

`git fetch` on slow disk will be improved largely.

Signed-off-by: Takuto Ikuta <tikuta@xxxxxxxxxxxx>
---
 cache.h      |  2 ++
 fetch-pack.c | 22 +++++++++++++++++++---
 sha1_file.c  |  3 +++
 3 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/cache.h b/cache.h
index d06932ed0..db38db40e 100644
--- a/cache.h
+++ b/cache.h
@@ -1773,6 +1773,8 @@ struct object_info {
 #define OBJECT_INFO_SKIP_CACHED 4
 /* Do not retry packed storage after checking packed and loose storage */
 #define OBJECT_INFO_QUICK 8
+/* Do not check loose object */
+#define OBJECT_INFO_SKIP_LOOSE 16
 extern int sha1_object_info_extended(const unsigned char *, struct object_info *, unsigned flags);
 
 /*
diff --git a/fetch-pack.c b/fetch-pack.c
index d97461296..1658487f7 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -711,6 +711,15 @@ static void mark_alternate_complete(struct object *obj)
 	mark_complete(&obj->oid);
 }
 
+static int add_loose_objects_to_set(const struct object_id *oid,
+				    const char *path,
+				    void *data)
+{
+	struct oidset* set = (struct oidset*)(data);
+	oidset_insert(set, oid);
+	return 0;
+}
+
 static int everything_local(struct fetch_pack_args *args,
 			    struct ref **refs,
 			    struct ref **sought, int nr_sought)
@@ -719,16 +728,21 @@ static int everything_local(struct fetch_pack_args *args,
 	int retval;
 	int old_save_commit_buffer = save_commit_buffer;
 	timestamp_t cutoff = 0;
+	struct oidset loose_oid_set = OIDSET_INIT;
+
+	for_each_loose_object(add_loose_objects_to_set, &loose_oid_set, 0);
 
 	save_commit_buffer = 0;
 
 	for (ref = *refs; ref; ref = ref->next) {
 		struct object *o;
+		unsigned int flag = OBJECT_INFO_QUICK;
 
-		if (!has_object_file_with_flags(&ref->old_oid,
-						OBJECT_INFO_QUICK))
-			continue;
+		if (!oidset_contains(&loose_oid_set, &ref->old_oid))
+			flag |= OBJECT_INFO_SKIP_LOOSE;
 
+		if (!has_object_file_with_flags(&ref->old_oid, flag))
+			continue;
 		o = parse_object(&ref->old_oid);
 		if (!o)
 			continue;
@@ -744,6 +758,8 @@ static int everything_local(struct fetch_pack_args *args,
 		}
 	}
 
+	oidset_clear(&loose_oid_set);
+
 	if (!args->no_dependents) {
 		if (!args->deepen) {
 			for_each_ref(mark_complete_oid, NULL);
diff --git a/sha1_file.c b/sha1_file.c
index 1b94f39c4..c903cbcec 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1262,6 +1262,9 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
 		if (find_pack_entry(real, &e))
 			break;
 
+		if (flags & OBJECT_INFO_SKIP_LOOSE)
+			return -1;
+
 		/* Most likely it's a loose object. */
 		if (!sha1_loose_object_info(real, oi, flags))
 			return 0;
-- 
2.16.2