Web lists-archives.com

Re: [PATCH v1] diffcore-rename: speed up register_rename_src




On Thu, Apr 20, 2017 at 10:00:04AM -0400, Jeff Hostetler wrote:

> Perhaps the thing to learn from this (and the other ones) is that
> we have lots of places where we are building a sorted list by
> iterating over a sorted list.  The insert routines are general
> purpose and cannot assume this, so they search first.  Perhaps it
> would be clearer to have independent _append and _insert functions
> and have the caller explicitly call the appropriate one.  The mainline
> iterations on the existing index could just call the _append form
> and never have to worry about searching or the negative-integer
> return trick.  Whereas, the random iterations (such as on the
> command's arg list), would always call the _insert form.

Yes. I'd be much happier if your patch was flipping between two
general-purpose insertion functions. And if that same trick was used on
the dst side.

Or even, given that this these functions are called from a single
location that has sorted input, the binary search was just replaced
completely with an append combined with a sort-check.

That's not the minimal change you were going for, but I think the end
result is simpler and more consistent.

E.g., something like this:

diff --git a/diffcore-rename.c b/diffcore-rename.c
index f7444c86b..a5c017198 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -43,26 +43,20 @@ static struct diff_rename_dst *locate_rename_dst(struct diff_filespec *two)
 }
 
 /*
- * Returns 0 on success, -1 if we found a duplicate.
+ * Returns 0 on success, -1 if we found a duplicate or a sorting problem.
  */
 static int add_rename_dst(struct diff_filespec *two)
 {
-	int first = find_rename_dst(two);
-
-	if (first >= 0)
+	if (rename_dst_nr > 0 &&
+	    strcmp(two->path, rename_dst[rename_dst_nr - 1].two->path) <= 0)
 		return -1;
-	first = -first - 1;
 
-	/* insert to make it at "first" */
 	ALLOC_GROW(rename_dst, rename_dst_nr + 1, rename_dst_alloc);
+	rename_dst[rename_dst_nr].two = alloc_filespec(two->path);
+	fill_filespec(rename_dst[rename_dst_nr].two,
+		      two->oid.hash, two->oid_valid, two->mode);
+	rename_dst[rename_dst_nr].pair = NULL;
 	rename_dst_nr++;
-	if (first < rename_dst_nr)
-		memmove(rename_dst + first + 1, rename_dst + first,
-			(rename_dst_nr - first - 1) * sizeof(*rename_dst));
-	rename_dst[first].two = alloc_filespec(two->path);
-	fill_filespec(rename_dst[first].two, two->oid.hash, two->oid_valid,
-		      two->mode);
-	rename_dst[first].pair = NULL;
 	return 0;
 }
 
@@ -73,36 +67,17 @@ static struct diff_rename_src {
 } *rename_src;
 static int rename_src_nr, rename_src_alloc;
 
-static struct diff_rename_src *register_rename_src(struct diff_filepair *p)
+static int register_rename_src(struct diff_filepair *p)
 {
-	int first, last;
-	struct diff_filespec *one = p->one;
-	unsigned short score = p->score;
-
-	first = 0;
-	last = rename_src_nr;
-	while (last > first) {
-		int next = (last + first) >> 1;
-		struct diff_rename_src *src = &(rename_src[next]);
-		int cmp = strcmp(one->path, src->p->one->path);
-		if (!cmp)
-			return src;
-		if (cmp < 0) {
-			last = next;
-			continue;
-		}
-		first = next+1;
-	}
+	if (rename_src_nr > 0 &&
+	    strcmp(p->one->path, rename_src[rename_src_nr - 1].p->one->path) <= 0)
+		return -1;
 
-	/* insert to make it at "first" */
 	ALLOC_GROW(rename_src, rename_src_nr + 1, rename_src_alloc);
+	rename_src[rename_src_nr].p = p;
+	rename_src[rename_src_nr].score = p->score;
 	rename_src_nr++;
-	if (first < rename_src_nr)
-		memmove(rename_src + first + 1, rename_src + first,
-			(rename_src_nr - first - 1) * sizeof(*rename_src));
-	rename_src[first].p = p;
-	rename_src[first].score = score;
-	return &(rename_src[first]);
+	return 0;
 }
 
 static int basename_same(struct diff_filespec *src, struct diff_filespec *dst)
@@ -468,7 +443,7 @@ void diffcore_rename(struct diff_options *options)
 				continue;
 			else if (add_rename_dst(p->two) < 0) {
 				warning("skipping rename detection, detected"
-					" duplicate destination '%s'",
+					" duplicate or out-of-order destination '%s'",
 					p->two->path);
 				goto cleanup;
 			}
@@ -486,7 +461,12 @@ void diffcore_rename(struct diff_options *options)
 			 */
 			if (p->broken_pair && !p->score)
 				p->one->rename_used++;
-			register_rename_src(p);
+			if (register_rename_src(p) < 0) {
+				warning("skipping rename detection, detected"
+					" duplicate or out-of-order source '%s'",
+					p->one->path);
+				goto cleanup;
+			}
 		}
 		else if (detect_rename == DIFF_DETECT_COPY) {
 			/*
@@ -494,7 +474,12 @@ void diffcore_rename(struct diff_options *options)
 			 * one, to indicate ourselves as a user.
 			 */
 			p->one->rename_used++;
-			register_rename_src(p);
+			if (register_rename_src(p) < 0) {
+				warning("skipping rename detection, detected"
+					" duplicate or out-of-order source '%s'",
+					p->one->path);
+				goto cleanup;
+			}
 		}
 	}
 	if (rename_dst_nr == 0 || rename_src_nr == 0)