Web lists-archives.com

Re: [PATCH] branch & tag: Add a --no-contains option




On Thu, Mar 9, 2017 at 1:51 PM, Jeff King <peff@xxxxxxxx> wrote:
> On Thu, Mar 09, 2017 at 01:12:08PM +0100, Ævar Arnfjörð Bjarmason wrote:
>
>> > I'm almost certain this is because the contains_tag_algo one doesn't
>> > clean up the flag bits it sets on the commit objects. So running it
>> > twice in the same process is going to give you nonsense results.
>>
>> Yeah indeed.
>>
>> I tried to hack something up to avoid this, but the
>> lookup_commit_reference_gently() we call will return the same
>> object.parent pointer for two invocations, i.e. the underlying
>> {commit,object}.c API has a cache of objects it returns, couldn't find
>> some way to quickly make it burst that cache.
>
> Yeah, you'll always get the same struct for a given sha1.
>
>> The other approach of making contains_tag_algo() itself detect that
>> it's been called before (or us passing a flag) and going around
>> setting commit.object.flags on everything to 0 seemed even more
>> brittle, particularly since I think between filter->with_commit &
>> filter->no_commit we might end up visiting different commits, so it's
>> not easy to just clear it.
>
> You can clear the marks with clear_object_flags(). But I don't think
> that type of solution will quite help us here. We consider each ref
> independently and ask "does it match --contains" and "does it match
> --no-contains?". So there is no moment where we are done with all of the
> --contains marks, and can move on to the --no-contains ones. The lookups
> are interleaved.
>
> We could move to doing them in chunks (the way filter->merge_commit
> works), and then clearing in between. Or we could use a separate bitset.
> The patch below does that.
>
>> I'm happy to hack on it given some pointers, will visit it again, but
>> for now unless I'm missing something obvious / you can point out some
>> way to hack this up I'll just submit v2 with the combination of
>> --contains & --no-contains dying with a TODO message.
>>
>> The patch without that functionality is still really useful, and we
>> can implement that later.
>
> I'm not opposed to that, though see what you think of the patch below.
> It's a bit noisy but it's conceptually pretty straightforward. It should
> hopefully be obvious how you'd add in a separate contains_cache for the
> "without" case.

I wonder how worthwhile making the combination of options case fast &
adding more complexity is, as opposed to just using the slower path
for those cases via:

diff --git a/builtin/tag.c b/builtin/tag.c
index 737a83028a..d90e8675a8 100644
--- a/builtin/tag.c
+++ b/builtin/tag.c
@@ -53,7 +53,10 @@ static int list_tags(struct ref_filter *filter,
struct ref_sorting *sorting, con
        }

        verify_ref_format(format);
-       filter->with_commit_tag_algo = 1;
+       if ((filter->merge_commit + filter->with_commit +
filter->no_commit) > 1)
+               filter->with_commit_tag_algo = 0;
+       else
+               filter->with_commit_tag_algo = 1;
        filter_refs(&array, filter, FILTER_REFS_TAGS);
        ref_array_sort(sorting, &array);

I think I'll amend the tip of my WIP v2 to have something like that,
and then we can follow-up with making these cases where you supply
multiple options faster.

> Looking at this, I'm pretty sure that using "--contains" with "--merged"
> has similar problems, as they both use the UNINTERESTING bit. So even
> without your patch, there is a lurking bug.

I'm currently running this:
https://gist.github.com/avar/45cf288ce7cdc43e7395c6cbf9a98d68

with this patch to builtin/tag.c:

diff --git a/builtin/tag.c b/builtin/tag.c
index 737a83028a..b93c5e1754 100644
--- a/builtin/tag.c
+++ b/builtin/tag.c
@@ -56,2 +56,4 @@ static int list_tags(struct ref_filter *filter,
struct ref_sorting *sorting, con
        filter->with_commit_tag_algo = 1;
+       if (getenv("GIT_NO_TAG_ALGO"))
+               filter->with_commit_tag_algo = 0;

To smoke out any combinations of commits in git.git where with &
without the tag algo we already return different results, nothing so
far, but it's slow going.

> diff --git a/ref-filter.c b/ref-filter.c
> index 3820b21cc..42b1bc463 100644
> --- a/ref-filter.c
> +++ b/ref-filter.c
> @@ -14,6 +14,7 @@
>  #include "git-compat-util.h"
>  #include "version.h"
>  #include "trailer.h"
> +#include "commit-slab.h"
>
>  typedef enum { FIELD_STR, FIELD_ULONG, FIELD_TIME } cmp_type;
>
> @@ -1137,10 +1138,22 @@ static void get_ref_atom_value(struct ref_array_item *ref, int atom, struct atom
>         *v = &ref->value[atom];
>  }
>
> +/*
> + * Unknown has to be "0" here, because that's what unfilled entries in our slab
> + * will return.
> + */
>  enum contains_result {
> -       CONTAINS_UNKNOWN = -1,
> -       CONTAINS_NO = 0,
> -       CONTAINS_YES = 1
> +       CONTAINS_UNKNOWN = 0,
> +       CONTAINS_NO = 1,
> +       CONTAINS_YES = 2,
> +};
> +
> +define_commit_slab(contains_cache, enum contains_result);
> +
> +struct ref_filter_cbdata {
> +       struct ref_array *array;
> +       struct ref_filter *filter;
> +       struct contains_cache contains_cache;
>  };
>
>  /*
> @@ -1171,24 +1184,25 @@ static int in_commit_list(const struct commit_list *want, struct commit *c)
>   * Do not recurse to find out, though, but return -1 if inconclusive.
>   */
>  static enum contains_result contains_test(struct commit *candidate,
> -                           const struct commit_list *want)
> +                           const struct commit_list *want,
> +                           struct contains_cache *cache)
>  {
> -       /* was it previously marked as containing a want commit? */
> -       if (candidate->object.flags & TMP_MARK)
> -               return 1;
> -       /* or marked as not possibly containing a want commit? */
> -       if (candidate->object.flags & UNINTERESTING)
> -               return 0;
> +       enum contains_result *cached = contains_cache_at(cache, candidate);
> +
> +       /* if we already found the answer, return it without traversing */
> +       if (*cached)
> +               return *cached;
> +
>         /* or are we it? */
>         if (in_commit_list(want, candidate)) {
> -               candidate->object.flags |= TMP_MARK;
> -               return 1;
> +               *cached = CONTAINS_YES;
> +               return *cached;
>         }
>
>         if (parse_commit(candidate) < 0)
> -               return 0;
> +               return CONTAINS_NO;
>
> -       return -1;
> +       return CONTAINS_UNKNOWN;
>  }
>
>  static void push_to_contains_stack(struct commit *candidate, struct contains_stack *contains_stack)
> @@ -1199,10 +1213,11 @@ static void push_to_contains_stack(struct commit *candidate, struct contains_sta
>  }
>
>  static enum contains_result contains_tag_algo(struct commit *candidate,
> -               const struct commit_list *want)
> +                                             const struct commit_list *want,
> +                                             struct contains_cache *cache)
>  {
>         struct contains_stack contains_stack = { 0, 0, NULL };
> -       int result = contains_test(candidate, want);
> +       enum contains_result result = contains_test(candidate, want, cache);
>
>         if (result != CONTAINS_UNKNOWN)
>                 return result;
> @@ -1214,16 +1229,16 @@ static enum contains_result contains_tag_algo(struct commit *candidate,
>                 struct commit_list *parents = entry->parents;
>
>                 if (!parents) {
> -                       commit->object.flags |= UNINTERESTING;
> +                       *contains_cache_at(cache, commit) = CONTAINS_NO;
>                         contains_stack.nr--;
>                 }
>                 /*
>                  * If we just popped the stack, parents->item has been marked,
> -                * therefore contains_test will return a meaningful 0 or 1.
> +                * therefore contains_test will return a meaningful yes/no.
>                  */
> -               else switch (contains_test(parents->item, want)) {
> +               else switch (contains_test(parents->item, want, cache)) {
>                 case CONTAINS_YES:
> -                       commit->object.flags |= TMP_MARK;
> +                       *contains_cache_at(cache, commit) = CONTAINS_YES;
>                         contains_stack.nr--;
>                         break;
>                 case CONTAINS_NO:
> @@ -1235,13 +1250,14 @@ static enum contains_result contains_tag_algo(struct commit *candidate,
>                 }
>         }
>         free(contains_stack.contains_stack);
> -       return contains_test(candidate, want);
> +       return contains_test(candidate, want, cache);
>  }
>
> -static int commit_contains(struct ref_filter *filter, struct commit *commit)
> +static int commit_contains(struct ref_filter *filter, struct commit *commit,
> +                          struct contains_cache *cache)
>  {
>         if (filter->with_commit_tag_algo)
> -               return contains_tag_algo(commit, filter->with_commit);
> +               return contains_tag_algo(commit, filter->with_commit, cache) == CONTAINS_YES;
>         return is_descendant_of(commit, filter->with_commit);
>  }
>
> @@ -1438,7 +1454,7 @@ static int ref_filter_handler(const char *refname, const struct object_id *oid,
>                         return 0;
>                 /* We perform the filtering for the '--contains' option */
>                 if (filter->with_commit &&
> -                   !commit_contains(filter, commit))
> +                   !commit_contains(filter, commit, &ref_cbdata->contains_cache))
>                         return 0;
>         }
>
> @@ -1538,6 +1554,8 @@ int filter_refs(struct ref_array *array, struct ref_filter *filter, unsigned int
>                 broken = 1;
>         filter->kind = type & FILTER_REFS_KIND_MASK;
>
> +       init_contains_cache(&ref_cbdata.contains_cache);
> +
>         /*  Simple per-ref filtering */
>         if (!filter->kind)
>                 die("filter_refs: invalid type");
> @@ -1560,6 +1578,7 @@ int filter_refs(struct ref_array *array, struct ref_filter *filter, unsigned int
>                         head_ref(ref_filter_handler, &ref_cbdata);
>         }
>
> +       clear_contains_cache(&ref_cbdata.contains_cache);
>
>         /*  Filters that need revision walking */
>         if (filter->merge_commit)
> diff --git a/ref-filter.h b/ref-filter.h
> index 7b05592ba..89af9f451 100644
> --- a/ref-filter.h
> +++ b/ref-filter.h
> @@ -71,11 +71,6 @@ struct ref_filter {
>                 verbose;
>  };
>
> -struct ref_filter_cbdata {
> -       struct ref_array *array;
> -       struct ref_filter *filter;
> -};
> -
>  /*  Macros for checking --merged and --no-merged options */
>  #define _OPT_MERGED_NO_MERGED(option, filter, h) \
>         { OPTION_CALLBACK, 0, option, (filter), N_("commit"), (h), \