Web lists-archives.com

Re: [PATCH 2/3] archive: implement protocol v2 archive command




On Tue, Sep 11, 2018 at 10:36 PM Josh Steadmon <steadmon@xxxxxxxxxx> wrote:
>
> This adds a new archive command for protocol v2. The command expects
> arguments in the form "argument X" which are passed unmodified to
> git-upload-archive--writer.
>
> This command works over the file://, Git, and SSH transports. HTTP
> support will be added in a separate patch.
>
> Signed-off-by: Josh Steadmon <steadmon@xxxxxxxxxx>
> ---
>  builtin/archive.c        | 45 +++++++++++++++++++++++++++-------------
>  builtin/upload-archive.c | 44 ++++++++++++++++++++++++++++++++++++---
>  t/t5000-tar-tree.sh      |  5 +++++
>  3 files changed, 77 insertions(+), 17 deletions(-)
>
> diff --git a/builtin/archive.c b/builtin/archive.c
> index e54fc39ad..73831887d 100644
> --- a/builtin/archive.c
> +++ b/builtin/archive.c
> @@ -5,9 +5,11 @@
>  #include "cache.h"
>  #include "builtin.h"
>  #include "archive.h"
> +#include "connect.h"
>  #include "transport.h"
>  #include "parse-options.h"
>  #include "pkt-line.h"
> +#include "protocol.h"
>  #include "sideband.h"
>
>  static void create_output_file(const char *output_file)
> @@ -23,6 +25,13 @@ static void create_output_file(const char *output_file)
>         }
>  }
>
> +static int do_v2_command_and_cap(int out)
> +{
> +       packet_write_fmt(out, "command=archive\n");
> +       /* Capability list would go here, if we had any. */
> +       packet_delim(out);
> +}
> +
>  static int run_remote_archiver(int argc, const char **argv,
>                                const char *remote, const char *exec,
>                                const char *name_hint)
> @@ -32,6 +41,7 @@ static int run_remote_archiver(int argc, const char **argv,
>         struct remote *_remote;
>         struct packet_reader reader;
>         enum packet_read_status status;
> +       enum protocol_version version;
>
>         _remote = remote_get(remote);
>         if (!_remote->url[0])
> @@ -41,6 +51,11 @@ static int run_remote_archiver(int argc, const char **argv,
>
>         packet_reader_init(&reader, fd[0], NULL, 0, PACKET_READ_CHOMP_NEWLINE);
>
> +       version = discover_version(&reader);
> +
> +       if (version == protocol_v2)
> +               do_v2_command_and_cap(fd[1]);
> +
>         /*
>          * Inject a fake --format field at the beginning of the
>          * arguments, with the format inferred from our output
> @@ -56,22 +71,24 @@ static int run_remote_archiver(int argc, const char **argv,
>                 packet_write_fmt(fd[1], "argument %s\n", argv[i]);
>         packet_flush(fd[1]);
>
> -       status = packet_reader_read(&reader);
> -
> -       if (status == PACKET_READ_FLUSH)
> -               die(_("git archive: expected ACK/NAK, got a flush packet"));
> -       if (strcmp(reader.buffer, "ACK")) {
> -               if (starts_with(reader.buffer, "NACK "))
> -                       die(_("git archive: NACK %s"), reader.buffer + 5);
> -               if (starts_with(reader.buffer, "ERR "))
> -                       die(_("remote error: %s"), reader.buffer + 4);
> -               die(_("git archive: protocol error"));

Maybe we also want to support v1
(which is v0 prefixed with one pkt_line saying it is v1).

    If (version == protocol_v1)
        /* drop version v1 line, and then follow v0 logic. */
        packet_reader_read(&reader);

Do we care about v1, or do we just ignore it here? why?
(Don't answer me here, but rather put it in the commit message)

> +       if (version == protocol_v0) {
> +               status = packet_reader_read(&reader);
> +
> +               if (status == PACKET_READ_FLUSH)
> +                       die(_("git archive: expected ACK/NAK, got a flush packet"));
> +               if (strcmp(reader.buffer, "ACK")) {
> +                       if (starts_with(reader.buffer, "NACK "))
> +                               die(_("git archive: NACK %s"), reader.buffer + 5);
> +                       if (starts_with(reader.buffer, "ERR "))
> +                               die(_("remote error: %s"), reader.buffer + 4);
> +                       die(_("git archive: protocol error"));
> +               }
> +
> +               status = packet_reader_read(&reader);
> +               if (status != PACKET_READ_FLUSH)
> +                       die(_("git archive: expected a flush"));
>         }
>
> -       status = packet_reader_read(&reader);
> -       if (status != PACKET_READ_FLUSH)
> -               die(_("git archive: expected a flush"));
> -
>         /* Now, start reading from fd[0] and spit it out to stdout */
>         rv = recv_sideband("archive", fd[0], 1);
>         rv |= transport_disconnect(transport);
> diff --git a/builtin/upload-archive.c b/builtin/upload-archive.c
> index 25d911635..534e8fd56 100644
> --- a/builtin/upload-archive.c
> +++ b/builtin/upload-archive.c
> @@ -5,6 +5,7 @@
>  #include "builtin.h"
>  #include "archive.h"
>  #include "pkt-line.h"
> +#include "protocol.h"
>  #include "sideband.h"
>  #include "run-command.h"
>  #include "argv-array.h"
> @@ -73,13 +74,53 @@ static ssize_t process_input(int child_fd, int band)
>         return sz;
>  }
>
> +static int handle_v2_command_and_cap(void)
> +{
> +       struct packet_reader reader;
> +       enum packet_read_status status;
> +
> +       packet_reader_init(&reader, 0, NULL, 0, PACKET_READ_CHOMP_NEWLINE);
> +
> +       packet_write_fmt(1, "version 2\n");
> +       /*
> +        * We don't currently send any capabilities, but maybe we could list
> +        * supported archival formats?
> +        */
> +       packet_flush(1);
> +
> +       status = packet_reader_read(&reader);
> +       if (status != PACKET_READ_NORMAL ||
> +           strcmp(reader.buffer, "command=archive"))
> +               die(_("upload-archive: expected command=archive"));
> +       while (status == PACKET_READ_NORMAL) {
> +               /* We don't currently expect any client capabilities, but we
> +                * should still read (and ignore) any that happen to get sent.

/*
 * Makes sense to ignore the client capabilities here,
 * but the multi line comments take their opening
 * and closing line on a separate line. just like above.
 */

> +                */
> +               status = packet_reader_read(&reader);
> +       }
> +       if (status != PACKET_READ_DELIM)
> +               die(_("upload-archive: expected delim packet"));

This is upload-archive, which is a low level plumbing command
(see the main man page of git for an explanation of that category),
so we do not translate the error/die() calls. Besides, this is executed
on the server, which might have a different locale than the requesting
client?

Would asking for a setlocale() on the server side be an unreasonable
feature request for the capabilities (in a follow up patch, and then not
just for archive but also fetch/push, etc.)?

>  int cmd_upload_archive(int argc, const char **argv, const char *prefix)
>  {
>         struct child_process writer = { argv };
> +       enum protocol_version version = determine_protocol_version_server();
>
>         if (argc == 2 && !strcmp(argv[1], "-h"))
>                 usage(upload_archive_usage);
>
> +       if (version == protocol_v2)
> +               handle_v2_command_and_cap();
> +       else {

So if the client asked for v1, we still fall back to v0 here,
which answers my question above.

> +               packet_write_fmt(1, "ACK\n");
> +               packet_flush(1);
> +       }
> +
>         /*
>          * Set up sideband subprocess.
>          *
> @@ -96,9 +137,6 @@ int cmd_upload_archive(int argc, const char **argv, const char *prefix)
>                 die("upload-archive: %s", strerror(err));
>         }
>
> -       packet_write_fmt(1, "ACK\n");
> -       packet_flush(1);
> -
>         while (1) {
>                 struct pollfd pfd[2];
>
> diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh
> index 2a97b27b0..4be74d6e9 100755
> --- a/t/t5000-tar-tree.sh
> +++ b/t/t5000-tar-tree.sh
> @@ -145,6 +145,11 @@ test_expect_success \
>
>  check_tar b
>
> +test_expect_success 'protocol v2 for remote' '
> +       GIT_PROTOCOL="version=2" git archive --remote=. HEAD >v2_remote.tar
> +'
> +check_tar v2_remote

Our current standard is to keep all executions inside
a test_expect_* block, but here it is hard to comply with
that as the check_tar function contains test_expect_*
and calling test_expect_* from within itself doesn't work
with our test suite.

So bonus points for a refactoring to bring t5000 up to
our current standard (c.f. t0020 for a reasonable new
code, and t2002 for older code, though that only covers
syntax, not functions)

The check itself is just testing that giving GIT_PROTOCOL=2
in the environment also let's you obtain an archive. It doesn't
test if the actual communication *is* v2.
See 5e3548ef161 (fetch: send server options when using
protocol v2, 2018-04-23) for an example how to sniff on the
network traffic in tests, i.e. use GIT_TRACE_PACKET=...
and grep on that?

Thanks,
Stefan