Web lists-archives.com

Re: [PATCH 06/23] midx: struct midxed_git and 'read' subcommand




On Thu, Jun 7, 2018 at 4:03 PM, Derrick Stolee <stolee@xxxxxxxxx> wrote:
> As we build the multi-pack-index feature by adding chunks at a time,
> we want to test that the data is being written correctly.
>
> Create struct midxed_git to store an in-memory representation of a

A word play on 'packed_git'? Amusing. Some more descriptive name would
be better though. midxed looks almost like random letters thrown
together.

> multi-pack-index and a memory-map of the binary file. Initialize this
> struct in load_midxed_git(object_dir).

> +static int read_midx_file(const char *object_dir)
> +{
> +       struct midxed_git *m = load_midxed_git(object_dir);
> +
> +       if (!m)
> +               return 0;

This looks like an error case, please don't just return zero,
typically used to say "success". I don't know if this command stays
"for debugging purposes" until the end. Of course in that case it does
not really matter.

> +struct midxed_git *load_midxed_git(const char *object_dir)
> +{
> +       struct midxed_git *m;
> +       int fd;
> +       struct stat st;
> +       size_t midx_size;
> +       void *midx_map;
> +       const char *midx_name = get_midx_filename(object_dir);

mem leak? This function returns allocated memory if I remember correctly.

> +
> +       fd = git_open(midx_name);
> +       if (fd < 0)
> +               return NULL;

do an error_errno() so we know what went wrong at least.

> +       if (fstat(fd, &st)) {
> +               close(fd);
> +               return NULL;

same here, we should know why fstat() fails.

> +       }
> +       midx_size = xsize_t(st.st_size);
> +
> +       if (midx_size < MIDX_MIN_SIZE) {
> +               close(fd);
> +               die("multi-pack-index file %s is too small", midx_name);

_()

The use of die() should be discouraged though. Many people still try
(or wish) to libify code and new die() does not help. I think error()
here would be enough then you can return NULL. Or you can go fancier
and store the error string in a strbuf like refs code.

> +       }
> +
> +       midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0);
> +
> +       m = xcalloc(1, sizeof(*m) + strlen(object_dir) + 1);
> +       strcpy(m->object_dir, object_dir);
> +       m->data = midx_map;
> +
> +       m->signature = get_be32(m->data);
> +       if (m->signature != MIDX_SIGNATURE) {
> +               error("multi-pack-index signature %X does not match signature %X",
> +                     m->signature, MIDX_SIGNATURE);

_(). Maybe 0x%08x instead of %x

> +               goto cleanup_fail;
> +       }
> +
> +       m->version = *(m->data + 4);

m->data[4] instead? shorter and easier to understand.

Same comment on "*(m->data + x)" and error() without _() for the rest.

> +       if (m->version != MIDX_VERSION) {
> +               error("multi-pack-index version %d not recognized",
> +                     m->version);

_()

> +               goto cleanup_fail;
> +       }
> +
> +       m->hash_version = *(m->data + 5);

m->data[5]

> +cleanup_fail:
> +       FREE_AND_NULL(m);
> +       munmap(midx_map, midx_size);
> +       close(fd);
> +       exit(1);

It's bad enough that you die() but exit() in this code seems too much.
Please just return NULL and let the caller handle the error.

> diff --git a/midx.h b/midx.h
> index 3a63673952..a1d18ed991 100644
> --- a/midx.h
> +++ b/midx.h
> @@ -1,4 +1,13 @@
> +#ifndef MIDX_H
> +#define MIDX_H
> +
> +#include "git-compat-util.h"
>  #include "cache.h"
> +#include "object-store.h"

I don't really think you need object-store here (git-compat-util.h
too). "struct mixed_git;" would be enough for load_midxed_git
declaration below.

>  #include "packfile.h"
>
> +struct midxed_git *load_midxed_git(const char *object_dir);
> +
>  int write_midx_file(const char *object_dir);
> +
> +#endif
> diff --git a/object-store.h b/object-store.h
> index d683112fd7..77cb82621a 100644
> --- a/object-store.h
> +++ b/object-store.h
> @@ -84,6 +84,25 @@ struct packed_git {
>         char pack_name[FLEX_ARRAY]; /* more */
>  };
>
> +struct midxed_git {
> +       struct midxed_git *next;

Do we really have multiple midx files?

> +
> +       int fd;
> +
> +       const unsigned char *data;
> +       size_t data_len;
> +
> +       uint32_t signature;
> +       unsigned char version;
> +       unsigned char hash_version;
> +       unsigned char hash_len;
> +       unsigned char num_chunks;
> +       uint32_t num_packs;
> +       uint32_t num_objects;
> +
> +       char object_dir[FLEX_ARRAY];

Why do you need to keep object_dir when it could be easily retrieved
when the repo is available?

> +};
> +
>  struct raw_object_store {
>         /*
>          * Path to the repository's object store.
-- 
Duy