Web lists-archives.com

Re: [PATCH 10/23] midx: write a lookup into the pack names chunk




On Thu, Jun 7, 2018 at 7:01 PM Derrick Stolee <stolee@xxxxxxxxx> wrote:
>
> Signed-off-by: Derrick Stolee <dstolee@xxxxxxxxxxxxx>
> ---
>  Documentation/technical/pack-format.txt |  5 +++
>  builtin/midx.c                          |  7 ++++
>  midx.c                                  | 56 +++++++++++++++++++++++--
>  object-store.h                          |  2 +
>  t/t5319-midx.sh                         | 11 +++--
>  5 files changed, 75 insertions(+), 6 deletions(-)
>
> diff --git a/Documentation/technical/pack-format.txt b/Documentation/technical/pack-format.txt
> index 2b37be7b33..29bf87283a 100644
> --- a/Documentation/technical/pack-format.txt
> +++ b/Documentation/technical/pack-format.txt
> @@ -296,6 +296,11 @@ CHUNK LOOKUP:
>
>  CHUNK DATA:
>
> +       Packfile Name Lookup (ID: {'P', 'L', 'O', 'O'}) (P * 4 bytes)
> +           P * 4 bytes storing the offset in the packfile name chunk for
> +           the null-terminated string containing the filename for the
> +           ith packfile.
> +

Commit message is too light on this one. Why does this need to be
stored? Isn't the cost of rebuilding this in-core cheap?

Adding this chunk on disk in my opinion only adds more burden. Now you
have to verify that these offsets actually point to the right place.

>         Packfile Names (ID: {'P', 'N', 'A', 'M'})
>             Stores the packfile names as concatenated, null-terminated strings.
>             Packfiles must be listed in lexicographic order for fast lookups by
> diff --git a/builtin/midx.c b/builtin/midx.c
> index fe56560853..3a261e9bbf 100644
> --- a/builtin/midx.c
> +++ b/builtin/midx.c
> @@ -16,6 +16,7 @@ static struct opts_midx {
>
>  static int read_midx_file(const char *object_dir)
>  {
> +       uint32_t i;
>         struct midxed_git *m = load_midxed_git(object_dir);
>
>         if (!m)
> @@ -30,11 +31,17 @@ static int read_midx_file(const char *object_dir)
>
>         printf("chunks:");
>
> +       if (m->chunk_pack_lookup)
> +               printf(" pack_lookup");
>         if (m->chunk_pack_names)
>                 printf(" pack_names");
>
>         printf("\n");
>
> +       printf("packs:\n");
> +       for (i = 0; i < m->num_packs; i++)
> +               printf("%s\n", m->pack_names[i]);
> +
>         printf("object_dir: %s\n", m->object_dir);
>
>         return 0;
> diff --git a/midx.c b/midx.c
> index d4f4a01a51..923acda72e 100644
> --- a/midx.c
> +++ b/midx.c
> @@ -13,8 +13,9 @@
>  #define MIDX_HASH_LEN 20
>  #define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN)
>
> -#define MIDX_MAX_CHUNKS 1
> +#define MIDX_MAX_CHUNKS 2
>  #define MIDX_CHUNK_ALIGNMENT 4
> +#define MIDX_CHUNKID_PACKLOOKUP 0x504c4f4f /* "PLOO" */
>  #define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */
>  #define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t))
>
> @@ -85,6 +86,10 @@ struct midxed_git *load_midxed_git(const char *object_dir)
>                 uint64_t chunk_offset = get_be64(m->data + 16 + MIDX_CHUNKLOOKUP_WIDTH * i);
>
>                 switch (chunk_id) {
> +                       case MIDX_CHUNKID_PACKLOOKUP:
> +                               m->chunk_pack_lookup = (uint32_t *)(m->data + chunk_offset);
> +                               break;
> +
>                         case MIDX_CHUNKID_PACKNAMES:
>                                 m->chunk_pack_names = m->data + chunk_offset;
>                                 break;
> @@ -102,9 +107,32 @@ struct midxed_git *load_midxed_git(const char *object_dir)
>                 }
>         }
>
> +       if (!m->chunk_pack_lookup)
> +               die("MIDX missing required pack lookup chunk");
>         if (!m->chunk_pack_names)
>                 die("MIDX missing required pack-name chunk");
>
> +       m->pack_names = xcalloc(m->num_packs, sizeof(const char *));
> +       for (i = 0; i < m->num_packs; i++) {
> +               if (i) {
> +                       if (ntohl(m->chunk_pack_lookup[i]) <= ntohl(m->chunk_pack_lookup[i - 1])) {
> +                               error("MIDX pack lookup value %d before %d",
> +                                     ntohl(m->chunk_pack_lookup[i - 1]),
> +                                     ntohl(m->chunk_pack_lookup[i]));
> +                               goto cleanup_fail;
> +                       }
> +               }
> +
> +               m->pack_names[i] = (const char *)(m->chunk_pack_names + ntohl(m->chunk_pack_lookup[i]));
> +
> +               if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) {
> +                       error("MIDX pack names out of order: '%s' before '%s'",
> +                             m->pack_names[i - 1],
> +                             m->pack_names[i]);
> +                       goto cleanup_fail;
> +               }
> +       }
> +
>         return m;
>
>  cleanup_fail:
> @@ -162,6 +190,20 @@ static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *p
>         }
>  }
>
> +static size_t write_midx_pack_lookup(struct hashfile *f,
> +                                    char **pack_names,
> +                                    uint32_t nr_packs)
> +{
> +       uint32_t i, cur_len = 0;
> +
> +       for (i = 0; i < nr_packs; i++) {
> +               hashwrite_be32(f, cur_len);
> +               cur_len += strlen(pack_names[i]) + 1;
> +       }
> +
> +       return sizeof(uint32_t) * (size_t)nr_packs;
> +}
> +
>  static size_t write_midx_pack_names(struct hashfile *f,
>                                     char **pack_names,
>                                     uint32_t num_packs)
> @@ -275,13 +317,17 @@ int write_midx_file(const char *object_dir)
>         FREE_AND_NULL(midx_name);
>
>         cur_chunk = 0;
> -       num_chunks = 1;
> +       num_chunks = 2;
>
>         written = write_midx_header(f, num_chunks, nr_packs);
>
> -       chunk_ids[cur_chunk] = MIDX_CHUNKID_PACKNAMES;
> +       chunk_ids[cur_chunk] = MIDX_CHUNKID_PACKLOOKUP;
>         chunk_offsets[cur_chunk] = written + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH;
>
> +       cur_chunk++;
> +       chunk_ids[cur_chunk] = MIDX_CHUNKID_PACKNAMES;
> +       chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + nr_packs * sizeof(uint32_t);
> +
>         cur_chunk++;
>         chunk_ids[cur_chunk] = 0;
>         chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + pack_name_concat_len;
> @@ -311,6 +357,10 @@ int write_midx_file(const char *object_dir)
>                             chunk_ids[i]);
>
>                 switch (chunk_ids[i]) {
> +                       case MIDX_CHUNKID_PACKLOOKUP:
> +                               written += write_midx_pack_lookup(f, pack_names, nr_packs);
> +                               break;
> +
>                         case MIDX_CHUNKID_PACKNAMES:
>                                 written += write_midx_pack_names(f, pack_names, nr_packs);
>                                 break;
> diff --git a/object-store.h b/object-store.h
> index 199cf4bd44..1ba50459ca 100644
> --- a/object-store.h
> +++ b/object-store.h
> @@ -100,8 +100,10 @@ struct midxed_git {
>         uint32_t num_packs;
>         uint32_t num_objects;
>
> +       const uint32_t *chunk_pack_lookup;
>         const unsigned char *chunk_pack_names;
>
> +       const char **pack_names;
>         char object_dir[FLEX_ARRAY];
>  };
>
> diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh
> index fdf4f84a90..a31c387c8f 100755
> --- a/t/t5319-midx.sh
> +++ b/t/t5319-midx.sh
> @@ -6,10 +6,15 @@ test_description='multi-pack-indexes'
>  midx_read_expect() {
>         NUM_PACKS=$1
>         cat >expect <<- EOF
> -       header: 4d494458 1 1 1 $NUM_PACKS
> -       chunks: pack_names
> -       object_dir: .
> +       header: 4d494458 1 1 2 $NUM_PACKS
> +       chunks: pack_lookup pack_names
> +       packs:
>         EOF
> +       if [ $NUM_PACKS -ge 1 ]
> +       then
> +               ls pack/ | grep idx | sort >> expect
> +       fi
> +       printf "object_dir: .\n" >>expect &&
>         git midx read --object-dir=. >actual &&
>         test_cmp expect actual
>  }
> --
> 2.18.0.rc1
>


-- 
Duy