Web lists-archives.com

Re: [PATCH 09/23] midx: write pack names in chunk




On Thu, Jun 7, 2018 at 4:03 PM, Derrick Stolee <stolee@xxxxxxxxx> wrote:
> @@ -74,6 +80,31 @@ struct midxed_git *load_midxed_git(const char *object_dir)
>         m->num_chunks = *(m->data + 6);
>         m->num_packs = get_be32(m->data + 8);
>
> +       for (i = 0; i < m->num_chunks; i++) {
> +               uint32_t chunk_id = get_be32(m->data + 12 + MIDX_CHUNKLOOKUP_WIDTH * i);
> +               uint64_t chunk_offset = get_be64(m->data + 16 + MIDX_CHUNKLOOKUP_WIDTH * i);

Would be good to reduce magic numbers like 12 and 16, I think you have
some header length constants for those already.

> +               switch (chunk_id) {
> +                       case MIDX_CHUNKID_PACKNAMES:
> +                               m->chunk_pack_names = m->data + chunk_offset;
> +                               break;
> +
> +                       case 0:
> +                               die("terminating MIDX chunk id appears earlier than expected");

_()

> +                               break;
> +
> +                       default:
> +                               /*
> +                                * Do nothing on unrecognized chunks, allowing future
> +                                * extensions to add optional chunks.
> +                                */

I wrote about the chunk term reminding me of PNG format then deleted
it. But it may help to do similar to PNG here. The first letter can
let us know if the chunk is optional and can be safely ignored. E.g.
uppercase first letter cannot be ignored, lowercase go wild.

> +                               break;
> +               }
> +       }
> +
> +       if (!m->chunk_pack_names)
> +               die("MIDX missing required pack-name chunk");

_()

> +
>         return m;
>
>  cleanup_fail:
> @@ -99,18 +130,88 @@ static size_t write_midx_header(struct hashfile *f,
>         return MIDX_HEADER_SIZE;
>  }
>
> +struct pack_pair {
> +       uint32_t pack_int_id;

can this be just pack_id?

> +       char *pack_name;
> +};
> +
> +static int pack_pair_compare(const void *_a, const void *_b)
> +{
> +       struct pack_pair *a = (struct pack_pair *)_a;
> +       struct pack_pair *b = (struct pack_pair *)_b;
> +       return strcmp(a->pack_name, b->pack_name);
> +}
> +
> +static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *perm)
> +{
> +       uint32_t i;
> +       struct pack_pair *pairs;
> +
> +       ALLOC_ARRAY(pairs, nr_packs);
> +
> +       for (i = 0; i < nr_packs; i++) {
> +               pairs[i].pack_int_id = i;
> +               pairs[i].pack_name = pack_names[i];
> +       }
> +
> +       QSORT(pairs, nr_packs, pack_pair_compare);
> +
> +       for (i = 0; i < nr_packs; i++) {
> +               pack_names[i] = pairs[i].pack_name;
> +               perm[pairs[i].pack_int_id] = i;
> +       }

pairs[] is leaked?

> +}
> +
> +static size_t write_midx_pack_names(struct hashfile *f,
> +                                   char **pack_names,
> +                                   uint32_t num_packs)
> +{
> +       uint32_t i;
> +       unsigned char padding[MIDX_CHUNK_ALIGNMENT];
> +       size_t written = 0;
> +
> +       for (i = 0; i < num_packs; i++) {
> +               size_t writelen = strlen(pack_names[i]) + 1;
> +
> +               if (i && strcmp(pack_names[i], pack_names[i - 1]) <= 0)
> +                       BUG("incorrect pack-file order: %s before %s",
> +                           pack_names[i - 1],
> +                           pack_names[i]);
> +
> +               hashwrite(f, pack_names[i], writelen);
> +               written += writelen;

side note. This pattern happens a lot. It may be a good idea to make
hashwrite() return writelen so we can just write

written += hashwrite(f, ..., writelen);

> +       }
> +
> +       /* add padding to be aligned */
> +       i = MIDX_CHUNK_ALIGNMENT - (written % MIDX_CHUNK_ALIGNMENT);
> +       if (i < MIDX_CHUNK_ALIGNMENT) {
> +               bzero(padding, sizeof(padding));
> +               hashwrite(f, padding, i);
> +               written += i;
> +       }
> +
> +       return written;
> +}
> +
>  int write_midx_file(const char *object_dir)
>  {
> -       unsigned char num_chunks = 0;
> +       unsigned char cur_chunk, num_chunks = 0;
>         char *midx_name;
>         struct hashfile *f;
>         struct lock_file lk;
>         struct packed_git **packs = NULL;
> +       char **pack_names = NULL;
> +       uint32_t *pack_perm;
>         uint32_t i, nr_packs = 0, alloc_packs = 0;
> +       uint32_t alloc_pack_names = 0;
>         DIR *dir;
>         struct dirent *de;
>         struct strbuf pack_dir = STRBUF_INIT;
>         size_t pack_dir_len;
> +       uint64_t pack_name_concat_len = 0;
> +       uint64_t written = 0;
> +       uint32_t chunk_ids[MIDX_MAX_CHUNKS + 1];
> +       uint64_t chunk_offsets[MIDX_MAX_CHUNKS + 1];

This long list of local vars may be a good indicator that this
function needs split up into smaller ones.

>
>         midx_name = get_midx_filename(object_dir);
>         if (safe_create_leading_directories(midx_name)) {
> @@ -132,12 +233,14 @@ int write_midx_file(const char *object_dir)
>         strbuf_addch(&pack_dir, '/');
>         pack_dir_len = pack_dir.len;
>         ALLOC_ARRAY(packs, alloc_packs);
> +       ALLOC_ARRAY(pack_names, alloc_pack_names);
>         while ((de = readdir(dir)) != NULL) {
>                 if (is_dot_or_dotdot(de->d_name))
>                         continue;
>
>                 if (ends_with(de->d_name, ".idx")) {
>                         ALLOC_GROW(packs, nr_packs + 1, alloc_packs);
> +                       ALLOC_GROW(pack_names, nr_packs + 1, alloc_pack_names);
>
>                         strbuf_setlen(&pack_dir, pack_dir_len);
>                         strbuf_addstr(&pack_dir, de->d_name);
> @@ -145,21 +248,83 @@ int write_midx_file(const char *object_dir)
>                         packs[nr_packs] = add_packed_git(pack_dir.buf,
>                                                          pack_dir.len,
>                                                          0);
> -                       if (!packs[nr_packs])
> +                       if (!packs[nr_packs]) {
>                                 warning("failed to add packfile '%s'",
>                                         pack_dir.buf);
> -                       else
> -                               nr_packs++;
> +                               continue;
> +                       }
> +
> +                       pack_names[nr_packs] = xstrdup(de->d_name);
> +                       pack_name_concat_len += strlen(de->d_name) + 1;
> +                       nr_packs++;
>                 }
>         }
> +
>         closedir(dir);
>         strbuf_release(&pack_dir);
>
> +       if (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT)
> +               pack_name_concat_len += MIDX_CHUNK_ALIGNMENT -
> +                                       (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT);
> +
> +       ALLOC_ARRAY(pack_perm, nr_packs);
> +       sort_packs_by_name(pack_names, nr_packs, pack_perm);
> +
>         hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR);
>         f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
>         FREE_AND_NULL(midx_name);
>
> -       write_midx_header(f, num_chunks, nr_packs);
> +       cur_chunk = 0;
> +       num_chunks = 1;
> +
> +       written = write_midx_header(f, num_chunks, nr_packs);
> +
> +       chunk_ids[cur_chunk] = MIDX_CHUNKID_PACKNAMES;
> +       chunk_offsets[cur_chunk] = written + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH;
> +
> +       cur_chunk++;
> +       chunk_ids[cur_chunk] = 0;
> +       chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + pack_name_concat_len;
> +
> +       for (i = 0; i <= num_chunks; i++) {
> +               if (i && chunk_offsets[i] < chunk_offsets[i - 1])
> +                       BUG("incorrect chunk offsets: %"PRIu64" before %"PRIu64,
> +                           chunk_offsets[i - 1],
> +                           chunk_offsets[i]);
> +
> +               if (chunk_offsets[i] % MIDX_CHUNK_ALIGNMENT)
> +                       BUG("chunk offset %"PRIu64" is not properly aligned",
> +                           chunk_offsets[i]);
> +
> +               hashwrite_be32(f, chunk_ids[i]);
> +               hashwrite_be32(f, chunk_offsets[i] >> 32);
> +               hashwrite_be32(f, chunk_offsets[i]);
> +
> +               written += MIDX_CHUNKLOOKUP_WIDTH;
> +       }
> +
> +       for (i = 0; i < num_chunks; i++) {
> +               if (written != chunk_offsets[i])
> +                       BUG("inccrrect chunk offset (%"PRIu64" != %"PRIu64") for chunk id %"PRIx32,

incorrect

> +                           chunk_offsets[i],
> +                           written,
> +                           chunk_ids[i]);
> +
> +               switch (chunk_ids[i]) {
> +                       case MIDX_CHUNKID_PACKNAMES:
> +                               written += write_midx_pack_names(f, pack_names, nr_packs);
> +                               break;
> +
> +                       default:
> +                               BUG("trying to write unknown chunk id %"PRIx32,
> +                                   chunk_ids[i]);
> +               }
> +       }
> +
> +       if (written != chunk_offsets[num_chunks])
> +               BUG("incorrect final offset %"PRIu64" != %"PRIu64,
> +                   written,
> +                   chunk_offsets[num_chunks]);
>
>         finalize_hashfile(f, NULL, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
>         commit_lock_file(&lk);
> @@ -170,5 +335,6 @@ int write_midx_file(const char *object_dir)
>         }
>
>         FREE_AND_NULL(packs);
> +       FREE_AND_NULL(pack_names);

What about the strings in this array? I think they are xstrdup() but I
didn't spot them being freed.

And maybe just use string_list...

>         return 0;
>  }
-- 
Duy