Web lists-archives.com

[RFC PATCH 08/18] midx: teach git-midx to read midx file details




Commentary: I included the pack directory of the MIDX file as a FLEX_ARRAY
at the end of the midxed_git struct, similar to how the pack name appears
at the end of the packed_git struct. A colleague mentioned that this pattern
is confusing and possibly dangerous, so I should consider changing it. If
there is no strong reason to keep the FLEX_ARRAY, I will change the struct
to use a char* before the v1 patch.

-- >8 --

Add a "--read" subcommand to the midx builtin to report summary information
on the head MIDX file or a MIDX file specified by the supplied "--midx-id"
parameter.

This subcommand is used by t5318-midx.sh to verify the indexed objects are
as expected.

Signed-off-by: Derrick Stolee <dstolee@xxxxxxxxxxxxx>
---
 Documentation/git-midx.txt |  23 +++++++-
 builtin/midx.c             |  59 ++++++++++++++++++++
 midx.c                     | 132 +++++++++++++++++++++++++++++++++++++++++++++
 midx.h                     |  58 ++++++++++++++++++++
 t/t5318-midx.sh            |  79 +++++++++++++++++++--------
 5 files changed, 328 insertions(+), 23 deletions(-)

diff --git a/Documentation/git-midx.txt b/Documentation/git-midx.txt
index 01f79cbba5..3eeed1d969 100644
--- a/Documentation/git-midx.txt
+++ b/Documentation/git-midx.txt
@@ -9,7 +9,7 @@ git-midx - Write and verify multi-pack-indexes (MIDX files).
 SYNOPSIS
 --------
 [verse]
-'git midx' --write <options> [--pack-dir <pack_dir>]
+'git midx' [--write|--read] <options> [--pack-dir <pack_dir>]
 
 DESCRIPTION
 -----------
@@ -22,9 +22,18 @@ OPTIONS
 	Use given directory for the location of packfiles, pack-indexes,
 	and MIDX files.
 
+--read::
+	If specified, read the midx file named by the midx-head file and
+	output basic details about it. (Cannot be combined with --write.)
+
+--midx-id=<oid>::
+	If specified with --read, use the given oid to read midx-[oid].midx
+	instead of using midx-head.
+
 --write::
 	If specified, write a new midx file to the pack directory using
 	the packfiles present. Outputs the hash of the result midx file.
+	(Cannot be combined with --read.)
 
 --update-head::
 	If specified with --write, update the midx-head file to point to
@@ -58,6 +67,18 @@ $ git midx --write --update-head
 $ git midx --write --pack-dir ../../alt/pack/
 ---------------------------------------------------------
 
+* Read the current midx-head.
++
+-----------------------------------------------
+$ git midx --read
+-----------------------------------------------
+
+* Read a specific MIDX file in the local .git folder.
++
+--------------------------------------------------------------------
+$ git midx --read --midx-id=3e50d982a2257168c7fd0ff12ffe5cf6af38c74e
+--------------------------------------------------------------------
+
 CONFIGURATION
 -------------
 
diff --git a/builtin/midx.c b/builtin/midx.c
index 84ce6588a2..ee9234583d 100644
--- a/builtin/midx.c
+++ b/builtin/midx.c
@@ -16,12 +16,60 @@ static char const * const builtin_midx_usage[] = {
 
 static struct opts_midx {
 	const char *pack_dir;
+	int read; /* set by --read */
+	const char *midx_id; /* --midx-id value; names the midx file for --read */
 	int write;
 	int update_head;
 	int has_existing;
 	struct object_id old_midx_oid;
 } opts;
 
+/* Print a summary of one MIDX file; dies if no such file can be loaded. */
+static int midx_read(void)
+{
+	struct object_id midx_oid;
+	struct midxed_git *midx;
+	uint32_t i;
+
+	/* A full-length --midx-id wins; anything else falls back to midx-head. */
+	if (opts.midx_id && strlen(opts.midx_id) == GIT_MAX_HEXSZ) {
+		if (get_oid_hex(opts.midx_id, &midx_oid))
+			die("invalid midx-id: %s", opts.midx_id);
+	} else if (!get_midx_head_oid(opts.pack_dir, &midx_oid)) {
+		die("No midx-head exists.");
+	}
+
+	midx = get_midxed_git(opts.pack_dir, &midx_oid);
+	if (!midx)
+		die("could not read midx file in %s", opts.pack_dir);
+
+	printf("header: %08x %x %d %d %d %d %u\n",
+		ntohl(midx->hdr->midx_signature), ntohl(midx->hdr->midx_version),
+		midx->hdr->hash_version, midx->hdr->hash_len,
+		midx->hdr->num_base_midx, midx->hdr->num_chunks,
+		ntohl(midx->hdr->num_packs));
+	printf("num_objects: %u\n", midx->num_objects);
+	printf("chunks:");
+	if (midx->chunk_pack_lookup)
+		printf(" pack_lookup");
+	if (midx->chunk_pack_names)
+		printf(" pack_names");
+	if (midx->chunk_oid_fanout)
+		printf(" oid_fanout");
+	if (midx->chunk_oid_lookup)
+		printf(" oid_lookup");
+	if (midx->chunk_object_offsets)
+		printf(" object_offsets");
+	if (midx->chunk_large_offsets)
+		printf(" large_offsets");
+	printf("\n");
+	printf("pack_names:\n");
+	for (i = 0; i < midx->num_packs; i++)
+		printf("%s\n", midx->pack_names[i]);
+	printf("pack_dir: %s\n", midx->pack_dir);
+	return 0;
+}
+
 static int build_midx_from_packs(
 	const char *pack_dir,
 	const char **pack_names, uint32_t nr_packs,
@@ -187,6 +235,12 @@ int cmd_midx(int argc, const char **argv, const char *prefix)
 		{ OPTION_STRING, 'p', "pack-dir", &opts.pack_dir,
 			N_("dir"),
 			N_("The pack directory containing set of packfile and pack-index pairs.") },
+		OPT_BOOL('r', "read", &opts.read,
+			N_("read midx file")),
+		{ OPTION_STRING, 'M', "midx-id", &opts.midx_id,
+			N_("oid"),
+			N_("An OID for a specific midx file in the pack-dir."),
+			PARSE_OPT_OPTARG, NULL, (intptr_t) "" },
 		OPT_BOOL('w', "write", &opts.write,
 			N_("write midx file")),
 		OPT_BOOL('u', "update-head", &opts.update_head,
@@ -205,6 +259,9 @@ int cmd_midx(int argc, const char **argv, const char *prefix)
 			     builtin_midx_options,
 			     builtin_midx_usage, 0);
 
+	if (opts.write + opts.read > 1)
+		usage_with_options(builtin_midx_usage, builtin_midx_options);
+
 	if (!opts.pack_dir) {
 		struct strbuf path = STRBUF_INIT;
 		strbuf_addstr(&path, get_object_directory());
@@ -214,6 +271,8 @@ int cmd_midx(int argc, const char **argv, const char *prefix)
 
 	opts.has_existing = !!get_midx_head_oid(opts.pack_dir, &opts.old_midx_oid);
 
+	if (opts.read)
+		return midx_read();
 	if (opts.write)
 		return midx_write();
 
diff --git a/midx.c b/midx.c
index f4178c1b81..c631be451f 100644
--- a/midx.c
+++ b/midx.c
@@ -65,6 +65,138 @@ struct object_id *get_midx_head_oid(const char *pack_dir,
 	return oid;
 }
 
+static struct midxed_git *alloc_midxed_git(int extra)
+{
+	struct midxed_git *m = xmalloc(st_add(sizeof(*m), extra));
+	memset(m, 0, sizeof(*m)); /* zeroes only the fixed-size part */
+	m->midx_fd = -1; /* no file attached yet */
+	/* The caller fills in the 'extra' trailing bytes, which hold pack_dir. */
+	return m;
+}
+
+/*
+ * Map the MIDX file 'midx_file' and record where each of its chunks lives.
+ * Returns NULL if the file cannot be opened or stat'ed; dies if malformed.
+ */
+static struct midxed_git *load_midxed_git_one(const char *midx_file, const char *pack_dir)
+{
+	void *midx_map;
+	const unsigned char *data;
+	struct pack_midx_header *hdr;
+	size_t midx_size, packs_len;
+	struct stat st;
+	uint32_t i;
+	struct midxed_git *midx;
+	int fd = git_open(midx_file);
+
+	if (fd < 0)
+		return NULL;
+	if (fstat(fd, &st)) {
+		close(fd);
+		return NULL;
+	}
+	midx_size = xsize_t(st.st_size);
+
+	if (midx_size < 16 + 8 * 5 + 4 * 256 + GIT_MAX_RAWSZ) {
+		close(fd);
+		die("midx file %s is too small", midx_file);
+	}
+	midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	data = (const unsigned char *)midx_map;
+
+	/* die() exits, so skip the munmap: the messages still read from hdr. */
+	hdr = midx_map;
+	if (ntohl(hdr->midx_signature) != MIDX_SIGNATURE)
+		die("MIDX signature %X does not match signature %X",
+		    ntohl(hdr->midx_signature), MIDX_SIGNATURE);
+	if (ntohl(hdr->midx_version) != MIDX_VERSION)
+		die("MIDX version %X does not match version %X",
+		    ntohl(hdr->midx_version), MIDX_VERSION);
+
+	midx = alloc_midxed_git(strlen(pack_dir) + 1);
+
+	midx->hdr = hdr;
+	midx->midx_fd = fd;
+	midx->data = midx_map;
+	midx->data_len = midx_size;
+
+	/* Reads num_chunks + 1 entries; id 0 (presumably a terminator) is ignored. */
+	for (i = 0; i <= hdr->num_chunks; i++) {
+		uint32_t chunk_id = ntohl(*(uint32_t*)(data + sizeof(*hdr) + 12 * i));
+		uint64_t chunk_offset1 = ntohl(*(uint32_t*)(data + sizeof(*hdr) + 12 * i + 4));
+		uint32_t chunk_offset2 = ntohl(*(uint32_t*)(data + sizeof(*hdr) + 12 * i + 8));
+		uint64_t chunk_offset = (chunk_offset1 << 32) | chunk_offset2;
+
+		if (sizeof(data) == 4 && chunk_offset >> 32) {
+			munmap(midx_map, midx_size);
+			close(fd);
+			die(_("unable to memory-map in 32-bit address space"));
+		}
+		if (chunk_id && chunk_offset >= midx_size)
+			die("MIDX chunk %08x is out of bounds", chunk_id);
+
+		switch (chunk_id) {
+			case MIDX_CHUNKID_PACKLOOKUP:
+				midx->chunk_pack_lookup = data + chunk_offset;
+				break;
+			case MIDX_CHUNKID_PACKNAMES:
+				midx->chunk_pack_names = data + chunk_offset;
+				break;
+			case MIDX_CHUNKID_OIDFANOUT:
+				midx->chunk_oid_fanout = data + chunk_offset;
+				break;
+			case MIDX_CHUNKID_OIDLOOKUP:
+				midx->chunk_oid_lookup = data + chunk_offset;
+				break;
+			case MIDX_CHUNKID_OBJECTOFFSETS:
+				midx->chunk_object_offsets = data + chunk_offset;
+				break;
+			case MIDX_CHUNKID_LARGEOFFSETS:
+				midx->chunk_large_offsets = data + chunk_offset;
+				break;
+			case 0:
+				break;
+			default:
+				munmap(midx_map, midx_size);
+				close(fd);
+				die("unrecognized MIDX chunk id: %08x", chunk_id);
+		}
+	}
+
+	/* The code below dereferences these chunks, so insist they were found. */
+	if (!midx->chunk_oid_fanout)
+		die("MIDX file %s is missing the OID fanout chunk", midx_file);
+	midx->num_objects = ntohl(*((uint32_t*)(midx->chunk_oid_fanout + 255 * 4)));
+	midx->num_packs = ntohl(midx->hdr->num_packs);
+	if (midx->num_packs && (!midx->chunk_pack_lookup || !midx->chunk_pack_names))
+		die("MIDX file %s is missing a pack name chunk", midx_file);
+
+	packs_len = st_mult(sizeof(struct packed_git*), midx->num_packs);
+
+	if (packs_len) {
+		ALLOC_ARRAY(midx->packs, midx->num_packs);
+		ALLOC_ARRAY(midx->pack_names, midx->num_packs);
+		memset(midx->packs, 0, packs_len);
+
+		for (i = 0; i < midx->num_packs; i++) {
+			uint32_t name_offset = ntohl(*(uint32_t*)(midx->chunk_pack_lookup + 4 * i));
+			midx->pack_names[i] = (const char*)(midx->chunk_pack_names + name_offset);
+		}
+	}
+
+	strcpy(midx->pack_dir, pack_dir);
+	return midx;
+}
+
+struct midxed_git *get_midxed_git(const char *pack_dir, struct object_id *oid)
+{
+	/* NULL comes back if the file cannot be opened; the path is freed here. */
+	char *midx_path = get_midx_filename_oid(pack_dir, oid);
+	struct midxed_git *loaded = load_midxed_git_one(midx_path, pack_dir);
+	free(midx_path);
+	return loaded;
+}
+
 struct pack_midx_details_internal {
 	uint32_t pack_int_id;
 	uint32_t internal_offset;
diff --git a/midx.h b/midx.h
index 9d9ab85261..92b74e49db 100644
--- a/midx.h
+++ b/midx.h
@@ -27,6 +27,64 @@ struct pack_midx_header {
 	uint32_t num_packs;
 };
 
+struct midxed_git {
+	struct midxed_git *next;
+
+	int midx_fd; /* open descriptor for the midx file; -1 when none */
+
+	/* the mmap'd data for the midx file */
+	const unsigned char *data;
+	size_t data_len;
+
+	/* points into the mmap'd data */
+	struct pack_midx_header *hdr;
+
+	/* can construct filename from obj_dir + "/pack/midx-" + oid + ".midx" */
+	struct object_id oid;
+
+	/* last entry of the fanout chunk, converted to host byte order */
+	uint32_t num_objects;
+
+	/* hdr->num_packs, converted to host byte order */
+	uint32_t num_packs;
+
+	/* hdr->num_packs * 4 bytes: offsets into chunk_pack_names */
+	const unsigned char *chunk_pack_lookup;
+	const unsigned char *chunk_pack_names; /* names, read as C strings */
+
+	/* 256 * 4 bytes */
+	const unsigned char *chunk_oid_fanout;
+
+	/* num_objects * hdr->hash_len bytes */
+	const unsigned char *chunk_oid_lookup;
+
+	/* num_objects * 8 bytes */
+	const unsigned char *chunk_object_offsets;
+
+	/*
+	 * 8 bytes per large offset.
+	 * (Optional: may be null.)
+	 */
+	const unsigned char *chunk_large_offsets;
+
+	/*
+	 * Array of num_packs pointers into the mmap'd pack-names chunk.
+	 */
+	const char **pack_names;
+
+	/*
+	 * Store an array of pack-pointers. If NULL, then the
+	 * pack has not been loaded yet. The array indices
+	 * correspond to the pack_int_ids from the midx storage.
+	 */
+	struct packed_git **packs;
+
+	/* something like ".git/objects/pack" */
+	char pack_dir[FLEX_ARRAY]; /* more */
+};
+
+extern struct midxed_git *get_midxed_git(const char *pack_dir, struct object_id *oid);
+
 /*
  * Write a single MIDX file storing the given entries for the
  * given list of packfiles. If midx_name is null, then a temp
diff --git a/t/t5318-midx.sh b/t/t5318-midx.sh
index b66efcdce9..2e52389442 100755
--- a/t/t5318-midx.sh
+++ b/t/t5318-midx.sh
@@ -26,11 +26,27 @@ test_expect_success 'create objects' \
      git commit -m "test data 1" &&
      git branch commit1 HEAD'
 
+_midx_read_expect() { # $1: pack count, $2: object count, $3: pack directory
+	cat >expect <<- EOF
+	header: 4d494458 1 1 20 0 5 $1
+	num_objects: $2
+	chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets
+	pack_names:
+	$(ls $3 | grep pack | grep -v idx | sort)
+	pack_dir: $3
+	EOF
+}
+
 test_expect_success 'write-midx from index version 1' \
     'pack1=$(git rev-list --all --objects | git pack-objects --index-version=1 ${packdir}/test-1) &&
      midx1=$(git midx --write) &&
      test_path_is_file ${packdir}/midx-${midx1}.midx &&
-     test_path_is_missing ${packdir}/midx-head'
+     test_path_is_missing ${packdir}/midx-head &&
+     _midx_read_expect \
+         "1" "102" \
+         "${packdir}" &&
+     git midx --read --midx-id=${midx1} >output &&
+     cmp output expect'
 
 test_expect_success 'write-midx from index version 2' \
     'rm "${packdir}/test-1-${pack1}.pack" &&
@@ -38,12 +54,17 @@ test_expect_success 'write-midx from index version 2' \
      midx2=$(git midx --write --update-head) &&
      test_path_is_file ${packdir}/midx-${midx2}.midx &&
      test_path_is_file ${packdir}/midx-head &&
-     test $(cat ${packdir}/midx-head) = "$midx2"'
+     test $(cat ${packdir}/midx-head) = "$midx2" &&
+     _midx_read_expect \
+         "1" "102" \
+         "${packdir}" &&
+     git midx --read> output &&
+     cmp output expect'
 
 test_expect_success 'Create more objects' \
     'for i in $(test_seq 100)
      do
-         echo $i >file-2-$i
+         echo extra-$i >file-2-$i
      done &&
      git add file-* &&
      test_tick &&
@@ -55,28 +76,32 @@ test_expect_success 'write-midx with two packs' \
      midx3=$(git midx --write --update-head) &&
      test_path_is_file ${packdir}/midx-${midx3}.midx &&
      test_path_is_file ${packdir}/midx-head &&
-     test $(cat ${packdir}/midx-head) = "$midx3"'
+     test $(cat ${packdir}/midx-head) = "$midx3" &&
+     _midx_read_expect \
+         "2" "204" \
+	 "${packdir}" &&
+     git midx --read >output &&
+     cmp output expect'
 
 test_expect_success 'Add more packs' \
-    'for j in $(test_seq 10)
+    'for i in $(test_seq 10)
      do
-         jjj=$(printf '%03i' $j)
-         test-genrandom "bar" 200 > wide_delta_$jjj &&
-         test-genrandom "baz $jjj" 50 >> wide_delta_$jjj &&
-         test-genrandom "foo"$j 100 > deep_delta_$jjj &&
-         test-genrandom "foo"$(expr $j + 1) 100 >> deep_delta_$jjj &&
-         test-genrandom "foo"$(expr $j + 2) 100 >> deep_delta_$jjj &&
-         echo $jjj >file_$jjj &&
-         test-genrandom "$jjj" 8192 >>file_$jjj &&
-         git update-index --add file_$jjj deep_delta_$jjj wide_delta_$jjj &&
+         iii=$(printf '%03i' $i)
+         test-genrandom "bar" 200 > wide_delta_$iii &&
+         test-genrandom "baz $iii" 50 >> wide_delta_$iii &&
+         test-genrandom "foo"$i 100 > deep_delta_$iii &&
+         test-genrandom "foo"$(expr $i + 1) 100 >> deep_delta_$iii &&
+         test-genrandom "foo"$(expr $i + 2) 100 >> deep_delta_$iii &&
+         echo $iii >file_$iii &&
+         test-genrandom "$iii" 8192 >>file_$iii &&
+         git update-index --add file_$iii deep_delta_$iii wide_delta_$iii &&
          { echo 101 && test-genrandom 100 8192; } >file_101 &&
          git update-index --add file_101 &&
-         commit=$(git commit-tree $EMPTY_TREE -p HEAD</dev/null) && {
-         echo $EMPTY_TREE &&
-         git ls-tree $EMPTY_TREE | sed -e "s/.* \\([0-9a-f]*\\)	.*/\\1/"
+         tree=$(git write-tree) &&
+         commit=$(git commit-tree $tree -p HEAD</dev/null) && {
+         echo $tree &&
+         git ls-tree $tree | sed -e "s/.* \\([0-9a-f]*\\)	.*/\\1/"
          } >obj-list &&
-         echo commit_packs_$j = $commit &&
-	 git branch commit_packs_$j $commit &&
          git update-ref HEAD $commit &&
          git pack-objects --index-version=2 ${packdir}/test-pack <obj-list
      done'
@@ -85,7 +110,12 @@ test_expect_success 'write-midx with twelve packs' \
     'midx4=$(git midx --write --update-head) &&
      test_path_is_file ${packdir}/midx-${midx4}.midx &&
      test_path_is_file ${packdir}/midx-head &&
-     test $(cat ${packdir}/midx-head) = "$midx4"'
+     test $(cat ${packdir}/midx-head) = "$midx4" &&
+     _midx_read_expect \
+         "12" "245" \
+         "${packdir}" &&
+     git midx --read >output &&
+     cmp output expect'
 
 test_expect_success 'write-midx with no new packs' \
     'midx5=$(git midx --write --update-head) &&
@@ -100,12 +130,17 @@ test_expect_success 'create bare repo' \
      cd bare &&
      git config core.midx true &&
      git config pack.threads 1 &&
-     baredir=objects/pack'
+     baredir=./objects/pack'
 
 test_expect_success 'write-midx in bare repo' \
     'midxbare=$(git midx --write --update-head) &&
      test_path_is_file ${baredir}/midx-${midxbare}.midx  &&
      test_path_is_file ${baredir}/midx-head &&
-     test $(cat ${baredir}/midx-head) = "$midxbare"'
+     test $(cat ${baredir}/midx-head) = "$midxbare" &&
+     _midx_read_expect \
+         "12" "245" \
+         "${baredir}" &&
+     git midx --read >output &&
+     cmp output expect'
 
 test_done
-- 
2.15.0