Web lists-archives.com

Re: [PATCH] repository.c: always allocate 'index' at repo init time




On Sun, May 19, 2019 at 09:56:36AM +0700, Nguyễn Thái Ngọc Duy wrote:

> This patch goes with the second option, making sure that 'index' is
> always allocated after initialization. It's less effort than the first
> one, and also safer because you could still miss things during the code
> audit. The extra allocation cost is not a real concern.

I think this direction makes sense.

The patch looks good, though I wonder if we could simplify even further
by just embedding an index into the repository object. The purpose of
having it as a pointer, I think, is so that the_repository can point to
the_index. But we could possibly hide the latter behind some macro
trickery like:

  #define the_index (the_repository->index)

I spent a few minutes on a proof of concept patch, but it gets a bit
hairy:

  1. There are some circular dependencies in the header files. We'd need
     repository.h to depend on cache.h to get the definition of
     index_state, but the latter includes repository.h. We'd need to
     break the index bits out of cache.h into index.h, which in turn
     requires breaking out some other parts. I did a sloppy job of it in
     the patch below.

  2. There are hundreds of spots that need to swap out "repo->index" for
     "&repo->index". In the patch below I just did enough to compile
     archive-zip.o, to illustrate. :)

So it's definitely non-trivial to go that way. I'm not sure if it's
worth the effort to switch at this point, but even if it is, your patch
seems like a good thing to do in the meantime.

Either way, I think we could probably revert the non-test portion of my
581d2fd9f2 (get_oid: handle NULL repo->index, 2019-05-14) after this.

-Peff

---
diff --git a/archive-zip.c b/archive-zip.c
index 4d66b5be6e..517e203483 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -353,7 +353,7 @@ static int write_zip_entry(struct archiver_args *args,
 				return error(_("cannot read %s"),
 					     oid_to_hex(oid));
 			crc = crc32(crc, buffer, size);
-			is_binary = entry_is_binary(args->repo->index,
+			is_binary = entry_is_binary(&args->repo->index,
 						    path_without_prefix,
 						    buffer, size);
 			out = buffer;
@@ -430,7 +430,7 @@ static int write_zip_entry(struct archiver_args *args,
 				break;
 			crc = crc32(crc, buf, readlen);
 			if (is_binary == -1)
-				is_binary = entry_is_binary(args->repo->index,
+				is_binary = entry_is_binary(&args->repo->index,
 							    path_without_prefix,
 							    buf, readlen);
 			write_or_die(1, buf, readlen);
@@ -463,7 +463,7 @@ static int write_zip_entry(struct archiver_args *args,
 				break;
 			crc = crc32(crc, buf, readlen);
 			if (is_binary == -1)
-				is_binary = entry_is_binary(args->repo->index,
+				is_binary = entry_is_binary(&args->repo->index,
 							    path_without_prefix,
 							    buf, readlen);
 
diff --git a/cache.h b/cache.h
index b4bb2e2c11..d0450025e1 100644
--- a/cache.h
+++ b/cache.h
@@ -17,6 +17,7 @@
 #include "sha1-array.h"
 #include "repository.h"
 #include "mem-pool.h"
+#include "oid.h"
 
 #include <zlib.h>
 typedef struct git_zstream {
@@ -43,28 +44,6 @@ int git_deflate_end_gently(git_zstream *);
 int git_deflate(git_zstream *, int flush);
 unsigned long git_deflate_bound(git_zstream *, unsigned long);
 
-/* The length in bytes and in hex digits of an object name (SHA-1 value). */
-#define GIT_SHA1_RAWSZ 20
-#define GIT_SHA1_HEXSZ (2 * GIT_SHA1_RAWSZ)
-/* The block size of SHA-1. */
-#define GIT_SHA1_BLKSZ 64
-
-/* The length in bytes and in hex digits of an object name (SHA-256 value). */
-#define GIT_SHA256_RAWSZ 32
-#define GIT_SHA256_HEXSZ (2 * GIT_SHA256_RAWSZ)
-/* The block size of SHA-256. */
-#define GIT_SHA256_BLKSZ 64
-
-/* The length in byte and in hex digits of the largest possible hash value. */
-#define GIT_MAX_RAWSZ GIT_SHA256_RAWSZ
-#define GIT_MAX_HEXSZ GIT_SHA256_HEXSZ
-/* The largest possible block size for any supported hash. */
-#define GIT_MAX_BLKSZ GIT_SHA256_BLKSZ
-
-struct object_id {
-	unsigned char hash[GIT_MAX_RAWSZ];
-};
-
 #define the_hash_algo the_repository->hash_algo
 
 #if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT)
@@ -143,16 +122,6 @@ struct cache_header {
 #define INDEX_FORMAT_LB 2
 #define INDEX_FORMAT_UB 4
 
-/*
- * The "cache_time" is just the low 32 bits of the
- * time. It doesn't matter if it overflows - we only
- * check it for equality in the 32 bits we save.
- */
-struct cache_time {
-	uint32_t sec;
-	uint32_t nsec;
-};
-
 struct stat_data {
 	struct cache_time sd_ctime;
 	struct cache_time sd_mtime;
@@ -326,32 +295,6 @@ static inline unsigned int canon_mode(unsigned int mode)
 #define UNTRACKED_CHANGED	(1 << 7)
 #define FSMONITOR_CHANGED	(1 << 8)
 
-struct split_index;
-struct untracked_cache;
-
-struct index_state {
-	struct cache_entry **cache;
-	unsigned int version;
-	unsigned int cache_nr, cache_alloc, cache_changed;
-	struct string_list *resolve_undo;
-	struct cache_tree *cache_tree;
-	struct split_index *split_index;
-	struct cache_time timestamp;
-	unsigned name_hash_initialized : 1,
-		 initialized : 1,
-		 drop_cache_tree : 1,
-		 updated_workdir : 1,
-		 updated_skipworktree : 1,
-		 fsmonitor_has_run_once : 1;
-	struct hashmap name_hash;
-	struct hashmap dir_hash;
-	struct object_id oid;
-	struct untracked_cache *untracked;
-	uint64_t fsmonitor_last_update;
-	struct ewah_bitmap *fsmonitor_dirty;
-	struct mem_pool *ce_mem_pool;
-};
-
 /* Name hashing */
 int test_lazy_init_name_hash(struct index_state *istate, int try_threaded);
 void add_name_hash(struct index_state *istate, struct cache_entry *ce);
diff --git a/repository.h b/repository.h
index 4fb6a5885f..3371afceaa 100644
--- a/repository.h
+++ b/repository.h
@@ -1,11 +1,11 @@
 #ifndef REPOSITORY_H
 #define REPOSITORY_H
 
+#include "index.h"
 #include "path.h"
 
 struct config_set;
 struct git_hash_algo;
-struct index_state;
 struct lock_file;
 struct pathspec;
 struct raw_object_store;
@@ -87,7 +87,7 @@ struct repository {
 	 * Repository's in-memory index.
 	 * 'repo_read_index()' can be used to populate 'index'.
 	 */
-	struct index_state *index;
+	struct index_state index;
 
 	/* Repository's current hash algorithm, as serialized on disk. */
 	const struct git_hash_algo *hash_algo;