index: make the index file format extensible.

... and move the cache-tree data into it.

Signed-off-by: Junio C Hamano <junkio@cox.net>
diff --git a/apply.c b/apply.c
index e283df3..acecf8d 100644
--- a/apply.c
+++ b/apply.c
@@ -12,10 +12,6 @@
 #include "quote.h"
 #include "blob.h"
 
-static unsigned char active_cache_sha1[20];
-static struct cache_tree *active_cache_tree;
-
-
 //  --check turns on checking that the working tree matches the
 //    files that are being modified, but doesn't apply the patch
 //  --stat does just a diffstat, and doesn't actually apply
@@ -1919,9 +1915,8 @@
 	if (write_index)
 		newfd = hold_index_file_for_update(&cache_file, get_index_file());
 	if (check_index) {
-		if (read_cache_1(active_cache_sha1) < 0)
+		if (read_cache() < 0)
 			die("unable to read index file");
-		active_cache_tree = read_cache_tree(active_cache_sha1);
 	}
 
 	if ((check || apply) && check_patch_list(list) < 0)
@@ -1931,11 +1926,9 @@
 		write_out_results(list, skipped_patch);
 
 	if (write_index) {
-		if (write_cache_1(newfd, active_cache, active_nr,
-				  active_cache_sha1) ||
+		if (write_cache(newfd, active_cache, active_nr) ||
 		    commit_index_file(&cache_file))
 			die("Unable to write new cachefile");
-		write_cache_tree(active_cache_sha1, active_cache_tree);
 	}
 
 	if (show_index_info)
diff --git a/cache-tree.c b/cache-tree.c
index b34b0bc..2146723 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -11,16 +11,18 @@
 	return it;
 }
 
-void cache_tree_free(struct cache_tree *it)
+void cache_tree_free(struct cache_tree **it_p)
 {
 	int i;
+	struct cache_tree *it = *it_p;
 
 	if (!it)
 		return;
 	for (i = 0; i < it->subtree_nr; i++)
-		cache_tree_free(it->down[i]->cache_tree);
+		cache_tree_free(&it->down[i]->cache_tree);
 	free(it->down);
 	free(it);
+	*it_p = NULL;
 }
 
 static struct cache_tree_sub *find_subtree(struct cache_tree *it,
@@ -78,7 +80,7 @@
 				break;
 		}
 		if (i < it->subtree_nr) {
-			cache_tree_free(it->down[i]->cache_tree);
+			cache_tree_free(&it->down[i]->cache_tree);
 			free(it->down[i]);
 			/* 0 1 2 3 4 5
 			 *       ^     ^subtree_nr = 6
@@ -159,13 +161,27 @@
 		if (s->used)
 			down[dst++] = s;
 		else {
-			cache_tree_free(s->cache_tree);
+			cache_tree_free(&s->cache_tree);
 			free(s);
 			it->subtree_nr--;
 		}
 	}
 }
 
+int cache_tree_fully_valid(struct cache_tree *it)
+{
+	int i;
+	if (!it)
+		return 0;
+	if (it->entry_count < 0 || !has_sha1_file(it->sha1))
+		return 0;
+	for (i = 0; i < it->subtree_nr; i++) {
+		if (!cache_tree_fully_valid(it->down[i]->cache_tree))
+			return 0;
+	}
+	return 1;
+}
+
 static int update_one(struct cache_tree *it,
 		      struct cache_entry **cache,
 		      int entries,
@@ -354,19 +370,15 @@
 	return buffer;
 }
 
-static void *cache_tree_write(const unsigned char *cache_sha1,
-			      struct cache_tree *root,
-			      unsigned long *offset_p)
+void *cache_tree_write(struct cache_tree *root, unsigned long *size_p)
 {
 	char path[PATH_MAX];
 	unsigned long size = 8192;
 	char *buffer = xmalloc(size);
 
-	/* the cache checksum of the corresponding index file. */
-	memcpy(buffer, cache_sha1, 20);
-	*offset_p = 20;
+	*size_p = 0;
 	path[0] = 0;
-	return write_one(root, path, 0, buffer, &size, offset_p);
+	return write_one(root, path, 0, buffer, &size, size_p);
 }
 
 static struct cache_tree *read_one(const char **buffer, unsigned long *size_p)
@@ -439,81 +451,13 @@
 	return it;
 
  free_return:
-	cache_tree_free(it);
+	cache_tree_free(&it);
 	return NULL;
 }
 
-static struct cache_tree *cache_tree_read(unsigned char *sha1,
-					  const char *buffer,
-					  unsigned long size)
+struct cache_tree *cache_tree_read(const char *buffer, unsigned long size)
 {
-	/* check the cache-tree matches the index */
-	if (memcmp(buffer, sha1, 20))
-		return NULL; /* checksum mismatch */
-	if (buffer[20])
+	if (buffer[0])
 		return NULL; /* not the whole tree */
-	buffer += 20;
-	size -= 20;
 	return read_one(&buffer, &size);
 }
-
-struct cache_tree *read_cache_tree(unsigned char *sha1)
-{
-	int fd;
-	struct stat st;
-	char path[PATH_MAX];
-	unsigned long size = 0;
-	void *map;
-	struct cache_tree *it;
-
-	sprintf(path, "%s.aux", get_index_file());
-	fd = open(path, O_RDONLY);
-	if (fd < 0)
-		return cache_tree();
-
-	if (fstat(fd, &st))
-		return cache_tree();
-	size = st.st_size;
-	map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
-	close(fd);
-	if (map == MAP_FAILED)
-		return cache_tree();
-	it = cache_tree_read(sha1, map, size);
-	munmap(map, size);
-	if (!it)
-		return cache_tree();
-	return it;
-}
-
-int write_cache_tree(const unsigned char *sha1, struct cache_tree *root)
-{
-	char path[PATH_MAX];
-	unsigned long size = 0;
-	void *buf, *buffer;
-	int fd, ret = -1;
-
-	sprintf(path, "%s.aux", get_index_file());
-	if (!root) {
-		unlink(path);
-		return -1;
-	}
-	fd = open(path, O_WRONLY|O_CREAT, 0666);
-	if (fd < 0)
-		return -1;
-	buffer = buf = cache_tree_write(sha1, root, &size);
-	while (size) {
-		int written = xwrite(fd, buf, size);
-		if (written <= 0)
-			goto fail;
-		buf += written;
-		size -= written;
-	}
-	ret = 0;
-
- fail:
-	close(fd);
-	free(buffer);
-	if (ret)
-		unlink(path);
-	return ret;
-}
diff --git a/cache-tree.h b/cache-tree.h
index 7b149af..c70a769 100644
--- a/cache-tree.h
+++ b/cache-tree.h
@@ -18,12 +18,13 @@
 };
 
 struct cache_tree *cache_tree(void);
-void cache_tree_free(struct cache_tree *);
+void cache_tree_free(struct cache_tree **);
 void cache_tree_invalidate_path(struct cache_tree *, const char *);
 
-int write_cache_tree(const unsigned char *, struct cache_tree *);
-struct cache_tree *read_cache_tree(unsigned char *);
-int cache_tree_update(struct cache_tree *, struct cache_entry **, int, int);
+void *cache_tree_write(struct cache_tree *root, unsigned long *size_p);
+struct cache_tree *cache_tree_read(const char *buffer, unsigned long size);
 
+int cache_tree_fully_valid(struct cache_tree *);
+int cache_tree_update(struct cache_tree *, struct cache_entry **, int, int);
 
 #endif
diff --git a/cache.h b/cache.h
index 8c9947e..a080727 100644
--- a/cache.h
+++ b/cache.h
@@ -114,6 +114,7 @@
 
 extern struct cache_entry **active_cache;
 extern unsigned int active_nr, active_alloc, active_cache_changed;
+extern struct cache_tree *active_cache_tree;
 
 #define GIT_DIR_ENVIRONMENT "GIT_DIR"
 #define DEFAULT_GIT_DIR_ENVIRONMENT ".git"
@@ -138,11 +139,8 @@
 #define alloc_nr(x) (((x)+16)*3/2)
 
 /* Initialize and use the cache information */
-extern int read_cache_1(unsigned char *);
-extern int write_cache_1(int, struct cache_entry **, int, unsigned char *);
 extern int read_cache(void);
-extern int write_cache(int, struct cache_entry **, int);
-
+extern int write_cache(int newfd, struct cache_entry **cache, int entries);
 extern int cache_name_pos(const char *name, int namelen);
 #define ADD_CACHE_OK_TO_ADD 1		/* Ok to add */
 #define ADD_CACHE_OK_TO_REPLACE 2	/* Ok to replace file/directory */
diff --git a/checkout-index.c b/checkout-index.c
index dd6a2d8..e56c354 100644
--- a/checkout-index.c
+++ b/checkout-index.c
@@ -39,6 +39,7 @@
 #include "cache.h"
 #include "strbuf.h"
 #include "quote.h"
+#include "cache-tree.h"
 
 #define CHECKOUT_ALL 4
 static const char *prefix;
diff --git a/dump-cache-tree.c b/dump-cache-tree.c
index 80f8683..01e8bff 100644
--- a/dump-cache-tree.c
+++ b/dump-cache-tree.c
@@ -2,12 +2,11 @@
 #include "tree.h"
 #include "cache-tree.h"
 
-static unsigned char active_cache_sha1[20];
-static struct cache_tree *active_cache_tree;
-
 static void dump_cache_tree(struct cache_tree *it, const char *pfx)
 {
 	int i;
+	if (!it)
+		return;
 	if (it->entry_count < 0)
 		printf("%-40s %s\n", "invalid", pfx);
 	else
@@ -24,9 +23,8 @@
 
 int main(int ac, char **av)
 {
-	if (read_cache_1(active_cache_sha1) < 0)
+	if (read_cache() < 0)
 		die("unable to read index file");
-	active_cache_tree = read_cache_tree(active_cache_sha1);
 	dump_cache_tree(active_cache_tree, "");
 	return 0;
 }
diff --git a/read-cache.c b/read-cache.c
index 50e094e..1f71d12 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -4,11 +4,26 @@
  * Copyright (C) Linus Torvalds, 2005
  */
 #include "cache.h"
+#include "cache-tree.h"
+
+/* Index extensions.
+ *
+ * The first letter should be 'A'..'Z' for extensions that are not
+ * necessary for correct operation (i.e. optimization data).
+ * When new extensions are added that _need_ to be understood in
+ * order to correctly interpret the index file, pick a first letter
+ * that is outside this range, to cause the reader to abort.
+ */
+
+#define CACHE_EXT(s) ( (s[0]<<24)|(s[1]<<16)|(s[2]<<8)|(s[3]) )
+#define CACHE_EXT_TREE 0x54524545	/* "TREE" */
 
 struct cache_entry **active_cache = NULL;
 static time_t index_file_timestamp;
 unsigned int active_nr = 0, active_alloc = 0, active_cache_changed = 0;
 
+struct cache_tree *active_cache_tree = NULL;
+
 /*
  * This only updates the "non-critical" parts of the directory
  * cache, ie the parts that aren't tracked by GIT, and only used
@@ -496,12 +511,10 @@
 	return 0;
 }
 
-static int verify_hdr(struct cache_header *hdr, unsigned long size, unsigned char *sha1)
+static int verify_hdr(struct cache_header *hdr, unsigned long size)
 {
 	SHA_CTX c;
-	unsigned char sha1_buf[20];
-	if (!sha1)
-		sha1 = sha1_buf;
+	unsigned char sha1[20];
 
 	if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
 		return error("bad signature");
@@ -515,7 +528,23 @@
 	return 0;
 }
 
-int read_cache_1(unsigned char *cache_sha1)
+static int read_index_extension(const char *ext, void *data, unsigned long sz)
+{
+	switch (CACHE_EXT(ext)) {
+	case CACHE_EXT_TREE:
+		active_cache_tree = cache_tree_read(data, sz);
+		break;
+	default:
+		if (*ext < 'A' || 'Z' < *ext)
+			return error("index uses %.4s extension, which we do not understand",
+				     ext);
+		fprintf(stderr, "ignoring %.4s extension\n", ext);
+		break;
+	}
+	return 0;
+}
+
+int read_cache(void)
 {
 	int fd, i;
 	struct stat st;
@@ -549,7 +578,7 @@
 		die("index file mmap failed (%s)", strerror(errno));
 
 	hdr = map;
-	if (verify_hdr(hdr, size, cache_sha1) < 0)
+	if (verify_hdr(hdr, size) < 0)
 		goto unmap;
 
 	active_nr = ntohl(hdr->hdr_entries);
@@ -563,6 +592,22 @@
 		active_cache[i] = ce;
 	}
 	index_file_timestamp = st.st_mtime;
+	while (offset <= size - 20 - 8) {
+		/* After an array of active_nr index entries,
+		 * there can be arbitrary number of extended
+		 * sections, each of which is prefixed with
+		 * extension name (4-byte) and section length
+		 * in 4-byte network byte order.
+		 */
+		unsigned int extsize; /* 4 bytes wide: memcpy below fills exactly 4 */
+		memcpy(&extsize, map + offset + 4, 4);
+		extsize = ntohl(extsize);
+		if (read_index_extension(map + offset,
+					 map + offset + 8, extsize) < 0)
+			goto unmap;
+		offset += 8;
+		offset += extsize;
+	}
 	return active_nr;
 
 unmap:
@@ -597,7 +642,18 @@
  	return 0;
 }
 
-static int ce_flush(SHA_CTX *context, int fd, unsigned char *sha1)
+static int write_index_ext_header(SHA_CTX *context, int fd,
+				  unsigned long ext, unsigned long sz)
+{
+	/* 32-bit copies: the first 4 bytes of a 64-bit long depend on endianness */
+	unsigned int ext_be = htonl(ext), sz_be = htonl(sz);
+	if ((ce_write(context, fd, &ext_be, 4) < 0) ||
+	    (ce_write(context, fd, &sz_be, 4) < 0))
+		return -1;
+	return 0;
+}
+
+static int ce_flush(SHA_CTX *context, int fd)
 {
 	unsigned int left = write_buffer_len;
 
@@ -614,8 +670,7 @@
 	}
 
 	/* Append the SHA1 signature at the end */
-	SHA1_Final(sha1, context);
-	memcpy(write_buffer + left, sha1, 20);
+	SHA1_Final(write_buffer + left, context);
 	left += 20;
 	if (write(fd, write_buffer, left) != left)
 		return -1;
@@ -666,14 +721,11 @@
 	}
 }
 
-int write_cache_1(int newfd, struct cache_entry **cache, int entries,
-		  unsigned char *cache_sha1)
+int write_cache(int newfd, struct cache_entry **cache, int entries)
 {
 	SHA_CTX c;
 	struct cache_header hdr;
 	int i, removed;
-	int status;
-	unsigned char sha1[20];
 
 	for (i = removed = 0; i < entries; i++)
 		if (!cache[i]->ce_mode)
@@ -697,18 +749,19 @@
 		if (ce_write(&c, newfd, ce, ce_size(ce)) < 0)
 			return -1;
 	}
-	status = ce_flush(&c, newfd, sha1);
-	if (cache_sha1)
-		memcpy(cache_sha1, sha1, 20);
-	return status;
-}
 
-int read_cache(void)
-{
-	return read_cache_1(NULL);
-}
-
-int write_cache(int newfd, struct cache_entry **cache, int entries)
-{
-	return write_cache_1(newfd, cache, entries, NULL);
+	/* Write extension data here */
+	if (active_cache_tree) {
+		unsigned long sz;
+		void *data = cache_tree_write(active_cache_tree, &sz);
+		int failed = !data ||
+			write_index_ext_header(&c, newfd, CACHE_EXT_TREE, sz) ||
+			ce_write(&c, newfd, data, sz);
+
+		/* release the serialized tree on success as well as on failure */
+		free(data);
+		if (failed)
+			return -1;
+	}
+	return ce_flush(&c, newfd);
 }
diff --git a/read-tree.c b/read-tree.c
index 26f4f7e..1c65101 100644
--- a/read-tree.c
+++ b/read-tree.c
@@ -9,6 +9,7 @@
 
 #include "object.h"
 #include "tree.h"
+#include "cache-tree.h"
 #include <sys/time.h>
 #include <signal.h>
 
@@ -828,6 +829,7 @@
 	}
 
 	unpack_trees(fn);
+	cache_tree_free(&active_cache_tree);
 	if (write_cache(newfd, active_cache, active_nr) ||
 	    commit_index_file(&cache_file))
 		die("unable to write new index file");
diff --git a/update-index.c b/update-index.c
index 86f5394..d6d3295 100644
--- a/update-index.c
+++ b/update-index.c
@@ -6,12 +6,8 @@
 #include "cache.h"
 #include "strbuf.h"
 #include "quote.h"
-#include "tree.h"
 #include "cache-tree.h"
 
-static unsigned char active_cache_sha1[20];
-static struct cache_tree *active_cache_tree;
-
 /*
  * Default to not allowing changes to the list of files. The
  * tool doesn't actually care, but this makes it harder to add
@@ -501,10 +497,9 @@
 	if (newfd < 0)
 		die("unable to create new cachefile");
 
-	entries = read_cache_1(active_cache_sha1);
+	entries = read_cache();
 	if (entries < 0)
 		die("cache corrupted");
-	active_cache_tree = read_cache_tree(active_cache_sha1);
 
 	for (i = 1 ; i < argc; i++) {
 		const char *path = argv[i];
@@ -630,11 +625,9 @@
 		}
 	}
 	if (active_cache_changed) {
-		if (write_cache_1(newfd, active_cache, active_nr,
-				  active_cache_sha1) ||
+		if (write_cache(newfd, active_cache, active_nr) ||
 		    commit_index_file(&cache_file))
 			die("Unable to write new cachefile");
-		write_cache_tree(active_cache_sha1, active_cache_tree);
 	}
 
 	return has_errors ? 1 : 0;
diff --git a/write-tree.c b/write-tree.c
index cef0c5b..a506992 100644
--- a/write-tree.c
+++ b/write-tree.c
@@ -7,21 +7,20 @@
 #include "tree.h"
 #include "cache-tree.h"
 
-static unsigned char active_cache_sha1[20];
-static struct cache_tree *active_cache_tree;
-
 static int missing_ok = 0;
 
 static const char write_tree_usage[] = "git-write-tree [--missing-ok]";
 
+static struct cache_file cache_file;
+
 int main(int argc, char **argv)
 {
-	int entries;
+	int entries, was_valid, newfd;
 
 	setup_git_directory();
 
-	entries = read_cache_1(active_cache_sha1);
-	active_cache_tree = read_cache_tree(active_cache_sha1);
+	newfd = hold_index_file_for_update(&cache_file, get_index_file());
+	entries = read_cache();
 	if (argc == 2) {
 		if (!strcmp(argv[1], "--missing-ok"))
 			missing_ok = 1;
@@ -35,11 +34,26 @@
 	if (entries < 0)
 		die("git-write-tree: error reading cache");
 
-	if (cache_tree_update(active_cache_tree, active_cache, active_nr,
-			      missing_ok))
-		die("git-write-tree: error building trees");
-	write_cache_tree(active_cache_sha1, active_cache_tree);
+	if (!active_cache_tree)
+		active_cache_tree = cache_tree();
 
+	was_valid = cache_tree_fully_valid(active_cache_tree);
+	if (!was_valid) {
+		if (cache_tree_update(active_cache_tree,
+				      active_cache, active_nr,
+				      missing_ok) < 0)
+			die("git-write-tree: error building trees");
+		if (0 <= newfd) {
+			if (!write_cache(newfd, active_cache, active_nr))
+				commit_index_file(&cache_file);
+		}
+		/* Not being able to write is fine -- we are only interested
+		 * in updating the cache-tree part, and if the next caller
+		 * ends up using the old index with unupdated cache-tree part
+		 * it misses the work we did here, but that is just a
+		 * performance penalty and not a big deal.
+		 */
+	}
 	printf("%s\n", sha1_to_hex(active_cache_tree->sha1));
 	return 0;
 }