fast-import: prevent producing bad delta

To produce deltas for tree objects fast-import tracks two versions
of tree's entries - base and current one. Base version stands both
for a delta base of this tree, and for a entry inside a delta base
of a parent tree. So care should be taken to keep it in sync.

tree_content_set cuts away a whole subtree and replaces it with a
new one (or NULL for lazy load of a tree with known sha1). It
keeps a base sha1 for this subtree (needed for parent tree). And
here is the problem, 'subtree' tree root doesn't have the implied
base version entries.

Adjusting the subtree to include them would mean a deep rewrite of
subtree. Invalidating the subtree base version would mean recursive
invalidation of parents' base versions. So just mark this tree as
do-not-delta me. Abuse setuid bit for this purpose.

tree_content_replace is the same as tree_content_set except that is
is used to replace the root, so just clearing base sha1 here (instead
of setting the bit) is fine.

[di: log message]

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Dmitry Ivankov <divanorama@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
diff --git a/fast-import.c b/fast-import.c
index 78d9786..6dad9ff 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -170,6 +170,11 @@
 #define DEPTH_BITS 13
 #define MAX_DEPTH ((1<<DEPTH_BITS)-1)
 
+/*
+ * We abuse the setuid bit on directories to mean "do not delta".
+ */
+#define NO_DELTA S_ISUID
+
 struct object_entry {
 	struct pack_idx_entry idx;
 	struct object_entry *next;
@@ -1414,8 +1419,9 @@
 		struct tree_entry *e = t->entries[i];
 		if (!e->versions[v].mode)
 			continue;
-		strbuf_addf(b, "%o %s%c", (unsigned int)e->versions[v].mode,
-					e->name->str_dat, '\0');
+		strbuf_addf(b, "%o %s%c",
+			(unsigned int)(e->versions[v].mode & ~NO_DELTA),
+			e->name->str_dat, '\0');
 		strbuf_add(b, e->versions[v].sha1, 20);
 	}
 }
@@ -1425,7 +1431,7 @@
 	struct tree_content *t = root->tree;
 	unsigned int i, j, del;
 	struct last_object lo = { STRBUF_INIT, 0, 0, /* no_swap */ 1 };
-	struct object_entry *le;
+	struct object_entry *le = NULL;
 
 	if (!is_null_sha1(root->versions[1].sha1))
 		return;
@@ -1435,7 +1441,8 @@
 			store_tree(t->entries[i]);
 	}
 
-	le = find_object(root->versions[0].sha1);
+	if (!(root->versions[0].mode & NO_DELTA))
+		le = find_object(root->versions[0].sha1);
 	if (S_ISDIR(root->versions[0].mode) && le && le->pack_id == pack_id) {
 		mktree(t, 0, &old_tree);
 		lo.data = old_tree;
@@ -1469,6 +1476,7 @@
 {
 	if (!S_ISDIR(mode))
 		die("Root cannot be a non-directory");
+	hashclr(root->versions[0].sha1);
 	hashcpy(root->versions[1].sha1, sha1);
 	if (root->tree)
 		release_tree_content_recursive(root->tree);
@@ -1513,6 +1521,23 @@
 				if (e->tree)
 					release_tree_content_recursive(e->tree);
 				e->tree = subtree;
+
+				/*
+				 * We need to leave e->versions[0].sha1 alone
+				 * to avoid modifying the preimage tree used
+				 * when writing out the parent directory.
+				 * But after replacing the subdir with a
+				 * completely different one, it's not a good
+				 * delta base any more, and besides, we've
+				 * thrown away the tree entries needed to
+				 * make a delta against it.
+				 *
+				 * So let's just explicitly disable deltas
+				 * for the subtree.
+				 */
+				if (S_ISDIR(e->versions[0].mode))
+					e->versions[0].mode |= NO_DELTA;
+
 				hashclr(root->versions[1].sha1);
 				return 1;
 			}
@@ -2927,7 +2952,7 @@
 		/* mode SP type SP object_name TAB path LF */
 		strbuf_reset(&line);
 		strbuf_addf(&line, "%06o %s %s\t",
-				mode, type, sha1_to_hex(sha1));
+				mode & ~NO_DELTA, type, sha1_to_hex(sha1));
 		quote_c_style(path, &line, NULL, 0);
 		strbuf_addch(&line, '\n');
 	}