dfs: compact reftables during DfsPackCompactor

Combine intermediate, non-GC reftables when compacting pack files.
This shrinks the reftable stack, improving lookup times.
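
Callers opt in by giving the compactor a ReftableConfig before
running it; a minimal sketch, assuming an existing DfsRepository
"repo", a no-argument ReftableConfig constructor, and that the
auto-selection method edited below is named autoAdd():

  DfsPackCompactor compactor = new DfsPackCompactor(repo);
  compactor.setReftableConfig(new ReftableConfig());
  compactor.autoAdd();      // pick small packs and reftables
  compactor.compact(null);  // a null monitor is accepted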

Change-Id: I5dbba41806f99af5ecaff3a3119f6630e9404256
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java
index ac14c0b..963ba8a 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java
@@ -44,21 +44,29 @@
 package org.eclipse.jgit.internal.storage.dfs;
 
 import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.COMPACT;
+import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC;
 import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
 import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
+import static org.eclipse.jgit.internal.storage.pack.PackExt.REFTABLE;
 import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_DELTA;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
+import java.util.Set;
 
 import org.eclipse.jgit.errors.IncorrectObjectTypeException;
 import org.eclipse.jgit.internal.JGitText;
 import org.eclipse.jgit.internal.storage.file.PackIndex;
 import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
 import org.eclipse.jgit.internal.storage.pack.PackWriter;
+import org.eclipse.jgit.internal.storage.reftable.ReftableCompactor;
+import org.eclipse.jgit.internal.storage.reftable.ReftableConfig;
 import org.eclipse.jgit.lib.AnyObjectId;
 import org.eclipse.jgit.lib.NullProgressMonitor;
 import org.eclipse.jgit.lib.ObjectId;
@@ -89,16 +97,15 @@
  */
 public class DfsPackCompactor {
 	private final DfsRepository repo;
-
 	private final List<DfsPackFile> srcPacks;
-
+	private final List<DfsReftable> srcReftables;
 	private final List<ObjectIdSet> exclude;
 
-	private final List<DfsPackDescription> newPacks;
-
-	private final List<PackStatistics> newStats;
+	private PackStatistics newStats;
+	private DfsPackDescription outDesc;
 
 	private int autoAddSize;
+	private ReftableConfig reftableConfig;
 
 	private RevWalk rw;
 	private RevFlag added;
@@ -114,9 +121,19 @@
 		repo = repository;
 		autoAddSize = 5 * 1024 * 1024; // 5 MiB
 		srcPacks = new ArrayList<>();
+		srcReftables = new ArrayList<>();
 		exclude = new ArrayList<>(4);
-		newPacks = new ArrayList<>(1);
-		newStats = new ArrayList<>(1);
+	}
+
+	/**
+	 * @param cfg
+	 *            configuration to write a reftable. Reftable compaction is
+	 *            disabled (the default) when {@code cfg} is {@code null}.
+	 * @return {@code this}
+	 */
+	public DfsPackCompactor setReftableConfig(ReftableConfig cfg) {
+		reftableConfig = cfg;
+		return this;
 	}
 
 	/**
@@ -137,7 +154,19 @@
 	}
 
 	/**
-	 * Automatically select packs to be included, and add them.
+	 * Add a reftable to be compacted.
+	 *
+	 * @param table
+	 *            a reftable to combine.
+	 * @return {@code this}
+	 */
+	public DfsPackCompactor add(DfsReftable table) {
+		srcReftables.add(table);
+		return this;
+	}
+
+	/**
+	 * Automatically select packs and reftables to be included, and add them.
 	 * <p>
 	 * Packs are selected based on size, smaller packs get included while bigger
 	 * ones are omitted.
@@ -155,6 +184,16 @@
 			else
 				exclude(pack);
 		}
+
+		if (reftableConfig != null) {
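+			// Fold in only small, non-GC reftables; tables at or above
+			// the auto-add size threshold are left in place.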
+			for (DfsReftable table : objdb.getReftables()) {
+				DfsPackDescription d = table.getPackDescription();
+				if (d.getPackSource() != GC
+						&& d.getFileSize(REFTABLE) < autoAddSize) {
+					add(table);
+				}
+			}
+		}
 		return this;
 	}
 
@@ -197,61 +236,77 @@
 	 *             the packs cannot be compacted.
 	 */
 	public void compact(ProgressMonitor pm) throws IOException {
-		if (pm == null)
+		if (pm == null) {
 			pm = NullProgressMonitor.INSTANCE;
+		}
 
 		DfsObjDatabase objdb = repo.getObjectDatabase();
 		try (DfsReader ctx = objdb.newReader()) {
-			PackConfig pc = new PackConfig(repo);
-			pc.setIndexVersion(2);
-			pc.setDeltaCompress(false);
-			pc.setReuseDeltas(true);
-			pc.setReuseObjects(true);
+			if (reftableConfig != null && !srcReftables.isEmpty()) {
+				compactReftables(ctx);
+			}
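+			// Reftables and packs share one output description, so the
+			// single commitPack call below publishes both files together.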
+			compactPacks(ctx, pm);
 
-			PackWriter pw = new PackWriter(pc, ctx);
-			try {
-				pw.setDeltaBaseAsOffset(true);
-				pw.setReuseDeltaCommits(false);
+			List<DfsPackDescription> commit;
+			if (outDesc != null) {
+				commit = Collections.singletonList(outDesc);
+			} else {
+				commit = Collections.emptyList();
+			}
 
-				addObjectsToPack(pw, ctx, pm);
-				if (pw.getObjectCount() == 0) {
-					List<DfsPackDescription> remove = toPrune();
-					if (remove.size() > 0)
-						objdb.commitPack(
-								Collections.<DfsPackDescription>emptyList(),
-								remove);
-					return;
-				}
-
-				boolean rollback = true;
-				DfsPackDescription pack = objdb.newPack(COMPACT,
-						estimatePackSize());
-				try {
-					writePack(objdb, pack, pw, pm);
-					writeIndex(objdb, pack, pw);
-
-					PackStatistics stats = pw.getStatistics();
-					pw.close();
-					pw = null;
-
-					pack.setPackStats(stats);
-					objdb.commitPack(Collections.singletonList(pack), toPrune());
-					newPacks.add(pack);
-					newStats.add(stats);
-					rollback = false;
-				} finally {
-					if (rollback)
-						objdb.rollbackPack(Collections.singletonList(pack));
-				}
-			} finally {
-				if (pw != null)
-					pw.close();
+			Collection<DfsPackDescription> remove = toPrune();
+			if (!commit.isEmpty() || !remove.isEmpty()) {
+				objdb.commitPack(commit, remove);
 			}
 		} finally {
 			rw = null;
 		}
 	}
 
+	private void compactPacks(DfsReader ctx, ProgressMonitor pm)
+			throws IOException, IncorrectObjectTypeException {
+		DfsObjDatabase objdb = repo.getObjectDatabase();
+		PackConfig pc = new PackConfig(repo);
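+		// Compaction stitches existing packs together without recompressing:
+		// reuse stored deltas and objects as-is and skip new delta search.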
+		pc.setIndexVersion(2);
+		pc.setDeltaCompress(false);
+		pc.setReuseDeltas(true);
+		pc.setReuseObjects(true);
+
+		PackWriter pw = new PackWriter(pc, ctx);
+		try {
+			pw.setDeltaBaseAsOffset(true);
+			pw.setReuseDeltaCommits(false);
+
+			addObjectsToPack(pw, ctx, pm);
+			if (pw.getObjectCount() == 0) {
+				return;
+			}
+
+			boolean rollback = true;
+			initOutDesc(objdb);
+			try {
+				writePack(objdb, outDesc, pw, pm);
+				writeIndex(objdb, outDesc, pw);
+
+				PackStatistics stats = pw.getStatistics();
+				pw.close();
+				pw = null;
+
+				outDesc.setPackStats(stats);
+				newStats = stats;
+				rollback = false;
+			} finally {
+				if (rollback) {
+					objdb.rollbackPack(Collections.singletonList(outDesc));
+				}
+			}
+		} finally {
+			if (pw != null) {
+				pw.close();
+			}
+		}
+	}
+
 	private long estimatePackSize() {
 		// Every pack file contains 12 bytes of header and 20 bytes of trailer.
 		// Include the final pack file header and trailer size here and ignore
@@ -263,27 +318,81 @@
 		return size;
 	}
 
+	private void compactReftables(DfsReader ctx) throws IOException {
+		DfsObjDatabase objdb = repo.getObjectDatabase();
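+		// Order tables oldest first so entries in newer tables shadow
+		// older ones when merged.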
+		Collections.sort(srcReftables, objdb.reftableComparator());
+
+		try (ReftableStack stack = ReftableStack.open(ctx, srcReftables)) {
+			initOutDesc(objdb);
+			ReftableCompactor compact = new ReftableCompactor();
+			compact.addAll(stack.readers());
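+			// Keep deletions: tables beneath this compaction may still
+			// contain the refs those deletions hide.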
+			compact.setIncludeDeletes(true);
+			writeReftable(objdb, outDesc, compact);
+		}
+	}
+
+	private void initOutDesc(DfsObjDatabase objdb) throws IOException {
+		if (outDesc == null) {
+			outDesc = objdb.newPack(COMPACT, estimatePackSize());
+		}
+	}
+
 	/** @return all of the source packs that fed into this compaction. */
-	public List<DfsPackDescription> getSourcePacks() {
-		return toPrune();
+	public Collection<DfsPackDescription> getSourcePacks() {
+		Set<DfsPackDescription> src = new HashSet<>();
+		for (DfsPackFile pack : srcPacks) {
+			src.add(pack.getPackDescription());
+		}
+		for (DfsReftable table : srcReftables) {
+			src.add(table.getPackDescription());
+		}
+		return src;
 	}
 
 	/** @return new packs created by this compaction. */
 	public List<DfsPackDescription> getNewPacks() {
-		return newPacks;
+		return outDesc != null
+				? Collections.singletonList(outDesc)
+				: Collections.emptyList();
 	}
 
 	/** @return statistics corresponding to the {@link #getNewPacks()}. */
 	public List<PackStatistics> getNewPackStatistics() {
-		return newStats;
+		return newStats != null
+				? Collections.singletonList(newStats)
+				: Collections.emptyList();
 	}
 
-	private List<DfsPackDescription> toPrune() {
-		int cnt = srcPacks.size();
-		List<DfsPackDescription> all = new ArrayList<>(cnt);
-		for (DfsPackFile pack : srcPacks)
-			all.add(pack.getPackDescription());
-		return all;
+	private Collection<DfsPackDescription> toPrune() {
+		Set<DfsPackDescription> packs = new HashSet<>();
+		for (DfsPackFile pack : srcPacks) {
+			packs.add(pack.getPackDescription());
+		}
+
+		Set<DfsPackDescription> reftables = new HashSet<>();
+		for (DfsReftable table : srcReftables) {
+			reftables.add(table.getPackDescription());
+		}
+
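+		// A single DfsPackDescription can name both a pack file and a
+		// reftable. Keep a description out of the prune set while it still
+		// owns a live file of the other type that this compaction did not
+		// consume.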
+		for (Iterator<DfsPackDescription> i = packs.iterator(); i.hasNext();) {
+			DfsPackDescription d = i.next();
+			if (d.hasFileExt(REFTABLE) && !reftables.contains(d)) {
+				i.remove();
+			}
+		}
+
+		for (Iterator<DfsPackDescription> i = reftables.iterator();
+				i.hasNext();) {
+			DfsPackDescription d = i.next();
+			if (d.hasFileExt(PACK) && !packs.contains(d)) {
+				i.remove();
+			}
+		}
+
+		Set<DfsPackDescription> toPrune = new HashSet<>();
+		toPrune.addAll(packs);
+		toPrune.addAll(reftables);
+		return toPrune;
 	}
 
 	private void addObjectsToPack(PackWriter pw, DfsReader ctx,
@@ -390,6 +499,30 @@
 		}
 	}
 
+	private void writeReftable(DfsObjDatabase objdb, DfsPackDescription pack,
+			ReftableCompactor compact) throws IOException {
+		try (DfsOutputStream out = objdb.writeFile(pack, REFTABLE)) {
+			compact.setConfig(configureReftable(reftableConfig, out));
+			compact.compact(out);
+			pack.addFileExt(REFTABLE);
+			pack.setReftableStats(compact.getStats());
+		}
+	}
+
+	static ReftableConfig configureReftable(ReftableConfig cfg,
+			DfsOutputStream out) {
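+		// Copy the caller's config, then align the ref block size with the
+		// stream's block size, falling back to the DfsBlockCache block size
+		// when the stream does not report a usable one.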
+		cfg = new ReftableConfig(cfg);
+		if (out.blockSize() >= 256) {
+			cfg.setRefBlockSize(out.blockSize());
+		} else {
+			int sz = DfsBlockCache.getInstance().getBlockSize();
+			if (cfg.getRefBlockSize() < sz) {
+				cfg.setRefBlockSize(sz);
+			}
+		}
+		return cfg;
+	}
+
 	private static class ObjectIdWithOffset extends ObjectId {
 		final long offset;
 
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableCompactor.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableCompactor.java
index 4f92267..8f7a717 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableCompactor.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableCompactor.java
@@ -159,9 +159,16 @@
 	 *            tables to compact. Tables should be ordered oldest first/most
 	 *            recent last so that the more recent tables can shadow the
 	 *            older results. Caller is responsible for closing the readers.
+	 * @throws IOException
+	 *             if the update indexes of a reader cannot be accessed.
 	 */
-	public void addAll(List<? extends Reftable> readers) {
+	public void addAll(List<? extends Reftable> readers) throws IOException {
 		tables.addAll(readers);
+		for (Reftable r : readers) {
+			if (r instanceof ReftableReader) {
+				adjustUpdateIndexes((ReftableReader) r);
+			}
+		}
 	}
 
 	/**
@@ -178,7 +185,7 @@
 	 * @return {@code true} if the compactor accepted this table; {@code false}
 	 *         if the compactor has reached its limit.
 	 * @throws IOException
-	 *             if size of {@code reader} cannot be read.
+	 *             if the size of {@code reader} or its update indexes cannot be read.
 	 */
 	public boolean tryAddFirst(ReftableReader reader) throws IOException {
 		long sz = reader.size();
@@ -186,10 +193,20 @@
 			return false;
 		}
 		bytesToCompact += sz;
+		adjustUpdateIndexes(reader);
 		tables.addFirst(reader);
 		return true;
 	}
 
+	private void adjustUpdateIndexes(ReftableReader reader) throws IOException {
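+		// Widen [minUpdateIndex, maxUpdateIndex] to cover this reader;
+		// minUpdateIndex == 0 means no table has contributed a range yet.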
+		if (minUpdateIndex == 0) {
+			minUpdateIndex = reader.minUpdateIndex();
+		} else {
+			minUpdateIndex = Math.min(minUpdateIndex, reader.minUpdateIndex());
+		}
+		maxUpdateIndex = Math.max(maxUpdateIndex, reader.maxUpdateIndex());
+	}
+
 	/**
 	 * Write a compaction to {@code out}.
 	 *