| /* |
| * Copyright (C) 2017, Google Inc. |
| * and other copyright owners as documented in the project's IP log. |
| * |
| * This program and the accompanying materials are made available |
| * under the terms of the Eclipse Distribution License v1.0 which |
| * accompanies this distribution, is reproduced below, and is |
| * available at http://www.eclipse.org/org/documents/edl-v10.php |
| * |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or |
| * without modification, are permitted provided that the following |
| * conditions are met: |
| * |
| * - Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * |
| * - Redistributions in binary form must reproduce the above |
| * copyright notice, this list of conditions and the following |
| * disclaimer in the documentation and/or other materials provided |
| * with the distribution. |
| * |
| * - Neither the name of the Eclipse Foundation, Inc. nor the |
| * names of its contributors may be used to endorse or promote |
| * products derived from this software without specific prior |
| * written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
| * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
| * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
| * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| package org.eclipse.jgit.internal.storage.reftable; |
| |
| import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.FILE_FOOTER_LEN; |
| import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.FILE_HEADER_LEN; |
| import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.FILE_HEADER_MAGIC; |
| import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.INDEX_BLOCK_TYPE; |
| import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.MAX_BLOCK_SIZE; |
| import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.REF_BLOCK_TYPE; |
| import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.VERSION_1; |
| |
| import java.io.IOException; |
| import java.io.OutputStream; |
| import java.util.ArrayList; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Set; |
| import java.util.zip.CRC32; |
| |
| import org.eclipse.jgit.annotations.Nullable; |
| import org.eclipse.jgit.internal.storage.reftable.ChunkWriter.Entry; |
| import org.eclipse.jgit.internal.storage.reftable.ChunkWriter.IndexEntry; |
| import org.eclipse.jgit.internal.storage.reftable.ChunkWriter.ObjEntry; |
| import org.eclipse.jgit.internal.storage.reftable.ChunkWriter.RefEntry; |
| import org.eclipse.jgit.internal.storage.reftable.ChunkWriter.TextEntry; |
| import org.eclipse.jgit.lib.AbbreviatedObjectId; |
| import org.eclipse.jgit.lib.AnyObjectId; |
| import org.eclipse.jgit.lib.Constants; |
| import org.eclipse.jgit.lib.ObjectId; |
| import org.eclipse.jgit.lib.ObjectIdOwnerMap; |
| import org.eclipse.jgit.lib.ObjectIdSubclassMap; |
| import org.eclipse.jgit.lib.PersonIdent; |
| import org.eclipse.jgit.lib.Ref; |
| import org.eclipse.jgit.util.IntList; |
| import org.eclipse.jgit.util.NB; |
| |
| /** |
| * Writes a reftable formatted file. |
| * <p> |
| * A reftable can be written in a streaming fashion, provided the caller sorts |
| * all references. A {@link ReftableWriter} is single-use, and not thread-safe. |
| */ |
| public class ReftableWriter { |
| private ReftableConfig config; |
| private int refBlockSize; |
| private int logBlockSize; |
| private int restartInterval; |
| private boolean indexObjects; |
| |
| private long minUpdateIndex; |
| private long maxUpdateIndex; |
| private long refRootChunkAddr; |
| private long objRootChunkAddr; |
| |
| private ReftableOutputStream out; |
| private ObjectIdSubclassMap<RefList> obj2ref; |
| |
| private List<IndexEntry> refIndex; |
| private List<IndexEntry> objIndex; |
| private ChunkWriter cur; |
| |
| private long refCnt; |
| private int objCnt; |
| private long logCnt; |
| private long refBytes; |
| private long objBytes; |
| private int refBlocks; |
| private int objBlocks; |
| private int refIndexSize; |
| private int objIndexSize; |
| private int objIdLen; |
| |
| private int indexLevels; |
| private Stats stats; |
| |
| /** Initialize a writer with a default configuration. */ |
| public ReftableWriter() { |
| this(new ReftableConfig()); |
| } |
| |
| /** |
| * Initialize a writer with a specific configuration. |
| * |
| * @param cfg |
| * configuration for the writer. |
| */ |
| public ReftableWriter(ReftableConfig cfg) { |
| config = cfg; |
| } |
| |
| /** |
| * @param cfg |
| * configuration for the writer. |
| * @return {@code this} |
| */ |
| public ReftableWriter setConfig(ReftableConfig cfg) { |
| this.config = cfg != null ? cfg : new ReftableConfig(); |
| return this; |
| } |
| |
| /** |
| * @param min |
| * the minimum update index for log entries that appear in this |
| * reftable. This should be 1 higher than the prior reftable's |
| * {@code maxUpdateIndex} if this table will be used in a stack. |
| * @return {@code this} |
| */ |
| public ReftableWriter setMinUpdateIndex(long min) { |
| minUpdateIndex = min; |
| return this; |
| } |
| |
| /** |
| * @param max |
| * the maximum update index for log entries that appear in this |
| * reftable. This should be at least 1 higher than the prior |
| * reftable's {@code maxUpdateIndex} if this table will be used |
| * in a stack. |
| * @return {@code this} |
| */ |
| public ReftableWriter setMaxUpdateIndex(long max) { |
| maxUpdateIndex = max; |
| return this; |
| } |
| |
| /** |
| * Begin writing the reftable. |
| * |
| * @param os |
| * stream to write the table to. Caller is responsible for |
| * closing the stream after invoking {@link #finish()}. |
| * @return {@code this} |
| * @throws IOException |
| * if reftable header cannot be written. |
| */ |
| public ReftableWriter begin(OutputStream os) throws IOException { |
| refBlockSize = config.getRefBlockSize(); |
| logBlockSize = config.getLogBlockSize(); |
| restartInterval = config.getRestartInterval(); |
| indexObjects = config.isIndexObjects(); |
| |
| if (refBlockSize <= 0) { |
| refBlockSize = 4 << 10; |
| } else if (refBlockSize > MAX_BLOCK_SIZE) { |
| throw new IllegalArgumentException(); |
| } |
| if (logBlockSize <= 0) { |
| logBlockSize = 2 * refBlockSize; |
| } |
| if (restartInterval <= 0) { |
| restartInterval = refBlockSize < (60 << 10) ? 16 : 64; |
| } |
| |
| refIndex = new ArrayList<>(); |
| out = new ReftableOutputStream(os, refBlockSize); |
| if (indexObjects) { |
| obj2ref = new ObjectIdSubclassMap<>(); |
| } |
| writeFileHeader(); |
| return this; |
| } |
| |
| /** |
| * Sort a collection of references and write them to the reftable. |
| * |
| * @param refs |
| * references to sort and write. |
| * @return {@code this} |
| * @throws IOException |
| * reftable cannot be written. |
| */ |
| public ReftableWriter sortAndWriteRefs(Collection<Ref> refs) |
| throws IOException { |
| Iterator<RefEntry> itr = refs.stream() |
| .map(RefEntry::new) |
| .sorted(Entry::compare) |
| .iterator(); |
| while (itr.hasNext()) { |
| RefEntry entry = itr.next(); |
| int blockId = write(refIndex, entry); |
| indexRef(entry.ref, blockId); |
| } |
| return this; |
| } |
| |
| /** |
| * Write one reference to the reftable. |
| * <p> |
| * References must be passed in sorted order. |
| * |
| * @param ref |
| * the reference to store. |
| * @throws IOException |
| * reftable cannot be written. |
| */ |
| public void writeRef(Ref ref) throws IOException { |
| int blockId = write(refIndex, new RefEntry(ref)); |
| indexRef(ref, blockId); |
| } |
| |
| void writeText(String name, String content) throws IOException { |
| write(refIndex, new TextEntry(name, content)); |
| } |
| |
| private void indexRef(Ref ref, int blockId) { |
| if (indexObjects && !ref.isSymbolic()) { |
| indexId(ref.getObjectId(), blockId); |
| indexId(ref.getPeeledObjectId(), blockId); |
| } |
| refCnt++; |
| } |
| |
| private void indexId(ObjectId id, int blockId) { |
| if (id != null) { |
| RefList l = obj2ref.get(id); |
| if (l == null) { |
| l = new RefList(id); |
| obj2ref.add(l); |
| } |
| l.addBlock(blockId); |
| } |
| } |
| |
| /** |
| * Write one reflog entry to the reftable. |
| * <p> |
| * Reflog entries must be written in reference name and descending |
| * {@code updateIndex} (highest first) order. |
| * |
| * @param ref |
| * name of the reference. |
| * @param updateIndex |
| * identifier of the transaction that created the log record. The |
| * {@code updateIndex} must be unique within the scope of |
| * {@code ref}, and must be within the bounds defined by |
| * {@code minUpdateIndex <= updateIndex <= maxUpdateIndex}. |
| * @param who |
| * committer of the reflog entry. |
| * @param oldId |
| * prior id; pass {@link ObjectId#zeroId()} for creations. |
| * @param newId |
| * new id; pass {@link ObjectId#zeroId()} for deletions. |
| * @param message |
| * optional message (may be null). |
| * @throws IOException |
| * reftable cannot be written. |
| */ |
| public void writeLog(String ref, long updateIndex, PersonIdent who, |
| ObjectId oldId, ObjectId newId, @Nullable String message) |
| throws IOException { |
| } |
| |
| private int write(List<IndexEntry> idx, ChunkWriter.Entry entry) |
| throws IOException { |
| if (cur == null) { |
| beginBlock(entry); |
| } else if (!cur.tryAdd(entry)) { |
| idx.add(new IndexEntry(cur.lastKey(), out.size())); |
| cur.writeTo(out); |
| beginBlock(entry); |
| } |
| return out.blockCount(); |
| } |
| |
| private void beginBlock(ChunkWriter.Entry entry) |
| throws BlockSizeTooSmallException { |
| byte type = entry.blockType(); |
| int bs = out.bytesAvailableInBlock(); |
| cur = new ChunkWriter(type, bs); |
| cur.addFirst(entry); |
| } |
| |
| /** |
| * @return an estimate of the current size in bytes of the reftable, if it |
| * was finished right now. The estimate is only accurate if the |
| * configuration has {@link ReftableConfig#setIndexObjects(boolean)} |
| * to {@code false}. |
| */ |
| public long estimateTotalBytes() { |
| return 0; |
| } |
| |
| /** |
| * Finish writing the reftable by writing its trailer. |
| * |
| * @return {@code this} |
| * @throws IOException |
| * reftable cannot be written. |
| */ |
| public ReftableWriter finish() throws IOException { |
| finishRef(); |
| writeFileFooter(); |
| |
| stats = new Stats(this, out); |
| refIndex = null; |
| cur = null; |
| out = null; |
| obj2ref = null; |
| return this; |
| } |
| |
| private void finishRef() throws IOException { |
| if (cur != null && cur.blockType() == REF_BLOCK_TYPE) { |
| refIndex.add(new IndexEntry(cur.lastKey(), out.size())); |
| cur.writeTo(out); |
| cur = null; |
| |
| refBlocks = out.blockCount(); |
| refRootChunkAddr = writeIndex(refIndex); |
| if (refRootChunkAddr > 0) { |
| refIndexSize = (int) (out.size() - refRootChunkAddr); |
| } |
| refBytes = out.size(); |
| |
| if (indexObjects && !obj2ref.isEmpty()) { |
| writeObjBlocks(); |
| } |
| obj2ref = null; |
| } |
| } |
| |
| private void writeObjBlocks() throws IOException { |
| List<RefList> sorted = sortById(obj2ref); |
| obj2ref = null; |
| objIdLen = shortestUniqueAbbreviation(sorted); |
| |
| objCnt = sorted.size(); |
| objIndex = new ArrayList<>(); |
| for (RefList l : sorted) { |
| write(objIndex, new ObjEntry(objIdLen, l, l.blockIds)); |
| } |
| objBlocks = (out.blockCount() + 1) - refBlocks; |
| objRootChunkAddr = 0; |
| if (objRootChunkAddr > 0) { |
| objIndexSize = (int) (out.size() - objRootChunkAddr); |
| } |
| objBytes = out.size(); |
| } |
| |
| private long writeIndex(List<IndexEntry> idx) throws IOException { |
| long addr = 0; |
| while (!idx.isEmpty()) { |
| addr = out.size(); |
| List<IndexEntry> l2 = new ArrayList<>(); |
| int bs = out.bytesAvailableInBlock(); |
| ChunkWriter c = new ChunkWriter(INDEX_BLOCK_TYPE, bs); |
| for (IndexEntry e : idx) { |
| if (!c.tryAdd(e.forIndexAt(addr))) { |
| l2.add(new IndexEntry(c.lastKey(), addr)); |
| c.writeTo(out); |
| |
| addr = out.size(); |
| c = new ChunkWriter(INDEX_BLOCK_TYPE, bs); |
| c.addFirst(e.forIndexAt(addr)); |
| } |
| } |
| if (!l2.isEmpty()) { |
| l2.add(new IndexEntry(c.lastKey(), addr)); |
| } |
| c.writeTo(out); |
| |
| indexLevels++; |
| idx = l2; |
| } |
| return addr; |
| } |
| |
| private void writeFileHeader() throws IOException { |
| byte[] hdr = new byte[FILE_HEADER_LEN]; |
| encodeHeader(hdr); |
| out.write(hdr, 0, FILE_HEADER_LEN); |
| out.flushFileHeader(); |
| } |
| |
| private void encodeHeader(byte[] hdr) { |
| System.arraycopy(FILE_HEADER_MAGIC, 0, hdr, 0, 4); |
| NB.encodeInt32(hdr, 4, (VERSION_1 << 24)); |
| NB.encodeInt64(hdr, 8, minUpdateIndex); |
| NB.encodeInt64(hdr, 16, maxUpdateIndex); |
| NB.encodeInt64(hdr, 24, refRootChunkAddr); |
| NB.encodeInt64(hdr, 32, objRootChunkAddr); |
| } |
| |
| private void writeFileFooter() throws IOException { |
| int ftrLen = FILE_FOOTER_LEN; |
| byte[] ftr = new byte[ftrLen]; |
| encodeHeader(ftr); |
| CRC32 crc = new CRC32(); |
| crc.update(ftr, 0, ftrLen - 4); |
| NB.encodeInt32(ftr, ftrLen - 4, (int) crc.getValue()); |
| |
| out.write(ftr); |
| out.finishFile(); |
| } |
| |
| /** @return statistics of the last written reftable. */ |
| public Stats getStats() { |
| return stats; |
| } |
| |
| /** Statistics about a written reftable. */ |
| public static class Stats { |
| private final int refBlockSize; |
| private final int logBlockSize; |
| private final int restartInterval; |
| |
| private final long minUpdateIndex; |
| private final long maxUpdateIndex; |
| |
| private final long refCnt; |
| private final int objCnt; |
| private final int objIdLen; |
| private final long logCnt; |
| private final long refBytes; |
| private final long objBytes; |
| private long logBytes; |
| private final long paddingUsed; |
| private final long totalBytes; |
| private final int refBlocks; |
| private final int objBlocks; |
| private int logBlocks; |
| |
| private int refIndexKeys; |
| private final int indexLevels; |
| private final int refIndexSize; |
| private final int objIndexSize; |
| |
| Stats(ReftableWriter w, ReftableOutputStream o) { |
| refBlockSize = w.refBlockSize; |
| logBlockSize = w.logBlockSize; |
| restartInterval = w.restartInterval; |
| |
| minUpdateIndex = w.minUpdateIndex; |
| maxUpdateIndex = w.maxUpdateIndex; |
| |
| refCnt = w.refCnt; |
| objCnt = w.objCnt; |
| objIdLen = w.objIdLen; |
| logCnt = w.logCnt; |
| refBytes = w.refBytes; |
| objBytes = w.objBytes; |
| paddingUsed = o.paddingUsed(); |
| totalBytes = o.size(); |
| refBlocks = w.refBlocks; |
| objBlocks = w.objBlocks; |
| indexLevels = w.indexLevels; |
| |
| refIndexSize = w.refIndexSize; |
| objIndexSize = w.objIndexSize; |
| } |
| |
| /** @return number of bytes in a ref block. */ |
| public int refBlockSize() { |
| return refBlockSize; |
| } |
| |
| /** @return number of bytes to compress into a log block. */ |
| public int logBlockSize() { |
| return logBlockSize; |
| } |
| |
| /** @return number of references between binary search markers. */ |
| public int restartInterval() { |
| return restartInterval; |
| } |
| |
| /** @return smallest update index contained in this reftable. */ |
| public long minUpdateIndex() { |
| return minUpdateIndex; |
| } |
| |
| /** @return largest update index contained in this reftable. */ |
| public long maxUpdateIndex() { |
| return maxUpdateIndex; |
| } |
| |
| /** @return total number of references in the reftable. */ |
| public long refCount() { |
| return refCnt; |
| } |
| |
| /** @return number of unique objects in the reftable. */ |
| public long objCount() { |
| return objCnt; |
| } |
| |
| /** @return total number of log records in the reftable. */ |
| public long logCount() { |
| return logCnt; |
| } |
| |
| /** @return number of ref blocks in the output, excluding index. */ |
| public int refBlockCount() { |
| return refBlocks; |
| } |
| |
| /** @return number of object blocks in the output, excluding index. */ |
| public int objBlockCount() { |
| return objBlocks; |
| } |
| |
| /** @return number of log blocks in the output, excluding index. */ |
| public int logBlockCount() { |
| return logBlocks; |
| } |
| |
| /** @return number of bytes for references, including ref index. */ |
| public long refBytes() { |
| return refBytes; |
| } |
| |
| /** @return number of bytes for objects, including object index. */ |
| public long objBytes() { |
| return objBytes; |
| } |
| |
| /** @return number of bytes for log, including log index. */ |
| public long logBytes() { |
| return logBytes; |
| } |
| |
| /** @return total number of bytes in the reftable. */ |
| public long totalBytes() { |
| return totalBytes; |
| } |
| |
| /** @return bytes of padding used to maintain block alignment. */ |
| public long paddingBytes() { |
| return paddingUsed; |
| } |
| |
| /** @return number of keys in the ref index; 0 if no index was used. */ |
| public int refIndexKeys() { |
| return refIndexKeys; |
| } |
| |
| /** @return number of bytes in the ref index; 0 if no index was used. */ |
| public int refIndexSize() { |
| return refIndexSize; |
| } |
| |
| /** @return number of bytes in the object index; 0 if no index. */ |
| public int objIndexSize() { |
| return objIndexSize; |
| } |
| |
| /** |
| * @return number of bytes required to uniquely identify all objects in |
| * the reftable. Unique abbreviations in hex would be |
| * {@code 2 * objIdLength()}. |
| */ |
| public int objIdLength() { |
| return objIdLen; |
| } |
| |
| /** @return estimated number of disk seeks per ref read. */ |
| public double diskSeeksPerRead() { |
| return indexLevels + 1; |
| } |
| } |
| |
| private static List<RefList> sortById(ObjectIdSubclassMap<RefList> m) { |
| List<RefList> s = new ArrayList<>(m.size()); |
| for (RefList l : m) { |
| s.add(l); |
| } |
| Collections.sort(s); |
| return s; |
| } |
| |
| private static int shortestUniqueAbbreviation(List<RefList> in) { |
| Set<AbbreviatedObjectId> tmp = new HashSet<>((int) (in.size() * 0.75f)); |
| int bytes = 2; |
| retry: for (;;) { |
| int hexLen = bytes * 2; |
| for (ObjectId id : in) { |
| AbbreviatedObjectId a = id.abbreviate(hexLen); |
| if (!tmp.add(a)) { |
| if (++bytes >= Constants.OBJECT_ID_LENGTH) { |
| return Constants.OBJECT_ID_LENGTH; |
| } |
| tmp.clear(); |
| continue retry; |
| } |
| } |
| return bytes; |
| } |
| } |
| |
| private static class RefList extends ObjectIdOwnerMap.Entry { |
| final IntList blockIds = new IntList(2); |
| |
| RefList(AnyObjectId id) { |
| super(id); |
| } |
| |
| void addBlock(int id) { |
| if (!blockIds.contains(id)) { |
| blockIds.add(id); |
| } |
| } |
| } |
| } |