blob: 9dd6dd706a3d206675eafa22f7e5332a1396efe0 [file] [log] [blame]
/*
* Copyright (C) 2017, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.internal.storage.reftable;
import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.FILE_FOOTER_LEN;
import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.FILE_HEADER_LEN;
import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.FILE_HEADER_MAGIC;
import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.INDEX_BLOCK_TYPE;
import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.MAX_BLOCK_SIZE;
import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.REF_BLOCK_TYPE;
import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.VERSION_1;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.zip.CRC32;
import org.eclipse.jgit.annotations.Nullable;
import org.eclipse.jgit.internal.storage.reftable.ChunkWriter.Entry;
import org.eclipse.jgit.internal.storage.reftable.ChunkWriter.IndexEntry;
import org.eclipse.jgit.internal.storage.reftable.ChunkWriter.ObjEntry;
import org.eclipse.jgit.internal.storage.reftable.ChunkWriter.RefEntry;
import org.eclipse.jgit.internal.storage.reftable.ChunkWriter.TextEntry;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdOwnerMap;
import org.eclipse.jgit.lib.ObjectIdSubclassMap;
import org.eclipse.jgit.lib.PersonIdent;
import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.util.IntList;
import org.eclipse.jgit.util.NB;
/**
* Writes a reftable formatted file.
* <p>
* A reftable can be written in a streaming fashion, provided the caller sorts
* all references. A {@link ReftableWriter} is single-use, and not thread-safe.
*/
public class ReftableWriter {
private ReftableConfig config;
private int refBlockSize;
private int logBlockSize;
private int restartInterval;
private boolean indexObjects;
private long minUpdateIndex;
private long maxUpdateIndex;
private long refRootChunkAddr;
private long objRootChunkAddr;
private ReftableOutputStream out;
private ObjectIdSubclassMap<RefList> obj2ref;
private List<IndexEntry> refIndex;
private List<IndexEntry> objIndex;
private ChunkWriter cur;
private long refCnt;
private int objCnt;
private long logCnt;
private long refBytes;
private long objBytes;
private int refBlocks;
private int objBlocks;
private int refIndexSize;
private int objIndexSize;
private int objIdLen;
private int indexLevels;
private Stats stats;
/** Initialize a writer with a default configuration. */
public ReftableWriter() {
this(new ReftableConfig());
}
/**
* Initialize a writer with a specific configuration.
*
* @param cfg
* configuration for the writer.
*/
public ReftableWriter(ReftableConfig cfg) {
config = cfg;
}
/**
* @param cfg
* configuration for the writer.
* @return {@code this}
*/
public ReftableWriter setConfig(ReftableConfig cfg) {
this.config = cfg != null ? cfg : new ReftableConfig();
return this;
}
/**
* @param min
* the minimum update index for log entries that appear in this
* reftable. This should be 1 higher than the prior reftable's
* {@code maxUpdateIndex} if this table will be used in a stack.
* @return {@code this}
*/
public ReftableWriter setMinUpdateIndex(long min) {
minUpdateIndex = min;
return this;
}
/**
* @param max
* the maximum update index for log entries that appear in this
* reftable. This should be at least 1 higher than the prior
* reftable's {@code maxUpdateIndex} if this table will be used
* in a stack.
* @return {@code this}
*/
public ReftableWriter setMaxUpdateIndex(long max) {
maxUpdateIndex = max;
return this;
}
/**
* Begin writing the reftable.
*
* @param os
* stream to write the table to. Caller is responsible for
* closing the stream after invoking {@link #finish()}.
* @return {@code this}
* @throws IOException
* if reftable header cannot be written.
*/
public ReftableWriter begin(OutputStream os) throws IOException {
refBlockSize = config.getRefBlockSize();
logBlockSize = config.getLogBlockSize();
restartInterval = config.getRestartInterval();
indexObjects = config.isIndexObjects();
if (refBlockSize <= 0) {
refBlockSize = 4 << 10;
} else if (refBlockSize > MAX_BLOCK_SIZE) {
throw new IllegalArgumentException();
}
if (logBlockSize <= 0) {
logBlockSize = 2 * refBlockSize;
}
if (restartInterval <= 0) {
restartInterval = refBlockSize < (60 << 10) ? 16 : 64;
}
refIndex = new ArrayList<>();
out = new ReftableOutputStream(os, refBlockSize);
if (indexObjects) {
obj2ref = new ObjectIdSubclassMap<>();
}
writeFileHeader();
return this;
}
/**
* Sort a collection of references and write them to the reftable.
*
* @param refs
* references to sort and write.
* @return {@code this}
* @throws IOException
* reftable cannot be written.
*/
public ReftableWriter sortAndWriteRefs(Collection<Ref> refs)
throws IOException {
Iterator<RefEntry> itr = refs.stream()
.map(RefEntry::new)
.sorted(Entry::compare)
.iterator();
while (itr.hasNext()) {
RefEntry entry = itr.next();
int blockId = write(refIndex, entry);
indexRef(entry.ref, blockId);
}
return this;
}
/**
* Write one reference to the reftable.
* <p>
* References must be passed in sorted order.
*
* @param ref
* the reference to store.
* @throws IOException
* reftable cannot be written.
*/
public void writeRef(Ref ref) throws IOException {
int blockId = write(refIndex, new RefEntry(ref));
indexRef(ref, blockId);
}
void writeText(String name, String content) throws IOException {
write(refIndex, new TextEntry(name, content));
}
private void indexRef(Ref ref, int blockId) {
if (indexObjects && !ref.isSymbolic()) {
indexId(ref.getObjectId(), blockId);
indexId(ref.getPeeledObjectId(), blockId);
}
refCnt++;
}
private void indexId(ObjectId id, int blockId) {
if (id != null) {
RefList l = obj2ref.get(id);
if (l == null) {
l = new RefList(id);
obj2ref.add(l);
}
l.addBlock(blockId);
}
}
/**
* Write one reflog entry to the reftable.
* <p>
* Reflog entries must be written in reference name and descending
* {@code updateIndex} (highest first) order.
*
* @param ref
* name of the reference.
* @param updateIndex
* identifier of the transaction that created the log record. The
* {@code updateIndex} must be unique within the scope of
* {@code ref}, and must be within the bounds defined by
* {@code minUpdateIndex <= updateIndex <= maxUpdateIndex}.
* @param who
* committer of the reflog entry.
* @param oldId
* prior id; pass {@link ObjectId#zeroId()} for creations.
* @param newId
* new id; pass {@link ObjectId#zeroId()} for deletions.
* @param message
* optional message (may be null).
* @throws IOException
* reftable cannot be written.
*/
public void writeLog(String ref, long updateIndex, PersonIdent who,
ObjectId oldId, ObjectId newId, @Nullable String message)
throws IOException {
}
private int write(List<IndexEntry> idx, ChunkWriter.Entry entry)
throws IOException {
if (cur == null) {
beginBlock(entry);
} else if (!cur.tryAdd(entry)) {
idx.add(new IndexEntry(cur.lastKey(), out.size()));
cur.writeTo(out);
beginBlock(entry);
}
return out.blockCount();
}
private void beginBlock(ChunkWriter.Entry entry)
throws BlockSizeTooSmallException {
byte type = entry.blockType();
int bs = out.bytesAvailableInBlock();
cur = new ChunkWriter(type, bs);
cur.addFirst(entry);
}
/**
* @return an estimate of the current size in bytes of the reftable, if it
* was finished right now. The estimate is only accurate if the
* configuration has {@link ReftableConfig#setIndexObjects(boolean)}
* to {@code false}.
*/
public long estimateTotalBytes() {
return 0;
}
/**
* Finish writing the reftable by writing its trailer.
*
* @return {@code this}
* @throws IOException
* reftable cannot be written.
*/
public ReftableWriter finish() throws IOException {
finishRef();
writeFileFooter();
stats = new Stats(this, out);
refIndex = null;
cur = null;
out = null;
obj2ref = null;
return this;
}
private void finishRef() throws IOException {
if (cur != null && cur.blockType() == REF_BLOCK_TYPE) {
refIndex.add(new IndexEntry(cur.lastKey(), out.size()));
cur.writeTo(out);
cur = null;
refBlocks = out.blockCount();
refRootChunkAddr = writeIndex(refIndex);
if (refRootChunkAddr > 0) {
refIndexSize = (int) (out.size() - refRootChunkAddr);
}
refBytes = out.size();
if (indexObjects && !obj2ref.isEmpty()) {
writeObjBlocks();
}
obj2ref = null;
}
}
private void writeObjBlocks() throws IOException {
List<RefList> sorted = sortById(obj2ref);
obj2ref = null;
objIdLen = shortestUniqueAbbreviation(sorted);
objCnt = sorted.size();
objIndex = new ArrayList<>();
for (RefList l : sorted) {
write(objIndex, new ObjEntry(objIdLen, l, l.blockIds));
}
objBlocks = (out.blockCount() + 1) - refBlocks;
objRootChunkAddr = 0;
if (objRootChunkAddr > 0) {
objIndexSize = (int) (out.size() - objRootChunkAddr);
}
objBytes = out.size();
}
private long writeIndex(List<IndexEntry> idx) throws IOException {
long addr = 0;
while (!idx.isEmpty()) {
addr = out.size();
List<IndexEntry> l2 = new ArrayList<>();
int bs = out.bytesAvailableInBlock();
ChunkWriter c = new ChunkWriter(INDEX_BLOCK_TYPE, bs);
for (IndexEntry e : idx) {
if (!c.tryAdd(e.forIndexAt(addr))) {
l2.add(new IndexEntry(c.lastKey(), addr));
c.writeTo(out);
addr = out.size();
c = new ChunkWriter(INDEX_BLOCK_TYPE, bs);
c.addFirst(e.forIndexAt(addr));
}
}
if (!l2.isEmpty()) {
l2.add(new IndexEntry(c.lastKey(), addr));
}
c.writeTo(out);
indexLevels++;
idx = l2;
}
return addr;
}
private void writeFileHeader() throws IOException {
byte[] hdr = new byte[FILE_HEADER_LEN];
encodeHeader(hdr);
out.write(hdr, 0, FILE_HEADER_LEN);
out.flushFileHeader();
}
private void encodeHeader(byte[] hdr) {
System.arraycopy(FILE_HEADER_MAGIC, 0, hdr, 0, 4);
NB.encodeInt32(hdr, 4, (VERSION_1 << 24));
NB.encodeInt64(hdr, 8, minUpdateIndex);
NB.encodeInt64(hdr, 16, maxUpdateIndex);
NB.encodeInt64(hdr, 24, refRootChunkAddr);
NB.encodeInt64(hdr, 32, objRootChunkAddr);
}
private void writeFileFooter() throws IOException {
int ftrLen = FILE_FOOTER_LEN;
byte[] ftr = new byte[ftrLen];
encodeHeader(ftr);
CRC32 crc = new CRC32();
crc.update(ftr, 0, ftrLen - 4);
NB.encodeInt32(ftr, ftrLen - 4, (int) crc.getValue());
out.write(ftr);
out.finishFile();
}
/** @return statistics of the last written reftable. */
public Stats getStats() {
return stats;
}
/** Statistics about a written reftable. */
public static class Stats {
private final int refBlockSize;
private final int logBlockSize;
private final int restartInterval;
private final long minUpdateIndex;
private final long maxUpdateIndex;
private final long refCnt;
private final int objCnt;
private final int objIdLen;
private final long logCnt;
private final long refBytes;
private final long objBytes;
private long logBytes;
private final long paddingUsed;
private final long totalBytes;
private final int refBlocks;
private final int objBlocks;
private int logBlocks;
private int refIndexKeys;
private final int indexLevels;
private final int refIndexSize;
private final int objIndexSize;
Stats(ReftableWriter w, ReftableOutputStream o) {
refBlockSize = w.refBlockSize;
logBlockSize = w.logBlockSize;
restartInterval = w.restartInterval;
minUpdateIndex = w.minUpdateIndex;
maxUpdateIndex = w.maxUpdateIndex;
refCnt = w.refCnt;
objCnt = w.objCnt;
objIdLen = w.objIdLen;
logCnt = w.logCnt;
refBytes = w.refBytes;
objBytes = w.objBytes;
paddingUsed = o.paddingUsed();
totalBytes = o.size();
refBlocks = w.refBlocks;
objBlocks = w.objBlocks;
indexLevels = w.indexLevels;
refIndexSize = w.refIndexSize;
objIndexSize = w.objIndexSize;
}
/** @return number of bytes in a ref block. */
public int refBlockSize() {
return refBlockSize;
}
/** @return number of bytes to compress into a log block. */
public int logBlockSize() {
return logBlockSize;
}
/** @return number of references between binary search markers. */
public int restartInterval() {
return restartInterval;
}
/** @return smallest update index contained in this reftable. */
public long minUpdateIndex() {
return minUpdateIndex;
}
/** @return largest update index contained in this reftable. */
public long maxUpdateIndex() {
return maxUpdateIndex;
}
/** @return total number of references in the reftable. */
public long refCount() {
return refCnt;
}
/** @return number of unique objects in the reftable. */
public long objCount() {
return objCnt;
}
/** @return total number of log records in the reftable. */
public long logCount() {
return logCnt;
}
/** @return number of ref blocks in the output, excluding index. */
public int refBlockCount() {
return refBlocks;
}
/** @return number of object blocks in the output, excluding index. */
public int objBlockCount() {
return objBlocks;
}
/** @return number of log blocks in the output, excluding index. */
public int logBlockCount() {
return logBlocks;
}
/** @return number of bytes for references, including ref index. */
public long refBytes() {
return refBytes;
}
/** @return number of bytes for objects, including object index. */
public long objBytes() {
return objBytes;
}
/** @return number of bytes for log, including log index. */
public long logBytes() {
return logBytes;
}
/** @return total number of bytes in the reftable. */
public long totalBytes() {
return totalBytes;
}
/** @return bytes of padding used to maintain block alignment. */
public long paddingBytes() {
return paddingUsed;
}
/** @return number of keys in the ref index; 0 if no index was used. */
public int refIndexKeys() {
return refIndexKeys;
}
/** @return number of bytes in the ref index; 0 if no index was used. */
public int refIndexSize() {
return refIndexSize;
}
/** @return number of bytes in the object index; 0 if no index. */
public int objIndexSize() {
return objIndexSize;
}
/**
* @return number of bytes required to uniquely identify all objects in
* the reftable. Unique abbreviations in hex would be
* {@code 2 * objIdLength()}.
*/
public int objIdLength() {
return objIdLen;
}
/** @return estimated number of disk seeks per ref read. */
public double diskSeeksPerRead() {
return indexLevels + 1;
}
}
private static List<RefList> sortById(ObjectIdSubclassMap<RefList> m) {
List<RefList> s = new ArrayList<>(m.size());
for (RefList l : m) {
s.add(l);
}
Collections.sort(s);
return s;
}
private static int shortestUniqueAbbreviation(List<RefList> in) {
Set<AbbreviatedObjectId> tmp = new HashSet<>((int) (in.size() * 0.75f));
int bytes = 2;
retry: for (;;) {
int hexLen = bytes * 2;
for (ObjectId id : in) {
AbbreviatedObjectId a = id.abbreviate(hexLen);
if (!tmp.add(a)) {
if (++bytes >= Constants.OBJECT_ID_LENGTH) {
return Constants.OBJECT_ID_LENGTH;
}
tmp.clear();
continue retry;
}
}
return bytes;
}
}
private static class RefList extends ObjectIdOwnerMap.Entry {
final IntList blockIds = new IntList(2);
RefList(AnyObjectId id) {
super(id);
}
void addBlock(int id) {
if (!blockIds.contains(id)) {
blockIds.add(id);
}
}
}
}