/*
 * Copyright (c) 2013 TMate Software Ltd
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * For information on how to redistribute this software under
 * the terms of a license other than GNU General Public License
 * contact TMate Software at support@hg4j.com
 */
package org.tmatesoft.hg.internal;

import static org.tmatesoft.hg.internal.Internals.REVLOGV1_RECORD_SIZE;
import static org.tmatesoft.hg.repo.HgRepository.NO_REVISION;

import java.io.IOException;
import java.nio.ByteBuffer;

import org.tmatesoft.hg.core.HgIOException;
import org.tmatesoft.hg.core.Nodeid;
import org.tmatesoft.hg.core.SessionContext;
import org.tmatesoft.hg.internal.DataSerializer.ByteArrayDataSerializer;
import org.tmatesoft.hg.internal.DataSerializer.ByteArrayDataSource;
import org.tmatesoft.hg.internal.DataSerializer.DataSource;
import org.tmatesoft.hg.repo.HgInvalidControlFileException;
import org.tmatesoft.hg.repo.HgInvalidRevisionException;
import org.tmatesoft.hg.repo.HgInvalidStateException;
import org.tmatesoft.hg.repo.HgRuntimeException;

/**
 * Writes new revisions into a revlog, i.e. appends entries to its index and, when data is not inline, to the separate data file.
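 * 
 * A usage sketch, assuming the caller already holds an open {@link RevlogStream} and an
 * active write {@link Transaction} (variable names below are illustrative only):
 * <pre>
 *   RevlogStreamWriter writer = new RevlogStreamWriter(ctxSource, revlogStream, tr);
 *   Nodeid added = writer.addRevision(content, linkRevIndex, parentIndex1, parentIndex2);
 * </pre>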
 * 
 * TODO [post-1.1] separate operation to check if index is too big and split into index+data
 * 
 * @author Artem Tikhomirov
 * @author TMate Software Ltd.
 */
public class RevlogStreamWriter {

    private final DigestHelper dh = new DigestHelper();
    private final RevlogCompressor revlogDataZip;
    private final Transaction transaction;
    private int lastEntryBase, lastEntryIndex;
    private byte[] lastEntryContent;
    private Nodeid lastEntryRevision;
    private IntMap<Nodeid> revisionCache = new IntMap<Nodeid>(32);
    private RevlogStream revlogStream;

    public RevlogStreamWriter(SessionContext.Source ctxSource, RevlogStream stream, Transaction tr) {
        assert ctxSource != null;
        assert stream != null;
        assert tr != null;

        revlogDataZip = new RevlogCompressor(ctxSource.getSessionContext());
        revlogStream = stream;
        transaction = tr;
    }

    /**
     * Appends a new revision to this revlog.
     * 
     * @param content complete content of the new revision
     * @param linkRevision changelog revision index the new entry shall point back to
     * @param p1 revision index of the first parent, or NO_REVISION
     * @param p2 revision index of the second parent, or NO_REVISION
     * @return nodeid of the added revision
     * @throws HgIOException if revlog index or data file couldn't be written
     * @throws HgRuntimeException subclass thereof to indicate issues with the library. <em>Runtime exception</em>
     */
    public Nodeid addRevision(DataSource content, int linkRevision, int p1, int p2) throws HgIOException, HgRuntimeException {
        lastEntryRevision = Nodeid.NULL;
        int revCount = revlogStream.revisionCount();
        lastEntryIndex = revCount == 0 ? NO_REVISION : revCount - 1;
        populateLastEntry();
        //
        // build a patch against the tip revision and decide whether the patch or
        // the complete content is cheaper to store
        byte[] contentByteArray = toByteArray(content);
        Patch patch = GeneratePatchInspector.delta(lastEntryContent, contentByteArray);
        int patchSerializedLength = patch.serializedLength();

        final boolean writeComplete = preferCompleteOverPatch(patchSerializedLength, contentByteArray.length);
        DataSerializer.DataSource dataSource = writeComplete ? new ByteArrayDataSource(contentByteArray) : patch.new PatchDataSource();
        revlogDataZip.reset(dataSource);
        final int compressedLen;
        final boolean useCompressedData = preferCompressedOverComplete(revlogDataZip.getCompressedLength(), dataSource.serializeLength());
        if (useCompressedData) {
            compressedLen = revlogDataZip.getCompressedLength();
        } else {
            // compression wasn't effective enough, write the data as is
            compressedLen = dataSource.serializeLength() + 1 /* 1 byte for 'u' - uncompressed prefix byte */;
        }
        //
        Nodeid p1Rev = revision(p1);
        Nodeid p2Rev = revision(p2);
        byte[] revisionNodeidBytes = dh.sha1(p1Rev, p2Rev, contentByteArray).asBinary();
        //
        DataSerializer indexFile, dataFile;
        indexFile = dataFile = null;
        try {
            indexFile = revlogStream.getIndexStreamWriter(transaction);
            final boolean isInlineData = revlogStream.isInlineData();
            HeaderWriter revlogHeader = new HeaderWriter(isInlineData);
            revlogHeader.length(contentByteArray.length, compressedLen);
            revlogHeader.nodeid(revisionNodeidBytes);
            revlogHeader.linkRevision(linkRevision);
            revlogHeader.parents(p1, p2);
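            // base revision marks the start of the delta chain: a complete snapshot makes
            // the new entry (at index lastEntryIndex + 1) its own base, while a patch
            // keeps the base of the tip revision it was diffed against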
            revlogHeader.baseRevision(writeComplete ? lastEntryIndex + 1 : lastEntryBase);
            long lastEntryOffset = revlogStream.newEntryOffset();
            revlogHeader.offset(lastEntryOffset);
            //
            revlogHeader.serialize(indexFile);

            if (isInlineData) {
                dataFile = indexFile;
            } else {
                dataFile = revlogStream.getDataStreamWriter(transaction);
            }
            if (useCompressedData) {
                int actualCompressedLenWritten = revlogDataZip.writeCompressedData(dataFile);
                if (actualCompressedLenWritten != compressedLen) {
                    throw new HgInvalidStateException(String.format("Expected %d bytes of compressed data, but actually wrote %d in %s", compressedLen, actualCompressedLenWritten, revlogStream.getDataFileName()));
                }
            } else {
                dataFile.writeByte((byte) 'u');
                dataSource.serialize(dataFile);
            }

            lastEntryContent = contentByteArray;
            lastEntryBase = revlogHeader.baseRevision();
            lastEntryIndex++;
            lastEntryRevision = Nodeid.fromBinary(revisionNodeidBytes, 0);
            revisionCache.put(lastEntryIndex, lastEntryRevision);

            revlogStream.revisionAdded(lastEntryIndex, lastEntryRevision, lastEntryBase, lastEntryOffset);
        } finally {
            // indexFile is null if getIndexStreamWriter() failed; guard to avoid masking
            // the original exception with a NullPointerException from the finally block
            if (indexFile != null) {
                indexFile.done();
                if (dataFile != null && dataFile != indexFile) {
                    dataFile.done();
                }
            }
        }
        return lastEntryRevision;
    }

    private byte[] toByteArray(DataSource content) throws HgIOException, HgRuntimeException {
        ByteArrayDataSerializer ba = new ByteArrayDataSerializer();
        content.serialize(ba);
        return ba.toByteArray();
    }

    private Nodeid revision(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException {
        if (revisionIndex == NO_REVISION) {
            return Nodeid.NULL;
        }
        Nodeid n = revisionCache.get(revisionIndex);
        if (n == null) {
            n = Nodeid.fromBinary(revlogStream.nodeid(revisionIndex), 0);
            revisionCache.put(revisionIndex, n);
        }
        return n;
    }
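    /**
     * Reads complete content of the current tip revision into {@link #lastEntryContent}
     * to serve as the base for the delta of the revision being added; an empty revlog
     * has nothing to diff against, hence empty content.
     */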
    private void populateLastEntry() throws HgRuntimeException {
        if (lastEntryContent != null) {
            return;
        }
        if (lastEntryIndex != NO_REVISION) {
            assert lastEntryIndex >= 0;
            final IOException[] failure = new IOException[1];
            revlogStream.iterate(lastEntryIndex, lastEntryIndex, true, new RevlogStream.Inspector() {

                public void next(int revisionIndex, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) {
                    try {
                        lastEntryBase = baseRevision;
                        lastEntryRevision = Nodeid.fromBinary(nodeid, 0);
                        lastEntryContent = data.byteArray();
                    } catch (IOException ex) {
                        failure[0] = ex;
                    }
                }
            });
            if (failure[0] != null) {
                String m = String.format("Failed to get content of most recent revision %d", lastEntryIndex);
                throw revlogStream.initWithDataFile(new HgInvalidControlFileException(m, failure[0], null));
            }
        } else {
            lastEntryContent = new byte[0];
        }
    }

    public static boolean preferCompleteOverPatch(int patchLength, int fullContentLength) {
        return !decideWorthEffort(patchLength, fullContentLength);
    }

    public static boolean preferCompressedOverComplete(int compressedLen, int fullContentLength) {
        if (compressedLen <= 0) { // just in case, meaningless otherwise
            return false;
        }
        return decideWorthEffort(compressedLen, fullContentLength);
    }

    // true if the length obtained with extra effort (patching or compression) is worth it,
    // i.e. is less than 3/4 of the original length. E.g. with fullContentLength == 1000
    // the threshold is 750: a 600-byte patch qualifies, an 800-byte one doesn't.
    private static boolean decideWorthEffort(int lengthWithExtraEffort, int lengthWithoutEffort) {
        return lengthWithExtraEffort < (/* 3/4 of original */ lengthWithoutEffort - (lengthWithoutEffort >>> 2));
    }

    /* XXX public because HgCloneCommand uses it */
    public static class HeaderWriter implements DataSerializer.DataSource {
        private final ByteBuffer header;
        private final boolean isInline;
        private long offset;
        private int length, compressedLength;
        private int baseRev, linkRev, p1, p2;
        private byte[] nodeid;

        public HeaderWriter(boolean inline) {
            isInline = inline;
            header = ByteBuffer.allocate(REVLOGV1_RECORD_SIZE);
        }

        public HeaderWriter offset(long offset) {
            this.offset = offset;
            return this;
        }

        public int baseRevision() {
            return baseRev;
        }

        public HeaderWriter baseRevision(int baseRevision) {
            this.baseRev = baseRevision;
            return this;
        }

        public HeaderWriter length(int len, int compressedLen) {
            this.length = len;
            this.compressedLength = compressedLen;
            return this;
        }

        public HeaderWriter parents(int parent1, int parent2) {
            p1 = parent1;
            p2 = parent2;
            return this;
        }

        public HeaderWriter linkRevision(int linkRevision) {
            linkRev = linkRevision;
            return this;
        }

        public HeaderWriter nodeid(Nodeid n) {
            nodeid = n.toByteArray();
            return this;
        }

        public HeaderWriter nodeid(byte[] nodeidBytes) {
            nodeid = nodeidBytes;
            return this;
        }

        public void serialize(DataSerializer out) throws HgIOException {
            header.clear();
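            // the offset of the very first entry is always zero, so revlog reuses the
            // first four bytes of that record to store version and flags instead
            // (RevlogNG == 1, INLINEDATA set when data is interleaved with the index)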
            if (offset == 0) {
                int version = 1 /* RevlogNG */;
                if (isInline) {
                    version |= RevlogStream.INLINEDATA;
                }
                header.putInt(version);
                header.putInt(0);
            } else {
                // 48-bit offset packed together with 16 bits of (zero) flags
                header.putLong(offset << 16);
            }
            header.putInt(compressedLength);
            header.putInt(length);
            header.putInt(baseRev);
            header.putInt(linkRev);
            header.putInt(p1);
            header.putInt(p2);
            header.put(nodeid);
            // assume the 12 bytes left are zeros
            out.write(header.array(), 0, header.capacity());

            // regardless of whether data is inline or separate, the offset field always
            // represents cumulative compressedLength (while the physical offset in an index
            // file with inline==true differs by n*sizeof(header), where n is the entry's
            // position in the file)
            offset += compressedLength;
        }

        public int serializeLength() {
            return header.capacity();
        }
    }
}