/*
 * Copyright (c) 2013 TMate Software Ltd
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * For information on how to redistribute this software under
 * the terms of a license other than GNU General Public License
 * contact TMate Software at support@hg4j.com
 */
package org.tmatesoft.hg.internal;

import static org.tmatesoft.hg.internal.Internals.REVLOGV1_RECORD_SIZE;
import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION;
import static org.tmatesoft.hg.repo.HgRepository.NO_REVISION;

import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;

import org.tmatesoft.hg.core.HgIOException;
import org.tmatesoft.hg.core.Nodeid;
import org.tmatesoft.hg.core.SessionContext;
import org.tmatesoft.hg.internal.DataSerializer.ByteArrayDataSource;
import org.tmatesoft.hg.internal.DataSerializer.ByteArraySerializer;
import org.tmatesoft.hg.internal.DataSerializer.DataSource;
import org.tmatesoft.hg.repo.HgBundle.GroupElement;
import org.tmatesoft.hg.repo.HgInvalidControlFileException;
import
org.tmatesoft.hg.repo.HgInvalidRevisionException; tikhomirov@534: import org.tmatesoft.hg.repo.HgInvalidStateException; tikhomirov@660: import org.tmatesoft.hg.repo.HgRepository; tikhomirov@628: import org.tmatesoft.hg.repo.HgRuntimeException; tikhomirov@660: import org.tmatesoft.hg.util.Pair; tikhomirov@530: tikhomirov@530: /** tikhomirov@530: * tikhomirov@608: * TODO [post-1.1] separate operation to check if index is too big and split into index+data tikhomirov@532: * tikhomirov@530: * @author Artem Tikhomirov tikhomirov@530: * @author TMate Software Ltd. tikhomirov@530: */ tikhomirov@530: public class RevlogStreamWriter { tikhomirov@530: tikhomirov@538: private final DigestHelper dh = new DigestHelper(); tikhomirov@538: private final RevlogCompressor revlogDataZip; tikhomirov@617: private final Transaction transaction; tikhomirov@663: // init with illegal values tikhomirov@663: private int lastEntryBase = BAD_REVISION, lastEntryIndex = BAD_REVISION, lastEntryActualLen = -1; tikhomirov@660: // record revision and its full content tikhomirov@660: // the name might be misleading, it does not necessarily match lastEntryIndex tikhomirov@660: private Pair lastFullContent; tikhomirov@538: private Nodeid lastEntryRevision; tikhomirov@538: private IntMap revisionCache = new IntMap(32); tikhomirov@538: private RevlogStream revlogStream; tikhomirov@530: tikhomirov@617: public RevlogStreamWriter(SessionContext.Source ctxSource, RevlogStream stream, Transaction tr) { tikhomirov@591: assert ctxSource != null; tikhomirov@538: assert stream != null; tikhomirov@617: assert tr != null; tikhomirov@538: tikhomirov@591: revlogDataZip = new RevlogCompressor(ctxSource.getSessionContext()); tikhomirov@538: revlogStream = stream; tikhomirov@617: transaction = tr; tikhomirov@538: } tikhomirov@538: tikhomirov@663: public RevlogStream getRevlogStream() { tikhomirov@663: return revlogStream; tikhomirov@663: } tikhomirov@663: tikhomirov@660: public Pair addPatchRevision(GroupElement ge, 
RevisionToIndexMap clogRevs, RevisionToIndexMap revlogRevs) throws HgIOException, HgRuntimeException { tikhomirov@660: populateLastEntryIndex(); tikhomirov@660: // tikhomirov@660: final Nodeid nodeRev = ge.node(); tikhomirov@660: final Nodeid csetRev = ge.cset(); tikhomirov@660: int linkRev; tikhomirov@660: if (nodeRev.equals(csetRev)) { tikhomirov@660: linkRev = lastEntryIndex+1; tikhomirov@660: } else { tikhomirov@660: linkRev = clogRevs.revisionIndex(csetRev); tikhomirov@660: } tikhomirov@660: assert linkRev >= 0; tikhomirov@660: final Nodeid p1Rev = ge.firstParent(); tikhomirov@660: int p1 = p1Rev.isNull() ? NO_REVISION : revlogRevs.revisionIndex(p1Rev); tikhomirov@660: final Nodeid p2Rev = ge.secondParent(); tikhomirov@660: int p2 = p2Rev.isNull() ? NO_REVISION : revlogRevs.revisionIndex(p2Rev); tikhomirov@660: Patch p = new Patch(); tikhomirov@660: final byte[] patchBytes; tikhomirov@660: try { tikhomirov@660: // XXX there's ge.rawData(), to avoid extra array wrap tikhomirov@660: patchBytes = ge.rawDataByteArray(); tikhomirov@660: p.read(new ByteArrayDataAccess(patchBytes)); tikhomirov@660: } catch (IOException ex) { tikhomirov@660: throw new HgIOException("Failed to read patch information", ex, null); tikhomirov@660: } tikhomirov@660: // tikhomirov@660: final Nodeid patchBase = ge.patchBase(); tikhomirov@660: int patchBaseRev = patchBase.isNull() ? NO_REVISION : revlogRevs.revisionIndex(patchBase); tikhomirov@660: int baseRev = lastEntryIndex == NO_REVISION ? 
0 : revlogStream.baseRevision(patchBaseRev); tikhomirov@660: int revLen; tikhomirov@660: DataSource ds; tikhomirov@660: byte[] complete = null; tikhomirov@660: if (patchBaseRev == lastEntryIndex && lastEntryIndex != NO_REVISION) { tikhomirov@660: // we may write patch from GroupElement as is tikhomirov@660: int patchBaseLen = dataLength(patchBaseRev); tikhomirov@660: revLen = patchBaseLen + p.patchSizeDelta(); tikhomirov@660: ds = new ByteArrayDataSource(patchBytes); tikhomirov@660: } else { tikhomirov@660: // read baseRev, unless it's the pull to empty repository tikhomirov@660: try { tikhomirov@660: if (lastEntryIndex == NO_REVISION) { tikhomirov@660: complete = p.apply(new ByteArrayDataAccess(new byte[0]), -1); tikhomirov@660: baseRev = 0; // it's done above, but doesn't hurt tikhomirov@660: } else { tikhomirov@663: assert patchBaseRev != NO_REVISION; tikhomirov@663: ReadContentInspector insp = new ReadContentInspector().read(revlogStream, patchBaseRev); tikhomirov@660: complete = p.apply(new ByteArrayDataAccess(insp.content), -1); tikhomirov@660: baseRev = lastEntryIndex + 1; tikhomirov@660: } tikhomirov@660: ds = new ByteArrayDataSource(complete); tikhomirov@660: revLen = complete.length; tikhomirov@660: } catch (IOException ex) { tikhomirov@663: // unlikely to happen, as ByteArrayDataSource throws IOException only in case of programming errors tikhomirov@663: // FIXME next approach to get indexFile is awful: tikhomirov@663: File indexFile = revlogStream.initWithIndexFile(new HgInvalidControlFileException("", ex, null)).getFile(); tikhomirov@663: throw new HgIOException("Failed to reconstruct revision", ex, indexFile); tikhomirov@660: } tikhomirov@660: } tikhomirov@660: doAdd(nodeRev, p1, p2, linkRev, baseRev, revLen, ds); tikhomirov@660: if (complete != null) { tikhomirov@660: lastFullContent = new Pair(lastEntryIndex, complete); tikhomirov@660: } tikhomirov@660: return new Pair(lastEntryIndex, lastEntryRevision); tikhomirov@660: } tikhomirov@660: 
tikhomirov@538: /** tikhomirov@538: * @return nodeid of added revision tikhomirov@628: * @throws HgRuntimeException tikhomirov@538: */ tikhomirov@660: public Pair addRevision(DataSource content, int linkRevision, int p1, int p2) throws HgIOException, HgRuntimeException { tikhomirov@660: populateLastEntryIndex(); tikhomirov@660: populateLastEntryContent(); tikhomirov@538: // tikhomirov@618: byte[] contentByteArray = toByteArray(content); tikhomirov@660: Patch patch = GeneratePatchInspector.delta(lastFullContent.second(), contentByteArray); tikhomirov@538: int patchSerializedLength = patch.serializedLength(); tikhomirov@538: tikhomirov@618: final boolean writeComplete = preferCompleteOverPatch(patchSerializedLength, contentByteArray.length); tikhomirov@618: DataSerializer.DataSource dataSource = writeComplete ? new ByteArrayDataSource(contentByteArray) : patch.new PatchDataSource(); tikhomirov@660: // tikhomirov@660: Nodeid p1Rev = revision(p1); tikhomirov@660: Nodeid p2Rev = revision(p2); tikhomirov@660: Nodeid newRev = Nodeid.fromBinary(dh.sha1(p1Rev, p2Rev, contentByteArray).asBinary(), 0); tikhomirov@660: doAdd(newRev, p1, p2, linkRevision, writeComplete ? 
lastEntryIndex+1 : lastEntryBase, contentByteArray.length, dataSource); tikhomirov@660: lastFullContent = new Pair(lastEntryIndex, contentByteArray); tikhomirov@660: return new Pair(lastEntryIndex, lastEntryRevision); tikhomirov@660: } tikhomirov@660: tikhomirov@660: private Nodeid doAdd(Nodeid rev, int p1, int p2, int linkRevision, int baseRevision, int revLen, DataSerializer.DataSource dataSource) throws HgIOException, HgRuntimeException { tikhomirov@660: assert linkRevision >= 0; tikhomirov@660: assert baseRevision >= 0; tikhomirov@660: assert p1 == NO_REVISION || p1 >= 0; tikhomirov@660: assert p2 == NO_REVISION || p2 >= 0; tikhomirov@660: assert !rev.isNull(); tikhomirov@660: assert revLen >= 0; tikhomirov@538: revlogDataZip.reset(dataSource); tikhomirov@538: final int compressedLen; tikhomirov@538: final boolean useCompressedData = preferCompressedOverComplete(revlogDataZip.getCompressedLength(), dataSource.serializeLength()); tikhomirov@538: if (useCompressedData) { tikhomirov@538: compressedLen= revlogDataZip.getCompressedLength(); tikhomirov@538: } else { tikhomirov@538: // compression wasn't too effective, tikhomirov@538: compressedLen = dataSource.serializeLength() + 1 /*1 byte for 'u' - uncompressed prefix byte*/; tikhomirov@538: } tikhomirov@538: // tikhomirov@618: DataSerializer indexFile, dataFile; tikhomirov@618: indexFile = dataFile = null; tikhomirov@538: try { tikhomirov@538: // tikhomirov@618: indexFile = revlogStream.getIndexStreamWriter(transaction); tikhomirov@538: final boolean isInlineData = revlogStream.isInlineData(); tikhomirov@538: HeaderWriter revlogHeader = new HeaderWriter(isInlineData); tikhomirov@660: revlogHeader.length(revLen, compressedLen); tikhomirov@660: revlogHeader.nodeid(rev.toByteArray()); tikhomirov@538: revlogHeader.linkRevision(linkRevision); tikhomirov@538: revlogHeader.parents(p1, p2); tikhomirov@660: revlogHeader.baseRevision(baseRevision); tikhomirov@539: long lastEntryOffset = revlogStream.newEntryOffset(); 
tikhomirov@539: revlogHeader.offset(lastEntryOffset); tikhomirov@538: // tikhomirov@538: revlogHeader.serialize(indexFile); tikhomirov@538: tikhomirov@538: if (isInlineData) { tikhomirov@538: dataFile = indexFile; tikhomirov@538: } else { tikhomirov@617: dataFile = revlogStream.getDataStreamWriter(transaction); tikhomirov@538: } tikhomirov@538: if (useCompressedData) { tikhomirov@538: int actualCompressedLenWritten = revlogDataZip.writeCompressedData(dataFile); tikhomirov@538: if (actualCompressedLenWritten != compressedLen) { tikhomirov@538: throw new HgInvalidStateException(String.format("Expected %d bytes of compressed data, but actually wrote %d in %s", compressedLen, actualCompressedLenWritten, revlogStream.getDataFileName())); tikhomirov@538: } tikhomirov@538: } else { tikhomirov@538: dataFile.writeByte((byte) 'u'); tikhomirov@538: dataSource.serialize(dataFile); tikhomirov@538: } tikhomirov@538: tikhomirov@538: lastEntryBase = revlogHeader.baseRevision(); tikhomirov@538: lastEntryIndex++; tikhomirov@660: lastEntryActualLen = revLen; tikhomirov@660: lastEntryRevision = rev; tikhomirov@538: revisionCache.put(lastEntryIndex, lastEntryRevision); tikhomirov@539: tikhomirov@539: revlogStream.revisionAdded(lastEntryIndex, lastEntryRevision, lastEntryBase, lastEntryOffset); tikhomirov@538: } finally { tikhomirov@538: indexFile.done(); tikhomirov@538: if (dataFile != null && dataFile != indexFile) { tikhomirov@538: dataFile.done(); tikhomirov@538: } tikhomirov@538: } tikhomirov@538: return lastEntryRevision; tikhomirov@538: } tikhomirov@538: tikhomirov@628: private byte[] toByteArray(DataSource content) throws HgIOException, HgRuntimeException { tikhomirov@645: ByteArraySerializer ba = new ByteArraySerializer(); tikhomirov@618: content.serialize(ba); tikhomirov@618: return ba.toByteArray(); tikhomirov@618: } tikhomirov@618: tikhomirov@628: private Nodeid revision(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException { tikhomirov@538: if 
(revisionIndex == NO_REVISION) { tikhomirov@538: return Nodeid.NULL; tikhomirov@538: } tikhomirov@538: Nodeid n = revisionCache.get(revisionIndex); tikhomirov@538: if (n == null) { tikhomirov@538: n = Nodeid.fromBinary(revlogStream.nodeid(revisionIndex), 0); tikhomirov@538: revisionCache.put(revisionIndex, n); tikhomirov@538: } tikhomirov@538: return n; tikhomirov@538: } tikhomirov@538: tikhomirov@660: private int dataLength(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException { tikhomirov@660: assert revisionIndex >= 0; tikhomirov@663: if (revisionIndex == lastEntryIndex && lastEntryActualLen >= 0) { tikhomirov@663: // if the last entry is the one we've just written, we know its actual len. tikhomirov@663: // it's possible, however, that revisionIndex == lastEntryIndex just tikhomirov@663: // because revision being added comes right after last locally known one tikhomirov@663: // and lastEntryActualLen is not set tikhomirov@660: return lastEntryActualLen; tikhomirov@660: } tikhomirov@660: if (lastFullContent != null && lastFullContent.first() == revisionIndex) { tikhomirov@660: return lastFullContent.second().length; tikhomirov@660: } tikhomirov@660: return revlogStream.dataLength(revisionIndex); tikhomirov@660: } tikhomirov@660: tikhomirov@660: private void populateLastEntryIndex() throws HgRuntimeException { tikhomirov@660: int revCount = revlogStream.revisionCount(); tikhomirov@660: lastEntryIndex = revCount == 0 ? 
NO_REVISION : revCount - 1; tikhomirov@660: } tikhomirov@660: tikhomirov@660: private void populateLastEntryContent() throws HgRuntimeException { tikhomirov@660: if (lastFullContent != null && lastFullContent.first() == lastEntryIndex) { tikhomirov@660: // we have last entry cached tikhomirov@539: return; tikhomirov@539: } tikhomirov@660: lastEntryRevision = Nodeid.NULL; tikhomirov@539: if (lastEntryIndex != NO_REVISION) { tikhomirov@660: ReadContentInspector insp = new ReadContentInspector().read(revlogStream, lastEntryIndex); tikhomirov@660: lastEntryBase = insp.baseRev; tikhomirov@660: lastEntryRevision = insp.rev; tikhomirov@660: lastFullContent = new Pair(lastEntryIndex, insp.content); tikhomirov@539: } else { tikhomirov@660: lastFullContent = new Pair(lastEntryIndex, new byte[0]); tikhomirov@538: } tikhomirov@660: assert lastFullContent.first() == lastEntryIndex; tikhomirov@660: assert lastFullContent.second() != null; tikhomirov@538: } tikhomirov@538: tikhomirov@538: public static boolean preferCompleteOverPatch(int patchLength, int fullContentLength) { tikhomirov@538: return !decideWorthEffort(patchLength, fullContentLength); tikhomirov@538: } tikhomirov@538: tikhomirov@538: public static boolean preferCompressedOverComplete(int compressedLen, int fullContentLength) { tikhomirov@538: if (compressedLen <= 0) { // just in case, meaningless otherwise tikhomirov@538: return false; tikhomirov@538: } tikhomirov@538: return decideWorthEffort(compressedLen, fullContentLength); tikhomirov@538: } tikhomirov@538: tikhomirov@538: // true if length obtained with effort is worth it tikhomirov@538: private static boolean decideWorthEffort(int lengthWithExtraEffort, int lengthWithoutEffort) { tikhomirov@538: return lengthWithExtraEffort < (/* 3/4 of original */lengthWithoutEffort - (lengthWithoutEffort >>> 2)); tikhomirov@538: } tikhomirov@538: tikhomirov@534: /*XXX public because HgCloneCommand uses it*/ tikhomirov@534: public static class HeaderWriter implements 
DataSerializer.DataSource { tikhomirov@530: private final ByteBuffer header; tikhomirov@530: private final boolean isInline; tikhomirov@530: private long offset; tikhomirov@530: private int length, compressedLength; tikhomirov@530: private int baseRev, linkRev, p1, p2; tikhomirov@534: private byte[] nodeid; tikhomirov@530: tikhomirov@530: public HeaderWriter(boolean inline) { tikhomirov@530: isInline = inline; tikhomirov@530: header = ByteBuffer.allocate(REVLOGV1_RECORD_SIZE); tikhomirov@530: } tikhomirov@530: tikhomirov@530: public HeaderWriter offset(long offset) { tikhomirov@530: this.offset = offset; tikhomirov@530: return this; tikhomirov@530: } tikhomirov@530: tikhomirov@532: public int baseRevision() { tikhomirov@532: return baseRev; tikhomirov@532: } tikhomirov@532: tikhomirov@530: public HeaderWriter baseRevision(int baseRevision) { tikhomirov@530: this.baseRev = baseRevision; tikhomirov@530: return this; tikhomirov@530: } tikhomirov@530: tikhomirov@530: public HeaderWriter length(int len, int compressedLen) { tikhomirov@530: this.length = len; tikhomirov@530: this.compressedLength = compressedLen; tikhomirov@530: return this; tikhomirov@530: } tikhomirov@530: tikhomirov@530: public HeaderWriter parents(int parent1, int parent2) { tikhomirov@530: p1 = parent1; tikhomirov@530: p2 = parent2; tikhomirov@530: return this; tikhomirov@530: } tikhomirov@530: tikhomirov@530: public HeaderWriter linkRevision(int linkRevision) { tikhomirov@534: linkRev = linkRevision; tikhomirov@530: return this; tikhomirov@530: } tikhomirov@530: tikhomirov@530: public HeaderWriter nodeid(Nodeid n) { tikhomirov@534: nodeid = n.toByteArray(); tikhomirov@530: return this; tikhomirov@530: } tikhomirov@534: tikhomirov@534: public HeaderWriter nodeid(byte[] nodeidBytes) { tikhomirov@534: nodeid = nodeidBytes; tikhomirov@534: return this; tikhomirov@534: } tikhomirov@534: tikhomirov@618: public void serialize(DataSerializer out) throws HgIOException { tikhomirov@530: header.clear(); 
tikhomirov@530: if (offset == 0) { tikhomirov@530: int version = 1 /* RevlogNG */; tikhomirov@530: if (isInline) { tikhomirov@608: version |= RevlogStream.INLINEDATA; tikhomirov@530: } tikhomirov@530: header.putInt(version); tikhomirov@530: header.putInt(0); tikhomirov@530: } else { tikhomirov@530: header.putLong(offset << 16); tikhomirov@530: } tikhomirov@530: header.putInt(compressedLength); tikhomirov@530: header.putInt(length); tikhomirov@530: header.putInt(baseRev); tikhomirov@530: header.putInt(linkRev); tikhomirov@530: header.putInt(p1); tikhomirov@530: header.putInt(p2); tikhomirov@534: header.put(nodeid); tikhomirov@530: // assume 12 bytes left are zeros tikhomirov@534: out.write(header.array(), 0, header.capacity()); tikhomirov@530: tikhomirov@530: // regardless whether it's inline or separate data, tikhomirov@530: // offset field always represent cumulative compressedLength tikhomirov@539: // (while physical offset in the index file with inline==true differs by n*sizeof(header), where n is entry's position in the file) tikhomirov@530: offset += compressedLength; tikhomirov@530: } tikhomirov@534: tikhomirov@534: public int serializeLength() { tikhomirov@534: return header.capacity(); tikhomirov@534: } tikhomirov@534: } tikhomirov@660: tikhomirov@660: // XXX part of HgRevisionMap contract, need public counterparts (along with IndexToRevisionMap) tikhomirov@660: public interface RevisionToIndexMap { tikhomirov@660: tikhomirov@660: /** tikhomirov@660: * @return {@link HgRepository#NO_REVISION} if unknown revision tikhomirov@660: */ tikhomirov@660: int revisionIndex(Nodeid revision); tikhomirov@660: } tikhomirov@660: tikhomirov@660: private static class ReadContentInspector implements RevlogStream.Inspector { tikhomirov@660: public int baseRev; tikhomirov@660: public Nodeid rev; tikhomirov@660: public byte[] content; tikhomirov@660: private IOException failure; tikhomirov@660: tikhomirov@660: public ReadContentInspector read(RevlogStream rs, int revIndex) 
throws HgInvalidControlFileException { tikhomirov@660: assert revIndex >= 0; tikhomirov@660: rs.iterate(revIndex, revIndex, true, this); tikhomirov@660: if (failure != null) { tikhomirov@660: String m = String.format("Failed to get content of revision %d", revIndex); tikhomirov@660: throw rs.initWithDataFile(new HgInvalidControlFileException(m, failure, null)); tikhomirov@660: } tikhomirov@660: return this; tikhomirov@660: } tikhomirov@660: tikhomirov@660: public void next(int revisionIndex, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) { tikhomirov@660: try { tikhomirov@660: baseRev = baseRevision; tikhomirov@660: rev = Nodeid.fromBinary(nodeid, 0); tikhomirov@660: content = data.byteArray(); tikhomirov@660: } catch (IOException ex) { tikhomirov@660: failure = ex; tikhomirov@660: } tikhomirov@660: } tikhomirov@660: } tikhomirov@660: }