/*
 * Copyright (c) 2010-2011 TMate Software Ltd
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * For information on how to redistribute this software under
 * the terms of a license other than GNU General Public License
 * contact TMate Software at support@hg4j.com
 */
package org.tmatesoft.hg.internal;

import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION;
import static org.tmatesoft.hg.repo.HgRepository.TIP;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.Inflater;

import org.tmatesoft.hg.core.HgBadStateException;
import org.tmatesoft.hg.core.Nodeid;
import org.tmatesoft.hg.repo.HgRepository;


/**
 * Reader of a single revlog: an index (.i) file and, unless revision data is inlined into the index,
 * its companion data (.d) file.
 *
 * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations),
 * or numerous RevlogStream with separate representation of the underlying data (cached, lazy ChunkStream)?
 *
 * @see http://mercurial.selenic.com/wiki/Revlog
 * @see http://mercurial.selenic.com/wiki/RevlogNG
 *
 * @author Artem Tikhomirov
 * @author TMate Software Ltd.
 */
public class RevlogStream {

	// size of a single RevlogNG index record, in bytes
	private static final int REVLOGV1_RECORD_SIZE = 64;

	/*
	 * makes sense for index with inline data only - actual offset of the record in the .i file (record entry + revision * record size))
	 *
	 * long[] in fact (there are 8-bytes field in the revlog)
	 * However, (a) DataAccess currently doesn't operate with long seek/length
	 * and, of greater significance, (b) files with inlined data are designated for smaller files,
	 * guess, about 130 Kb, and offset there won't ever break int capacity
	 */
	private int[] indexRecordOffset;
	// base revision of each revision, indexed by revision number; null until initOutline() has run
	private int[] baseRevisions;
	private boolean inline = false;
	private final File indexFile;
	private final DataAccessProvider dataAccess;

	// if we need anything else from HgRepo, might replace DAP parameter with HgRepo and query it for DAP.
	public RevlogStream(DataAccessProvider dap, File indexFile) {
		this.dataAccess = dap;
		this.indexFile = indexFile;
	}

	/*package*/ DataAccess getIndexStream() {
		// XXX may supply a hint that I'll need really few bytes of data (perhaps, at some offset)
		// to avoid mmap files when only few bytes are to be read (i.e. #dataLength())
		return dataAccess.create(indexFile);
	}

	/*package*/ DataAccess getDataStream() {
		// data file lives next to the index and has the same name with the last character replaced by 'd'
		final String indexName = indexFile.getName();
		File dataFile = new File(indexFile.getParentFile(), indexName.substring(0, indexName.length() - 1) + "d");
		return dataAccess.create(dataFile);
	}

	/**
	 * @return number of revisions recorded in the index
	 */
	public int revisionCount() {
		initOutline();
		return baseRevisions.length;
	}

	/**
	 * Get uncompressed length field from the index record.
	 *
	 * @param revision local revision number, or {@link HgRepository#TIP}
	 * @return value of the 'actual length' field of the revision's index record
	 * @throws IllegalArgumentException if revision is not in [0..revisionCount())
	 * @throws HgBadStateException if internal read operation failed
	 */
	public int dataLength(int revision) {
		// XXX in fact, use of iterate() instead of this implementation may be quite reasonable.
		//
		// validate prior to opening the stream, so that nothing is left open on bad argument
		revision = resolveRevision(revision);
		DataAccess daIndex = getIndexStream();
		try {
			int recordOffset = getIndexOffsetInt(revision);
			daIndex.seek(recordOffset + 12); // 6+2+4
			return daIndex.readInt();
		} catch (IOException ex) {
			// FIXME better handling
			throw new HgBadStateException(ex);
		} finally {
			daIndex.done();
		}
	}

	/**
	 * Get nodeid field from the index record.
	 *
	 * @param revision local revision number, or {@link HgRepository#TIP}
	 * @return new 20-byte array with nodeid of the revision
	 * @throws IllegalArgumentException if revision is not in [0..revisionCount())
	 * @throws HgBadStateException if internal read operation failed
	 */
	public byte[] nodeid(int revision) {
		revision = resolveRevision(revision);
		DataAccess daIndex = getIndexStream();
		try {
			int recordOffset = getIndexOffsetInt(revision);
			daIndex.seek(recordOffset + 32);
			byte[] rv = new byte[20];
			daIndex.readBytes(rv, 0, 20);
			return rv;
		} catch (IOException ex) {
			// keep the cause, otherwise there's no way to tell what has failed
			throw new HgBadStateException(ex);
		} finally {
			daIndex.done();
		}
	}

	/**
	 * Get link field from the index record.
	 *
	 * @param revision local revision number, or {@link HgRepository#TIP}
	 * @return value of the 'link revision' field of the revision's index record
	 * @throws IllegalArgumentException if revision is not in [0..revisionCount())
	 * @throws HgBadStateException if internal read operation failed
	 */
	public int linkRevision(int revision) {
		revision = resolveRevision(revision);
		DataAccess daIndex = getIndexStream();
		try {
			int recordOffset = getIndexOffsetInt(revision);
			daIndex.seek(recordOffset + 20);
			return daIndex.readInt();
		} catch (IOException ex) {
			// keep the cause, otherwise there's no way to tell what has failed
			throw new HgBadStateException(ex);
		} finally {
			daIndex.done();
		}
	}

	/**
	 * Replaces {@link HgRepository#TIP} with the index of the last revision and ensures the result is a valid revision index.
	 *
	 * @return revision index in [0..revisionCount())
	 * @throws IllegalArgumentException if revision is out of range (including TIP of an empty revlog)
	 */
	private int resolveRevision(int revision) {
		final int last = revisionCount() - 1;
		if (revision == TIP) {
			revision = last;
		}
		if (revision < 0 || revision > last) {
			throw new IllegalArgumentException(Integer.toString(revision));
		}
		return revision;
	}

	// Perhaps, RevlogStream should be limited to use of plain int revisions for access,
	// while Nodeids should be kept on the level up, in Revlog. Guess, Revlog better keep
	// map of nodeids, and once this comes true, we may get rid of this method.
	// Unlike its counterpart, {@link Revlog#getLocalRevisionNumber()}, doesn't fail with exception if node not found,
	/**
	 * Sequentially scans the index for the record with the given nodeid.
	 *
	 * @return integer in [0..revisionCount()) or {@link HgRepository#BAD_REVISION} if not found
	 * @throws IllegalStateException if internal read operation failed
	 */
	public int findLocalRevisionNumber(Nodeid nodeid) {
		// XXX this one may be implemented with iterate() once there's mechanism to stop iterations
		final int indexSize = revisionCount();
		DataAccess daIndex = getIndexStream();
		try {
			byte[] nodeidBuf = new byte[20];
			for (int i = 0; i < indexSize; i++) {
				daIndex.skip(8); // offset + flags
				int compressedLen = daIndex.readInt();
				daIndex.skip(20); // actual length, base, link and both parent revisions
				daIndex.readBytes(nodeidBuf, 0, 20);
				if (nodeid.equalsTo(nodeidBuf)) {
					return i;
				}
				// rest of the nodeid area, and revision data if it follows the record right in the index
				daIndex.skip(inline ? 12 + compressedLen : 12);
			}
		} catch (IOException ex) {
			// FIXME better handling. Perhaps, shall return BAD_REVISION here as well?
			throw new IllegalStateException(ex);
		} finally {
			daIndex.done();
		}
		return BAD_REVISION;
	}

	// should be possible to use TIP, ALL, or -1, -2, -n notation of Hg
	// ? boolean needsNodeid
	/**
	 * Walks revisions [start..end], both inclusive, and reports each one to the inspector.
	 *
	 * @param start first revision to report, or {@link HgRepository#TIP}
	 * @param end last revision to report, or {@link HgRepository#TIP}
	 * @param needData <code>true</code> if inspector needs content of the revisions, <code>false</code> when index record fields are sufficient
	 * @param inspector callback to process entries
	 * @throws IllegalArgumentException if range boundaries are not valid for this revlog
	 * @throws HgBadStateException if internal read operation failed
	 */
	public void iterate(int start, int end, boolean needData, Inspector inspector) {
		// revisionCount() takes care to initialize the outline
		final int indexSize = revisionCount();
		if (indexSize == 0) {
			return;
		}
		if (end == TIP) {
			end = indexSize - 1;
		}
		if (start == TIP) {
			start = indexSize - 1;
		}
		if (start < 0 || start >= indexSize) {
			throw new IllegalArgumentException(String.format("Bad left range boundary %d in [0..%d]", start, indexSize-1));
		}
		if (end < start || end >= indexSize) {
			throw new IllegalArgumentException(String.format("Bad right range boundary %d in [0..%d]", end, indexSize-1));
		}
		// XXX may cache [start .. end] from index with a single read (pre-read)

		ReaderN1 r = new ReaderN1(needData, inspector);
		try {
			r.start(end - start + 1);
			r.range(start, end);
		} catch (IOException ex) {
			throw new HgBadStateException(ex); // FIXME need better handling
		} finally {
			r.finish();
		}
	}

	/**
	 * Effective alternative to {@link #iterate(int, int, boolean, Inspector) batch read}, when only few selected
	 * revisions are of interest.
	 * @param sortedRevisions revisions to walk, in ascending order.
	 * @param needData <code>true</code> if inspector needs content of the revisions, <code>false</code> when index record fields are sufficient
	 * @param inspector callback to process entries
	 * @throws IllegalArgumentException if any boundary of sortedRevisions is not in [0..revisionCount())
	 * @throws HgBadStateException if internal read operation failed
	 */
	public void iterate(int[] sortedRevisions, boolean needData, Inspector inspector) {
		final int indexSize = revisionCount();
		if (indexSize == 0 || sortedRevisions.length == 0) {
			return;
		}
		// array is sorted, hence it's enough to check its boundaries
		final int first = sortedRevisions[0];
		final int last = sortedRevisions[sortedRevisions.length - 1];
		if (first < 0 || first >= indexSize || last < 0 || last >= indexSize) {
			throw new IllegalArgumentException(String.format("Can't iterate [%d, %d] in range [0..%d]", first, last, indexSize-1));
		}

		ReaderN1 r = new ReaderN1(needData, inspector);
		try {
			r.start(sortedRevisions.length);
			for (int i = 0; i < sortedRevisions.length; ) {
				int x = i;
				i++;
				// extend the run while revisions are consecutive
				while (i < sortedRevisions.length && sortedRevisions[i] == sortedRevisions[i-1] + 1) {
					i++;
				}
				// sortedRevisions[x..i-1] are sequential
				if (!r.range(sortedRevisions[x], sortedRevisions[i-1])) {
					// inspector asked to stop
					return;
				}
			}
		} catch (IOException ex) {
			throw new HgBadStateException(ex); // FIXME need better handling
		} finally {
			r.finish();
		}
	}

	private int getBaseRevision(int revision) {
		return baseRevisions[revision];
	}

	/**
	 * @return offset of the revision's record in the index (.i) stream
	 */
	private int getIndexOffsetInt(int revision) {
		return inline ? indexRecordOffset[revision] : revision * REVLOGV1_RECORD_SIZE;
	}

	/**
	 * Reads the index once to find out whether data is inlined, and to record base revision
	 * (and, for inlined data, index offset) of each revision.
	 * Does nothing if a non-empty outline is already available; an empty outline is re-read on each call.
	 * Read failures are not propagated, the revlog is treated as empty then.
	 */
	private void initOutline() {
		if (baseRevisions != null && baseRevisions.length > 0) {
			return;
		}
		DataAccess da = getIndexStream();
		try {
			if (da.isEmpty()) {
				// do not fail with exception if stream is empty, it's likely intentional
				baseRevisions = new int[0];
				return;
			}
			int versionField = da.readInt();
			da.readInt(); // just to skip next 4 bytes of offset + flags
			final int INLINEDATA = 1 << 16;
			inline = (versionField & INLINEDATA) != 0;
			IntVector resBases, resOffsets = null;
			int entryCountGuess = da.length() / REVLOGV1_RECORD_SIZE;
			if (inline) {
				entryCountGuess >>>= 2; // pure guess, assume useful data takes 3/4 of total space
				resOffsets = new IntVector(entryCountGuess, 5000);
			}
			resBases = new IntVector(entryCountGuess, 5000);

			long offset = 0; // first offset is always 0, thus Hg uses it for other purposes
			while(true) {
				int compressedLen = da.readInt();
				// 8+4 = 12 bytes total read here
				@SuppressWarnings("unused")
				int actualLen = da.readInt();
				int baseRevision = da.readInt();
				// 12 + 8 = 20 bytes read here
//				int linkRevision = di.readInt();
//				int parent1Revision = di.readInt();
//				int parent2Revision = di.readInt();
//				byte[] nodeid = new byte[32];
				resBases.add(baseRevision);
				if (inline) {
					int o = (int) offset;
					if (o != offset) {
						// just in case, can't happen, ever, unless HG (or some other bad tool) produces index file
						// with inlined data of size greater than 2 Gb.
						throw new HgBadStateException("Data too big, offset didn't fit to sizeof(int)");
					}
					// data offset plus size of all index records that precede this one in the stream
					resOffsets.add(o + REVLOGV1_RECORD_SIZE * resOffsets.size());
					da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size)
				} else {
					da.skip(3*4 + 32);
				}
				if (da.isEmpty()) {
					// fine, done then
					baseRevisions = resBases.toArray(true);
					if (inline) {
						indexRecordOffset = resOffsets.toArray(true);
					}
					break;
				} else {
					// start reading next record
					long l = da.readLong();
					offset = l >>> 16;
				}
			}
		} catch (IOException ex) {
			ex.printStackTrace(); // log error
			// too bad, no outline then, but don't fail with NPE
			baseRevisions = new int[0];
		} finally {
			da.done();
		}
	}

	/**
	 * operation with single file open/close and multiple diverse reads.
	 * XXX initOutline might need similar extraction to keep N1 format knowledge
	 */
	class ReaderN1 {
		private final Inspector inspector;
		private final boolean needData;
		private DataAccess daIndex = null, daData = null;
		private Lifecycle.BasicCallback cb = null;
		private int lastRevisionRead = BAD_REVISION;
		// content of the last revision read, kept to apply patch of the next revision to
		private DataAccess lastUserData;
		// next are to track two major bottlenecks - patch application and actual time spent in inspector
//		private long applyTime, inspectorTime; // TIMING


		public ReaderN1(boolean needData, Inspector insp) {
			assert insp != null;
			this.needData = needData;
			inspector = insp;
		}

		/**
		 * Opens index (and data, if needed and not inlined) stream and notifies inspector, if it's a {@link Lifecycle}.
		 * @param totalWork number of revisions about to be reported
		 */
		public void start(int totalWork) {
			daIndex = getIndexStream();
			if (needData && !inline) {
				daData = getDataStream();
			}
			if (inspector instanceof Lifecycle) {
				cb = new Lifecycle.BasicCallback();
				((Lifecycle) inspector).start(totalWork, cb, cb);
			}
//			applyTime = inspectorTime = 0; // TIMING
		}

		/**
		 * Releases everything acquired in {@link #start(int)} and {@link #range(int, int)}.
		 * Safe to invoke even if {@link #start(int)} has failed half-way.
		 */
		public void finish() {
			try {
				if (lastUserData != null) {
					lastUserData.done();
					lastUserData = null;
				}
				// cb is null if Lifecycle#start() was never invoked, no reason to report finish then
				if (cb != null && inspector instanceof Lifecycle) {
					((Lifecycle) inspector).finish(cb);
				}
			} finally {
				// streams have to be closed regardless of what inspector does in its finish()
				if (daIndex != null) {
					daIndex.done();
				}
				if (daData != null) {
					daData.done();
				}
			}
//			System.out.printf("applyTime:%d ms, inspectorTime: %d ms\n", applyTime, inspectorTime); // TIMING
		}

		/**
		 * Reads revisions [start..end], both inclusive, and reports them to the inspector. When revision data is needed
		 * and start is a patch, revisions from its base (or from the one next to the last revision read) are read first,
		 * without being reported.
		 *
		 * @return <code>false</code> if inspector asked to stop, <code>true</code> if the whole range was processed
		 * @throws IOException if read failed
		 */
		public boolean range(int start, int end) throws IOException {
			byte[] nodeidBuf = new byte[20];
			int i;
			boolean extraReadsToBaseRev = false; // to indicate we read revision prior to start. XXX not sure can't do without
			// it (i.e. replace with i >= start)
			if (needData && (i = getBaseRevision(start)) < start) {
				// if lastRevisionRead in [baseRevision(start), start)  can reuse lastUserData
				// doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below).
				if (lastRevisionRead != BAD_REVISION && i <= lastRevisionRead && lastRevisionRead < start) {
					i = lastRevisionRead + 1; // start with first not-yet-read revision
					extraReadsToBaseRev = i < start;
				} else {
					if (lastUserData != null) {
						lastUserData.done();
						lastUserData = null;
					}
					extraReadsToBaseRev = true;
				}
			} else {
				// don't need to clean lastUserData as it's always null when !needData
				i = start;
			}

			daIndex.seek(getIndexOffsetInt(i));
			//
			// reuse some instances
			final ArrayList<PatchRecord> patches = new ArrayList<PatchRecord>();
			// NOTE(review): this Inflater is never end()-ed. It can't be ended on exit from this method as lastUserData
			// may still reference it and gets read on the next range() call - consider keeping it at the reader level
			// and releasing in finish().
			final Inflater inflater = new Inflater();
			// can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel
			final byte[] inflaterBuffer = new byte[1024];
			//

			for (; i <= end; i++ ) {
				if (inline && needData) {
					// inspector reading data (though FilterDataAccess) may have affected index position
					daIndex.seek(getIndexOffsetInt(i));
				}
				long l = daIndex.readLong(); // 0
				long offset = i == 0 ? 0 : (l >>> 16);
				@SuppressWarnings("unused")
				int flags = (int) (l & 0X0FFFF);
				int compressedLen = daIndex.readInt(); // +8
				int actualLen = daIndex.readInt(); // +12
				int baseRevision = daIndex.readInt(); // +16
				int linkRevision = daIndex.readInt(); // +20
				int parent1Revision = daIndex.readInt();
				int parent2Revision = daIndex.readInt();
				// Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty
				daIndex.readBytes(nodeidBuf, 0, 20); // +32
				daIndex.skip(12);
				DataAccess userDataAccess = null;
				if (needData) {
					int streamOffset;
					DataAccess streamDataAccess;
					if (inline) {
						streamDataAccess = daIndex;
						streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream
					} else {
						streamOffset = (int) offset;
						streamDataAccess = daData;
						daData.seek(streamOffset);
					}
					final boolean patchToPrevious = baseRevision != i; // the only way I found to tell if it's a patch
					if (streamDataAccess.isEmpty()) {
						userDataAccess = new DataAccess(); // empty
					} else {
						final byte firstByte = streamDataAccess.readByte();
						if (firstByte == 0x78 /* 'x' */) {
							inflater.reset();
							userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, patchToPrevious ? -1 : actualLen, inflater, inflaterBuffer);
						} else if (firstByte == 0x75 /* 'u' */) {
							userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1);
						} else {
							// XXX Python impl in fact throws exception when there's not 'x', 'u' or '0'
							// but I don't see reason not to return data as is
							userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen);
						}
					}
					// XXX
					if (patchToPrevious) {
						// this is a patch
						if (lastUserData == null) {
							// index claims it's a patch, but there's nothing to apply it to
							throw new HgBadStateException(String.format("Revision %d is a patch (base revision %d), but content of the previous revision is not available", i, baseRevision));
						}
						patches.clear(); // won't hurt to ensure there are no leftovers, even if we already cleaned
						while (!userDataAccess.isEmpty()) {
							PatchRecord pr = PatchRecord.read(userDataAccess);
//							System.out.printf("PatchRecord:%d %d %d\n", pr.start, pr.end, pr.len);
							patches.add(pr);
						}
						userDataAccess.done();
						//
						// it shall be reset at the end of prev iteration, when it got assigned from userDataAccess
						// however, actual userDataAccess and lastUserData may share Inflater object, which needs to be reset
						// Alternatively, userDataAccess.done() above may be responsible to reset Inflater (if it's InflaterDataAccess)
						lastUserData.reset();
//						final long startMeasuring = System.currentTimeMillis(); // TIMING
						byte[] userData = apply(lastUserData, actualLen, patches);
//						applyTime += (System.currentTimeMillis() - startMeasuring); // TIMING
						patches.clear(); // do not keep any reference, allow PatchRecord to be gc'd
						userDataAccess = new ByteArrayDataAccess(userData);
					}
				} else {
					if (inline) {
						daIndex.skip(compressedLen);
					}
				}
				if (!extraReadsToBaseRev || i >= start) {
//					final long startMeasuring = System.currentTimeMillis(); // TIMING
					inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess);
//					inspectorTime += (System.currentTimeMillis() - startMeasuring); // TIMING
				}
				if (cb != null) {
					if (cb.isStopped()) {
						return false;
					}
				}
				if (userDataAccess != null) {
					userDataAccess.reset(); // not sure this is necessary here, as lastUserData would get reset anyway before next use.
				}
				if (lastUserData != null) {
					lastUserData.done();
				}
				lastUserData = userDataAccess;
			}
			lastRevisionRead = end;
			return true;
		}
	}


	// not referenced from within this class at the moment
	private static int[] toArray(List<Integer> l) {
		int[] rv = new int[l.size()];
		for (int i = 0; i < rv.length; i++) {
			rv[i] = l.get(i);
		}
		return rv;
	}


	// mpatch.c : apply()
	// FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF
	/**
	 * Applies patch records to the base content.
	 *
	 * @param baseRevisionContent content to patch
	 * @param outcomeLen length of the result, or -1 to calculate it from the base content length and the patch records
	 * @param patch records to apply, in the order they shall be applied
	 * @return new array with patched content
	 * @throws IOException if read of the base content failed
	 */
	public/*for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List<PatchRecord> patch) throws IOException {
		int last = 0, destIndex = 0;
		if (outcomeLen == -1) {
			outcomeLen = baseRevisionContent.length();
			for (int i = 0, x = patch.size(); i < x; i++) {
				PatchRecord pr = patch.get(i);
				outcomeLen += pr.start - last + pr.len;
				last = pr.end;
			}
			outcomeLen -= last;
			last = 0;
		}
		byte[] rv = new byte[outcomeLen];
		for (int i = 0, x = patch.size(); i < x; i++) {
			PatchRecord pr = patch.get(i);
			// part of the base between previous and this patch record goes as is
			baseRevisionContent.seek(last);
			baseRevisionContent.readBytes(rv, destIndex, pr.start-last);
			destIndex += pr.start - last;
			System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length);
			destIndex += pr.data.length;
			last = pr.end;
		}
		// tail of the base, past the last patch record
		baseRevisionContent.seek(last);
		baseRevisionContent.readBytes(rv, destIndex, (int) (baseRevisionContent.length() - last));
		return rv;
	}

	// @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description
	public static class PatchRecord {
		/*
		   Given there are pr1 and pr2:
		     pr1.start to pr1.end will be replaced with pr's data (of pr1.len)
		     pr1.end to pr2.start gets copied from base
		 */
		public int start, end, len;
		public byte[] data;

		// TODO consider PatchRecord that only records data position (absolute in data source), and acquires data as needed
		private PatchRecord(int p1, int p2, int length, byte[] src) {
			start = p1;
			end = p2;
			len = length;
			data = src;
		}

		// reads three big-endian ints (start, end, length) followed by length bytes of data, which get copied
		/*package-local*/ static PatchRecord read(byte[] data, int offset) {
			final int x = offset; // shorthand
			int p1 =  ((data[x] & 0xFF)<< 24)    | ((data[x+1] & 0xFF) << 16) | ((data[x+2] & 0xFF) << 8)  | (data[x+3] & 0xFF);
			int p2 =  ((data[x+4] & 0xFF) << 24) | ((data[x+5] & 0xFF) << 16) | ((data[x+6] & 0xFF) << 8)  | (data[x+7] & 0xFF);
			int len = ((data[x+8] & 0xFF) << 24) | ((data[x+9] & 0xFF) << 16) | ((data[x+10] & 0xFF) << 8) | (data[x+11] & 0xFF);
			byte[] dataCopy = new byte[len];
			System.arraycopy(data, x+12, dataCopy, 0, len);
			return new PatchRecord(p1, p2, len, dataCopy);
		}

		public /*for HgBundle*/ static PatchRecord read(DataAccess da) throws IOException {
			int p1 = da.readInt();
			int p2 = da.readInt();
			int len = da.readInt();
			byte[] src = new byte[len];
			da.readBytes(src, 0, len);
			return new PatchRecord(p1, p2, len, src);
		}
	}

	// FIXME byte[] data might be too expensive, for few usecases it may be better to have intermediate Access object (when we don't need full data
	// instantly - e.g. calculate hash, or comparing two revisions
	public interface Inspector {
		// XXX boolean retVal to indicate whether to continue?
		// TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call)
		// implementers shall not invoke DataAccess.done(), it's accomplished by #iterate at appropriate moment
		void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, DataAccess data);
	}
}