Mercurial > hg4j
view src/org/tmatesoft/hg/internal/RevlogStream.java @ 709:497e697636fc
Report merged lines as changed block if possible, not as a sequence of added/deleted blocks. To facilitate access to merge parent lines AddBlock got mergeLineAt() method that reports index of the line in the second parent (if any), while insertedAt() has been changed to report index in the first parent always
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Wed, 21 Aug 2013 16:23:27 +0200 |
parents | ae2d439fbed3 |
children |
line wrap: on
line source
/* * Copyright (c) 2010-2013 TMate Software Ltd * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * For information on how to redistribute this software under * the terms of a license other than GNU General Public License * contact TMate Software at support@hg4j.com */ package org.tmatesoft.hg.internal; import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION; import static org.tmatesoft.hg.repo.HgRepository.NO_REVISION; import static org.tmatesoft.hg.repo.HgRepository.TIP; import static org.tmatesoft.hg.internal.Internals.REVLOGV1_RECORD_SIZE; import java.io.File; import java.io.IOException; import java.lang.ref.Reference; import java.lang.ref.ReferenceQueue; import java.lang.ref.SoftReference; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; import java.util.zip.Inflater; import org.tmatesoft.hg.core.HgIOException; import org.tmatesoft.hg.core.Nodeid; import org.tmatesoft.hg.repo.HgInternals; import org.tmatesoft.hg.repo.HgInvalidControlFileException; import org.tmatesoft.hg.repo.HgInvalidRevisionException; import org.tmatesoft.hg.repo.HgInvalidStateException; import org.tmatesoft.hg.repo.HgRepository; import org.tmatesoft.hg.repo.HgRuntimeException; import org.tmatesoft.hg.util.Adaptable; /** * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations), * or numerous RevlogStream with separate representation of the underlying data (cached, lazy ChunkStream)? * * @see http://mercurial.selenic.com/wiki/Revlog * @see http://mercurial.selenic.com/wiki/RevlogNG * * @author Artem Tikhomirov * @author TMate Software Ltd. */ public class RevlogStream { static final int INLINEDATA = 1 << 16; /* * makes sense for index with inline data only - actual offset of the record in the .i file (record entry + revision * record size)) * * long[] in fact (there are 8-bytes field in the revlog) * However, (a) DataAccess currently doesn't operate with long seek/length * and, of greater significance, (b) files with inlined data are designated for smaller files, * guess, about 130 Kb, and offset there won't ever break int capacity */ private int[] indexRecordOffset; private int[] baseRevisions; private boolean inline = false; private final File indexFile; private File dataFile; private final Internals repo; // keeps last complete revision we've read. Note, this cached revision doesn't help // for subsequent #iterate() calls with the same revision (Inspector needs more data than // we currently cache here, perhaps, we shall cache everything it wants to cover same // revision case as well). Now this helps when second #iterate() call is for a revision greater // than one from the first call, and both revisions got same base rev. It's often the case when // parents/children are analyzed. private SoftReference<CachedRevision> lastRevisionRead; private final ReferenceQueue<CachedRevision> lastRevisionQueue = new ReferenceQueue<CachedRevision>(); // private final RevlogChangeMonitor changeTracker; private List<Observer> observers; private boolean shallDropDerivedCaches = false; public RevlogStream(Internals hgRepo, File indexFile) { repo = hgRepo; this.indexFile = indexFile; changeTracker = repo.getRevlogTracker(indexFile); } public boolean exists() { return indexFile.exists(); } /** * @param shortRead pass <code>true</code> to indicate intention to read few revisions only (as opposed to reading most of/complete revlog) * @return never <code>null</code>, empty {@link DataAccess} if no stream is available */ /*package*/ DataAccess getIndexStream(boolean shortRead) { // shortRead hint helps to avoid mmap files when only // few bytes are to be read (i.e. #dataLength()) DataAccessProvider dataAccess = repo.getDataAccess(); return dataAccess.createReader(indexFile, shortRead); } /*package*/ DataAccess getDataStream() { DataAccessProvider dataAccess = repo.getDataAccess(); return dataAccess.createReader(getDataFile(), false); } /*package*/ DataSerializer getIndexStreamWriter(Transaction tr) throws HgIOException { DataAccessProvider dataAccess = repo.getDataAccess(); return dataAccess.createWriter(tr, indexFile, true); } /*package*/ DataSerializer getDataStreamWriter(Transaction tr) throws HgIOException { DataAccessProvider dataAccess = repo.getDataAccess(); return dataAccess.createWriter(tr, getDataFile(), true); } /** * Constructs file object that corresponds to .d revlog counterpart. * Note, it's caller responsibility to ensure this file makes any sense (i.e. check {@link #inline} attribute) */ private File getDataFile() { if (dataFile == null) { final String indexName = indexFile.getName(); dataFile = new File(indexFile.getParentFile(), indexName.substring(0, indexName.length() - 1) + "d"); } return dataFile; } // initialize exception with the file where revlog structure information comes from public HgInvalidControlFileException initWithIndexFile(HgInvalidControlFileException ex) { return ex.setFile(indexFile); } public HgIOException initWithIndexFile(HgIOException ex) { return ex.setFile(indexFile); } // initialize exception with the file where revlog data comes from public HgInvalidControlFileException initWithDataFile(HgInvalidControlFileException ex) { // exceptions are usually raised after read attepmt, hence inline shall be initialized // although honest approach is to call #initOutline() first return ex.setFile(inline ? indexFile : getDataFile()); } /*package-private*/String getDataFileName() { // XXX a temporary solution to provide more info to fill in exceptions other than // HgInvalidControlFileException (those benefit from initWith* methods above) // // Besides, since RevlogStream represents both revlogs with user data (those with WC representative and // system data under store/data) and system-only revlogs (like changelog and manifest), there's no // easy way to supply human-friendly name of the active file (independent from whether it's index of data) return inline ? indexFile.getPath() : getDataFile().getPath(); } public boolean isInlineData() throws HgInvalidControlFileException { initOutline(); return inline; } public int revisionCount() throws HgInvalidControlFileException { initOutline(); return baseRevisions.length; } /** * @throws HgInvalidControlFileException if attempt to read index file failed * @throws HgInvalidRevisionException if revisionIndex argument doesn't represent a valid record in the revlog */ public int dataLength(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException { // XXX in fact, use of iterate() instead of this implementation may be quite reasonable. // revisionIndex = checkRevisionIndex(revisionIndex); DataAccess daIndex = getIndexStream(true); try { int recordOffset = getIndexOffsetInt(revisionIndex); daIndex.seek(recordOffset + 12); // 6+2+4 int actualLen = daIndex.readInt(); return actualLen; } catch (IOException ex) { throw new HgInvalidControlFileException(null, ex, indexFile).setRevisionIndex(revisionIndex); } finally { daIndex.done(); } } /** * Read nodeid at given index * * @throws HgInvalidControlFileException if attempt to read index file failed * @throws HgInvalidRevisionException if revisionIndex argument doesn't represent a valid record in the revlog */ public byte[] nodeid(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException { revisionIndex = checkRevisionIndex(revisionIndex); DataAccess daIndex = getIndexStream(true); try { int recordOffset = getIndexOffsetInt(revisionIndex); daIndex.seek(recordOffset + 32); byte[] rv = new byte[20]; daIndex.readBytes(rv, 0, 20); return rv; } catch (IOException ex) { throw new HgInvalidControlFileException("Revision lookup failed", ex, indexFile).setRevisionIndex(revisionIndex); } finally { daIndex.done(); } } /** * Get link field from the index record. * * @throws HgInvalidControlFileException if attempt to read index file failed * @throws HgInvalidRevisionException if revisionIndex argument doesn't represent a valid record in the revlog */ public int linkRevision(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException { revisionIndex = checkRevisionIndex(revisionIndex); DataAccess daIndex = getIndexStream(true); try { int recordOffset = getIndexOffsetInt(revisionIndex); daIndex.seek(recordOffset + 20); int linkRev = daIndex.readInt(); return linkRev; } catch (IOException ex) { throw new HgInvalidControlFileException("Linked revision lookup failed", ex, indexFile).setRevisionIndex(revisionIndex); } finally { daIndex.done(); } } /** * Extract base revision field from the revlog * * @throws HgInvalidControlFileException if attempt to read index file failed * @throws HgInvalidRevisionException if revisionIndex argument doesn't represent a valid record in the revlog */ public int baseRevision(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException { revisionIndex = checkRevisionIndex(revisionIndex); return getBaseRevision(revisionIndex); } /** * Read indexes of parent revisions * @param revisionIndex index of child revision * @param parents array to hold return value, length >= 2 * @return value of <code>parents</code> parameter for convenience * @throws HgInvalidControlFileException if attempt to read index file failed * @throws HgInvalidRevisionException if revisionIndex argument doesn't represent a valid record in the revlog */ public int[] parents(int revisionIndex, int[] parents) throws HgInvalidControlFileException, HgInvalidRevisionException { assert parents.length > 1; revisionIndex = checkRevisionIndex(revisionIndex); DataAccess daIndex = getIndexStream(true); try { int recordOffset = getIndexOffsetInt(revisionIndex); daIndex.seek(recordOffset + 24); int p1 = daIndex.readInt(); int p2 = daIndex.readInt(); // although NO_REVISION == -1, it doesn't hurt to ensure this parents[0] = p1 == -1 ? NO_REVISION : p1; parents[1] = p2 == -1 ? NO_REVISION : p2; return parents; } catch (IOException ex) { throw new HgInvalidControlFileException("Parents lookup failed", ex, indexFile).setRevisionIndex(revisionIndex); } finally { daIndex.done(); } } // Perhaps, RevlogStream should be limited to use of plain int revisions for access, // while Nodeids should be kept on the level up, in Revlog. Guess, Revlog better keep // map of nodeids, and once this comes true, we may get rid of this method. // Unlike its counterpart, {@link Revlog#getLocalRevisionNumber()}, doesn't fail with exception if node not found, /** * @return integer in [0..revisionCount()) or {@link HgRepository#BAD_REVISION} if not found * @throws HgInvalidControlFileException if attempt to read index file failed */ public int findRevisionIndex(Nodeid nodeid) throws HgInvalidControlFileException { // XXX this one may be implemented with iterate() once there's mechanism to stop iterations final int indexSize = revisionCount(); DataAccess daIndex = getIndexStream(false); try { byte[] nodeidBuf = new byte[20]; for (int i = 0; i < indexSize; i++) { daIndex.skip(8); int compressedLen = daIndex.readInt(); daIndex.skip(20); daIndex.readBytes(nodeidBuf, 0, 20); if (nodeid.equalsTo(nodeidBuf)) { return i; } daIndex.skip(inline ? 12 + compressedLen : 12); } } catch (IOException ex) { throw new HgInvalidControlFileException("Revision lookup failed", ex, indexFile).setRevision(nodeid); } finally { daIndex.done(); } return BAD_REVISION; } /** * @return value suitable for the corresponding field in the new revision's header, not physical offset in the file * (which is different in case of inline revlogs) */ public long newEntryOffset() throws HgInvalidControlFileException { if (revisionCount() == 0) { return 0; } DataAccess daIndex = getIndexStream(true); int lastRev = revisionCount() - 1; try { int recordOffset = getIndexOffsetInt(lastRev); daIndex.seek(recordOffset); long value = daIndex.readLong(); value = value >>> 16; int compressedLen = daIndex.readInt(); return lastRev == 0 ? compressedLen : value + compressedLen; } catch (IOException ex) { throw new HgInvalidControlFileException("Linked revision lookup failed", ex, indexFile).setRevisionIndex(lastRev); } finally { daIndex.done(); } } /** * should be possible to use TIP, ALL, or -1, -2, -n notation of Hg * ? boolean needsNodeid * @throws HgRuntimeException subclass thereof to indicate issues with the library. <em>Runtime exception</em> */ public void iterate(int start, int end, boolean needData, Inspector inspector) throws HgRuntimeException { initOutline(); final int indexSize = revisionCount(); if (indexSize == 0) { return; } if (end == TIP) { end = indexSize - 1; } if (start == TIP) { start = indexSize - 1; } HgInternals.checkRevlogRange(start, end, indexSize-1); // XXX may cache [start .. end] from index with a single read (pre-read) ReaderN1 r = new ReaderN1(needData, inspector, repo.shallMergePatches()); try { r.start(end - start + 1, getLastRevisionRead()); r.range(start, end); } catch (IOException ex) { throw new HgInvalidControlFileException(String.format("Failed reading [%d..%d]", start, end), ex, indexFile); } finally { CachedRevision cr = r.finish(); setLastRevisionRead(cr); } } /** * Effective alternative to {@link #iterate(int, int, boolean, Inspector) batch read}, when only few selected * revisions are of interest. * @param sortedRevisions revisions to walk, in ascending order. * @param needData whether inspector needs access to header only * @param inspector callback to process entries * @throws HgRuntimeException subclass thereof to indicate issues with the library. <em>Runtime exception</em> */ public void iterate(int[] sortedRevisions, boolean needData, Inspector inspector) throws HgRuntimeException { final int indexSize = revisionCount(); if (indexSize == 0 || sortedRevisions.length == 0) { return; } if (sortedRevisions[0] > indexSize) { throw new HgInvalidRevisionException(String.format("Can't iterate [%d, %d] in range [0..%d]", sortedRevisions[0], sortedRevisions[sortedRevisions.length - 1], indexSize), null, sortedRevisions[0]); } if (sortedRevisions[sortedRevisions.length - 1] > indexSize) { throw new HgInvalidRevisionException(String.format("Can't iterate [%d, %d] in range [0..%d]", sortedRevisions[0], sortedRevisions[sortedRevisions.length - 1], indexSize), null, sortedRevisions[sortedRevisions.length - 1]); } ReaderN1 r = new ReaderN1(needData, inspector, repo.shallMergePatches()); try { r.start(sortedRevisions.length, getLastRevisionRead()); for (int i = 0; i < sortedRevisions.length; ) { int x = i; i++; while (i < sortedRevisions.length) { if (sortedRevisions[i] == sortedRevisions[i-1] + 1) { i++; } else { break; } } // commitRevisions[x..i-1] are sequential if (!r.range(sortedRevisions[x], sortedRevisions[i-1])) { return; } } } catch (IOException ex) { final int c = sortedRevisions.length; throw new HgInvalidControlFileException(String.format("Failed reading %d revisions in [%d; %d]", c, sortedRevisions[0], sortedRevisions[c-1]), ex, indexFile); } finally { CachedRevision cr = r.finish(); setLastRevisionRead(cr); } } public void attach(Observer listener) { assert listener != null; if (observers == null) { observers = new ArrayList<Observer>(3); } observers.add(listener); } public void detach(Observer listener) { assert listener != null; if (observers != null) { observers.remove(listener); } } /* * Note, this method IS NOT a replacement for Observer. It has to be invoked when the validity of any * cache built using revision information is in doubt, but it provides reasonable value only till the * first initOutline() to be invoked, i.e. in [change..revlog read operation] time frame. If your code * accesses cached information without any prior explicit read operation, you shall consult this method * if next read operation would in fact bring changed content. * Observer is needed in addition to this method because any revlog read operation (e.g. Revlog#getLastRevision) * would clear shallDropDerivedCaches(), and if code relies only on this method to clear its derived caches, * it would miss the update. */ public boolean shallDropDerivedCaches() { if (shallDropDerivedCaches) { return shallDropDerivedCaches; } return shallDropDerivedCaches = changeTracker.hasChanged(indexFile); } void revisionAdded(int revisionIndex, Nodeid revision, int baseRevisionIndex, long revisionOffset) throws HgInvalidControlFileException { shallDropDerivedCaches = true; if (!outlineCached()) { return; } if (baseRevisions.length != revisionIndex) { throw new HgInvalidControlFileException(String.format("New entry's index shall be %d, not %d", baseRevisions.length, revisionIndex), null, indexFile); } if (baseRevisionIndex < 0 || baseRevisionIndex > baseRevisions.length) { // baseRevisionIndex MAY be == to baseRevisions.length, it's when new revision is based on itself throw new HgInvalidControlFileException(String.format("Base revision index %d doesn't fit [0..%d] range", baseRevisionIndex, baseRevisions.length), null, indexFile); } assert revision != null; assert !revision.isNull(); // next effort doesn't seem to be of any value at least in case of regular commit // as the next call to #initOutline would recognize the file change and reload complete revlog anyway // OTOH, there might be transaction strategy that doesn't update the file until its completion, // while it's handy to know new revisions meanwhile. int[] baseRevisionsCopy = new int[baseRevisions.length + 1]; System.arraycopy(baseRevisions, 0, baseRevisionsCopy, 0, baseRevisions.length); baseRevisionsCopy[baseRevisions.length] = baseRevisionIndex; baseRevisions = baseRevisionsCopy; if (inline && indexRecordOffset != null) { assert indexRecordOffset.length == revisionIndex; int[] indexRecordOffsetCopy = new int[indexRecordOffset.length + 1]; System.arraycopy(indexRecordOffset, 0, indexRecordOffsetCopy, 0, indexRecordOffset.length); indexRecordOffsetCopy[indexRecordOffset.length] = offsetFieldToInlineFileOffset(revisionOffset, revisionIndex); indexRecordOffset = indexRecordOffsetCopy; } } private int getBaseRevision(int revision) { return baseRevisions[revision]; } /** * @param revisionIndex shall be valid index, [0..baseRevisions.length-1]. * It's advised to use {@link #checkRevisionIndex(int)} to ensure argument is correct. * @return offset of the revision's record in the index (.i) stream */ private int getIndexOffsetInt(int revisionIndex) { return inline ? indexRecordOffset[revisionIndex] : revisionIndex * REVLOGV1_RECORD_SIZE; } private int checkRevisionIndex(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException { final int last = revisionCount() - 1; if (revisionIndex == TIP) { revisionIndex = last; } if (revisionIndex < 0 || revisionIndex > last) { throw new HgInvalidRevisionException(revisionIndex).setRevisionIndex(revisionIndex, 0, last); } return revisionIndex; } private boolean outlineCached() { return baseRevisions != null && baseRevisions.length > 0; } // translate 6-byte offset field value to physical file offset for inline revlogs // DOESN'T MAKE SENSE if revlog with data is separate private static int offsetFieldToInlineFileOffset(long offset, int recordIndex) throws HgInvalidStateException { int o = Internals.ltoi(offset); if (o != offset) { // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file // with inlined data of size greater than 2 Gb. throw new HgInvalidStateException("Data too big, offset didn't fit to sizeof(int)"); } return o + REVLOGV1_RECORD_SIZE * recordIndex; } // every access to index revlog goes after this method only. private void initOutline() throws HgInvalidControlFileException { // true to send out 'drop-your-caches' event after outline has been built final boolean notifyReload; if (outlineCached()) { if (!changeTracker.hasChanged(indexFile)) { return; } notifyReload = true; } else { // no cached outline - inital read, do not send any reload/invalidate notifications notifyReload = false; } changeTracker.touch(indexFile); DataAccess da = getIndexStream(false); try { if (da.isEmpty()) { // do not fail with exception if stream is empty, it's likely intentional baseRevisions = new int[0]; // empty revlog, likely to be populated, indicate we start with a single file inline = true; return; } int versionField = da.readInt(); da.readInt(); // just to skip next 4 bytes of offset + flags inline = (versionField & INLINEDATA) != 0; IntVector resBases, resOffsets = null; int entryCountGuess = Internals.ltoi(da.longLength() / REVLOGV1_RECORD_SIZE); if (inline) { entryCountGuess >>>= 2; // pure guess, assume useful data takes 3/4 of total space resOffsets = new IntVector(entryCountGuess, 5000); } resBases = new IntVector(entryCountGuess, 5000); long offset = 0; // first offset is always 0, thus Hg uses it for other purposes while(true) { int compressedLen = da.readInt(); // 8+4 = 12 bytes total read here @SuppressWarnings("unused") int actualLen = da.readInt(); int baseRevision = da.readInt(); // 12 + 8 = 20 bytes read here // int linkRevision = di.readInt(); // int parent1Revision = di.readInt(); // int parent2Revision = di.readInt(); // byte[] nodeid = new byte[32]; resBases.add(baseRevision); if (inline) { int o = offsetFieldToInlineFileOffset(offset, resOffsets.size()); resOffsets.add(o); da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size) } else { da.skip(3*4 + 32); } if (da.isEmpty()) { // fine, done then baseRevisions = resBases.toArray(true); if (inline) { indexRecordOffset = resOffsets.toArray(true); } break; } else { // start reading next record long l = da.readLong(); offset = l >>> 16; } } } catch (IOException ex) { throw new HgInvalidControlFileException("Failed to analyze revlog index", ex, indexFile); } finally { da.done(); if (notifyReload && observers != null) { for (Observer l : observers) { l.reloaded(this); } shallDropDerivedCaches = false; } } } private CachedRevision getLastRevisionRead() { return lastRevisionRead == null ? null : lastRevisionRead.get(); } private void setLastRevisionRead(CachedRevision cr) { // done() for lastRevisionRead.userData has been called by ReaderN1 once // it noticed unsuitable DataAccess. // Now, done() for any CachedRevision cleared by GC: for (Reference<? extends CachedRevision> r; (r = lastRevisionQueue.poll()) != null;) { CachedRevision toClean = r.get(); if (toClean != null && toClean.userData != null) { toClean.userData.done(); } } if (cr != null) { lastRevisionRead = new SoftReference<CachedRevision>(cr, lastRevisionQueue); } else { lastRevisionRead = null; } } final static class CachedRevision { final int revision; final DataAccess userData; public CachedRevision(int lastRevisionRead, DataAccess lastUserData) { revision = lastRevisionRead; userData = lastUserData; } } /** * operation with single file open/close and multiple diverse reads. * XXX initOutline might need similar extraction to keep N1 format knowledge */ final class ReaderN1 { private final Inspector inspector; private final boolean needData; private final boolean mergePatches; private DataAccess daIndex = null, daData = null; private Lifecycle.BasicCallback cb = null; private Lifecycle lifecycleListener = null; private int lastRevisionRead = BAD_REVISION; private DataAccess lastUserData; // // next are transient values, for range() use only private final Inflater inflater = new Inflater(); // can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel private final byte[] inflaterBuffer = new byte[10 * 1024]; // TODO [post-1.1] consider using DAP.DEFAULT_FILE_BUFFER private final ByteBuffer inflaterOutBuffer = ByteBuffer.allocate(inflaterBuffer.length * 2); private final byte[] nodeidBuf = new byte[20]; // revlog record fields private long offset; @SuppressWarnings("unused") private int flags; private int compressedLen; private int actualLen; private int baseRevision; private int linkRevision; private int parent1Revision; private int parent2Revision; public ReaderN1(boolean dataRequested, Inspector insp, boolean usePatchMerge) { assert insp != null; needData = dataRequested; inspector = insp; mergePatches = usePatchMerge; } public void start(int totalWork, CachedRevision cachedRevision) { daIndex = getIndexStream(totalWork <= 10); if (needData && !inline) { daData = getDataStream(); } lifecycleListener = Adaptable.Factory.getAdapter(inspector, Lifecycle.class, null); if (lifecycleListener != null) { cb = new Lifecycle.BasicCallback(); lifecycleListener.start(totalWork, cb, cb); } if (needData && cachedRevision != null) { lastUserData = cachedRevision.userData; lastRevisionRead = cachedRevision.revision; assert lastUserData != null; } } // invoked only once per instance public CachedRevision finish() { CachedRevision rv = null; if (lastUserData != null) { if (lastUserData instanceof ByteArrayDataAccess) { // it's safe to cache only in-memory revision texts, // if lastUserData is merely a filter over file stream, // we'd need to keep file open, and this is bad. // XXX perhaps, wrap any DataAccess.byteArray into // ByteArrayDataAccess? rv = new CachedRevision(lastRevisionRead, lastUserData); } else { lastUserData.done(); } lastUserData = null; } if (lifecycleListener != null) { lifecycleListener.finish(cb); lifecycleListener = null; cb = null; } daIndex.done(); if (daData != null) { daData.done(); daData = null; } return rv; } private void readHeaderRecord(int i) throws IOException { if (inline && needData) { // inspector reading data (though FilterDataAccess) may have affected index position daIndex.seek(getIndexOffsetInt(i)); } long l = daIndex.readLong(); // 0 offset = i == 0 ? 0 : (l >>> 16); flags = (int) (l & 0x0FFFF); compressedLen = daIndex.readInt(); // +8 actualLen = daIndex.readInt(); // +12 baseRevision = daIndex.readInt(); // +16 linkRevision = daIndex.readInt(); // +20 parent1Revision = daIndex.readInt(); parent2Revision = daIndex.readInt(); // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty daIndex.readBytes(nodeidBuf, 0, 20); // +32 daIndex.skip(12); } private boolean isPatch(int i) { return baseRevision != i; // the only way I found to tell if it's a patch } private DataAccess getStoredData(int i) throws IOException { DataAccess userDataAccess = null; DataAccess streamDataAccess; long streamOffset; if (inline) { streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE; streamDataAccess = daIndex; // don't need to do seek as it's actual position in the index stream, but it's safe to seek, just in case daIndex.longSeek(streamOffset); } else { streamOffset = offset; streamDataAccess = daData; daData.longSeek(streamOffset); } if (streamDataAccess.isEmpty() || compressedLen == 0) { userDataAccess = new DataAccess(); // empty } else { final byte firstByte = streamDataAccess.readByte(); if (firstByte == 0x78 /* 'x' */) { inflater.reset(); userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, isPatch(i) ? -1 : actualLen, inflater, inflaterBuffer, inflaterOutBuffer); } else if (firstByte == 0x75 /* 'u' */) { userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1); } else { // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' but I don't see reason not to return data as is // // although firstByte is already read from the streamDataAccess, FilterDataAccess#readByte would seek to // initial offset before first attempt to read a byte userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen); } } return userDataAccess; } // may be invoked few times per instance life public boolean range(int start, int end) throws IOException, HgRuntimeException { int i; // it (i.e. replace with i >= start) if (needData && (i = getBaseRevision(start)) < start) { // if lastRevisionRead in [baseRevision(start), start) can reuse lastUserData // doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below). if (lastRevisionRead != BAD_REVISION && i <= lastRevisionRead && lastRevisionRead < start) { i = lastRevisionRead + 1; // start with first not-yet-read revision } else { if (lastUserData != null) { lastUserData.done(); lastUserData = null; } } } else { // don't need to clean lastUserData as it's always null when !needData i = start; } daIndex.seek(getIndexOffsetInt(i)); // // reuse instance, do not normalize it as patches from the stream are unlikely to need it final Patch patch = new Patch(false); // if (needData && mergePatches && start-i > 2) { // i+1 == start just reads lastUserData, i+2 == start applies one patch - not worth dedicated effort Patch ultimatePatch = new Patch(true); for ( ; i < start; i++) { readHeaderRecord(i); DataAccess userDataAccess = getStoredData(i); if (lastUserData == null) { assert !isPatch(i); lastUserData = userDataAccess; } else { assert isPatch(i); // i < start and i == getBaseRevision() patch.read(userDataAccess); userDataAccess.done(); // I assume empty patches are applied ok ultimatePatch = ultimatePatch.apply(patch); patch.clear(); } } lastUserData.reset(); byte[] userData = ultimatePatch.apply(lastUserData, actualLen); ultimatePatch.clear(); lastUserData.done(); lastUserData = new ByteArrayDataAccess(userData); } // for (; i <= end; i++ ) { readHeaderRecord(i); DataAccess userDataAccess = null; if (needData) { userDataAccess = getStoredData(i); // userDataAccess is revision content, either complete revision, patch of a previous content, or an empty patch if (isPatch(i)) { // this is a patch if (userDataAccess.isEmpty()) { // Issue 22, empty patch to an empty base revision // Issue 24, empty patch to non-empty base revision // empty patch modifies nothing, use content of a previous revision (shall present - it's a patch here) // assert lastUserData.length() == actualLen; // with no patch, data size shall be the same userDataAccess = lastUserData; } else { patch.read(userDataAccess); userDataAccess.done(); // // it shall be reset at the end of prev iteration, when it got assigned from userDataAccess // however, actual userDataAccess and lastUserData may share Inflater object, which needs to be reset // Alternatively, userDataAccess.done() above may be responsible to reset Inflater (if it's InflaterDataAccess) lastUserData.reset(); // final long startMeasuring = System.currentTimeMillis(); // TIMING byte[] userData = patch.apply(lastUserData, actualLen); // applyTime += (System.currentTimeMillis() - startMeasuring); // TIMING patch.clear(); // do not keep any reference, allow byte[] data to be gc'd userDataAccess = new ByteArrayDataAccess(userData); } } } else { if (inline) { daIndex.skip(compressedLen); } } if (i >= start) { // final long startMeasuring = System.currentTimeMillis(); // TIMING inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); // inspectorTime += (System.currentTimeMillis() - startMeasuring); // TIMING } if (cb != null) { if (cb.isStopped()) { return false; } } if (userDataAccess != null) { userDataAccess.reset(); // not sure this is necessary here, as lastUserData would get reset anyway before next use. } if (lastUserData != null && lastUserData != userDataAccess /* empty patch case, reuse of recent data in actual revision */) { // release lastUserData only if we didn't reuse it in actual revision due to empty patch: // empty patch means we have previous revision and didn't alter it with a patch, hence use lastUserData for userDataAccess above lastUserData.done(); } lastUserData = userDataAccess; } lastRevisionRead = end; return true; } } public interface Inspector { /** * XXX boolean retVal to indicate whether to continue? * * Implementers shall not invoke DataAccess.done(), it's accomplished by #iterate at appropriate moment * * @param revisionIndex absolute index of revision in revlog being iterated * @param actualLen length of the user data at this revision * @param baseRevision last revision known to hold complete revision (other hold patches). * if baseRevision != revisionIndex, data for this revision is a result of a sequence of patches * @param linkRevision index of corresponding changeset revision * @param parent1Revision index of first parent revision in this revlog, or {@link HgRepository#NO_REVISION} * @param parent2Revision index of second parent revision in this revlog, or {@link HgRepository#NO_REVISION} * @param nodeid 20-byte buffer, shared between invocations * @param data access to revision content of actualLen size, or <code>null</code> if no data has been requested with * {@link RevlogStream#iterate(int[], boolean, Inspector)} */ void next(int revisionIndex, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, DataAccess data) throws HgRuntimeException; } public interface Observer { // notify observer of invalidate/reload event in the stream public void reloaded(RevlogStream src); } }