tikhomirov@10: /* tikhomirov@530: * Copyright (c) 2010-2013 TMate Software Ltd tikhomirov@74: * tikhomirov@74: * This program is free software; you can redistribute it and/or modify tikhomirov@74: * it under the terms of the GNU General Public License as published by tikhomirov@74: * the Free Software Foundation; version 2 of the License. tikhomirov@74: * tikhomirov@74: * This program is distributed in the hope that it will be useful, tikhomirov@74: * but WITHOUT ANY WARRANTY; without even the implied warranty of tikhomirov@74: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the tikhomirov@74: * GNU General Public License for more details. tikhomirov@74: * tikhomirov@74: * For information on how to redistribute this software under tikhomirov@74: * the terms of a license other than GNU General Public License tikhomirov@102: * contact TMate Software at support@hg4j.com tikhomirov@0: */ tikhomirov@77: package org.tmatesoft.hg.internal; tikhomirov@0: tikhomirov@80: import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION; tikhomirov@74: import static org.tmatesoft.hg.repo.HgRepository.TIP; tikhomirov@530: import static org.tmatesoft.hg.internal.Internals.REVLOGV1_RECORD_SIZE; tikhomirov@5: tikhomirov@3: import java.io.File; tikhomirov@2: import java.io.IOException; tikhomirov@593: import java.lang.ref.Reference; tikhomirov@593: import java.lang.ref.ReferenceQueue; tikhomirov@593: import java.lang.ref.SoftReference; tikhomirov@607: import java.util.ArrayList; tikhomirov@607: import java.util.List; tikhomirov@263: import java.util.zip.Inflater; tikhomirov@0: tikhomirov@74: import org.tmatesoft.hg.core.Nodeid; tikhomirov@300: import org.tmatesoft.hg.repo.HgInternals; tikhomirov@423: import org.tmatesoft.hg.repo.HgInvalidControlFileException; tikhomirov@423: import org.tmatesoft.hg.repo.HgInvalidRevisionException; tikhomirov@423: import org.tmatesoft.hg.repo.HgInvalidStateException; tikhomirov@80: import org.tmatesoft.hg.repo.HgRepository; tikhomirov@520: 
import org.tmatesoft.hg.util.Adaptable; tikhomirov@74: tikhomirov@10: tikhomirov@0: /** tikhomirov@0: * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations), tikhomirov@198: * or numerous RevlogStream with separate representation of the underlying data (cached, lazy ChunkStream)? tikhomirov@74: * tikhomirov@0: * @see http://mercurial.selenic.com/wiki/Revlog tikhomirov@0: * @see http://mercurial.selenic.com/wiki/RevlogNG tikhomirov@74: * tikhomirov@74: * @author Artem Tikhomirov tikhomirov@74: * @author TMate Software Ltd. tikhomirov@0: */ tikhomirov@0: public class RevlogStream { tikhomirov@2: tikhomirov@608: static final int INLINEDATA = 1 << 16; tikhomirov@608: tikhomirov@198: /* tikhomirov@198: * makes sense for index with inline data only - actual offset of the record in the .i file (record entry + revision * record size)) tikhomirov@198: * tikhomirov@198: * long[] in fact (there are 8-bytes field in the revlog) tikhomirov@198: * However, (a) DataAccess currently doesn't operate with long seek/length tikhomirov@198: * and, of greater significance, (b) files with inlined data are designated for smaller files, tikhomirov@198: * guess, about 130 Kb, and offset there won't ever break int capacity tikhomirov@198: */ tikhomirov@198: private int[] indexRecordOffset; tikhomirov@198: private int[] baseRevisions; tikhomirov@2: private boolean inline = false; tikhomirov@3: private final File indexFile; tikhomirov@396: private File dataFile; tikhomirov@608: private final Internals repo; tikhomirov@593: // keeps last complete revision we've read. Note, this cached revision doesn't help tikhomirov@593: // for subsequent #iterate() calls with the same revision (Inspector needs more data than tikhomirov@593: // we currently cache here, perhaps, we shall cache everything it wants to cover same tikhomirov@593: // revision case as well). 
Now this helps when second #iterate() call is for a revision greater tikhomirov@593: // than one from the first call, and both revisions got same base rev. It's often the case when tikhomirov@593: // parents/children are analyzed. tikhomirov@593: private SoftReference lastRevisionRead; tikhomirov@593: private final ReferenceQueue lastRevisionQueue = new ReferenceQueue(); tikhomirov@607: // tikhomirov@607: private final RevlogChangeMonitor changeTracker; tikhomirov@607: private List observers; tikhomirov@607: private boolean shallDropDerivedCaches = false; tikhomirov@3: tikhomirov@608: public RevlogStream(Internals hgRepo, File indexFile) { tikhomirov@608: repo = hgRepo; tikhomirov@3: this.indexFile = indexFile; tikhomirov@608: changeTracker = repo.getRevlogTracker(indexFile); tikhomirov@3: } tikhomirov@2: tikhomirov@606: /** tikhomirov@606: * @param shortRead pass true to indicate intention to read few revisions only (as opposed to reading most of/complete revlog) tikhomirov@606: * @return never null, empty {@link DataAccess} if no stream is available tikhomirov@606: */ tikhomirov@606: /*package*/ DataAccess getIndexStream(boolean shortRead) { tikhomirov@606: // shortRead hint helps to avoid mmap files when only tikhomirov@606: // few bytes are to be read (i.e. 
#dataLength()) tikhomirov@608: DataAccessProvider dataAccess = repo.getDataAccess(); tikhomirov@606: return dataAccess.createReader(indexFile, shortRead); tikhomirov@0: } tikhomirov@0: tikhomirov@9: /*package*/ DataAccess getDataStream() { tikhomirov@608: DataAccessProvider dataAccess = repo.getDataAccess(); tikhomirov@606: return dataAccess.createReader(getDataFile(), false); tikhomirov@534: } tikhomirov@534: tikhomirov@534: /*package*/ DataSerializer getIndexStreamWriter() { tikhomirov@608: DataAccessProvider dataAccess = repo.getDataAccess(); tikhomirov@534: return dataAccess.createWriter(indexFile, true); tikhomirov@534: } tikhomirov@534: tikhomirov@534: /*package*/ DataSerializer getDataStreamWriter() { tikhomirov@608: DataAccessProvider dataAccess = repo.getDataAccess(); tikhomirov@534: return dataAccess.createWriter(getDataFile(), true); tikhomirov@9: } tikhomirov@9: tikhomirov@396: /** tikhomirov@396: * Constructs file object that corresponds to .d revlog counterpart. tikhomirov@396: * Note, it's caller responsibility to ensure this file makes any sense (i.e. 
check {@link #inline} attribute) tikhomirov@396: */ tikhomirov@396: private File getDataFile() { tikhomirov@396: if (dataFile == null) { tikhomirov@396: final String indexName = indexFile.getName(); tikhomirov@396: dataFile = new File(indexFile.getParentFile(), indexName.substring(0, indexName.length() - 1) + "d"); tikhomirov@396: } tikhomirov@396: return dataFile; tikhomirov@396: } tikhomirov@396: tikhomirov@396: // initialize exception with the file where revlog structure information comes from tikhomirov@396: public HgInvalidControlFileException initWithIndexFile(HgInvalidControlFileException ex) { tikhomirov@396: return ex.setFile(indexFile); tikhomirov@396: } tikhomirov@396: tikhomirov@396: // initialize exception with the file where revlog data comes from tikhomirov@396: public HgInvalidControlFileException initWithDataFile(HgInvalidControlFileException ex) { tikhomirov@396: // exceptions are usually raised after read attepmt, hence inline shall be initialized tikhomirov@396: // although honest approach is to call #initOutline() first tikhomirov@396: return ex.setFile(inline ? indexFile : getDataFile()); tikhomirov@396: } tikhomirov@534: tikhomirov@534: /*package-private*/String getDataFileName() { tikhomirov@534: // XXX a temporary solution to provide more info to fill in exceptions other than tikhomirov@534: // HgInvalidControlFileException (those benefit from initWith* methods above) tikhomirov@534: // tikhomirov@534: // Besides, since RevlogStream represents both revlogs with user data (those with WC representative and tikhomirov@534: // system data under store/data) and system-only revlogs (like changelog and manifest), there's no tikhomirov@534: // easy way to supply human-friendly name of the active file (independent from whether it's index of data) tikhomirov@534: return inline ? 
indexFile.getPath() : getDataFile().getPath(); tikhomirov@534: } tikhomirov@396: tikhomirov@534: public boolean isInlineData() { tikhomirov@534: initOutline(); tikhomirov@534: return inline; tikhomirov@534: } tikhomirov@396: tikhomirov@2: public int revisionCount() { tikhomirov@2: initOutline(); tikhomirov@198: return baseRevisions.length; tikhomirov@2: } tikhomirov@22: tikhomirov@295: /** tikhomirov@354: * @throws HgInvalidControlFileException if attempt to read index file failed tikhomirov@354: * @throws HgInvalidRevisionException if revisionIndex argument doesn't represent a valid record in the revlog tikhomirov@295: */ tikhomirov@354: public int dataLength(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException { tikhomirov@22: // XXX in fact, use of iterate() instead of this implementation may be quite reasonable. tikhomirov@22: // tikhomirov@354: revisionIndex = checkRevisionIndex(revisionIndex); tikhomirov@606: DataAccess daIndex = getIndexStream(true); tikhomirov@22: try { tikhomirov@354: int recordOffset = getIndexOffsetInt(revisionIndex); tikhomirov@22: daIndex.seek(recordOffset + 12); // 6+2+4 tikhomirov@22: int actualLen = daIndex.readInt(); tikhomirov@22: return actualLen; tikhomirov@22: } catch (IOException ex) { tikhomirov@440: throw new HgInvalidControlFileException(null, ex, indexFile).setRevisionIndex(revisionIndex); tikhomirov@22: } finally { tikhomirov@22: daIndex.done(); tikhomirov@22: } tikhomirov@22: } tikhomirov@22: tikhomirov@295: /** tikhomirov@354: * Read nodeid at given index tikhomirov@354: * tikhomirov@354: * @throws HgInvalidControlFileException if attempt to read index file failed tikhomirov@354: * @throws HgInvalidRevisionException if revisionIndex argument doesn't represent a valid record in the revlog tikhomirov@295: */ tikhomirov@354: public byte[] nodeid(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException { tikhomirov@354: revisionIndex = 
checkRevisionIndex(revisionIndex); tikhomirov@606: DataAccess daIndex = getIndexStream(true); tikhomirov@80: try { tikhomirov@354: int recordOffset = getIndexOffsetInt(revisionIndex); tikhomirov@80: daIndex.seek(recordOffset + 32); tikhomirov@80: byte[] rv = new byte[20]; tikhomirov@80: daIndex.readBytes(rv, 0, 20); tikhomirov@80: return rv; tikhomirov@80: } catch (IOException ex) { tikhomirov@440: throw new HgInvalidControlFileException("Revision lookup failed", ex, indexFile).setRevisionIndex(revisionIndex); tikhomirov@80: } finally { tikhomirov@88: daIndex.done(); tikhomirov@88: } tikhomirov@88: } tikhomirov@295: tikhomirov@295: /** tikhomirov@295: * Get link field from the index record. tikhomirov@354: * tikhomirov@354: * @throws HgInvalidControlFileException if attempt to read index file failed tikhomirov@354: * @throws HgInvalidRevisionException if revisionIndex argument doesn't represent a valid record in the revlog tikhomirov@295: */ tikhomirov@354: public int linkRevision(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException { tikhomirov@354: revisionIndex = checkRevisionIndex(revisionIndex); tikhomirov@606: DataAccess daIndex = getIndexStream(true); tikhomirov@88: try { tikhomirov@354: int recordOffset = getIndexOffsetInt(revisionIndex); tikhomirov@88: daIndex.seek(recordOffset + 20); tikhomirov@88: int linkRev = daIndex.readInt(); tikhomirov@88: return linkRev; tikhomirov@88: } catch (IOException ex) { tikhomirov@440: throw new HgInvalidControlFileException("Linked revision lookup failed", ex, indexFile).setRevisionIndex(revisionIndex); tikhomirov@88: } finally { tikhomirov@88: daIndex.done(); tikhomirov@80: } tikhomirov@80: } tikhomirov@80: tikhomirov@585: /** tikhomirov@585: * Extract base revision field from the revlog tikhomirov@585: * tikhomirov@585: * @throws HgInvalidControlFileException if attempt to read index file failed tikhomirov@585: * @throws HgInvalidRevisionException if revisionIndex argument doesn't 
represent a valid record in the revlog
	 */
	public int baseRevision(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException {
		revisionIndex = checkRevisionIndex(revisionIndex);
		return getBaseRevision(revisionIndex);
	}

	// Perhaps, RevlogStream should be limited to use of plain int revisions for access,
	// while Nodeids should be kept on the level up, in Revlog. Guess, Revlog better keep
	// map of nodeids, and once this comes true, we may get rid of this method.
	// Unlike its counterpart, {@link Revlog#getLocalRevisionNumber()}, doesn't fail with exception if node not found,
	/**
	 * Sequentially scans index records comparing their nodeid field with the one supplied.
	 *
	 * @param nodeid revision to look up
	 * @return integer in [0..revisionCount()) or {@link HgRepository#BAD_REVISION} if not found
	 * @throws HgInvalidControlFileException if attempt to read index file failed
	 */
	public int findRevisionIndex(Nodeid nodeid) throws HgInvalidControlFileException {
		// XXX this one may be implemented with iterate() once there's mechanism to stop iterations
		final int indexSize = revisionCount();
		DataAccess daIndex = getIndexStream(false);
		try {
			byte[] nodeidBuf = new byte[20];
			for (int i = 0; i < indexSize; i++) {
				daIndex.skip(8); // offset + flags
				int compressedLen = daIndex.readInt();
				daIndex.skip(20); // actual length, base, link and both parent revisions
				daIndex.readBytes(nodeidBuf, 0, 20);
				if (nodeid.equalsTo(nodeidBuf)) {
					return i;
				}
				// rest of the 32-byte nodeid field, plus inlined revision data, if any
				daIndex.skip(inline ? 12 + compressedLen : 12);
			}
		} catch (IOException ex) {
			throw new HgInvalidControlFileException("Revision lookup failed", ex, indexFile).setRevision(nodeid);
		} finally {
			daIndex.done();
		}
		return BAD_REVISION;
	}

	/**
	 * @return value suitable for the corresponding field in the new revision's header, not physical offset in the file
	 * (which is different in case of inline revlogs); 0 for an empty revlog
	 * @throws HgInvalidControlFileException if attempt to read index file failed
	 */
	public long newEntryOffset() {
		// ask for the revision count once, and before the stream is opened: the call may
		// re-read the index and fail, and there'd be nobody to release an already opened stream
		final int revCount = revisionCount();
		if (revCount == 0) {
			return 0;
		}
		final int lastRev = revCount - 1;
		DataAccess daIndex = getIndexStream(true);
		try {
			int recordOffset = getIndexOffsetInt(lastRev);
			daIndex.seek(recordOffset);
			// upper 6 bytes are offset, lower 2 are flags
			long value = daIndex.readLong() >>> 16;
			int compressedLen = daIndex.readInt();
			// offset field of the very first record is not an offset (always treated as 0)
			return lastRev == 0 ? compressedLen : value + compressedLen;
		} catch (IOException ex) {
			throw new HgInvalidControlFileException("Failed to read offset of the last revision", ex, indexFile).setRevisionIndex(lastRev);
		} finally {
			daIndex.done();
		}
	}

	// should be possible to use TIP, ALL, or -1, -2, -n notation of Hg
	// ? 
boolean needsNodeid tikhomirov@423: public void iterate(int start, int end, boolean needData, Inspector inspector) throws HgInvalidRevisionException, HgInvalidControlFileException { tikhomirov@2: initOutline(); tikhomirov@198: final int indexSize = revisionCount(); tikhomirov@3: if (indexSize == 0) { tikhomirov@3: return; tikhomirov@3: } tikhomirov@5: if (end == TIP) { tikhomirov@3: end = indexSize - 1; tikhomirov@3: } tikhomirov@5: if (start == TIP) { tikhomirov@5: start = indexSize - 1; tikhomirov@5: } tikhomirov@300: HgInternals.checkRevlogRange(start, end, indexSize-1); tikhomirov@2: // XXX may cache [start .. end] from index with a single read (pre-read) tikhomirov@2: tikhomirov@608: ReaderN1 r = new ReaderN1(needData, inspector, repo.shallMergePatches()); tikhomirov@2: try { tikhomirov@593: r.start(end - start + 1, getLastRevisionRead()); tikhomirov@242: r.range(start, end); tikhomirov@242: } catch (IOException ex) { tikhomirov@366: throw new HgInvalidControlFileException(String.format("Failed reading [%d..%d]", start, end), ex, indexFile); tikhomirov@242: } finally { tikhomirov@593: CachedRevision cr = r.finish(); tikhomirov@593: setLastRevisionRead(cr); tikhomirov@242: } tikhomirov@242: } tikhomirov@242: tikhomirov@242: /** tikhomirov@242: * Effective alternative to {@link #iterate(int, int, boolean, Inspector) batch read}, when only few selected tikhomirov@242: * revisions are of interest. tikhomirov@242: * @param sortedRevisions revisions to walk, in ascending order. 
tikhomirov@242: * @param needData whether inspector needs access to header only tikhomirov@242: * @param inspector callback to process entries tikhomirov@242: */ tikhomirov@366: public void iterate(int[] sortedRevisions, boolean needData, Inspector inspector) throws HgInvalidRevisionException, HgInvalidControlFileException /*REVISIT - too general exception*/ { tikhomirov@242: final int indexSize = revisionCount(); tikhomirov@242: if (indexSize == 0 || sortedRevisions.length == 0) { tikhomirov@242: return; tikhomirov@242: } tikhomirov@347: if (sortedRevisions[0] > indexSize) { tikhomirov@347: throw new HgInvalidRevisionException(String.format("Can't iterate [%d, %d] in range [0..%d]", sortedRevisions[0], sortedRevisions[sortedRevisions.length - 1], indexSize), null, sortedRevisions[0]); tikhomirov@347: } tikhomirov@347: if (sortedRevisions[sortedRevisions.length - 1] > indexSize) { tikhomirov@347: throw new HgInvalidRevisionException(String.format("Can't iterate [%d, %d] in range [0..%d]", sortedRevisions[0], sortedRevisions[sortedRevisions.length - 1], indexSize), null, sortedRevisions[sortedRevisions.length - 1]); tikhomirov@242: } tikhomirov@242: tikhomirov@608: ReaderN1 r = new ReaderN1(needData, inspector, repo.shallMergePatches()); tikhomirov@242: try { tikhomirov@594: r.start(sortedRevisions.length, getLastRevisionRead()); tikhomirov@242: for (int i = 0; i < sortedRevisions.length; ) { tikhomirov@242: int x = i; tikhomirov@242: i++; tikhomirov@242: while (i < sortedRevisions.length) { tikhomirov@242: if (sortedRevisions[i] == sortedRevisions[i-1] + 1) { tikhomirov@242: i++; tikhomirov@2: } else { tikhomirov@217: break; tikhomirov@217: } tikhomirov@217: } tikhomirov@242: // commitRevisions[x..i-1] are sequential tikhomirov@242: if (!r.range(sortedRevisions[x], sortedRevisions[i-1])) { tikhomirov@242: return; tikhomirov@51: } tikhomirov@2: } tikhomirov@2: } catch (IOException ex) { tikhomirov@366: final int c = sortedRevisions.length; tikhomirov@608: throw new 
HgInvalidControlFileException(String.format("Failed reading %d revisions in [%d; %d]", c, sortedRevisions[0], sortedRevisions[c-1]), ex, indexFile); tikhomirov@3: } finally { tikhomirov@593: CachedRevision cr = r.finish(); tikhomirov@593: setLastRevisionRead(cr); tikhomirov@2: } tikhomirov@2: } tikhomirov@607: tikhomirov@607: public void attach(Observer listener) { tikhomirov@607: assert listener != null; tikhomirov@607: if (observers == null) { tikhomirov@607: observers = new ArrayList(3); tikhomirov@607: } tikhomirov@607: observers.add(listener); tikhomirov@607: } tikhomirov@607: tikhomirov@607: public void detach(Observer listener) { tikhomirov@607: assert listener != null; tikhomirov@607: if (observers != null) { tikhomirov@607: observers.remove(listener); tikhomirov@607: } tikhomirov@607: } tikhomirov@607: tikhomirov@607: /* tikhomirov@607: * Note, this method IS NOT a replacement for Observer. It has to be invoked when the validity of any tikhomirov@607: * cache built using revision information is in doubt, but it provides reasonable value only till the tikhomirov@607: * first initOutline() to be invoked, i.e. in [change..revlog read operation] time frame. If your code tikhomirov@607: * accesses cached information without any prior explicit read operation, you shall consult this method tikhomirov@607: * if next read operation would in fact bring changed content. tikhomirov@607: * Observer is needed in addition to this method because any revlog read operation (e.g. Revlog#getLastRevision) tikhomirov@607: * would clear shallDropDerivedCaches(), and if code relies only on this method to clear its derived caches, tikhomirov@607: * it would miss the update. 
tikhomirov@607: */ tikhomirov@607: public boolean shallDropDerivedCaches() { tikhomirov@607: if (shallDropDerivedCaches) { tikhomirov@607: return shallDropDerivedCaches; tikhomirov@607: } tikhomirov@607: return shallDropDerivedCaches = changeTracker.hasChanged(indexFile); tikhomirov@607: } tikhomirov@198: tikhomirov@539: void revisionAdded(int revisionIndex, Nodeid revision, int baseRevisionIndex, long revisionOffset) throws HgInvalidControlFileException { tikhomirov@607: shallDropDerivedCaches = true; tikhomirov@539: if (!outlineCached()) { tikhomirov@539: return; tikhomirov@539: } tikhomirov@539: if (baseRevisions.length != revisionIndex) { tikhomirov@539: throw new HgInvalidControlFileException(String.format("New entry's index shall be %d, not %d", baseRevisions.length, revisionIndex), null, indexFile); tikhomirov@539: } tikhomirov@539: if (baseRevisionIndex < 0 || baseRevisionIndex > baseRevisions.length) { tikhomirov@539: // baseRevisionIndex MAY be == to baseRevisions.length, it's when new revision is based on itself tikhomirov@539: throw new HgInvalidControlFileException(String.format("Base revision index %d doesn't fit [0..%d] range", baseRevisionIndex, baseRevisions.length), null, indexFile); tikhomirov@539: } tikhomirov@539: assert revision != null; tikhomirov@539: assert !revision.isNull(); tikhomirov@539: int[] baseRevisionsCopy = new int[baseRevisions.length + 1]; tikhomirov@539: System.arraycopy(baseRevisions, 0, baseRevisionsCopy, 0, baseRevisions.length); tikhomirov@539: baseRevisionsCopy[baseRevisions.length] = baseRevisionIndex; tikhomirov@539: baseRevisions = baseRevisionsCopy; tikhomirov@539: if (inline && indexRecordOffset != null) { tikhomirov@539: assert indexRecordOffset.length == revisionIndex; tikhomirov@539: int[] indexRecordOffsetCopy = new int[indexRecordOffset.length + 1]; tikhomirov@559: System.arraycopy(indexRecordOffset, 0, indexRecordOffsetCopy, 0, indexRecordOffset.length); tikhomirov@539: 
indexRecordOffsetCopy[indexRecordOffset.length] = offsetFieldToInlineFileOffset(revisionOffset, revisionIndex); tikhomirov@539: indexRecordOffset = indexRecordOffsetCopy; tikhomirov@539: } tikhomirov@539: } tikhomirov@539: tikhomirov@198: private int getBaseRevision(int revision) { tikhomirov@198: return baseRevisions[revision]; tikhomirov@198: } tikhomirov@198: tikhomirov@198: /** tikhomirov@354: * @param revisionIndex shall be valid index, [0..baseRevisions.length-1]. tikhomirov@354: * It's advised to use {@link #checkRevisionIndex(int)} to ensure argument is correct. tikhomirov@198: * @return offset of the revision's record in the index (.i) stream tikhomirov@198: */ tikhomirov@354: private int getIndexOffsetInt(int revisionIndex) { tikhomirov@354: return inline ? indexRecordOffset[revisionIndex] : revisionIndex * REVLOGV1_RECORD_SIZE; tikhomirov@354: } tikhomirov@354: tikhomirov@354: private int checkRevisionIndex(int revisionIndex) throws HgInvalidRevisionException { tikhomirov@354: final int last = revisionCount() - 1; tikhomirov@354: if (revisionIndex == TIP) { tikhomirov@354: revisionIndex = last; tikhomirov@354: } tikhomirov@354: if (revisionIndex < 0 || revisionIndex > last) { tikhomirov@354: throw new HgInvalidRevisionException(revisionIndex).setRevisionIndex(revisionIndex, 0, last); tikhomirov@354: } tikhomirov@354: return revisionIndex; tikhomirov@198: } tikhomirov@539: tikhomirov@539: private boolean outlineCached() { tikhomirov@539: return baseRevisions != null && baseRevisions.length > 0; tikhomirov@539: } tikhomirov@539: tikhomirov@584: // translate 6-byte offset field value to physical file offset for inline revlogs tikhomirov@539: // DOESN'T MAKE SENSE if revlog with data is separate tikhomirov@539: private static int offsetFieldToInlineFileOffset(long offset, int recordIndex) throws HgInvalidStateException { tikhomirov@539: int o = Internals.ltoi(offset); tikhomirov@539: if (o != offset) { tikhomirov@539: // just in case, can't happen, ever, 
unless HG (or some other bad tool) produces index file tikhomirov@539: // with inlined data of size greater than 2 Gb. tikhomirov@539: throw new HgInvalidStateException("Data too big, offset didn't fit to sizeof(int)"); tikhomirov@539: } tikhomirov@539: return o + REVLOGV1_RECORD_SIZE * recordIndex; tikhomirov@539: } tikhomirov@198: tikhomirov@607: // every access to index revlog goes after this method only. tikhomirov@425: private void initOutline() throws HgInvalidControlFileException { tikhomirov@607: // true to send out 'drop-your-caches' event after outline has been built tikhomirov@607: final boolean notifyReload; tikhomirov@539: if (outlineCached()) { tikhomirov@607: if (!changeTracker.hasChanged(indexFile)) { tikhomirov@607: return; tikhomirov@607: } tikhomirov@607: notifyReload = true; tikhomirov@607: } else { tikhomirov@607: // no cached outline - inital read, do not send any reload/invalidate notifications tikhomirov@607: notifyReload = false; tikhomirov@2: } tikhomirov@607: changeTracker.touch(indexFile); tikhomirov@606: DataAccess da = getIndexStream(false); tikhomirov@2: try { tikhomirov@202: if (da.isEmpty()) { tikhomirov@202: // do not fail with exception if stream is empty, it's likely intentional tikhomirov@202: baseRevisions = new int[0]; tikhomirov@539: // empty revlog, likely to be populated, indicate we start with a single file tikhomirov@539: inline = true; tikhomirov@202: return; tikhomirov@202: } tikhomirov@9: int versionField = da.readInt(); tikhomirov@170: da.readInt(); // just to skip next 4 bytes of offset + flags tikhomirov@2: inline = (versionField & INLINEDATA) != 0; tikhomirov@288: IntVector resBases, resOffsets = null; tikhomirov@420: int entryCountGuess = Internals.ltoi(da.longLength() / REVLOGV1_RECORD_SIZE); tikhomirov@288: if (inline) { tikhomirov@288: entryCountGuess >>>= 2; // pure guess, assume useful data takes 3/4 of total space tikhomirov@288: resOffsets = new IntVector(entryCountGuess, 5000); tikhomirov@288: } 
tikhomirov@288: resBases = new IntVector(entryCountGuess, 5000); tikhomirov@288: tikhomirov@2: long offset = 0; // first offset is always 0, thus Hg uses it for other purposes tikhomirov@9: while(true) { tikhomirov@9: int compressedLen = da.readInt(); tikhomirov@5: // 8+4 = 12 bytes total read here tikhomirov@49: @SuppressWarnings("unused") tikhomirov@9: int actualLen = da.readInt(); tikhomirov@9: int baseRevision = da.readInt(); tikhomirov@5: // 12 + 8 = 20 bytes read here tikhomirov@2: // int linkRevision = di.readInt(); tikhomirov@2: // int parent1Revision = di.readInt(); tikhomirov@2: // int parent2Revision = di.readInt(); tikhomirov@2: // byte[] nodeid = new byte[32]; tikhomirov@198: resBases.add(baseRevision); tikhomirov@2: if (inline) { tikhomirov@539: int o = offsetFieldToInlineFileOffset(offset, resOffsets.size()); tikhomirov@539: resOffsets.add(o); tikhomirov@9: da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size) tikhomirov@2: } else { tikhomirov@9: da.skip(3*4 + 32); tikhomirov@2: } tikhomirov@10: if (da.isEmpty()) { tikhomirov@9: // fine, done then tikhomirov@288: baseRevisions = resBases.toArray(true); tikhomirov@198: if (inline) { tikhomirov@288: indexRecordOffset = resOffsets.toArray(true); tikhomirov@198: } tikhomirov@9: break; tikhomirov@10: } else { tikhomirov@10: // start reading next record tikhomirov@10: long l = da.readLong(); tikhomirov@10: offset = l >>> 16; tikhomirov@9: } tikhomirov@2: } tikhomirov@2: } catch (IOException ex) { tikhomirov@423: throw new HgInvalidControlFileException("Failed to analyze revlog index", ex, indexFile); tikhomirov@9: } finally { tikhomirov@9: da.done(); tikhomirov@607: if (notifyReload && observers != null) { tikhomirov@607: for (Observer l : observers) { tikhomirov@607: l.reloaded(this); tikhomirov@607: } tikhomirov@607: shallDropDerivedCaches = false; tikhomirov@607: } tikhomirov@2: } tikhomirov@3: } tikhomirov@3: tikhomirov@593: private CachedRevision 
getLastRevisionRead() { tikhomirov@593: return lastRevisionRead == null ? null : lastRevisionRead.get(); tikhomirov@593: } tikhomirov@593: tikhomirov@593: private void setLastRevisionRead(CachedRevision cr) { tikhomirov@593: // done() for lastRevisionRead.userData has been called by ReaderN1 once tikhomirov@593: // it noticed unsuitable DataAccess. tikhomirov@593: // Now, done() for any CachedRevision cleared by GC: tikhomirov@593: for (Reference r; (r = lastRevisionQueue.poll()) != null;) { tikhomirov@593: CachedRevision toClean = r.get(); tikhomirov@593: if (toClean != null && toClean.userData != null) { tikhomirov@593: toClean.userData.done(); tikhomirov@593: } tikhomirov@593: } tikhomirov@593: if (cr != null) { tikhomirov@593: lastRevisionRead = new SoftReference(cr, lastRevisionQueue); tikhomirov@593: } else { tikhomirov@593: lastRevisionRead = null; tikhomirov@593: } tikhomirov@593: } tikhomirov@593: tikhomirov@593: final static class CachedRevision { tikhomirov@593: final int revision; tikhomirov@593: final DataAccess userData; tikhomirov@593: tikhomirov@593: public CachedRevision(int lastRevisionRead, DataAccess lastUserData) { tikhomirov@593: revision = lastRevisionRead; tikhomirov@593: userData = lastUserData; tikhomirov@593: } tikhomirov@593: } tikhomirov@593: tikhomirov@242: /** tikhomirov@242: * operation with single file open/close and multiple diverse reads. 
* XXX initOutline might need similar extraction to keep N1 format knowledge
*/
	final class ReaderN1 {
		// receiver of the records read
		private final Inspector inspector;
		// whether revision content (not only index record fields) shall be passed to the inspector
		private final boolean needData;
		// whether a long chain of patches preceding the requested revision is merged into one patch before it is applied
		private final boolean mergePatches;
		private DataAccess daIndex = null, daData = null;
		private Lifecycle.BasicCallback cb = null;
		private Lifecycle lifecycleListener = null;
		// lastRevisionRead and lastUserData are kept in sync: when lastUserData != null,
		// it holds complete content of revision lastRevisionRead
		private int lastRevisionRead = BAD_REVISION;
		private DataAccess lastUserData;
		//
		// next are transient values, for range() use only
		// NOTE(review): this Inflater is never end()'ed within this class; confirm whether finish() shall release it
		private final Inflater inflater = new Inflater();
		// can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel
		private final byte[] inflaterBuffer = new byte[10 * 1024]; // TODO [post-1.1] consider using DAP.DEFAULT_FILE_BUFFER
		private final byte[] nodeidBuf = new byte[20];
		// revlog record fields, populated by readHeaderRecord()
		private long offset;
		@SuppressWarnings("unused")
		private int flags;
		private int compressedLen;
		private int actualLen;
		private int baseRevision;
		private int linkRevision;
		private int parent1Revision;
		private int parent2Revision;

		/**
		 * @param dataRequested <code>true</code> to supply revision content to the inspector
		 * @param insp receiver of the records, shall not be <code>null</code>
		 * @param usePatchMerge <code>true</code> to combine a sequence of patches into a single one prior to its application
		 */
		public ReaderN1(boolean dataRequested, Inspector insp, boolean usePatchMerge) {
			assert insp != null;
			needData = dataRequested;
			inspector = insp;
			mergePatches = usePatchMerge;
		}

		/**
		 * Opens index (and, for non-inline revlog with data requested, data) stream, notifies
		 * inspector's {@link Lifecycle} adapter, if any, and picks up the cached revision to start from.
		 *
		 * @param totalWork number of units of work, passed to {@link Lifecycle#start}; also used to pick the index stream flavor
		 * @param cachedRevision previously read complete revision, or <code>null</code> if none; used only when data is requested
		 */
		public void start(int totalWork, CachedRevision cachedRevision) {
			daIndex = getIndexStream(totalWork <= 10);
			if (needData && !inline) {
				daData = getDataStream();
			}
			lifecycleListener = Adaptable.Factory.getAdapter(inspector, Lifecycle.class, null);
			if (lifecycleListener != null) {
				cb = new Lifecycle.BasicCallback();
				lifecycleListener.start(totalWork, cb, cb);
			}
			if (needData && cachedRevision != null) {
				lastUserData = cachedRevision.userData;
				lastRevisionRead = cachedRevision.revision;
				assert lastUserData != null;
			}
		}

		/**
		 * Releases streams and notifies the lifecycle listener. Invoked only once per instance.
		 *
		 * @return last complete revision read, if it is safe to cache it, or <code>null</code> otherwise
		 */
		public CachedRevision finish() {
			CachedRevision rv = null;
			if (lastUserData != null) {
				if (lastUserData instanceof ByteArrayDataAccess) {
					// it's safe to cache only in-memory revision texts,
					// if lastUserData is merely a filter over file stream,
					// we'd need to keep file open, and this is bad.
					// XXX perhaps, wrap any DataAccess.byteArray into
					// ByteArrayDataAccess?
					rv = new CachedRevision(lastRevisionRead, lastUserData);
				} else {
					lastUserData.done();
				}
				lastUserData = null;
			}
			if (lifecycleListener != null) {
				lifecycleListener.finish(cb);
				lifecycleListener = null;
				cb = null;
			}
			// daIndex is null if start() has failed prior to (or while) opening the index stream;
			// don't let NPE from here hide the original failure
			if (daIndex != null) {
				daIndex.done();
				daIndex = null;
			}
			if (daData != null) {
				daData.done();
				daData = null;
			}
			return rv;
		}

		/**
		 * Reads fixed-size index record of revision <code>i</code> into the record fields of this reader.
		 * Unless the revlog is inline and data is requested, expects index stream to be positioned
		 * at the beginning of the record already.
		 */
		private void readHeaderRecord(int i) throws IOException {
			if (inline && needData) {
				// inspector reading data (though FilterDataAccess) may have affected index position
				daIndex.seek(getIndexOffsetInt(i));
			}
			long l = daIndex.readLong(); // 0
			// offset field of the very first record is not used as an offset, data of revision 0 starts at 0
			offset = i == 0 ? 0 : (l >>> 16);
			flags = (int) (l & 0x0FFFF);
			compressedLen = daIndex.readInt(); // +8
			actualLen = daIndex.readInt(); // +12
			baseRevision = daIndex.readInt(); // +16
			linkRevision = daIndex.readInt(); // +20
			parent1Revision = daIndex.readInt(); // +24
			parent2Revision = daIndex.readInt(); // +28
			// Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty
			daIndex.readBytes(nodeidBuf, 0, 20); // +32
			daIndex.skip(12);
		}

		/**
		 * Relies on the record fields read with most recent {@link #readHeaderRecord(int)}
		 * @return <code>true</code> if revision <code>i</code> is stored as a patch, not as a complete revision
		 */
		private boolean isPatch(int i) {
			return baseRevision != i; // the only way I found to tell if it's a patch
		}

		/**
		 * Gives access to the data stored for revision <code>i</code> (either complete content or a patch),
		 * decompressing it if needed. Relies on the record fields read with most recent {@link #readHeaderRecord(int)}.
		 *
		 * @return access to the stored data, never <code>null</code>; empty when there's no data for the revision
		 */
		private DataAccess getStoredData(int i) throws IOException {
			DataAccess userDataAccess = null;
			DataAccess streamDataAccess;
			long streamOffset;
			if (inline) {
				// data follows its index record in the very same stream
				streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE;
				streamDataAccess = daIndex;
				// don't need to do seek as it's actual position in the index stream, but it's safe to seek, just in case
				daIndex.longSeek(streamOffset);
			} else {
				streamOffset = offset;
				streamDataAccess = daData;
				daData.longSeek(streamOffset);
			}
			if (streamDataAccess.isEmpty() || compressedLen == 0) {
				userDataAccess = new DataAccess(); // empty
			} else {
				// first byte of the chunk tells how the rest is stored
				final byte firstByte = streamDataAccess.readByte();
				if (firstByte == 0x78 /* 'x' */) {
					// compressed; for patches, length of inflated data is not known from the record (actualLen is that of the complete revision)
					inflater.reset();
					userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, isPatch(i) ? -1 : actualLen, inflater, inflaterBuffer);
				} else if (firstByte == 0x75 /* 'u' */) {
					// uncompressed, skip the marker byte
					userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1);
				} else {
					// XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' but I don't see reason not to return data as is
					//
					// although firstByte is already read from the streamDataAccess, FilterDataAccess#readByte would seek to
					// initial offset before first attempt to read a byte
					userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen);
				}
			}
			return userDataAccess;
		}

		/**
		 * Reads revisions <code>[start..end]</code> and reports each to the inspector. May be invoked few times per instance life.
		 * <p>
		 * When data is requested, revisions from base revision of <code>start</code> (or from the one next to the cached revision, if
		 * the latter is in the delta chain of <code>start</code>) are read as well to reconstruct content, but are not reported.
		 * <p>
		 * <code>lastUserData</code> and <code>lastRevisionRead</code> are updated together once each revision is processed, so that
		 * {@link #finish()} reports consistent revision index and content even if iteration was stopped by the callback or failed with an exception.
		 *
		 * @param start index of the first revision to report
		 * @param end index of the last revision to report, inclusive
		 * @return <code>false</code> if iteration was stopped through lifecycle callback, <code>true</code> otherwise
		 * @throws IOException if reading of the underlying streams failed
		 */
		public boolean range(int start, int end) throws IOException {
			int i;
			// to get complete content of a revision stored as a patch, have to start from its base revision
			if (needData && (i = getBaseRevision(start)) < start) {
				// if lastRevisionRead in [baseRevision(start), start) can reuse lastUserData
				// doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below).
				if (lastRevisionRead != BAD_REVISION && i <= lastRevisionRead && lastRevisionRead < start) {
					i = lastRevisionRead + 1; // start with first not-yet-read revision
				} else {
					if (lastUserData != null) {
						lastUserData.done();
						lastUserData = null;
					}
					// nothing is kept, shall not pretend there's revision content for any index
					lastRevisionRead = BAD_REVISION;
				}
			} else {
				// don't need to clean lastUserData as it's always null when !needData
				// when needData and start is complete revision, lastUserData gets released in the cycle below
				i = start;
			}

			daIndex.seek(getIndexOffsetInt(i));
			//
			// reuse instance, do not normalize it as patches from the stream are unlikely to need it
			final Patch patch = new Patch(false);
			//
			if (needData && mergePatches && start-i > 2) {
				// i+1 == start just reads lastUserData, i+2 == start applies one patch - not worth dedicated effort
				Patch ultimatePatch = new Patch(true);
				for ( ; i < start; i++) {
					readHeaderRecord(i);
					DataAccess userDataAccess = getStoredData(i);
					if (lastUserData == null) {
						assert !isPatch(i);
						lastUserData = userDataAccess;
						lastRevisionRead = i;
					} else {
						assert isPatch(i); // i < start and i == getBaseRevision()
						patch.read(userDataAccess);
						userDataAccess.done();
						// I assume empty patches are applied ok
						ultimatePatch = ultimatePatch.apply(patch);
						patch.clear();
					}
				}
				lastUserData.reset();
				// actualLen comes from the last record read, i.e. that of revision start-1
				byte[] userData = ultimatePatch.apply(lastUserData, actualLen);
				ultimatePatch.clear();
				lastUserData.done();
				lastUserData = new ByteArrayDataAccess(userData);
				lastRevisionRead = start - 1;
			}
			//

			for (; i <= end; i++ ) {
				readHeaderRecord(i);
				DataAccess userDataAccess = null;
				if (needData) {
					userDataAccess = getStoredData(i);
					// userDataAccess is revision content, either complete revision, patch of a previous content, or an empty patch
					if (isPatch(i)) {
						// this is a patch
						if (userDataAccess.isEmpty()) {
							// Issue 22, empty patch to an empty base revision
							// Issue 24, empty patch to non-empty base revision
							// empty patch modifies nothing, use content of a previous revision (shall present - it's a patch here)
							//
							assert lastUserData.length() == actualLen; // with no patch, data size shall be the same
							userDataAccess = lastUserData;
						} else {
							patch.read(userDataAccess);
							userDataAccess.done();
							//
							// it shall be reset at the end of prev iteration, when it got assigned from userDataAccess
							// however, actual userDataAccess and lastUserData may share Inflater object, which needs to be reset
							// Alternatively, userDataAccess.done() above may be responsible to reset Inflater (if it's InflaterDataAccess)
							lastUserData.reset();
							byte[] userData = patch.apply(lastUserData, actualLen);
							patch.clear(); // do not keep any reference, allow byte[] data to be gc'd
							userDataAccess = new ByteArrayDataAccess(userData);
						}
					}
				} else if (inline) {
					// data is not needed, step over it to get to the next index record
					daIndex.skip(compressedLen);
				}
				if (i >= start) {
					inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess);
				}
				// record stop request now, but complete bookkeeping for this revision first, otherwise
				// lastUserData/lastRevisionRead pair (and CachedRevision built of it in #finish()) would be inconsistent
				final boolean stopped = cb != null && cb.isStopped();
				if (userDataAccess != null) {
					userDataAccess.reset(); // not sure this is necessary here, as lastUserData would get reset anyway before next use.
				}
				if (lastUserData != null && lastUserData != userDataAccess /* empty patch case, reuse of recent data in actual revision */) {
					// release lastUserData only if we didn't reuse it in actual revision due to empty patch:
					// empty patch means we have previous revision and didn't alter it with a patch, hence use lastUserData for userDataAccess above
					lastUserData.done();
				}
				lastUserData = userDataAccess;
				lastRevisionRead = i;
				if (stopped) {
					return false;
				}
			}
			return true;
		}
	}


	public interface Inspector {
		/**
		 * XXX boolean retVal to indicate whether to continue?
		 *
		 * Implementers shall not invoke DataAccess.done(), it's accomplished by #iterate at appropriate moment
		 *
		 * @param revisionIndex absolute index of revision in revlog being iterated
		 * @param actualLen length of the user data at this revision
		 * @param baseRevision last revision known to hold complete revision (others hold patches).
		 *        if baseRevision != revisionIndex, data for this revision is a result of a sequence of patches
		 * @param linkRevision index of corresponding changeset revision
		 * @param parent1Revision index of first parent revision in this revlog, or {@link HgRepository#NO_REVISION}
		 * @param parent2Revision index of second parent revision in this revlog, or {@link HgRepository#NO_REVISION}
		 * @param nodeid 20-byte buffer, shared between invocations
		 * @param data access to revision content of actualLen size, or null if no data has been requested with
		 *        {@link RevlogStream#iterate(int[], boolean, Inspector)}
		 */
		void next(int revisionIndex, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, DataAccess data);
	}

	public interface Observer {
		// notify observer of invalidate/reload event in the stream
		public void reloaded(RevlogStream src);
	}
}