Mercurial > jhg
diff src/org/tmatesoft/hg/internal/RevlogStream.java @ 584:ed243b668502
Conditionally enable effective patch merge alternative for revlog reading
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Thu, 25 Apr 2013 16:08:17 +0200 |
parents | bd5926e24aa3 |
children | b47ef0d2777b |
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/internal/RevlogStream.java Wed Apr 24 15:39:53 2013 +0200 +++ b/src/org/tmatesoft/hg/internal/RevlogStream.java Thu Apr 25 16:08:17 2013 +0200 @@ -267,7 +267,7 @@ HgInternals.checkRevlogRange(start, end, indexSize-1); // XXX may cache [start .. end] from index with a single read (pre-read) - ReaderN1 r = new ReaderN1(needData, inspector); + ReaderN1 r = new ReaderN1(needData, inspector, dataAccess.shallMergePatches()); try { r.start(end - start + 1); r.range(start, end); @@ -299,7 +299,7 @@ throw new HgInvalidRevisionException(String.format("Can't iterate [%d, %d] in range [0..%d]", sortedRevisions[0], sortedRevisions[sortedRevisions.length - 1], indexSize), null, sortedRevisions[sortedRevisions.length - 1]); } - ReaderN1 r = new ReaderN1(needData, inspector); + ReaderN1 r = new ReaderN1(needData, inspector, dataAccess.shallMergePatches()); try { r.start(sortedRevisions.length); for (int i = 0; i < sortedRevisions.length; ) { @@ -382,7 +382,7 @@ return baseRevisions != null && baseRevisions.length > 0; } - // translate 6-byte offset field value to pysical file offset for inline revlogs + // translate 6-byte offset field value to physical file offset for inline revlogs // DOESN'T MAKE SENSE if revlog with data is separate private static int offsetFieldToInlineFileOffset(long offset, int recordIndex) throws HgInvalidStateException { int o = Internals.ltoi(offset); @@ -463,22 +463,39 @@ * operation with single file open/close and multiple diverse reads. * XXX initOutline might need similar extraction to keep N1 format knowledge */ - class ReaderN1 { + final class ReaderN1 { private final Inspector inspector; private final boolean needData; + private final boolean mergePatches; private DataAccess daIndex = null, daData = null; private Lifecycle.BasicCallback cb = null; private Lifecycle lifecycleListener = null; private int lastRevisionRead = BAD_REVISION; private DataAccess lastUserData; + // + // next are transient values, for range() use only + private final Inflater inflater = new Inflater(); + // can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel + private final byte[] inflaterBuffer = new byte[10 * 1024]; // TODO consider using DAP.DEFAULT_FILE_BUFFER + private final byte[] nodeidBuf = new byte[20]; + // revlog record fields + private long offset; + @SuppressWarnings("unused") + private int flags; + private int compressedLen; + private int actualLen; + private int baseRevision; + private int linkRevision; + private int parent1Revision; + private int parent2Revision; // next are to track two major bottlenecks - patch application and actual time spent in inspector // private long applyTime, inspectorTime; // TIMING - - - public ReaderN1(boolean needData, Inspector insp) { + + public ReaderN1(boolean dataRequested, Inspector insp, boolean usePatchMerge) { assert insp != null; - this.needData = needData; + needData = dataRequested; inspector = insp; + mergePatches = usePatchMerge; } public void start(int totalWork) { @@ -513,10 +530,66 @@ } // System.out.printf("applyTime:%d ms, inspectorTime: %d ms\n", applyTime, inspectorTime); // TIMING } + + private void readHeaderRecord(int i) throws IOException { + if (inline && needData) { + // inspector reading data (though FilterDataAccess) may have affected index position + daIndex.seek(getIndexOffsetInt(i)); + } + long l = daIndex.readLong(); // 0 + offset = i == 0 ? 0 : (l >>> 16); + flags = (int) (l & 0x0FFFF); + compressedLen = daIndex.readInt(); // +8 + actualLen = daIndex.readInt(); // +12 + baseRevision = daIndex.readInt(); // +16 + linkRevision = daIndex.readInt(); // +20 + parent1Revision = daIndex.readInt(); + parent2Revision = daIndex.readInt(); + // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty + daIndex.readBytes(nodeidBuf, 0, 20); // +32 + daIndex.skip(12); + } + + private boolean isPatch(int i) { + return baseRevision != i; // the only way I found to tell if it's a patch + } + + private DataAccess getStoredData(int i) throws IOException { + DataAccess userDataAccess = null; + DataAccess streamDataAccess; + long streamOffset; + if (inline) { + streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE; + streamDataAccess = daIndex; + // don't need to do seek as it's actual position in the index stream, but it's safe to seek, just in case + daIndex.longSeek(streamOffset); + } else { + streamOffset = offset; + streamDataAccess = daData; + daData.longSeek(streamOffset); + } + if (streamDataAccess.isEmpty() || compressedLen == 0) { + userDataAccess = new DataAccess(); // empty + } else { + final byte firstByte = streamDataAccess.readByte(); + if (firstByte == 0x78 /* 'x' */) { + inflater.reset(); + userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, isPatch(i) ? -1 : actualLen, inflater, inflaterBuffer); + } else if (firstByte == 0x75 /* 'u' */) { + userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1); + } else { + // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' but I don't see reason not to return data as is + // + // although firstByte is already read from the streamDataAccess, FilterDataAccess#readByte would seek to + // initial offset before first attempt to read a byte + userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen); + } + } + return userDataAccess; + } // may be invoked few times per instance life public boolean range(int start, int end) throws IOException { - byte[] nodeidBuf = new byte[20]; int i; // it (i.e. replace with i >= start) if (needData && (i = getBaseRevision(start)) < start) { @@ -537,63 +610,42 @@ daIndex.seek(getIndexOffsetInt(i)); // - // reuse some instances - final Patch patch = new Patch(); - final Inflater inflater = new Inflater(); - // can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel - final byte[] inflaterBuffer = new byte[10 * 1024]; // TODO consider using DAP.DEFAULT_FILE_BUFFER + // reuse instance, do not normalize it as patches from the stream are unlikely to need it + final Patch patch = new Patch(false); + // + if (needData && mergePatches && start-i > 2) { + // i+1 == start just reads lastUserData, i+2 == start applies one patch - not worth dedicated effort + Patch ultimatePatch = new Patch(true); + for ( ; i < start; i++) { + readHeaderRecord(i); + DataAccess userDataAccess = getStoredData(i); + if (lastUserData == null) { + assert !isPatch(i); + lastUserData = userDataAccess; + } else { + assert isPatch(i); // i < start and i == getBaseRevision() + patch.read(userDataAccess); + userDataAccess.done(); + // I assume empty patches are applied ok + ultimatePatch = ultimatePatch.apply(patch); + patch.clear(); + } + } + lastUserData.reset(); + byte[] userData = ultimatePatch.apply(lastUserData, actualLen); + ultimatePatch.clear(); + lastUserData.done(); + lastUserData = new ByteArrayDataAccess(userData); + } // for (; i <= end; i++ ) { - if (inline && needData) { - // inspector reading data (though FilterDataAccess) may have affected index position - daIndex.seek(getIndexOffsetInt(i)); - } - long l = daIndex.readLong(); // 0 - long offset = i == 0 ? 0 : (l >>> 16); - @SuppressWarnings("unused") - int flags = (int) (l & 0x0FFFF); - int compressedLen = daIndex.readInt(); // +8 - int actualLen = daIndex.readInt(); // +12 - int baseRevision = daIndex.readInt(); // +16 - int linkRevision = daIndex.readInt(); // +20 - int parent1Revision = daIndex.readInt(); - int parent2Revision = daIndex.readInt(); - // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty - daIndex.readBytes(nodeidBuf, 0, 20); // +32 - daIndex.skip(12); + readHeaderRecord(i); DataAccess userDataAccess = null; if (needData) { - long streamOffset; - DataAccess streamDataAccess; - if (inline) { - streamDataAccess = daIndex; - streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream - } else { - streamOffset = offset; - streamDataAccess = daData; - daData.longSeek(streamOffset); - } - final boolean patchToPrevious = baseRevision != i; // the only way I found to tell if it's a patch - if (streamDataAccess.isEmpty() || compressedLen == 0) { - userDataAccess = new DataAccess(); // empty - } else { - final byte firstByte = streamDataAccess.readByte(); - if (firstByte == 0x78 /* 'x' */) { - inflater.reset(); - userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, patchToPrevious ? -1 : actualLen, inflater, inflaterBuffer); - } else if (firstByte == 0x75 /* 'u' */) { - userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1); - } else { - // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' but I don't see reason not to return data as is - // - // although firstByte is already read from the streamDataAccess, FilterDataAccess#readByte would seek to - // initial offset before first attempt to read a byte - userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen); - } - } + userDataAccess = getStoredData(i); // userDataAccess is revision content, either complete revision, patch of a previous content, or an empty patch - if (patchToPrevious) { + if (isPatch(i)) { // this is a patch if (userDataAccess.isEmpty()) { // Issue 22, empty patch to an empty base revision