Mercurial > hg4j
diff src/org/tmatesoft/hg/internal/RevlogStream.java @ 242:ad6a046943be
Improved reading of sparse revisions from a revlog
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Thu, 23 Jun 2011 15:19:07 +0200 |
parents | 80a3433ace91 |
children | 0e01f9182e16 |
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/internal/RevlogStream.java Thu Jun 23 15:16:34 2011 +0200 +++ b/src/org/tmatesoft/hg/internal/RevlogStream.java Thu Jun 23 15:19:07 2011 +0200 @@ -200,121 +200,55 @@ } // XXX may cache [start .. end] from index with a single read (pre-read) - Lifecycle.BasicCallback cb = null; - DataAccess daIndex = null, daData = null; - daIndex = getIndexStream(); - if (needData && !inline) { - daData = getDataStream(); - } + ReaderN1 r = new ReaderN1(needData, inspector); try { - byte[] nodeidBuf = new byte[20]; - DataAccess lastUserData = null; - int i; - boolean extraReadsToBaseRev = false; - if (needData && getBaseRevision(start) < start) { - i = getBaseRevision(start); - extraReadsToBaseRev = true; - } else { - i = start; - } - - daIndex.seek(getIndexOffsetInt(i)); - - if (inspector instanceof Lifecycle) { - cb = new Lifecycle.BasicCallback(); - ((Lifecycle) inspector).start(end - start + 1, cb, cb); - } - - for (; i <= end; i++ ) { - if (inline && needData) { - // inspector reading data (though FilterDataAccess) may have affected index position - daIndex.seek(getIndexOffsetInt(i)); - } - long l = daIndex.readLong(); // 0 - long offset = i == 0 ? 0 : (l >>> 16); - @SuppressWarnings("unused") - int flags = (int) (l & 0X0FFFF); - int compressedLen = daIndex.readInt(); // +8 - int actualLen = daIndex.readInt(); // +12 - int baseRevision = daIndex.readInt(); // +16 - int linkRevision = daIndex.readInt(); // +20 - int parent1Revision = daIndex.readInt(); - int parent2Revision = daIndex.readInt(); - // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty - daIndex.readBytes(nodeidBuf, 0, 20); // +32 - daIndex.skip(12); - DataAccess userDataAccess = null; - if (needData) { - int streamOffset; - DataAccess streamDataAccess; - if (inline) { - streamDataAccess = daIndex; - streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream + r.start(end - start + 1); + r.range(start, end); + } catch (IOException ex) { + throw new HgBadStateException(ex); // FIXME need better handling + } finally { + r.finish(); + } + } + + /** + * Effective alternative to {@link #iterate(int, int, boolean, Inspector) batch read}, when only few selected + * revisions are of interest. + * @param sortedRevisions revisions to walk, in ascending order. + * @param needData whether inspector needs access to header only + * @param inspector callback to process entries + */ + public void iterate(int[] sortedRevisions, boolean needData, Inspector inspector) { + final int indexSize = revisionCount(); + if (indexSize == 0 || sortedRevisions.length == 0) { + return; + } + if (sortedRevisions[0] > indexSize || sortedRevisions[sortedRevisions.length - 1] > indexSize) { + throw new IllegalArgumentException(String.format("Can't iterate [%d, %d] in range [0..%d]", sortedRevisions[0], sortedRevisions[sortedRevisions.length - 1], indexSize)); + } + + ReaderN1 r = new ReaderN1(needData, inspector); + try { + r.start(sortedRevisions.length); + for (int i = 0; i < sortedRevisions.length; ) { + int x = i; + i++; + while (i < sortedRevisions.length) { + if (sortedRevisions[i] == sortedRevisions[i-1] + 1) { + i++; } else { - streamOffset = (int) offset; - streamDataAccess = daData; - daData.seek(streamOffset); - } - final boolean patchToPrevious = baseRevision != i; // the only way I found to tell if it's a patch - if (streamDataAccess.isEmpty()) { - userDataAccess = new DataAccess(); // empty - } else { - final byte firstByte = streamDataAccess.readByte(); - if (firstByte == 0x78 /* 'x' */) { - userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, patchToPrevious ? -1 : actualLen); - } else if (firstByte == 0x75 /* 'u' */) { - userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1); - } else { - // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' - // but I don't see reason not to return data as is - userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen); - } - } - // XXX - if (patchToPrevious) { - // this is a patch - LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>(); - while (!userDataAccess.isEmpty()) { - PatchRecord pr = PatchRecord.read(userDataAccess); -// System.out.printf("PatchRecord:%d %d %d\n", pr.start, pr.end, pr.len); - patches.add(pr); - } - userDataAccess.done(); - // - byte[] userData = apply(lastUserData, actualLen, patches); - userDataAccess = new ByteArrayDataAccess(userData); - } - } else { - if (inline) { - daIndex.skip(compressedLen); - } - } - if (!extraReadsToBaseRev || i >= start) { - inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); - } - if (cb != null) { - if (cb.isStopped()) { break; } } - if (userDataAccess != null) { - userDataAccess.reset(); - if (lastUserData != null) { - lastUserData.done(); - } - lastUserData = userDataAccess; + // commitRevisions[x..i-1] are sequential + if (!r.range(sortedRevisions[x], sortedRevisions[i-1])) { + return; } } } catch (IOException ex) { throw new HgBadStateException(ex); // FIXME need better handling } finally { - if (inspector instanceof Lifecycle) { - ((Lifecycle) inspector).finish(cb); - } - daIndex.done(); - if (daData != null) { - daData.done(); - } + r.finish(); } } @@ -393,6 +327,161 @@ } } + /** + * operation with single file open/close and multiple diverse reads. + * XXX initOutline might need similar extraction to keen N1 format knowledge + */ + class ReaderN1 { + private final Inspector inspector; + private final boolean needData; + private DataAccess daIndex = null, daData = null; + private Lifecycle.BasicCallback cb = null; + private int lastRevisionRead = BAD_REVISION; + private DataAccess lastUserData; + + public ReaderN1(boolean needData, Inspector insp) { + assert insp != null; + this.needData = needData; + inspector = insp; + } + + public void start(int totalWork) { + daIndex = getIndexStream(); + if (needData && !inline) { + daData = getDataStream(); + } + if (inspector instanceof Lifecycle) { + cb = new Lifecycle.BasicCallback(); + ((Lifecycle) inspector).start(totalWork, cb, cb); + } + } + + public void finish() { + if (lastUserData != null) { + lastUserData.done(); + lastUserData = null; + } + if (inspector instanceof Lifecycle) { + ((Lifecycle) inspector).finish(cb); + } + daIndex.done(); + if (daData != null) { + daData.done(); + } + } + + public boolean range(int start, int end) throws IOException { +// System.out.printf("RevlogStream.ReaderN1.range(): [%d, %d]\n", start, end); + byte[] nodeidBuf = new byte[20]; + int i; + boolean extraReadsToBaseRev = false; // to indicate we read revision prior to start. XXX not sure can't do without + // it (i.e. replace with i >= start) + if (needData && (i = getBaseRevision(start)) < start) { + // if lastRevisionRead in [baseRevision(start), start) can reuse lastUserData + // doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below). + if (lastRevisionRead != BAD_REVISION && i <= lastRevisionRead && lastRevisionRead < start) { + i = lastRevisionRead + 1; // start with first not-yet-read revision + extraReadsToBaseRev = i < start; + } else { + if (lastUserData != null) { + lastUserData.done(); + lastUserData = null; + } + extraReadsToBaseRev = true; + } + } else { + // don't need to clean lastUserData as it's always null when !needData + i = start; + } + + daIndex.seek(getIndexOffsetInt(i)); + + for (; i <= end; i++ ) { + if (inline && needData) { + // inspector reading data (though FilterDataAccess) may have affected index position + daIndex.seek(getIndexOffsetInt(i)); + } + long l = daIndex.readLong(); // 0 + long offset = i == 0 ? 0 : (l >>> 16); + @SuppressWarnings("unused") + int flags = (int) (l & 0X0FFFF); + int compressedLen = daIndex.readInt(); // +8 + int actualLen = daIndex.readInt(); // +12 + int baseRevision = daIndex.readInt(); // +16 + int linkRevision = daIndex.readInt(); // +20 + int parent1Revision = daIndex.readInt(); + int parent2Revision = daIndex.readInt(); + // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty + daIndex.readBytes(nodeidBuf, 0, 20); // +32 + daIndex.skip(12); + DataAccess userDataAccess = null; + if (needData) { + int streamOffset; + DataAccess streamDataAccess; + if (inline) { + streamDataAccess = daIndex; + streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream + } else { + streamOffset = (int) offset; + streamDataAccess = daData; + daData.seek(streamOffset); + } + final boolean patchToPrevious = baseRevision != i; // the only way I found to tell if it's a patch + if (streamDataAccess.isEmpty()) { + userDataAccess = new DataAccess(); // empty + } else { + final byte firstByte = streamDataAccess.readByte(); + if (firstByte == 0x78 /* 'x' */) { + userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, patchToPrevious ? -1 : actualLen); + } else if (firstByte == 0x75 /* 'u' */) { + userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1); + } else { + // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' + // but I don't see reason not to return data as is + userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen); + } + } + // XXX + if (patchToPrevious) { + // this is a patch + LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>(); + while (!userDataAccess.isEmpty()) { + PatchRecord pr = PatchRecord.read(userDataAccess); +// System.out.printf("PatchRecord:%d %d %d\n", pr.start, pr.end, pr.len); + patches.add(pr); + } + userDataAccess.done(); + // + byte[] userData = apply(lastUserData, actualLen, patches); + userDataAccess = new ByteArrayDataAccess(userData); + } + } else { + if (inline) { + daIndex.skip(compressedLen); + } + } + if (!extraReadsToBaseRev || i >= start) { + inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); + } + if (cb != null) { + if (cb.isStopped()) { + return false; + } + } + if (userDataAccess != null) { + userDataAccess.reset(); + if (lastUserData != null) { + lastUserData.done(); + } + lastUserData = userDataAccess; + } + } + lastRevisionRead = end; + return true; + } + } + + private static int[] toArray(List<Integer> l) { int[] rv = new int[l.size()]; for (int i = 0; i < rv.length; i++) {