Mercurial > hg4j
diff src/org/tmatesoft/hg/internal/RevlogStream.java @ 329:694ebabb5cb3
Refactor revlog patch mechanism, towards patch merging
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Thu, 13 Oct 2011 03:30:50 +0200 |
parents | 650b45d290b1 |
children | 8da7ade36c57 |
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/internal/RevlogStream.java Wed Oct 05 07:13:57 2011 +0200 +++ b/src/org/tmatesoft/hg/internal/RevlogStream.java Thu Oct 13 03:30:50 2011 +0200 @@ -21,8 +21,6 @@ import java.io.File; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; import java.util.zip.Inflater; import org.tmatesoft.hg.core.HgBadStateException; @@ -392,20 +390,17 @@ public boolean range(int start, int end) throws IOException { byte[] nodeidBuf = new byte[20]; int i; - boolean extraReadsToBaseRev = false; // to indicate we read revision prior to start. XXX not sure can't do without // it (i.e. replace with i >= start) if (needData && (i = getBaseRevision(start)) < start) { // if lastRevisionRead in [baseRevision(start), start) can reuse lastUserData // doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below). if (lastRevisionRead != BAD_REVISION && i <= lastRevisionRead && lastRevisionRead < start) { i = lastRevisionRead + 1; // start with first not-yet-read revision - extraReadsToBaseRev = i < start; } else { if (lastUserData != null) { lastUserData.done(); lastUserData = null; } - extraReadsToBaseRev = true; } } else { // don't need to clean lastUserData as it's always null when !needData @@ -415,7 +410,7 @@ daIndex.seek(getIndexOffsetInt(i)); // // reuse some instances - final ArrayList<PatchRecord> patches = new ArrayList<PatchRecord>(); + final Patch patch = new Patch(); final Inflater inflater = new Inflater(); // can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel final byte[] inflaterBuffer = new byte[1024]; @@ -470,12 +465,7 @@ // XXX if (patchToPrevious) { // this is a patch - patches.clear(); // won't hurt to ensure there are no leftovers, even if we already cleaned - while (!userDataAccess.isEmpty()) { - PatchRecord pr = PatchRecord.read(userDataAccess); -// System.out.printf("PatchRecord:%d %d %d\n", pr.start, pr.end, pr.len); - patches.add(pr); - } + patch.read(userDataAccess); userDataAccess.done(); // // it shall be reset at the end of prev iteration, when it got assigned from userDataAccess @@ -483,9 +473,9 @@ // Alternatively, userDataAccess.done() above may be responsible to reset Inflater (if it's InflaterDataAccess) lastUserData.reset(); // final long startMeasuring = System.currentTimeMillis(); // TIMING - byte[] userData = apply(lastUserData, actualLen, patches); + byte[] userData = patch.apply(lastUserData, actualLen); // applyTime += (System.currentTimeMillis() - startMeasuring); // TIMING - patches.clear(); // do not keep any reference, allow PatchRecord to be gc'd + patch.clear(); // do not keep any reference, allow byte[] data to be gc'd userDataAccess = new ByteArrayDataAccess(userData); } } else { @@ -493,7 +483,7 @@ daIndex.skip(compressedLen); } } - if (!extraReadsToBaseRev || i >= start) { + if (i >= start) { // final long startMeasuring = System.currentTimeMillis(); // TIMING inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); // inspectorTime += (System.currentTimeMillis() - startMeasuring); // TIMING @@ -517,75 +507,6 @@ } - // mpatch.c : apply() - // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF - public/*for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List<PatchRecord> patch) throws IOException { - int last = 0, destIndex = 0; - if (outcomeLen == -1) { - outcomeLen = baseRevisionContent.length(); - for (int i = 0, x = patch.size(); i < x; i++) { - PatchRecord pr = patch.get(i); - outcomeLen += pr.start - last + pr.len; - last = pr.end; - } - outcomeLen -= last; - last = 0; - } - byte[] rv = new byte[outcomeLen]; - for (int i = 0, x = patch.size(); i < x; i++) { - PatchRecord pr = patch.get(i); - baseRevisionContent.seek(last); - baseRevisionContent.readBytes(rv, destIndex, pr.start-last); - destIndex += pr.start - last; - System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length); - destIndex += pr.data.length; - last = pr.end; - } - baseRevisionContent.seek(last); - baseRevisionContent.readBytes(rv, destIndex, (int) (baseRevisionContent.length() - last)); - return rv; - } - - // @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description - public static class PatchRecord { - /* - Given there are pr1 and pr2: - pr1.start to pr1.end will be replaced with pr's data (of pr1.len) - pr1.end to pr2.start gets copied from base - */ - public int start, end, len; - public byte[] data; - - // TODO consider PatchRecord that only records data position (absolute in data source), and acquires data as needed - private PatchRecord(int p1, int p2, int length, byte[] src) { - start = p1; - end = p2; - len = length; - data = src; - } - - /*package-local*/ static PatchRecord read(byte[] data, int offset) { - final int x = offset; // shorthand - int p1 = ((data[x] & 0xFF)<< 24) | ((data[x+1] & 0xFF) << 16) | ((data[x+2] & 0xFF) << 8) | (data[x+3] & 0xFF); - int p2 = ((data[x+4] & 0xFF) << 24) | ((data[x+5] & 0xFF) << 16) | ((data[x+6] & 0xFF) << 8) | (data[x+7] & 0xFF); - int len = ((data[x+8] & 0xFF) << 24) | ((data[x+9] & 0xFF) << 16) | ((data[x+10] & 0xFF) << 8) | (data[x+11] & 0xFF); - byte[] dataCopy = new byte[len]; - System.arraycopy(data, x+12, dataCopy, 0, len); - return new PatchRecord(p1, p2, len, dataCopy); - } - - public /*for HgBundle*/ static PatchRecord read(DataAccess da) throws IOException { - int p1 = da.readInt(); - int p2 = da.readInt(); - int len = da.readInt(); - byte[] src = new byte[len]; - da.readBytes(src, 0, len); - return new PatchRecord(p1, p2, len, src); - } - } - - // FIXME byte[] data might be too expensive, for few usecases it may be better to have intermediate Access object (when we don't need full data - // instantly - e.g. calculate hash, or comparing two revisions public interface Inspector { // XXX boolean retVal to indicate whether to continue? // TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call)