# HG changeset patch # User Artem Tikhomirov # Date 1366810793 -7200 # Node ID 47dfa0ec7e35c790abd2e8cbfda8d9285c4f071e # Parent 90df078d6418d9c93032f8e4f1834efe31e54371 Effective revlog patching diff -r 90df078d6418 -r 47dfa0ec7e35 src/org/tmatesoft/hg/internal/DiffHelper.java --- a/src/org/tmatesoft/hg/internal/DiffHelper.java Mon Apr 22 19:17:29 2013 +0200 +++ b/src/org/tmatesoft/hg/internal/DiffHelper.java Wed Apr 24 15:39:53 2013 +0200 @@ -232,7 +232,7 @@ } } - static class DeltaDumpInspector> extends DeltaInspector { + public static class DeltaDumpInspector> extends DeltaInspector { @Override protected void changed(int s1From, int s1To, int s2From, int s2To) { diff -r 90df078d6418 -r 47dfa0ec7e35 src/org/tmatesoft/hg/internal/Patch.java --- a/src/org/tmatesoft/hg/internal/Patch.java Mon Apr 22 19:17:29 2013 +0200 +++ b/src/org/tmatesoft/hg/internal/Patch.java Wed Apr 24 15:39:53 2013 +0200 @@ -86,7 +86,7 @@ } // number of bytes this patch will add (or remove, if negative) from the base revision - private int patchSizeDelta() { + public int patchSizeDelta() { int rv = 0; int prevEnd = 0; for (int i = 0, x = data.size(); i < x; i++) { @@ -182,13 +182,19 @@ } /*package-local*/ void add(int start, int end, byte[] d) { + // FIXME if start == end(-1), merge data + if (start == end && d.length == 0) { + System.currentTimeMillis(); + return; + } starts.add(start); ends.add(end); data.add(d); } + // copies [start..end) bytes from the d private static byte[] subarray(byte[] d, int start, int end) { - byte[] r = new byte[end-start+1]; + byte[] r = new byte[end-start]; System.arraycopy(d, start, r, 0, r.length); return r; } @@ -196,7 +202,7 @@ /** * Modify this patch with subsequent patch */ - private /*SHALL BE PUBLIC ONCE TESTING ENDS*/ Patch apply(Patch another) { + public /*SHALL BE PUBLIC ONCE TESTING ENDS*/ Patch apply(Patch another) { Patch r = new Patch(); int p1TotalAppliedDelta = 0; // value to add to start and end indexes of the older patch to get their values as if // in the patched text, iow, directly comparable with respective indexes from the newer patch. @@ -211,10 +217,10 @@ final int p2EntryRange = p2EntryEnd - p2EntryStart; final byte[] p2Data = another.data.get(p2); boolean insideP2entry = false; - int p2EntryStartOffset = -1; + // when we iterate p1 elements within single p2, we need to remember where p2 + // shall ultimately start in terms of p1 + int p2EntrySavedStart = -1; /// - p1EntryStart = p1EntryEnd = p1EntryLen = 0; - p1Data = null; L1: while (p1 < p1Max) { if (!insideP1entry) { @@ -229,8 +235,8 @@ final int p1EntryAppliedEnd = p1EntryAppliedStart + p1EntryLen; // end of j'th patch entry in the text which is source for p2 if (insideP2entry) { - if (p2EntryEnd < p1EntryAppliedStart) { - r.add(p2EntryStart - p2EntryStartOffset, p2EntryEnd - p1TotalAppliedDelta, p2Data); + if (p2EntryEnd <= p1EntryAppliedStart) { + r.add(p2EntrySavedStart, p2EntryEnd - p1TotalAppliedDelta, p2Data); insideP2entry = false; continue L0; } @@ -242,17 +248,18 @@ p1TotalAppliedDelta += p1EntryDelta; continue L1; } - // p1EntryAppliedStart <= p2EntryEnd < p1EntryAppliedEnd - r.add(p2EntryStart - p2EntryStartOffset, p2EntryEnd - p1TotalAppliedDelta, p2Data); - p1EntryStart = p2EntryEnd - p1TotalAppliedDelta; - final int p1DataPartShift = p2EntryEnd - p1EntryAppliedStart + 1; - if (p1DataPartShift >= p1EntryLen) { - p1EntryLen = 0; - p1Data = new byte[0]; - } else { - p1EntryLen -= p1DataPartShift; - p1Data = subarray(p1Data, p1DataPartShift, p1Data.length); - } + // p1EntryAppliedStart < p2EntryEnd < p1EntryAppliedEnd + // can add up to p1EntryEnd here (not only to p1EntryStart), but decided + // to leave p1 intact here, to avoid delta/range recalculation + r.add(p2EntrySavedStart, p1EntryStart, p2Data); + // consume part of p1 overlapped by current p2 + final int p1DataPartShift = p2EntryEnd - p1EntryAppliedStart; + // p2EntryEnd < p1EntryAppliedEnd ==> p2EntryEnd < p1EntryAppliedStart + p1EntryLen + // ==> p2EntryEnd - p1EntryAppliedStart < p1EntryLen + assert p1DataPartShift < p1EntryLen; + p1EntryLen -= p1DataPartShift; + p1Data = subarray(p1Data, p1DataPartShift, p1Data.length); + p1TotalAppliedDelta += p1DataPartShift; insideP1entry = true; insideP2entry = false; continue L0; @@ -265,63 +272,40 @@ insideP1entry = false; p1++; // fall-through to get p1TotalAppliedDelta incremented - } else { // SKETCH: II or III + } else { // SKETCH: II or III - p2 start inside p1 range // remember, p1EntryDelta may be negative // shall break j'th entry into few // fix p1's end/length - // p1EntryAppliedStart < p2EntryStart < p1EntryAppliedEnd - int s = p2EntryStart - p1TotalAppliedDelta; // p2EntryStart in p1 scale. Is within p1 range - if (s > p1EntryEnd) { - s = p1EntryEnd; - } - int p1DataPartEnd = p2EntryStart - p1EntryAppliedStart; // index, not count. <= (p1EntryEnd-p1EntryStart). - // add what left from p1 - if (p1DataPartEnd < p1EntryLen) { - r.add(p1EntryStart, s, subarray(p1Data, 0, p1DataPartEnd)); - } else { - p1DataPartEnd = p1EntryLen-1; // record factual number of p1 bytes we consumed. - r.add(p1EntryStart, s, p1Data); - } - p1TotalAppliedDelta += p1DataPartEnd - (s - p1EntryStart); // (s2 - (s1+delta)) - (s2 - delta - s1) = s2-s1-delta-s2+delta+s1 = 0, unless p1DataPartEnd >= p1Data.length - p1EntryLen -= (p1DataPartEnd+1); - if (p2EntryEnd < p1EntryAppliedEnd) { - // SKETCH: III + // p1EntryAppliedStart < p2EntryStart < p1EntryAppliedEnd, or, alternatively + // p2EntryStart is from (p1EntryAppliedStart .. p1EntryAppliedStart + p1EntryLen) + int p1DataPartEnd = p2EntryStart - p1EntryAppliedStart; + assert p1DataPartEnd < p1EntryLen; + r.add(p1EntryStart, p1EntryEnd, subarray(p1Data, 0, p1DataPartEnd)); + if (p2EntryEnd <= p1EntryAppliedEnd) { // p2 fits completely into p1 + r.add(p1EntryEnd, p1EntryEnd, p2Data); + // p2 consumed, p1 has p1EntryLen - p1DataPartEnd - p2EntryRange bytes left to *insert* insideP1entry = true; - // p2 completely fits into changes of p1 - int e = p2EntryEnd - p1TotalAppliedDelta; // p2EntryEnd in p1 scale - if (e > p1EntryEnd) { - // any index past p1 end shall be calculated with respect to p1 end, thus it's unsafe to go past p1 end (there may be more p1 entries there) - e = p1EntryEnd; - } - r.add(s, e, p2Data); // add p2 - // modify p1 leftover - p1EntryStart = e; - if (p2EntryRange >= p1EntryLen) { - p1EntryLen = 0; - p1Data = new byte[0]; - } else { - p1Data = subarray(p1Data, p1DataPartEnd + p2EntryRange, p1Data.length-1 /*up to the last one*/); - p1EntryLen -= p2EntryRange; - } - // p2 is handled, but there are leftovers of p1 + p1EntryStart = p1EntryEnd; + p1EntryLen -= p1DataPartEnd; + p1EntryLen -= p2EntryRange; + // p2EntryEnd <= p1EntryAppliedEnd ==> p2EntryEnd <= p1EntryAppliedStart + p1EntryLen + // -p2EntryStart ==> p2EntryRange <= p1EntryAppliedStart-p2EntryStart + p1EntryLen + // p1EntryAppliedStart-p2EntryStart = -p1DataPartEnd ==> p2EntryRange <= p1EntryLen - p1DataEndPart + // +p1DataEndPart ==> p2EntryRange + p1DataEndPart <= p1EntryLen + assert p1EntryLen >= 0; + // p1EntryLen==0 with insideP1entry == true is nor really good here (gives empty patch elements x;x;0), + // however changing <= to < in p2EntryEnd <= p1EntryAppliedEnd above leads to errors + p1Data = subarray(p1Data, p1DataPartEnd+p2EntryRange, p1Data.length); + // augment total delta with p1EntryDelta part already consumed (p1EntryLen is pure insertion left for the next step) + p1TotalAppliedDelta += (p1EntryDelta - p1EntryLen); continue L0; - } else { // p2EntryEnd >= p1EntryAppliedEnd - // SKETCH: II + } else { + // p1 is consumed, take next insideP1entry = false; p1++; - if (p1EntryAppliedStart + p1EntryDelta >= p2EntryEnd) { - // here we know next p1 entry would be past p2 entry and thus can put p2 right away - r.add(p2EntryStart - p1TotalAppliedDelta, p1EntryEnd, p2Data); - p1TotalAppliedDelta += p1EntryDelta; - continue L0; - } else { - // there are chances there are more p1 entries till p2 ends - insideP2entry = true; - p2EntryStartOffset = p1TotalAppliedDelta; - // p2EntryEnd is past delta, no chances for p1Data leftovers to be in use - // p2 processing is not over, need to fix end, depending on what else fits into p2 range (if nothing, can put p2.end right away) - // fall-through to get p1TotalAppliedDelta incremented; - } + insideP2entry = true; + p2EntrySavedStart = p1EntryEnd; // this is how far we've progressed in p1 + // fall-through to get p1TotalAppliedDelta updated with consumed p1 } } } else { // p1EntryAppliedStart >= p2EntryStart @@ -331,34 +315,43 @@ // SHALL NOT increment p1TotalAppliedDelta as we didn't use any of p1 continue L0; // next p2 } else { // p2EntryEnd >= p1EntryAppliedStart - // SKETCH: I or IV - // p2EntryEnd is either < p1EntryAppliedEnd or past it - if (p2EntryEnd <= p1EntryAppliedEnd) { + // SKETCH: I or IV: + // p2 start is outside of p1 range. + // + // p2DataPartEnd: this is how many bytes prior to p1EntryStart is replaced by p2Data + int p2DataPartEnd = p1EntryAppliedStart - p2EntryStart; + if (p2EntryEnd < p1EntryAppliedEnd) { // SKETCH: I: copy p2, strip p1 to start from p2EntryEnd, next i (p2) insideP1entry = true; - int e = p2EntryEnd - p1TotalAppliedDelta; - if (e > p1EntryEnd) { - e = p1EntryEnd; // added by analogy with above. Is needed? - } - r.add(p2EntryStart - p1TotalAppliedDelta, e, p2Data); - p1EntryStart = e; - int p1DataShift = p2EntryEnd - p1EntryAppliedStart; - if (p1DataShift >= p1EntryLen) { - p1EntryLen = 0; - p1Data = new byte[0]; - } else { - p1EntryLen -= p1DataShift; - p1Data = subarray(p1Data, p1DataShift, p1Data.length - 1); - } - // p1TotalAppliedDelta would get incremented once this modified p1 is handled + // replace whole p1 (extended to the left by (p2 \ p1) front bytes) + r.add(p1EntryStart - p2DataPartEnd, p1EntryEnd, p2Data); + p1EntryStart = p1EntryEnd; + // see how much of p1 is left for insertion + int p1DataPartEnd = p2EntryEnd - p1EntryAppliedStart; // #1 + // Similar, although incorrect: p1DataPartEnd == p2Data.length - p2DataPartEnd; // #2 + // #1(p2EntryStart + p2DataLen) - p1EntryAppliedStart + // #2 p2DataLen - (p1EntryAppliedStart - p2EntryStart) + // but this works only in assumption that p2EntryEnd-p2EntryStart == p2Data.length + // + // p1EntryAppliedStart <= p2EntryEnd < p1EntryAppliedStart + p1EntryLen + // -p1EntryAppliedStart (to compare against p1DataPartEnd) ==> 0 <= p1DataPartEnd < p1EntryLen + assert p1DataPartEnd < p1EntryLen; + assert p1DataPartEnd >= 0; + p1EntryLen -= p1DataPartEnd; + p1Data = subarray(p1Data, p1DataPartEnd, p1Data.length); + + // p1TotalAppliedDelta XXX + p1TotalAppliedDelta += (p1EntryDelta - p1EntryLen); continue L0; // next p2; } else { - // p2EntryEnd > p1EntryAppliedEnd + // p2EntryEnd >= p1EntryAppliedEnd // SKETCH IV: skip (rest of) p1 completely, continue the same unless found p1 with start or end past p2EntryEnd. insideP1entry = false; + // p1 consumed p1++; insideP2entry = true; - p2EntryStartOffset = p1TotalAppliedDelta; + // extend to the left of p1 by p2 \ p1 front bytes + p2EntrySavedStart = p1EntryStart - p2DataPartEnd; // fall-through to get p1TotalAppliedDelta incremented } } @@ -368,10 +361,9 @@ { // no more p1 entries, shall close p2 (if it's handled, code above jumps directly to L0) // regardless of whether insideP2 is .t - int s = p2EntryStart; - // p2EntryStartOffset != -1 when we started p2 entry processing, but not completed + int s = p2EntrySavedStart != -1 ? p2EntrySavedStart : p2EntryStart - p1TotalAppliedDelta; + // p2EntrySavedStart != -1 when we started p2 entry processing, but not completed // if we handled last p1 entry but didn't start with p2 entry processing, it's -1 and regular p1 delta shall be used - s -= p2EntryStartOffset == -1 ? p1TotalAppliedDelta : p2EntryStartOffset; r.add(s, p2EntryEnd - p1TotalAppliedDelta, p2Data); } } @@ -386,6 +378,60 @@ return r; } + /** + * Combine consecutive regions into one. + * XXX NOW only combines two subsequent regions, seems enough for quick test + * @return this or new instance of consecutive regions found + */ + public Patch normalize() { + Patch rv = null; + for (int i = 1, x = data.size(); i < x; i++) { + if (starts.get(i) == ends.get(i-1)) { + if (rv == null) { + rv = new Patch(); + rv.copyOf(this, 0, i-1); +// } else if (ends.get(i-1) == rv.ends.get(rv.ends.size()-1)) { +// // "JUST IN CASE" code, i++ below prevents us from getting here +// // if the last region is the one already merged, +// // ignore this occurrence (otherwise data(i-1) would get copied again) +// continue; + } + byte[] d1 = data.get(i-1); + byte[] d = data.get(i); + byte[] nd; + if (d1.length > 0 && d.length > 0) { + nd = new byte[d1.length + d.length]; + System.arraycopy(d1, 0, nd, 0, d1.length); + System.arraycopy(d, 0, nd, d1.length, d.length); + } else { + nd = d1.length == 0 ? d : d1 ; + } + rv.add(starts.get(i-1), ends.get(i), nd); + i++; // skip i-th element (effectively means we detect only pairs) + // without this ++, element(i-1) is added to rv once "else" below is hit on the next step + } else { + if (rv != null) { + rv.add(this, i-1); + } + } + } + if (rv == null) { + return this; + } else { + int last = count() - 1; + if (starts.get(last) != ends.get(last-1)) { + rv.add(this, last); + } + } + return rv; + } + + private void copyOf(Patch another, int fromIndex, int upToIndex) { + while(fromIndex < upToIndex) { + add(another, fromIndex++); + } + } + public class PatchDataSource implements DataSerializer.DataSource { public void serialize(DataSerializer out) throws IOException { diff -r 90df078d6418 -r 47dfa0ec7e35 src/org/tmatesoft/hg/internal/RevlogDump.java --- a/src/org/tmatesoft/hg/internal/RevlogDump.java Mon Apr 22 19:17:29 2013 +0200 +++ b/src/org/tmatesoft/hg/internal/RevlogDump.java Wed Apr 24 15:39:53 2013 +0200 @@ -29,6 +29,7 @@ import java.nio.channels.FileChannel; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.zip.DataFormatException; import java.util.zip.Inflater; /** @@ -50,7 +51,7 @@ String filename = "store/00changelog.i"; // String filename = "store/data/hello.c.i"; // String filename = "store/data/docs/readme.i"; - System.out.println(escape("abc\0def\nzxc\tmnb")); +// System.out.println(escape("abc\0def\nzxc\tmnb")); boolean dumpDataFull = true; boolean dumpDataStats = false; if (args.length > 1) { @@ -61,77 +62,45 @@ } final boolean needRevData = dumpDataFull || dumpDataStats; // - DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(new File(repo, filename)))); - DataInput di = dis; - dis.mark(10); - int versionField = di.readInt(); - dis.reset(); - final int INLINEDATA = 1 << 16; - - final boolean inlineData = (versionField & INLINEDATA) != 0; - System.out.printf("%#8x, inline: %b\n", versionField, inlineData); - FileChannel dataStream = null; - if (!inlineData && needRevData) { - dataStream = new FileInputStream(new File(repo, filename.substring(0, filename.length()-2) + ".d")).getChannel(); - } + RevlogReader rr = new RevlogReader(new File(repo, filename)).needData(needRevData); + rr.init(needRevData); + System.out.printf("%#8x, inline: %b\n", rr.versionField, rr.inlineData); System.out.println("Index Offset Flags Packed Actual Base Rev Link Rev Parent1 Parent2 nodeid"); - int entryIndex = 0; - while (dis.available() > 0) { - long l = di.readLong(); - long offset = entryIndex == 0 ? 0 : (l >>> 16); - int flags = (int) (l & 0x0FFFF); - int compressedLen = di.readInt(); - int actualLen = di.readInt(); - int baseRevision = di.readInt(); - int linkRevision = di.readInt(); - int parent1Revision = di.readInt(); - int parent2Revision = di.readInt(); - byte[] buf = new byte[32]; - di.readFully(buf, 12, 20); - dis.skipBytes(12); - // CAN'T USE skip() here without extra precautions. E.g. I ran into situation when - // buffer was 8192 and BufferedInputStream was at position 8182 before attempt to skip(12). - // BIS silently skips available bytes and leaves me two extra bytes that ruin the rest of the code. - System.out.printf("%4d:%14d %6X %10d %10d %10d %10d %8d %8d %040x\n", entryIndex, offset, flags, compressedLen, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, new BigInteger(buf)); - String resultString; - byte[] data = new byte[compressedLen]; - if (inlineData) { - di.readFully(data); - } else if (needRevData) { - dataStream.position(offset); - dataStream.read(ByteBuffer.wrap(data)); - } + ByteBuffer data = null; + while (rr.hasMore()) { + rr.readNext(); + System.out.printf("%4d:%14d %6X %10d %10d %10d %10d %8d %8d %040x\n", rr.entryIndex, rr.offset, rr.flags, rr.compressedLen, rr.actualLen, rr.baseRevision, rr.linkRevision, rr.parent1Revision, rr.parent2Revision, new BigInteger(rr.nodeid)); if (needRevData) { - if (compressedLen == 0) { + String resultString; + if (rr.getDataLength() == 0) { resultString = ""; } else { - if (data[0] == 0x78 /* 'x' */) { - Inflater zlib = new Inflater(); - zlib.setInput(data, 0, compressedLen); - byte[] result = new byte[actualLen*2]; - int resultLen = zlib.inflate(result); - zlib.end(); - resultString = buildString(result, 0, resultLen, baseRevision != entryIndex, dumpDataFull); - } else if (data[0] == 0x75 /* 'u' */) { - resultString = buildString(data, 1, data.length - 1, baseRevision != entryIndex, dumpDataFull); - } else { - resultString = buildString(data, 0, data.length, baseRevision != entryIndex, dumpDataFull); - } + data = ensureCapacity(data, rr.getDataLength()); + rr.getData(data); + data.flip(); + resultString = buildString(data, rr.isPatch(), dumpDataFull); } - System.out.println(resultString); + if (resultString.endsWith("\n")) { + System.out.print(resultString); + } else { + System.out.println(resultString); + } } - entryIndex++; } - dis.close(); - if (dataStream != null) { - dataStream.close(); - } - // + rr.done(); } - private static String buildString(byte[] data, int offset, int len, boolean isPatch, boolean completeDataDump) throws IOException, UnsupportedEncodingException { + private static ByteBuffer ensureCapacity(ByteBuffer src, int requiredCap) { + if (src == null || src.capacity() < requiredCap) { + return ByteBuffer.allocate((1 + requiredCap) * 3 / 2); + } + src.clear(); + return src; + } + + private static String buildString(ByteBuffer data, boolean isPatch, boolean completeDataDump) throws IOException, UnsupportedEncodingException { if (isPatch) { - DataInputStream dis = new DataInputStream(new ByteArrayInputStream(data, offset, len)); + DataInputStream dis = new DataInputStream(new ByteArrayInputStream(data.array(), data.arrayOffset(), data.remaining())); StringBuilder sb = new StringBuilder(); sb.append(":\n"); while (dis.available() > 0) { @@ -152,9 +121,9 @@ return sb.toString(); } else { if (completeDataDump) { - return escape(new String(data, offset, len, "UTF-8")); + return escape(new String(data.array(), data.arrayOffset(), data.remaining(), "UTF-8")); } - return String.format(":%d bytes", len-offset); + return String.format(":%d bytes", data.remaining()); } } @@ -186,4 +155,159 @@ m.appendTail(rv); return rv.toString(); } + + public static class RevlogReader { + + private final File file; + private boolean needRevData; + private DataInputStream dis; + private boolean inlineData; + public int versionField; + private FileChannel dataStream; + public int entryIndex; + private byte[] data; + private int dataOffset, dataLen; + public long offset; + public int flags; + public int baseRevision; + public int linkRevision; + public int parent1Revision; + public int parent2Revision; + public int compressedLen; + public int actualLen; + public byte[] nodeid = new byte[21]; // need 1 byte in the front to be 0 to avoid negative BigInts + + public RevlogReader(File f) { + assert f.getName().endsWith(".i"); + file = f; + } + + // affects #readNext() + public RevlogReader needData(boolean needData) { + needRevData = needData; + return this; + } + + public void init(boolean mayRequireData) throws IOException { + dis = new DataInputStream(new BufferedInputStream(new FileInputStream(file))); + DataInput di = dis; + dis.mark(10); + versionField = di.readInt(); + dis.reset(); + final int INLINEDATA = 1 << 16; + inlineData = (versionField & INLINEDATA) != 0; + + dataStream = null; + if (!inlineData && mayRequireData) { + String fname = file.getAbsolutePath(); + dataStream = new FileInputStream(new File(fname.substring(0, fname.length()-2) + ".d")).getChannel(); + } + + entryIndex = -1; + } + + public void startFrom(int startEntryIndex) throws IOException { + if (dis == null) { + throw new IllegalStateException("Call #init() first"); + } + if (entryIndex != -1 && startEntryIndex != 0) { + throw new IllegalStateException("Can't seek once iteration has started"); + } + if (dataStream == null) { + throw new IllegalStateException("Sorry, initial seek is now supported for separate .i/.d only"); + } + long newPos = startEntryIndex * Internals.REVLOGV1_RECORD_SIZE, actualSkip; + do { + actualSkip = dis.skip(newPos); + if (actualSkip <= 0) { + throw new IllegalStateException(String.valueOf(actualSkip)); + } + newPos -= actualSkip; + } while (newPos > 0); + entryIndex = startEntryIndex - 1; + } + + public boolean hasMore() throws IOException { + return dis.available() > 0; + } + + public void readNext() throws IOException, DataFormatException { + entryIndex++; + DataInput di = dis; + long l = di.readLong(); + offset = entryIndex == 0 ? 0 : (l >>> 16); + flags = (int) (l & 0x0FFFF); + compressedLen = di.readInt(); + actualLen = di.readInt(); + baseRevision = di.readInt(); + linkRevision = di.readInt(); + parent1Revision = di.readInt(); + parent2Revision = di.readInt(); + di.readFully(nodeid, 1, 20); + dis.skipBytes(12); + // CAN'T USE skip() here without extra precautions. E.g. I ran into situation when + // buffer was 8192 and BufferedInputStream was at position 8182 before attempt to skip(12). + // BIS silently skips available bytes and leaves me two extra bytes that ruin the rest of the code. + data = new byte[compressedLen]; + if (inlineData) { + di.readFully(data); + } else if (needRevData) { + dataStream.position(offset); + dataStream.read(ByteBuffer.wrap(data)); + } + if (needRevData) { + if (compressedLen == 0) { + data = null; + dataOffset = dataLen = 0; + } else { + if (data[0] == 0x78 /* 'x' */) { + Inflater zlib = new Inflater(); + zlib.setInput(data, 0, compressedLen); + byte[] result = new byte[actualLen * 3]; + int resultLen = zlib.inflate(result); + zlib.end(); + data = result; + dataOffset = 0; + dataLen = resultLen; + } else if (data[0] == 0x75 /* 'u' */) { + dataOffset = 1; + dataLen = data.length - 1; + } else { + dataOffset = 0; + dataLen = data.length; + } + } + } + } + + public int getDataLength() { + // NOT actualLen - there are empty patch revisions (dataLen == 0, but actualLen == previous length) + // NOT compressedLen - zip data is uncompressed + return dataLen; + } + + public void getData(ByteBuffer bb) { + assert bb.remaining() >= dataLen; + bb.put(data, dataOffset, dataLen); + } + + public boolean isPatch() { + assert entryIndex != -1; + return baseRevision != entryIndex; + } + + public boolean isInline() { + assert dis != null; + return inlineData; + } + + public void done() throws IOException { + dis.close(); + dis = null; + if (dataStream != null) { + dataStream.close(); + dataStream = null; + } + } + } } diff -r 90df078d6418 -r 47dfa0ec7e35 src/org/tmatesoft/hg/repo/HgManifest.java --- a/src/org/tmatesoft/hg/repo/HgManifest.java Mon Apr 22 19:17:29 2013 +0200 +++ b/src/org/tmatesoft/hg/repo/HgManifest.java Wed Apr 24 15:39:53 2013 +0200 @@ -371,12 +371,12 @@ * Denotes entering specific manifest revision, separate entries are * reported with subsequence {@link #next(Nodeid, Path, Flags)} calls. * - * @param mainfestRevisionIndex local revision index of the inspected revision + * @param manifestRevisionIndex local revision index of the inspected revision * @param manifestRevision revision of the manifest we're about to iterate through * @param changelogRevisionIndex local revision index of changelog this manifest points to * @return true to continue iteration, false to stop */ - boolean begin(int mainfestRevisionIndex, Nodeid manifestRevision, int changelogRevisionIndex); + boolean begin(int manifestRevisionIndex, Nodeid manifestRevision, int changelogRevisionIndex); /** diff -r 90df078d6418 -r 47dfa0ec7e35 test/org/tmatesoft/hg/test/TestRevlog.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/org/tmatesoft/hg/test/TestRevlog.java Wed Apr 24 15:39:53 2013 +0200 @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2013 TMate Software Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * For information on how to redistribute this software under + * the terms of a license other than GNU General Public License + * contact TMate Software at support@hg4j.com + */ +package org.tmatesoft.hg.test; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; + +import org.tmatesoft.hg.core.HgRepositoryNotFoundException; +import org.tmatesoft.hg.core.Nodeid; +import org.tmatesoft.hg.internal.ByteArrayDataAccess; +import org.tmatesoft.hg.internal.DiffHelper; +import org.tmatesoft.hg.internal.Patch; +import org.tmatesoft.hg.internal.DiffHelper.LineSequence; +import org.tmatesoft.hg.internal.RevlogDump.RevlogReader; +import org.tmatesoft.hg.repo.HgLookup; +import org.tmatesoft.hg.repo.HgManifest; +import org.tmatesoft.hg.repo.HgManifest.Flags; +import org.tmatesoft.hg.repo.HgRepository; +import org.tmatesoft.hg.util.Path; + +/** + * Not a real JUnit test now + * + * @author Artem Tikhomirov + * @author TMate Software Ltd. + */ +public class TestRevlog { + private ByteBuffer patchData; + + public static void main(String[] args) throws Exception { + File indexFile = new File("/home/artem/hg/cpython/.hg/store/00manifest.i"); + new TestRevlog().run(indexFile); + } + + private void run(File indexFile) throws Exception { + final boolean shallDumpDiff = Boolean.TRUE.booleanValue(); + final boolean thoroughCheck = Boolean.TRUE.booleanValue(); + // + RevlogReader rr = new RevlogReader(indexFile); + rr.init(true); + rr.needData(true); + int startEntryIndex = 76507; // 150--87 + rr.startFrom(startEntryIndex); + rr.readNext(); + ByteBuffer baseRevision = null; + if (rr.isPatch()) { + byte[] cc = getRevisionTrueContent(indexFile.getParentFile(), rr.entryIndex, rr.linkRevision); + baseRevision = ByteBuffer.wrap(cc); + } else { + baseRevision = ByteBuffer.allocate(rr.getDataLength()); + rr.getData(baseRevision); + baseRevision.flip(); + } + ByteArrayDataAccess baseRevisionContent = new ByteArrayDataAccess(baseRevision.array(), baseRevision.arrayOffset(), baseRevision.remaining()); + // + final long start = System.currentTimeMillis(); + int n = 1419; + Patch seqPatch = null, normalizedPatch = null; + while (rr.hasMore() && n-- > 0) { + rr.readNext(); + if (!rr.isPatch()) { + break; + } + if (rr.getDataLength() == 0) { + System.out.printf("Empty content of revision %d\n", rr.entryIndex); + continue; + } + Patch p1 = createPatch(rr); + if (seqPatch != null) { + if (n < 1) { + System.out.println("+" + p1); + System.currentTimeMillis(); + } + seqPatch = seqPatch.apply(p1); + Patch ppp = normalizedPatch.apply(p1); + normalizedPatch = ppp.normalize(); + if (n <= 1) { + System.out.println("=" + seqPatch); + } + if (n == 0) { + System.out.println("A" + ppp); + System.out.println("N" + normalizedPatch); + normalizedPatch = ppp; + } + // + if (!thoroughCheck) { + if (baseRevisionContent.length() + seqPatch.patchSizeDelta() != rr.actualLen) { + System.out.printf("Sequential patches:\tPatchRevision #%d (+%d, cset:%d) failed\n", rr.entryIndex, rr.entryIndex - startEntryIndex, rr.linkRevision); + } + if (baseRevisionContent.length() + normalizedPatch.patchSizeDelta() != rr.actualLen) { + System.out.printf("Normalized patches:\tPatchRevision #%d (+%d, cset:%d) failed\n", rr.entryIndex, rr.entryIndex - startEntryIndex, rr.linkRevision); + } + } else { + byte[] origin = getRevisionTrueContent(indexFile.getParentFile(), rr.entryIndex, rr.linkRevision); + try { + byte[] result1 = seqPatch.apply(baseRevisionContent, rr.actualLen); + if (!Arrays.equals(result1, origin)) { + System.out.printf("Sequential patches:\tPatchRevision #%d (+%d, cset:%d) failed\n", rr.entryIndex, rr.entryIndex - startEntryIndex, rr.linkRevision); + } + } catch (ArrayIndexOutOfBoundsException ex) { + System.err.printf("Failure at entry %d (+%d)\n", rr.entryIndex, rr.entryIndex - startEntryIndex); + ex.printStackTrace(); + } + try { + byte[] result2 = normalizedPatch.apply(baseRevisionContent, rr.actualLen); + if (!Arrays.equals(result2, origin)) { + System.out.printf("Normalized patches:\tPatchRevision #%d (+%d, cset:%d) failed\n", rr.entryIndex, rr.entryIndex - startEntryIndex, rr.linkRevision); + } + } catch (ArrayIndexOutOfBoundsException ex) { + System.err.printf("Failure at entry %d (+%d)\n", rr.entryIndex, rr.entryIndex - startEntryIndex); + ex.printStackTrace(); + } + } + } else { + seqPatch = p1; + normalizedPatch = p1.normalize(); + } + } + final long end1 = System.currentTimeMillis(); + // +// byte[] result = seqPatch.apply(baseRevisionContent, rr.actualLen); + byte[] result = normalizedPatch.apply(baseRevisionContent, rr.actualLen); + final long end2 = System.currentTimeMillis(); + byte[] origin = getRevisionTrueContent(indexFile.getParentFile(), rr.entryIndex, rr.linkRevision); + final long end3 = System.currentTimeMillis(); + rr.done(); + System.out.printf("Collected patches up to revision %d. Patches total: %d, last contains %d elements\n", rr.entryIndex, rr.entryIndex - startEntryIndex + 1, seqPatch.count()); + if (!Arrays.equals(result, origin)) { + if (shallDumpDiff) { + diff(result, origin); + dumpLineDifference(result, origin); + } else { + System.out.println("FAILURE!"); + } + } else { + System.out.println("OK!"); + System.out.printf("Iterate: %d ms, apply collected: %d ms, total=%d ms; Conventional: %d ms\n", (end1-start), (end2-end1), (end2-start), (end3-end2)); + } + Patch normalized = normalizedPatch; //seqPatch.normalize(); + System.out.printf("N%s\n%d => %d patch elements\n", normalized, seqPatch.count(), normalized.count()); +// System.out.println(rs); + } + + private void dumpLineDifference(byte[] result, byte[] origin) { + String rs = new String(result).replace('\0', '\t'); + String os = new String(origin).replace('\0', '\t'); + String[] rsLines = rs.split("\n"); + String[] osLines = os.split("\n"); + int approxPos = 0; + for (int i = 0; i < Math.min(rsLines.length, osLines.length); i++) { + if (!rsLines[i].equals(osLines[i])) { + System.out.printf("@%d (offset ~%d)\n\t%s\n\t%s\n", i, approxPos, osLines[i], rsLines[i]); + } + approxPos += rsLines[i].length() + 1; + } + } + + private void diff(byte[] result, byte[] origin) { + DiffHelper pg = new DiffHelper(); + pg.init(LineSequence.newlines(origin), LineSequence.newlines(result)); + pg.findMatchingBlocks(new DiffHelper.DeltaDumpInspector()); + } + + private Patch createPatch(RevlogReader rr) throws IOException { + assert rr.isPatch(); + if (patchData == null || patchData.capacity() < rr.getDataLength()) { + patchData = ByteBuffer.allocate(rr.getDataLength()); + } else { + patchData.clear(); + } + rr.getData(patchData); + patchData.flip(); + Patch patch1 = new Patch(); + patch1.read(new ByteArrayDataAccess(patchData.array(), patchData.arrayOffset(), patchData.remaining())); + return patch1; + } + + private byte[] getRevisionTrueContent(File repoLoc, final int manifestRev, int clogRev) throws HgRepositoryNotFoundException { + HgRepository hgRepo = new HgLookup().detect(repoLoc); + final ByteArrayOutputStream out = new ByteArrayOutputStream(1024 * 1000); + hgRepo.getManifest().walk(clogRev, clogRev, new HgManifest.Inspector() { + + public boolean next(Nodeid nid, Path fname, Flags flags) { + try { + out.write(fname.toString().getBytes()); + out.write(0); + out.write(nid.toString().getBytes()); + if (flags == Flags.Exec) { + out.write('x'); + } else if (flags == Flags.Link) { + out.write('l'); + } + out.write('\n'); + } catch (IOException e) { + throw new IllegalStateException(e); + } + return true; + } + + public boolean end(int manifestRevisionIndex) { + return false; + } + + public boolean begin(int manifestRevisionIndex, Nodeid manifestRevision, int changelogRevisionIndex) { + if (manifestRev != manifestRevisionIndex) { + throw new IllegalStateException(String.valueOf(manifestRevisionIndex)); + } + return true; + } + }); + return out.toByteArray(); + } +}