changeset 583:47dfa0ec7e35

Effective revlog patching
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 24 Apr 2013 15:39:53 +0200
parents 90df078d6418
children ed243b668502
files src/org/tmatesoft/hg/internal/DiffHelper.java src/org/tmatesoft/hg/internal/Patch.java src/org/tmatesoft/hg/internal/RevlogDump.java src/org/tmatesoft/hg/repo/HgManifest.java test/org/tmatesoft/hg/test/TestRevlog.java
diffstat 5 files changed, 559 insertions(+), 159 deletions(-) [+]
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/internal/DiffHelper.java	Mon Apr 22 19:17:29 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/DiffHelper.java	Wed Apr 24 15:39:53 2013 +0200
@@ -232,7 +232,7 @@
 		}
 	}
 	
-	static class DeltaDumpInspector<T extends ChunkSequence<?>> extends DeltaInspector<T> {
+	public static class DeltaDumpInspector<T extends ChunkSequence<?>> extends DeltaInspector<T> {
 
 		@Override
 		protected void changed(int s1From, int s1To, int s2From, int s2To) {
--- a/src/org/tmatesoft/hg/internal/Patch.java	Mon Apr 22 19:17:29 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/Patch.java	Wed Apr 24 15:39:53 2013 +0200
@@ -86,7 +86,7 @@
 	}
 
 	// number of bytes this patch will add (or remove, if negative) from the base revision
-	private int patchSizeDelta() {
+	public int patchSizeDelta() {
 		int rv = 0;
 		int prevEnd = 0;
 		for (int i = 0, x = data.size(); i < x; i++) {
@@ -182,13 +182,19 @@
 	}
 
 	/*package-local*/ void add(int start, int end, byte[] d) {
+		// FIXME if start == end(-1), merge data
+		if (start == end && d.length == 0) {
+			System.currentTimeMillis();
+			return;
+		}
 		starts.add(start);
 		ends.add(end);
 		data.add(d);
 	}
 	
+	// copies [start..end) bytes from the d
 	private static byte[] subarray(byte[] d, int start, int end) {
-		byte[] r = new byte[end-start+1];
+		byte[] r = new byte[end-start];
 		System.arraycopy(d, start, r, 0, r.length);
 		return r;
 	}
@@ -196,7 +202,7 @@
 	/**
 	 * Modify this patch with subsequent patch 
 	 */
-	private /*SHALL BE PUBLIC ONCE TESTING ENDS*/ Patch apply(Patch another) {
+	public /*SHALL BE PUBLIC ONCE TESTING ENDS*/ Patch apply(Patch another) {
 		Patch r = new Patch();
 		int p1TotalAppliedDelta = 0; // value to add to start and end indexes of the older patch to get their values as if
 		// in the patched text, iow, directly comparable with respective indexes from the newer patch.
@@ -211,10 +217,10 @@
 			final int p2EntryRange = p2EntryEnd - p2EntryStart;
 			final byte[] p2Data = another.data.get(p2);
 			boolean insideP2entry = false;
-			int p2EntryStartOffset = -1;
+			// when we iterate p1 elements within single p2, we need to remember where p2
+			// shall ultimately start in terms of p1
+			int p2EntrySavedStart = -1;
 			///
-			p1EntryStart = p1EntryEnd = p1EntryLen = 0;
-			p1Data = null;
 			
 L1:			while (p1 < p1Max) {
 				if (!insideP1entry) {
@@ -229,8 +235,8 @@
 				final int p1EntryAppliedEnd = p1EntryAppliedStart + p1EntryLen; // end of j'th patch entry in the text which is source for p2
 				
 				if (insideP2entry) {
-					if (p2EntryEnd < p1EntryAppliedStart) {
-						r.add(p2EntryStart - p2EntryStartOffset, p2EntryEnd - p1TotalAppliedDelta, p2Data);
+					if (p2EntryEnd <= p1EntryAppliedStart) {
+						r.add(p2EntrySavedStart, p2EntryEnd - p1TotalAppliedDelta, p2Data);
 						insideP2entry = false;
 						continue L0; 
 					}
@@ -242,17 +248,18 @@
 						p1TotalAppliedDelta += p1EntryDelta;
 						continue L1;
 					}
-					// p1EntryAppliedStart <= p2EntryEnd < p1EntryAppliedEnd
-					r.add(p2EntryStart - p2EntryStartOffset, p2EntryEnd - p1TotalAppliedDelta, p2Data);
-					p1EntryStart = p2EntryEnd - p1TotalAppliedDelta;
-					final int p1DataPartShift = p2EntryEnd - p1EntryAppliedStart + 1;
-					if (p1DataPartShift >= p1EntryLen) {
-						p1EntryLen = 0;
-						p1Data = new byte[0];
-					} else {
-						p1EntryLen -= p1DataPartShift;
-						p1Data = subarray(p1Data, p1DataPartShift, p1Data.length);
-					}
+					// p1EntryAppliedStart < p2EntryEnd < p1EntryAppliedEnd
+					// can add up to p1EntryEnd here (not only to p1EntryStart), but decided
+					// to leave p1 intact here, to avoid delta/range recalculation 
+					r.add(p2EntrySavedStart, p1EntryStart, p2Data);
+					// consume part of p1 overlapped by current p2
+					final int p1DataPartShift = p2EntryEnd - p1EntryAppliedStart;
+					// p2EntryEnd < p1EntryAppliedEnd		==> p2EntryEnd < p1EntryAppliedStart + p1EntryLen
+					//										==> p2EntryEnd - p1EntryAppliedStart < p1EntryLen
+					assert p1DataPartShift < p1EntryLen;
+					p1EntryLen -= p1DataPartShift;
+					p1Data = subarray(p1Data, p1DataPartShift, p1Data.length);
+					p1TotalAppliedDelta += p1DataPartShift;
 					insideP1entry = true;
 					insideP2entry = false;
 					continue L0;
@@ -265,63 +272,40 @@
 						insideP1entry = false;
 						p1++;
 						// fall-through to get p1TotalAppliedDelta incremented
-					} else { // SKETCH: II or III
+					} else { // SKETCH: II or III - p2 starts inside p1 range
 						// remember, p1EntryDelta may be negative
 						// shall break j'th entry into few 
 						// fix p1's end/length
-						// p1EntryAppliedStart < p2EntryStart < p1EntryAppliedEnd
-						int s = p2EntryStart - p1TotalAppliedDelta; // p2EntryStart in p1 scale. Is within p1 range
-						if (s > p1EntryEnd) {
-							s = p1EntryEnd;
-						}
-						int p1DataPartEnd = p2EntryStart - p1EntryAppliedStart; // index, not count. <= (p1EntryEnd-p1EntryStart).
-						// add what left from p1
-						if (p1DataPartEnd < p1EntryLen) {
-							r.add(p1EntryStart, s, subarray(p1Data, 0, p1DataPartEnd)); 
-						} else {
-							p1DataPartEnd = p1EntryLen-1; // record factual number of p1 bytes we consumed.
-							r.add(p1EntryStart, s, p1Data);
-						}
-						p1TotalAppliedDelta += p1DataPartEnd - (s - p1EntryStart); // (s2 - (s1+delta)) - (s2 - delta - s1) = s2-s1-delta-s2+delta+s1 = 0, unless p1DataPartEnd >= p1Data.length
-						p1EntryLen -= (p1DataPartEnd+1); 
-						if (p2EntryEnd < p1EntryAppliedEnd) {
-							// SKETCH: III
+						// p1EntryAppliedStart < p2EntryStart < p1EntryAppliedEnd, or, alternatively
+						// p2EntryStart is from (p1EntryAppliedStart .. p1EntryAppliedStart + p1EntryLen)
+						int p1DataPartEnd = p2EntryStart - p1EntryAppliedStart;
+						assert p1DataPartEnd < p1EntryLen;
+						r.add(p1EntryStart, p1EntryEnd, subarray(p1Data, 0, p1DataPartEnd));
+						if (p2EntryEnd <= p1EntryAppliedEnd) { // p2 fits completely into p1
+							r.add(p1EntryEnd, p1EntryEnd, p2Data);
+							// p2 consumed, p1 has p1EntryLen - p1DataPartEnd - p2EntryRange bytes left to *insert*
 							insideP1entry = true;
-							// p2 completely fits into changes of p1
-							int e = p2EntryEnd - p1TotalAppliedDelta; // p2EntryEnd in p1 scale
-							if (e > p1EntryEnd) {
-								// any index past p1 end shall be calculated with respect to p1 end, thus it's unsafe to go past p1 end (there may be more p1 entries there)   
-								e = p1EntryEnd;
-							}
-							r.add(s, e, p2Data); // add p2
-							// modify p1 leftover
-							p1EntryStart = e;
-							if (p2EntryRange >= p1EntryLen) {
-								p1EntryLen = 0;
-								p1Data = new byte[0];
-							} else {
-								p1Data = subarray(p1Data, p1DataPartEnd + p2EntryRange, p1Data.length-1 /*up to the last one*/);
-								p1EntryLen -= p2EntryRange;
-							}
-							// p2 is handled, but there are leftovers of p1
+							p1EntryStart = p1EntryEnd;
+							p1EntryLen -= p1DataPartEnd;
+							p1EntryLen -= p2EntryRange;
+							// p2EntryEnd <= p1EntryAppliedEnd						==> p2EntryEnd <= p1EntryAppliedStart + p1EntryLen
+							// -p2EntryStart    									==> p2EntryRange <= p1EntryAppliedStart-p2EntryStart + p1EntryLen
+							// p1EntryAppliedStart-p2EntryStart = -p1DataPartEnd	==> p2EntryRange <= p1EntryLen - p1DataPartEnd
+							//	+p1DataPartEnd										==> p2EntryRange + p1DataPartEnd <= p1EntryLen
+							assert p1EntryLen >= 0;
+							// p1EntryLen==0 with insideP1entry == true is not really good here (gives empty patch elements x;x;0), 
+							// however changing <= to < in p2EntryEnd <= p1EntryAppliedEnd above leads to errors
+							p1Data = subarray(p1Data, p1DataPartEnd+p2EntryRange, p1Data.length);
+							// augment total delta with p1EntryDelta part already consumed (p1EntryLen is pure insertion left for the next step) 
+							p1TotalAppliedDelta += (p1EntryDelta - p1EntryLen);
 							continue L0;
-						} else { // p2EntryEnd >= p1EntryAppliedEnd
-							// SKETCH: II
+						} else {
+							// p1 is consumed, take next
 							insideP1entry = false;
 							p1++;
-							if (p1EntryAppliedStart + p1EntryDelta >= p2EntryEnd) {
-								// here we know next p1 entry would be past p2 entry and thus can put p2 right away
-								r.add(p2EntryStart - p1TotalAppliedDelta, p1EntryEnd, p2Data);
-								p1TotalAppliedDelta += p1EntryDelta;
-								continue L0;
-							} else {
-								// there are chances there are more p1 entries till p2 ends
-								insideP2entry = true;
-								p2EntryStartOffset = p1TotalAppliedDelta;
-								// p2EntryEnd is past delta, no chances for p1Data leftovers to be in use
-								// p2 processing is not over, need to fix end, depending on what else fits into p2 range (if nothing, can put p2.end right away)
-								// fall-through to get p1TotalAppliedDelta incremented;
-							}
+							insideP2entry = true;
+							p2EntrySavedStart = p1EntryEnd; // this is how far we've progressed in p1
+							// fall-through to get p1TotalAppliedDelta updated with consumed p1
 						}
 					}
 				} else { // p1EntryAppliedStart >= p2EntryStart
@@ -331,34 +315,43 @@
 						// SHALL NOT increment p1TotalAppliedDelta as we didn't use any of p1
 						continue L0; // next p2 
 					} else { // p2EntryEnd >= p1EntryAppliedStart
-						// SKETCH: I or IV
-						// p2EntryEnd is either  < p1EntryAppliedEnd or past it
-						if (p2EntryEnd <= p1EntryAppliedEnd) {
+						// SKETCH: I or IV:
+						// p2 start is outside of p1 range.
+						//
+						// p2DataPartEnd: this is how many bytes prior to p1EntryStart is replaced by p2Data
+						int p2DataPartEnd = p1EntryAppliedStart - p2EntryStart;
+						if (p2EntryEnd < p1EntryAppliedEnd) {
 							// SKETCH: I: copy p2, strip p1 to start from p2EntryEnd, next i (p2)
 							insideP1entry = true;
-							int e = p2EntryEnd - p1TotalAppliedDelta;
-							if (e > p1EntryEnd) {
-								e = p1EntryEnd; // added by analogy with above. Is needed?
-							}
-							r.add(p2EntryStart - p1TotalAppliedDelta, e, p2Data);
-							p1EntryStart = e;
-							int p1DataShift = p2EntryEnd - p1EntryAppliedStart;
-							if (p1DataShift >= p1EntryLen) {
-								p1EntryLen = 0;
-								p1Data = new byte[0];
-							} else {
-								p1EntryLen -= p1DataShift;
-								p1Data = subarray(p1Data, p1DataShift, p1Data.length - 1);
-							}
-							// p1TotalAppliedDelta would get incremented once this modified p1 is handled
+							// replace whole p1 (extended to the left by (p2 \ p1) front bytes)
+							r.add(p1EntryStart - p2DataPartEnd, p1EntryEnd, p2Data);
+							p1EntryStart = p1EntryEnd;
+							// see how much of p1 is left for insertion
+							int p1DataPartEnd = p2EntryEnd - p1EntryAppliedStart; // #1
+							// Similar, although incorrect: p1DataPartEnd == p2Data.length - p2DataPartEnd; // #2
+							// #1(p2EntryStart + p2DataLen) - p1EntryAppliedStart
+							// #2 p2DataLen - (p1EntryAppliedStart - p2EntryStart)
+							// but this works only in assumption that p2EntryEnd-p2EntryStart == p2Data.length
+							//
+							// p1EntryAppliedStart <= p2EntryEnd < p1EntryAppliedStart + p1EntryLen
+							// -p1EntryAppliedStart (to compare against p1DataPartEnd)  ==> 	0 <= p1DataPartEnd < p1EntryLen
+							assert p1DataPartEnd < p1EntryLen;
+							assert p1DataPartEnd >= 0;
+							p1EntryLen -= p1DataPartEnd;
+							p1Data = subarray(p1Data, p1DataPartEnd, p1Data.length);
+
+							// p1TotalAppliedDelta XXX
+							p1TotalAppliedDelta += (p1EntryDelta - p1EntryLen);
 							continue L0; // next p2;
 						} else {
-							// p2EntryEnd > p1EntryAppliedEnd
+							// p2EntryEnd >= p1EntryAppliedEnd
 							// SKETCH IV: skip (rest of) p1 completely, continue the same unless  found p1 with start or end past p2EntryEnd.
 							insideP1entry = false;
+							// p1 consumed
 							p1++;
 							insideP2entry = true;
-							p2EntryStartOffset = p1TotalAppliedDelta;
+							// extend to the left of p1 by p2 \ p1 front bytes
+							p2EntrySavedStart = p1EntryStart - p2DataPartEnd;
 							// fall-through to get p1TotalAppliedDelta incremented
 						}
 					}
@@ -368,10 +361,9 @@
 			{
 				// no more p1 entries, shall close p2 (if it's handled, code above jumps directly to L0)
 				// regardless of whether insideP2 is .t
-				int s = p2EntryStart;
-				// p2EntryStartOffset != -1 when we started p2 entry processing, but not completed
+				int s = p2EntrySavedStart != -1 ? p2EntrySavedStart : p2EntryStart - p1TotalAppliedDelta;
+				// p2EntrySavedStart != -1 when we started p2 entry processing, but not completed
 				// if we handled last p1 entry but didn't start with p2 entry processing, it's -1 and regular p1 delta shall be used
-				s -= p2EntryStartOffset == -1 ? p1TotalAppliedDelta : p2EntryStartOffset;
 				r.add(s, p2EntryEnd - p1TotalAppliedDelta, p2Data);
 			}
 		}
@@ -386,6 +378,60 @@
 		return r;
 	}
 
+	/**
+	 * Combine consecutive regions into one.
+	 * XXX NOW only combines two subsequent regions, seems enough for quick test
+	 * @return <code>this</code> or new instance of consecutive regions found
+	 */
+	public Patch normalize() {
+		Patch rv = null;
+		for (int i = 1, x = data.size(); i < x; i++) {
+			if (starts.get(i) == ends.get(i-1)) {
+				if (rv == null) {
+					rv = new Patch();
+					rv.copyOf(this, 0, i-1);
+//				} else if (ends.get(i-1) == rv.ends.get(rv.ends.size()-1)) {
+//					// "JUST IN CASE" code, i++ below prevents us from getting here
+//					// if the last region is the one already merged,
+//					// ignore this occurrence (otherwise data(i-1) would get copied again) 
+//					continue;
+				}
+				byte[] d1 = data.get(i-1);
+				byte[] d = data.get(i);
+				byte[] nd;
+				if (d1.length > 0 && d.length > 0) {
+					nd = new byte[d1.length + d.length];
+					System.arraycopy(d1, 0, nd, 0, d1.length);
+					System.arraycopy(d, 0, nd, d1.length, d.length);
+				} else {
+					nd = d1.length == 0 ? d : d1 ;
+				}
+				rv.add(starts.get(i-1), ends.get(i), nd);
+				i++; // skip i-th element (effectively means we detect only pairs)
+				// without this ++, element(i-1) is added to rv once "else" below is hit on the next step
+			} else {
+				if (rv != null) {
+					rv.add(this, i-1);
+				}
+			}
+		}
+		if (rv == null) {
+			return this;
+		} else {
+			int last = count() - 1;
+			if (starts.get(last) != ends.get(last-1)) {
+				rv.add(this, last);
+			}
+		}
+		return rv;
+	}
+	
+	private void copyOf(Patch another, int fromIndex, int upToIndex) {
+		while(fromIndex < upToIndex) {
+			add(another, fromIndex++);
+		}
+	}
+
 	public class PatchDataSource implements DataSerializer.DataSource {
 
 		public void serialize(DataSerializer out) throws IOException {
--- a/src/org/tmatesoft/hg/internal/RevlogDump.java	Mon Apr 22 19:17:29 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/RevlogDump.java	Wed Apr 24 15:39:53 2013 +0200
@@ -29,6 +29,7 @@
 import java.nio.channels.FileChannel;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import java.util.zip.DataFormatException;
 import java.util.zip.Inflater;
 
 /**
@@ -50,7 +51,7 @@
 		String filename = "store/00changelog.i";
 //		String filename = "store/data/hello.c.i";
 //		String filename = "store/data/docs/readme.i";
-		System.out.println(escape("abc\0def\nzxc\tmnb"));
+//		System.out.println(escape("abc\0def\nzxc\tmnb"));
 		boolean dumpDataFull = true;
 		boolean dumpDataStats = false;
 		if (args.length > 1) {
@@ -61,77 +62,45 @@
 		}
 		final boolean needRevData = dumpDataFull || dumpDataStats; 
 		//
-		DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(new File(repo, filename))));
-		DataInput di = dis;
-		dis.mark(10);
-		int versionField = di.readInt();
-		dis.reset();
-		final int INLINEDATA = 1 << 16;
-		
-		final boolean inlineData = (versionField & INLINEDATA) != 0;
-		System.out.printf("%#8x, inline: %b\n", versionField, inlineData);
-		FileChannel dataStream = null; 
-		if (!inlineData && needRevData) {
-			dataStream = new FileInputStream(new File(repo, filename.substring(0, filename.length()-2) + ".d")).getChannel();
-		}
+		RevlogReader rr = new RevlogReader(new File(repo, filename)).needData(needRevData);
+		rr.init(needRevData);
+		System.out.printf("%#8x, inline: %b\n", rr.versionField, rr.inlineData);
 		System.out.println("Index    Offset      Flags     Packed     Actual   Base Rev   Link Rev  Parent1  Parent2     nodeid");
-		int entryIndex = 0;
-		while (dis.available() > 0) {
-			long l = di.readLong();
-			long offset = entryIndex == 0 ? 0 : (l >>> 16);
-			int flags = (int) (l & 0x0FFFF);
-			int compressedLen = di.readInt();
-			int actualLen = di.readInt();
-			int baseRevision = di.readInt();
-			int linkRevision = di.readInt();
-			int parent1Revision = di.readInt();
-			int parent2Revision = di.readInt();
-			byte[] buf = new byte[32];
-			di.readFully(buf, 12, 20);
-			dis.skipBytes(12); 
-			// CAN'T USE skip() here without extra precautions. E.g. I ran into situation when 
-			// buffer was 8192 and BufferedInputStream was at position 8182 before attempt to skip(12). 
-			// BIS silently skips available bytes and leaves me two extra bytes that ruin the rest of the code.
-			System.out.printf("%4d:%14d %6X %10d %10d %10d %10d %8d %8d     %040x\n", entryIndex, offset, flags, compressedLen, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, new BigInteger(buf));
-			String resultString;
-			byte[] data = new byte[compressedLen];
-			if (inlineData) {
-				di.readFully(data);
-			} else if (needRevData) {
-				dataStream.position(offset);
-				dataStream.read(ByteBuffer.wrap(data));
-			}
+		ByteBuffer data = null;
+		while (rr.hasMore()) {
+			rr.readNext();
+			System.out.printf("%4d:%14d %6X %10d %10d %10d %10d %8d %8d     %040x\n", rr.entryIndex, rr.offset, rr.flags, rr.compressedLen, rr.actualLen, rr.baseRevision, rr.linkRevision, rr.parent1Revision, rr.parent2Revision, new BigInteger(rr.nodeid));
 			if (needRevData) {
-				if (compressedLen == 0) {
+				String resultString;
+				if (rr.getDataLength() == 0) {
 					resultString = "<NO DATA>";
 				} else {
-					if (data[0] == 0x78 /* 'x' */) {
-						Inflater zlib = new Inflater();
-						zlib.setInput(data, 0, compressedLen);
-						byte[] result = new byte[actualLen*2];
-						int resultLen = zlib.inflate(result);
-						zlib.end();
-						resultString = buildString(result, 0, resultLen, baseRevision != entryIndex, dumpDataFull);
-					} else if (data[0] == 0x75 /* 'u' */) {
-						resultString = buildString(data, 1, data.length - 1, baseRevision != entryIndex, dumpDataFull);
-					} else {
-						resultString = buildString(data, 0, data.length, baseRevision != entryIndex, dumpDataFull);
-					}
+					data = ensureCapacity(data, rr.getDataLength());
+					rr.getData(data);
+					data.flip();
+					resultString = buildString(data, rr.isPatch(), dumpDataFull);
 				}
-				System.out.println(resultString);
+				if (resultString.endsWith("\n")) {
+					System.out.print(resultString);
+				} else {
+					System.out.println(resultString);
+				}
 			}
-			entryIndex++;
 		}
-		dis.close();
-		if (dataStream != null) {
-			dataStream.close();
-		}
-		//
+		rr.done();
 	}
 	
-	private static String buildString(byte[] data, int offset, int len, boolean isPatch, boolean completeDataDump) throws IOException, UnsupportedEncodingException {
+	private static ByteBuffer ensureCapacity(ByteBuffer src, int requiredCap) {
+		if (src == null || src.capacity() < requiredCap) {
+			return ByteBuffer.allocate((1 + requiredCap) * 3 / 2);
+		}
+		src.clear();
+		return src;
+	}
+	
+	private static String buildString(ByteBuffer data, boolean isPatch, boolean completeDataDump) throws IOException, UnsupportedEncodingException {
 		if (isPatch) {
-			DataInputStream dis = new DataInputStream(new ByteArrayInputStream(data, offset, len));
+			DataInputStream dis = new DataInputStream(new ByteArrayInputStream(data.array(), data.arrayOffset(), data.remaining()));
 			StringBuilder sb = new StringBuilder();
 			sb.append("<PATCH>:\n");
 			while (dis.available() > 0) {
@@ -152,9 +121,9 @@
 			return sb.toString();
 		} else {
 			if (completeDataDump) {
-				return escape(new String(data, offset, len, "UTF-8"));
+				return escape(new String(data.array(), data.arrayOffset(), data.remaining(), "UTF-8"));
 			}
-			return String.format("<DATA>:%d bytes", len-offset);
+			return String.format("<DATA>:%d bytes", data.remaining());
 		}
 	}
 	
@@ -186,4 +155,159 @@
 		m.appendTail(rv);
 		return rv.toString();
 	}
+
+	public static class RevlogReader {
+		
+		private final File file;
+		private boolean needRevData;
+		private DataInputStream dis;
+		private boolean inlineData;
+		public int versionField;
+		private FileChannel dataStream;
+		public int entryIndex;
+		private byte[] data;
+		private int dataOffset, dataLen;
+		public long offset;
+		public int flags;
+		public int baseRevision;
+		public int linkRevision;
+		public int parent1Revision;
+		public int parent2Revision;
+		public int compressedLen;
+		public int actualLen;
+		public byte[] nodeid = new byte[21]; // need 1 byte in the front to be 0 to avoid negative BigInts
+
+		public RevlogReader(File f) {
+			assert f.getName().endsWith(".i");
+			file = f;
+		}
+
+		// affects #readNext()
+		public RevlogReader needData(boolean needData) {
+			needRevData = needData;
+			return this;
+		}
+		
+		public void init(boolean mayRequireData) throws IOException {
+			dis = new DataInputStream(new BufferedInputStream(new FileInputStream(file)));
+			DataInput di = dis;
+			dis.mark(10);
+			versionField = di.readInt();
+			dis.reset();
+			final int INLINEDATA = 1 << 16;
+			inlineData = (versionField & INLINEDATA) != 0;
+			
+			dataStream = null; 
+			if (!inlineData && mayRequireData) {
+				String fname = file.getAbsolutePath();
+				dataStream = new FileInputStream(new File(fname.substring(0, fname.length()-2) + ".d")).getChannel();
+			}
+			
+			entryIndex = -1;
+		}
+		
+		public void startFrom(int startEntryIndex) throws IOException {
+			if (dis == null) {
+				throw new IllegalStateException("Call #init() first");
+			}
+			if (entryIndex != -1 && startEntryIndex != 0) {
+				throw new IllegalStateException("Can't seek once iteration has started");
+			}
+			if (dataStream == null) {
+				throw new IllegalStateException("Sorry, initial seek is now supported for separate .i/.d only");
+			}
+			long newPos = startEntryIndex * Internals.REVLOGV1_RECORD_SIZE, actualSkip;
+			do {
+				actualSkip = dis.skip(newPos);
+				if (actualSkip <= 0) {
+					throw new IllegalStateException(String.valueOf(actualSkip));
+				}
+				newPos -= actualSkip;
+			} while (newPos > 0);
+			entryIndex = startEntryIndex - 1;
+		}
+		
+		public boolean hasMore() throws IOException {
+			return dis.available() > 0;
+		}
+		
+		public void readNext() throws IOException, DataFormatException {
+			entryIndex++;
+			DataInput di = dis;
+			long l = di.readLong();
+			offset = entryIndex == 0 ? 0 : (l >>> 16);
+			flags = (int) (l & 0x0FFFF);
+			compressedLen = di.readInt();
+			actualLen = di.readInt();
+			baseRevision = di.readInt();
+			linkRevision = di.readInt();
+			parent1Revision = di.readInt();
+			parent2Revision = di.readInt();
+			di.readFully(nodeid, 1, 20);
+			dis.skipBytes(12); 
+			// CAN'T USE skip() here without extra precautions. E.g. I ran into situation when 
+			// buffer was 8192 and BufferedInputStream was at position 8182 before attempt to skip(12). 
+			// BIS silently skips available bytes and leaves me two extra bytes that ruin the rest of the code.
+			data = new byte[compressedLen];
+			if (inlineData) {
+				di.readFully(data);
+			} else if (needRevData) {
+				dataStream.position(offset);
+				dataStream.read(ByteBuffer.wrap(data));
+			}
+			if (needRevData) {
+				if (compressedLen == 0) {
+					data = null;
+					dataOffset = dataLen = 0;
+				} else {
+					if (data[0] == 0x78 /* 'x' */) {
+						Inflater zlib = new Inflater();
+						zlib.setInput(data, 0, compressedLen);
+						byte[] result = new byte[actualLen * 3];
+						int resultLen = zlib.inflate(result);
+						zlib.end();
+						data = result;
+						dataOffset = 0;
+						dataLen = resultLen;
+					} else if (data[0] == 0x75 /* 'u' */) {
+						dataOffset = 1;
+						dataLen = data.length - 1;
+					} else {
+						dataOffset = 0;
+						dataLen = data.length;
+					}
+				}
+			}
+		}
+		
+		public int getDataLength() {
+			// NOT actualLen - there are empty patch revisions (dataLen == 0, but actualLen == previous length)
+			// NOT compressedLen - zip data is uncompressed
+			return dataLen;
+		}
+		
+		public void getData(ByteBuffer bb) {
+			assert bb.remaining() >= dataLen;
+			bb.put(data, dataOffset, dataLen);
+		}
+		
+		public boolean isPatch() {
+			assert entryIndex != -1;
+			return baseRevision != entryIndex;
+		}
+		
+		public boolean isInline() {
+			assert dis != null;
+			return inlineData;
+		}
+
+		public void done() throws IOException {
+			dis.close();
+			dis = null;
+			if (dataStream != null) {
+				dataStream.close();
+				dataStream = null;
+			}
+		}
+	}
 }
--- a/src/org/tmatesoft/hg/repo/HgManifest.java	Mon Apr 22 19:17:29 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgManifest.java	Wed Apr 24 15:39:53 2013 +0200
@@ -371,12 +371,12 @@
 		 * Denotes entering specific manifest revision, separate entries are
 		 * reported with subsequence {@link #next(Nodeid, Path, Flags)} calls.
 		 * 
-		 * @param mainfestRevisionIndex  local revision index of the inspected revision
+		 * @param manifestRevisionIndex  local revision index of the inspected revision
 		 * @param manifestRevision revision of the manifest we're about to iterate through
 		 * @param changelogRevisionIndex local revision index of changelog this manifest points to 
 		 * @return <code>true</code> to continue iteration, <code>false</code> to stop
 		 */
-		boolean begin(int mainfestRevisionIndex, Nodeid manifestRevision, int changelogRevisionIndex);
+		boolean begin(int manifestRevisionIndex, Nodeid manifestRevision, int changelogRevisionIndex);
 
 		
 		/**
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/org/tmatesoft/hg/test/TestRevlog.java	Wed Apr 24 15:39:53 2013 +0200
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2013 TMate Software Ltd
+ *  
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.test;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+import org.tmatesoft.hg.core.HgRepositoryNotFoundException;
+import org.tmatesoft.hg.core.Nodeid;
+import org.tmatesoft.hg.internal.ByteArrayDataAccess;
+import org.tmatesoft.hg.internal.DiffHelper;
+import org.tmatesoft.hg.internal.Patch;
+import org.tmatesoft.hg.internal.DiffHelper.LineSequence;
+import org.tmatesoft.hg.internal.RevlogDump.RevlogReader;
+import org.tmatesoft.hg.repo.HgLookup;
+import org.tmatesoft.hg.repo.HgManifest;
+import org.tmatesoft.hg.repo.HgManifest.Flags;
+import org.tmatesoft.hg.repo.HgRepository;
+import org.tmatesoft.hg.util.Path;
+
+/**
+ * Not a real JUnit test now
+ * 
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public class TestRevlog {
+	private ByteBuffer patchData;
+
+	public static void main(String[] args) throws Exception {
+		File indexFile = new File("/home/artem/hg/cpython/.hg/store/00manifest.i");
+		new TestRevlog().run(indexFile);
+	}
+	
+	private void run(File indexFile) throws Exception {
+		final boolean shallDumpDiff = Boolean.TRUE.booleanValue();
+		final boolean thoroughCheck = Boolean.TRUE.booleanValue();
+		//
+		RevlogReader rr = new RevlogReader(indexFile);
+		rr.init(true);
+		rr.needData(true);
+		int startEntryIndex = 76507; // 150--87 
+		rr.startFrom(startEntryIndex);
+		rr.readNext();
+		ByteBuffer baseRevision = null;
+		if (rr.isPatch()) {
+			byte[] cc = getRevisionTrueContent(indexFile.getParentFile(), rr.entryIndex, rr.linkRevision);
+			baseRevision = ByteBuffer.wrap(cc);
+		} else {
+			baseRevision = ByteBuffer.allocate(rr.getDataLength());
+			rr.getData(baseRevision);
+			baseRevision.flip();
+		}
+		ByteArrayDataAccess baseRevisionContent = new ByteArrayDataAccess(baseRevision.array(), baseRevision.arrayOffset(), baseRevision.remaining());
+		//
+		final long start = System.currentTimeMillis();
+		int n = 1419;
+		Patch seqPatch = null, normalizedPatch = null;
+		while (rr.hasMore() && n-- > 0) {
+			rr.readNext();
+			if (!rr.isPatch()) {
+				break;
+			}
+			if (rr.getDataLength() == 0) {
+				System.out.printf("Empty content of revision %d\n", rr.entryIndex);
+				continue;
+			}
+			Patch p1 = createPatch(rr);
+			if (seqPatch != null) {
+				if (n < 1) {
+					System.out.println("+" + p1);
+					System.currentTimeMillis();
+				}
+				seqPatch = seqPatch.apply(p1);
+				Patch ppp = normalizedPatch.apply(p1);
+				normalizedPatch = ppp.normalize();
+				if (n <= 1) {
+					System.out.println("=" + seqPatch);
+				}
+				if (n == 0) {
+					System.out.println("A" + ppp);
+					System.out.println("N" + normalizedPatch);
+					normalizedPatch = ppp;
+				}
+				//
+				if (!thoroughCheck) {
+					if (baseRevisionContent.length() + seqPatch.patchSizeDelta() != rr.actualLen) {
+						System.out.printf("Sequential patches:\tPatchRevision #%d (+%d, cset:%d) failed\n", rr.entryIndex, rr.entryIndex - startEntryIndex, rr.linkRevision);
+					}
+					if (baseRevisionContent.length() + normalizedPatch.patchSizeDelta() != rr.actualLen) {
+						System.out.printf("Normalized patches:\tPatchRevision #%d (+%d, cset:%d) failed\n", rr.entryIndex, rr.entryIndex - startEntryIndex, rr.linkRevision);
+					}
+				} else {
+					byte[] origin = getRevisionTrueContent(indexFile.getParentFile(), rr.entryIndex, rr.linkRevision);
+					try {
+						byte[] result1 = seqPatch.apply(baseRevisionContent, rr.actualLen);
+						if (!Arrays.equals(result1, origin)) {
+							System.out.printf("Sequential patches:\tPatchRevision #%d (+%d, cset:%d) failed\n", rr.entryIndex, rr.entryIndex - startEntryIndex, rr.linkRevision);
+						}
+					} catch (ArrayIndexOutOfBoundsException ex) {
+						System.err.printf("Failure at entry %d (+%d)\n", rr.entryIndex, rr.entryIndex - startEntryIndex);
+						ex.printStackTrace();
+					}
+					try {
+						byte[] result2 = normalizedPatch.apply(baseRevisionContent, rr.actualLen);
+						if (!Arrays.equals(result2, origin)) {
+							System.out.printf("Normalized patches:\tPatchRevision #%d (+%d, cset:%d) failed\n", rr.entryIndex, rr.entryIndex - startEntryIndex, rr.linkRevision);
+						}
+					} catch (ArrayIndexOutOfBoundsException ex) {
+						System.err.printf("Failure at entry %d (+%d)\n", rr.entryIndex, rr.entryIndex - startEntryIndex);
+						ex.printStackTrace();
+					}
+				}
+			} else {
+				seqPatch = p1;
+				normalizedPatch = p1.normalize();
+			}
+		}
+		final long end1 = System.currentTimeMillis();
+		//
+//		byte[] result = seqPatch.apply(baseRevisionContent, rr.actualLen);
+		byte[] result = normalizedPatch.apply(baseRevisionContent, rr.actualLen);
+		final long end2 = System.currentTimeMillis();
+		byte[] origin = getRevisionTrueContent(indexFile.getParentFile(), rr.entryIndex, rr.linkRevision);
+		final long end3 = System.currentTimeMillis();
+		rr.done();
+		System.out.printf("Collected patches up to revision %d. Patches total: %d, last contains %d elements\n", rr.entryIndex, rr.entryIndex - startEntryIndex + 1, seqPatch.count());
+		if (!Arrays.equals(result, origin)) {
+			if (shallDumpDiff) {
+				diff(result, origin);
+				dumpLineDifference(result, origin);
+			} else {
+				System.out.println("FAILURE!");
+			}
+		} else {
+			System.out.println("OK!");
+			System.out.printf("Iterate: %d ms, apply collected: %d ms, total=%d ms; Conventional: %d ms\n", (end1-start), (end2-end1), (end2-start), (end3-end2));
+		}
+		Patch normalized = normalizedPatch; //seqPatch.normalize();
+		System.out.printf("N%s\n%d => %d patch elements\n", normalized, seqPatch.count(), normalized.count());
+//		System.out.println(rs);
+	}
+
+	private void dumpLineDifference(byte[] result, byte[] origin) {
+		String rs = new String(result).replace('\0', '\t');
+		String os = new String(origin).replace('\0', '\t');
+		String[] rsLines = rs.split("\n");
+		String[] osLines = os.split("\n");
+		int approxPos = 0;
+		for (int i = 0; i < Math.min(rsLines.length, osLines.length); i++) {
+			if (!rsLines[i].equals(osLines[i])) {
+				System.out.printf("@%d (offset ~%d)\n\t%s\n\t%s\n", i, approxPos, osLines[i], rsLines[i]);
+			}
+			approxPos += rsLines[i].length() + 1;
+		}
+	}
+
+	private void diff(byte[] result, byte[] origin) {
+		DiffHelper<LineSequence> pg = new DiffHelper<LineSequence>();
+		pg.init(LineSequence.newlines(origin), LineSequence.newlines(result));
+		pg.findMatchingBlocks(new DiffHelper.DeltaDumpInspector<LineSequence>());
+	}
+
+	private Patch createPatch(RevlogReader rr) throws IOException {
+		assert rr.isPatch();
+		if (patchData == null || patchData.capacity() < rr.getDataLength()) {
+			patchData = ByteBuffer.allocate(rr.getDataLength());
+		} else {
+			patchData.clear();
+		}
+		rr.getData(patchData);
+		patchData.flip();
+		Patch patch1 = new Patch();
+		patch1.read(new ByteArrayDataAccess(patchData.array(), patchData.arrayOffset(), patchData.remaining()));
+		return patch1;
+	}
+	
+	private byte[] getRevisionTrueContent(File repoLoc, final int manifestRev, int clogRev) throws HgRepositoryNotFoundException {
+		HgRepository hgRepo = new HgLookup().detect(repoLoc);
+		final ByteArrayOutputStream out = new ByteArrayOutputStream(1024 * 1000);
+		hgRepo.getManifest().walk(clogRev, clogRev, new HgManifest.Inspector() {
+			
+			public boolean next(Nodeid nid, Path fname, Flags flags) {
+				try {
+					out.write(fname.toString().getBytes());
+					out.write(0);
+					out.write(nid.toString().getBytes());
+					if (flags == Flags.Exec) {
+						out.write('x');
+					} else if (flags == Flags.Link) {
+						out.write('l');
+					}
+					out.write('\n');
+				} catch (IOException e) {
+					throw new IllegalStateException(e);
+				}
+				return true;
+			}
+			
+			public boolean end(int manifestRevisionIndex) {
+				return false;
+			}
+			
+			public boolean begin(int manifestRevisionIndex, Nodeid manifestRevision, int changelogRevisionIndex) {
+				if (manifestRev != manifestRevisionIndex) {
+					throw new IllegalStateException(String.valueOf(manifestRevisionIndex));
+				}
+				return true;
+			}
+		});
+		return out.toByteArray();
+	}
+}