changeset 551:4ea0351ca878

Better (precise) name for diff facility, tests
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 20 Feb 2013 18:19:52 +0100
parents c1478cc31f45
children 45751456b471
files build.xml src/org/tmatesoft/hg/internal/AnnotateFacility.java src/org/tmatesoft/hg/internal/DiffHelper.java src/org/tmatesoft/hg/internal/GeneratePatchInspector.java src/org/tmatesoft/hg/internal/IntMap.java src/org/tmatesoft/hg/internal/IntVector.java src/org/tmatesoft/hg/internal/PatchGenerator.java test/org/tmatesoft/hg/test/TestDiffHelper.java
diffstat 8 files changed, 638 insertions(+), 450 deletions(-) [+]
line wrap: on
line diff
--- a/build.xml	Tue Feb 19 21:35:09 2013 +0100
+++ b/build.xml	Wed Feb 20 18:19:52 2013 +0100
@@ -106,6 +106,7 @@
 			<test name="org.tmatesoft.hg.test.TestAddRemove" />
 			<test name="org.tmatesoft.hg.test.TestCommit" />
 			<test name="org.tmatesoft.hg.test.TestBlame" />
+			<test name="org.tmatesoft.hg.test.TestDiffHelper" />
 		</junit>
 	</target>
 
--- a/src/org/tmatesoft/hg/internal/AnnotateFacility.java	Tue Feb 19 21:35:09 2013 +0100
+++ b/src/org/tmatesoft/hg/internal/AnnotateFacility.java	Wed Feb 20 18:19:52 2013 +0100
@@ -20,7 +20,7 @@
 import static org.tmatesoft.hg.repo.HgRepository.TIP;
 
 import org.tmatesoft.hg.core.Nodeid;
-import org.tmatesoft.hg.internal.PatchGenerator.LineSequence;
+import org.tmatesoft.hg.internal.DiffHelper.LineSequence;
 import org.tmatesoft.hg.repo.HgDataFile;
 import org.tmatesoft.hg.repo.HgInvalidStateException;
 import org.tmatesoft.hg.util.CancelledException;
@@ -41,7 +41,7 @@
 		int fileRevIndex2 = fileRevIndex(df, csetRevIndex2);
 		LineSequence c1 = lines(df, fileRevIndex1);
 		LineSequence c2 = lines(df, fileRevIndex2);
-		PatchGenerator<LineSequence> pg = new PatchGenerator<LineSequence>();
+		DiffHelper<LineSequence> pg = new DiffHelper<LineSequence>();
 		pg.init(c1, c2);
 		pg.findMatchingBlocks(new BlameBlockInspector(insp, csetRevIndex1, csetRevIndex2));
 	}
@@ -86,7 +86,7 @@
 			LineSequence p2Lines = lines(df, fileParentRevs[1]);
 			int p1ClogIndex = df.getChangesetRevisionIndex(fileParentRevs[0]);
 			int p2ClogIndex = df.getChangesetRevisionIndex(fileParentRevs[1]);
-			PatchGenerator<LineSequence> pg = new PatchGenerator<LineSequence>();
+			DiffHelper<LineSequence> pg = new DiffHelper<LineSequence>();
 			pg.init(p2Lines, fileRevLines);
 			EqualBlocksCollector p2MergeCommon = new EqualBlocksCollector();
 			pg.findMatchingBlocks(p2MergeCommon);
@@ -109,7 +109,7 @@
 			LineSequence parentLines = lines(df, soleParent);
 			
 			int parentChangesetRevIndex = df.getChangesetRevisionIndex(soleParent);
-			PatchGenerator<LineSequence> pg = new PatchGenerator<LineSequence>();
+			DiffHelper<LineSequence> pg = new DiffHelper<LineSequence>();
 			pg.init(parentLines, fileRevLines);
 			pg.findMatchingBlocks(new BlameBlockInspector(insp, parentChangesetRevIndex, csetRevIndex));
 		}
@@ -194,7 +194,7 @@
 	
 
 
-	static class BlameBlockInspector extends PatchGenerator.DeltaInspector<LineSequence> {
+	static class BlameBlockInspector extends DiffHelper.DeltaInspector<LineSequence> {
 		private final BlockInspector insp;
 		private final int csetOrigin;
 		private final int csetTarget;
@@ -443,7 +443,7 @@
 		}
 	}
 
-	static class EqualBlocksCollector implements PatchGenerator.MatchInspector<LineSequence> {
+	static class EqualBlocksCollector implements DiffHelper.MatchInspector<LineSequence> {
 		private final IntVector matches = new IntVector(10*3, 2*3);
 
 		public void begin(LineSequence s1, LineSequence s2) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/tmatesoft/hg/internal/DiffHelper.java	Wed Feb 20 18:19:52 2013 +0100
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 2013 TMate Software Ltd
+ *  
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.internal;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.tmatesoft.hg.repo.HgInvalidStateException;
+
+/**
+ * Mercurial cares about changes only up to the line level, e.g. a simple file version dump in manifest looks like (RevlogDump output):
+ * 
+ *   522:        233748      0        103      17438        433        522      521       -1     756073cf2321df44d3ed0585f2a5754bc8a1b2f6
+ *   <PATCH>:
+ *   3487..3578, 91:src/org/tmatesoft/hg/core/HgIterateDirection.java\00add61a8a665c5d8f092210767f812fe0d335ac8
+ *   
+ * I.e. for the {fname}{revision} entry format of manifest, not only {revision} is changed, but the whole line, with unchanged {fname} is recorded
+ * in the patch.
+ * 
+ * Mercurial paper describes reasons for choosing this approach to delta generation, too.
+ * 
+ * 
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public class DiffHelper<T extends DiffHelper.ChunkSequence<?>> {
+
+	private Map<Object, IntVector> chunk2UseIndex;
+	private T seq1, seq2;
+
+	// get filled by #longestMatch, track start of common sequence in seq1 and seq2, respectively
+	private int matchStartS1, matchStartS2;
+
+	private MatchInspector<T> matchInspector; 
+
+	public void init(T s1, T s2) {
+		seq1 = s1;
+		seq2 = s2;
+		prepare(s2);
+	}
+	
+	public void init(T s1) {
+		if (seq2 == null) {
+			throw new IllegalStateException("Use this #init() only when target sequence shall be matched against different origin");
+		}
+		seq1 = s1;
+	}
+
+
+	private void prepare(T s2) {
+		chunk2UseIndex = new HashMap<Object, IntVector>();
+		for (int i = 0, len = s2.chunkCount(); i < len; i++) {
+			Object bc = s2.chunk(i);
+			IntVector loc = chunk2UseIndex.get(bc);
+			if (loc == null) {
+				chunk2UseIndex.put(bc, loc = new IntVector());
+			}
+			loc.add(i);
+			// bc.registerUseIn(i) - BEWARE, use of bc here is incorrect
+			// in this case need to find the only ByteChain to keep indexes
+			// i.e. when there are few equal ByteChain instances, notion of "usedIn" shall be either shared (reference same vector)
+			// or kept within only one of them
+		}
+	}
+	
+	public void findMatchingBlocks(MatchInspector<T> insp) {
+		insp.begin(seq1, seq2);
+		matchInspector = insp;
+		findMatchingBlocks(0, seq1.chunkCount(), 0, seq2.chunkCount());
+		insp.end();
+	}
+	
+	/**
+	 * implementation based on Python's difflib.py and SequenceMatcher 
+	 */
+	public int longestMatch(int startS1, int endS1, int startS2, int endS2) {
+		matchStartS1 = matchStartS2 = 0;
+		int maxLength = 0;
+		IntMap<Integer> chunkIndex2MatchCount = new IntMap<Integer>(8);
+		for (int i = startS1; i < endS1; i++) {
+			Object bc = seq1.chunk(i);
+			IntVector occurencesInS2 = chunk2UseIndex.get(bc);
+			if (occurencesInS2 == null) {
+				chunkIndex2MatchCount.clear();
+				continue;
+			}
+			IntMap<Integer> newChunkIndex2MatchCount = new IntMap<Integer>(8);
+			for (int j : occurencesInS2.toArray()) {
+				// s1[i] == s2[j]
+				if (j < startS2) {
+					continue;
+				}
+				if (j >= endS2) {
+					break;
+				}
+				int prevChunkMatches = chunkIndex2MatchCount.containsKey(j-1) ? chunkIndex2MatchCount.get(j-1) : 0;
+				int k = prevChunkMatches + 1;
+				newChunkIndex2MatchCount.put(j, k);
+				if (k > maxLength) {
+					matchStartS1 = i-k+1;
+					matchStartS2 = j-k+1;
+					maxLength = k;
+				}
+			}
+			chunkIndex2MatchCount = newChunkIndex2MatchCount;
+		}
+		return maxLength;
+	}
+	
+	private void findMatchingBlocks(int startS1, int endS1, int startS2, int endS2) {
+		int matchLength = longestMatch(startS1, endS1, startS2, endS2);
+		if (matchLength > 0) {
+			final int saveStartS1 = matchStartS1, saveStartS2 = matchStartS2;
+			if (startS1 < matchStartS1 && startS2 < matchStartS2) {
+				findMatchingBlocks(startS1, matchStartS1, startS2, matchStartS2);
+			}
+			matchInspector.match(saveStartS1, saveStartS2, matchLength);
+			if (saveStartS1+matchLength < endS1 && saveStartS2+matchLength < endS2) {
+				findMatchingBlocks(saveStartS1 + matchLength, endS1, saveStartS2 + matchLength, endS2);
+			}
+		}
+	}
+	
+	public interface MatchInspector<T extends ChunkSequence<?>> {
+		void begin(T s1, T s2);
+		void match(int startSeq1, int startSeq2, int matchLength);
+		void end();
+	}
+	
+	static class MatchDumpInspector<T extends ChunkSequence<?>> implements MatchInspector<T> {
+		private int matchCount;
+
+		public void begin(T s1, T s2) {
+			matchCount = 0;
+		}
+
+		public void match(int startSeq1, int startSeq2, int matchLength) {
+			matchCount++;
+			System.out.printf("match #%d: from line #%d  and line #%d of length %d\n", matchCount, startSeq1, startSeq2, matchLength);
+		}
+
+		public void end() {
+			if (matchCount == 0) {
+				System.out.println("NO MATCHES FOUND!");
+			}
+		}
+	}
+	
+	/**
+	 * Matcher implementation that translates "match/equal" notification to a delta-style "added/removed/changed". 
+	 */
+	public static class DeltaInspector<T extends ChunkSequence<?>> implements MatchInspector<T> {
+		protected int changeStartS1, changeStartS2;
+		protected T seq1, seq2;
+
+		public void begin(T s1, T s2) {
+			seq1 = s1;
+			seq2 = s2;
+			changeStartS1 = changeStartS2 = 0;
+		}
+
+		public void match(int startSeq1, int startSeq2, int matchLength) {
+			reportDeltaElement(startSeq1, startSeq2, matchLength);
+			changeStartS1 = startSeq1 + matchLength;
+			changeStartS2 = startSeq2 + matchLength;
+		}
+
+		public void end() {
+			if (changeStartS1 < seq1.chunkCount()-1 || changeStartS2 < seq2.chunkCount()-1) {
+				reportDeltaElement(seq1.chunkCount()-1, seq2.chunkCount()-1, 0);
+			}
+		}
+
+		protected void reportDeltaElement(int matchStartSeq1, int matchStartSeq2, int matchLength) {
+			if (changeStartS1 < matchStartSeq1) {
+				if (changeStartS2 < matchStartSeq2) {
+					changed(changeStartS1, matchStartSeq1, changeStartS2, matchStartSeq2);
+				} else {
+					assert changeStartS2 == matchStartSeq2;
+					deleted(matchStartSeq2, changeStartS1, matchStartSeq1);
+				}
+			} else {
+				assert changeStartS1 == matchStartSeq1;
+				if(changeStartS2 < matchStartSeq2) {
+					added(changeStartS1, changeStartS2, matchStartSeq2);
+				} else {
+					assert changeStartS2 == matchStartSeq2;
+					if (matchStartSeq1 > 0 || matchStartSeq2 > 0) {
+						// FIXME perhaps, exception is too much for the case
+						// once diff is covered with tests, replace with assert false : msg; 
+						throw new HgInvalidStateException(String.format("adjustent equal blocks %d, %d and %d,%d", changeStartS1, matchStartSeq1, changeStartS2, matchStartSeq2));
+					}
+				}
+			}
+			if (matchLength > 0) {
+				unchanged(matchStartSeq1, matchStartSeq2, matchLength);
+			}
+		}
+
+		/**
+		 * [s1From..s1To) replaced with [s2From..s2To)
+		 */
+		protected void changed(int s1From, int s1To, int s2From, int s2To) {
+			// NO-OP
+		}
+
+		protected void deleted(int s2DeletePoint, int s1From, int s1To) {
+			// NO-OP
+		}
+
+		protected void added(int s1InsertPoint, int s2From, int s2To) {
+			// NO-OP
+		}
+
+		protected void unchanged(int s1From, int s2From, int length) {
+			// NO-OP
+		}
+	}
+	
+	static class DeltaDumpInspector<T extends ChunkSequence<?>> extends DeltaInspector<T> {
+
+		@Override
+		protected void changed(int s1From, int s1To, int s2From, int s2To) {
+			System.out.printf("changed [%d..%d) with [%d..%d)\n", s1From, s1To, s2From, s2To);
+		}
+		
+		@Override
+		protected void deleted(int s2DeletionPoint, int s1From, int s1To) {
+			System.out.printf("deleted [%d..%d)\n", s1From, s1To);
+		}
+		
+		@Override
+		protected void added(int s1InsertPoint, int s2From, int s2To) {
+			System.out.printf("added [%d..%d) at %d\n", s2From, s2To, s1InsertPoint);
+		}
+
+		@Override
+		protected void unchanged(int s1From, int s2From, int length) {
+			System.out.printf("same [%d..%d) and [%d..%d)\n", s1From, s1From + length, s2From, s2From + length);
+		}
+	}
+	
+	/**
+	 * Generic sequence of chunks, where a chunk is anything comparable to another chunk, e.g. a string or a single char.
+	 * Sequence diff algorithm above doesn't care about sequence nature.
+	 */
+	public interface ChunkSequence<T> {
+		public T chunk(int index);
+		public int chunkCount();
+	}
+	
+	public static final class LineSequence implements ChunkSequence<LineSequence.ByteChain> {
+		
+		private final byte[] input;
+		private ArrayList<ByteChain> lines;
+
+		public LineSequence(byte[] data) {
+			input = data;
+		}
+		
+		public static LineSequence newlines(byte[] array) {
+			return new LineSequence(array).splitByNewlines();
+		}
+
+		// sequence ends with fake, empty line chunk
+		public LineSequence splitByNewlines() {
+			lines = new ArrayList<ByteChain>();
+			int lastStart = 0;
+			for (int i = 0; i < input.length; i++) {
+				if (input[i] == '\n') {
+					lines.add(new ByteChain(lastStart, i+1));
+					lastStart = i+1;
+				} else if (input[i] == '\r') {
+					if (i+1 < input.length && input[i+1] == '\n') {
+						i++;
+					}
+					lines.add(new ByteChain(lastStart, i+1));
+					lastStart = i+1;
+				}
+			}
+			if (lastStart < input.length) {
+				lines.add(new ByteChain(lastStart, input.length));
+			}
+			// empty chunk to keep offset of input end
+			lines.add(new ByteChain(input.length));
+			return this;
+		}
+		
+		public ByteChain chunk(int index) {
+			return lines.get(index);
+		}
+		
+		public int chunkCount() {
+			return lines.size();
+		}
+		
+		public byte[] data(int chunkFrom, int chunkTo) {
+			if (chunkFrom == chunkTo) {
+				return new byte[0];
+			}
+			int from = chunk(chunkFrom).getOffset(), to = chunk(chunkTo).getOffset();
+			byte[] rv = new byte[to - from];
+			System.arraycopy(input, from, rv, 0, rv.length);
+			return rv;
+		}
+
+		
+		final class ByteChain {
+			private final int start, end;
+			private final int hash;
+			
+			/**
+			 * construct a chunk with a sole purpose to keep 
+			 * offset of the data end
+			 */
+			ByteChain(int offset) {
+				start = end = offset;
+				// ensure this chunk doesn't match trailing chunk of another sequence
+				hash = System.identityHashCode(this);
+			}
+			
+			ByteChain(int s, int e) {
+				start = s;
+				end = e;
+				hash = calcHash(input, s, e);
+			}
+			
+			/**
+			 * byte offset of this ByteChain inside the enclosing LineSequence's input
+			 */
+			public int getOffset() {
+				return start;
+			}
+			
+			public byte[] data() {
+				byte[] rv = new byte[end - start];
+				System.arraycopy(input, start, rv, 0, rv.length);
+				return rv;
+			}
+			
+			@Override
+			public boolean equals(Object obj) {
+				if (obj == null || obj.getClass() != ByteChain.class) {
+					return false;
+				}
+				ByteChain other = (ByteChain) obj;
+				if (other.hash != hash || other.end - other.start != end - start) {
+					return false;
+				}
+				return other.match(input, start);
+			}
+			
+			private boolean match(byte[] oi, int from) {
+				for (int i = start, j = from; i < end; i++, j++) {
+					if (LineSequence.this.input[i] != oi[j]) {
+						return false;
+					}
+				}
+				return true;
+			}
+			
+			@Override
+			public int hashCode() {
+				return hash;
+			}
+			
+			@Override
+			public String toString() {
+				return String.format("[@%d\"%s\"]", start, new String(data()));
+			}
+		}
+
+		// same as Arrays.hashCode(byte[]), just for a slice of a bigger array
+		static int calcHash(byte[] data, int from, int to) {
+			int result = 1;
+			for (int i = from; i < to; i++) {
+				result = 31 * result + data[i];
+			}
+			return result;
+		}
+	}
+}
--- a/src/org/tmatesoft/hg/internal/GeneratePatchInspector.java	Tue Feb 19 21:35:09 2013 +0100
+++ b/src/org/tmatesoft/hg/internal/GeneratePatchInspector.java	Wed Feb 20 18:19:52 2013 +0100
@@ -16,8 +16,8 @@
  */
 package org.tmatesoft.hg.internal;
 
-import org.tmatesoft.hg.internal.PatchGenerator.DeltaInspector;
-import org.tmatesoft.hg.internal.PatchGenerator.LineSequence;
+import org.tmatesoft.hg.internal.DiffHelper.DeltaInspector;
+import org.tmatesoft.hg.internal.DiffHelper.LineSequence;
 
 class GeneratePatchInspector extends DeltaInspector<LineSequence> {
 	private final Patch deltaCollector;
@@ -29,7 +29,7 @@
 	
 	public static Patch delta(byte[] prev, byte[] content) {
 		Patch rv = new Patch();
-		PatchGenerator<LineSequence> pg = new PatchGenerator<LineSequence>();
+		DiffHelper<LineSequence> pg = new DiffHelper<LineSequence>();
 		pg.init(new LineSequence(prev).splitByNewlines(), new LineSequence(content).splitByNewlines());
 		pg.findMatchingBlocks(new GeneratePatchInspector(rv));
 		return rv;
--- a/src/org/tmatesoft/hg/internal/IntMap.java	Tue Feb 19 21:35:09 2013 +0100
+++ b/src/org/tmatesoft/hg/internal/IntMap.java	Wed Feb 20 18:19:52 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012 TMate Software Ltd
+ * Copyright (c) 2011-2013 TMate Software Ltd
  *  
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -16,6 +16,7 @@
  */
 package org.tmatesoft.hg.internal;
 
+import java.util.Arrays;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -127,6 +128,11 @@
 		}
 	}
 	
+	public void clear() {
+		Arrays.fill(values, 0, size, null); // do not keep the references
+		size = 0;
+	}
+	
 	/**
 	 * Forget first N entries (in natural order) in the map.
 	 */
--- a/src/org/tmatesoft/hg/internal/IntVector.java	Tue Feb 19 21:35:09 2013 +0100
+++ b/src/org/tmatesoft/hg/internal/IntVector.java	Wed Feb 20 18:19:52 2013 +0100
@@ -42,11 +42,20 @@
 
 	public void add(int v) {
 		if (count == data.length) {
-			grow();
+			grow(0);
 		}
 		data[count++] = v;
 	}
 	
+	public void add(int... values) {
+		if (count + values.length > data.length) {
+			grow(count + values.length); // grow() treats its argument as the minimum NEW capacity, so pass the total needed, not the shortfall
+		}
+		for (int v : values) {
+			data[count++] = v;
+		}
+	}
+	
 	public int get(int i) {
 		if (i < 0 || i >= count) {
 			throw new IndexOutOfBoundsException(String.format("Index: %d, size: %d", i, count));
@@ -95,11 +104,14 @@
 		return toArray();
 	}
 
-	private void grow() {
+	private void grow(int newCapacityHint) {
 		if (increment == 0) {
 			throw new UnsupportedOperationException("This vector is not allowed to expand");
 		}
 		int newCapacity = increment < 0 ? data.length << 1 : data.length + increment;
+		if (newCapacityHint > 0 && newCapacity < newCapacityHint) {
+			newCapacity = newCapacityHint;
+		}
 		assert newCapacity > 0 && newCapacity != data.length : newCapacity;
 		int[] newData = new int[newCapacity];
 		System.arraycopy(data, 0, newData, 0, count);
--- a/src/org/tmatesoft/hg/internal/PatchGenerator.java	Tue Feb 19 21:35:09 2013 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,438 +0,0 @@
-/*
- * Copyright (c) 2013 TMate Software Ltd
- *  
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * For information on how to redistribute this software under
- * the terms of a license other than GNU General Public License
- * contact TMate Software at support@hg4j.com
- */
-package org.tmatesoft.hg.internal;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.tmatesoft.hg.repo.HgDataFile;
-import org.tmatesoft.hg.repo.HgInvalidStateException;
-import org.tmatesoft.hg.repo.HgLookup;
-import org.tmatesoft.hg.repo.HgRepository;
-
-/**
- * Mercurial cares about changes only up to the line level, e.g. a simple file version dump in manifest looks like (RevlogDump output):
- * 
- *   522:        233748      0        103      17438        433        522      521       -1     756073cf2321df44d3ed0585f2a5754bc8a1b2f6
- *   <PATCH>:
- *   3487..3578, 91:src/org/tmatesoft/hg/core/HgIterateDirection.java\00add61a8a665c5d8f092210767f812fe0d335ac8
- *   
- * I.e. for the {fname}{revision} entry format of manifest, not only {revision} is changed, but the whole line, with unchanged {fname} is recorded
- * in the patch.
- * 
- * Mercurial paper describes reasons for choosing this approach to delta generation, too.
- * 
- * 
- * @author Artem Tikhomirov
- * @author TMate Software Ltd.
- */
-public class PatchGenerator<T extends PatchGenerator.ChunkSequence<?>> {
-
-	private Map<Chunk, IntVector> chunk2UseIndex;
-	private T seq1, seq2;
-
-	// get filled by #longestMatch, track start of common sequence in seq1 and seq2, respectively
-	private int matchStartS1, matchStartS2;
-
-	private MatchInspector<T> matchInspector; 
-
-	public void init(T s1, T s2) {
-		seq1 = s1;
-		seq2 = s2;
-		prepare(s2);
-	}
-	
-	public void init(T s1) {
-		if (seq2 == null) {
-			throw new IllegalStateException("Use this #init() only when target sequence shall be matched against different origin");
-		}
-		seq1 = s1;
-	}
-
-
-	private void prepare(T s2) {
-		chunk2UseIndex = new HashMap<Chunk, IntVector>();
-		for (int i = 0, len = s2.chunkCount(); i < len; i++) {
-			Chunk bc = s2.chunk(i);
-			IntVector loc = chunk2UseIndex.get(bc);
-			if (loc == null) {
-				chunk2UseIndex.put(bc, loc = new IntVector());
-			}
-			loc.add(i);
-			// bc.registerUseIn(i) - BEWARE, use of bc here is incorrect
-			// in this case need to find the only ByteChain to keep indexes
-			// i.e. when there are few equal ByteChain instances, notion of "usedIn" shall be either shared (reference same vector)
-			// or kept within only one of them
-		}
-//		for (ChunkSequence.ByteChain bc : chunk2UseIndex.keySet()) {
-//			System.out.printf("%s: {", new String(bc.data()));
-//			for (int x : chunk2UseIndex.get(bc).toArray()) {
-//				System.out.printf(" %d,", x);
-//			}
-//			System.out.println("}");
-//		}
-	}
-	
-	public void findMatchingBlocks(MatchInspector<T> insp) {
-		insp.begin(seq1, seq2);
-		matchInspector = insp;
-		findMatchingBlocks(0, seq1.chunkCount(), 0, seq2.chunkCount());
-		insp.end();
-	}
-	
-	/**
-	 * implementation based on Python's difflib.py and SequenceMatcher 
-	 */
-	public int longestMatch(int startS1, int endS1, int startS2, int endS2) {
-		matchStartS1 = matchStartS2 = 0;
-		int maxLength = 0;
-		IntMap<Integer> chunkIndex2MatchCount = new IntMap<Integer>(8);
-		for (int i = startS1; i < endS1; i++) {
-			Chunk bc = seq1.chunk(i);
-			IntMap<Integer> newChunkIndex2MatchCount = new IntMap<Integer>(8);
-			IntVector occurencesInS2 = chunk2UseIndex.get(bc);
-			if (occurencesInS2 == null) {
-				// chunkIndex2MatchCount.clear(); // TODO need clear instead of new instance
-				chunkIndex2MatchCount = newChunkIndex2MatchCount;
-				continue;
-			}
-			for (int j : occurencesInS2.toArray()) {
-				// s1[i] == s2[j]
-				if (j < startS2) {
-					continue;
-				}
-				if (j >= endS2) {
-					break;
-				}
-				int prevChunkMatches = chunkIndex2MatchCount.containsKey(j-1) ? chunkIndex2MatchCount.get(j-1) : 0;
-				int k = prevChunkMatches + 1;
-				newChunkIndex2MatchCount.put(j, k);
-				if (k > maxLength) {
-					matchStartS1 = i-k+1;
-					matchStartS2 = j-k+1;
-					maxLength = k;
-				}
-			}
-			chunkIndex2MatchCount = newChunkIndex2MatchCount;
-		}
-		return maxLength;
-	}
-	
-	private void findMatchingBlocks(int startS1, int endS1, int startS2, int endS2) {
-		int matchLength = longestMatch(startS1, endS1, startS2, endS2);
-		if (matchLength > 0) {
-			final int saveStartS1 = matchStartS1, saveStartS2 = matchStartS2;
-			if (startS1 < matchStartS1 && startS2 < matchStartS2) {
-				findMatchingBlocks(startS1, matchStartS1, startS2, matchStartS2);
-			}
-			matchInspector.match(saveStartS1, saveStartS2, matchLength);
-			if (saveStartS1+matchLength < endS1 && saveStartS2+matchLength < endS2) {
-				findMatchingBlocks(saveStartS1 + matchLength, endS1, saveStartS2 + matchLength, endS2);
-			}
-		}
-	}
-	
-	interface MatchInspector<T extends ChunkSequence<?>> {
-		void begin(T s1, T s2);
-		void match(int startSeq1, int startSeq2, int matchLength);
-		void end();
-	}
-	
-	static class MatchDumpInspector<T extends ChunkSequence<?>> implements MatchInspector<T> {
-		private int matchCount;
-
-		public void begin(T s1, T s2) {
-			matchCount = 0;
-		}
-
-		public void match(int startSeq1, int startSeq2, int matchLength) {
-			matchCount++;
-			System.out.printf("match #%d: from line #%d  and line #%d of length %d\n", matchCount, startSeq1, startSeq2, matchLength);
-		}
-
-		public void end() {
-			if (matchCount == 0) {
-				System.out.println("NO MATCHES FOUND!");
-			}
-		}
-	}
-	
-	static class DeltaInspector<T extends ChunkSequence<?>> implements MatchInspector<T> {
-		protected int changeStartS1, changeStartS2;
-		protected T seq1, seq2;
-
-		public void begin(T s1, T s2) {
-			seq1 = s1;
-			seq2 = s2;
-			changeStartS1 = changeStartS2 = 0;
-		}
-
-		public void match(int startSeq1, int startSeq2, int matchLength) {
-			reportDeltaElement(startSeq1, startSeq2, matchLength);
-			changeStartS1 = startSeq1 + matchLength;
-			changeStartS2 = startSeq2 + matchLength;
-		}
-
-		public void end() {
-			if (changeStartS1 < seq1.chunkCount()-1 || changeStartS2 < seq2.chunkCount()-1) {
-				reportDeltaElement(seq1.chunkCount()-1, seq2.chunkCount()-1, 0);
-			}
-		}
-
-		protected void reportDeltaElement(int matchStartSeq1, int matchStartSeq2, int matchLength) {
-			if (changeStartS1 < matchStartSeq1) {
-				if (changeStartS2 < matchStartSeq2) {
-					changed(changeStartS1, matchStartSeq1, changeStartS2, matchStartSeq2);
-				} else {
-					assert changeStartS2 == matchStartSeq2;
-					deleted(matchStartSeq2, changeStartS1, matchStartSeq1);
-				}
-			} else {
-				assert changeStartS1 == matchStartSeq1;
-				if(changeStartS2 < matchStartSeq2) {
-					added(changeStartS1, changeStartS2, matchStartSeq2);
-				} else {
-					assert changeStartS2 == matchStartSeq2;
-					if (matchStartSeq1 > 0 || matchStartSeq2 > 0) {
-						// FIXME perhaps, exception is too much for the case
-						// once diff is covered with tests, replace with assert false : msg; 
-						throw new HgInvalidStateException(String.format("adjustent equal blocks %d, %d and %d,%d", changeStartS1, matchStartSeq1, changeStartS2, matchStartSeq2));
-					}
-				}
-			}
-			if (matchLength > 0) {
-				unchanged(matchStartSeq1, matchStartSeq2, matchLength);
-			}
-		}
-
-		/**
-		 * [s1From..s1To) replaced with [s2From..s2To)
-		 */
-		protected void changed(int s1From, int s1To, int s2From, int s2To) {
-			// NO-OP
-		}
-
-		protected void deleted(int s2DeletePoint, int s1From, int s1To) {
-			// NO-OP
-		}
-
-		protected void added(int s1InsertPoint, int s2From, int s2To) {
-			// NO-OP
-		}
-
-		protected void unchanged(int s1From, int s2From, int length) {
-			// NO-OP
-		}
-	}
-	
-	static class DeltaDumpInspector<T extends ChunkSequence<?>> extends DeltaInspector<T> {
-
-		@Override
-		protected void changed(int s1From, int s1To, int s2From, int s2To) {
-			System.out.printf("changed [%d..%d) with [%d..%d)\n", s1From, s1To, s2From, s2To);
-		}
-		
-		@Override
-		protected void deleted(int s2DeletionPoint, int s1From, int s1To) {
-			System.out.printf("deleted [%d..%d)\n", s1From, s1To);
-		}
-		
-		@Override
-		protected void added(int s1InsertPoint, int s2From, int s2To) {
-			System.out.printf("added [%d..%d) at %d\n", s2From, s2To, s1InsertPoint);
-		}
-
-		@Override
-		protected void unchanged(int s1From, int s2From, int length) {
-			System.out.printf("same [%d..%d) and [%d..%d)\n", s1From, s1From + length, s2From, s2From + length);
-		}
-	}
-	
-	public static void main(String[] args) throws Exception {
-		PatchGenerator<LineSequence> pg1 = new PatchGenerator<LineSequence>();
-//		pg1.init(LineSequence.newlines("hello\nabc".getBytes()), LineSequence.newlines("hello\nworld".getBytes()));
-//		pg1.init(LineSequence.newlines("".getBytes()), LineSequence.newlines("hello\nworld".getBytes()));
-		pg1.init(LineSequence.newlines("hello\nworld".getBytes()), LineSequence.newlines("".getBytes()));
-		pg1.findMatchingBlocks(new MatchDumpInspector<LineSequence>());
-		pg1.findMatchingBlocks(new DeltaDumpInspector<LineSequence>());
-		if (Boolean.FALSE.booleanValue()) {
-			return;
-		}
-		HgRepository repo = new HgLookup().detectFromWorkingDir();
-		HgDataFile df = repo.getFileNode("cmdline/org/tmatesoft/hg/console/Main.java");
-		ByteArrayChannel bac1, bac2;
-		df.content(80, bac1 = new ByteArrayChannel());
-		df.content(81, bac2 = new ByteArrayChannel());
-//		String s1 = "line 1\nline 2\r\nline 3\n\nline 1\nline 2";
-//		String s2 = "abc\ncdef\r\nline 2\r\nline 3\nline 2";
-		PatchGenerator<LineSequence> pg = new PatchGenerator<LineSequence>();
-		byte[] data1 = bac1.toArray();
-		byte[] data2 = bac2.toArray();
-		pg.init(new LineSequence(data1).splitByNewlines(), new LineSequence(data2).splitByNewlines());
-		System.out.println("Matches:");
-		pg.findMatchingBlocks(new MatchDumpInspector<LineSequence>());
-		System.out.println("Deltas:");
-		pg.findMatchingBlocks(new DeltaDumpInspector<LineSequence>());
-	}
-
-	/**
-	 * Unsure if this marker interface worth presence
-	 */
-	public interface Chunk {
-	}
-	
-	/**
-	 * Generic sequence of chunk, where chunk is anything comparable to another chunk, e.g. a string or a single char
-	 * Sequence diff algorithm above doesn't care about sequence nature.
-	 */
-	public interface ChunkSequence<T extends Chunk> {
-		public T chunk(int index);
-		public int chunkCount();
-	}
-	
-	static final class LineSequence implements ChunkSequence<LineSequence.ByteChain> {
-		
-		private final byte[] input;
-		private ArrayList<ByteChain> lines;
-
-		public LineSequence(byte[] data) {
-			input = data;
-		}
-		
-		public static LineSequence newlines(byte[] array) {
-			return new LineSequence(array).splitByNewlines();
-		}
-
-		// sequence ends with fake, empty line chunk
-		public LineSequence splitByNewlines() {
-			lines = new ArrayList<ByteChain>();
-			int lastStart = 0;
-			for (int i = 0; i < input.length; i++) {
-				if (input[i] == '\n') {
-					lines.add(new ByteChain(lastStart, i+1));
-					lastStart = i+1;
-				} else if (input[i] == '\r') {
-					if (i+1 < input.length && input[i+1] == '\n') {
-						i++;
-					}
-					lines.add(new ByteChain(lastStart, i+1));
-					lastStart = i+1;
-				}
-			}
-			if (lastStart < input.length) {
-				lines.add(new ByteChain(lastStart, input.length));
-			}
-			// empty chunk to keep offset of input end
-			lines.add(new ByteChain(input.length));
-			return this;
-		}
-		
-		public ByteChain chunk(int index) {
-			return lines.get(index);
-		}
-		
-		public int chunkCount() {
-			return lines.size();
-		}
-		
-		public byte[] data(int chunkFrom, int chunkTo) {
-			if (chunkFrom == chunkTo) {
-				return new byte[0];
-			}
-			int from = chunk(chunkFrom).getOffset(), to = chunk(chunkTo).getOffset();
-			byte[] rv = new byte[to - from];
-			System.arraycopy(input, from, rv, 0, rv.length);
-			return rv;
-		}
-
-		
-		final class ByteChain implements Chunk {
-			private final int start, end;
-			private final int hash;
-			
-			/**
-			 * construct a chunk with a sole purpose to keep 
-			 * offset of the data end
-			 */
-			ByteChain(int offset) {
-				start = end = offset;
-				// ensure this chunk doesn't match trailing chunk of another sequence
-				hash = System.identityHashCode(this);
-			}
-			
-			ByteChain(int s, int e) {
-				start = s;
-				end = e;
-				hash = calcHash(input, s, e);
-			}
-			
-			/**
-			 * byte offset of the this ByteChain inside ChainSequence 
-			 */
-			public int getOffset() {
-				return start;
-			}
-			
-			public byte[] data() {
-				byte[] rv = new byte[end - start];
-				System.arraycopy(input, start, rv, 0, rv.length);
-				return rv;
-			}
-			
-			@Override
-			public boolean equals(Object obj) {
-				if (obj == null || obj.getClass() != ByteChain.class) {
-					return false;
-				}
-				ByteChain other = (ByteChain) obj;
-				if (other.hash != hash || other.end - other.start != end - start) {
-					return false;
-				}
-				return other.match(input, start);
-			}
-			
-			private boolean match(byte[] oi, int from) {
-				for (int i = start, j = from; i < end; i++, j++) {
-					if (LineSequence.this.input[i] != oi[j]) {
-						return false;
-					}
-				}
-				return true;
-			}
-			
-			@Override
-			public int hashCode() {
-				return hash;
-			}
-			
-			@Override
-			public String toString() {
-				return String.format("[@%d\"%s\"]", start, new String(data()));
-			}
-		}
-
-		// same as Arrays.hashCode(byte[]), just for a slice of a bigger array
-		static int calcHash(byte[] data, int from, int to) {
-			int result = 1;
-			for (int i = from; i < to; i++) {
-				result = 31 * result + data[i];
-			}
-			return result;
-		}
-	}
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/org/tmatesoft/hg/test/TestDiffHelper.java	Wed Feb 20 18:19:52 2013 +0100
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2013 TMate Software Ltd
+ *  
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.test;
+
+import static org.junit.Assert.*;
+import static org.tmatesoft.hg.internal.DiffHelper.LineSequence.newlines;
+
+import org.junit.Test;
+import org.tmatesoft.hg.internal.DiffHelper;
+import org.tmatesoft.hg.internal.DiffHelper.ChunkSequence;
+import org.tmatesoft.hg.internal.DiffHelper.LineSequence;
+import org.tmatesoft.hg.internal.IntVector;
+
+/**
+ * Testing DiffHelper (foundation for facilities like commit and annotate) directly
+ * 
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public class TestDiffHelper {
+
+	@Test
+	public void testSimple() { // drives DiffHelper over LineSequence pairs: single change, empty origin, empty target, mixed edits
+		DiffHelper<LineSequence> diffHelper = new DiffHelper<LineSequence>();
+		MatchCollector<LineSequence> mc; DeltaCollector dc; // a fresh collector is assigned inline before each run
+
+		// single change: first line is shared, second line differs
+		diffHelper.init(newlines("hello\nabc".getBytes()), newlines("hello\nworld".getBytes()));
+		diffHelper.findMatchingBlocks(mc = new MatchCollector<LineSequence>());
+		assertEquals(1, mc.matchCount()); // line 0 is the only common block, see the per-line checks below
+		assertTrue(mc.originLineMatched(0));
+		assertTrue(mc.targetLineMatched(0));
+		assertFalse(mc.originLineMatched(1));
+		assertFalse(mc.targetLineMatched(1));
+		diffHelper.findMatchingBlocks(dc = new DeltaCollector()); // same inputs, now collected as unchanged/deleted/added ranges
+		assertEquals(1, dc.unchangedCount()); // the xxxCount() accessors count ranges, not lines
+		assertEquals(1, dc.deletedCount());
+		assertEquals(1, dc.addedCount());
+
+		// boundary case, additions to an empty origin
+		diffHelper.init(newlines("".getBytes()), newlines("hello\nworld".getBytes()));
+		diffHelper.findMatchingBlocks(mc = new MatchCollector<LineSequence>());
+		assertEquals(0, mc.matchCount());
+		diffHelper.findMatchingBlocks(dc = new DeltaCollector());
+		assertEquals(0, dc.unchangedCount());
+		assertEquals(0, dc.deletedCount());
+		assertEquals(1, dc.addedCount()); // two lines added, but 1 range
+
+		// boundary case, complete deletion (mirror of the previous case: empty target)
+		diffHelper.init(newlines("hello\nworld".getBytes()), newlines("".getBytes()));
+		diffHelper.findMatchingBlocks(mc = new MatchCollector<LineSequence>());
+		assertEquals(0, mc.matchCount());
+		diffHelper.findMatchingBlocks(dc = new DeltaCollector());
+		assertEquals(0, dc.unchangedCount());
+		assertEquals(1, dc.deletedCount());
+		assertEquals(0, dc.addedCount());
+
+		// regular case, few changes
+		String s1 = "line 1\nline 2\r\nline 3\n\nline 1\nline 2"; // mixes \n and \r\n terminators, has an empty line, last line is unterminated
+		String s2 = "abc\ncdef\r\nline 2\r\nline 3\nline 2";
+		diffHelper.init(newlines(s1.getBytes()), newlines(s2.getBytes()));
+		diffHelper.findMatchingBlocks(mc = new MatchCollector<LineSequence>());
+		assertEquals(2, mc.matchCount()); // origin lines 1-2 and origin line 5, as the per-line checks below spell out
+		assertFalse(mc.originLineMatched(0));
+		assertTrue(mc.originLineMatched(1));
+		assertTrue(mc.originLineMatched(2));
+		assertFalse(mc.originLineMatched(3));
+		assertFalse(mc.originLineMatched(4));
+		assertTrue(mc.originLineMatched(5));
+		assertFalse(mc.targetLineMatched(0));
+		assertFalse(mc.targetLineMatched(1));
+		assertTrue(mc.targetLineMatched(2));
+		assertTrue(mc.targetLineMatched(3));
+		assertTrue(mc.targetLineMatched(4));
+		diffHelper.findMatchingBlocks(dc = new DeltaCollector());
+		assertEquals(2, dc.unchangedCount()); // 3 lines but 2 ranges
+		assertEquals(2, dc.deletedCount());
+		assertEquals(1, dc.addedCount());
+		assertTrue(dc.deletedLine(0)); // deletedLine() is queried with zero-based origin line indexes
+		assertTrue(dc.deletedLine(3));
+		assertTrue(dc.deletedLine(4));
+		assertTrue(dc.addedLine(0)); // addedLine() is queried with zero-based target line indexes
+		assertTrue(dc.addedLine(1));
+	}
+	
+	@Test
+	public void testOtherSequence() { // DiffHelper with a ChunkSequence other than LineSequence: two strings compared char by char
+		class CharSequence implements DiffHelper.ChunkSequence<Character> { // NOTE(review): local class name shadows java.lang.CharSequence within this method
+			private final char[] chunks;
+
+			CharSequence(String s) {
+				chunks = s.toCharArray(); // one chunk per char of the string
+			}
+			public Character chunk(int index) {
+				return chunks[index]; // char is autoboxed to Character
+			}
+			public int chunkCount() {
+				return chunks.length;
+			}
+		}
+		DiffHelper<CharSequence> diff = new DiffHelper<CharSequence>();
+		diff.init(new CharSequence("abcefg"), new CharSequence("bcdegh"));
+		MatchCollector<CharSequence> mc;
+		diff.findMatchingBlocks(mc = new MatchCollector<CharSequence>());
+		assertEquals(3, mc.matchCount()); // bc, e, g
+	}
+	
+	// Tells whether zero-based index ln falls into any range of the vector; a range is 3 consecutive values, length always last, start at index o (either 0 or 1)
+	static boolean includes(IntVector ranges, int o, int ln) {
+		assert ranges.size() % 3 == 0; // only whole triples are expected
+		for (int i = 2; i < ranges.size(); o += 3, i+=3) { // i walks range lengths, o walks range starts, one triple per step
+			int rangeStart = ranges.get(o);
+			if (rangeStart > ln) { // NOTE(review): early exit presumably relies on ranges being added in ascending start order - confirm
+				return false;
+			}
+			int rangeLen = ranges.get(i);
+			if (rangeStart + rangeLen > ln) { // rangeStart <= ln < rangeStart + rangeLen
+				return true;
+			}
+		}
+		return false; // ln lies past the end of the last range, or there are no ranges at all
+	}
+
+	static class MatchCollector<T extends ChunkSequence<?>> implements DiffHelper.MatchInspector<T> { // records every reported match so that tests can count and query them
+		private IntVector matched = new IntVector(10 * 3, 5 * 3); // flat storage, one (startSeq1, startSeq2, matchLength) triple per match
+
+		public void begin(T s1, T s2) { // nothing to prepare
+		}
+
+		public void match(int startSeq1, int startSeq2, int matchLength) {
+			matched.add(startSeq1, startSeq2, matchLength);
+		}
+
+		public void end() { // nothing to finalize
+		}
+		
+		int matchCount() {
+			return matched.size() / 3; // 3 values per recorded match
+		}
+		
+		// true if zero-based line ln of the origin (first sequence) falls into any recorded match
+		boolean originLineMatched(int ln) {
+			return includes(matched, 0, ln); // origin start is the first value of a triple
+		}
+		
+		boolean targetLineMatched(int ln) { // same check for the target (second sequence)
+			return includes(matched, 1, ln); // target start is the second value of a triple
+		}
+	}
+	
+	static class DeltaCollector extends DiffHelper.DeltaInspector<LineSequence> { // keeps every reported delta as a triple of ints so that tests can count and query ranges
+		private IntVector added, deleted, same; // each holds 3 values per range, range length last
+		public DeltaCollector() {
+			final int x = 10 * 3, y = 5 * 3; // NOTE(review): presumably IntVector initial capacity and growth step, sized in triples - confirm against IntVector
+			added = new IntVector(x, y);
+			deleted = new IntVector(x, y);
+			same = new IntVector(x, y);
+		}
+		@Override
+		protected void added(int s1InsertPoint, int s2From, int s2To) {
+			// stored as (insert point in s1, first added index in s2, range length s2To - s2From)
+			added.add(s1InsertPoint, s2From, s2To - s2From);
+		}
+		@Override
+		protected void changed(int s1From, int s1To, int s2From, int s2To) {
+			deleted(s2From, s1From, s1To); // a change is recorded as a deletion of the s1 range...
+			added(s1From, s2From, s2To); // ...plus an addition of the s2 range at the same spot
+		}
+		@Override
+		protected void deleted(int s2DeletePoint, int s1From, int s1To) {
+			deleted.add(s2DeletePoint, s1From, s1To - s1From); // (delete point in s2, first deleted index in s1, range length)
+		}
+		@Override
+		protected void unchanged(int s1From, int s2From, int length) {
+			same.add(s1From, s2From, length); // (start in s1, start in s2, range length)
+		}
+
+		int unchangedCount() { // number of unchanged ranges, not lines
+			return same.size() / 3;
+		}
+
+		int addedCount() { // number of added ranges, not lines
+			return added.size() / 3;
+		}
+
+		int deletedCount() { // number of deleted ranges, not lines
+			return deleted.size() / 3;
+		}
+		boolean addedLine(int ln) { // ln is an index in s2: second value of an added triple is s2From
+			return includes(added, 1, ln);
+		}
+		boolean deletedLine(int ln) { // ln is an index in s1: second value of a deleted triple is s1From
+			return includes(deleted, 1, ln);
+		}
+	}
+}