Mercurial > jhg
changeset 551:4ea0351ca878
Better (precise) name for diff facility, tests
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Wed, 20 Feb 2013 18:19:52 +0100 |
parents | c1478cc31f45 |
children | 45751456b471 |
files | build.xml src/org/tmatesoft/hg/internal/AnnotateFacility.java src/org/tmatesoft/hg/internal/DiffHelper.java src/org/tmatesoft/hg/internal/GeneratePatchInspector.java src/org/tmatesoft/hg/internal/IntMap.java src/org/tmatesoft/hg/internal/IntVector.java src/org/tmatesoft/hg/internal/PatchGenerator.java test/org/tmatesoft/hg/test/TestDiffHelper.java |
diffstat | 8 files changed, 638 insertions(+), 450 deletions(-) [+] |
line wrap: on
line diff
--- a/build.xml Tue Feb 19 21:35:09 2013 +0100 +++ b/build.xml Wed Feb 20 18:19:52 2013 +0100 @@ -106,6 +106,7 @@ <test name="org.tmatesoft.hg.test.TestAddRemove" /> <test name="org.tmatesoft.hg.test.TestCommit" /> <test name="org.tmatesoft.hg.test.TestBlame" /> + <test name="org.tmatesoft.hg.test.TestDiffHelper" /> </junit> </target>
--- a/src/org/tmatesoft/hg/internal/AnnotateFacility.java Tue Feb 19 21:35:09 2013 +0100 +++ b/src/org/tmatesoft/hg/internal/AnnotateFacility.java Wed Feb 20 18:19:52 2013 +0100 @@ -20,7 +20,7 @@ import static org.tmatesoft.hg.repo.HgRepository.TIP; import org.tmatesoft.hg.core.Nodeid; -import org.tmatesoft.hg.internal.PatchGenerator.LineSequence; +import org.tmatesoft.hg.internal.DiffHelper.LineSequence; import org.tmatesoft.hg.repo.HgDataFile; import org.tmatesoft.hg.repo.HgInvalidStateException; import org.tmatesoft.hg.util.CancelledException; @@ -41,7 +41,7 @@ int fileRevIndex2 = fileRevIndex(df, csetRevIndex2); LineSequence c1 = lines(df, fileRevIndex1); LineSequence c2 = lines(df, fileRevIndex2); - PatchGenerator<LineSequence> pg = new PatchGenerator<LineSequence>(); + DiffHelper<LineSequence> pg = new DiffHelper<LineSequence>(); pg.init(c1, c2); pg.findMatchingBlocks(new BlameBlockInspector(insp, csetRevIndex1, csetRevIndex2)); } @@ -86,7 +86,7 @@ LineSequence p2Lines = lines(df, fileParentRevs[1]); int p1ClogIndex = df.getChangesetRevisionIndex(fileParentRevs[0]); int p2ClogIndex = df.getChangesetRevisionIndex(fileParentRevs[1]); - PatchGenerator<LineSequence> pg = new PatchGenerator<LineSequence>(); + DiffHelper<LineSequence> pg = new DiffHelper<LineSequence>(); pg.init(p2Lines, fileRevLines); EqualBlocksCollector p2MergeCommon = new EqualBlocksCollector(); pg.findMatchingBlocks(p2MergeCommon); @@ -109,7 +109,7 @@ LineSequence parentLines = lines(df, soleParent); int parentChangesetRevIndex = df.getChangesetRevisionIndex(soleParent); - PatchGenerator<LineSequence> pg = new PatchGenerator<LineSequence>(); + DiffHelper<LineSequence> pg = new DiffHelper<LineSequence>(); pg.init(parentLines, fileRevLines); pg.findMatchingBlocks(new BlameBlockInspector(insp, parentChangesetRevIndex, csetRevIndex)); } @@ -194,7 +194,7 @@ - static class BlameBlockInspector extends PatchGenerator.DeltaInspector<LineSequence> { + static class BlameBlockInspector extends 
DiffHelper.DeltaInspector<LineSequence> { private final BlockInspector insp; private final int csetOrigin; private final int csetTarget; @@ -443,7 +443,7 @@ } } - static class EqualBlocksCollector implements PatchGenerator.MatchInspector<LineSequence> { + static class EqualBlocksCollector implements DiffHelper.MatchInspector<LineSequence> { private final IntVector matches = new IntVector(10*3, 2*3); public void begin(LineSequence s1, LineSequence s2) {
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/org/tmatesoft/hg/internal/DiffHelper.java Wed Feb 20 18:19:52 2013 +0100 @@ -0,0 +1,397 @@ +/* + * Copyright (c) 2013 TMate Software Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * For information on how to redistribute this software under + * the terms of a license other than GNU General Public License + * contact TMate Software at support@hg4j.com + */ +package org.tmatesoft.hg.internal; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +import org.tmatesoft.hg.repo.HgInvalidStateException; + +/** + * Mercurial cares about changes only up to the line level, e.g. a simple file version dump in manifest looks like (RevlogDump output): + * + * 522: 233748 0 103 17438 433 522 521 -1 756073cf2321df44d3ed0585f2a5754bc8a1b2f6 + * <PATCH>: + * 3487..3578, 91:src/org/tmatesoft/hg/core/HgIterateDirection.java\00add61a8a665c5d8f092210767f812fe0d335ac8 + * + * I.e. for the {fname}{revision} entry format of manifest, not only {revision} is changed, but the whole line, with unchanged {fname} is recorded + * in the patch. + * + * Mercurial paper describes reasons for choosing this approach to delta generation, too. + * + * + * @author Artem Tikhomirov + * @author TMate Software Ltd. 
+ */ +public class DiffHelper<T extends DiffHelper.ChunkSequence<?>> { + + private Map<Object, IntVector> chunk2UseIndex; + private T seq1, seq2; + + // get filled by #longestMatch, track start of common sequence in seq1 and seq2, respectively + private int matchStartS1, matchStartS2; + + private MatchInspector<T> matchInspector; + + public void init(T s1, T s2) { + seq1 = s1; + seq2 = s2; + prepare(s2); + } + + public void init(T s1) { + if (seq2 == null) { + throw new IllegalStateException("Use this #init() only when target sequence shall be matched against different origin"); + } + seq1 = s1; + } + + + private void prepare(T s2) { + chunk2UseIndex = new HashMap<Object, IntVector>(); + for (int i = 0, len = s2.chunkCount(); i < len; i++) { + Object bc = s2.chunk(i); + IntVector loc = chunk2UseIndex.get(bc); + if (loc == null) { + chunk2UseIndex.put(bc, loc = new IntVector()); + } + loc.add(i); + // bc.registerUseIn(i) - BEWARE, use of bc here is incorrect + // in this case need to find the only ByteChain to keep indexes + // i.e. 
when there are few equal ByteChain instances, notion of "usedIn" shall be either shared (reference same vector) + // or kept within only one of them + } + } + + public void findMatchingBlocks(MatchInspector<T> insp) { + insp.begin(seq1, seq2); + matchInspector = insp; + findMatchingBlocks(0, seq1.chunkCount(), 0, seq2.chunkCount()); + insp.end(); + } + + /** + * implementation based on Python's difflib.py and SequenceMatcher + */ + public int longestMatch(int startS1, int endS1, int startS2, int endS2) { + matchStartS1 = matchStartS2 = 0; + int maxLength = 0; + IntMap<Integer> chunkIndex2MatchCount = new IntMap<Integer>(8); + for (int i = startS1; i < endS1; i++) { + Object bc = seq1.chunk(i); + IntVector occurencesInS2 = chunk2UseIndex.get(bc); + if (occurencesInS2 == null) { + chunkIndex2MatchCount.clear(); + continue; + } + IntMap<Integer> newChunkIndex2MatchCount = new IntMap<Integer>(8); + for (int j : occurencesInS2.toArray()) { + // s1[i] == s2[j] + if (j < startS2) { + continue; + } + if (j >= endS2) { + break; + } + int prevChunkMatches = chunkIndex2MatchCount.containsKey(j-1) ? 
chunkIndex2MatchCount.get(j-1) : 0; + int k = prevChunkMatches + 1; + newChunkIndex2MatchCount.put(j, k); + if (k > maxLength) { + matchStartS1 = i-k+1; + matchStartS2 = j-k+1; + maxLength = k; + } + } + chunkIndex2MatchCount = newChunkIndex2MatchCount; + } + return maxLength; + } + + private void findMatchingBlocks(int startS1, int endS1, int startS2, int endS2) { + int matchLength = longestMatch(startS1, endS1, startS2, endS2); + if (matchLength > 0) { + final int saveStartS1 = matchStartS1, saveStartS2 = matchStartS2; + if (startS1 < matchStartS1 && startS2 < matchStartS2) { + findMatchingBlocks(startS1, matchStartS1, startS2, matchStartS2); + } + matchInspector.match(saveStartS1, saveStartS2, matchLength); + if (saveStartS1+matchLength < endS1 && saveStartS2+matchLength < endS2) { + findMatchingBlocks(saveStartS1 + matchLength, endS1, saveStartS2 + matchLength, endS2); + } + } + } + + public interface MatchInspector<T extends ChunkSequence<?>> { + void begin(T s1, T s2); + void match(int startSeq1, int startSeq2, int matchLength); + void end(); + } + + static class MatchDumpInspector<T extends ChunkSequence<?>> implements MatchInspector<T> { + private int matchCount; + + public void begin(T s1, T s2) { + matchCount = 0; + } + + public void match(int startSeq1, int startSeq2, int matchLength) { + matchCount++; + System.out.printf("match #%d: from line #%d and line #%d of length %d\n", matchCount, startSeq1, startSeq2, matchLength); + } + + public void end() { + if (matchCount == 0) { + System.out.println("NO MATCHES FOUND!"); + } + } + } + + /** + * Matcher implementation that translates "match/equal" notification to a delta-style "added/removed/changed". 
+ */ + public static class DeltaInspector<T extends ChunkSequence<?>> implements MatchInspector<T> { + protected int changeStartS1, changeStartS2; + protected T seq1, seq2; + + public void begin(T s1, T s2) { + seq1 = s1; + seq2 = s2; + changeStartS1 = changeStartS2 = 0; + } + + public void match(int startSeq1, int startSeq2, int matchLength) { + reportDeltaElement(startSeq1, startSeq2, matchLength); + changeStartS1 = startSeq1 + matchLength; + changeStartS2 = startSeq2 + matchLength; + } + + public void end() { + if (changeStartS1 < seq1.chunkCount()-1 || changeStartS2 < seq2.chunkCount()-1) { + reportDeltaElement(seq1.chunkCount()-1, seq2.chunkCount()-1, 0); + } + } + + protected void reportDeltaElement(int matchStartSeq1, int matchStartSeq2, int matchLength) { + if (changeStartS1 < matchStartSeq1) { + if (changeStartS2 < matchStartSeq2) { + changed(changeStartS1, matchStartSeq1, changeStartS2, matchStartSeq2); + } else { + assert changeStartS2 == matchStartSeq2; + deleted(matchStartSeq2, changeStartS1, matchStartSeq1); + } + } else { + assert changeStartS1 == matchStartSeq1; + if(changeStartS2 < matchStartSeq2) { + added(changeStartS1, changeStartS2, matchStartSeq2); + } else { + assert changeStartS2 == matchStartSeq2; + if (matchStartSeq1 > 0 || matchStartSeq2 > 0) { + // FIXME perhaps, exception is too much for the case + // once diff is covered with tests, replace with assert false : msg; + throw new HgInvalidStateException(String.format("adjustent equal blocks %d, %d and %d,%d", changeStartS1, matchStartSeq1, changeStartS2, matchStartSeq2)); + } + } + } + if (matchLength > 0) { + unchanged(matchStartSeq1, matchStartSeq2, matchLength); + } + } + + /** + * [s1From..s1To) replaced with [s2From..s2To) + */ + protected void changed(int s1From, int s1To, int s2From, int s2To) { + // NO-OP + } + + protected void deleted(int s2DeletePoint, int s1From, int s1To) { + // NO-OP + } + + protected void added(int s1InsertPoint, int s2From, int s2To) { + // NO-OP + } + + 
protected void unchanged(int s1From, int s2From, int length) { + // NO-OP + } + } + + static class DeltaDumpInspector<T extends ChunkSequence<?>> extends DeltaInspector<T> { + + @Override + protected void changed(int s1From, int s1To, int s2From, int s2To) { + System.out.printf("changed [%d..%d) with [%d..%d)\n", s1From, s1To, s2From, s2To); + } + + @Override + protected void deleted(int s2DeletionPoint, int s1From, int s1To) { + System.out.printf("deleted [%d..%d)\n", s1From, s1To); + } + + @Override + protected void added(int s1InsertPoint, int s2From, int s2To) { + System.out.printf("added [%d..%d) at %d\n", s2From, s2To, s1InsertPoint); + } + + @Override + protected void unchanged(int s1From, int s2From, int length) { + System.out.printf("same [%d..%d) and [%d..%d)\n", s1From, s1From + length, s2From, s2From + length); + } + } + + /** + * Generic sequence of chunk, where chunk is anything comparable to another chunk, e.g. a string or a single char + * Sequence diff algorithm above doesn't care about sequence nature. 
+ */ + public interface ChunkSequence<T> { + public T chunk(int index); + public int chunkCount(); + } + + public static final class LineSequence implements ChunkSequence<LineSequence.ByteChain> { + + private final byte[] input; + private ArrayList<ByteChain> lines; + + public LineSequence(byte[] data) { + input = data; + } + + public static LineSequence newlines(byte[] array) { + return new LineSequence(array).splitByNewlines(); + } + + // sequence ends with fake, empty line chunk + public LineSequence splitByNewlines() { + lines = new ArrayList<ByteChain>(); + int lastStart = 0; + for (int i = 0; i < input.length; i++) { + if (input[i] == '\n') { + lines.add(new ByteChain(lastStart, i+1)); + lastStart = i+1; + } else if (input[i] == '\r') { + if (i+1 < input.length && input[i+1] == '\n') { + i++; + } + lines.add(new ByteChain(lastStart, i+1)); + lastStart = i+1; + } + } + if (lastStart < input.length) { + lines.add(new ByteChain(lastStart, input.length)); + } + // empty chunk to keep offset of input end + lines.add(new ByteChain(input.length)); + return this; + } + + public ByteChain chunk(int index) { + return lines.get(index); + } + + public int chunkCount() { + return lines.size(); + } + + public byte[] data(int chunkFrom, int chunkTo) { + if (chunkFrom == chunkTo) { + return new byte[0]; + } + int from = chunk(chunkFrom).getOffset(), to = chunk(chunkTo).getOffset(); + byte[] rv = new byte[to - from]; + System.arraycopy(input, from, rv, 0, rv.length); + return rv; + } + + + final class ByteChain { + private final int start, end; + private final int hash; + + /** + * construct a chunk with a sole purpose to keep + * offset of the data end + */ + ByteChain(int offset) { + start = end = offset; + // ensure this chunk doesn't match trailing chunk of another sequence + hash = System.identityHashCode(this); + } + + ByteChain(int s, int e) { + start = s; + end = e; + hash = calcHash(input, s, e); + } + + /** + * byte offset of the this ByteChain inside ChainSequence 
+ */ + public int getOffset() { + return start; + } + + public byte[] data() { + byte[] rv = new byte[end - start]; + System.arraycopy(input, start, rv, 0, rv.length); + return rv; + } + + @Override + public boolean equals(Object obj) { + if (obj == null || obj.getClass() != ByteChain.class) { + return false; + } + ByteChain other = (ByteChain) obj; + if (other.hash != hash || other.end - other.start != end - start) { + return false; + } + return other.match(input, start); + } + + private boolean match(byte[] oi, int from) { + for (int i = start, j = from; i < end; i++, j++) { + if (LineSequence.this.input[i] != oi[j]) { + return false; + } + } + return true; + } + + @Override + public int hashCode() { + return hash; + } + + @Override + public String toString() { + return String.format("[@%d\"%s\"]", start, new String(data())); + } + } + + // same as Arrays.hashCode(byte[]), just for a slice of a bigger array + static int calcHash(byte[] data, int from, int to) { + int result = 1; + for (int i = from; i < to; i++) { + result = 31 * result + data[i]; + } + return result; + } + } +}
--- a/src/org/tmatesoft/hg/internal/GeneratePatchInspector.java Tue Feb 19 21:35:09 2013 +0100 +++ b/src/org/tmatesoft/hg/internal/GeneratePatchInspector.java Wed Feb 20 18:19:52 2013 +0100 @@ -16,8 +16,8 @@ */ package org.tmatesoft.hg.internal; -import org.tmatesoft.hg.internal.PatchGenerator.DeltaInspector; -import org.tmatesoft.hg.internal.PatchGenerator.LineSequence; +import org.tmatesoft.hg.internal.DiffHelper.DeltaInspector; +import org.tmatesoft.hg.internal.DiffHelper.LineSequence; class GeneratePatchInspector extends DeltaInspector<LineSequence> { private final Patch deltaCollector; @@ -29,7 +29,7 @@ public static Patch delta(byte[] prev, byte[] content) { Patch rv = new Patch(); - PatchGenerator<LineSequence> pg = new PatchGenerator<LineSequence>(); + DiffHelper<LineSequence> pg = new DiffHelper<LineSequence>(); pg.init(new LineSequence(prev).splitByNewlines(), new LineSequence(content).splitByNewlines()); pg.findMatchingBlocks(new GeneratePatchInspector(rv)); return rv;
--- a/src/org/tmatesoft/hg/internal/IntMap.java Tue Feb 19 21:35:09 2013 +0100 +++ b/src/org/tmatesoft/hg/internal/IntMap.java Wed Feb 20 18:19:52 2013 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012 TMate Software Ltd + * Copyright (c) 2011-2013 TMate Software Ltd * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,6 +16,7 @@ */ package org.tmatesoft.hg.internal; +import java.util.Arrays; import java.util.Iterator; import java.util.Map; import java.util.Map.Entry; @@ -127,6 +128,11 @@ } } + public void clear() { + Arrays.fill(values, 0, size, null); // do not keep the references + size = 0; + } + /** * Forget first N entries (in natural order) in the map. */
--- a/src/org/tmatesoft/hg/internal/IntVector.java Tue Feb 19 21:35:09 2013 +0100 +++ b/src/org/tmatesoft/hg/internal/IntVector.java Wed Feb 20 18:19:52 2013 +0100 @@ -42,11 +42,20 @@ public void add(int v) { if (count == data.length) { - grow(); + grow(0); } data[count++] = v; } + public void add(int... values) { + if (count + values.length > data.length) { + grow(count + values.length - data.length); + } + for (int v : values) { + data[count++] = v; + } + } + public int get(int i) { if (i < 0 || i >= count) { throw new IndexOutOfBoundsException(String.format("Index: %d, size: %d", i, count)); @@ -95,11 +104,14 @@ return toArray(); } - private void grow() { + private void grow(int newCapacityHint) { if (increment == 0) { throw new UnsupportedOperationException("This vector is not allowed to expand"); } int newCapacity = increment < 0 ? data.length << 1 : data.length + increment; + if (newCapacityHint > 0 && newCapacity < newCapacityHint) { + newCapacity = newCapacityHint; + } assert newCapacity > 0 && newCapacity != data.length : newCapacity; int[] newData = new int[newCapacity]; System.arraycopy(data, 0, newData, 0, count);
--- a/src/org/tmatesoft/hg/internal/PatchGenerator.java Tue Feb 19 21:35:09 2013 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,438 +0,0 @@ -/* - * Copyright (c) 2013 TMate Software Ltd - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * For information on how to redistribute this software under - * the terms of a license other than GNU General Public License - * contact TMate Software at support@hg4j.com - */ -package org.tmatesoft.hg.internal; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; - -import org.tmatesoft.hg.repo.HgDataFile; -import org.tmatesoft.hg.repo.HgInvalidStateException; -import org.tmatesoft.hg.repo.HgLookup; -import org.tmatesoft.hg.repo.HgRepository; - -/** - * Mercurial cares about changes only up to the line level, e.g. a simple file version dump in manifest looks like (RevlogDump output): - * - * 522: 233748 0 103 17438 433 522 521 -1 756073cf2321df44d3ed0585f2a5754bc8a1b2f6 - * <PATCH>: - * 3487..3578, 91:src/org/tmatesoft/hg/core/HgIterateDirection.java\00add61a8a665c5d8f092210767f812fe0d335ac8 - * - * I.e. for the {fname}{revision} entry format of manifest, not only {revision} is changed, but the whole line, with unchanged {fname} is recorded - * in the patch. - * - * Mercurial paper describes reasons for choosing this approach to delta generation, too. - * - * - * @author Artem Tikhomirov - * @author TMate Software Ltd. 
- */ -public class PatchGenerator<T extends PatchGenerator.ChunkSequence<?>> { - - private Map<Chunk, IntVector> chunk2UseIndex; - private T seq1, seq2; - - // get filled by #longestMatch, track start of common sequence in seq1 and seq2, respectively - private int matchStartS1, matchStartS2; - - private MatchInspector<T> matchInspector; - - public void init(T s1, T s2) { - seq1 = s1; - seq2 = s2; - prepare(s2); - } - - public void init(T s1) { - if (seq2 == null) { - throw new IllegalStateException("Use this #init() only when target sequence shall be matched against different origin"); - } - seq1 = s1; - } - - - private void prepare(T s2) { - chunk2UseIndex = new HashMap<Chunk, IntVector>(); - for (int i = 0, len = s2.chunkCount(); i < len; i++) { - Chunk bc = s2.chunk(i); - IntVector loc = chunk2UseIndex.get(bc); - if (loc == null) { - chunk2UseIndex.put(bc, loc = new IntVector()); - } - loc.add(i); - // bc.registerUseIn(i) - BEWARE, use of bc here is incorrect - // in this case need to find the only ByteChain to keep indexes - // i.e. 
when there are few equal ByteChain instances, notion of "usedIn" shall be either shared (reference same vector) - // or kept within only one of them - } -// for (ChunkSequence.ByteChain bc : chunk2UseIndex.keySet()) { -// System.out.printf("%s: {", new String(bc.data())); -// for (int x : chunk2UseIndex.get(bc).toArray()) { -// System.out.printf(" %d,", x); -// } -// System.out.println("}"); -// } - } - - public void findMatchingBlocks(MatchInspector<T> insp) { - insp.begin(seq1, seq2); - matchInspector = insp; - findMatchingBlocks(0, seq1.chunkCount(), 0, seq2.chunkCount()); - insp.end(); - } - - /** - * implementation based on Python's difflib.py and SequenceMatcher - */ - public int longestMatch(int startS1, int endS1, int startS2, int endS2) { - matchStartS1 = matchStartS2 = 0; - int maxLength = 0; - IntMap<Integer> chunkIndex2MatchCount = new IntMap<Integer>(8); - for (int i = startS1; i < endS1; i++) { - Chunk bc = seq1.chunk(i); - IntMap<Integer> newChunkIndex2MatchCount = new IntMap<Integer>(8); - IntVector occurencesInS2 = chunk2UseIndex.get(bc); - if (occurencesInS2 == null) { - // chunkIndex2MatchCount.clear(); // TODO need clear instead of new instance - chunkIndex2MatchCount = newChunkIndex2MatchCount; - continue; - } - for (int j : occurencesInS2.toArray()) { - // s1[i] == s2[j] - if (j < startS2) { - continue; - } - if (j >= endS2) { - break; - } - int prevChunkMatches = chunkIndex2MatchCount.containsKey(j-1) ? 
chunkIndex2MatchCount.get(j-1) : 0; - int k = prevChunkMatches + 1; - newChunkIndex2MatchCount.put(j, k); - if (k > maxLength) { - matchStartS1 = i-k+1; - matchStartS2 = j-k+1; - maxLength = k; - } - } - chunkIndex2MatchCount = newChunkIndex2MatchCount; - } - return maxLength; - } - - private void findMatchingBlocks(int startS1, int endS1, int startS2, int endS2) { - int matchLength = longestMatch(startS1, endS1, startS2, endS2); - if (matchLength > 0) { - final int saveStartS1 = matchStartS1, saveStartS2 = matchStartS2; - if (startS1 < matchStartS1 && startS2 < matchStartS2) { - findMatchingBlocks(startS1, matchStartS1, startS2, matchStartS2); - } - matchInspector.match(saveStartS1, saveStartS2, matchLength); - if (saveStartS1+matchLength < endS1 && saveStartS2+matchLength < endS2) { - findMatchingBlocks(saveStartS1 + matchLength, endS1, saveStartS2 + matchLength, endS2); - } - } - } - - interface MatchInspector<T extends ChunkSequence<?>> { - void begin(T s1, T s2); - void match(int startSeq1, int startSeq2, int matchLength); - void end(); - } - - static class MatchDumpInspector<T extends ChunkSequence<?>> implements MatchInspector<T> { - private int matchCount; - - public void begin(T s1, T s2) { - matchCount = 0; - } - - public void match(int startSeq1, int startSeq2, int matchLength) { - matchCount++; - System.out.printf("match #%d: from line #%d and line #%d of length %d\n", matchCount, startSeq1, startSeq2, matchLength); - } - - public void end() { - if (matchCount == 0) { - System.out.println("NO MATCHES FOUND!"); - } - } - } - - static class DeltaInspector<T extends ChunkSequence<?>> implements MatchInspector<T> { - protected int changeStartS1, changeStartS2; - protected T seq1, seq2; - - public void begin(T s1, T s2) { - seq1 = s1; - seq2 = s2; - changeStartS1 = changeStartS2 = 0; - } - - public void match(int startSeq1, int startSeq2, int matchLength) { - reportDeltaElement(startSeq1, startSeq2, matchLength); - changeStartS1 = startSeq1 + matchLength; - 
changeStartS2 = startSeq2 + matchLength; - } - - public void end() { - if (changeStartS1 < seq1.chunkCount()-1 || changeStartS2 < seq2.chunkCount()-1) { - reportDeltaElement(seq1.chunkCount()-1, seq2.chunkCount()-1, 0); - } - } - - protected void reportDeltaElement(int matchStartSeq1, int matchStartSeq2, int matchLength) { - if (changeStartS1 < matchStartSeq1) { - if (changeStartS2 < matchStartSeq2) { - changed(changeStartS1, matchStartSeq1, changeStartS2, matchStartSeq2); - } else { - assert changeStartS2 == matchStartSeq2; - deleted(matchStartSeq2, changeStartS1, matchStartSeq1); - } - } else { - assert changeStartS1 == matchStartSeq1; - if(changeStartS2 < matchStartSeq2) { - added(changeStartS1, changeStartS2, matchStartSeq2); - } else { - assert changeStartS2 == matchStartSeq2; - if (matchStartSeq1 > 0 || matchStartSeq2 > 0) { - // FIXME perhaps, exception is too much for the case - // once diff is covered with tests, replace with assert false : msg; - throw new HgInvalidStateException(String.format("adjustent equal blocks %d, %d and %d,%d", changeStartS1, matchStartSeq1, changeStartS2, matchStartSeq2)); - } - } - } - if (matchLength > 0) { - unchanged(matchStartSeq1, matchStartSeq2, matchLength); - } - } - - /** - * [s1From..s1To) replaced with [s2From..s2To) - */ - protected void changed(int s1From, int s1To, int s2From, int s2To) { - // NO-OP - } - - protected void deleted(int s2DeletePoint, int s1From, int s1To) { - // NO-OP - } - - protected void added(int s1InsertPoint, int s2From, int s2To) { - // NO-OP - } - - protected void unchanged(int s1From, int s2From, int length) { - // NO-OP - } - } - - static class DeltaDumpInspector<T extends ChunkSequence<?>> extends DeltaInspector<T> { - - @Override - protected void changed(int s1From, int s1To, int s2From, int s2To) { - System.out.printf("changed [%d..%d) with [%d..%d)\n", s1From, s1To, s2From, s2To); - } - - @Override - protected void deleted(int s2DeletionPoint, int s1From, int s1To) { - 
System.out.printf("deleted [%d..%d)\n", s1From, s1To); - } - - @Override - protected void added(int s1InsertPoint, int s2From, int s2To) { - System.out.printf("added [%d..%d) at %d\n", s2From, s2To, s1InsertPoint); - } - - @Override - protected void unchanged(int s1From, int s2From, int length) { - System.out.printf("same [%d..%d) and [%d..%d)\n", s1From, s1From + length, s2From, s2From + length); - } - } - - public static void main(String[] args) throws Exception { - PatchGenerator<LineSequence> pg1 = new PatchGenerator<LineSequence>(); -// pg1.init(LineSequence.newlines("hello\nabc".getBytes()), LineSequence.newlines("hello\nworld".getBytes())); -// pg1.init(LineSequence.newlines("".getBytes()), LineSequence.newlines("hello\nworld".getBytes())); - pg1.init(LineSequence.newlines("hello\nworld".getBytes()), LineSequence.newlines("".getBytes())); - pg1.findMatchingBlocks(new MatchDumpInspector<LineSequence>()); - pg1.findMatchingBlocks(new DeltaDumpInspector<LineSequence>()); - if (Boolean.FALSE.booleanValue()) { - return; - } - HgRepository repo = new HgLookup().detectFromWorkingDir(); - HgDataFile df = repo.getFileNode("cmdline/org/tmatesoft/hg/console/Main.java"); - ByteArrayChannel bac1, bac2; - df.content(80, bac1 = new ByteArrayChannel()); - df.content(81, bac2 = new ByteArrayChannel()); -// String s1 = "line 1\nline 2\r\nline 3\n\nline 1\nline 2"; -// String s2 = "abc\ncdef\r\nline 2\r\nline 3\nline 2"; - PatchGenerator<LineSequence> pg = new PatchGenerator<LineSequence>(); - byte[] data1 = bac1.toArray(); - byte[] data2 = bac2.toArray(); - pg.init(new LineSequence(data1).splitByNewlines(), new LineSequence(data2).splitByNewlines()); - System.out.println("Matches:"); - pg.findMatchingBlocks(new MatchDumpInspector<LineSequence>()); - System.out.println("Deltas:"); - pg.findMatchingBlocks(new DeltaDumpInspector<LineSequence>()); - } - - /** - * Unsure if this marker interface worth presence - */ - public interface Chunk { - } - - /** - * Generic sequence of 
chunk, where chunk is anything comparable to another chunk, e.g. a string or a single char - * Sequence diff algorithm above doesn't care about sequence nature. - */ - public interface ChunkSequence<T extends Chunk> { - public T chunk(int index); - public int chunkCount(); - } - - static final class LineSequence implements ChunkSequence<LineSequence.ByteChain> { - - private final byte[] input; - private ArrayList<ByteChain> lines; - - public LineSequence(byte[] data) { - input = data; - } - - public static LineSequence newlines(byte[] array) { - return new LineSequence(array).splitByNewlines(); - } - - // sequence ends with fake, empty line chunk - public LineSequence splitByNewlines() { - lines = new ArrayList<ByteChain>(); - int lastStart = 0; - for (int i = 0; i < input.length; i++) { - if (input[i] == '\n') { - lines.add(new ByteChain(lastStart, i+1)); - lastStart = i+1; - } else if (input[i] == '\r') { - if (i+1 < input.length && input[i+1] == '\n') { - i++; - } - lines.add(new ByteChain(lastStart, i+1)); - lastStart = i+1; - } - } - if (lastStart < input.length) { - lines.add(new ByteChain(lastStart, input.length)); - } - // empty chunk to keep offset of input end - lines.add(new ByteChain(input.length)); - return this; - } - - public ByteChain chunk(int index) { - return lines.get(index); - } - - public int chunkCount() { - return lines.size(); - } - - public byte[] data(int chunkFrom, int chunkTo) { - if (chunkFrom == chunkTo) { - return new byte[0]; - } - int from = chunk(chunkFrom).getOffset(), to = chunk(chunkTo).getOffset(); - byte[] rv = new byte[to - from]; - System.arraycopy(input, from, rv, 0, rv.length); - return rv; - } - - - final class ByteChain implements Chunk { - private final int start, end; - private final int hash; - - /** - * construct a chunk with a sole purpose to keep - * offset of the data end - */ - ByteChain(int offset) { - start = end = offset; - // ensure this chunk doesn't match trailing chunk of another sequence - hash = 
System.identityHashCode(this); - } - - ByteChain(int s, int e) { - start = s; - end = e; - hash = calcHash(input, s, e); - } - - /** - * byte offset of the this ByteChain inside ChainSequence - */ - public int getOffset() { - return start; - } - - public byte[] data() { - byte[] rv = new byte[end - start]; - System.arraycopy(input, start, rv, 0, rv.length); - return rv; - } - - @Override - public boolean equals(Object obj) { - if (obj == null || obj.getClass() != ByteChain.class) { - return false; - } - ByteChain other = (ByteChain) obj; - if (other.hash != hash || other.end - other.start != end - start) { - return false; - } - return other.match(input, start); - } - - private boolean match(byte[] oi, int from) { - for (int i = start, j = from; i < end; i++, j++) { - if (LineSequence.this.input[i] != oi[j]) { - return false; - } - } - return true; - } - - @Override - public int hashCode() { - return hash; - } - - @Override - public String toString() { - return String.format("[@%d\"%s\"]", start, new String(data())); - } - } - - // same as Arrays.hashCode(byte[]), just for a slice of a bigger array - static int calcHash(byte[] data, int from, int to) { - int result = 1; - for (int i = from; i < to; i++) { - result = 31 * result + data[i]; - } - return result; - } - } -}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/org/tmatesoft/hg/test/TestDiffHelper.java Wed Feb 20 18:19:52 2013 +0100 @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2013 TMate Software Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * For information on how to redistribute this software under + * the terms of a license other than GNU General Public License + * contact TMate Software at support@hg4j.com + */ +package org.tmatesoft.hg.test; + +import static org.junit.Assert.*; +import static org.tmatesoft.hg.internal.DiffHelper.LineSequence.newlines; + +import org.junit.Test; +import org.tmatesoft.hg.internal.DiffHelper; +import org.tmatesoft.hg.internal.DiffHelper.ChunkSequence; +import org.tmatesoft.hg.internal.DiffHelper.LineSequence; +import org.tmatesoft.hg.internal.IntVector; + +/** + * Testing DiffHelper (foundation for facilities like commit and annotate) directly + * + * @author Artem Tikhomirov + * @author TMate Software Ltd. 
+ */
+public class TestDiffHelper {
+
+    /**
+     * Runs DiffHelper over pairs of LineSequence built with newlines() and checks
+     * both views of the result: raw matching blocks (MatchCollector) and the
+     * derived added/deleted/unchanged ranges (DeltaCollector).
+     * Scenarios: one changed line, empty origin, empty target, and a mix of
+     * changes with "\r\n" endings and an empty line.
+     * All counts asserted here are numbers of ranges, not numbers of lines.
+     */
+    @Test
+    public void testSimple() {
+        DiffHelper<LineSequence> diffHelper = new DiffHelper<LineSequence>();
+        MatchCollector<LineSequence> mc; DeltaCollector dc;
+
+        // single change
+        diffHelper.init(newlines("hello\nabc".getBytes()), newlines("hello\nworld".getBytes()));
+        diffHelper.findMatchingBlocks(mc = new MatchCollector<LineSequence>());
+        assertEquals(1, mc.matchCount());
+        assertTrue(mc.originLineMatched(0));
+        assertTrue(mc.targetLineMatched(0));
+        assertFalse(mc.originLineMatched(1));
+        assertFalse(mc.targetLineMatched(1));
+        // same initialized helper is queried again, this time through the delta view
+        diffHelper.findMatchingBlocks(dc = new DeltaCollector());
+        assertEquals(1, dc.unchangedCount());
+        assertEquals(1, dc.deletedCount());
+        assertEquals(1, dc.addedCount());
+
+        // boundary case, additions to an empty origin
+        diffHelper.init(newlines("".getBytes()), newlines("hello\nworld".getBytes()));
+        diffHelper.findMatchingBlocks(mc = new MatchCollector<LineSequence>());
+        assertEquals(0, mc.matchCount());
+        diffHelper.findMatchingBlocks(dc = new DeltaCollector());
+        assertEquals(0, dc.unchangedCount());
+        assertEquals(0, dc.deletedCount());
+        assertEquals(1, dc.addedCount()); // two lines added, but 1 range
+
+        // boundary case, complete deletion
+        diffHelper.init(newlines("hello\nworld".getBytes()), newlines("".getBytes()));
+        diffHelper.findMatchingBlocks(mc = new MatchCollector<LineSequence>());
+        assertEquals(0, mc.matchCount());
+        diffHelper.findMatchingBlocks(dc = new DeltaCollector());
+        assertEquals(0, dc.unchangedCount());
+        assertEquals(1, dc.deletedCount());
+        assertEquals(0, dc.addedCount());
+
+        // regular case, few changes
+        // NOTE(review): assuming newlines() splits on '\n' only, s1 yields 6 lines (the 4th empty)
+        // and s2 yields 5, which agrees with the line indexes asserted below - confirm against LineSequence
+        String s1 = "line 1\nline 2\r\nline 3\n\nline 1\nline 2";
+        String s2 = "abc\ncdef\r\nline 2\r\nline 3\nline 2";
+        diffHelper.init(newlines(s1.getBytes()), newlines(s2.getBytes()));
+        diffHelper.findMatchingBlocks(mc = new MatchCollector<LineSequence>());
+        assertEquals(2, mc.matchCount());
+        assertFalse(mc.originLineMatched(0));
+        assertTrue(mc.originLineMatched(1));
+        assertTrue(mc.originLineMatched(2));
+        assertFalse(mc.originLineMatched(3));
+        assertFalse(mc.originLineMatched(4));
+        assertTrue(mc.originLineMatched(5));
+        assertFalse(mc.targetLineMatched(0));
+        assertFalse(mc.targetLineMatched(1));
+        assertTrue(mc.targetLineMatched(2));
+        assertTrue(mc.targetLineMatched(3));
+        assertTrue(mc.targetLineMatched(4));
+        diffHelper.findMatchingBlocks(dc = new DeltaCollector());
+        assertEquals(2, dc.unchangedCount()); // 3 lines but 2 ranges
+        assertEquals(2, dc.deletedCount());
+        assertEquals(1, dc.addedCount());
+        // deletedLine() takes origin line indexes, addedLine() takes target line indexes
+        assertTrue(dc.deletedLine(0));
+        assertTrue(dc.deletedLine(3));
+        assertTrue(dc.deletedLine(4));
+        assertTrue(dc.addedLine(0));
+        assertTrue(dc.addedLine(1));
+    }
+
+    /**
+     * Checks that DiffHelper works with a ChunkSequence other than LineSequence,
+     * here a sequence whose chunks are the individual characters of a string.
+     */
+    @Test
+    public void testOtherSequence() {
+        // local class; inside this method its name hides java.lang.CharSequence
+        class CharSequence implements DiffHelper.ChunkSequence<Character> {
+            private final char[] chunks;
+
+            CharSequence(String s) {
+                chunks = s.toCharArray();
+            }
+            public Character chunk(int index) {
+                return chunks[index];
+            }
+            public int chunkCount() {
+                return chunks.length;
+            }
+        }
+        DiffHelper<CharSequence> diff = new DiffHelper<CharSequence>();
+        diff.init(new CharSequence("abcefg"), new CharSequence("bcdegh"));
+        MatchCollector<CharSequence> mc;
+        diff.findMatchingBlocks(mc = new MatchCollector<CharSequence>());
+        assertEquals(3, mc.matchCount()); // bc, e, g
+    }
+
+    /**
+     * Tells whether zero-based index ln falls inside any range recorded in ranges.
+     * A range is comprised of 3 consecutive values; range length is always the last one,
+     * range start comes at index o (either 0 or 1) of the triple.
+     * The scan stops with false at the first range that starts after ln, so the answer
+     * is only right when triples were recorded in ascending order of that start value.
+     *
+     * @param ranges flat vector of triples, its size has to be a multiple of 3 (asserted)
+     * @param o position of the range start within a triple; reused as the running index of the scan
+     * @param ln zero-based index to look for
+     * @return true if some range satisfies start <= ln < start + length
+     */
+    static boolean includes(IntVector ranges, int o, int ln) {
+        assert ranges.size() % 3 == 0;
+        // i walks the length slot (last of each triple), o walks the chosen start slot
+        for (int i = 2; i < ranges.size(); o += 3, i+=3) {
+            int rangeStart = ranges.get(o);
+            if (rangeStart > ln) {
+                return false;
+            }
+            int rangeLen = ranges.get(i);
+            if (rangeStart + rangeLen > ln) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * MatchInspector that records every reported match as a triple
+     * (startSeq1, startSeq2, matchLength) in a single flat IntVector.
+     */
+    static class MatchCollector<T extends ChunkSequence<?>> implements DiffHelper.MatchInspector<T> {
+        // NOTE(review): constructor arguments look like initial capacity and growth step, sized in triples - confirm against IntVector
+        private IntVector matched = new IntVector(10 * 3, 5 * 3);
+
+        public void begin(T s1, T s2) {
+        }
+
+        public void match(int startSeq1, int startSeq2, int matchLength) {
+            matched.add(startSeq1, startSeq2, matchLength);
+        }
+
+        public void end() {
+        }
+
+        // number of recorded matching blocks (3 values per block)
+        int matchCount() {
+            return matched.size() / 3;
+        }
+
+        // true if zero-based line matches any "same" block in the origin
+        boolean originLineMatched(int ln) {
+            return includes(matched, 0, ln);
+        }
+
+        // same check against the second sequence: block start is taken from startSeq2
+        boolean targetLineMatched(int ln) {
+            return includes(matched, 1, ln);
+        }
+    }
+
+    /**
+     * DeltaInspector that keeps added, deleted and unchanged ranges in three
+     * separate flat vectors, 3 values per range with the length stored last.
+     * A change is not kept on its own, it is recorded as a deletion plus an addition.
+     */
+    static class DeltaCollector extends DiffHelper.DeltaInspector<LineSequence> {
+        private IntVector added, deleted, same;
+        public DeltaCollector() {
+            final int x = 10 * 3, y = 5 * 3;
+            added = new IntVector(x, y);
+            deleted = new IntVector(x, y);
+            same = new IntVector(x, y);
+        }
+        @Override
+        protected void added(int s1InsertPoint, int s2From, int s2To) {
+            // stored as (insert point in s1, start in s2, length)
+            added.add(s1InsertPoint, s2From, s2To - s2From);
+        }
+        @Override
+        protected void changed(int s1From, int s1To, int s2From, int s2To) {
+            // replacement of s1 range with s2 range == removal of the former + insertion of the latter
+            deleted(s2From, s1From, s1To);
+            added(s1From, s2From, s2To);
+        }
+        @Override
+        protected void deleted(int s2DeletePoint, int s1From, int s1To) {
+            // stored as (delete point in s2, start in s1, length)
+            deleted.add(s2DeletePoint, s1From, s1To - s1From);
+        }
+        @Override
+        protected void unchanged(int s1From, int s2From, int length) {
+            same.add(s1From, s2From, length);
+        }
+
+        // counters below report number of ranges, not number of lines
+        int unchangedCount() {
+            return same.size() / 3;
+        }
+
+        int addedCount() {
+            return added.size() / 3;
+        }
+
+        int deletedCount() {
+            return deleted.size() / 3;
+        }
+        // ln is an index in the second sequence (range start is s2From)
+        boolean addedLine(int ln) {
+            return includes(added, 1, ln);
+        }
+        // ln is an index in the first sequence (range start is s1From)
+        boolean deletedLine(int ln) {
+            return includes(deleted, 1, ln);
+        }
+    }
+}