comparison src/org/tmatesoft/hg/internal/PatchGenerator.java @ 544:7f5998a9619d

Refactor PatchGenerator to be generic and welcome sequence of any nature
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Fri, 15 Feb 2013 16:48:54 +0100
parents 1e95f48d9886
children 15b406c7cd9d
comparison
equal deleted inserted replaced
543:1e95f48d9886 544:7f5998a9619d
38 * 38 *
39 * 39 *
40 * @author Artem Tikhomirov 40 * @author Artem Tikhomirov
41 * @author TMate Software Ltd. 41 * @author TMate Software Ltd.
42 */ 42 */
43 public class PatchGenerator { 43 public class PatchGenerator<T extends PatchGenerator.ChunkSequence<?>> {
44 44
45 private Map<ChunkSequence.ByteChain, IntVector> chunk2UseIndex; 45 private Map<Chunk, IntVector> chunk2UseIndex;
46 private ChunkSequence seq1, seq2; 46 private T seq1, seq2;
47 47
48 // get filled by #longestMatch, track start of common sequence in seq1 and seq2, respectively 48 // get filled by #longestMatch, track start of common sequence in seq1 and seq2, respectively
49 private int matchStartS1, matchStartS2; 49 private int matchStartS1, matchStartS2;
50 50
51 private MatchInspector matchInspector; 51 private MatchInspector<T> matchInspector;
52 52
53 public void init(byte[] data1, byte[] data2) { 53 public void init(T s1, T s2) {
54 seq1 = new ChunkSequence(data1); 54 seq1 = s1;
55 seq1.splitByNewlines(); 55 seq2 = s2;
56 seq2 = new ChunkSequence(data2); 56 prepare(s2);
57 seq2.splitByNewlines(); 57 }
58 prepare(seq2); 58
59 } 59
60 60 private void prepare(T s2) {
61 private void prepare(ChunkSequence s2) { 61 chunk2UseIndex = new HashMap<Chunk, IntVector>();
62 chunk2UseIndex = new HashMap<ChunkSequence.ByteChain, IntVector>();
63 for (int i = 0, len = s2.chunkCount(); i < len; i++) { 62 for (int i = 0, len = s2.chunkCount(); i < len; i++) {
64 ChunkSequence.ByteChain bc = s2.chunk(i); 63 Chunk bc = s2.chunk(i);
65 IntVector loc = chunk2UseIndex.get(bc); 64 IntVector loc = chunk2UseIndex.get(bc);
66 if (loc == null) { 65 if (loc == null) {
67 chunk2UseIndex.put(bc, loc = new IntVector()); 66 chunk2UseIndex.put(bc, loc = new IntVector());
68 } 67 }
69 loc.add(i); 68 loc.add(i);
79 // } 78 // }
80 // System.out.println("}"); 79 // System.out.println("}");
81 // } 80 // }
82 } 81 }
83 82
84 public void findMatchingBlocks(MatchInspector insp) { 83 public void findMatchingBlocks(MatchInspector<T> insp) {
85 insp.begin(seq1, seq2); 84 insp.begin(seq1, seq2);
86 matchInspector = insp; 85 matchInspector = insp;
87 findMatchingBlocks(0, seq1.chunkCount(), 0, seq2.chunkCount()); 86 findMatchingBlocks(0, seq1.chunkCount(), 0, seq2.chunkCount());
88 insp.end(); 87 insp.end();
89 } 88 }
94 public int longestMatch(int startS1, int endS1, int startS2, int endS2) { 93 public int longestMatch(int startS1, int endS1, int startS2, int endS2) {
95 matchStartS1 = matchStartS2 = 0; 94 matchStartS1 = matchStartS2 = 0;
96 int maxLength = 0; 95 int maxLength = 0;
97 IntMap<Integer> chunkIndex2MatchCount = new IntMap<Integer>(8); 96 IntMap<Integer> chunkIndex2MatchCount = new IntMap<Integer>(8);
98 for (int i = startS1; i < endS1; i++) { 97 for (int i = startS1; i < endS1; i++) {
99 ChunkSequence.ByteChain bc = seq1.chunk(i); 98 Chunk bc = seq1.chunk(i);
100 IntMap<Integer> newChunkIndex2MatchCount = new IntMap<Integer>(8); 99 IntMap<Integer> newChunkIndex2MatchCount = new IntMap<Integer>(8);
101 IntVector occurencesInS2 = chunk2UseIndex.get(bc); 100 IntVector occurencesInS2 = chunk2UseIndex.get(bc);
102 if (occurencesInS2 == null) { 101 if (occurencesInS2 == null) {
103 // chunkIndex2MatchCount.clear(); // TODO need clear instead of new instance 102 // chunkIndex2MatchCount.clear(); // TODO need clear instead of new instance
104 chunkIndex2MatchCount = newChunkIndex2MatchCount; 103 chunkIndex2MatchCount = newChunkIndex2MatchCount;
138 findMatchingBlocks(saveStartS1 + matchLength, endS1, saveStartS2 + matchLength, endS2); 137 findMatchingBlocks(saveStartS1 + matchLength, endS1, saveStartS2 + matchLength, endS2);
139 } 138 }
140 } 139 }
141 } 140 }
142 141
143 interface MatchInspector { 142 interface MatchInspector<T extends ChunkSequence<?>> {
144 void begin(ChunkSequence s1, ChunkSequence s2); 143 void begin(T s1, T s2);
145 void match(int startSeq1, int startSeq2, int matchLength); 144 void match(int startSeq1, int startSeq2, int matchLength);
146 void end(); 145 void end();
147 } 146 }
148 147
149 static class MatchDumpInspector implements MatchInspector { 148 static class MatchDumpInspector<T extends ChunkSequence<?>> implements MatchInspector<T> {
150 public void begin(ChunkSequence s1, ChunkSequence s2) { 149 public void begin(T s1, T s2) {
151 } 150 }
152 151
153 public void match(int startSeq1, int startSeq2, int matchLength) { 152 public void match(int startSeq1, int startSeq2, int matchLength) {
154 System.out.printf("match: from line #%d and line #%d of length %d\n", startSeq1, startSeq2, matchLength); 153 System.out.printf("match: from line #%d and line #%d of length %d\n", startSeq1, startSeq2, matchLength);
155 } 154 }
156 155
157 public void end() { 156 public void end() {
158 } 157 }
159 } 158 }
160 159
161 static class DeltaInspector implements MatchInspector { 160 static class DeltaInspector<T extends ChunkSequence<?>> implements MatchInspector<T> {
162 protected int changeStartS1, changeStartS2; 161 protected int changeStartS1, changeStartS2;
163 protected ChunkSequence seq1, seq2; 162 protected T seq1, seq2;
164 163
165 164 public void begin(T s1, T s2) {
166 public void begin(ChunkSequence s1, ChunkSequence s2) {
167 seq1 = s1; 165 seq1 = s1;
168 seq2 = s2; 166 seq2 = s2;
169 changeStartS1 = changeStartS2 = 0; 167 changeStartS1 = changeStartS2 = 0;
170 } 168 }
171 169
221 protected void unchanged(int s1From, int s2From, int length) { 219 protected void unchanged(int s1From, int s2From, int length) {
222 // NO-OP 220 // NO-OP
223 } 221 }
224 } 222 }
225 223
226 static class DeltaDumpInspector extends DeltaInspector { 224 static class DeltaDumpInspector<T extends ChunkSequence<?>> extends DeltaInspector<T> {
227 225
228 @Override 226 @Override
229 protected void changed(int s1From, int s1To, int s2From, int s2To) { 227 protected void changed(int s1From, int s1To, int s2From, int s2To) {
230 System.out.printf("changed [%d..%d) with [%d..%d)\n", s1From, s1To, s2From, s2To); 228 System.out.printf("changed [%d..%d) with [%d..%d)\n", s1From, s1To, s2From, s2To);
231 } 229 }
240 System.out.printf("added [%d..%d) at %d\n", s2From, s2To, s1InsertPoint); 238 System.out.printf("added [%d..%d) at %d\n", s2From, s2To, s1InsertPoint);
241 } 239 }
242 240
243 } 241 }
244 242
245 static class PatchFillInspector extends DeltaInspector {
246 private final Patch deltaCollector;
247
248 PatchFillInspector(Patch p) {
249 assert p != null;
250 deltaCollector = p;
251 }
252
253 @Override
254 protected void changed(int s1From, int s1To, int s2From, int s2To) {
255 int from = seq1.chunk(s1From).getOffset();
256 int to = seq1.chunk(s1To).getOffset();
257 byte[] data = seq2.data(s2From, s2To);
258 deltaCollector.add(from, to, data);
259 }
260
261 @Override
262 protected void deleted(int s2DeletionPoint, int s1From, int s1To) {
263 int from = seq1.chunk(s1From).getOffset();
264 int to = seq1.chunk(s1To).getOffset();
265 deltaCollector.add(from, to, new byte[0]);
266 }
267
268 @Override
269 protected void added(int s1InsertPoint, int s2From, int s2To) {
270 int insPoint = seq1.chunk(s1InsertPoint).getOffset();
271 byte[] data = seq2.data(s2From, s2To);
272 deltaCollector.add(insPoint, insPoint, data);
273 }
274 }
275
276
277
278 public static void main(String[] args) throws Exception { 243 public static void main(String[] args) throws Exception {
279 PatchGenerator pg1 = new PatchGenerator(); 244 PatchGenerator<LineSequence> pg1 = new PatchGenerator<LineSequence>();
280 pg1.init("hello".getBytes(), "hello\nworld".getBytes()); 245 pg1.init(LineSequence.newlines("hello".getBytes()), LineSequence.newlines("hello\nworld".getBytes()));
281 pg1.findMatchingBlocks(new MatchDumpInspector()); 246 pg1.findMatchingBlocks(new MatchDumpInspector<LineSequence>());
282 pg1.findMatchingBlocks(new DeltaDumpInspector()); 247 pg1.findMatchingBlocks(new DeltaDumpInspector<LineSequence>());
283 if (Boolean.FALSE.booleanValue()) { 248 if (Boolean.FALSE.booleanValue()) {
284 return; 249 return;
285 } 250 }
286 HgRepository repo = new HgLookup().detectFromWorkingDir(); 251 HgRepository repo = new HgLookup().detectFromWorkingDir();
287 HgDataFile df = repo.getFileNode("cmdline/org/tmatesoft/hg/console/Main.java"); 252 HgDataFile df = repo.getFileNode("cmdline/org/tmatesoft/hg/console/Main.java");
288 ByteArrayChannel bac1, bac2; 253 ByteArrayChannel bac1, bac2;
289 df.content(80, bac1 = new ByteArrayChannel()); 254 df.content(80, bac1 = new ByteArrayChannel());
290 df.content(81, bac2 = new ByteArrayChannel()); 255 df.content(81, bac2 = new ByteArrayChannel());
291 // String s1 = "line 1\nline 2\r\nline 3\n\nline 1\nline 2"; 256 // String s1 = "line 1\nline 2\r\nline 3\n\nline 1\nline 2";
292 // String s2 = "abc\ncdef\r\nline 2\r\nline 3\nline 2"; 257 // String s2 = "abc\ncdef\r\nline 2\r\nline 3\nline 2";
293 PatchGenerator pg = new PatchGenerator(); 258 PatchGenerator<LineSequence> pg = new PatchGenerator<LineSequence>();
294 pg.init(bac1.toArray(), bac2.toArray()); 259 byte[] data1 = bac1.toArray();
260 byte[] data2 = bac2.toArray();
261 pg.init(new LineSequence(data1).splitByNewlines(), new LineSequence(data2).splitByNewlines());
295 System.out.println("Matches:"); 262 System.out.println("Matches:");
296 pg.findMatchingBlocks(new MatchDumpInspector()); 263 pg.findMatchingBlocks(new MatchDumpInspector<LineSequence>());
297 System.out.println("Deltas:"); 264 System.out.println("Deltas:");
298 pg.findMatchingBlocks(new DeltaDumpInspector()); 265 pg.findMatchingBlocks(new DeltaDumpInspector<LineSequence>());
299 } 266 }
300 267
301 public Patch delta(byte[] prev, byte[] content) { 268 /**
302 Patch rv = new Patch(); 269 * Unsure if this marker interface worth presence
303 init(prev, content);
304 findMatchingBlocks(new PatchFillInspector(rv));
305 return rv;
306 }
307
308 /*
309 * TODO shall be parameterized (template?) and refacctored to facilitate matching non lines only
310 * (sequence diff algorithm above doesn't care about sequence nature)
311 */ 270 */
312 static final class ChunkSequence { 271 public interface Chunk {
272 }
273
274 /**
275 * Generic sequence of chunk, where chunk is anything comparable to another chunk, e.g. a string or a single char
276 * Sequence diff algorithm above doesn't care about sequence nature.
277 */
278 public interface ChunkSequence<T extends Chunk> {
279 public T chunk(int index);
280 public int chunkCount();
281 }
282
283 static final class LineSequence implements ChunkSequence<LineSequence.ByteChain> {
313 284
314 private final byte[] input; 285 private final byte[] input;
315 private ArrayList<ByteChain> lines; 286 private ArrayList<ByteChain> lines;
316 287
317 public ChunkSequence(byte[] data) { 288 public LineSequence(byte[] data) {
318 input = data; 289 input = data;
319 } 290 }
320 291
321 public void splitByNewlines() { 292 public static LineSequence newlines(byte[] array) {
293 return new LineSequence(array).splitByNewlines();
294 }
295
296 public LineSequence splitByNewlines() {
322 lines = new ArrayList<ByteChain>(); 297 lines = new ArrayList<ByteChain>();
323 int lastStart = 0; 298 int lastStart = 0;
324 for (int i = 0; i < input.length; i++) { 299 for (int i = 0; i < input.length; i++) {
325 if (input[i] == '\n') { 300 if (input[i] == '\n') {
326 lines.add(new ByteChain(lastStart, i+1)); 301 lines.add(new ByteChain(lastStart, i+1));
336 if (lastStart < input.length) { 311 if (lastStart < input.length) {
337 lines.add(new ByteChain(lastStart, input.length)); 312 lines.add(new ByteChain(lastStart, input.length));
338 } 313 }
339 // empty chunk to keep offset of input end 314 // empty chunk to keep offset of input end
340 lines.add(new ByteChain(input.length, input.length)); 315 lines.add(new ByteChain(input.length, input.length));
316 return this;
341 } 317 }
342 318
343 public ByteChain chunk(int index) { 319 public ByteChain chunk(int index) {
344 return lines.get(index); 320 return lines.get(index);
345 } 321 }
357 System.arraycopy(input, from, rv, 0, rv.length); 333 System.arraycopy(input, from, rv, 0, rv.length);
358 return rv; 334 return rv;
359 } 335 }
360 336
361 337
362 final class ByteChain { 338 final class ByteChain implements Chunk {
363 private final int start, end; 339 private final int start, end;
364 private final int hash; 340 private final int hash;
365 341
366 ByteChain(int s, int e) { 342 ByteChain(int s, int e) {
367 start = s; 343 start = s;
394 return other.match(input, start); 370 return other.match(input, start);
395 } 371 }
396 372
397 private boolean match(byte[] oi, int from) { 373 private boolean match(byte[] oi, int from) {
398 for (int i = start, j = from; i < end; i++, j++) { 374 for (int i = start, j = from; i < end; i++, j++) {
399 if (ChunkSequence.this.input[i] != oi[j]) { 375 if (LineSequence.this.input[i] != oi[j]) {
400 return false; 376 return false;
401 } 377 }
402 } 378 }
403 return true; 379 return true;
404 } 380 }