Mercurial > jhg
comparison src/org/tmatesoft/hg/internal/PatchGenerator.java @ 544:7f5998a9619d
Refactor PatchGenerator to be generic and welcome sequence of any nature
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Fri, 15 Feb 2013 16:48:54 +0100 |
parents | 1e95f48d9886 |
children | 15b406c7cd9d |
comparison
equal
deleted
inserted
replaced
543:1e95f48d9886 | 544:7f5998a9619d |
---|---|
38 * | 38 * |
39 * | 39 * |
40 * @author Artem Tikhomirov | 40 * @author Artem Tikhomirov |
41 * @author TMate Software Ltd. | 41 * @author TMate Software Ltd. |
42 */ | 42 */ |
43 public class PatchGenerator { | 43 public class PatchGenerator<T extends PatchGenerator.ChunkSequence<?>> { |
44 | 44 |
45 private Map<ChunkSequence.ByteChain, IntVector> chunk2UseIndex; | 45 private Map<Chunk, IntVector> chunk2UseIndex; |
46 private ChunkSequence seq1, seq2; | 46 private T seq1, seq2; |
47 | 47 |
48 // get filled by #longestMatch, track start of common sequence in seq1 and seq2, respectively | 48 // get filled by #longestMatch, track start of common sequence in seq1 and seq2, respectively |
49 private int matchStartS1, matchStartS2; | 49 private int matchStartS1, matchStartS2; |
50 | 50 |
51 private MatchInspector matchInspector; | 51 private MatchInspector<T> matchInspector; |
52 | 52 |
53 public void init(byte[] data1, byte[] data2) { | 53 public void init(T s1, T s2) { |
54 seq1 = new ChunkSequence(data1); | 54 seq1 = s1; |
55 seq1.splitByNewlines(); | 55 seq2 = s2; |
56 seq2 = new ChunkSequence(data2); | 56 prepare(s2); |
57 seq2.splitByNewlines(); | 57 } |
58 prepare(seq2); | 58 |
59 } | 59 |
60 | 60 private void prepare(T s2) { |
61 private void prepare(ChunkSequence s2) { | 61 chunk2UseIndex = new HashMap<Chunk, IntVector>(); |
62 chunk2UseIndex = new HashMap<ChunkSequence.ByteChain, IntVector>(); | |
63 for (int i = 0, len = s2.chunkCount(); i < len; i++) { | 62 for (int i = 0, len = s2.chunkCount(); i < len; i++) { |
64 ChunkSequence.ByteChain bc = s2.chunk(i); | 63 Chunk bc = s2.chunk(i); |
65 IntVector loc = chunk2UseIndex.get(bc); | 64 IntVector loc = chunk2UseIndex.get(bc); |
66 if (loc == null) { | 65 if (loc == null) { |
67 chunk2UseIndex.put(bc, loc = new IntVector()); | 66 chunk2UseIndex.put(bc, loc = new IntVector()); |
68 } | 67 } |
69 loc.add(i); | 68 loc.add(i); |
79 // } | 78 // } |
80 // System.out.println("}"); | 79 // System.out.println("}"); |
81 // } | 80 // } |
82 } | 81 } |
83 | 82 |
84 public void findMatchingBlocks(MatchInspector insp) { | 83 public void findMatchingBlocks(MatchInspector<T> insp) { |
85 insp.begin(seq1, seq2); | 84 insp.begin(seq1, seq2); |
86 matchInspector = insp; | 85 matchInspector = insp; |
87 findMatchingBlocks(0, seq1.chunkCount(), 0, seq2.chunkCount()); | 86 findMatchingBlocks(0, seq1.chunkCount(), 0, seq2.chunkCount()); |
88 insp.end(); | 87 insp.end(); |
89 } | 88 } |
94 public int longestMatch(int startS1, int endS1, int startS2, int endS2) { | 93 public int longestMatch(int startS1, int endS1, int startS2, int endS2) { |
95 matchStartS1 = matchStartS2 = 0; | 94 matchStartS1 = matchStartS2 = 0; |
96 int maxLength = 0; | 95 int maxLength = 0; |
97 IntMap<Integer> chunkIndex2MatchCount = new IntMap<Integer>(8); | 96 IntMap<Integer> chunkIndex2MatchCount = new IntMap<Integer>(8); |
98 for (int i = startS1; i < endS1; i++) { | 97 for (int i = startS1; i < endS1; i++) { |
99 ChunkSequence.ByteChain bc = seq1.chunk(i); | 98 Chunk bc = seq1.chunk(i); |
100 IntMap<Integer> newChunkIndex2MatchCount = new IntMap<Integer>(8); | 99 IntMap<Integer> newChunkIndex2MatchCount = new IntMap<Integer>(8); |
101 IntVector occurencesInS2 = chunk2UseIndex.get(bc); | 100 IntVector occurencesInS2 = chunk2UseIndex.get(bc); |
102 if (occurencesInS2 == null) { | 101 if (occurencesInS2 == null) { |
103 // chunkIndex2MatchCount.clear(); // TODO need clear instead of new instance | 102 // chunkIndex2MatchCount.clear(); // TODO need clear instead of new instance |
104 chunkIndex2MatchCount = newChunkIndex2MatchCount; | 103 chunkIndex2MatchCount = newChunkIndex2MatchCount; |
138 findMatchingBlocks(saveStartS1 + matchLength, endS1, saveStartS2 + matchLength, endS2); | 137 findMatchingBlocks(saveStartS1 + matchLength, endS1, saveStartS2 + matchLength, endS2); |
139 } | 138 } |
140 } | 139 } |
141 } | 140 } |
142 | 141 |
143 interface MatchInspector { | 142 interface MatchInspector<T extends ChunkSequence<?>> { |
144 void begin(ChunkSequence s1, ChunkSequence s2); | 143 void begin(T s1, T s2); |
145 void match(int startSeq1, int startSeq2, int matchLength); | 144 void match(int startSeq1, int startSeq2, int matchLength); |
146 void end(); | 145 void end(); |
147 } | 146 } |
148 | 147 |
149 static class MatchDumpInspector implements MatchInspector { | 148 static class MatchDumpInspector<T extends ChunkSequence<?>> implements MatchInspector<T> { |
150 public void begin(ChunkSequence s1, ChunkSequence s2) { | 149 public void begin(T s1, T s2) { |
151 } | 150 } |
152 | 151 |
153 public void match(int startSeq1, int startSeq2, int matchLength) { | 152 public void match(int startSeq1, int startSeq2, int matchLength) { |
154 System.out.printf("match: from line #%d and line #%d of length %d\n", startSeq1, startSeq2, matchLength); | 153 System.out.printf("match: from line #%d and line #%d of length %d\n", startSeq1, startSeq2, matchLength); |
155 } | 154 } |
156 | 155 |
157 public void end() { | 156 public void end() { |
158 } | 157 } |
159 } | 158 } |
160 | 159 |
161 static class DeltaInspector implements MatchInspector { | 160 static class DeltaInspector<T extends ChunkSequence<?>> implements MatchInspector<T> { |
162 protected int changeStartS1, changeStartS2; | 161 protected int changeStartS1, changeStartS2; |
163 protected ChunkSequence seq1, seq2; | 162 protected T seq1, seq2; |
164 | 163 |
165 | 164 public void begin(T s1, T s2) { |
166 public void begin(ChunkSequence s1, ChunkSequence s2) { | |
167 seq1 = s1; | 165 seq1 = s1; |
168 seq2 = s2; | 166 seq2 = s2; |
169 changeStartS1 = changeStartS2 = 0; | 167 changeStartS1 = changeStartS2 = 0; |
170 } | 168 } |
171 | 169 |
221 protected void unchanged(int s1From, int s2From, int length) { | 219 protected void unchanged(int s1From, int s2From, int length) { |
222 // NO-OP | 220 // NO-OP |
223 } | 221 } |
224 } | 222 } |
225 | 223 |
226 static class DeltaDumpInspector extends DeltaInspector { | 224 static class DeltaDumpInspector<T extends ChunkSequence<?>> extends DeltaInspector<T> { |
227 | 225 |
228 @Override | 226 @Override |
229 protected void changed(int s1From, int s1To, int s2From, int s2To) { | 227 protected void changed(int s1From, int s1To, int s2From, int s2To) { |
230 System.out.printf("changed [%d..%d) with [%d..%d)\n", s1From, s1To, s2From, s2To); | 228 System.out.printf("changed [%d..%d) with [%d..%d)\n", s1From, s1To, s2From, s2To); |
231 } | 229 } |
240 System.out.printf("added [%d..%d) at %d\n", s2From, s2To, s1InsertPoint); | 238 System.out.printf("added [%d..%d) at %d\n", s2From, s2To, s1InsertPoint); |
241 } | 239 } |
242 | 240 |
243 } | 241 } |
244 | 242 |
245 static class PatchFillInspector extends DeltaInspector { | |
246 private final Patch deltaCollector; | |
247 | |
248 PatchFillInspector(Patch p) { | |
249 assert p != null; | |
250 deltaCollector = p; | |
251 } | |
252 | |
253 @Override | |
254 protected void changed(int s1From, int s1To, int s2From, int s2To) { | |
255 int from = seq1.chunk(s1From).getOffset(); | |
256 int to = seq1.chunk(s1To).getOffset(); | |
257 byte[] data = seq2.data(s2From, s2To); | |
258 deltaCollector.add(from, to, data); | |
259 } | |
260 | |
261 @Override | |
262 protected void deleted(int s2DeletionPoint, int s1From, int s1To) { | |
263 int from = seq1.chunk(s1From).getOffset(); | |
264 int to = seq1.chunk(s1To).getOffset(); | |
265 deltaCollector.add(from, to, new byte[0]); | |
266 } | |
267 | |
268 @Override | |
269 protected void added(int s1InsertPoint, int s2From, int s2To) { | |
270 int insPoint = seq1.chunk(s1InsertPoint).getOffset(); | |
271 byte[] data = seq2.data(s2From, s2To); | |
272 deltaCollector.add(insPoint, insPoint, data); | |
273 } | |
274 } | |
275 | |
276 | |
277 | |
278 public static void main(String[] args) throws Exception { | 243 public static void main(String[] args) throws Exception { |
279 PatchGenerator pg1 = new PatchGenerator(); | 244 PatchGenerator<LineSequence> pg1 = new PatchGenerator<LineSequence>(); |
280 pg1.init("hello".getBytes(), "hello\nworld".getBytes()); | 245 pg1.init(LineSequence.newlines("hello".getBytes()), LineSequence.newlines("hello\nworld".getBytes())); |
281 pg1.findMatchingBlocks(new MatchDumpInspector()); | 246 pg1.findMatchingBlocks(new MatchDumpInspector<LineSequence>()); |
282 pg1.findMatchingBlocks(new DeltaDumpInspector()); | 247 pg1.findMatchingBlocks(new DeltaDumpInspector<LineSequence>()); |
283 if (Boolean.FALSE.booleanValue()) { | 248 if (Boolean.FALSE.booleanValue()) { |
284 return; | 249 return; |
285 } | 250 } |
286 HgRepository repo = new HgLookup().detectFromWorkingDir(); | 251 HgRepository repo = new HgLookup().detectFromWorkingDir(); |
287 HgDataFile df = repo.getFileNode("cmdline/org/tmatesoft/hg/console/Main.java"); | 252 HgDataFile df = repo.getFileNode("cmdline/org/tmatesoft/hg/console/Main.java"); |
288 ByteArrayChannel bac1, bac2; | 253 ByteArrayChannel bac1, bac2; |
289 df.content(80, bac1 = new ByteArrayChannel()); | 254 df.content(80, bac1 = new ByteArrayChannel()); |
290 df.content(81, bac2 = new ByteArrayChannel()); | 255 df.content(81, bac2 = new ByteArrayChannel()); |
291 // String s1 = "line 1\nline 2\r\nline 3\n\nline 1\nline 2"; | 256 // String s1 = "line 1\nline 2\r\nline 3\n\nline 1\nline 2"; |
292 // String s2 = "abc\ncdef\r\nline 2\r\nline 3\nline 2"; | 257 // String s2 = "abc\ncdef\r\nline 2\r\nline 3\nline 2"; |
293 PatchGenerator pg = new PatchGenerator(); | 258 PatchGenerator<LineSequence> pg = new PatchGenerator<LineSequence>(); |
294 pg.init(bac1.toArray(), bac2.toArray()); | 259 byte[] data1 = bac1.toArray(); |
260 byte[] data2 = bac2.toArray(); | |
261 pg.init(new LineSequence(data1).splitByNewlines(), new LineSequence(data2).splitByNewlines()); | |
295 System.out.println("Matches:"); | 262 System.out.println("Matches:"); |
296 pg.findMatchingBlocks(new MatchDumpInspector()); | 263 pg.findMatchingBlocks(new MatchDumpInspector<LineSequence>()); |
297 System.out.println("Deltas:"); | 264 System.out.println("Deltas:"); |
298 pg.findMatchingBlocks(new DeltaDumpInspector()); | 265 pg.findMatchingBlocks(new DeltaDumpInspector<LineSequence>()); |
299 } | 266 } |
300 | 267 |
301 public Patch delta(byte[] prev, byte[] content) { | 268 /** |
302 Patch rv = new Patch(); | 269 * Unsure if this marker interface worth presence |
303 init(prev, content); | |
304 findMatchingBlocks(new PatchFillInspector(rv)); | |
305 return rv; | |
306 } | |
307 | |
308 /* | |
309 * TODO shall be parameterized (template?) and refacctored to facilitate matching non lines only | |
310 * (sequence diff algorithm above doesn't care about sequence nature) | |
311 */ | 270 */ |
312 static final class ChunkSequence { | 271 public interface Chunk { |
272 } | |
273 | |
274 /** | |
275 * Generic sequence of chunk, where chunk is anything comparable to another chunk, e.g. a string or a single char | |
276 * Sequence diff algorithm above doesn't care about sequence nature. | |
277 */ | |
278 public interface ChunkSequence<T extends Chunk> { | |
279 public T chunk(int index); | |
280 public int chunkCount(); | |
281 } | |
282 | |
283 static final class LineSequence implements ChunkSequence<LineSequence.ByteChain> { | |
313 | 284 |
314 private final byte[] input; | 285 private final byte[] input; |
315 private ArrayList<ByteChain> lines; | 286 private ArrayList<ByteChain> lines; |
316 | 287 |
317 public ChunkSequence(byte[] data) { | 288 public LineSequence(byte[] data) { |
318 input = data; | 289 input = data; |
319 } | 290 } |
320 | 291 |
321 public void splitByNewlines() { | 292 public static LineSequence newlines(byte[] array) { |
293 return new LineSequence(array).splitByNewlines(); | |
294 } | |
295 | |
296 public LineSequence splitByNewlines() { | |
322 lines = new ArrayList<ByteChain>(); | 297 lines = new ArrayList<ByteChain>(); |
323 int lastStart = 0; | 298 int lastStart = 0; |
324 for (int i = 0; i < input.length; i++) { | 299 for (int i = 0; i < input.length; i++) { |
325 if (input[i] == '\n') { | 300 if (input[i] == '\n') { |
326 lines.add(new ByteChain(lastStart, i+1)); | 301 lines.add(new ByteChain(lastStart, i+1)); |
336 if (lastStart < input.length) { | 311 if (lastStart < input.length) { |
337 lines.add(new ByteChain(lastStart, input.length)); | 312 lines.add(new ByteChain(lastStart, input.length)); |
338 } | 313 } |
339 // empty chunk to keep offset of input end | 314 // empty chunk to keep offset of input end |
340 lines.add(new ByteChain(input.length, input.length)); | 315 lines.add(new ByteChain(input.length, input.length)); |
316 return this; | |
341 } | 317 } |
342 | 318 |
343 public ByteChain chunk(int index) { | 319 public ByteChain chunk(int index) { |
344 return lines.get(index); | 320 return lines.get(index); |
345 } | 321 } |
357 System.arraycopy(input, from, rv, 0, rv.length); | 333 System.arraycopy(input, from, rv, 0, rv.length); |
358 return rv; | 334 return rv; |
359 } | 335 } |
360 | 336 |
361 | 337 |
362 final class ByteChain { | 338 final class ByteChain implements Chunk { |
363 private final int start, end; | 339 private final int start, end; |
364 private final int hash; | 340 private final int hash; |
365 | 341 |
366 ByteChain(int s, int e) { | 342 ByteChain(int s, int e) { |
367 start = s; | 343 start = s; |
394 return other.match(input, start); | 370 return other.match(input, start); |
395 } | 371 } |
396 | 372 |
397 private boolean match(byte[] oi, int from) { | 373 private boolean match(byte[] oi, int from) { |
398 for (int i = start, j = from; i < end; i++, j++) { | 374 for (int i = start, j = from; i < end; i++, j++) { |
399 if (ChunkSequence.this.input[i] != oi[j]) { | 375 if (LineSequence.this.input[i] != oi[j]) { |
400 return false; | 376 return false; |
401 } | 377 } |
402 } | 378 } |
403 return true; | 379 return true; |
404 } | 380 } |