Mercurial > hg4j
comparison src/org/tmatesoft/hg/internal/PatchGenerator.java @ 544:7f5998a9619d
Refactor PatchGenerator to be generic and welcome sequence of any nature
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
|---|---|
| date | Fri, 15 Feb 2013 16:48:54 +0100 |
| parents | 1e95f48d9886 |
| children | 15b406c7cd9d |
comparison
equal
deleted
inserted
replaced
| 543:1e95f48d9886 | 544:7f5998a9619d |
|---|---|
| 38 * | 38 * |
| 39 * | 39 * |
| 40 * @author Artem Tikhomirov | 40 * @author Artem Tikhomirov |
| 41 * @author TMate Software Ltd. | 41 * @author TMate Software Ltd. |
| 42 */ | 42 */ |
| 43 public class PatchGenerator { | 43 public class PatchGenerator<T extends PatchGenerator.ChunkSequence<?>> { |
| 44 | 44 |
| 45 private Map<ChunkSequence.ByteChain, IntVector> chunk2UseIndex; | 45 private Map<Chunk, IntVector> chunk2UseIndex; |
| 46 private ChunkSequence seq1, seq2; | 46 private T seq1, seq2; |
| 47 | 47 |
| 48 // get filled by #longestMatch, track start of common sequence in seq1 and seq2, respectively | 48 // get filled by #longestMatch, track start of common sequence in seq1 and seq2, respectively |
| 49 private int matchStartS1, matchStartS2; | 49 private int matchStartS1, matchStartS2; |
| 50 | 50 |
| 51 private MatchInspector matchInspector; | 51 private MatchInspector<T> matchInspector; |
| 52 | 52 |
| 53 public void init(byte[] data1, byte[] data2) { | 53 public void init(T s1, T s2) { |
| 54 seq1 = new ChunkSequence(data1); | 54 seq1 = s1; |
| 55 seq1.splitByNewlines(); | 55 seq2 = s2; |
| 56 seq2 = new ChunkSequence(data2); | 56 prepare(s2); |
| 57 seq2.splitByNewlines(); | 57 } |
| 58 prepare(seq2); | 58 |
| 59 } | 59 |
| 60 | 60 private void prepare(T s2) { |
| 61 private void prepare(ChunkSequence s2) { | 61 chunk2UseIndex = new HashMap<Chunk, IntVector>(); |
| 62 chunk2UseIndex = new HashMap<ChunkSequence.ByteChain, IntVector>(); | |
| 63 for (int i = 0, len = s2.chunkCount(); i < len; i++) { | 62 for (int i = 0, len = s2.chunkCount(); i < len; i++) { |
| 64 ChunkSequence.ByteChain bc = s2.chunk(i); | 63 Chunk bc = s2.chunk(i); |
| 65 IntVector loc = chunk2UseIndex.get(bc); | 64 IntVector loc = chunk2UseIndex.get(bc); |
| 66 if (loc == null) { | 65 if (loc == null) { |
| 67 chunk2UseIndex.put(bc, loc = new IntVector()); | 66 chunk2UseIndex.put(bc, loc = new IntVector()); |
| 68 } | 67 } |
| 69 loc.add(i); | 68 loc.add(i); |
| 79 // } | 78 // } |
| 80 // System.out.println("}"); | 79 // System.out.println("}"); |
| 81 // } | 80 // } |
| 82 } | 81 } |
| 83 | 82 |
| 84 public void findMatchingBlocks(MatchInspector insp) { | 83 public void findMatchingBlocks(MatchInspector<T> insp) { |
| 85 insp.begin(seq1, seq2); | 84 insp.begin(seq1, seq2); |
| 86 matchInspector = insp; | 85 matchInspector = insp; |
| 87 findMatchingBlocks(0, seq1.chunkCount(), 0, seq2.chunkCount()); | 86 findMatchingBlocks(0, seq1.chunkCount(), 0, seq2.chunkCount()); |
| 88 insp.end(); | 87 insp.end(); |
| 89 } | 88 } |
| 94 public int longestMatch(int startS1, int endS1, int startS2, int endS2) { | 93 public int longestMatch(int startS1, int endS1, int startS2, int endS2) { |
| 95 matchStartS1 = matchStartS2 = 0; | 94 matchStartS1 = matchStartS2 = 0; |
| 96 int maxLength = 0; | 95 int maxLength = 0; |
| 97 IntMap<Integer> chunkIndex2MatchCount = new IntMap<Integer>(8); | 96 IntMap<Integer> chunkIndex2MatchCount = new IntMap<Integer>(8); |
| 98 for (int i = startS1; i < endS1; i++) { | 97 for (int i = startS1; i < endS1; i++) { |
| 99 ChunkSequence.ByteChain bc = seq1.chunk(i); | 98 Chunk bc = seq1.chunk(i); |
| 100 IntMap<Integer> newChunkIndex2MatchCount = new IntMap<Integer>(8); | 99 IntMap<Integer> newChunkIndex2MatchCount = new IntMap<Integer>(8); |
| 101 IntVector occurencesInS2 = chunk2UseIndex.get(bc); | 100 IntVector occurencesInS2 = chunk2UseIndex.get(bc); |
| 102 if (occurencesInS2 == null) { | 101 if (occurencesInS2 == null) { |
| 103 // chunkIndex2MatchCount.clear(); // TODO need clear instead of new instance | 102 // chunkIndex2MatchCount.clear(); // TODO need clear instead of new instance |
| 104 chunkIndex2MatchCount = newChunkIndex2MatchCount; | 103 chunkIndex2MatchCount = newChunkIndex2MatchCount; |
| 138 findMatchingBlocks(saveStartS1 + matchLength, endS1, saveStartS2 + matchLength, endS2); | 137 findMatchingBlocks(saveStartS1 + matchLength, endS1, saveStartS2 + matchLength, endS2); |
| 139 } | 138 } |
| 140 } | 139 } |
| 141 } | 140 } |
| 142 | 141 |
| 143 interface MatchInspector { | 142 interface MatchInspector<T extends ChunkSequence<?>> { |
| 144 void begin(ChunkSequence s1, ChunkSequence s2); | 143 void begin(T s1, T s2); |
| 145 void match(int startSeq1, int startSeq2, int matchLength); | 144 void match(int startSeq1, int startSeq2, int matchLength); |
| 146 void end(); | 145 void end(); |
| 147 } | 146 } |
| 148 | 147 |
| 149 static class MatchDumpInspector implements MatchInspector { | 148 static class MatchDumpInspector<T extends ChunkSequence<?>> implements MatchInspector<T> { |
| 150 public void begin(ChunkSequence s1, ChunkSequence s2) { | 149 public void begin(T s1, T s2) { |
| 151 } | 150 } |
| 152 | 151 |
| 153 public void match(int startSeq1, int startSeq2, int matchLength) { | 152 public void match(int startSeq1, int startSeq2, int matchLength) { |
| 154 System.out.printf("match: from line #%d and line #%d of length %d\n", startSeq1, startSeq2, matchLength); | 153 System.out.printf("match: from line #%d and line #%d of length %d\n", startSeq1, startSeq2, matchLength); |
| 155 } | 154 } |
| 156 | 155 |
| 157 public void end() { | 156 public void end() { |
| 158 } | 157 } |
| 159 } | 158 } |
| 160 | 159 |
| 161 static class DeltaInspector implements MatchInspector { | 160 static class DeltaInspector<T extends ChunkSequence<?>> implements MatchInspector<T> { |
| 162 protected int changeStartS1, changeStartS2; | 161 protected int changeStartS1, changeStartS2; |
| 163 protected ChunkSequence seq1, seq2; | 162 protected T seq1, seq2; |
| 164 | 163 |
| 165 | 164 public void begin(T s1, T s2) { |
| 166 public void begin(ChunkSequence s1, ChunkSequence s2) { | |
| 167 seq1 = s1; | 165 seq1 = s1; |
| 168 seq2 = s2; | 166 seq2 = s2; |
| 169 changeStartS1 = changeStartS2 = 0; | 167 changeStartS1 = changeStartS2 = 0; |
| 170 } | 168 } |
| 171 | 169 |
| 221 protected void unchanged(int s1From, int s2From, int length) { | 219 protected void unchanged(int s1From, int s2From, int length) { |
| 222 // NO-OP | 220 // NO-OP |
| 223 } | 221 } |
| 224 } | 222 } |
| 225 | 223 |
| 226 static class DeltaDumpInspector extends DeltaInspector { | 224 static class DeltaDumpInspector<T extends ChunkSequence<?>> extends DeltaInspector<T> { |
| 227 | 225 |
| 228 @Override | 226 @Override |
| 229 protected void changed(int s1From, int s1To, int s2From, int s2To) { | 227 protected void changed(int s1From, int s1To, int s2From, int s2To) { |
| 230 System.out.printf("changed [%d..%d) with [%d..%d)\n", s1From, s1To, s2From, s2To); | 228 System.out.printf("changed [%d..%d) with [%d..%d)\n", s1From, s1To, s2From, s2To); |
| 231 } | 229 } |
| 240 System.out.printf("added [%d..%d) at %d\n", s2From, s2To, s1InsertPoint); | 238 System.out.printf("added [%d..%d) at %d\n", s2From, s2To, s1InsertPoint); |
| 241 } | 239 } |
| 242 | 240 |
| 243 } | 241 } |
| 244 | 242 |
| 245 static class PatchFillInspector extends DeltaInspector { | |
| 246 private final Patch deltaCollector; | |
| 247 | |
| 248 PatchFillInspector(Patch p) { | |
| 249 assert p != null; | |
| 250 deltaCollector = p; | |
| 251 } | |
| 252 | |
| 253 @Override | |
| 254 protected void changed(int s1From, int s1To, int s2From, int s2To) { | |
| 255 int from = seq1.chunk(s1From).getOffset(); | |
| 256 int to = seq1.chunk(s1To).getOffset(); | |
| 257 byte[] data = seq2.data(s2From, s2To); | |
| 258 deltaCollector.add(from, to, data); | |
| 259 } | |
| 260 | |
| 261 @Override | |
| 262 protected void deleted(int s2DeletionPoint, int s1From, int s1To) { | |
| 263 int from = seq1.chunk(s1From).getOffset(); | |
| 264 int to = seq1.chunk(s1To).getOffset(); | |
| 265 deltaCollector.add(from, to, new byte[0]); | |
| 266 } | |
| 267 | |
| 268 @Override | |
| 269 protected void added(int s1InsertPoint, int s2From, int s2To) { | |
| 270 int insPoint = seq1.chunk(s1InsertPoint).getOffset(); | |
| 271 byte[] data = seq2.data(s2From, s2To); | |
| 272 deltaCollector.add(insPoint, insPoint, data); | |
| 273 } | |
| 274 } | |
| 275 | |
| 276 | |
| 277 | |
| 278 public static void main(String[] args) throws Exception { | 243 public static void main(String[] args) throws Exception { |
| 279 PatchGenerator pg1 = new PatchGenerator(); | 244 PatchGenerator<LineSequence> pg1 = new PatchGenerator<LineSequence>(); |
| 280 pg1.init("hello".getBytes(), "hello\nworld".getBytes()); | 245 pg1.init(LineSequence.newlines("hello".getBytes()), LineSequence.newlines("hello\nworld".getBytes())); |
| 281 pg1.findMatchingBlocks(new MatchDumpInspector()); | 246 pg1.findMatchingBlocks(new MatchDumpInspector<LineSequence>()); |
| 282 pg1.findMatchingBlocks(new DeltaDumpInspector()); | 247 pg1.findMatchingBlocks(new DeltaDumpInspector<LineSequence>()); |
| 283 if (Boolean.FALSE.booleanValue()) { | 248 if (Boolean.FALSE.booleanValue()) { |
| 284 return; | 249 return; |
| 285 } | 250 } |
| 286 HgRepository repo = new HgLookup().detectFromWorkingDir(); | 251 HgRepository repo = new HgLookup().detectFromWorkingDir(); |
| 287 HgDataFile df = repo.getFileNode("cmdline/org/tmatesoft/hg/console/Main.java"); | 252 HgDataFile df = repo.getFileNode("cmdline/org/tmatesoft/hg/console/Main.java"); |
| 288 ByteArrayChannel bac1, bac2; | 253 ByteArrayChannel bac1, bac2; |
| 289 df.content(80, bac1 = new ByteArrayChannel()); | 254 df.content(80, bac1 = new ByteArrayChannel()); |
| 290 df.content(81, bac2 = new ByteArrayChannel()); | 255 df.content(81, bac2 = new ByteArrayChannel()); |
| 291 // String s1 = "line 1\nline 2\r\nline 3\n\nline 1\nline 2"; | 256 // String s1 = "line 1\nline 2\r\nline 3\n\nline 1\nline 2"; |
| 292 // String s2 = "abc\ncdef\r\nline 2\r\nline 3\nline 2"; | 257 // String s2 = "abc\ncdef\r\nline 2\r\nline 3\nline 2"; |
| 293 PatchGenerator pg = new PatchGenerator(); | 258 PatchGenerator<LineSequence> pg = new PatchGenerator<LineSequence>(); |
| 294 pg.init(bac1.toArray(), bac2.toArray()); | 259 byte[] data1 = bac1.toArray(); |
| 260 byte[] data2 = bac2.toArray(); | |
| 261 pg.init(new LineSequence(data1).splitByNewlines(), new LineSequence(data2).splitByNewlines()); | |
| 295 System.out.println("Matches:"); | 262 System.out.println("Matches:"); |
| 296 pg.findMatchingBlocks(new MatchDumpInspector()); | 263 pg.findMatchingBlocks(new MatchDumpInspector<LineSequence>()); |
| 297 System.out.println("Deltas:"); | 264 System.out.println("Deltas:"); |
| 298 pg.findMatchingBlocks(new DeltaDumpInspector()); | 265 pg.findMatchingBlocks(new DeltaDumpInspector<LineSequence>()); |
| 299 } | 266 } |
| 300 | 267 |
| 301 public Patch delta(byte[] prev, byte[] content) { | 268 /** |
| 302 Patch rv = new Patch(); | 269 * Unsure if this marker interface worth presence |
| 303 init(prev, content); | |
| 304 findMatchingBlocks(new PatchFillInspector(rv)); | |
| 305 return rv; | |
| 306 } | |
| 307 | |
| 308 /* | |
| 309 * TODO shall be parameterized (template?) and refacctored to facilitate matching non lines only | |
| 310 * (sequence diff algorithm above doesn't care about sequence nature) | |
| 311 */ | 270 */ |
| 312 static final class ChunkSequence { | 271 public interface Chunk { |
| 272 } | |
| 273 | |
| 274 /** | |
| 275 * Generic sequence of chunk, where chunk is anything comparable to another chunk, e.g. a string or a single char | |
| 276 * Sequence diff algorithm above doesn't care about sequence nature. | |
| 277 */ | |
| 278 public interface ChunkSequence<T extends Chunk> { | |
| 279 public T chunk(int index); | |
| 280 public int chunkCount(); | |
| 281 } | |
| 282 | |
| 283 static final class LineSequence implements ChunkSequence<LineSequence.ByteChain> { | |
| 313 | 284 |
| 314 private final byte[] input; | 285 private final byte[] input; |
| 315 private ArrayList<ByteChain> lines; | 286 private ArrayList<ByteChain> lines; |
| 316 | 287 |
| 317 public ChunkSequence(byte[] data) { | 288 public LineSequence(byte[] data) { |
| 318 input = data; | 289 input = data; |
| 319 } | 290 } |
| 320 | 291 |
| 321 public void splitByNewlines() { | 292 public static LineSequence newlines(byte[] array) { |
| 293 return new LineSequence(array).splitByNewlines(); | |
| 294 } | |
| 295 | |
| 296 public LineSequence splitByNewlines() { | |
| 322 lines = new ArrayList<ByteChain>(); | 297 lines = new ArrayList<ByteChain>(); |
| 323 int lastStart = 0; | 298 int lastStart = 0; |
| 324 for (int i = 0; i < input.length; i++) { | 299 for (int i = 0; i < input.length; i++) { |
| 325 if (input[i] == '\n') { | 300 if (input[i] == '\n') { |
| 326 lines.add(new ByteChain(lastStart, i+1)); | 301 lines.add(new ByteChain(lastStart, i+1)); |
| 336 if (lastStart < input.length) { | 311 if (lastStart < input.length) { |
| 337 lines.add(new ByteChain(lastStart, input.length)); | 312 lines.add(new ByteChain(lastStart, input.length)); |
| 338 } | 313 } |
| 339 // empty chunk to keep offset of input end | 314 // empty chunk to keep offset of input end |
| 340 lines.add(new ByteChain(input.length, input.length)); | 315 lines.add(new ByteChain(input.length, input.length)); |
| 316 return this; | |
| 341 } | 317 } |
| 342 | 318 |
| 343 public ByteChain chunk(int index) { | 319 public ByteChain chunk(int index) { |
| 344 return lines.get(index); | 320 return lines.get(index); |
| 345 } | 321 } |
| 357 System.arraycopy(input, from, rv, 0, rv.length); | 333 System.arraycopy(input, from, rv, 0, rv.length); |
| 358 return rv; | 334 return rv; |
| 359 } | 335 } |
| 360 | 336 |
| 361 | 337 |
| 362 final class ByteChain { | 338 final class ByteChain implements Chunk { |
| 363 private final int start, end; | 339 private final int start, end; |
| 364 private final int hash; | 340 private final int hash; |
| 365 | 341 |
| 366 ByteChain(int s, int e) { | 342 ByteChain(int s, int e) { |
| 367 start = s; | 343 start = s; |
| 394 return other.match(input, start); | 370 return other.match(input, start); |
| 395 } | 371 } |
| 396 | 372 |
| 397 private boolean match(byte[] oi, int from) { | 373 private boolean match(byte[] oi, int from) { |
| 398 for (int i = start, j = from; i < end; i++, j++) { | 374 for (int i = start, j = from; i < end; i++, j++) { |
| 399 if (ChunkSequence.this.input[i] != oi[j]) { | 375 if (LineSequence.this.input[i] != oi[j]) { |
| 400 return false; | 376 return false; |
| 401 } | 377 } |
| 402 } | 378 } |
| 403 return true; | 379 return true; |
| 404 } | 380 } |
