annotate src/org/tmatesoft/hg/internal/PatchGenerator.java @ 533:e6f72c9829a6

Generate patches using diff algorithm
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 30 Jan 2013 15:48:36 +0100
parents
children dd4f6311af52
rev   line source
533
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
1 /*
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
2 * Copyright (c) 2013 TMate Software Ltd
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
3 *
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
4 * This program is free software; you can redistribute it and/or modify
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
5 * it under the terms of the GNU General Public License as published by
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
6 * the Free Software Foundation; version 2 of the License.
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
7 *
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
8 * This program is distributed in the hope that it will be useful,
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
11 * GNU General Public License for more details.
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
12 *
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
13 * For information on how to redistribute this software under
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
14 * the terms of a license other than GNU General Public License
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
15 * contact TMate Software at support@hg4j.com
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
16 */
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
17 package org.tmatesoft.hg.internal;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
18
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
19 import java.util.ArrayList;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
20 import java.util.HashMap;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
21 import java.util.Map;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
22
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
23 import org.tmatesoft.hg.repo.HgDataFile;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
24 import org.tmatesoft.hg.repo.HgLookup;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
25 import org.tmatesoft.hg.repo.HgRepository;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
26
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
27 /**
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
28 * Mercurial cares about changes only up to the line level, e.g. a simple file version bump in manifest looks like (RevlogDump output):
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
29 *
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
30 * 522: 233748 0 103 17438 433 522 521 -1 756073cf2321df44d3ed0585f2a5754bc8a1b2f6
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
31 * <PATCH>:
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
32 * 3487..3578, 91:src/org/tmatesoft/hg/core/HgIterateDirection.java\00add61a8a665c5d8f092210767f812fe0d335ac8
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
33 *
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
34 * I.e. for the {fname}{revision} entry format of manifest, not only {revision} is changed, but the whole line, with unchanged {fname} is recorded
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
35 * in the patch.
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
36 *
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
37 * Mercurial paper describes reasons for choosing this approach to delta generation, too.
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
38 *
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
39 *
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
40 * @author Artem Tikhomirov
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
41 * @author TMate Software Ltd.
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
42 */
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
43 public class PatchGenerator {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
44
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
45 private Map<ChunkSequence.ByteChain, IntVector> chunk2UseIndex;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
46 private ChunkSequence seq1, seq2;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
47
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
48 // get filled by #longestMatch, track start of common sequence in seq1 and seq2, respectively
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
49 private int matchStartS1, matchStartS2;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
50 // get filled by #findMatchingBlocks, track start of changed/unknown sequence in seq1 and seq2
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
51 private int changeStartS1, changeStartS2;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
52
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
53 public void init(byte[] data1, byte[] data2) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
54 seq1 = new ChunkSequence(data1);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
55 seq1.splitByNewlines();
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
56 seq2 = new ChunkSequence(data2);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
57 seq2.splitByNewlines();
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
58 prepare(seq2);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
59 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
60
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
61 private void prepare(ChunkSequence s2) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
62 chunk2UseIndex = new HashMap<ChunkSequence.ByteChain, IntVector>();
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
63 for (int i = 0, len = s2.chunkCount(); i < len; i++) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
64 ChunkSequence.ByteChain bc = s2.chunk(i);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
65 IntVector loc = chunk2UseIndex.get(bc);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
66 if (loc == null) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
67 chunk2UseIndex.put(bc, loc = new IntVector());
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
68 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
69 loc.add(i);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
70 // bc.registerUseIn(i) - BEWARE, use of bc here is incorrect
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
71 // in this case need to find the only ByteChain to keep indexes
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
72 // i.e. when there are few equal ByteChain instances, notion of "usedIn" shall be either shared (reference same vector)
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
73 // or kept within only one of them
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
74 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
75 // for (ChunkSequence.ByteChain bc : chunk2UseIndex.keySet()) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
76 // System.out.printf("%s: {", new String(bc.data()));
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
77 // for (int x : chunk2UseIndex.get(bc).toArray()) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
78 // System.out.printf(" %d,", x);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
79 // }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
80 // System.out.println("}");
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
81 // }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
82 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
83
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
84 public void findMatchingBlocks() {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
85 changeStartS1 = changeStartS2 = 0;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
86 findMatchingBlocks(0, seq1.chunkCount(), 0, seq2.chunkCount());
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
87 if (changeStartS1 < seq1.chunkCount() || changeStartS2 < seq2.chunkCount()) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
88 reportDeltaElement(seq1.chunkCount(), seq2.chunkCount());
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
89 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
90 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
91
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
92 /**
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
93 * implementation based on Python's difflib.py and SequenceMatcher
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
94 */
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
95 public int longestMatch(int startS1, int endS1, int startS2, int endS2) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
96 matchStartS1 = matchStartS2 = 0;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
97 int maxLength = 0;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
98 IntMap<Integer> chunkIndex2MatchCount = new IntMap<Integer>(8);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
99 for (int i = startS1; i < endS1; i++) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
100 ChunkSequence.ByteChain bc = seq1.chunk(i);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
101 IntMap<Integer> newChunkIndex2MatchCount = new IntMap<Integer>(8);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
102 IntVector occurencesInS2 = chunk2UseIndex.get(bc);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
103 if (occurencesInS2 == null) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
104 // chunkIndex2MatchCount.clear(); // TODO need clear instead of new instance
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
105 chunkIndex2MatchCount = newChunkIndex2MatchCount;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
106 continue;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
107 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
108 for (int j : occurencesInS2.toArray()) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
109 // s1[i] == s2[j]
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
110 if (j < startS2) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
111 continue;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
112 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
113 if (j >= endS2) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
114 break;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
115 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
116 int prevChunkMatches = chunkIndex2MatchCount.containsKey(j-1) ? chunkIndex2MatchCount.get(j-1) : 0;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
117 int k = prevChunkMatches + 1;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
118 newChunkIndex2MatchCount.put(j, k);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
119 if (k > maxLength) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
120 matchStartS1 = i-k+1;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
121 matchStartS2 = j-k+1;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
122 maxLength = k;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
123 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
124 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
125 chunkIndex2MatchCount = newChunkIndex2MatchCount;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
126 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
127 return maxLength;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
128 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
129
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
130 public void findMatchingBlocks(int startS1, int endS1, int startS2, int endS2) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
131 int matchLength = longestMatch(startS1, endS1, startS2, endS2);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
132 if (matchLength > 0) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
133 final int saveStartS1 = matchStartS1, saveStartS2 = matchStartS2;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
134 if (startS1 < matchStartS1 && startS2 < matchStartS2) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
135 findMatchingBlocks(startS1, matchStartS1, startS2, matchStartS2);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
136 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
137 reportDeltaElement(saveStartS1, saveStartS2);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
138 changeStartS1 = saveStartS1 + matchLength;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
139 changeStartS2 = saveStartS2 + matchLength;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
140 // System.out.printf("match: from line #%d and line #%d of length %d\n", saveStartS1, saveStartS2, matchLength);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
141 if (saveStartS1+matchLength < endS1 && saveStartS2+matchLength < endS2) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
142 findMatchingBlocks(saveStartS1 + matchLength, endS1, saveStartS2 + matchLength, endS2);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
143 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
144 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
145 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
146
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
147 private Patch deltaCollector;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
148
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
149 private void reportDeltaElement(int i, int j) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
150 if (changeStartS1 < i) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
151 if (changeStartS2 < j) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
152 System.out.printf("changed [%d..%d) with [%d..%d)\n", changeStartS1, i, changeStartS2, j);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
153 } else {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
154 assert changeStartS2 == j;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
155 System.out.printf("deleted [%d..%d)\n", changeStartS1, i);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
156 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
157 if (deltaCollector != null) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
158 int from = seq1.chunk(changeStartS1).getOffset();
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
159 int to = seq1.chunk(i).getOffset();
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
160 byte[] data = seq2.data(changeStartS2, j);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
161 deltaCollector.add(from, to, data);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
162 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
163 } else {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
164 assert changeStartS1 == i;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
165 if(changeStartS2 < j) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
166 System.out.printf("added [%d..%d)\n", changeStartS2, j);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
167 } else {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
168 assert changeStartS2 == j;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
169 System.out.printf("adjustent equal blocks %d, %d and %d,%d\n", changeStartS1, i, changeStartS2, j);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
170 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
171 if (deltaCollector != null) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
172 int insPoint = seq1.chunk(changeStartS1).getOffset();
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
173 byte[] data = seq2.data(changeStartS2, j);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
174 deltaCollector.add(insPoint, insPoint, data);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
175 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
176 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
177 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
178
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
179 public static void main(String[] args) throws Exception {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
180 HgRepository repo = new HgLookup().detectFromWorkingDir();
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
181 HgDataFile df = repo.getFileNode("cmdline/org/tmatesoft/hg/console/Main.java");
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
182 ByteArrayChannel bac1, bac2;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
183 df.content(80, bac1 = new ByteArrayChannel());
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
184 df.content(81, bac2 = new ByteArrayChannel());
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
185 // String s1 = "line 1\nline 2\r\nline 3\n\nline 1\nline 2";
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
186 // String s2 = "abc\ncdef\r\nline 2\r\nline 3\nline 2";
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
187 PatchGenerator pg = new PatchGenerator();
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
188 pg.init(bac1.toArray(), bac2.toArray());
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
189 pg.findMatchingBlocks();
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
190 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
191
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
192 public Patch delta(byte[] prev, byte[] content) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
193 deltaCollector = new Patch();
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
194 init(prev, content);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
195 findMatchingBlocks();
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
196 return deltaCollector;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
197 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
198
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
199 private static class ChunkSequence {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
200
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
201 private final byte[] input;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
202 private ArrayList<ByteChain> lines;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
203
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
204 public ChunkSequence(byte[] data) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
205 input = data;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
206 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
207
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
208 public void splitByNewlines() {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
209 lines = new ArrayList<ByteChain>();
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
210 int lastStart = 0;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
211 for (int i = 0; i < input.length; i++) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
212 if (input[i] == '\n') {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
213 lines.add(new ByteChain(lastStart, i+1));
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
214 lastStart = i+1;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
215 } else if (input[i] == '\r') {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
216 if (i+1 < input.length && input[i+1] == '\n') {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
217 i++;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
218 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
219 lines.add(new ByteChain(lastStart, i+1));
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
220 lastStart = i+1;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
221 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
222 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
223 if (lastStart < input.length) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
224 lines.add(new ByteChain(lastStart, input.length));
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
225 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
226 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
227
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
228 public ByteChain chunk(int index) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
229 return lines.get(index);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
230 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
231
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
232 public int chunkCount() {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
233 return lines.size();
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
234 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
235
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
236 public byte[] data(int chunkFrom, int chunkTo) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
237 if (chunkFrom == chunkTo) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
238 return new byte[0];
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
239 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
240 int from = chunk(chunkFrom).getOffset(), to = chunk(chunkTo).getOffset();
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
241 byte[] rv = new byte[to - from];
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
242 System.arraycopy(input, from, rv, 0, rv.length);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
243 return rv;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
244 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
245
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
246
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
247 final class ByteChain {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
248 private final int start, end;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
249 private final int hash;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
250
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
251 ByteChain(int s, int e) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
252 start = s;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
253 end = e;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
254 hash = calcHash(input, s, e);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
255 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
256
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
257 /**
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
258 * byte offset of the this ByteChain inside ChainSequence
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
259 */
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
260 public int getOffset() {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
261 return start;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
262 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
263
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
264 public byte[] data() {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
265 byte[] rv = new byte[end - start];
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
266 System.arraycopy(input, start, rv, 0, rv.length);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
267 return rv;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
268 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
269
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
270 @Override
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
271 public boolean equals(Object obj) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
272 if (obj == null || obj.getClass() != ByteChain.class) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
273 return false;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
274 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
275 ByteChain other = (ByteChain) obj;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
276 if (other.hash != hash || other.end - other.start != end - start) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
277 return false;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
278 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
279 return other.match(input, start);
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
280 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
281
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
282 private boolean match(byte[] oi, int from) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
283 for (int i = start, j = from; i < end; i++, j++) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
284 if (ChunkSequence.this.input[i] != oi[j]) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
285 return false;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
286 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
287 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
288 return true;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
289 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
290
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
291 @Override
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
292 public int hashCode() {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
293 return hash;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
294 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
295
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
296 @Override
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
297 public String toString() {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
298 return String.format("[@%d\"%s\"]", start, new String(data()));
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
299 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
300 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
301
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
302 // same as Arrays.hashCode(byte[]), just for a slice of a bigger array
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
303 static int calcHash(byte[] data, int from, int to) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
304 int result = 1;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
305 for (int i = from; i < to; i++) {
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
306 result = 31 * result + data[i];
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
307 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
308 return result;
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
309 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
310 }
e6f72c9829a6 Generate patches using diff algorithm
Artem Tikhomirov <tikhomirov.artem@gmail.com>
parents:
diff changeset
311 }