comparison src/org/tmatesoft/hg/internal/Patch.java @ 330:9747a786a34d

Patch merging algorithm complete trial
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Tue, 25 Oct 2011 03:30:02 +0200
parents 694ebabb5cb3
children 7f136a3fa671
comparison
equal deleted inserted replaced
329:694ebabb5cb3 330:9747a786a34d
16 */ 16 */
17 package org.tmatesoft.hg.internal; 17 package org.tmatesoft.hg.internal;
18 18
19 import java.io.IOException; 19 import java.io.IOException;
20 import java.util.ArrayList; 20 import java.util.ArrayList;
21 import java.util.Formatter;
21 22
22 /** 23 /**
23 * @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description 24 * @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description
24 * 25 *
25 * range [start..end] in original source gets replaced with data of length (do not keep, use data.length instead) 26 * range [start..end] in original source gets replaced with data of length (do not keep, use data.length instead)
29 * @author TMate Software Ltd. 30 * @author TMate Software Ltd.
30 */ 31 */
31 public final class Patch { 32 public final class Patch {
32 private final IntVector starts, ends; 33 private final IntVector starts, ends;
33 private final ArrayList<byte[]> data; 34 private final ArrayList<byte[]> data;
34 35
36 private static byte[] generate(int c) {
37 byte[] rv = new byte[c];
38 for (int i = 0; i < c; i++) {
39 byte x = (byte) ('a' + i);
40 rv[i] = x;
41 }
42 return rv;
43 }
44
45 public static void main(String[] args) {
46 Patch p1 = new Patch(), p2 = new Patch();
47 // simple cases (one element in either patch)
48 // III: (1,10 20) & (5,15,15) p2End from [p1End..p1AppliedEnd] (i.e. within p1 range but index is past p2 end index)
49 // II: (1,10,7) & (3,15,15) insideP2 = true and no more p1 entries
50 // II: (1,1,10) & (3,11,15)
51 // independent: (1,10,10) & (15,25,10); (15, 25, 10) & (1, 10, 10)
52 // I: (15, 25, 10) & (10, 20, 10). result: [10, 20, 10] [20, 25, 5]
53 // IV: (15, 25, 10) & (10, 30, 20)
54 //
55 // cycle with insideP2
56 //
57 // cycle with insideP1
58 //
59 // multiple elements in patches (offsets)
60 p1.add(15, 25, generate(10));
61 p2.add(10, 30, generate(20));
62 System.out.println("p1: " + p1);
63 System.out.println("p2: " + p2);
64 Patch r = p1.apply(p2);
65 System.out.println("r: " + r);
66 }
67
35 public Patch() { 68 public Patch() {
36 starts = new IntVector(); 69 starts = new IntVector();
37 ends = new IntVector(); 70 ends = new IntVector();
38 data = new ArrayList<byte[]>(); 71 data = new ArrayList<byte[]>();
72 }
73
74 public String toString() {
75 StringBuilder sb = new StringBuilder();
76 Formatter f = new Formatter(sb);
77 for (int i = 0; i < count(); i++) {
78 f.format("[%d, %d, %d] ", starts.get(i), ends.get(i), data.get(i).length);
79 }
80 return sb.toString();
39 } 81 }
40 82
41 public int count() { 83 public int count() {
42 return data.size(); 84 return data.size();
43 } 85 }
110 starts.add(s); 152 starts.add(s);
111 ends.add(e); 153 ends.add(e);
112 data.add(src); 154 data.add(src);
113 } 155 }
114 156
115 /* 157 private void add(Patch p, int i) {
116 private void add(Patch another, int index) { 158 add(p.starts.get(i), p.ends.get(i), p.data.get(i));
117 starts.add(another.starts.get(index)); 159 }
118 ends.add(another.ends.get(index)); 160
119 data.add(another.data.get(index)); 161 private void add(int start, int end, byte[] d) {
162 starts.add(start);
163 ends.add(end);
164 data.add(d);
165 }
166
167 private static byte[] subarray(byte[] d, int start, int end) {
168 byte[] r = new byte[end-start+1];
169 System.arraycopy(d, start, r, 0, r.length);
170 return r;
120 } 171 }
121 172
122 /** 173 /**
123 * Modify this patch with subsequent patch 174 * Modify this patch with subsequent patch
124 * / 175 */
125 public void apply(Patch another) { 176 private /*SHALL BE PUBLIC ONCE TESTING ENDS*/ Patch apply(Patch another) {
126 Patch r = new Patch(); 177 Patch r = new Patch();
127 int p1AppliedPos = 0; 178 int p1TotalAppliedDelta = 0; // value to add to start and end indexes of the older patch to get their values as if
128 int p1PrevEnd = 0; 179 // in the patched text, iow, directly comparable with respective indexes from the newer patch.
129 for (int i = 0, j = 0, iMax = another.count(), jMax = this.count(); i < iMax; i++) { 180 int p1EntryStart = 0, p1EntryEnd = 0, p1EntryLen = 0;
130 int newerPatchEntryStart = another.starts.get(i); 181 byte[] p1Data = null;
131 int olderPatchEntryEnd; 182 boolean insideP1entry = false;
183 int p2 = 0, p1 = 0;
184 final int p2Max = another.count(), p1Max = this.count();
185 L0: for (; p2 < p2Max; p2++) {
186 int p2EntryStart = another.starts.get(p2);
187 int p2EntryEnd = another.ends.get(p2);
188 final int p2EntryRange = p2EntryEnd - p2EntryStart;
189 final byte[] p2Data = another.data.get(p2);
190 boolean insideP2entry = false;
191 int p2EntryStartOffset = -1;
192 ///
193 p1EntryStart = p1EntryEnd = p1EntryLen = 0;
194 p1Data = null;
132 195
133 while (j < jMax) { 196 L1: while (p1 < p1Max) {
134 if (starts.get(j) < newerPatchEntryStart) { 197 if (!insideP1entry) {
135 if (starts.get(j)+data.get(j).length <= newerPatchEntryStart) { 198 p1EntryStart = starts.get(p1);
136 r.add(this, j); 199 p1EntryEnd = ends.get(p1);
200 p1Data = data.get(p1);
201 p1EntryLen = p1Data.length;
202 }// else keep values
203
204 final int p1EntryDelta = p1EntryLen - (p1EntryEnd - p1EntryStart); // number of actually inserted(+) or deleted(-) chars
205 final int p1EntryAppliedStart = p1TotalAppliedDelta + p1EntryStart;
206 final int p1EntryAppliedEnd = p1EntryAppliedStart + p1EntryLen; // end of j'th patch entry in the text which is source for p2
207
208 if (insideP2entry) {
209 if (p2EntryEnd < p1EntryAppliedStart) {
210 r.add(p2EntryStart - p2EntryStartOffset, p2EntryEnd - p1TotalAppliedDelta, p2Data);
211 insideP2entry = false;
212 continue L0;
213 }
214 if (p2EntryEnd >= p1EntryAppliedEnd) {
215 // when p2EntryEnd == p1EntryAppliedEnd, I assume p1TotalAppliedDelta can't be used for p2EntryEnd to get it to p1 range, but rather shall be
216 // augmented with current p1 entry and at the next p1 entry (likely to hit p1EntryAppliedStart > p2EntryEnd above) would do the rest
217 insideP1entry = false;
218 p1++;
219 p1TotalAppliedDelta += p1EntryDelta;
220 continue L1;
221 }
222 // p1EntryAppliedStart <= p2EntryEnd < p1EntryAppliedEnd
223 r.add(p2EntryStart - p2EntryStartOffset, p2EntryEnd - p1TotalAppliedDelta, p2Data);
224 p1EntryStart = p2EntryEnd - p1TotalAppliedDelta;
225 final int p1DataPartShift = p2EntryEnd - p1EntryAppliedStart + 1;
226 if (p1DataPartShift >= p1EntryLen) {
227 p1EntryLen = 0;
228 p1Data = new byte[0];
137 } else { 229 } else {
138 int newLen = newerPatchEntryStart - starts.get(j); 230 p1EntryLen -= p1DataPartShift;
139 int newEnd = ends.get(j) <= newerPatchEntryStart ? ends.get(j) : newerPatchEntryStart; 231 p1Data = subarray(p1Data, p1DataPartShift, p1Data.length);
140 r.add(starts.get(j), newEnd, data.get(j), newLen); 232 }
141 break; 233 insideP1entry = true;
142 } 234 insideP2entry = false;
235 continue L0;
143 } 236 }
144 p1AppliedPos += starts.get(j) - p1PrevEnd; 237
145 p1AppliedPos += data.get(j).length; 238 if (p1EntryAppliedStart < p2EntryStart) {
146 p1PrevEnd = ends.get(j); 239 if (p1EntryAppliedEnd <= p2EntryStart) { // p1EntryAppliedEnd in fact index of the first char *after* patch
147 j++; 240 // completely independent, copy and continue
241 r.add(p1EntryStart, p1EntryEnd, p1Data);
242 insideP1entry = false;
243 p1++;
244 // fall-through to get p1TotalAppliedDelta incremented
245 } else { // SKETCH: II or III
246 // remember, p1EntryDelta may be negative
247 // shall break j'th entry into few
248 // fix p1's end/length
249 // p1EntryAppliedStart < p2EntryStart < p1EntryAppliedEnd
250 int s = p2EntryStart - p1TotalAppliedDelta; // p2EntryStart in p1 scale. Is within p1 range
251 if (s > p1EntryEnd) {
252 s = p1EntryEnd;
253 }
254 int p1DataPartEnd = p2EntryStart - p1EntryAppliedStart; // index, not count. <= (p1EntryEnd-p1EntryStart).
255 // add what left from p1
256 if (p1DataPartEnd < p1EntryLen) {
257 r.add(p1EntryStart, s, subarray(p1Data, 0, p1DataPartEnd));
258 } else {
259 p1DataPartEnd = p1EntryLen-1; // record factual number of p1 bytes we consumed.
260 r.add(p1EntryStart, s, p1Data);
261 }
262 p1TotalAppliedDelta += p1DataPartEnd - (s - p1EntryStart); // (s2 - (s1+delta)) - (s2 - delta - s1) = s2-s1-delta-s2+delta+s1 = 0, unless p1DataPartEnd >= p1Data.length
263 p1EntryLen -= (p1DataPartEnd+1);
264 if (p2EntryEnd < p1EntryAppliedEnd) {
265 // SKETCH: III
266 insideP1entry = true;
267 // p2 completely fits into changes of p1
268 int e = p2EntryEnd - p1TotalAppliedDelta; // p2EntryEnd in p1 scale
269 if (e > p1EntryEnd) {
270 // any index past p1 end shall be calculated with respect to p1 end, thus it's unsafe to go past p1 end (there may be more p1 entries there)
271 e = p1EntryEnd;
272 }
273 r.add(s, e, p2Data); // add p2
274 // modify p1 leftover
275 p1EntryStart = e;
276 if (p2EntryRange >= p1EntryLen) {
277 p1EntryLen = 0;
278 p1Data = new byte[0];
279 } else {
280 p1Data = subarray(p1Data, p1DataPartEnd + p2EntryRange, p1Data.length-1 /*up to the last one*/);
281 p1EntryLen -= p2EntryRange;
282 }
283 // p2 is handled, but there are leftovers of p1
284 continue L0;
285 } else { // p2EntryEnd >= p1EntryAppliedEnd
286 // SKETCH: II
287 insideP1entry = false;
288 p1++;
289 if (p1EntryAppliedStart + p1EntryDelta >= p2EntryEnd) {
290 // here we know next p1 entry would be past p2 entry and thus can put p2 right away
291 r.add(p2EntryStart - p1TotalAppliedDelta, p1EntryEnd, p2Data);
292 p1TotalAppliedDelta += p1EntryDelta;
293 continue L0;
294 } else {
295 // there are chances there are more p1 entries till p2 ends
296 insideP2entry = true;
297 p2EntryStartOffset = p1TotalAppliedDelta;
298 // p2EntryEnd is past delta, no chances for p1Data leftovers to be in use
299 // p2 processing is not over, need to fix end, depending on what else fits into p2 range (if nothing, can put p2.end right away)
300 // fall-through to get p1TotalAppliedDelta incremented;
301 }
302 }
303 }
304 } else { // p1EntryAppliedStart >= p2EntryStart
305 if (p2EntryEnd < p1EntryAppliedStart) {
306 // newer patch completely fits between two older patches
307 r.add(p2EntryStart - p1TotalAppliedDelta, p2EntryEnd - p1TotalAppliedDelta, p2Data);
308 // SHALL NOT increment p1TotalAppliedDelta as we didn't use any of p1
309 continue L0; // next p2
310 } else { // p2EntryEnd >= p1EntryAppliedStart
311 // SKETCH: I or IV
312 // p2EntryEnd is either < p1EntryAppliedEnd or past it
313 if (p2EntryEnd <= p1EntryAppliedEnd) {
314 // SKETCH: I: copy p2, strip p1 to start from p2EntryEnd, next i (p2)
315 insideP1entry = true;
316 int e = p2EntryEnd - p1TotalAppliedDelta;
317 if (e > p1EntryEnd) {
318 e = p1EntryEnd; // added by analogy with above. Is needed?
319 }
320 r.add(p2EntryStart - p1TotalAppliedDelta, e, p2Data);
321 p1EntryStart = e;
322 int p1DataShift = p2EntryEnd - p1EntryAppliedStart;
323 if (p1DataShift >= p1EntryLen) {
324 p1EntryLen = 0;
325 p1Data = new byte[0];
326 } else {
327 p1EntryLen -= p1DataShift;
328 p1Data = subarray(p1Data, p1DataShift, p1Data.length - 1);
329 }
330 // p1TotalAppliedDelta would get incremented once this modified p1 is handled
331 continue L0; // next p2;
332 } else {
333 // p2EntryEnd > p1EntryAppliedEnd
334 // SKETCH IV: skip (rest of) p1 completely, continue the same unless found p1 with start or end past p2EntryEnd.
335 insideP1entry = false;
336 p1++;
337 insideP2entry = true;
338 p2EntryStartOffset = p1TotalAppliedDelta;
339 // fall-through to get p1TotalAppliedDelta incremented
340 }
341 }
342 }
343 p1TotalAppliedDelta += p1EntryDelta;
344 } // while (p1 < p1Max)
345 {
346 // no more p1 entries, shall close p2 (if it's handled, code above jumps directly to L0)
347 // regardless of whether insideP2 is .t
348 int s = p2EntryStart;
349 // p2EntryStartOffset != -1 when we started p2 entry processing, but not completed
350 // if we handled last p1 entry but didn't start with p2 entry processing, it's -1 and regular p1 delta shall be used
351 s -= p2EntryStartOffset == -1 ? p1TotalAppliedDelta : p2EntryStartOffset;
352 r.add(s, p2EntryEnd - p1TotalAppliedDelta, p2Data);
148 } 353 }
149 r.add(newerPatchEntryStart, another.ends.get(i), another.data.get(i)); 354 }
150 p1AppliedPos += newerPatchEntryStart + p1PrevEnd - another.data.get(i).length; 355 if (p1 < p1Max && insideP1entry) {
151 // either j == jMax and another(i, i+1, ..., iMax) need to be just copied 356 r.add(p1EntryStart, p1EntryEnd, p1Data);
152 // or new patch entry starts before end of one of original patch entries 357 p1++;
153 if (olderPatchEntryEnd > (destPosition + newerPatchEntryStart)) { 358 }
154 destPosition += starts.get(j) - prevEnd; // count those in the original stream up to old patch start 359 while (p1 < p1Max) {
155 int newLen = newerPatchEntryStart - destPosition; 360 r.add(this, p1);
156 } 361 p1++;
157 } 362 };
158 } 363 return r;
159 */ 364 }
160 } 365 }