comparison src/org/tmatesoft/hg/internal/Patch.java @ 583:47dfa0ec7e35

Effective revlog patching
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 24 Apr 2013 15:39:53 +0200
parents 243202f1bda5
children ed243b668502
comparison
equal deleted inserted replaced
582:90df078d6418 583:47dfa0ec7e35
84 public int count() { 84 public int count() {
85 return data.size(); 85 return data.size();
86 } 86 }
87 87
88 // number of bytes this patch will add (or remove, if negative) from the base revision 88 // number of bytes this patch will add (or remove, if negative) from the base revision
89 private int patchSizeDelta() { 89 public int patchSizeDelta() {
90 int rv = 0; 90 int rv = 0;
91 int prevEnd = 0; 91 int prevEnd = 0;
92 for (int i = 0, x = data.size(); i < x; i++) { 92 for (int i = 0, x = data.size(); i < x; i++) {
93 final int start = starts.get(i); 93 final int start = starts.get(i);
94 final int len = data.get(i).length; 94 final int len = data.get(i).length;
180 private void add(Patch p, int i) { 180 private void add(Patch p, int i) {
181 add(p.starts.get(i), p.ends.get(i), p.data.get(i)); 181 add(p.starts.get(i), p.ends.get(i), p.data.get(i));
182 } 182 }
183 183
184 /*package-local*/ void add(int start, int end, byte[] d) { 184 /*package-local*/ void add(int start, int end, byte[] d) {
185 // FIXME if start == end(-1), merge data
186 if (start == end && d.length == 0) {
187 System.currentTimeMillis();
188 return;
189 }
185 starts.add(start); 190 starts.add(start);
186 ends.add(end); 191 ends.add(end);
187 data.add(d); 192 data.add(d);
188 } 193 }
189 194
195 // copies [start..end) bytes from the d
190 private static byte[] subarray(byte[] d, int start, int end) { 196 private static byte[] subarray(byte[] d, int start, int end) {
191 byte[] r = new byte[end-start+1]; 197 byte[] r = new byte[end-start];
192 System.arraycopy(d, start, r, 0, r.length); 198 System.arraycopy(d, start, r, 0, r.length);
193 return r; 199 return r;
194 } 200 }
195 201
196 /** 202 /**
197 * Modify this patch with subsequent patch 203 * Modify this patch with subsequent patch
198 */ 204 */
199 private /*SHALL BE PUBLIC ONCE TESTING ENDS*/ Patch apply(Patch another) { 205 public /*SHALL BE PUBLIC ONCE TESTING ENDS*/ Patch apply(Patch another) {
200 Patch r = new Patch(); 206 Patch r = new Patch();
201 int p1TotalAppliedDelta = 0; // value to add to start and end indexes of the older patch to get their values as if 207 int p1TotalAppliedDelta = 0; // value to add to start and end indexes of the older patch to get their values as if
202 // in the patched text, iow, directly comparable with respective indexes from the newer patch. 208 // in the patched text, iow, directly comparable with respective indexes from the newer patch.
203 int p1EntryStart = 0, p1EntryEnd = 0, p1EntryLen = 0; 209 int p1EntryStart = 0, p1EntryEnd = 0, p1EntryLen = 0;
204 byte[] p1Data = null; 210 byte[] p1Data = null;
209 int p2EntryStart = another.starts.get(p2); 215 int p2EntryStart = another.starts.get(p2);
210 int p2EntryEnd = another.ends.get(p2); 216 int p2EntryEnd = another.ends.get(p2);
211 final int p2EntryRange = p2EntryEnd - p2EntryStart; 217 final int p2EntryRange = p2EntryEnd - p2EntryStart;
212 final byte[] p2Data = another.data.get(p2); 218 final byte[] p2Data = another.data.get(p2);
213 boolean insideP2entry = false; 219 boolean insideP2entry = false;
214 int p2EntryStartOffset = -1; 220 // when we iterate p1 elements within single p2, we need to remember where p2
221 // shall ultimately start in terms of p1
222 int p2EntrySavedStart = -1;
215 /// 223 ///
216 p1EntryStart = p1EntryEnd = p1EntryLen = 0;
217 p1Data = null;
218 224
219 L1: while (p1 < p1Max) { 225 L1: while (p1 < p1Max) {
220 if (!insideP1entry) { 226 if (!insideP1entry) {
221 p1EntryStart = starts.get(p1); 227 p1EntryStart = starts.get(p1);
222 p1EntryEnd = ends.get(p1); 228 p1EntryEnd = ends.get(p1);
227 final int p1EntryDelta = p1EntryLen - (p1EntryEnd - p1EntryStart); // number of actually inserted(+) or deleted(-) chars 233 final int p1EntryDelta = p1EntryLen - (p1EntryEnd - p1EntryStart); // number of actually inserted(+) or deleted(-) chars
228 final int p1EntryAppliedStart = p1TotalAppliedDelta + p1EntryStart; 234 final int p1EntryAppliedStart = p1TotalAppliedDelta + p1EntryStart;
229 final int p1EntryAppliedEnd = p1EntryAppliedStart + p1EntryLen; // end of j'th patch entry in the text which is source for p2 235 final int p1EntryAppliedEnd = p1EntryAppliedStart + p1EntryLen; // end of j'th patch entry in the text which is source for p2
230 236
231 if (insideP2entry) { 237 if (insideP2entry) {
232 if (p2EntryEnd < p1EntryAppliedStart) { 238 if (p2EntryEnd <= p1EntryAppliedStart) {
233 r.add(p2EntryStart - p2EntryStartOffset, p2EntryEnd - p1TotalAppliedDelta, p2Data); 239 r.add(p2EntrySavedStart, p2EntryEnd - p1TotalAppliedDelta, p2Data);
234 insideP2entry = false; 240 insideP2entry = false;
235 continue L0; 241 continue L0;
236 } 242 }
237 if (p2EntryEnd >= p1EntryAppliedEnd) { 243 if (p2EntryEnd >= p1EntryAppliedEnd) {
238 // when p2EntryEnd == p1EntryAppliedEnd, I assume p1TotalAppliedDelta can't be used for p2EntryEnd to get it to p1 range, but rather shall be 244 // when p2EntryEnd == p1EntryAppliedEnd, I assume p1TotalAppliedDelta can't be used for p2EntryEnd to get it to p1 range, but rather shall be
240 insideP1entry = false; 246 insideP1entry = false;
241 p1++; 247 p1++;
242 p1TotalAppliedDelta += p1EntryDelta; 248 p1TotalAppliedDelta += p1EntryDelta;
243 continue L1; 249 continue L1;
244 } 250 }
245 // p1EntryAppliedStart <= p2EntryEnd < p1EntryAppliedEnd 251 // p1EntryAppliedStart < p2EntryEnd < p1EntryAppliedEnd
246 r.add(p2EntryStart - p2EntryStartOffset, p2EntryEnd - p1TotalAppliedDelta, p2Data); 252 // can add up to p1EntryEnd here (not only to p1EntryStart), but decided
247 p1EntryStart = p2EntryEnd - p1TotalAppliedDelta; 253 // to leave p1 intact here, to avoid delta/range recalculation
248 final int p1DataPartShift = p2EntryEnd - p1EntryAppliedStart + 1; 254 r.add(p2EntrySavedStart, p1EntryStart, p2Data);
249 if (p1DataPartShift >= p1EntryLen) { 255 // consume part of p1 overlapped by current p2
250 p1EntryLen = 0; 256 final int p1DataPartShift = p2EntryEnd - p1EntryAppliedStart;
251 p1Data = new byte[0]; 257 // p2EntryEnd < p1EntryAppliedEnd ==> p2EntryEnd < p1EntryAppliedStart + p1EntryLen
252 } else { 258 // ==> p2EntryEnd - p1EntryAppliedStart < p1EntryLen
253 p1EntryLen -= p1DataPartShift; 259 assert p1DataPartShift < p1EntryLen;
254 p1Data = subarray(p1Data, p1DataPartShift, p1Data.length); 260 p1EntryLen -= p1DataPartShift;
255 } 261 p1Data = subarray(p1Data, p1DataPartShift, p1Data.length);
262 p1TotalAppliedDelta += p1DataPartShift;
256 insideP1entry = true; 263 insideP1entry = true;
257 insideP2entry = false; 264 insideP2entry = false;
258 continue L0; 265 continue L0;
259 } 266 }
260 267
263 // completely independent, copy and continue 270 // completely independent, copy and continue
264 r.add(p1EntryStart, p1EntryEnd, p1Data); 271 r.add(p1EntryStart, p1EntryEnd, p1Data);
265 insideP1entry = false; 272 insideP1entry = false;
266 p1++; 273 p1++;
267 // fall-through to get p1TotalAppliedDelta incremented 274 // fall-through to get p1TotalAppliedDelta incremented
268 } else { // SKETCH: II or III 275 } else { // SKETCH: II or III - p2 start inside p1 range
269 // remember, p1EntryDelta may be negative 276 // remember, p1EntryDelta may be negative
270 // shall break j'th entry into few 277 // shall break j'th entry into few
271 // fix p1's end/length 278 // fix p1's end/length
272 // p1EntryAppliedStart < p2EntryStart < p1EntryAppliedEnd 279 // p1EntryAppliedStart < p2EntryStart < p1EntryAppliedEnd, or, alternatively
273 int s = p2EntryStart - p1TotalAppliedDelta; // p2EntryStart in p1 scale. Is within p1 range 280 // p2EntryStart is from (p1EntryAppliedStart .. p1EntryAppliedStart + p1EntryLen)
274 if (s > p1EntryEnd) { 281 int p1DataPartEnd = p2EntryStart - p1EntryAppliedStart;
275 s = p1EntryEnd; 282 assert p1DataPartEnd < p1EntryLen;
276 } 283 r.add(p1EntryStart, p1EntryEnd, subarray(p1Data, 0, p1DataPartEnd));
277 int p1DataPartEnd = p2EntryStart - p1EntryAppliedStart; // index, not count. <= (p1EntryEnd-p1EntryStart). 284 if (p2EntryEnd <= p1EntryAppliedEnd) { // p2 fits completely into p1
278 // add what left from p1 285 r.add(p1EntryEnd, p1EntryEnd, p2Data);
279 if (p1DataPartEnd < p1EntryLen) { 286 // p2 consumed, p1 has p1EntryLen - p1DataPartEnd - p2EntryRange bytes left to *insert*
280 r.add(p1EntryStart, s, subarray(p1Data, 0, p1DataPartEnd)); 287 insideP1entry = true;
288 p1EntryStart = p1EntryEnd;
289 p1EntryLen -= p1DataPartEnd;
290 p1EntryLen -= p2EntryRange;
291 // p2EntryEnd <= p1EntryAppliedEnd ==> p2EntryEnd <= p1EntryAppliedStart + p1EntryLen
292 // -p2EntryStart ==> p2EntryRange <= p1EntryAppliedStart-p2EntryStart + p1EntryLen
293 // p1EntryAppliedStart-p2EntryStart = -p1DataPartEnd ==> p2EntryRange <= p1EntryLen - p1DataEndPart
294 // +p1DataEndPart ==> p2EntryRange + p1DataEndPart <= p1EntryLen
295 assert p1EntryLen >= 0;
296 // p1EntryLen==0 with insideP1entry == true is nor really good here (gives empty patch elements x;x;0),
297 // however changing <= to < in p2EntryEnd <= p1EntryAppliedEnd above leads to errors
298 p1Data = subarray(p1Data, p1DataPartEnd+p2EntryRange, p1Data.length);
299 // augment total delta with p1EntryDelta part already consumed (p1EntryLen is pure insertion left for the next step)
300 p1TotalAppliedDelta += (p1EntryDelta - p1EntryLen);
301 continue L0;
281 } else { 302 } else {
282 p1DataPartEnd = p1EntryLen-1; // record factual number of p1 bytes we consumed. 303 // p1 is consumed, take next
283 r.add(p1EntryStart, s, p1Data);
284 }
285 p1TotalAppliedDelta += p1DataPartEnd - (s - p1EntryStart); // (s2 - (s1+delta)) - (s2 - delta - s1) = s2-s1-delta-s2+delta+s1 = 0, unless p1DataPartEnd >= p1Data.length
286 p1EntryLen -= (p1DataPartEnd+1);
287 if (p2EntryEnd < p1EntryAppliedEnd) {
288 // SKETCH: III
289 insideP1entry = true;
290 // p2 completely fits into changes of p1
291 int e = p2EntryEnd - p1TotalAppliedDelta; // p2EntryEnd in p1 scale
292 if (e > p1EntryEnd) {
293 // any index past p1 end shall be calculated with respect to p1 end, thus it's unsafe to go past p1 end (there may be more p1 entries there)
294 e = p1EntryEnd;
295 }
296 r.add(s, e, p2Data); // add p2
297 // modify p1 leftover
298 p1EntryStart = e;
299 if (p2EntryRange >= p1EntryLen) {
300 p1EntryLen = 0;
301 p1Data = new byte[0];
302 } else {
303 p1Data = subarray(p1Data, p1DataPartEnd + p2EntryRange, p1Data.length-1 /*up to the last one*/);
304 p1EntryLen -= p2EntryRange;
305 }
306 // p2 is handled, but there are leftovers of p1
307 continue L0;
308 } else { // p2EntryEnd >= p1EntryAppliedEnd
309 // SKETCH: II
310 insideP1entry = false; 304 insideP1entry = false;
311 p1++; 305 p1++;
312 if (p1EntryAppliedStart + p1EntryDelta >= p2EntryEnd) { 306 insideP2entry = true;
313 // here we know next p1 entry would be past p2 entry and thus can put p2 right away 307 p2EntrySavedStart = p1EntryEnd; // this is how far we've progressed in p1
314 r.add(p2EntryStart - p1TotalAppliedDelta, p1EntryEnd, p2Data); 308 // fall-through to get p1TotalAppliedDelta updated with consumed p1
315 p1TotalAppliedDelta += p1EntryDelta;
316 continue L0;
317 } else {
318 // there are chances there are more p1 entries till p2 ends
319 insideP2entry = true;
320 p2EntryStartOffset = p1TotalAppliedDelta;
321 // p2EntryEnd is past delta, no chances for p1Data leftovers to be in use
322 // p2 processing is not over, need to fix end, depending on what else fits into p2 range (if nothing, can put p2.end right away)
323 // fall-through to get p1TotalAppliedDelta incremented;
324 }
325 } 309 }
326 } 310 }
327 } else { // p1EntryAppliedStart >= p2EntryStart 311 } else { // p1EntryAppliedStart >= p2EntryStart
328 if (p2EntryEnd < p1EntryAppliedStart) { 312 if (p2EntryEnd < p1EntryAppliedStart) {
329 // newer patch completely fits between two older patches 313 // newer patch completely fits between two older patches
330 r.add(p2EntryStart - p1TotalAppliedDelta, p2EntryEnd - p1TotalAppliedDelta, p2Data); 314 r.add(p2EntryStart - p1TotalAppliedDelta, p2EntryEnd - p1TotalAppliedDelta, p2Data);
331 // SHALL NOT increment p1TotalAppliedDelta as we didn't use any of p1 315 // SHALL NOT increment p1TotalAppliedDelta as we didn't use any of p1
332 continue L0; // next p2 316 continue L0; // next p2
333 } else { // p2EntryEnd >= p1EntryAppliedStart 317 } else { // p2EntryEnd >= p1EntryAppliedStart
334 // SKETCH: I or IV 318 // SKETCH: I or IV:
335 // p2EntryEnd is either < p1EntryAppliedEnd or past it 319 // p2 start is outside of p1 range.
336 if (p2EntryEnd <= p1EntryAppliedEnd) { 320 //
321 // p2DataPartEnd: this is how many bytes prior to p1EntryStart is replaced by p2Data
322 int p2DataPartEnd = p1EntryAppliedStart - p2EntryStart;
323 if (p2EntryEnd < p1EntryAppliedEnd) {
337 // SKETCH: I: copy p2, strip p1 to start from p2EntryEnd, next i (p2) 324 // SKETCH: I: copy p2, strip p1 to start from p2EntryEnd, next i (p2)
338 insideP1entry = true; 325 insideP1entry = true;
339 int e = p2EntryEnd - p1TotalAppliedDelta; 326 // replace whole p1 (extended to the left by (p2 \ p1) front bytes)
340 if (e > p1EntryEnd) { 327 r.add(p1EntryStart - p2DataPartEnd, p1EntryEnd, p2Data);
341 e = p1EntryEnd; // added by analogy with above. Is needed? 328 p1EntryStart = p1EntryEnd;
342 } 329 // see how much of p1 is left for insertion
343 r.add(p2EntryStart - p1TotalAppliedDelta, e, p2Data); 330 int p1DataPartEnd = p2EntryEnd - p1EntryAppliedStart; // #1
344 p1EntryStart = e; 331 // Similar, although incorrect: p1DataPartEnd == p2Data.length - p2DataPartEnd; // #2
345 int p1DataShift = p2EntryEnd - p1EntryAppliedStart; 332 // #1(p2EntryStart + p2DataLen) - p1EntryAppliedStart
346 if (p1DataShift >= p1EntryLen) { 333 // #2 p2DataLen - (p1EntryAppliedStart - p2EntryStart)
347 p1EntryLen = 0; 334 // but this works only in assumption that p2EntryEnd-p2EntryStart == p2Data.length
348 p1Data = new byte[0]; 335 //
349 } else { 336 // p1EntryAppliedStart <= p2EntryEnd < p1EntryAppliedStart + p1EntryLen
350 p1EntryLen -= p1DataShift; 337 // -p1EntryAppliedStart (to compare against p1DataPartEnd) ==> 0 <= p1DataPartEnd < p1EntryLen
351 p1Data = subarray(p1Data, p1DataShift, p1Data.length - 1); 338 assert p1DataPartEnd < p1EntryLen;
352 } 339 assert p1DataPartEnd >= 0;
353 // p1TotalAppliedDelta would get incremented once this modified p1 is handled 340 p1EntryLen -= p1DataPartEnd;
341 p1Data = subarray(p1Data, p1DataPartEnd, p1Data.length);
342
343 // p1TotalAppliedDelta XXX
344 p1TotalAppliedDelta += (p1EntryDelta - p1EntryLen);
354 continue L0; // next p2; 345 continue L0; // next p2;
355 } else { 346 } else {
356 // p2EntryEnd > p1EntryAppliedEnd 347 // p2EntryEnd >= p1EntryAppliedEnd
357 // SKETCH IV: skip (rest of) p1 completely, continue the same unless found p1 with start or end past p2EntryEnd. 348 // SKETCH IV: skip (rest of) p1 completely, continue the same unless found p1 with start or end past p2EntryEnd.
358 insideP1entry = false; 349 insideP1entry = false;
350 // p1 consumed
359 p1++; 351 p1++;
360 insideP2entry = true; 352 insideP2entry = true;
361 p2EntryStartOffset = p1TotalAppliedDelta; 353 // extend to the left of p1 by p2 \ p1 front bytes
354 p2EntrySavedStart = p1EntryStart - p2DataPartEnd;
362 // fall-through to get p1TotalAppliedDelta incremented 355 // fall-through to get p1TotalAppliedDelta incremented
363 } 356 }
364 } 357 }
365 } 358 }
366 p1TotalAppliedDelta += p1EntryDelta; 359 p1TotalAppliedDelta += p1EntryDelta;
367 } // while (p1 < p1Max) 360 } // while (p1 < p1Max)
368 { 361 {
369 // no more p1 entries, shall close p2 (if it's handled, code above jumps directly to L0) 362 // no more p1 entries, shall close p2 (if it's handled, code above jumps directly to L0)
370 // regardless of whether insideP2 is .t 363 // regardless of whether insideP2 is .t
371 int s = p2EntryStart; 364 int s = p2EntrySavedStart != -1 ? p2EntrySavedStart : p2EntryStart - p1TotalAppliedDelta;
372 // p2EntryStartOffset != -1 when we started p2 entry processing, but not completed 365 // p2EntrySavedStart != -1 when we started p2 entry processing, but not completed
373 // if we handled last p1 entry but didn't start with p2 entry processing, it's -1 and regular p1 delta shall be used 366 // if we handled last p1 entry but didn't start with p2 entry processing, it's -1 and regular p1 delta shall be used
374 s -= p2EntryStartOffset == -1 ? p1TotalAppliedDelta : p2EntryStartOffset;
375 r.add(s, p2EntryEnd - p1TotalAppliedDelta, p2Data); 367 r.add(s, p2EntryEnd - p1TotalAppliedDelta, p2Data);
376 } 368 }
377 } 369 }
378 if (p1 < p1Max && insideP1entry) { 370 if (p1 < p1Max && insideP1entry) {
379 r.add(p1EntryStart, p1EntryEnd, p1Data); 371 r.add(p1EntryStart, p1EntryEnd, p1Data);
384 p1++; 376 p1++;
385 }; 377 };
386 return r; 378 return r;
387 } 379 }
388 380
381 /**
382 * Combine consecutive regions into one.
383 * XXX NOW only combines two subsequent regions, seems enough for quick test
384 * @return <code>this</code> or new instance of consecutive regions found
385 */
386 public Patch normalize() {
387 Patch rv = null;
388 for (int i = 1, x = data.size(); i < x; i++) {
389 if (starts.get(i) == ends.get(i-1)) {
390 if (rv == null) {
391 rv = new Patch();
392 rv.copyOf(this, 0, i-1);
393 // } else if (ends.get(i-1) == rv.ends.get(rv.ends.size()-1)) {
394 // // "JUST IN CASE" code, i++ below prevents us from getting here
395 // // if the last region is the one already merged,
396 // // ignore this occurrence (otherwise data(i-1) would get copied again)
397 // continue;
398 }
399 byte[] d1 = data.get(i-1);
400 byte[] d = data.get(i);
401 byte[] nd;
402 if (d1.length > 0 && d.length > 0) {
403 nd = new byte[d1.length + d.length];
404 System.arraycopy(d1, 0, nd, 0, d1.length);
405 System.arraycopy(d, 0, nd, d1.length, d.length);
406 } else {
407 nd = d1.length == 0 ? d : d1 ;
408 }
409 rv.add(starts.get(i-1), ends.get(i), nd);
410 i++; // skip i-th element (effectively means we detect only pairs)
411 // without this ++, element(i-1) is added to rv once "else" below is hit on the next step
412 } else {
413 if (rv != null) {
414 rv.add(this, i-1);
415 }
416 }
417 }
418 if (rv == null) {
419 return this;
420 } else {
421 int last = count() - 1;
422 if (starts.get(last) != ends.get(last-1)) {
423 rv.add(this, last);
424 }
425 }
426 return rv;
427 }
428
429 private void copyOf(Patch another, int fromIndex, int upToIndex) {
430 while(fromIndex < upToIndex) {
431 add(another, fromIndex++);
432 }
433 }
434
389 public class PatchDataSource implements DataSerializer.DataSource { 435 public class PatchDataSource implements DataSerializer.DataSource {
390 436
391 public void serialize(DataSerializer out) throws IOException { 437 public void serialize(DataSerializer out) throws IOException {
392 Patch.this.serialize(out); 438 Patch.this.serialize(out);
393 } 439 }