diff src/org/tmatesoft/hg/internal/Patch.java @ 583:47dfa0ec7e35

Effective revlog patching
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 24 Apr 2013 15:39:53 +0200
parents 243202f1bda5
children ed243b668502
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/internal/Patch.java	Mon Apr 22 19:17:29 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/Patch.java	Wed Apr 24 15:39:53 2013 +0200
@@ -86,7 +86,7 @@
 	}
 
 	// number of bytes this patch will add (or remove, if negative) from the base revision
-	private int patchSizeDelta() {
+	public int patchSizeDelta() {
 		int rv = 0;
 		int prevEnd = 0;
 		for (int i = 0, x = data.size(); i < x; i++) {
@@ -182,13 +182,19 @@
 	}
 
 	/*package-local*/ void add(int start, int end, byte[] d) {
+		// FIXME if start == end(-1), merge data
+		if (start == end && d.length == 0) {
+			System.currentTimeMillis();
+			return;
+		}
 		starts.add(start);
 		ends.add(end);
 		data.add(d);
 	}
 	
+	// copies [start..end) bytes from the d
 	private static byte[] subarray(byte[] d, int start, int end) {
-		byte[] r = new byte[end-start+1];
+		byte[] r = new byte[end-start];
 		System.arraycopy(d, start, r, 0, r.length);
 		return r;
 	}
@@ -196,7 +202,7 @@
 	/**
 	 * Modify this patch with subsequent patch 
 	 */
-	private /*SHALL BE PUBLIC ONCE TESTING ENDS*/ Patch apply(Patch another) {
+	public /*SHALL BE PUBLIC ONCE TESTING ENDS*/ Patch apply(Patch another) {
 		Patch r = new Patch();
 		int p1TotalAppliedDelta = 0; // value to add to start and end indexes of the older patch to get their values as if
 		// in the patched text, iow, directly comparable with respective indexes from the newer patch.
@@ -211,10 +217,10 @@
 			final int p2EntryRange = p2EntryEnd - p2EntryStart;
 			final byte[] p2Data = another.data.get(p2);
 			boolean insideP2entry = false;
-			int p2EntryStartOffset = -1;
+			// when we iterate p1 elements within single p2, we need to remember where p2
+			// shall ultimately start in terms of p1
+			int p2EntrySavedStart = -1;
 			///
-			p1EntryStart = p1EntryEnd = p1EntryLen = 0;
-			p1Data = null;
 			
 L1:			while (p1 < p1Max) {
 				if (!insideP1entry) {
@@ -229,8 +235,8 @@
 				final int p1EntryAppliedEnd = p1EntryAppliedStart + p1EntryLen; // end of j'th patch entry in the text which is source for p2
 				
 				if (insideP2entry) {
-					if (p2EntryEnd < p1EntryAppliedStart) {
-						r.add(p2EntryStart - p2EntryStartOffset, p2EntryEnd - p1TotalAppliedDelta, p2Data);
+					if (p2EntryEnd <= p1EntryAppliedStart) {
+						r.add(p2EntrySavedStart, p2EntryEnd - p1TotalAppliedDelta, p2Data);
 						insideP2entry = false;
 						continue L0; 
 					}
@@ -242,17 +248,18 @@
 						p1TotalAppliedDelta += p1EntryDelta;
 						continue L1;
 					}
-					// p1EntryAppliedStart <= p2EntryEnd < p1EntryAppliedEnd
-					r.add(p2EntryStart - p2EntryStartOffset, p2EntryEnd - p1TotalAppliedDelta, p2Data);
-					p1EntryStart = p2EntryEnd - p1TotalAppliedDelta;
-					final int p1DataPartShift = p2EntryEnd - p1EntryAppliedStart + 1;
-					if (p1DataPartShift >= p1EntryLen) {
-						p1EntryLen = 0;
-						p1Data = new byte[0];
-					} else {
-						p1EntryLen -= p1DataPartShift;
-						p1Data = subarray(p1Data, p1DataPartShift, p1Data.length);
-					}
+					// p1EntryAppliedStart < p2EntryEnd < p1EntryAppliedEnd
+					// can add up to p1EntryEnd here (not only to p1EntryStart), but decided
+					// to leave p1 intact here, to avoid delta/range recalculation 
+					r.add(p2EntrySavedStart, p1EntryStart, p2Data);
+					// consume part of p1 overlapped by current p2
+					final int p1DataPartShift = p2EntryEnd - p1EntryAppliedStart;
+					// p2EntryEnd < p1EntryAppliedEnd		==> p2EntryEnd < p1EntryAppliedStart + p1EntryLen
+					//										==> p2EntryEnd - p1EntryAppliedStart < p1EntryLen
+					assert p1DataPartShift < p1EntryLen;
+					p1EntryLen -= p1DataPartShift;
+					p1Data = subarray(p1Data, p1DataPartShift, p1Data.length);
+					p1TotalAppliedDelta += p1DataPartShift;
 					insideP1entry = true;
 					insideP2entry = false;
 					continue L0;
@@ -265,63 +272,40 @@
 						insideP1entry = false;
 						p1++;
 						// fall-through to get p1TotalAppliedDelta incremented
-					} else { // SKETCH: II or III
+					} else { // SKETCH: II or III - p2 start inside p1 range
 						// remember, p1EntryDelta may be negative
 						// shall break j'th entry into few 
 						// fix p1's end/length
-						// p1EntryAppliedStart < p2EntryStart < p1EntryAppliedEnd
-						int s = p2EntryStart - p1TotalAppliedDelta; // p2EntryStart in p1 scale. Is within p1 range
-						if (s > p1EntryEnd) {
-							s = p1EntryEnd;
-						}
-						int p1DataPartEnd = p2EntryStart - p1EntryAppliedStart; // index, not count. <= (p1EntryEnd-p1EntryStart).
-						// add what left from p1
-						if (p1DataPartEnd < p1EntryLen) {
-							r.add(p1EntryStart, s, subarray(p1Data, 0, p1DataPartEnd)); 
-						} else {
-							p1DataPartEnd = p1EntryLen-1; // record factual number of p1 bytes we consumed.
-							r.add(p1EntryStart, s, p1Data);
-						}
-						p1TotalAppliedDelta += p1DataPartEnd - (s - p1EntryStart); // (s2 - (s1+delta)) - (s2 - delta - s1) = s2-s1-delta-s2+delta+s1 = 0, unless p1DataPartEnd >= p1Data.length
-						p1EntryLen -= (p1DataPartEnd+1); 
-						if (p2EntryEnd < p1EntryAppliedEnd) {
-							// SKETCH: III
+						// p1EntryAppliedStart < p2EntryStart < p1EntryAppliedEnd, or, alternatively
+						// p2EntryStart is from (p1EntryAppliedStart .. p1EntryAppliedStart + p1EntryLen)
+						int p1DataPartEnd = p2EntryStart - p1EntryAppliedStart;
+						assert p1DataPartEnd < p1EntryLen;
+						r.add(p1EntryStart, p1EntryEnd, subarray(p1Data, 0, p1DataPartEnd));
+						if (p2EntryEnd <= p1EntryAppliedEnd) { // p2 fits completely into p1
+							r.add(p1EntryEnd, p1EntryEnd, p2Data);
+							// p2 consumed, p1 has p1EntryLen - p1DataPartEnd - p2EntryRange bytes left to *insert*
 							insideP1entry = true;
-							// p2 completely fits into changes of p1
-							int e = p2EntryEnd - p1TotalAppliedDelta; // p2EntryEnd in p1 scale
-							if (e > p1EntryEnd) {
-								// any index past p1 end shall be calculated with respect to p1 end, thus it's unsafe to go past p1 end (there may be more p1 entries there)   
-								e = p1EntryEnd;
-							}
-							r.add(s, e, p2Data); // add p2
-							// modify p1 leftover
-							p1EntryStart = e;
-							if (p2EntryRange >= p1EntryLen) {
-								p1EntryLen = 0;
-								p1Data = new byte[0];
-							} else {
-								p1Data = subarray(p1Data, p1DataPartEnd + p2EntryRange, p1Data.length-1 /*up to the last one*/);
-								p1EntryLen -= p2EntryRange;
-							}
-							// p2 is handled, but there are leftovers of p1
+							p1EntryStart = p1EntryEnd;
+							p1EntryLen -= p1DataPartEnd;
+							p1EntryLen -= p2EntryRange;
+							// p2EntryEnd <= p1EntryAppliedEnd						==> p2EntryEnd <= p1EntryAppliedStart + p1EntryLen
+							// -p2EntryStart    									==> p2EntryRange <= p1EntryAppliedStart-p2EntryStart + p1EntryLen
+							// p1EntryAppliedStart-p2EntryStart = -p1DataPartEnd	==> p2EntryRange <= p1EntryLen - p1DataEndPart
+							//	+p1DataEndPart										==> p2EntryRange + p1DataEndPart <= p1EntryLen
+							assert p1EntryLen >= 0;
+							// p1EntryLen==0 with insideP1entry == true is nor really good here (gives empty patch elements x;x;0), 
+							// however changing <= to < in p2EntryEnd <= p1EntryAppliedEnd above leads to errors
+							p1Data = subarray(p1Data, p1DataPartEnd+p2EntryRange, p1Data.length);
+							// augment total delta with p1EntryDelta part already consumed (p1EntryLen is pure insertion left for the next step) 
+							p1TotalAppliedDelta += (p1EntryDelta - p1EntryLen);
 							continue L0;
-						} else { // p2EntryEnd >= p1EntryAppliedEnd
-							// SKETCH: II
+						} else {
+							// p1 is consumed, take next
 							insideP1entry = false;
 							p1++;
-							if (p1EntryAppliedStart + p1EntryDelta >= p2EntryEnd) {
-								// here we know next p1 entry would be past p2 entry and thus can put p2 right away
-								r.add(p2EntryStart - p1TotalAppliedDelta, p1EntryEnd, p2Data);
-								p1TotalAppliedDelta += p1EntryDelta;
-								continue L0;
-							} else {
-								// there are chances there are more p1 entries till p2 ends
-								insideP2entry = true;
-								p2EntryStartOffset = p1TotalAppliedDelta;
-								// p2EntryEnd is past delta, no chances for p1Data leftovers to be in use
-								// p2 processing is not over, need to fix end, depending on what else fits into p2 range (if nothing, can put p2.end right away)
-								// fall-through to get p1TotalAppliedDelta incremented;
-							}
+							insideP2entry = true;
+							p2EntrySavedStart = p1EntryEnd; // this is how far we've progressed in p1
+							// fall-through to get p1TotalAppliedDelta updated with consumed p1
 						}
 					}
 				} else { // p1EntryAppliedStart >= p2EntryStart
@@ -331,34 +315,43 @@
 						// SHALL NOT increment p1TotalAppliedDelta as we didn't use any of p1
 						continue L0; // next p2 
 					} else { // p2EntryEnd >= p1EntryAppliedStart
-						// SKETCH: I or IV
-						// p2EntryEnd is either  < p1EntryAppliedEnd or past it
-						if (p2EntryEnd <= p1EntryAppliedEnd) {
+						// SKETCH: I or IV:
+						// p2 start is outside of p1 range.
+						//
+						// p2DataPartEnd: this is how many bytes prior to p1EntryStart is replaced by p2Data
+						int p2DataPartEnd = p1EntryAppliedStart - p2EntryStart;
+						if (p2EntryEnd < p1EntryAppliedEnd) {
 							// SKETCH: I: copy p2, strip p1 to start from p2EntryEnd, next i (p2)
 							insideP1entry = true;
-							int e = p2EntryEnd - p1TotalAppliedDelta;
-							if (e > p1EntryEnd) {
-								e = p1EntryEnd; // added by analogy with above. Is needed?
-							}
-							r.add(p2EntryStart - p1TotalAppliedDelta, e, p2Data);
-							p1EntryStart = e;
-							int p1DataShift = p2EntryEnd - p1EntryAppliedStart;
-							if (p1DataShift >= p1EntryLen) {
-								p1EntryLen = 0;
-								p1Data = new byte[0];
-							} else {
-								p1EntryLen -= p1DataShift;
-								p1Data = subarray(p1Data, p1DataShift, p1Data.length - 1);
-							}
-							// p1TotalAppliedDelta would get incremented once this modified p1 is handled
+							// replace whole p1 (extended to the left by (p2 \ p1) front bytes)
+							r.add(p1EntryStart - p2DataPartEnd, p1EntryEnd, p2Data);
+							p1EntryStart = p1EntryEnd;
+							// see how much of p1 is left for insertion
+							int p1DataPartEnd = p2EntryEnd - p1EntryAppliedStart; // #1
+							// Similar, although incorrect: p1DataPartEnd == p2Data.length - p2DataPartEnd; // #2
+							// #1(p2EntryStart + p2DataLen) - p1EntryAppliedStart
+							// #2 p2DataLen - (p1EntryAppliedStart - p2EntryStart)
+							// but this works only in assumption that p2EntryEnd-p2EntryStart == p2Data.length
+							//
+							// p1EntryAppliedStart <= p2EntryEnd < p1EntryAppliedStart + p1EntryLen
+							// -p1EntryAppliedStart (to compare against p1DataPartEnd)  ==> 	0 <= p1DataPartEnd < p1EntryLen
+							assert p1DataPartEnd < p1EntryLen;
+							assert p1DataPartEnd >= 0;
+							p1EntryLen -= p1DataPartEnd;
+							p1Data = subarray(p1Data, p1DataPartEnd, p1Data.length);
+
+							// p1TotalAppliedDelta XXX
+							p1TotalAppliedDelta += (p1EntryDelta - p1EntryLen);
 							continue L0; // next p2;
 						} else {
-							// p2EntryEnd > p1EntryAppliedEnd
+							// p2EntryEnd >= p1EntryAppliedEnd
 							// SKETCH IV: skip (rest of) p1 completely, continue the same unless  found p1 with start or end past p2EntryEnd.
 							insideP1entry = false;
+							// p1 consumed
 							p1++;
 							insideP2entry = true;
-							p2EntryStartOffset = p1TotalAppliedDelta;
+							// extend to the left of p1 by p2 \ p1 front bytes
+							p2EntrySavedStart = p1EntryStart - p2DataPartEnd;
 							// fall-through to get p1TotalAppliedDelta incremented
 						}
 					}
@@ -368,10 +361,9 @@
 			{
 				// no more p1 entries, shall close p2 (if it's handled, code above jumps directly to L0)
 				// regardless of whether insideP2 is .t
-				int s = p2EntryStart;
-				// p2EntryStartOffset != -1 when we started p2 entry processing, but not completed
+				int s = p2EntrySavedStart != -1 ? p2EntrySavedStart : p2EntryStart - p1TotalAppliedDelta;
+				// p2EntrySavedStart != -1 when we started p2 entry processing, but not completed
 				// if we handled last p1 entry but didn't start with p2 entry processing, it's -1 and regular p1 delta shall be used
-				s -= p2EntryStartOffset == -1 ? p1TotalAppliedDelta : p2EntryStartOffset;
 				r.add(s, p2EntryEnd - p1TotalAppliedDelta, p2Data);
 			}
 		}
@@ -386,6 +378,60 @@
 		return r;
 	}
 
+	/**
+	 * Combine consecutive regions into one.
+	 * XXX NOW only combines two subsequent regions, seems enough for quick test
+	 * @return <code>this</code> or new instance of consecutive regions found
+	 */
+	public Patch normalize() {
+		Patch rv = null;
+		for (int i = 1, x = data.size(); i < x; i++) {
+			if (starts.get(i) == ends.get(i-1)) {
+				if (rv == null) {
+					rv = new Patch();
+					rv.copyOf(this, 0, i-1);
+//				} else if (ends.get(i-1) == rv.ends.get(rv.ends.size()-1)) {
+//					// "JUST IN CASE" code, i++ below prevents us from getting here
+//					// if the last region is the one already merged,
+//					// ignore this occurrence (otherwise data(i-1) would get copied again) 
+//					continue;
+				}
+				byte[] d1 = data.get(i-1);
+				byte[] d = data.get(i);
+				byte[] nd;
+				if (d1.length > 0 && d.length > 0) {
+					nd = new byte[d1.length + d.length];
+					System.arraycopy(d1, 0, nd, 0, d1.length);
+					System.arraycopy(d, 0, nd, d1.length, d.length);
+				} else {
+					nd = d1.length == 0 ? d : d1 ;
+				}
+				rv.add(starts.get(i-1), ends.get(i), nd);
+				i++; // skip i-th element (effectively means we detect only pairs)
+				// without this ++, element(i-1) is added to rv once "else" below is hit on the next step
+			} else {
+				if (rv != null) {
+					rv.add(this, i-1);
+				}
+			}
+		}
+		if (rv == null) {
+			return this;
+		} else {
+			int last = count() - 1;
+			if (starts.get(last) != ends.get(last-1)) {
+				rv.add(this, last);
+			}
+		}
+		return rv;
+	}
+	
+	private void copyOf(Patch another, int fromIndex, int upToIndex) {
+		while(fromIndex < upToIndex) {
+			add(another, fromIndex++);
+		}
+	}
+
 	public class PatchDataSource implements DataSerializer.DataSource {
 
 		public void serialize(DataSerializer out) throws IOException {