changeset 330:9747a786a34d

Patch merging algorithm complete trial
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Tue, 25 Oct 2011 03:30:02 +0200 (2011-10-25)
parents 694ebabb5cb3
children a37ce7145c3f
files src/org/tmatesoft/hg/internal/Patch.java
diffstat 1 files changed, 239 insertions(+), 34 deletions(-) [+]
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/internal/Patch.java	Thu Oct 13 03:30:50 2011 +0200
+++ b/src/org/tmatesoft/hg/internal/Patch.java	Tue Oct 25 03:30:02 2011 +0200
@@ -18,6 +18,7 @@
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Formatter;
 
 /**
  * @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description
@@ -31,13 +32,54 @@
 public final class Patch {
 	private final IntVector starts, ends;
 	private final ArrayList<byte[]> data;
-	
+
+	private static byte[] generate(int c) {
+		// Test fixture: c bytes holding 'a', 'a'+1, 'a'+2, ... (each value narrowed to byte).
+		final byte[] seq = new byte[c];
+		for (int k = 0; k < seq.length; k++) {
+			seq[k] = (byte) ('a' + k);
+		}
+		return seq;
+	}
+
+	public static void main(String[] args) {
+		// Scratch driver for apply(); notes use (start, end, dataLength) triples, roman numerals match the SKETCH labels in apply().
+		// simple cases (one element in either patch)
+		// III: (1,10,20) & (5,15,15) p2End from [p1End..p1AppliedEnd] (i.e. within p1 range but index is past p2 end index)
+		//  II: (1,10,7) & (3,15,15) insideP2 = true and no more p1 entries
+		//  II: (1,1,10) & (3,11,15)
+		// independent: (1,10,10) & (15,25,10);  (15, 25, 10) & (1, 10, 10)
+		//   I: (15, 25, 10) & (10, 20, 10). result: [10, 20, 10] [20, 25, 5]
+		//  IV: (15, 25, 10) & (10, 30, 20) -- the pair constructed below
+		// headings left without cases so far:
+		//   cycle with insideP2
+		//   cycle with insideP1
+		//   multiple elements in patches (offsets)
+		final Patch older = new Patch();
+		older.add(15, 25, generate(10));
+		System.out.println("p1: " + older);
+		final Patch newer = new Patch();
+		newer.add(10, 30, generate(20));
+		System.out.println("p2: " + newer);
+		final Patch merged = older.apply(newer);
+		System.out.println("r: " + merged);
+	}
+
 	public Patch() {
 		starts = new IntVector();
 		ends = new IntVector();
 		data = new ArrayList<byte[]>();
 	}
 	
+	public String toString() {
+		// Debug rendering: one "[start, end, dataLength] " triple per entry, in entry order.
+		final Formatter fmt = new Formatter();
+		for (int k = 0, n = count(); k < n; k++) {
+			fmt.format("[%d, %d, %d] ", starts.get(k), ends.get(k), data.get(k).length);
+		}
+		return fmt.toString();
+	}
+	
 	public int count() {
 		return data.size();
 	}
@@ -112,49 +154,212 @@
 		data.add(src);
 	}
 
-/*
-	private void add(Patch another, int index) {
-		starts.add(another.starts.get(index));
-		ends.add(another.ends.get(index));
-		data.add(another.data.get(index));
+	private void add(Patch p, int i) { // appends entry #i of p as is; the data array is shared, not copied
+		this.add(p.starts.get(i), p.ends.get(i), p.data.get(i));
+	}
+
+	private void add(int start, int end, byte[] d) { // appends one entry; starts/ends/data are parallel, same index = same entry
+		this.starts.add(start);
+		this.ends.add(end);
+		this.data.add(d);
+	}
+	
+	private static byte[] subarray(byte[] d, int start, int end) { // copies d[start..end]; note 'end' is an INCLUSIVE index
+		final byte[] slice = new byte[end - start + 1];
+		System.arraycopy(d, start, slice, 0, slice.length);
+		return slice;
 	}
 
 	/**
 	 * Modify this patch with subsequent patch 
-	 * /
-	public void apply(Patch another) {
+	 * <p>NOTE: despite the wording above, the result is returned as a new Patch; <code>this</code> and <code>another</code> are only read. */
+	private /*SHALL BE PUBLIC ONCE TESTING ENDS*/ Patch apply(Patch another) {
 		Patch r = new Patch();
-		int p1AppliedPos = 0;
-		int p1PrevEnd = 0;
-		for (int i = 0, j = 0, iMax = another.count(), jMax = this.count(); i < iMax; i++) {
-			int newerPatchEntryStart = another.starts.get(i);
-			int olderPatchEntryEnd;
+		int p1TotalAppliedDelta = 0; // value to add to start and end indexes of the older patch to get their values as if
+		// in the patched text, iow, directly comparable with respective indexes from the newer patch.
+		int p1EntryStart = 0, p1EntryEnd = 0, p1EntryLen = 0;
+		byte[] p1Data = null;
+		boolean insideP1entry = false;
+		int p2 = 0, p1 = 0;
+		final int p2Max = another.count(), p1Max = this.count();
+L0:		for (; p2 < p2Max; p2++) {
+			int p2EntryStart = another.starts.get(p2);
+			int p2EntryEnd = another.ends.get(p2);
+			final int p2EntryRange = p2EntryEnd - p2EntryStart;
+			final byte[] p2Data = another.data.get(p2);
+			boolean insideP2entry = false;
+			int p2EntryStartOffset = -1;
+			///
+			p1EntryStart = p1EntryEnd = p1EntryLen = 0;
+			p1Data = null;
 			
-			while (j < jMax) {
-				if (starts.get(j) < newerPatchEntryStart) {
-					if (starts.get(j)+data.get(j).length <= newerPatchEntryStart) {
-						r.add(this, j);
+L1:			while (p1 < p1Max) {
+				if (!insideP1entry) {
+					p1EntryStart = starts.get(p1);
+					p1EntryEnd = ends.get(p1);
+					p1Data = data.get(p1);
+					p1EntryLen = p1Data.length;
+				}// else keep values
+
+				final int p1EntryDelta = p1EntryLen - (p1EntryEnd - p1EntryStart); // number of actually inserted(+) or deleted(-) chars
+				final int p1EntryAppliedStart = p1TotalAppliedDelta + p1EntryStart;
+				final int p1EntryAppliedEnd = p1EntryAppliedStart + p1EntryLen; // end of the current p1 entry in the text which is source for p2
+				
+				if (insideP2entry) {
+					if (p2EntryEnd < p1EntryAppliedStart) {
+						r.add(p2EntryStart - p2EntryStartOffset, p2EntryEnd - p1TotalAppliedDelta, p2Data);
+						insideP2entry = false;
+						continue L0; 
+					}
+					if (p2EntryEnd >= p1EntryAppliedEnd) {
+						// when p2EntryEnd == p1EntryAppliedEnd, I assume p1TotalAppliedDelta can't be used for p2EntryEnd to get it to p1 range, but rather shall be
+						// augmented with current p1 entry and at the next p1 entry (likely to hit p1EntryAppliedStart > p2EntryEnd above) would do the rest 
+						insideP1entry = false;
+						p1++;
+						p1TotalAppliedDelta += p1EntryDelta;
+						continue L1;
+					}
+					// p1EntryAppliedStart <= p2EntryEnd < p1EntryAppliedEnd
+					r.add(p2EntryStart - p2EntryStartOffset, p2EntryEnd - p1TotalAppliedDelta, p2Data);
+					p1EntryStart = p2EntryEnd - p1TotalAppliedDelta;
+					final int p1DataPartShift = p2EntryEnd - p1EntryAppliedStart + 1;
+					if (p1DataPartShift >= p1EntryLen) {
+						p1EntryLen = 0;
+						p1Data = new byte[0];
 					} else {
-						int newLen = newerPatchEntryStart - starts.get(j);
-						int newEnd = ends.get(j) <= newerPatchEntryStart ? ends.get(j) : newerPatchEntryStart; 
-						r.add(starts.get(j), newEnd, data.get(j), newLen);
-						break;
+						p1EntryLen -= p1DataPartShift;
+						p1Data = subarray(p1Data, p1DataPartShift, p1Data.length - 1); // subarray()'s end index is inclusive; p1Data.length would overrun the source array
+					}
+					insideP1entry = true;
+					insideP2entry = false;
+					continue L0;
+				}
+
+				if (p1EntryAppliedStart < p2EntryStart) {
+					if (p1EntryAppliedEnd <= p2EntryStart) { // p1EntryAppliedEnd in fact index of the first char *after* patch
+						// completely independent, copy and continue
+						r.add(p1EntryStart, p1EntryEnd, p1Data);
+						insideP1entry = false;
+						p1++;
+						// fall-through to get p1TotalAppliedDelta incremented
+					} else { // SKETCH: II or III
+						// remember, p1EntryDelta may be negative
+						// shall break the current p1 entry into a few pieces
+						// fix p1's end/length
+						// p1EntryAppliedStart < p2EntryStart < p1EntryAppliedEnd
+						int s = p2EntryStart - p1TotalAppliedDelta; // p2EntryStart in p1 scale. Is within p1 range
+						if (s > p1EntryEnd) {
+							s = p1EntryEnd;
+						}
+						int p1DataPartEnd = p2EntryStart - p1EntryAppliedStart; // index, not count. <= (p1EntryEnd-p1EntryStart).
+						// add what is left from p1
+						if (p1DataPartEnd < p1EntryLen) {
+							r.add(p1EntryStart, s, subarray(p1Data, 0, p1DataPartEnd)); 
+						} else {
+							p1DataPartEnd = p1EntryLen-1; // record factual number of p1 bytes we consumed.
+							r.add(p1EntryStart, s, p1Data);
+						}
+						p1TotalAppliedDelta += p1DataPartEnd - (s - p1EntryStart); // (s2 - (s1+delta)) - (s2 - delta - s1) = s2-s1-delta-s2+delta+s1 = 0, unless p1DataPartEnd >= p1Data.length
+						p1EntryLen -= (p1DataPartEnd+1); 
+						if (p2EntryEnd < p1EntryAppliedEnd) {
+							// SKETCH: III
+							insideP1entry = true;
+							// p2 completely fits into changes of p1
+							int e = p2EntryEnd - p1TotalAppliedDelta; // p2EntryEnd in p1 scale
+							if (e > p1EntryEnd) {
+								// any index past p1 end shall be calculated with respect to p1 end, thus it's unsafe to go past p1 end (there may be more p1 entries there)   
+								e = p1EntryEnd;
+							}
+							r.add(s, e, p2Data); // add p2
+							// modify p1 leftover
+							p1EntryStart = e;
+							if (p2EntryRange >= p1EntryLen) {
+								p1EntryLen = 0;
+								p1Data = new byte[0];
+							} else {
+								p1Data = subarray(p1Data, p1DataPartEnd + p2EntryRange, p1Data.length-1 /*up to the last one*/);
+								p1EntryLen -= p2EntryRange;
+							}
+							// p2 is handled, but there are leftovers of p1
+							continue L0;
+						} else { // p2EntryEnd >= p1EntryAppliedEnd
+							// SKETCH: II
+							insideP1entry = false;
+							p1++;
+							if (p1EntryAppliedStart + p1EntryDelta >= p2EntryEnd) {
+								// here we know next p1 entry would be past p2 entry and thus can put p2 right away
+								r.add(p2EntryStart - p1TotalAppliedDelta, p1EntryEnd, p2Data);
+								p1TotalAppliedDelta += p1EntryDelta;
+								continue L0;
+							} else {
+								// there are chances there are more p1 entries till p2 ends
+								insideP2entry = true;
+								p2EntryStartOffset = p1TotalAppliedDelta;
+								// p2EntryEnd is past delta, no chances for p1Data leftovers to be in use
+								// p2 processing is not over, need to fix end, depending on what else fits into p2 range (if nothing, can put p2.end right away)
+								// fall-through to get p1TotalAppliedDelta incremented;
+							}
+						}
+					}
+				} else { // p1EntryAppliedStart >= p2EntryStart
+					if (p2EntryEnd < p1EntryAppliedStart) {
+						// newer patch completely fits between two older patches 
+						r.add(p2EntryStart - p1TotalAppliedDelta, p2EntryEnd - p1TotalAppliedDelta, p2Data);
+						// SHALL NOT increment p1TotalAppliedDelta as we didn't use any of p1
+						continue L0; // next p2 
+					} else { // p2EntryEnd >= p1EntryAppliedStart
+						// SKETCH: I or IV
+						// p2EntryEnd is either  < p1EntryAppliedEnd or past it
+						if (p2EntryEnd <= p1EntryAppliedEnd) {
+							// SKETCH: I: copy p2, strip p1 to start from p2EntryEnd, next i (p2)
+							insideP1entry = true;
+							int e = p2EntryEnd - p1TotalAppliedDelta;
+							if (e > p1EntryEnd) {
+								e = p1EntryEnd; // added by analogy with above. Is needed?
+							}
+							r.add(p2EntryStart - p1TotalAppliedDelta, e, p2Data);
+							p1EntryStart = e;
+							int p1DataShift = p2EntryEnd - p1EntryAppliedStart;
+							if (p1DataShift >= p1EntryLen) {
+								p1EntryLen = 0;
+								p1Data = new byte[0];
+							} else {
+								p1EntryLen -= p1DataShift;
+								p1Data = subarray(p1Data, p1DataShift, p1Data.length - 1);
+							}
+							// p1TotalAppliedDelta would get incremented once this modified p1 is handled
+							continue L0; // next p2;
+						} else {
+							// p2EntryEnd > p1EntryAppliedEnd
+							// SKETCH IV: skip (rest of) p1 completely, continue the same unless  found p1 with start or end past p2EntryEnd.
+							insideP1entry = false;
+							p1++;
+							insideP2entry = true;
+							p2EntryStartOffset = p1TotalAppliedDelta;
+							// fall-through to get p1TotalAppliedDelta incremented
+						}
 					}
 				}
-				p1AppliedPos += starts.get(j) - p1PrevEnd;
-				p1AppliedPos += data.get(j).length;
-				p1PrevEnd = ends.get(j);
-				j++;
-			}
-			r.add(newerPatchEntryStart, another.ends.get(i), another.data.get(i));
-			p1AppliedPos += newerPatchEntryStart + p1PrevEnd - another.data.get(i).length;
-			// either j == jMax and another(i, i+1, ..., iMax) need to be just copied
-			// or new patch entry starts before end of one of original patch entries
-			if (olderPatchEntryEnd > (destPosition + newerPatchEntryStart)) {
-				destPosition += starts.get(j) - prevEnd; // count those in the original stream up to old patch start
-				int newLen = newerPatchEntryStart - destPosition;
+				p1TotalAppliedDelta += p1EntryDelta;
+			} // while (p1 < p1Max)
+			{
+				// no more p1 entries, shall close p2 (if it's handled, code above jumps directly to L0)
+				// regardless of whether insideP2entry is true
+				int s = p2EntryStart;
+				// p2EntryStartOffset != -1 when we started p2 entry processing, but not completed
+				// if we handled last p1 entry but didn't start with p2 entry processing, it's -1 and regular p1 delta shall be used
+				s -= p2EntryStartOffset == -1 ? p1TotalAppliedDelta : p2EntryStartOffset;
+				r.add(s, p2EntryEnd - p1TotalAppliedDelta, p2Data);
 			}
 		}
+		if (p1 < p1Max && insideP1entry) {
+			r.add(p1EntryStart, p1EntryEnd, p1Data);
+			p1++;
+		}
+		while (p1 < p1Max) {
+			r.add(this, p1);
+			p1++;
+		}
+		return r;
 	}
-*/
 }
\ No newline at end of file