Mercurial > jhg
comparison src/org/tmatesoft/hg/internal/Patch.java @ 330:9747a786a34d
Patch merging algorithm complete trial
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> | 
|---|---|
| date | Tue, 25 Oct 2011 03:30:02 +0200 | 
| parents | 694ebabb5cb3 | 
| children | 7f136a3fa671 | 
   comparison
  equal
  deleted
  inserted
  replaced
| 329:694ebabb5cb3 | 330:9747a786a34d | 
|---|---|
| 16 */ | 16 */ | 
| 17 package org.tmatesoft.hg.internal; | 17 package org.tmatesoft.hg.internal; | 
| 18 | 18 | 
| 19 import java.io.IOException; | 19 import java.io.IOException; | 
| 20 import java.util.ArrayList; | 20 import java.util.ArrayList; | 
| 21 import java.util.Formatter; | |
| 21 | 22 | 
| 22 /** | 23 /** | 
| 23 * @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description | 24 * @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description | 
| 24 * | 25 * | 
| 25 * range [start..end] in original source gets replaced with data of length (do not keep, use data.length instead) | 26 * range [start..end] in original source gets replaced with data of length (do not keep, use data.length instead) | 
| 29 * @author TMate Software Ltd. | 30 * @author TMate Software Ltd. | 
| 30 */ | 31 */ | 
| 31 public final class Patch { | 32 public final class Patch { | 
| 32 private final IntVector starts, ends; | 33 private final IntVector starts, ends; | 
| 33 private final ArrayList<byte[]> data; | 34 private final ArrayList<byte[]> data; | 
| 34 | 35 | 
| 36 private static byte[] generate(int c) { | |
| 37 byte[] rv = new byte[c]; | |
| 38 for (int i = 0; i < c; i++) { | |
| 39 byte x = (byte) ('a' + i); | |
| 40 rv[i] = x; | |
| 41 } | |
| 42 return rv; | |
| 43 } | |
| 44 | |
| 45 public static void main(String[] args) { | |
| 46 Patch p1 = new Patch(), p2 = new Patch(); | |
| 47 // simple cases (one element in either patch) | |
| 48 // III: (1,10 20) & (5,15,15) p2End from [p1End..p1AppliedEnd] (i.e. within p1 range but index is past p2 end index) | |
| 49 // II: (1,10,7) & (3,15,15) insideP2 = true and no more p1 entries | |
| 50 // II: (1,1,10) & (3,11,15) | |
| 51 // independent: (1,10,10) & (15,25,10); (15, 25, 10) & (1, 10, 10) | |
| 52 // I: (15, 25, 10) & (10, 20, 10). result: [10, 20, 10] [20, 25, 5] | |
| 53 // IV: (15, 25, 10) & (10, 30, 20) | |
| 54 // | |
| 55 // cycle with insideP2 | |
| 56 // | |
| 57 // cycle with insideP1 | |
| 58 // | |
| 59 // multiple elements in patches (offsets) | |
| 60 p1.add(15, 25, generate(10)); | |
| 61 p2.add(10, 30, generate(20)); | |
| 62 System.out.println("p1: " + p1); | |
| 63 System.out.println("p2: " + p2); | |
| 64 Patch r = p1.apply(p2); | |
| 65 System.out.println("r: " + r); | |
| 66 } | |
| 67 | |
| 35 public Patch() { | 68 public Patch() { | 
| 36 starts = new IntVector(); | 69 starts = new IntVector(); | 
| 37 ends = new IntVector(); | 70 ends = new IntVector(); | 
| 38 data = new ArrayList<byte[]>(); | 71 data = new ArrayList<byte[]>(); | 
| 72 } | |
| 73 | |
| 74 public String toString() { | |
| 75 StringBuilder sb = new StringBuilder(); | |
| 76 Formatter f = new Formatter(sb); | |
| 77 for (int i = 0; i < count(); i++) { | |
| 78 f.format("[%d, %d, %d] ", starts.get(i), ends.get(i), data.get(i).length); | |
| 79 } | |
| 80 return sb.toString(); | |
| 39 } | 81 } | 
| 40 | 82 | 
| 41 public int count() { | 83 public int count() { | 
| 42 return data.size(); | 84 return data.size(); | 
| 43 } | 85 } | 
| 110 starts.add(s); | 152 starts.add(s); | 
| 111 ends.add(e); | 153 ends.add(e); | 
| 112 data.add(src); | 154 data.add(src); | 
| 113 } | 155 } | 
| 114 | 156 | 
| 115 /* | 157 private void add(Patch p, int i) { | 
| 116 private void add(Patch another, int index) { | 158 add(p.starts.get(i), p.ends.get(i), p.data.get(i)); | 
| 117 starts.add(another.starts.get(index)); | 159 } | 
| 118 ends.add(another.ends.get(index)); | 160 | 
| 119 data.add(another.data.get(index)); | 161 private void add(int start, int end, byte[] d) { | 
| 162 starts.add(start); | |
| 163 ends.add(end); | |
| 164 data.add(d); | |
| 165 } | |
| 166 | |
| 167 private static byte[] subarray(byte[] d, int start, int end) { | |
| 168 byte[] r = new byte[end-start+1]; | |
| 169 System.arraycopy(d, start, r, 0, r.length); | |
| 170 return r; | |
| 120 } | 171 } | 
| 121 | 172 | 
| 122 /** | 173 /** | 
| 123 * Modify this patch with subsequent patch | 174 * Modify this patch with subsequent patch | 
| 124 * / | 175 */ | 
| 125 public void apply(Patch another) { | 176 private /*SHALL BE PUBLIC ONCE TESTING ENDS*/ Patch apply(Patch another) { | 
| 126 Patch r = new Patch(); | 177 Patch r = new Patch(); | 
| 127 int p1AppliedPos = 0; | 178 int p1TotalAppliedDelta = 0; // value to add to start and end indexes of the older patch to get their values as if | 
| 128 int p1PrevEnd = 0; | 179 // in the patched text, iow, directly comparable with respective indexes from the newer patch. | 
| 129 for (int i = 0, j = 0, iMax = another.count(), jMax = this.count(); i < iMax; i++) { | 180 int p1EntryStart = 0, p1EntryEnd = 0, p1EntryLen = 0; | 
| 130 int newerPatchEntryStart = another.starts.get(i); | 181 byte[] p1Data = null; | 
| 131 int olderPatchEntryEnd; | 182 boolean insideP1entry = false; | 
| 183 int p2 = 0, p1 = 0; | |
| 184 final int p2Max = another.count(), p1Max = this.count(); | |
| 185 L0: for (; p2 < p2Max; p2++) { | |
| 186 int p2EntryStart = another.starts.get(p2); | |
| 187 int p2EntryEnd = another.ends.get(p2); | |
| 188 final int p2EntryRange = p2EntryEnd - p2EntryStart; | |
| 189 final byte[] p2Data = another.data.get(p2); | |
| 190 boolean insideP2entry = false; | |
| 191 int p2EntryStartOffset = -1; | |
| 192 /// | |
| 193 p1EntryStart = p1EntryEnd = p1EntryLen = 0; | |
| 194 p1Data = null; | |
| 132 | 195 | 
| 133 while (j < jMax) { | 196 L1: while (p1 < p1Max) { | 
| 134 if (starts.get(j) < newerPatchEntryStart) { | 197 if (!insideP1entry) { | 
| 135 if (starts.get(j)+data.get(j).length <= newerPatchEntryStart) { | 198 p1EntryStart = starts.get(p1); | 
| 136 r.add(this, j); | 199 p1EntryEnd = ends.get(p1); | 
| 200 p1Data = data.get(p1); | |
| 201 p1EntryLen = p1Data.length; | |
| 202 }// else keep values | |
| 203 | |
| 204 final int p1EntryDelta = p1EntryLen - (p1EntryEnd - p1EntryStart); // number of actually inserted(+) or deleted(-) chars | |
| 205 final int p1EntryAppliedStart = p1TotalAppliedDelta + p1EntryStart; | |
| 206 final int p1EntryAppliedEnd = p1EntryAppliedStart + p1EntryLen; // end of j'th patch entry in the text which is source for p2 | |
| 207 | |
| 208 if (insideP2entry) { | |
| 209 if (p2EntryEnd < p1EntryAppliedStart) { | |
| 210 r.add(p2EntryStart - p2EntryStartOffset, p2EntryEnd - p1TotalAppliedDelta, p2Data); | |
| 211 insideP2entry = false; | |
| 212 continue L0; | |
| 213 } | |
| 214 if (p2EntryEnd >= p1EntryAppliedEnd) { | |
| 215 // when p2EntryEnd == p1EntryAppliedEnd, I assume p1TotalAppliedDelta can't be used for p2EntryEnd to get it to p1 range, but rather shall be | |
| 216 // augmented with current p1 entry and at the next p1 entry (likely to hit p1EntryAppliedStart > p2EntryEnd above) would do the rest | |
| 217 insideP1entry = false; | |
| 218 p1++; | |
| 219 p1TotalAppliedDelta += p1EntryDelta; | |
| 220 continue L1; | |
| 221 } | |
| 222 // p1EntryAppliedStart <= p2EntryEnd < p1EntryAppliedEnd | |
| 223 r.add(p2EntryStart - p2EntryStartOffset, p2EntryEnd - p1TotalAppliedDelta, p2Data); | |
| 224 p1EntryStart = p2EntryEnd - p1TotalAppliedDelta; | |
| 225 final int p1DataPartShift = p2EntryEnd - p1EntryAppliedStart + 1; | |
| 226 if (p1DataPartShift >= p1EntryLen) { | |
| 227 p1EntryLen = 0; | |
| 228 p1Data = new byte[0]; | |
| 137 } else { | 229 } else { | 
| 138 int newLen = newerPatchEntryStart - starts.get(j); | 230 p1EntryLen -= p1DataPartShift; | 
| 139 int newEnd = ends.get(j) <= newerPatchEntryStart ? ends.get(j) : newerPatchEntryStart; | 231 p1Data = subarray(p1Data, p1DataPartShift, p1Data.length); | 
| 140 r.add(starts.get(j), newEnd, data.get(j), newLen); | 232 } | 
| 141 break; | 233 insideP1entry = true; | 
| 142 } | 234 insideP2entry = false; | 
| 235 continue L0; | |
| 143 } | 236 } | 
| 144 p1AppliedPos += starts.get(j) - p1PrevEnd; | 237 | 
| 145 p1AppliedPos += data.get(j).length; | 238 if (p1EntryAppliedStart < p2EntryStart) { | 
| 146 p1PrevEnd = ends.get(j); | 239 if (p1EntryAppliedEnd <= p2EntryStart) { // p1EntryAppliedEnd in fact index of the first char *after* patch | 
| 147 j++; | 240 // completely independent, copy and continue | 
| 241 r.add(p1EntryStart, p1EntryEnd, p1Data); | |
| 242 insideP1entry = false; | |
| 243 p1++; | |
| 244 // fall-through to get p1TotalAppliedDelta incremented | |
| 245 } else { // SKETCH: II or III | |
| 246 // remember, p1EntryDelta may be negative | |
| 247 // shall break j'th entry into few | |
| 248 // fix p1's end/length | |
| 249 // p1EntryAppliedStart < p2EntryStart < p1EntryAppliedEnd | |
| 250 int s = p2EntryStart - p1TotalAppliedDelta; // p2EntryStart in p1 scale. Is within p1 range | |
| 251 if (s > p1EntryEnd) { | |
| 252 s = p1EntryEnd; | |
| 253 } | |
| 254 int p1DataPartEnd = p2EntryStart - p1EntryAppliedStart; // index, not count. <= (p1EntryEnd-p1EntryStart). | |
| 255 // add what left from p1 | |
| 256 if (p1DataPartEnd < p1EntryLen) { | |
| 257 r.add(p1EntryStart, s, subarray(p1Data, 0, p1DataPartEnd)); | |
| 258 } else { | |
| 259 p1DataPartEnd = p1EntryLen-1; // record factual number of p1 bytes we consumed. | |
| 260 r.add(p1EntryStart, s, p1Data); | |
| 261 } | |
| 262 p1TotalAppliedDelta += p1DataPartEnd - (s - p1EntryStart); // (s2 - (s1+delta)) - (s2 - delta - s1) = s2-s1-delta-s2+delta+s1 = 0, unless p1DataPartEnd >= p1Data.length | |
| 263 p1EntryLen -= (p1DataPartEnd+1); | |
| 264 if (p2EntryEnd < p1EntryAppliedEnd) { | |
| 265 // SKETCH: III | |
| 266 insideP1entry = true; | |
| 267 // p2 completely fits into changes of p1 | |
| 268 int e = p2EntryEnd - p1TotalAppliedDelta; // p2EntryEnd in p1 scale | |
| 269 if (e > p1EntryEnd) { | |
| 270 // any index past p1 end shall be calculated with respect to p1 end, thus it's unsafe to go past p1 end (there may be more p1 entries there) | |
| 271 e = p1EntryEnd; | |
| 272 } | |
| 273 r.add(s, e, p2Data); // add p2 | |
| 274 // modify p1 leftover | |
| 275 p1EntryStart = e; | |
| 276 if (p2EntryRange >= p1EntryLen) { | |
| 277 p1EntryLen = 0; | |
| 278 p1Data = new byte[0]; | |
| 279 } else { | |
| 280 p1Data = subarray(p1Data, p1DataPartEnd + p2EntryRange, p1Data.length-1 /*up to the last one*/); | |
| 281 p1EntryLen -= p2EntryRange; | |
| 282 } | |
| 283 // p2 is handled, but there are leftovers of p1 | |
| 284 continue L0; | |
| 285 } else { // p2EntryEnd >= p1EntryAppliedEnd | |
| 286 // SKETCH: II | |
| 287 insideP1entry = false; | |
| 288 p1++; | |
| 289 if (p1EntryAppliedStart + p1EntryDelta >= p2EntryEnd) { | |
| 290 // here we know next p1 entry would be past p2 entry and thus can put p2 right away | |
| 291 r.add(p2EntryStart - p1TotalAppliedDelta, p1EntryEnd, p2Data); | |
| 292 p1TotalAppliedDelta += p1EntryDelta; | |
| 293 continue L0; | |
| 294 } else { | |
| 295 // there are chances there are more p1 entries till p2 ends | |
| 296 insideP2entry = true; | |
| 297 p2EntryStartOffset = p1TotalAppliedDelta; | |
| 298 // p2EntryEnd is past delta, no chances for p1Data leftovers to be in use | |
| 299 // p2 processing is not over, need to fix end, depending on what else fits into p2 range (if nothing, can put p2.end right away) | |
| 300 // fall-through to get p1TotalAppliedDelta incremented; | |
| 301 } | |
| 302 } | |
| 303 } | |
| 304 } else { // p1EntryAppliedStart >= p2EntryStart | |
| 305 if (p2EntryEnd < p1EntryAppliedStart) { | |
| 306 // newer patch completely fits between two older patches | |
| 307 r.add(p2EntryStart - p1TotalAppliedDelta, p2EntryEnd - p1TotalAppliedDelta, p2Data); | |
| 308 // SHALL NOT increment p1TotalAppliedDelta as we didn't use any of p1 | |
| 309 continue L0; // next p2 | |
| 310 } else { // p2EntryEnd >= p1EntryAppliedStart | |
| 311 // SKETCH: I or IV | |
| 312 // p2EntryEnd is either < p1EntryAppliedEnd or past it | |
| 313 if (p2EntryEnd <= p1EntryAppliedEnd) { | |
| 314 // SKETCH: I: copy p2, strip p1 to start from p2EntryEnd, next i (p2) | |
| 315 insideP1entry = true; | |
| 316 int e = p2EntryEnd - p1TotalAppliedDelta; | |
| 317 if (e > p1EntryEnd) { | |
| 318 e = p1EntryEnd; // added by analogy with above. Is needed? | |
| 319 } | |
| 320 r.add(p2EntryStart - p1TotalAppliedDelta, e, p2Data); | |
| 321 p1EntryStart = e; | |
| 322 int p1DataShift = p2EntryEnd - p1EntryAppliedStart; | |
| 323 if (p1DataShift >= p1EntryLen) { | |
| 324 p1EntryLen = 0; | |
| 325 p1Data = new byte[0]; | |
| 326 } else { | |
| 327 p1EntryLen -= p1DataShift; | |
| 328 p1Data = subarray(p1Data, p1DataShift, p1Data.length - 1); | |
| 329 } | |
| 330 // p1TotalAppliedDelta would get incremented once this modified p1 is handled | |
| 331 continue L0; // next p2; | |
| 332 } else { | |
| 333 // p2EntryEnd > p1EntryAppliedEnd | |
| 334 // SKETCH IV: skip (rest of) p1 completely, continue the same unless found p1 with start or end past p2EntryEnd. | |
| 335 insideP1entry = false; | |
| 336 p1++; | |
| 337 insideP2entry = true; | |
| 338 p2EntryStartOffset = p1TotalAppliedDelta; | |
| 339 // fall-through to get p1TotalAppliedDelta incremented | |
| 340 } | |
| 341 } | |
| 342 } | |
| 343 p1TotalAppliedDelta += p1EntryDelta; | |
| 344 } // while (p1 < p1Max) | |
| 345 { | |
| 346 // no more p1 entries, shall close p2 (if it's handled, code above jumps directly to L0) | |
| 347 // regardless of whether insideP2 is .t | |
| 348 int s = p2EntryStart; | |
| 349 // p2EntryStartOffset != -1 when we started p2 entry processing, but not completed | |
| 350 // if we handled last p1 entry but didn't start with p2 entry processing, it's -1 and regular p1 delta shall be used | |
| 351 s -= p2EntryStartOffset == -1 ? p1TotalAppliedDelta : p2EntryStartOffset; | |
| 352 r.add(s, p2EntryEnd - p1TotalAppliedDelta, p2Data); | |
| 148 } | 353 } | 
| 149 r.add(newerPatchEntryStart, another.ends.get(i), another.data.get(i)); | 354 } | 
| 150 p1AppliedPos += newerPatchEntryStart + p1PrevEnd - another.data.get(i).length; | 355 if (p1 < p1Max && insideP1entry) { | 
| 151 // either j == jMax and another(i, i+1, ..., iMax) need to be just copied | 356 r.add(p1EntryStart, p1EntryEnd, p1Data); | 
| 152 // or new patch entry starts before end of one of original patch entries | 357 p1++; | 
| 153 if (olderPatchEntryEnd > (destPosition + newerPatchEntryStart)) { | 358 } | 
| 154 destPosition += starts.get(j) - prevEnd; // count those in the original stream up to old patch start | 359 while (p1 < p1Max) { | 
| 155 int newLen = newerPatchEntryStart - destPosition; | 360 r.add(this, p1); | 
| 156 } | 361 p1++; | 
| 157 } | 362 }; | 
| 158 } | 363 return r; | 
| 159 */ | 364 } | 
| 160 } | 365 } | 
