Mercurial > jhg
comparison src/org/tmatesoft/hg/internal/RevlogStream.java @ 329:694ebabb5cb3
Refactor revlog patch mechanism, towards patch merging
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
|---|---|
| date | Thu, 13 Oct 2011 03:30:50 +0200 |
| parents | 650b45d290b1 |
| children | 8da7ade36c57 |
comparison
equal
deleted
inserted
replaced
| 328:a674b8590362 | 329:694ebabb5cb3 |
|---|---|
| 19 import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION; | 19 import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION; |
| 20 import static org.tmatesoft.hg.repo.HgRepository.TIP; | 20 import static org.tmatesoft.hg.repo.HgRepository.TIP; |
| 21 | 21 |
| 22 import java.io.File; | 22 import java.io.File; |
| 23 import java.io.IOException; | 23 import java.io.IOException; |
| 24 import java.util.ArrayList; | |
| 25 import java.util.List; | |
| 26 import java.util.zip.Inflater; | 24 import java.util.zip.Inflater; |
| 27 | 25 |
| 28 import org.tmatesoft.hg.core.HgBadStateException; | 26 import org.tmatesoft.hg.core.HgBadStateException; |
| 29 import org.tmatesoft.hg.core.Nodeid; | 27 import org.tmatesoft.hg.core.Nodeid; |
| 30 import org.tmatesoft.hg.repo.HgInternals; | 28 import org.tmatesoft.hg.repo.HgInternals; |
| 390 } | 388 } |
| 391 | 389 |
| 392 public boolean range(int start, int end) throws IOException { | 390 public boolean range(int start, int end) throws IOException { |
| 393 byte[] nodeidBuf = new byte[20]; | 391 byte[] nodeidBuf = new byte[20]; |
| 394 int i; | 392 int i; |
| 395 boolean extraReadsToBaseRev = false; // to indicate we read revision prior to start. XXX not sure can't do without | |
| 396 // it (i.e. replace with i >= start) | 393 // it (i.e. replace with i >= start) |
| 397 if (needData && (i = getBaseRevision(start)) < start) { | 394 if (needData && (i = getBaseRevision(start)) < start) { |
| 398 // if lastRevisionRead in [baseRevision(start), start) can reuse lastUserData | 395 // if lastRevisionRead in [baseRevision(start), start) can reuse lastUserData |
| 399 // doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below). | 396 // doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below). |
| 400 if (lastRevisionRead != BAD_REVISION && i <= lastRevisionRead && lastRevisionRead < start) { | 397 if (lastRevisionRead != BAD_REVISION && i <= lastRevisionRead && lastRevisionRead < start) { |
| 401 i = lastRevisionRead + 1; // start with first not-yet-read revision | 398 i = lastRevisionRead + 1; // start with first not-yet-read revision |
| 402 extraReadsToBaseRev = i < start; | |
| 403 } else { | 399 } else { |
| 404 if (lastUserData != null) { | 400 if (lastUserData != null) { |
| 405 lastUserData.done(); | 401 lastUserData.done(); |
| 406 lastUserData = null; | 402 lastUserData = null; |
| 407 } | 403 } |
| 408 extraReadsToBaseRev = true; | |
| 409 } | 404 } |
| 410 } else { | 405 } else { |
| 411 // don't need to clean lastUserData as it's always null when !needData | 406 // don't need to clean lastUserData as it's always null when !needData |
| 412 i = start; | 407 i = start; |
| 413 } | 408 } |
| 414 | 409 |
| 415 daIndex.seek(getIndexOffsetInt(i)); | 410 daIndex.seek(getIndexOffsetInt(i)); |
| 416 // | 411 // |
| 417 // reuse some instances | 412 // reuse some instances |
| 418 final ArrayList<PatchRecord> patches = new ArrayList<PatchRecord>(); | 413 final Patch patch = new Patch(); |
| 419 final Inflater inflater = new Inflater(); | 414 final Inflater inflater = new Inflater(); |
| 420 // can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel | 415 // can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel |
| 421 final byte[] inflaterBuffer = new byte[1024]; | 416 final byte[] inflaterBuffer = new byte[1024]; |
| 422 // | 417 // |
| 423 | 418 |
| 468 } | 463 } |
| 469 } | 464 } |
| 470 // XXX | 465 // XXX |
| 471 if (patchToPrevious) { | 466 if (patchToPrevious) { |
| 472 // this is a patch | 467 // this is a patch |
| 473 patches.clear(); // won't hurt to ensure there are no leftovers, even if we already cleaned | 468 patch.read(userDataAccess); |
| 474 while (!userDataAccess.isEmpty()) { | |
| 475 PatchRecord pr = PatchRecord.read(userDataAccess); | |
| 476 // System.out.printf("PatchRecord:%d %d %d\n", pr.start, pr.end, pr.len); | |
| 477 patches.add(pr); | |
| 478 } | |
| 479 userDataAccess.done(); | 469 userDataAccess.done(); |
| 480 // | 470 // |
| 481 // it shall be reset at the end of prev iteration, when it got assigned from userDataAccess | 471 // it shall be reset at the end of prev iteration, when it got assigned from userDataAccess |
| 482 // however, actual userDataAccess and lastUserData may share Inflater object, which needs to be reset | 472 // however, actual userDataAccess and lastUserData may share Inflater object, which needs to be reset |
| 483 // Alternatively, userDataAccess.done() above may be responsible to reset Inflater (if it's InflaterDataAccess) | 473 // Alternatively, userDataAccess.done() above may be responsible to reset Inflater (if it's InflaterDataAccess) |
| 484 lastUserData.reset(); | 474 lastUserData.reset(); |
| 485 // final long startMeasuring = System.currentTimeMillis(); // TIMING | 475 // final long startMeasuring = System.currentTimeMillis(); // TIMING |
| 486 byte[] userData = apply(lastUserData, actualLen, patches); | 476 byte[] userData = patch.apply(lastUserData, actualLen); |
| 487 // applyTime += (System.currentTimeMillis() - startMeasuring); // TIMING | 477 // applyTime += (System.currentTimeMillis() - startMeasuring); // TIMING |
| 488 patches.clear(); // do not keep any reference, allow PatchRecord to be gc'd | 478 patch.clear(); // do not keep any reference, allow byte[] data to be gc'd |
| 489 userDataAccess = new ByteArrayDataAccess(userData); | 479 userDataAccess = new ByteArrayDataAccess(userData); |
| 490 } | 480 } |
| 491 } else { | 481 } else { |
| 492 if (inline) { | 482 if (inline) { |
| 493 daIndex.skip(compressedLen); | 483 daIndex.skip(compressedLen); |
| 494 } | 484 } |
| 495 } | 485 } |
| 496 if (!extraReadsToBaseRev || i >= start) { | 486 if (i >= start) { |
| 497 // final long startMeasuring = System.currentTimeMillis(); // TIMING | 487 // final long startMeasuring = System.currentTimeMillis(); // TIMING |
| 498 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); | 488 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); |
| 499 // inspectorTime += (System.currentTimeMillis() - startMeasuring); // TIMING | 489 // inspectorTime += (System.currentTimeMillis() - startMeasuring); // TIMING |
| 500 } | 490 } |
| 501 if (cb != null) { | 491 if (cb != null) { |
| 515 return true; | 505 return true; |
| 516 } | 506 } |
| 517 } | 507 } |
| 518 | 508 |
| 519 | 509 |
| 520 // mpatch.c : apply() | |
| 521 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF | |
| 522 public/*for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List<PatchRecord> patch) throws IOException { | |
| 523 int last = 0, destIndex = 0; | |
| 524 if (outcomeLen == -1) { | |
| 525 outcomeLen = baseRevisionContent.length(); | |
| 526 for (int i = 0, x = patch.size(); i < x; i++) { | |
| 527 PatchRecord pr = patch.get(i); | |
| 528 outcomeLen += pr.start - last + pr.len; | |
| 529 last = pr.end; | |
| 530 } | |
| 531 outcomeLen -= last; | |
| 532 last = 0; | |
| 533 } | |
| 534 byte[] rv = new byte[outcomeLen]; | |
| 535 for (int i = 0, x = patch.size(); i < x; i++) { | |
| 536 PatchRecord pr = patch.get(i); | |
| 537 baseRevisionContent.seek(last); | |
| 538 baseRevisionContent.readBytes(rv, destIndex, pr.start-last); | |
| 539 destIndex += pr.start - last; | |
| 540 System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length); | |
| 541 destIndex += pr.data.length; | |
| 542 last = pr.end; | |
| 543 } | |
| 544 baseRevisionContent.seek(last); | |
| 545 baseRevisionContent.readBytes(rv, destIndex, (int) (baseRevisionContent.length() - last)); | |
| 546 return rv; | |
| 547 } | |
| 548 | |
| 549 // @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description | |
| 550 public static class PatchRecord { | |
| 551 /* | |
| 552 Given there are pr1 and pr2: | |
| 553 pr1.start to pr1.end will be replaced with pr's data (of pr1.len) | |
| 554 pr1.end to pr2.start gets copied from base | |
| 555 */ | |
| 556 public int start, end, len; | |
| 557 public byte[] data; | |
| 558 | |
| 559 // TODO consider PatchRecord that only records data position (absolute in data source), and acquires data as needed | |
| 560 private PatchRecord(int p1, int p2, int length, byte[] src) { | |
| 561 start = p1; | |
| 562 end = p2; | |
| 563 len = length; | |
| 564 data = src; | |
| 565 } | |
| 566 | |
| 567 /*package-local*/ static PatchRecord read(byte[] data, int offset) { | |
| 568 final int x = offset; // shorthand | |
| 569 int p1 = ((data[x] & 0xFF)<< 24) | ((data[x+1] & 0xFF) << 16) | ((data[x+2] & 0xFF) << 8) | (data[x+3] & 0xFF); | |
| 570 int p2 = ((data[x+4] & 0xFF) << 24) | ((data[x+5] & 0xFF) << 16) | ((data[x+6] & 0xFF) << 8) | (data[x+7] & 0xFF); | |
| 571 int len = ((data[x+8] & 0xFF) << 24) | ((data[x+9] & 0xFF) << 16) | ((data[x+10] & 0xFF) << 8) | (data[x+11] & 0xFF); | |
| 572 byte[] dataCopy = new byte[len]; | |
| 573 System.arraycopy(data, x+12, dataCopy, 0, len); | |
| 574 return new PatchRecord(p1, p2, len, dataCopy); | |
| 575 } | |
| 576 | |
| 577 public /*for HgBundle*/ static PatchRecord read(DataAccess da) throws IOException { | |
| 578 int p1 = da.readInt(); | |
| 579 int p2 = da.readInt(); | |
| 580 int len = da.readInt(); | |
| 581 byte[] src = new byte[len]; | |
| 582 da.readBytes(src, 0, len); | |
| 583 return new PatchRecord(p1, p2, len, src); | |
| 584 } | |
| 585 } | |
| 586 | |
| 587 // FIXME byte[] data might be too expensive, for few usecases it may be better to have intermediate Access object (when we don't need full data | |
| 588 // instantly - e.g. calculate hash, or comparing two revisions | |
| 589 public interface Inspector { | 510 public interface Inspector { |
| 590 // XXX boolean retVal to indicate whether to continue? | 511 // XXX boolean retVal to indicate whether to continue? |
| 591 // TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call) | 512 // TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call) |
| 592 // implementers shall not invoke DataAccess.done(), it's accomplished by #iterate at appropraite moment | 513 // implementers shall not invoke DataAccess.done(), it's accomplished by #iterate at appropraite moment |
| 593 void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, DataAccess data); | 514 void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, DataAccess data); |
