Mercurial > hg4j
comparison src/org/tmatesoft/hg/internal/RevlogStream.java @ 584:ed243b668502
Conditionally enable effective patch merge alternative for revlog reading
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
|---|---|
| date | Thu, 25 Apr 2013 16:08:17 +0200 |
| parents | bd5926e24aa3 |
| children | b47ef0d2777b |
comparison legend: equal | deleted | inserted | replaced
| 583:47dfa0ec7e35 | 584:ed243b668502 |
|---|---|
| 265 start = indexSize - 1; | 265 start = indexSize - 1; |
| 266 } | 266 } |
| 267 HgInternals.checkRevlogRange(start, end, indexSize-1); | 267 HgInternals.checkRevlogRange(start, end, indexSize-1); |
| 268 // XXX may cache [start .. end] from index with a single read (pre-read) | 268 // XXX may cache [start .. end] from index with a single read (pre-read) |
| 269 | 269 |
| 270 ReaderN1 r = new ReaderN1(needData, inspector); | 270 ReaderN1 r = new ReaderN1(needData, inspector, dataAccess.shallMergePatches()); |
| 271 try { | 271 try { |
| 272 r.start(end - start + 1); | 272 r.start(end - start + 1); |
| 273 r.range(start, end); | 273 r.range(start, end); |
| 274 } catch (IOException ex) { | 274 } catch (IOException ex) { |
| 275 throw new HgInvalidControlFileException(String.format("Failed reading [%d..%d]", start, end), ex, indexFile); | 275 throw new HgInvalidControlFileException(String.format("Failed reading [%d..%d]", start, end), ex, indexFile); |
| 297 } | 297 } |
| 298 if (sortedRevisions[sortedRevisions.length - 1] > indexSize) { | 298 if (sortedRevisions[sortedRevisions.length - 1] > indexSize) { |
| 299 throw new HgInvalidRevisionException(String.format("Can't iterate [%d, %d] in range [0..%d]", sortedRevisions[0], sortedRevisions[sortedRevisions.length - 1], indexSize), null, sortedRevisions[sortedRevisions.length - 1]); | 299 throw new HgInvalidRevisionException(String.format("Can't iterate [%d, %d] in range [0..%d]", sortedRevisions[0], sortedRevisions[sortedRevisions.length - 1], indexSize), null, sortedRevisions[sortedRevisions.length - 1]); |
| 300 } | 300 } |
| 301 | 301 |
| 302 ReaderN1 r = new ReaderN1(needData, inspector); | 302 ReaderN1 r = new ReaderN1(needData, inspector, dataAccess.shallMergePatches()); |
| 303 try { | 303 try { |
| 304 r.start(sortedRevisions.length); | 304 r.start(sortedRevisions.length); |
| 305 for (int i = 0; i < sortedRevisions.length; ) { | 305 for (int i = 0; i < sortedRevisions.length; ) { |
| 306 int x = i; | 306 int x = i; |
| 307 i++; | 307 i++; |
| 380 | 380 |
| 381 private boolean outlineCached() { | 381 private boolean outlineCached() { |
| 382 return baseRevisions != null && baseRevisions.length > 0; | 382 return baseRevisions != null && baseRevisions.length > 0; |
| 383 } | 383 } |
| 384 | 384 |
| 385 // translate 6-byte offset field value to pysical file offset for inline revlogs | 385 // translate 6-byte offset field value to physical file offset for inline revlogs |
| 386 // DOESN'T MAKE SENSE if revlog with data is separate | 386 // DOESN'T MAKE SENSE if revlog with data is separate |
| 387 private static int offsetFieldToInlineFileOffset(long offset, int recordIndex) throws HgInvalidStateException { | 387 private static int offsetFieldToInlineFileOffset(long offset, int recordIndex) throws HgInvalidStateException { |
| 388 int o = Internals.ltoi(offset); | 388 int o = Internals.ltoi(offset); |
| 389 if (o != offset) { | 389 if (o != offset) { |
| 390 // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file | 390 // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file |
| 461 | 461 |
| 462 /** | 462 /** |
| 463 * operation with single file open/close and multiple diverse reads. | 463 * operation with single file open/close and multiple diverse reads. |
| 464 * XXX initOutline might need similar extraction to keep N1 format knowledge | 464 * XXX initOutline might need similar extraction to keep N1 format knowledge |
| 465 */ | 465 */ |
| 466 class ReaderN1 { | 466 final class ReaderN1 { |
| 467 private final Inspector inspector; | 467 private final Inspector inspector; |
| 468 private final boolean needData; | 468 private final boolean needData; |
| 469 private final boolean mergePatches; | |
| 469 private DataAccess daIndex = null, daData = null; | 470 private DataAccess daIndex = null, daData = null; |
| 470 private Lifecycle.BasicCallback cb = null; | 471 private Lifecycle.BasicCallback cb = null; |
| 471 private Lifecycle lifecycleListener = null; | 472 private Lifecycle lifecycleListener = null; |
| 472 private int lastRevisionRead = BAD_REVISION; | 473 private int lastRevisionRead = BAD_REVISION; |
| 473 private DataAccess lastUserData; | 474 private DataAccess lastUserData; |
| 475 // | |
| 476 // next are transient values, for range() use only | |
| 477 private final Inflater inflater = new Inflater(); | |
| 478 // can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel | |
| 479 private final byte[] inflaterBuffer = new byte[10 * 1024]; // TODO consider using DAP.DEFAULT_FILE_BUFFER | |
| 480 private final byte[] nodeidBuf = new byte[20]; | |
| 481 // revlog record fields | |
| 482 private long offset; | |
| 483 @SuppressWarnings("unused") | |
| 484 private int flags; | |
| 485 private int compressedLen; | |
| 486 private int actualLen; | |
| 487 private int baseRevision; | |
| 488 private int linkRevision; | |
| 489 private int parent1Revision; | |
| 490 private int parent2Revision; | |
| 474 // next are to track two major bottlenecks - patch application and actual time spent in inspector | 491 // next are to track two major bottlenecks - patch application and actual time spent in inspector |
| 475 // private long applyTime, inspectorTime; // TIMING | 492 // private long applyTime, inspectorTime; // TIMING |
| 476 | 493 |
| 477 | 494 public ReaderN1(boolean dataRequested, Inspector insp, boolean usePatchMerge) { |
| 478 public ReaderN1(boolean needData, Inspector insp) { | |
| 479 assert insp != null; | 495 assert insp != null; |
| 480 this.needData = needData; | 496 needData = dataRequested; |
| 481 inspector = insp; | 497 inspector = insp; |
| 498 mergePatches = usePatchMerge; | |
| 482 } | 499 } |
| 483 | 500 |
| 484 public void start(int totalWork) { | 501 public void start(int totalWork) { |
| 485 daIndex = getIndexStream(); | 502 daIndex = getIndexStream(); |
| 486 if (needData && !inline) { | 503 if (needData && !inline) { |
| 511 daData.done(); | 528 daData.done(); |
| 512 daData = null; | 529 daData = null; |
| 513 } | 530 } |
| 514 // System.out.printf("applyTime:%d ms, inspectorTime: %d ms\n", applyTime, inspectorTime); // TIMING | 531 // System.out.printf("applyTime:%d ms, inspectorTime: %d ms\n", applyTime, inspectorTime); // TIMING |
| 515 } | 532 } |
| 533 | |
| 534 private void readHeaderRecord(int i) throws IOException { | |
| 535 if (inline && needData) { | |
| 536 // inspector reading data (though FilterDataAccess) may have affected index position | |
| 537 daIndex.seek(getIndexOffsetInt(i)); | |
| 538 } | |
| 539 long l = daIndex.readLong(); // 0 | |
| 540 offset = i == 0 ? 0 : (l >>> 16); | |
| 541 flags = (int) (l & 0x0FFFF); | |
| 542 compressedLen = daIndex.readInt(); // +8 | |
| 543 actualLen = daIndex.readInt(); // +12 | |
| 544 baseRevision = daIndex.readInt(); // +16 | |
| 545 linkRevision = daIndex.readInt(); // +20 | |
| 546 parent1Revision = daIndex.readInt(); | |
| 547 parent2Revision = daIndex.readInt(); | |
| 548 // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty | |
| 549 daIndex.readBytes(nodeidBuf, 0, 20); // +32 | |
| 550 daIndex.skip(12); | |
| 551 } | |
| 552 | |
| 553 private boolean isPatch(int i) { | |
| 554 return baseRevision != i; // the only way I found to tell if it's a patch | |
| 555 } | |
| 556 | |
| 557 private DataAccess getStoredData(int i) throws IOException { | |
| 558 DataAccess userDataAccess = null; | |
| 559 DataAccess streamDataAccess; | |
| 560 long streamOffset; | |
| 561 if (inline) { | |
| 562 streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE; | |
| 563 streamDataAccess = daIndex; | |
| 564 // don't need to do seek as it's actual position in the index stream, but it's safe to seek, just in case | |
| 565 daIndex.longSeek(streamOffset); | |
| 566 } else { | |
| 567 streamOffset = offset; | |
| 568 streamDataAccess = daData; | |
| 569 daData.longSeek(streamOffset); | |
| 570 } | |
| 571 if (streamDataAccess.isEmpty() || compressedLen == 0) { | |
| 572 userDataAccess = new DataAccess(); // empty | |
| 573 } else { | |
| 574 final byte firstByte = streamDataAccess.readByte(); | |
| 575 if (firstByte == 0x78 /* 'x' */) { | |
| 576 inflater.reset(); | |
| 577 userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, isPatch(i) ? -1 : actualLen, inflater, inflaterBuffer); | |
| 578 } else if (firstByte == 0x75 /* 'u' */) { | |
| 579 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1); | |
| 580 } else { | |
| 581 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' but I don't see reason not to return data as is | |
| 582 // | |
| 583 // although firstByte is already read from the streamDataAccess, FilterDataAccess#readByte would seek to | |
| 584 // initial offset before first attempt to read a byte | |
| 585 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen); | |
| 586 } | |
| 587 } | |
| 588 return userDataAccess; | |
| 589 } | |
| 516 | 590 |
| 517 // may be invoked few times per instance life | 591 // may be invoked few times per instance life |
| 518 public boolean range(int start, int end) throws IOException { | 592 public boolean range(int start, int end) throws IOException { |
| 519 byte[] nodeidBuf = new byte[20]; | |
| 520 int i; | 593 int i; |
| 521 // it (i.e. replace with i >= start) | 594 // it (i.e. replace with i >= start) |
| 522 if (needData && (i = getBaseRevision(start)) < start) { | 595 if (needData && (i = getBaseRevision(start)) < start) { |
| 523 // if lastRevisionRead in [baseRevision(start), start) can reuse lastUserData | 596 // if lastRevisionRead in [baseRevision(start), start) can reuse lastUserData |
| 524 // doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below). | 597 // doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below). |
| 535 i = start; | 608 i = start; |
| 536 } | 609 } |
| 537 | 610 |
| 538 daIndex.seek(getIndexOffsetInt(i)); | 611 daIndex.seek(getIndexOffsetInt(i)); |
| 539 // | 612 // |
| 540 // reuse some instances | 613 // reuse instance, do not normalize it as patches from the stream are unlikely to need it |
| 541 final Patch patch = new Patch(); | 614 final Patch patch = new Patch(false); |
| 542 final Inflater inflater = new Inflater(); | 615 // |
| 543 // can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel | 616 if (needData && mergePatches && start-i > 2) { |
| 544 final byte[] inflaterBuffer = new byte[10 * 1024]; // TODO consider using DAP.DEFAULT_FILE_BUFFER | 617 // i+1 == start just reads lastUserData, i+2 == start applies one patch - not worth dedicated effort |
| 618 Patch ultimatePatch = new Patch(true); | |
| 619 for ( ; i < start; i++) { | |
| 620 readHeaderRecord(i); | |
| 621 DataAccess userDataAccess = getStoredData(i); | |
| 622 if (lastUserData == null) { | |
| 623 assert !isPatch(i); | |
| 624 lastUserData = userDataAccess; | |
| 625 } else { | |
| 626 assert isPatch(i); // i < start and i == getBaseRevision() | |
| 627 patch.read(userDataAccess); | |
| 628 userDataAccess.done(); | |
| 629 // I assume empty patches are applied ok | |
| 630 ultimatePatch = ultimatePatch.apply(patch); | |
| 631 patch.clear(); | |
| 632 } | |
| 633 } | |
| 634 lastUserData.reset(); | |
| 635 byte[] userData = ultimatePatch.apply(lastUserData, actualLen); | |
| 636 ultimatePatch.clear(); | |
| 637 lastUserData.done(); | |
| 638 lastUserData = new ByteArrayDataAccess(userData); | |
| 639 } | |
| 545 // | 640 // |
| 546 | 641 |
| 547 for (; i <= end; i++ ) { | 642 for (; i <= end; i++ ) { |
| 548 if (inline && needData) { | 643 readHeaderRecord(i); |
| 549 // inspector reading data (though FilterDataAccess) may have affected index position | |
| 550 daIndex.seek(getIndexOffsetInt(i)); | |
| 551 } | |
| 552 long l = daIndex.readLong(); // 0 | |
| 553 long offset = i == 0 ? 0 : (l >>> 16); | |
| 554 @SuppressWarnings("unused") | |
| 555 int flags = (int) (l & 0x0FFFF); | |
| 556 int compressedLen = daIndex.readInt(); // +8 | |
| 557 int actualLen = daIndex.readInt(); // +12 | |
| 558 int baseRevision = daIndex.readInt(); // +16 | |
| 559 int linkRevision = daIndex.readInt(); // +20 | |
| 560 int parent1Revision = daIndex.readInt(); | |
| 561 int parent2Revision = daIndex.readInt(); | |
| 562 // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty | |
| 563 daIndex.readBytes(nodeidBuf, 0, 20); // +32 | |
| 564 daIndex.skip(12); | |
| 565 DataAccess userDataAccess = null; | 644 DataAccess userDataAccess = null; |
| 566 if (needData) { | 645 if (needData) { |
| 567 long streamOffset; | 646 userDataAccess = getStoredData(i); |
| 568 DataAccess streamDataAccess; | |
| 569 if (inline) { | |
| 570 streamDataAccess = daIndex; | |
| 571 streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream | |
| 572 } else { | |
| 573 streamOffset = offset; | |
| 574 streamDataAccess = daData; | |
| 575 daData.longSeek(streamOffset); | |
| 576 } | |
| 577 final boolean patchToPrevious = baseRevision != i; // the only way I found to tell if it's a patch | |
| 578 if (streamDataAccess.isEmpty() || compressedLen == 0) { | |
| 579 userDataAccess = new DataAccess(); // empty | |
| 580 } else { | |
| 581 final byte firstByte = streamDataAccess.readByte(); | |
| 582 if (firstByte == 0x78 /* 'x' */) { | |
| 583 inflater.reset(); | |
| 584 userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, patchToPrevious ? -1 : actualLen, inflater, inflaterBuffer); | |
| 585 } else if (firstByte == 0x75 /* 'u' */) { | |
| 586 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1); | |
| 587 } else { | |
| 588 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' but I don't see reason not to return data as is | |
| 589 // | |
| 590 // although firstByte is already read from the streamDataAccess, FilterDataAccess#readByte would seek to | |
| 591 // initial offset before first attempt to read a byte | |
| 592 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen); | |
| 593 } | |
| 594 } | |
| 595 // userDataAccess is revision content, either complete revision, patch of a previous content, or an empty patch | 647 // userDataAccess is revision content, either complete revision, patch of a previous content, or an empty patch |
| 596 if (patchToPrevious) { | 648 if (isPatch(i)) { |
| 597 // this is a patch | 649 // this is a patch |
| 598 if (userDataAccess.isEmpty()) { | 650 if (userDataAccess.isEmpty()) { |
| 599 // Issue 22, empty patch to an empty base revision | 651 // Issue 22, empty patch to an empty base revision |
| 600 // Issue 24, empty patch to non-empty base revision | 652 // Issue 24, empty patch to non-empty base revision |
| 601 // empty patch modifies nothing, use content of a previous revision (shall present - it's a patch here) | 653 // empty patch modifies nothing, use content of a previous revision (shall present - it's a patch here) |
