comparison src/org/tmatesoft/hg/internal/RevlogStream.java @ 584:ed243b668502

Conditionally enable effective patch merge alternative for revlog reading
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Thu, 25 Apr 2013 16:08:17 +0200
parents bd5926e24aa3
children b47ef0d2777b
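
What the change does, in short: when ReaderN1 is asked for revision content that sits at the end of a delta chain, it may now fold the whole chain of patches into one cumulative patch first and apply that single patch to the base revision, instead of materializing every intermediate revision along the way. A rough sketch of that strategy, built from the Patch operations that appear in the hunks below (read, apply(Patch), apply(DataAccess, int), clear); baseRev, target, storedData() and the other surrounding names are placeholders for illustration, not hg4j API:

    // fold the delta chain first, touch the full revision content only once
    Patch folded = new Patch(true);       // the boolean toggles normalization (see the comment in the hunk below)
    for (int rev = baseRev + 1; rev < target; rev++) {
        Patch p = new Patch(false);
        p.read(storedData(rev));          // every non-base revision in the chain is a patch to its predecessor
        folded = folded.apply(p);         // patch-on-patch composition, no revision content involved yet
        p.clear();
    }
    byte[] result = folded.apply(storedData(baseRev), expectedLen);  // single application to the base text

The classic path is still there and is still used when merging is switched off or the chain is short (the new code bails out unless start - i > 2): it applies each patch to the previously reconstructed revision, copying the full content once per chain element.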
comparison
583:47dfa0ec7e35 584:ed243b668502
265 start = indexSize - 1; 265 start = indexSize - 1;
266 } 266 }
267 HgInternals.checkRevlogRange(start, end, indexSize-1); 267 HgInternals.checkRevlogRange(start, end, indexSize-1);
268 // XXX may cache [start .. end] from index with a single read (pre-read) 268 // XXX may cache [start .. end] from index with a single read (pre-read)
269 269
270 ReaderN1 r = new ReaderN1(needData, inspector); 270 ReaderN1 r = new ReaderN1(needData, inspector, dataAccess.shallMergePatches());
271 try { 271 try {
272 r.start(end - start + 1); 272 r.start(end - start + 1);
273 r.range(start, end); 273 r.range(start, end);
274 } catch (IOException ex) { 274 } catch (IOException ex) {
275 throw new HgInvalidControlFileException(String.format("Failed reading [%d..%d]", start, end), ex, indexFile); 275 throw new HgInvalidControlFileException(String.format("Failed reading [%d..%d]", start, end), ex, indexFile);
297 } 297 }
298 if (sortedRevisions[sortedRevisions.length - 1] > indexSize) { 298 if (sortedRevisions[sortedRevisions.length - 1] > indexSize) {
299 throw new HgInvalidRevisionException(String.format("Can't iterate [%d, %d] in range [0..%d]", sortedRevisions[0], sortedRevisions[sortedRevisions.length - 1], indexSize), null, sortedRevisions[sortedRevisions.length - 1]); 299 throw new HgInvalidRevisionException(String.format("Can't iterate [%d, %d] in range [0..%d]", sortedRevisions[0], sortedRevisions[sortedRevisions.length - 1], indexSize), null, sortedRevisions[sortedRevisions.length - 1]);
300 } 300 }
301 301
302 ReaderN1 r = new ReaderN1(needData, inspector); 302 ReaderN1 r = new ReaderN1(needData, inspector, dataAccess.shallMergePatches());
303 try { 303 try {
304 r.start(sortedRevisions.length); 304 r.start(sortedRevisions.length);
305 for (int i = 0; i < sortedRevisions.length; ) { 305 for (int i = 0; i < sortedRevisions.length; ) {
306 int x = i; 306 int x = i;
307 i++; 307 i++;
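
Both call sites above (apparently the two iterate() overloads) now ask dataAccess, presumably the DataAccessProvider, whether the merge path should be taken, so the optimization stays opt-in and the default behaviour is unchanged. Purely as an illustration of how such a toggle is usually wired (the property name and the System.getProperty plumbing are assumptions, not necessarily what shallMergePatches() really does):

    // hypothetical wiring of the opt-in flag; the real DataAccessProvider may read its configuration differently
    public boolean shallMergePatches() {
        return Boolean.parseBoolean(System.getProperty("hg4j.repo.merge_revlog_patches", "false"));
    }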
380 380
381 private boolean outlineCached() { 381 private boolean outlineCached() {
382 return baseRevisions != null && baseRevisions.length > 0; 382 return baseRevisions != null && baseRevisions.length > 0;
383 } 383 }
384 384
385 // translate 6-byte offset field value to pysical file offset for inline revlogs 385 // translate 6-byte offset field value to physical file offset for inline revlogs
386 // DOESN'T MAKE SENSE if revlog with data is separate 386 // DOESN'T MAKE SENSE if revlog with data is separate
387 private static int offsetFieldToInlineFileOffset(long offset, int recordIndex) throws HgInvalidStateException { 387 private static int offsetFieldToInlineFileOffset(long offset, int recordIndex) throws HgInvalidStateException {
388 int o = Internals.ltoi(offset); 388 int o = Internals.ltoi(offset);
389 if (o != offset) { 389 if (o != offset) {
390 // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file 390 // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file
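
Some context for the hunk above: in an inline revlog the revision data is interleaved with the index records in the same .i file, while the 6-byte offset field counts data bytes only. As I read it, the translation therefore just adds back the index records that precede the data; a toy equivalent (the helper name is made up, 64 stands for REVLOGV1_RECORD_SIZE):

    // hypothetical helper: physical position of revision data inside an inline .i file
    static long inlineFileOffset(long dataOffset, int recordIndex) {
        // index records 0..recordIndex all come before this revision's data
        return dataOffset + 64L * (recordIndex + 1);
    }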
461 461
462 /** 462 /**
463 * operation with single file open/close and multiple diverse reads. 463 * operation with single file open/close and multiple diverse reads.
464 * XXX initOutline might need similar extraction to keep N1 format knowledge 464 * XXX initOutline might need similar extraction to keep N1 format knowledge
465 */ 465 */
466 class ReaderN1 { 466 final class ReaderN1 {
467 private final Inspector inspector; 467 private final Inspector inspector;
468 private final boolean needData; 468 private final boolean needData;
469 private final boolean mergePatches;
469 private DataAccess daIndex = null, daData = null; 470 private DataAccess daIndex = null, daData = null;
470 private Lifecycle.BasicCallback cb = null; 471 private Lifecycle.BasicCallback cb = null;
471 private Lifecycle lifecycleListener = null; 472 private Lifecycle lifecycleListener = null;
472 private int lastRevisionRead = BAD_REVISION; 473 private int lastRevisionRead = BAD_REVISION;
473 private DataAccess lastUserData; 474 private DataAccess lastUserData;
475 //
476 // next are transient values, for range() use only
477 private final Inflater inflater = new Inflater();
478 // can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel
479 private final byte[] inflaterBuffer = new byte[10 * 1024]; // TODO consider using DAP.DEFAULT_FILE_BUFFER
480 private final byte[] nodeidBuf = new byte[20];
481 // revlog record fields
482 private long offset;
483 @SuppressWarnings("unused")
484 private int flags;
485 private int compressedLen;
486 private int actualLen;
487 private int baseRevision;
488 private int linkRevision;
489 private int parent1Revision;
490 private int parent2Revision;
474 // next are to track two major bottlenecks - patch application and actual time spent in inspector 491 // next are to track two major bottlenecks - patch application and actual time spent in inspector
475 // private long applyTime, inspectorTime; // TIMING 492 // private long applyTime, inspectorTime; // TIMING
476 493
477 494 public ReaderN1(boolean dataRequested, Inspector insp, boolean usePatchMerge) {
478 public ReaderN1(boolean needData, Inspector insp) {
479 assert insp != null; 495 assert insp != null;
480 this.needData = needData; 496 needData = dataRequested;
481 inspector = insp; 497 inspector = insp;
498 mergePatches = usePatchMerge;
482 } 499 }
483 500
484 public void start(int totalWork) { 501 public void start(int totalWork) {
485 daIndex = getIndexStream(); 502 daIndex = getIndexStream();
486 if (needData && !inline) { 503 if (needData && !inline) {
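
The hunk above also hoists the Inflater, the shared inflater buffer and the nodeid buffer from range() locals into instance fields, so they are allocated once per ReaderN1 and, per the comment in the diff, can be shared by every InflaterDataAccess created during one traversal since no two of them are read in parallel. A standalone sketch of that reuse pattern with plain java.util.zip (illustration only, not hg4j code):

    import java.util.zip.DataFormatException;
    import java.util.zip.Inflater;

    final class ZlibChunkReader {
        private final Inflater inflater = new Inflater();   // wraps native zlib state, worth keeping around

        byte[] inflate(byte[] compressed, int expectedLen) throws DataFormatException {
            inflater.reset();                                // cheaper than a fresh Inflater per chunk
            inflater.setInput(compressed);
            byte[] out = new byte[expectedLen];
            int off = 0;
            while (off < expectedLen) {
                int n = inflater.inflate(out, off, expectedLen - off);
                if (n == 0) {
                    break;                                   // stream finished, or it needs more input
                }
                off += n;
            }
            return out;
        }
    }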
511 daData.done(); 528 daData.done();
512 daData = null; 529 daData = null;
513 } 530 }
514 // System.out.printf("applyTime:%d ms, inspectorTime: %d ms\n", applyTime, inspectorTime); // TIMING 531 // System.out.printf("applyTime:%d ms, inspectorTime: %d ms\n", applyTime, inspectorTime); // TIMING
515 } 532 }
533
534 private void readHeaderRecord(int i) throws IOException {
535 if (inline && needData) {
536 // inspector reading data (though FilterDataAccess) may have affected index position
537 daIndex.seek(getIndexOffsetInt(i));
538 }
539 long l = daIndex.readLong(); // 0
540 offset = i == 0 ? 0 : (l >>> 16);
541 flags = (int) (l & 0x0FFFF);
542 compressedLen = daIndex.readInt(); // +8
543 actualLen = daIndex.readInt(); // +12
544 baseRevision = daIndex.readInt(); // +16
545 linkRevision = daIndex.readInt(); // +20
546 parent1Revision = daIndex.readInt();
547 parent2Revision = daIndex.readInt();
548 // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty
549 daIndex.readBytes(nodeidBuf, 0, 20); // +32
550 daIndex.skip(12);
551 }
552
553 private boolean isPatch(int i) {
554 return baseRevision != i; // the only way I found to tell if it's a patch
555 }
556
557 private DataAccess getStoredData(int i) throws IOException {
558 DataAccess userDataAccess = null;
559 DataAccess streamDataAccess;
560 long streamOffset;
561 if (inline) {
562 streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE;
563 streamDataAccess = daIndex;
564 // don't need to do seek as it's actual position in the index stream, but it's safe to seek, just in case
565 daIndex.longSeek(streamOffset);
566 } else {
567 streamOffset = offset;
568 streamDataAccess = daData;
569 daData.longSeek(streamOffset);
570 }
571 if (streamDataAccess.isEmpty() || compressedLen == 0) {
572 userDataAccess = new DataAccess(); // empty
573 } else {
574 final byte firstByte = streamDataAccess.readByte();
575 if (firstByte == 0x78 /* 'x' */) {
576 inflater.reset();
577 userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, isPatch(i) ? -1 : actualLen, inflater, inflaterBuffer);
578 } else if (firstByte == 0x75 /* 'u' */) {
579 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1);
580 } else {
581 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' but I don't see reason not to return data as is
582 //
583 // although firstByte is already read from the streamDataAccess, FilterDataAccess#readByte would seek to
584 // initial offset before first attempt to read a byte
585 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen);
586 }
587 }
588 return userDataAccess;
589 }
516 590
517 // may be invoked few times per instance life 591 // may be invoked few times per instance life
518 public boolean range(int start, int end) throws IOException { 592 public boolean range(int start, int end) throws IOException {
519 byte[] nodeidBuf = new byte[20];
520 int i; 593 int i;
521 // it (i.e. replace with i >= start) 594 // it (i.e. replace with i >= start)
522 if (needData && (i = getBaseRevision(start)) < start) { 595 if (needData && (i = getBaseRevision(start)) < start) {
523 // if lastRevisionRead in [baseRevision(start), start) can reuse lastUserData 596 // if lastRevisionRead in [baseRevision(start), start) can reuse lastUserData
524 // doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below). 597 // doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below).
535 i = start; 608 i = start;
536 } 609 }
537 610
538 daIndex.seek(getIndexOffsetInt(i)); 611 daIndex.seek(getIndexOffsetInt(i));
539 // 612 //
540 // reuse some instances 613 // reuse instance, do not normalize it as patches from the stream are unlikely to need it
541 final Patch patch = new Patch(); 614 final Patch patch = new Patch(false);
542 final Inflater inflater = new Inflater(); 615 //
543 // can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel 616 if (needData && mergePatches && start-i > 2) {
544 final byte[] inflaterBuffer = new byte[10 * 1024]; // TODO consider using DAP.DEFAULT_FILE_BUFFER 617 // i+1 == start just reads lastUserData, i+2 == start applies one patch - not worth dedicated effort
618 Patch ultimatePatch = new Patch(true);
619 for ( ; i < start; i++) {
620 readHeaderRecord(i);
621 DataAccess userDataAccess = getStoredData(i);
622 if (lastUserData == null) {
623 assert !isPatch(i);
624 lastUserData = userDataAccess;
625 } else {
626 assert isPatch(i); // i < start and i == getBaseRevision()
627 patch.read(userDataAccess);
628 userDataAccess.done();
629 // I assume empty patches are applied ok
630 ultimatePatch = ultimatePatch.apply(patch);
631 patch.clear();
632 }
633 }
634 lastUserData.reset();
635 byte[] userData = ultimatePatch.apply(lastUserData, actualLen);
636 ultimatePatch.clear();
637 lastUserData.done();
638 lastUserData = new ByteArrayDataAccess(userData);
639 }
545 // 640 //
546 641
547 for (; i <= end; i++ ) { 642 for (; i <= end; i++ ) {
548 if (inline && needData) { 643 readHeaderRecord(i);
549 // inspector reading data (though FilterDataAccess) may have affected index position
550 daIndex.seek(getIndexOffsetInt(i));
551 }
552 long l = daIndex.readLong(); // 0
553 long offset = i == 0 ? 0 : (l >>> 16);
554 @SuppressWarnings("unused")
555 int flags = (int) (l & 0x0FFFF);
556 int compressedLen = daIndex.readInt(); // +8
557 int actualLen = daIndex.readInt(); // +12
558 int baseRevision = daIndex.readInt(); // +16
559 int linkRevision = daIndex.readInt(); // +20
560 int parent1Revision = daIndex.readInt();
561 int parent2Revision = daIndex.readInt();
562 // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty
563 daIndex.readBytes(nodeidBuf, 0, 20); // +32
564 daIndex.skip(12);
565 DataAccess userDataAccess = null; 644 DataAccess userDataAccess = null;
566 if (needData) { 645 if (needData) {
567 long streamOffset; 646 userDataAccess = getStoredData(i);
568 DataAccess streamDataAccess;
569 if (inline) {
570 streamDataAccess = daIndex;
571 streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream
572 } else {
573 streamOffset = offset;
574 streamDataAccess = daData;
575 daData.longSeek(streamOffset);
576 }
577 final boolean patchToPrevious = baseRevision != i; // the only way I found to tell if it's a patch
578 if (streamDataAccess.isEmpty() || compressedLen == 0) {
579 userDataAccess = new DataAccess(); // empty
580 } else {
581 final byte firstByte = streamDataAccess.readByte();
582 if (firstByte == 0x78 /* 'x' */) {
583 inflater.reset();
584 userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, patchToPrevious ? -1 : actualLen, inflater, inflaterBuffer);
585 } else if (firstByte == 0x75 /* 'u' */) {
586 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1);
587 } else {
588 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' but I don't see reason not to return data as is
589 //
590 // although firstByte is already read from the streamDataAccess, FilterDataAccess#readByte would seek to
591 // initial offset before first attempt to read a byte
592 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen);
593 }
594 }
595 // userDataAccess is revision content, either complete revision, patch of a previous content, or an empty patch 647 // userDataAccess is revision content, either complete revision, patch of a previous content, or an empty patch
596 if (patchToPrevious) { 648 if (isPatch(i)) {
597 // this is a patch 649 // this is a patch
598 if (userDataAccess.isEmpty()) { 650 if (userDataAccess.isEmpty()) {
599 // Issue 22, empty patch to an empty base revision 651 // Issue 22, empty patch to an empty base revision
600 // Issue 24, empty patch to non-empty base revision 652 // Issue 24, empty patch to non-empty base revision
601 // empty patch modifies nothing, use content of a previous revision (shall present - it's a patch here) 653 // empty patch modifies nothing, use content of a previous revision (shall present - it's a patch here)