Mercurial > hg4j
comparison src/org/tmatesoft/hg/internal/RevlogStream.java @ 584:ed243b668502
Conditionally enable effective patch merge alternative for revlog reading
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Thu, 25 Apr 2013 16:08:17 +0200 |
parents | bd5926e24aa3 |
children | b47ef0d2777b |
comparison
equal
deleted
inserted
replaced
583:47dfa0ec7e35 | 584:ed243b668502 |
---|---|
265 start = indexSize - 1; | 265 start = indexSize - 1; |
266 } | 266 } |
267 HgInternals.checkRevlogRange(start, end, indexSize-1); | 267 HgInternals.checkRevlogRange(start, end, indexSize-1); |
268 // XXX may cache [start .. end] from index with a single read (pre-read) | 268 // XXX may cache [start .. end] from index with a single read (pre-read) |
269 | 269 |
270 ReaderN1 r = new ReaderN1(needData, inspector); | 270 ReaderN1 r = new ReaderN1(needData, inspector, dataAccess.shallMergePatches()); |
271 try { | 271 try { |
272 r.start(end - start + 1); | 272 r.start(end - start + 1); |
273 r.range(start, end); | 273 r.range(start, end); |
274 } catch (IOException ex) { | 274 } catch (IOException ex) { |
275 throw new HgInvalidControlFileException(String.format("Failed reading [%d..%d]", start, end), ex, indexFile); | 275 throw new HgInvalidControlFileException(String.format("Failed reading [%d..%d]", start, end), ex, indexFile); |
297 } | 297 } |
298 if (sortedRevisions[sortedRevisions.length - 1] > indexSize) { | 298 if (sortedRevisions[sortedRevisions.length - 1] > indexSize) { |
299 throw new HgInvalidRevisionException(String.format("Can't iterate [%d, %d] in range [0..%d]", sortedRevisions[0], sortedRevisions[sortedRevisions.length - 1], indexSize), null, sortedRevisions[sortedRevisions.length - 1]); | 299 throw new HgInvalidRevisionException(String.format("Can't iterate [%d, %d] in range [0..%d]", sortedRevisions[0], sortedRevisions[sortedRevisions.length - 1], indexSize), null, sortedRevisions[sortedRevisions.length - 1]); |
300 } | 300 } |
301 | 301 |
302 ReaderN1 r = new ReaderN1(needData, inspector); | 302 ReaderN1 r = new ReaderN1(needData, inspector, dataAccess.shallMergePatches()); |
303 try { | 303 try { |
304 r.start(sortedRevisions.length); | 304 r.start(sortedRevisions.length); |
305 for (int i = 0; i < sortedRevisions.length; ) { | 305 for (int i = 0; i < sortedRevisions.length; ) { |
306 int x = i; | 306 int x = i; |
307 i++; | 307 i++; |
380 | 380 |
381 private boolean outlineCached() { | 381 private boolean outlineCached() { |
382 return baseRevisions != null && baseRevisions.length > 0; | 382 return baseRevisions != null && baseRevisions.length > 0; |
383 } | 383 } |
384 | 384 |
385 // translate 6-byte offset field value to pysical file offset for inline revlogs | 385 // translate 6-byte offset field value to physical file offset for inline revlogs |
386 // DOESN'T MAKE SENSE if revlog with data is separate | 386 // DOESN'T MAKE SENSE if revlog with data is separate |
387 private static int offsetFieldToInlineFileOffset(long offset, int recordIndex) throws HgInvalidStateException { | 387 private static int offsetFieldToInlineFileOffset(long offset, int recordIndex) throws HgInvalidStateException { |
388 int o = Internals.ltoi(offset); | 388 int o = Internals.ltoi(offset); |
389 if (o != offset) { | 389 if (o != offset) { |
390 // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file | 390 // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file |
461 | 461 |
462 /** | 462 /** |
463 * operation with single file open/close and multiple diverse reads. | 463 * operation with single file open/close and multiple diverse reads. |
464 * XXX initOutline might need similar extraction to keep N1 format knowledge | 464 * XXX initOutline might need similar extraction to keep N1 format knowledge |
465 */ | 465 */ |
466 class ReaderN1 { | 466 final class ReaderN1 { |
467 private final Inspector inspector; | 467 private final Inspector inspector; |
468 private final boolean needData; | 468 private final boolean needData; |
469 private final boolean mergePatches; | |
469 private DataAccess daIndex = null, daData = null; | 470 private DataAccess daIndex = null, daData = null; |
470 private Lifecycle.BasicCallback cb = null; | 471 private Lifecycle.BasicCallback cb = null; |
471 private Lifecycle lifecycleListener = null; | 472 private Lifecycle lifecycleListener = null; |
472 private int lastRevisionRead = BAD_REVISION; | 473 private int lastRevisionRead = BAD_REVISION; |
473 private DataAccess lastUserData; | 474 private DataAccess lastUserData; |
475 // | |
476 // next are transient values, for range() use only | |
477 private final Inflater inflater = new Inflater(); | |
478 // can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel | |
479 private final byte[] inflaterBuffer = new byte[10 * 1024]; // TODO consider using DAP.DEFAULT_FILE_BUFFER | |
480 private final byte[] nodeidBuf = new byte[20]; | |
481 // revlog record fields | |
482 private long offset; | |
483 @SuppressWarnings("unused") | |
484 private int flags; | |
485 private int compressedLen; | |
486 private int actualLen; | |
487 private int baseRevision; | |
488 private int linkRevision; | |
489 private int parent1Revision; | |
490 private int parent2Revision; | |
474 // next are to track two major bottlenecks - patch application and actual time spent in inspector | 491 // next are to track two major bottlenecks - patch application and actual time spent in inspector |
475 // private long applyTime, inspectorTime; // TIMING | 492 // private long applyTime, inspectorTime; // TIMING |
476 | 493 |
477 | 494 public ReaderN1(boolean dataRequested, Inspector insp, boolean usePatchMerge) { |
478 public ReaderN1(boolean needData, Inspector insp) { | |
479 assert insp != null; | 495 assert insp != null; |
480 this.needData = needData; | 496 needData = dataRequested; |
481 inspector = insp; | 497 inspector = insp; |
498 mergePatches = usePatchMerge; | |
482 } | 499 } |
483 | 500 |
484 public void start(int totalWork) { | 501 public void start(int totalWork) { |
485 daIndex = getIndexStream(); | 502 daIndex = getIndexStream(); |
486 if (needData && !inline) { | 503 if (needData && !inline) { |
511 daData.done(); | 528 daData.done(); |
512 daData = null; | 529 daData = null; |
513 } | 530 } |
514 // System.out.printf("applyTime:%d ms, inspectorTime: %d ms\n", applyTime, inspectorTime); // TIMING | 531 // System.out.printf("applyTime:%d ms, inspectorTime: %d ms\n", applyTime, inspectorTime); // TIMING |
515 } | 532 } |
533 | |
534 private void readHeaderRecord(int i) throws IOException { | |
535 if (inline && needData) { | |
536 // inspector reading data (though FilterDataAccess) may have affected index position | |
537 daIndex.seek(getIndexOffsetInt(i)); | |
538 } | |
539 long l = daIndex.readLong(); // 0 | |
540 offset = i == 0 ? 0 : (l >>> 16); | |
541 flags = (int) (l & 0x0FFFF); | |
542 compressedLen = daIndex.readInt(); // +8 | |
543 actualLen = daIndex.readInt(); // +12 | |
544 baseRevision = daIndex.readInt(); // +16 | |
545 linkRevision = daIndex.readInt(); // +20 | |
546 parent1Revision = daIndex.readInt(); | |
547 parent2Revision = daIndex.readInt(); | |
548 // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty | |
549 daIndex.readBytes(nodeidBuf, 0, 20); // +32 | |
550 daIndex.skip(12); | |
551 } | |
552 | |
553 private boolean isPatch(int i) { | |
554 return baseRevision != i; // the only way I found to tell if it's a patch | |
555 } | |
556 | |
557 private DataAccess getStoredData(int i) throws IOException { | |
558 DataAccess userDataAccess = null; | |
559 DataAccess streamDataAccess; | |
560 long streamOffset; | |
561 if (inline) { | |
562 streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE; | |
563 streamDataAccess = daIndex; | |
564 // don't need to do seek as it's actual position in the index stream, but it's safe to seek, just in case | |
565 daIndex.longSeek(streamOffset); | |
566 } else { | |
567 streamOffset = offset; | |
568 streamDataAccess = daData; | |
569 daData.longSeek(streamOffset); | |
570 } | |
571 if (streamDataAccess.isEmpty() || compressedLen == 0) { | |
572 userDataAccess = new DataAccess(); // empty | |
573 } else { | |
574 final byte firstByte = streamDataAccess.readByte(); | |
575 if (firstByte == 0x78 /* 'x' */) { | |
576 inflater.reset(); | |
577 userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, isPatch(i) ? -1 : actualLen, inflater, inflaterBuffer); | |
578 } else if (firstByte == 0x75 /* 'u' */) { | |
579 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1); | |
580 } else { | |
581 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' but I don't see reason not to return data as is | |
582 // | |
583 // although firstByte is already read from the streamDataAccess, FilterDataAccess#readByte would seek to | |
584 // initial offset before first attempt to read a byte | |
585 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen); | |
586 } | |
587 } | |
588 return userDataAccess; | |
589 } | |
516 | 590 |
517 // may be invoked few times per instance life | 591 // may be invoked few times per instance life |
518 public boolean range(int start, int end) throws IOException { | 592 public boolean range(int start, int end) throws IOException { |
519 byte[] nodeidBuf = new byte[20]; | |
520 int i; | 593 int i; |
521 // it (i.e. replace with i >= start) | 594 // it (i.e. replace with i >= start) |
522 if (needData && (i = getBaseRevision(start)) < start) { | 595 if (needData && (i = getBaseRevision(start)) < start) { |
523 // if lastRevisionRead in [baseRevision(start), start) can reuse lastUserData | 596 // if lastRevisionRead in [baseRevision(start), start) can reuse lastUserData |
524 // doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below). | 597 // doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below). |
535 i = start; | 608 i = start; |
536 } | 609 } |
537 | 610 |
538 daIndex.seek(getIndexOffsetInt(i)); | 611 daIndex.seek(getIndexOffsetInt(i)); |
539 // | 612 // |
540 // reuse some instances | 613 // reuse instance, do not normalize it as patches from the stream are unlikely to need it |
541 final Patch patch = new Patch(); | 614 final Patch patch = new Patch(false); |
542 final Inflater inflater = new Inflater(); | 615 // |
543 // can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel | 616 if (needData && mergePatches && start-i > 2) { |
544 final byte[] inflaterBuffer = new byte[10 * 1024]; // TODO consider using DAP.DEFAULT_FILE_BUFFER | 617 // i+1 == start just reads lastUserData, i+2 == start applies one patch - not worth dedicated effort |
618 Patch ultimatePatch = new Patch(true); | |
619 for ( ; i < start; i++) { | |
620 readHeaderRecord(i); | |
621 DataAccess userDataAccess = getStoredData(i); | |
622 if (lastUserData == null) { | |
623 assert !isPatch(i); | |
624 lastUserData = userDataAccess; | |
625 } else { | |
626 assert isPatch(i); // i < start and i == getBaseRevision() | |
627 patch.read(userDataAccess); | |
628 userDataAccess.done(); | |
629 // I assume empty patches are applied ok | |
630 ultimatePatch = ultimatePatch.apply(patch); | |
631 patch.clear(); | |
632 } | |
633 } | |
634 lastUserData.reset(); | |
635 byte[] userData = ultimatePatch.apply(lastUserData, actualLen); | |
636 ultimatePatch.clear(); | |
637 lastUserData.done(); | |
638 lastUserData = new ByteArrayDataAccess(userData); | |
639 } | |
545 // | 640 // |
546 | 641 |
547 for (; i <= end; i++ ) { | 642 for (; i <= end; i++ ) { |
548 if (inline && needData) { | 643 readHeaderRecord(i); |
549 // inspector reading data (though FilterDataAccess) may have affected index position | |
550 daIndex.seek(getIndexOffsetInt(i)); | |
551 } | |
552 long l = daIndex.readLong(); // 0 | |
553 long offset = i == 0 ? 0 : (l >>> 16); | |
554 @SuppressWarnings("unused") | |
555 int flags = (int) (l & 0x0FFFF); | |
556 int compressedLen = daIndex.readInt(); // +8 | |
557 int actualLen = daIndex.readInt(); // +12 | |
558 int baseRevision = daIndex.readInt(); // +16 | |
559 int linkRevision = daIndex.readInt(); // +20 | |
560 int parent1Revision = daIndex.readInt(); | |
561 int parent2Revision = daIndex.readInt(); | |
562 // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty | |
563 daIndex.readBytes(nodeidBuf, 0, 20); // +32 | |
564 daIndex.skip(12); | |
565 DataAccess userDataAccess = null; | 644 DataAccess userDataAccess = null; |
566 if (needData) { | 645 if (needData) { |
567 long streamOffset; | 646 userDataAccess = getStoredData(i); |
568 DataAccess streamDataAccess; | |
569 if (inline) { | |
570 streamDataAccess = daIndex; | |
571 streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream | |
572 } else { | |
573 streamOffset = offset; | |
574 streamDataAccess = daData; | |
575 daData.longSeek(streamOffset); | |
576 } | |
577 final boolean patchToPrevious = baseRevision != i; // the only way I found to tell if it's a patch | |
578 if (streamDataAccess.isEmpty() || compressedLen == 0) { | |
579 userDataAccess = new DataAccess(); // empty | |
580 } else { | |
581 final byte firstByte = streamDataAccess.readByte(); | |
582 if (firstByte == 0x78 /* 'x' */) { | |
583 inflater.reset(); | |
584 userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, patchToPrevious ? -1 : actualLen, inflater, inflaterBuffer); | |
585 } else if (firstByte == 0x75 /* 'u' */) { | |
586 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1); | |
587 } else { | |
588 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' but I don't see reason not to return data as is | |
589 // | |
590 // although firstByte is already read from the streamDataAccess, FilterDataAccess#readByte would seek to | |
591 // initial offset before first attempt to read a byte | |
592 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen); | |
593 } | |
594 } | |
595 // userDataAccess is revision content, either complete revision, patch of a previous content, or an empty patch | 647 // userDataAccess is revision content, either complete revision, patch of a previous content, or an empty patch |
596 if (patchToPrevious) { | 648 if (isPatch(i)) { |
597 // this is a patch | 649 // this is a patch |
598 if (userDataAccess.isEmpty()) { | 650 if (userDataAccess.isEmpty()) { |
599 // Issue 22, empty patch to an empty base revision | 651 // Issue 22, empty patch to an empty base revision |
600 // Issue 24, empty patch to non-empty base revision | 652 // Issue 24, empty patch to non-empty base revision |
601 // empty patch modifies nothing, use content of a previous revision (shall present - it's a patch here) | 653 // empty patch modifies nothing, use content of a previous revision (shall present - it's a patch here) |