Mercurial > hg4j
comparison src/org/tmatesoft/hg/internal/RevlogStream.java @ 329:694ebabb5cb3
Refactor revlog patch mechanism, towards patch merging
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Thu, 13 Oct 2011 03:30:50 +0200 |
parents | 650b45d290b1 |
children | 8da7ade36c57 |
comparison
equal
deleted
inserted
replaced
328:a674b8590362 | 329:694ebabb5cb3 |
---|---|
19 import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION; | 19 import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION; |
20 import static org.tmatesoft.hg.repo.HgRepository.TIP; | 20 import static org.tmatesoft.hg.repo.HgRepository.TIP; |
21 | 21 |
22 import java.io.File; | 22 import java.io.File; |
23 import java.io.IOException; | 23 import java.io.IOException; |
24 import java.util.ArrayList; | |
25 import java.util.List; | |
26 import java.util.zip.Inflater; | 24 import java.util.zip.Inflater; |
27 | 25 |
28 import org.tmatesoft.hg.core.HgBadStateException; | 26 import org.tmatesoft.hg.core.HgBadStateException; |
29 import org.tmatesoft.hg.core.Nodeid; | 27 import org.tmatesoft.hg.core.Nodeid; |
30 import org.tmatesoft.hg.repo.HgInternals; | 28 import org.tmatesoft.hg.repo.HgInternals; |
390 } | 388 } |
391 | 389 |
392 public boolean range(int start, int end) throws IOException { | 390 public boolean range(int start, int end) throws IOException { |
393 byte[] nodeidBuf = new byte[20]; | 391 byte[] nodeidBuf = new byte[20]; |
394 int i; | 392 int i; |
395 boolean extraReadsToBaseRev = false; // to indicate we read revision prior to start. XXX not sure can't do without | |
396 // it (i.e. replace with i >= start) | 393 // it (i.e. replace with i >= start) |
397 if (needData && (i = getBaseRevision(start)) < start) { | 394 if (needData && (i = getBaseRevision(start)) < start) { |
398 // if lastRevisionRead in [baseRevision(start), start) can reuse lastUserData | 395 // if lastRevisionRead in [baseRevision(start), start) can reuse lastUserData |
399 // doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below). | 396 // doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below). |
400 if (lastRevisionRead != BAD_REVISION && i <= lastRevisionRead && lastRevisionRead < start) { | 397 if (lastRevisionRead != BAD_REVISION && i <= lastRevisionRead && lastRevisionRead < start) { |
401 i = lastRevisionRead + 1; // start with first not-yet-read revision | 398 i = lastRevisionRead + 1; // start with first not-yet-read revision |
402 extraReadsToBaseRev = i < start; | |
403 } else { | 399 } else { |
404 if (lastUserData != null) { | 400 if (lastUserData != null) { |
405 lastUserData.done(); | 401 lastUserData.done(); |
406 lastUserData = null; | 402 lastUserData = null; |
407 } | 403 } |
408 extraReadsToBaseRev = true; | |
409 } | 404 } |
410 } else { | 405 } else { |
411 // don't need to clean lastUserData as it's always null when !needData | 406 // don't need to clean lastUserData as it's always null when !needData |
412 i = start; | 407 i = start; |
413 } | 408 } |
414 | 409 |
415 daIndex.seek(getIndexOffsetInt(i)); | 410 daIndex.seek(getIndexOffsetInt(i)); |
416 // | 411 // |
417 // reuse some instances | 412 // reuse some instances |
418 final ArrayList<PatchRecord> patches = new ArrayList<PatchRecord>(); | 413 final Patch patch = new Patch(); |
419 final Inflater inflater = new Inflater(); | 414 final Inflater inflater = new Inflater(); |
420 // can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel | 415 // can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel |
421 final byte[] inflaterBuffer = new byte[1024]; | 416 final byte[] inflaterBuffer = new byte[1024]; |
422 // | 417 // |
423 | 418 |
468 } | 463 } |
469 } | 464 } |
470 // XXX | 465 // XXX |
471 if (patchToPrevious) { | 466 if (patchToPrevious) { |
472 // this is a patch | 467 // this is a patch |
473 patches.clear(); // won't hurt to ensure there are no leftovers, even if we already cleaned | 468 patch.read(userDataAccess); |
474 while (!userDataAccess.isEmpty()) { | |
475 PatchRecord pr = PatchRecord.read(userDataAccess); | |
476 // System.out.printf("PatchRecord:%d %d %d\n", pr.start, pr.end, pr.len); | |
477 patches.add(pr); | |
478 } | |
479 userDataAccess.done(); | 469 userDataAccess.done(); |
480 // | 470 // |
481 // it shall be reset at the end of prev iteration, when it got assigned from userDataAccess | 471 // it shall be reset at the end of prev iteration, when it got assigned from userDataAccess |
482 // however, actual userDataAccess and lastUserData may share Inflater object, which needs to be reset | 472 // however, actual userDataAccess and lastUserData may share Inflater object, which needs to be reset |
483 // Alternatively, userDataAccess.done() above may be responsible to reset Inflater (if it's InflaterDataAccess) | 473 // Alternatively, userDataAccess.done() above may be responsible to reset Inflater (if it's InflaterDataAccess) |
484 lastUserData.reset(); | 474 lastUserData.reset(); |
485 // final long startMeasuring = System.currentTimeMillis(); // TIMING | 475 // final long startMeasuring = System.currentTimeMillis(); // TIMING |
486 byte[] userData = apply(lastUserData, actualLen, patches); | 476 byte[] userData = patch.apply(lastUserData, actualLen); |
487 // applyTime += (System.currentTimeMillis() - startMeasuring); // TIMING | 477 // applyTime += (System.currentTimeMillis() - startMeasuring); // TIMING |
488 patches.clear(); // do not keep any reference, allow PatchRecord to be gc'd | 478 patch.clear(); // do not keep any reference, allow byte[] data to be gc'd |
489 userDataAccess = new ByteArrayDataAccess(userData); | 479 userDataAccess = new ByteArrayDataAccess(userData); |
490 } | 480 } |
491 } else { | 481 } else { |
492 if (inline) { | 482 if (inline) { |
493 daIndex.skip(compressedLen); | 483 daIndex.skip(compressedLen); |
494 } | 484 } |
495 } | 485 } |
496 if (!extraReadsToBaseRev || i >= start) { | 486 if (i >= start) { |
497 // final long startMeasuring = System.currentTimeMillis(); // TIMING | 487 // final long startMeasuring = System.currentTimeMillis(); // TIMING |
498 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); | 488 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); |
499 // inspectorTime += (System.currentTimeMillis() - startMeasuring); // TIMING | 489 // inspectorTime += (System.currentTimeMillis() - startMeasuring); // TIMING |
500 } | 490 } |
501 if (cb != null) { | 491 if (cb != null) { |
515 return true; | 505 return true; |
516 } | 506 } |
517 } | 507 } |
518 | 508 |
519 | 509 |
520 // mpatch.c : apply() | |
521 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF | |
522 public/*for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List<PatchRecord> patch) throws IOException { | |
523 int last = 0, destIndex = 0; | |
524 if (outcomeLen == -1) { | |
525 outcomeLen = baseRevisionContent.length(); | |
526 for (int i = 0, x = patch.size(); i < x; i++) { | |
527 PatchRecord pr = patch.get(i); | |
528 outcomeLen += pr.start - last + pr.len; | |
529 last = pr.end; | |
530 } | |
531 outcomeLen -= last; | |
532 last = 0; | |
533 } | |
534 byte[] rv = new byte[outcomeLen]; | |
535 for (int i = 0, x = patch.size(); i < x; i++) { | |
536 PatchRecord pr = patch.get(i); | |
537 baseRevisionContent.seek(last); | |
538 baseRevisionContent.readBytes(rv, destIndex, pr.start-last); | |
539 destIndex += pr.start - last; | |
540 System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length); | |
541 destIndex += pr.data.length; | |
542 last = pr.end; | |
543 } | |
544 baseRevisionContent.seek(last); | |
545 baseRevisionContent.readBytes(rv, destIndex, (int) (baseRevisionContent.length() - last)); | |
546 return rv; | |
547 } | |
548 | |
549 // @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description | |
550 public static class PatchRecord { | |
551 /* | |
552 Given there are pr1 and pr2: | |
553 pr1.start to pr1.end will be replaced with pr's data (of pr1.len) | |
554 pr1.end to pr2.start gets copied from base | |
555 */ | |
556 public int start, end, len; | |
557 public byte[] data; | |
558 | |
559 // TODO consider PatchRecord that only records data position (absolute in data source), and acquires data as needed | |
560 private PatchRecord(int p1, int p2, int length, byte[] src) { | |
561 start = p1; | |
562 end = p2; | |
563 len = length; | |
564 data = src; | |
565 } | |
566 | |
567 /*package-local*/ static PatchRecord read(byte[] data, int offset) { | |
568 final int x = offset; // shorthand | |
569 int p1 = ((data[x] & 0xFF)<< 24) | ((data[x+1] & 0xFF) << 16) | ((data[x+2] & 0xFF) << 8) | (data[x+3] & 0xFF); | |
570 int p2 = ((data[x+4] & 0xFF) << 24) | ((data[x+5] & 0xFF) << 16) | ((data[x+6] & 0xFF) << 8) | (data[x+7] & 0xFF); | |
571 int len = ((data[x+8] & 0xFF) << 24) | ((data[x+9] & 0xFF) << 16) | ((data[x+10] & 0xFF) << 8) | (data[x+11] & 0xFF); | |
572 byte[] dataCopy = new byte[len]; | |
573 System.arraycopy(data, x+12, dataCopy, 0, len); | |
574 return new PatchRecord(p1, p2, len, dataCopy); | |
575 } | |
576 | |
577 public /*for HgBundle*/ static PatchRecord read(DataAccess da) throws IOException { | |
578 int p1 = da.readInt(); | |
579 int p2 = da.readInt(); | |
580 int len = da.readInt(); | |
581 byte[] src = new byte[len]; | |
582 da.readBytes(src, 0, len); | |
583 return new PatchRecord(p1, p2, len, src); | |
584 } | |
585 } | |
586 | |
587 // FIXME byte[] data might be too expensive, for few usecases it may be better to have intermediate Access object (when we don't need full data | |
588 // instantly - e.g. calculate hash, or comparing two revisions | |
589 public interface Inspector { | 510 public interface Inspector { |
590 // XXX boolean retVal to indicate whether to continue? | 511 // XXX boolean retVal to indicate whether to continue? |
591 // TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call) | 512 // TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call) |
592 // implementers shall not invoke DataAccess.done(), it's accomplished by #iterate at appropraite moment | 513 // implementers shall not invoke DataAccess.done(), it's accomplished by #iterate at appropraite moment |
593 void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, DataAccess data); | 514 void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, DataAccess data); |