comparison src/org/tmatesoft/hg/internal/RevlogStream.java @ 329:694ebabb5cb3

Refactor revlog patch mechanism, towards patch merging
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Thu, 13 Oct 2011 03:30:50 +0200
parents 650b45d290b1
children 8da7ade36c57
comparison
equal deleted inserted replaced
328:a674b8590362 329:694ebabb5cb3
19 import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION; 19 import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION;
20 import static org.tmatesoft.hg.repo.HgRepository.TIP; 20 import static org.tmatesoft.hg.repo.HgRepository.TIP;
21 21
22 import java.io.File; 22 import java.io.File;
23 import java.io.IOException; 23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.List;
26 import java.util.zip.Inflater; 24 import java.util.zip.Inflater;
27 25
28 import org.tmatesoft.hg.core.HgBadStateException; 26 import org.tmatesoft.hg.core.HgBadStateException;
29 import org.tmatesoft.hg.core.Nodeid; 27 import org.tmatesoft.hg.core.Nodeid;
30 import org.tmatesoft.hg.repo.HgInternals; 28 import org.tmatesoft.hg.repo.HgInternals;
390 } 388 }
391 389
392 public boolean range(int start, int end) throws IOException { 390 public boolean range(int start, int end) throws IOException {
393 byte[] nodeidBuf = new byte[20]; 391 byte[] nodeidBuf = new byte[20];
394 int i; 392 int i;
395 boolean extraReadsToBaseRev = false; // to indicate we read revision prior to start. XXX not sure can't do without
396 // it (i.e. replace with i >= start) 393 // it (i.e. replace with i >= start)
397 if (needData && (i = getBaseRevision(start)) < start) { 394 if (needData && (i = getBaseRevision(start)) < start) {
398 // if lastRevisionRead in [baseRevision(start), start) can reuse lastUserData 395 // if lastRevisionRead in [baseRevision(start), start) can reuse lastUserData
399 // doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below). 396 // doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below).
400 if (lastRevisionRead != BAD_REVISION && i <= lastRevisionRead && lastRevisionRead < start) { 397 if (lastRevisionRead != BAD_REVISION && i <= lastRevisionRead && lastRevisionRead < start) {
401 i = lastRevisionRead + 1; // start with first not-yet-read revision 398 i = lastRevisionRead + 1; // start with first not-yet-read revision
402 extraReadsToBaseRev = i < start;
403 } else { 399 } else {
404 if (lastUserData != null) { 400 if (lastUserData != null) {
405 lastUserData.done(); 401 lastUserData.done();
406 lastUserData = null; 402 lastUserData = null;
407 } 403 }
408 extraReadsToBaseRev = true;
409 } 404 }
410 } else { 405 } else {
411 // don't need to clean lastUserData as it's always null when !needData 406 // don't need to clean lastUserData as it's always null when !needData
412 i = start; 407 i = start;
413 } 408 }
414 409
415 daIndex.seek(getIndexOffsetInt(i)); 410 daIndex.seek(getIndexOffsetInt(i));
416 // 411 //
417 // reuse some instances 412 // reuse some instances
418 final ArrayList<PatchRecord> patches = new ArrayList<PatchRecord>(); 413 final Patch patch = new Patch();
419 final Inflater inflater = new Inflater(); 414 final Inflater inflater = new Inflater();
420 // can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel 415 // can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel
421 final byte[] inflaterBuffer = new byte[1024]; 416 final byte[] inflaterBuffer = new byte[1024];
422 // 417 //
423 418
468 } 463 }
469 } 464 }
470 // XXX 465 // XXX
471 if (patchToPrevious) { 466 if (patchToPrevious) {
472 // this is a patch 467 // this is a patch
473 patches.clear(); // won't hurt to ensure there are no leftovers, even if we already cleaned 468 patch.read(userDataAccess);
474 while (!userDataAccess.isEmpty()) {
475 PatchRecord pr = PatchRecord.read(userDataAccess);
476 // System.out.printf("PatchRecord:%d %d %d\n", pr.start, pr.end, pr.len);
477 patches.add(pr);
478 }
479 userDataAccess.done(); 469 userDataAccess.done();
480 // 470 //
481 // it shall be reset at the end of prev iteration, when it got assigned from userDataAccess 471 // it shall be reset at the end of prev iteration, when it got assigned from userDataAccess
482 // however, actual userDataAccess and lastUserData may share Inflater object, which needs to be reset 472 // however, actual userDataAccess and lastUserData may share Inflater object, which needs to be reset
483 // Alternatively, userDataAccess.done() above may be responsible to reset Inflater (if it's InflaterDataAccess) 473 // Alternatively, userDataAccess.done() above may be responsible to reset Inflater (if it's InflaterDataAccess)
484 lastUserData.reset(); 474 lastUserData.reset();
485 // final long startMeasuring = System.currentTimeMillis(); // TIMING 475 // final long startMeasuring = System.currentTimeMillis(); // TIMING
486 byte[] userData = apply(lastUserData, actualLen, patches); 476 byte[] userData = patch.apply(lastUserData, actualLen);
487 // applyTime += (System.currentTimeMillis() - startMeasuring); // TIMING 477 // applyTime += (System.currentTimeMillis() - startMeasuring); // TIMING
488 patches.clear(); // do not keep any reference, allow PatchRecord to be gc'd 478 patch.clear(); // do not keep any reference, allow byte[] data to be gc'd
489 userDataAccess = new ByteArrayDataAccess(userData); 479 userDataAccess = new ByteArrayDataAccess(userData);
490 } 480 }
491 } else { 481 } else {
492 if (inline) { 482 if (inline) {
493 daIndex.skip(compressedLen); 483 daIndex.skip(compressedLen);
494 } 484 }
495 } 485 }
496 if (!extraReadsToBaseRev || i >= start) { 486 if (i >= start) {
497 // final long startMeasuring = System.currentTimeMillis(); // TIMING 487 // final long startMeasuring = System.currentTimeMillis(); // TIMING
498 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); 488 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess);
499 // inspectorTime += (System.currentTimeMillis() - startMeasuring); // TIMING 489 // inspectorTime += (System.currentTimeMillis() - startMeasuring); // TIMING
500 } 490 }
501 if (cb != null) { 491 if (cb != null) {
515 return true; 505 return true;
516 } 506 }
517 } 507 }
518 508
519 509
520 // mpatch.c : apply()
521 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF
522 public/*for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List<PatchRecord> patch) throws IOException {
523 int last = 0, destIndex = 0;
524 if (outcomeLen == -1) {
525 outcomeLen = baseRevisionContent.length();
526 for (int i = 0, x = patch.size(); i < x; i++) {
527 PatchRecord pr = patch.get(i);
528 outcomeLen += pr.start - last + pr.len;
529 last = pr.end;
530 }
531 outcomeLen -= last;
532 last = 0;
533 }
534 byte[] rv = new byte[outcomeLen];
535 for (int i = 0, x = patch.size(); i < x; i++) {
536 PatchRecord pr = patch.get(i);
537 baseRevisionContent.seek(last);
538 baseRevisionContent.readBytes(rv, destIndex, pr.start-last);
539 destIndex += pr.start - last;
540 System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length);
541 destIndex += pr.data.length;
542 last = pr.end;
543 }
544 baseRevisionContent.seek(last);
545 baseRevisionContent.readBytes(rv, destIndex, (int) (baseRevisionContent.length() - last));
546 return rv;
547 }
548
549 // @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description
550 public static class PatchRecord {
551 /*
552 Given there are pr1 and pr2:
553 pr1.start to pr1.end will be replaced with pr's data (of pr1.len)
554 pr1.end to pr2.start gets copied from base
555 */
556 public int start, end, len;
557 public byte[] data;
558
559 // TODO consider PatchRecord that only records data position (absolute in data source), and acquires data as needed
560 private PatchRecord(int p1, int p2, int length, byte[] src) {
561 start = p1;
562 end = p2;
563 len = length;
564 data = src;
565 }
566
567 /*package-local*/ static PatchRecord read(byte[] data, int offset) {
568 final int x = offset; // shorthand
569 int p1 = ((data[x] & 0xFF)<< 24) | ((data[x+1] & 0xFF) << 16) | ((data[x+2] & 0xFF) << 8) | (data[x+3] & 0xFF);
570 int p2 = ((data[x+4] & 0xFF) << 24) | ((data[x+5] & 0xFF) << 16) | ((data[x+6] & 0xFF) << 8) | (data[x+7] & 0xFF);
571 int len = ((data[x+8] & 0xFF) << 24) | ((data[x+9] & 0xFF) << 16) | ((data[x+10] & 0xFF) << 8) | (data[x+11] & 0xFF);
572 byte[] dataCopy = new byte[len];
573 System.arraycopy(data, x+12, dataCopy, 0, len);
574 return new PatchRecord(p1, p2, len, dataCopy);
575 }
576
577 public /*for HgBundle*/ static PatchRecord read(DataAccess da) throws IOException {
578 int p1 = da.readInt();
579 int p2 = da.readInt();
580 int len = da.readInt();
581 byte[] src = new byte[len];
582 da.readBytes(src, 0, len);
583 return new PatchRecord(p1, p2, len, src);
584 }
585 }
586
587 // FIXME byte[] data might be too expensive, for few usecases it may be better to have intermediate Access object (when we don't need full data
588 // instantly - e.g. calculate hash, or comparing two revisions
589 public interface Inspector { 510 public interface Inspector {
590 // XXX boolean retVal to indicate whether to continue? 511 // XXX boolean retVal to indicate whether to continue?
591 // TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call) 512 // TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call)
592 // implementers shall not invoke DataAccess.done(), it's accomplished by #iterate at appropraite moment 513 // implementers shall not invoke DataAccess.done(), it's accomplished by #iterate at appropraite moment
593 void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, DataAccess data); 514 void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, DataAccess data);