comparison src/org/tmatesoft/hg/internal/RevlogStream.java @ 263:31f67be94e71

RevlogStream - reduce number of object instances, reuse when possible
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Thu, 18 Aug 2011 18:06:44 +0200
parents e5776067b3b8
children 6bb5e7ed051a
comparison
equal deleted inserted replaced
262:3dcd3dd90c77 263:31f67be94e71
21 21
22 import java.io.File; 22 import java.io.File;
23 import java.io.IOException; 23 import java.io.IOException;
24 import java.util.ArrayList; 24 import java.util.ArrayList;
25 import java.util.List; 25 import java.util.List;
26 import java.util.zip.Inflater;
26 27
27 import org.tmatesoft.hg.core.HgBadStateException; 28 import org.tmatesoft.hg.core.HgBadStateException;
28 import org.tmatesoft.hg.core.Nodeid; 29 import org.tmatesoft.hg.core.Nodeid;
29 import org.tmatesoft.hg.repo.HgRepository; 30 import org.tmatesoft.hg.repo.HgRepository;
30 31
335 private final boolean needData; 336 private final boolean needData;
336 private DataAccess daIndex = null, daData = null; 337 private DataAccess daIndex = null, daData = null;
337 private Lifecycle.BasicCallback cb = null; 338 private Lifecycle.BasicCallback cb = null;
338 private int lastRevisionRead = BAD_REVISION; 339 private int lastRevisionRead = BAD_REVISION;
339 private DataAccess lastUserData; 340 private DataAccess lastUserData;
341 // next are to track two major bottlenecks - patch application and actual time spent in inspector
342 // private long applyTime, inspectorTime;
343
340 344
341 public ReaderN1(boolean needData, Inspector insp) { 345 public ReaderN1(boolean needData, Inspector insp) {
342 assert insp != null; 346 assert insp != null;
343 this.needData = needData; 347 this.needData = needData;
344 inspector = insp; 348 inspector = insp;
351 } 355 }
352 if (inspector instanceof Lifecycle) { 356 if (inspector instanceof Lifecycle) {
353 cb = new Lifecycle.BasicCallback(); 357 cb = new Lifecycle.BasicCallback();
354 ((Lifecycle) inspector).start(totalWork, cb, cb); 358 ((Lifecycle) inspector).start(totalWork, cb, cb);
355 } 359 }
360 // applyTime = inspectorTime = 0;
356 } 361 }
357 362
358 public void finish() { 363 public void finish() {
359 if (lastUserData != null) { 364 if (lastUserData != null) {
360 lastUserData.done(); 365 lastUserData.done();
365 } 370 }
366 daIndex.done(); 371 daIndex.done();
367 if (daData != null) { 372 if (daData != null) {
368 daData.done(); 373 daData.done();
369 } 374 }
370 } 375 // System.out.printf("applyTime:%d ms, inspectorTime: %d ms\n", applyTime, inspectorTime);
371 376 }
377
372 public boolean range(int start, int end) throws IOException { 378 public boolean range(int start, int end) throws IOException {
373 byte[] nodeidBuf = new byte[20]; 379 byte[] nodeidBuf = new byte[20];
374 int i; 380 int i;
375 boolean extraReadsToBaseRev = false; // to indicate we read revision prior to start. XXX not sure can't do without 381 boolean extraReadsToBaseRev = false; // to indicate we read revision prior to start. XXX not sure can't do without
376 // it (i.e. replace with i >= start) 382 // it (i.e. replace with i >= start)
392 i = start; 398 i = start;
393 } 399 }
394 400
395 daIndex.seek(getIndexOffsetInt(i)); 401 daIndex.seek(getIndexOffsetInt(i));
396 // 402 //
403 // reuse some instances
397 final ArrayList<PatchRecord> patches = new ArrayList<PatchRecord>(); 404 final ArrayList<PatchRecord> patches = new ArrayList<PatchRecord>();
405 final Inflater inflater = new Inflater();
406 // can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel
407 final byte[] inflaterBuffer = new byte[1024];
408 //
398 409
399 for (; i <= end; i++ ) { 410 for (; i <= end; i++ ) {
400 if (inline && needData) { 411 if (inline && needData) {
401 // inspector reading data (through FilterDataAccess) may have affected index position 412 // inspector reading data (through FilterDataAccess) may have affected index position
402 daIndex.seek(getIndexOffsetInt(i)); 413 daIndex.seek(getIndexOffsetInt(i));
430 if (streamDataAccess.isEmpty()) { 441 if (streamDataAccess.isEmpty()) {
431 userDataAccess = new DataAccess(); // empty 442 userDataAccess = new DataAccess(); // empty
432 } else { 443 } else {
433 final byte firstByte = streamDataAccess.readByte(); 444 final byte firstByte = streamDataAccess.readByte();
434 if (firstByte == 0x78 /* 'x' */) { 445 if (firstByte == 0x78 /* 'x' */) {
435 userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, patchToPrevious ? -1 : actualLen); 446 inflater.reset();
447 userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, patchToPrevious ? -1 : actualLen, inflater, inflaterBuffer);
436 } else if (firstByte == 0x75 /* 'u' */) { 448 } else if (firstByte == 0x75 /* 'u' */) {
437 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1); 449 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1);
438 } else { 450 } else {
439 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' 451 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0'
440 // but I don't see reason not to return data as is 452 // but I don't see reason not to return data as is
442 } 454 }
443 } 455 }
444 // XXX 456 // XXX
445 if (patchToPrevious) { 457 if (patchToPrevious) {
446 // this is a patch 458 // this is a patch
459 patches.clear(); // won't hurt to ensure there are no leftovers, even if we already cleaned
447 while (!userDataAccess.isEmpty()) { 460 while (!userDataAccess.isEmpty()) {
448 PatchRecord pr = PatchRecord.read(userDataAccess); 461 PatchRecord pr = PatchRecord.read(userDataAccess);
449 // System.out.printf("PatchRecord:%d %d %d\n", pr.start, pr.end, pr.len); 462 // System.out.printf("PatchRecord:%d %d %d\n", pr.start, pr.end, pr.len);
450 patches.add(pr); 463 patches.add(pr);
451 } 464 }
452 userDataAccess.done(); 465 userDataAccess.done();
453 // 466 //
467 // it shall be reset at the end of prev iteration, when it got assigned from userDataAccess
468 // however, actual userDataAccess and lastUserData may share Inflater object, which needs to be reset
469 // Alternatively, userDataAccess.done() above may be responsible to reset Inflater (if it's InflaterDataAccess)
470 lastUserData.reset();
471 // final long startMeasuring = System.currentTimeMillis();
454 byte[] userData = apply(lastUserData, actualLen, patches); 472 byte[] userData = apply(lastUserData, actualLen, patches);
455 patches.clear(); 473 // applyTime += (System.currentTimeMillis() - startMeasuring);
474 patches.clear(); // do not keep any reference, allow PatchRecord to be gc'd
456 userDataAccess = new ByteArrayDataAccess(userData); 475 userDataAccess = new ByteArrayDataAccess(userData);
457 } 476 }
458 } else { 477 } else {
459 if (inline) { 478 if (inline) {
460 daIndex.skip(compressedLen); 479 daIndex.skip(compressedLen);
461 } 480 }
462 } 481 }
463 if (!extraReadsToBaseRev || i >= start) { 482 if (!extraReadsToBaseRev || i >= start) {
483 // final long startMeasuring = System.currentTimeMillis();
464 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); 484 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess);
485 // inspectorTime += (System.currentTimeMillis() - startMeasuring);
465 } 486 }
466 if (cb != null) { 487 if (cb != null) {
467 if (cb.isStopped()) { 488 if (cb.isStopped()) {
468 return false; 489 return false;
469 } 490 }
470 } 491 }
471 if (userDataAccess != null) { 492 if (userDataAccess != null) {
472 userDataAccess.reset(); 493 userDataAccess.reset(); // not sure this is necessary here, as lastUserData would get reset anyway before next use.
473 if (lastUserData != null) { 494 }
474 lastUserData.done(); 495 if (lastUserData != null) {
475 } 496 lastUserData.done();
476 lastUserData = userDataAccess; 497 }
477 } 498 lastUserData = userDataAccess;
478 } 499 }
479 lastRevisionRead = end; 500 lastRevisionRead = end;
480 return true; 501 return true;
481 } 502 }
482 } 503 }
495 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF 516 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF
496 public/*for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List<PatchRecord> patch) throws IOException { 517 public/*for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List<PatchRecord> patch) throws IOException {
497 int last = 0, destIndex = 0; 518 int last = 0, destIndex = 0;
498 if (outcomeLen == -1) { 519 if (outcomeLen == -1) {
499 outcomeLen = baseRevisionContent.length(); 520 outcomeLen = baseRevisionContent.length();
500 for (PatchRecord pr : patch) { 521 for (int i = 0, x = patch.size(); i < x; i++) {
522 PatchRecord pr = patch.get(i);
501 outcomeLen += pr.start - last + pr.len; 523 outcomeLen += pr.start - last + pr.len;
502 last = pr.end; 524 last = pr.end;
503 } 525 }
504 outcomeLen -= last; 526 outcomeLen -= last;
505 last = 0; 527 last = 0;
506 } 528 }
507 byte[] rv = new byte[outcomeLen]; 529 byte[] rv = new byte[outcomeLen];
508 for (PatchRecord pr : patch) { 530 for (int i = 0, x = patch.size(); i < x; i++) {
531 PatchRecord pr = patch.get(i);
509 baseRevisionContent.seek(last); 532 baseRevisionContent.seek(last);
510 baseRevisionContent.readBytes(rv, destIndex, pr.start-last); 533 baseRevisionContent.readBytes(rv, destIndex, pr.start-last);
511 destIndex += pr.start - last; 534 destIndex += pr.start - last;
512 System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length); 535 System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length);
513 destIndex += pr.data.length; 536 destIndex += pr.data.length;