changeset 263:31f67be94e71

RevlogStream - reduce number of object instances, reuse when possible
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Thu, 18 Aug 2011 18:06:44 +0200
parents 3dcd3dd90c77
children 6bb5e7ed051a
files src/org/tmatesoft/hg/internal/InflaterDataAccess.java src/org/tmatesoft/hg/internal/RevlogStream.java test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java
diffstat 3 files changed, 120 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/internal/InflaterDataAccess.java	Thu Aug 18 03:46:36 2011 +0200
+++ b/src/org/tmatesoft/hg/internal/InflaterDataAccess.java	Thu Aug 18 18:06:44 2011 +0200
@@ -41,18 +41,21 @@
 	private int decompressedLength;
 
 	public InflaterDataAccess(DataAccess dataAccess, int offset, int compressedLength) {
-		this(dataAccess, offset, compressedLength, -1, new Inflater(), 512);
+		this(dataAccess, offset, compressedLength, -1, new Inflater(), new byte[512]);
 	}
 
 	public InflaterDataAccess(DataAccess dataAccess, int offset, int compressedLength, int actualLength) {
-		this(dataAccess, offset, compressedLength, actualLength, new Inflater(), 512);
+		this(dataAccess, offset, compressedLength, actualLength, new Inflater(), new byte[512]);
 	}
 
-	public InflaterDataAccess(DataAccess dataAccess, int offset, int compressedLength, int actualLength, Inflater inflater, int bufSize) {
+	public InflaterDataAccess(DataAccess dataAccess, int offset, int compressedLength, int actualLength, Inflater inflater, byte[] buf) {
 		super(dataAccess, offset, compressedLength);
+		if (inflater == null || buf == null) {
+			throw new IllegalArgumentException();
+		}
 		this.inflater = inflater;
 		this.decompressedLength = actualLength;
-		buffer = new byte[bufSize];
+		buffer = buf;
 	}
 	
 	@Override
--- a/src/org/tmatesoft/hg/internal/RevlogStream.java	Thu Aug 18 03:46:36 2011 +0200
+++ b/src/org/tmatesoft/hg/internal/RevlogStream.java	Thu Aug 18 18:06:44 2011 +0200
@@ -23,6 +23,7 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.zip.Inflater;
 
 import org.tmatesoft.hg.core.HgBadStateException;
 import org.tmatesoft.hg.core.Nodeid;
@@ -337,6 +338,9 @@
 		private Lifecycle.BasicCallback cb = null;
 		private int lastRevisionRead = BAD_REVISION;
 		private DataAccess lastUserData;
+		// next are to track two major bottlenecks - patch application and actual time spent in inspector 
+//		private long applyTime, inspectorTime;
+
 
 		public ReaderN1(boolean needData, Inspector insp) {
 			assert insp != null;
@@ -353,6 +357,7 @@
 				cb = new Lifecycle.BasicCallback();
 				((Lifecycle) inspector).start(totalWork, cb, cb);
 			}
+//			applyTime = inspectorTime = 0;
 		}
 
 		public void finish() {
@@ -367,8 +372,9 @@
 			if (daData != null) {
 				daData.done();
 			}
+//			System.out.printf("applyTime:%d ms, inspectorTime: %d ms\n", applyTime, inspectorTime);
 		}
-		
+
 		public boolean range(int start, int end) throws IOException {
 			byte[] nodeidBuf = new byte[20];
 			int i;
@@ -394,7 +400,12 @@
 			
 			daIndex.seek(getIndexOffsetInt(i));
 			//
+			// reuse some instances
 			final ArrayList<PatchRecord> patches = new ArrayList<PatchRecord>();
+			final Inflater inflater = new Inflater();
+			// can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel
+			final byte[] inflaterBuffer = new byte[1024];
+			//
 			
 			for (; i <= end; i++ ) {
 				if (inline && needData) {
@@ -432,7 +443,8 @@
 					} else {
 						final byte firstByte = streamDataAccess.readByte();
 						if (firstByte == 0x78 /* 'x' */) {
-							userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, patchToPrevious ? -1 : actualLen);
+							inflater.reset();
+							userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, patchToPrevious ? -1 : actualLen, inflater, inflaterBuffer);
 						} else if (firstByte == 0x75 /* 'u' */) {
 							userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1);
 						} else {
@@ -444,6 +456,7 @@
 					// XXX 
 					if (patchToPrevious) {
 						// this is a patch
+						patches.clear(); // won't hurt to ensure there are no leftovers, even if we already cleaned
 						while (!userDataAccess.isEmpty()) {
 							PatchRecord pr = PatchRecord.read(userDataAccess);
 //							System.out.printf("PatchRecord:%d %d %d\n", pr.start, pr.end, pr.len);
@@ -451,8 +464,14 @@
 						}
 						userDataAccess.done();
 						//
+						// it shall be reset at the end of prev iteration, when it got assigned from userDataAccess
+						// however, actual userDataAccess and lastUserData may share Inflater object, which needs to be reset
+						// Alternatively, userDataAccess.done() above may be responsible to reset Inflater (if it's InflaterDataAccess)
+						lastUserData.reset();
+//						final long startMeasuring = System.currentTimeMillis();
 						byte[] userData = apply(lastUserData, actualLen, patches);
-						patches.clear();
+//						applyTime += (System.currentTimeMillis() - startMeasuring);
+						patches.clear(); // do not keep any reference, allow PatchRecord to be gc'd
 						userDataAccess = new ByteArrayDataAccess(userData);
 					}
 				} else {
@@ -461,7 +480,9 @@
 					}
 				}
 				if (!extraReadsToBaseRev || i >= start) {
+//					final long startMeasuring = System.currentTimeMillis();
 					inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess);
+//					inspectorTime += (System.currentTimeMillis() - startMeasuring);
 				}
 				if (cb != null) {
 					if (cb.isStopped()) {
@@ -469,12 +490,12 @@
 					}
 				}
 				if (userDataAccess != null) {
-					userDataAccess.reset();
-					if (lastUserData != null) {
-						lastUserData.done();
-					}
-					lastUserData = userDataAccess;
+					userDataAccess.reset(); // not sure this is necessary here, as lastUserData would get reset anyway before next use.
 				}
+				if (lastUserData != null) {
+					lastUserData.done();
+				}
+				lastUserData = userDataAccess;
 			}
 			lastRevisionRead = end;
 			return true;
@@ -497,7 +518,8 @@
 		int last = 0, destIndex = 0;
 		if (outcomeLen == -1) {
 			outcomeLen = baseRevisionContent.length();
-			for (PatchRecord pr : patch) {
+			for (int i = 0, x = patch.size(); i < x; i++) {
+				PatchRecord pr = patch.get(i);
 				outcomeLen += pr.start - last + pr.len;
 				last = pr.end;
 			}
@@ -505,7 +527,8 @@
 			last = 0;
 		}
 		byte[] rv = new byte[outcomeLen];
-		for (PatchRecord pr : patch) {
+		for (int i = 0, x = patch.size(); i < x; i++) {
+			PatchRecord pr = patch.get(i);
 			baseRevisionContent.seek(last);
 			baseRevisionContent.readBytes(rv, destIndex, pr.start-last);
 			destIndex += pr.start - last;
--- a/test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java	Thu Aug 18 03:46:36 2011 +0200
+++ b/test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java	Thu Aug 18 18:06:44 2011 +0200
@@ -7,6 +7,7 @@
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Pattern;
 
 import org.tmatesoft.hg.core.HgChangeset;
 import org.tmatesoft.hg.core.HgChangesetHandler;
@@ -19,6 +20,7 @@
 import org.tmatesoft.hg.repo.HgManifest;
 import org.tmatesoft.hg.repo.HgRepository;
 import org.tmatesoft.hg.repo.HgTags;
+import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
 import org.tmatesoft.hg.repo.HgTags.TagInfo;
 import org.tmatesoft.hg.util.CancelledException;
 import org.tmatesoft.hg.util.Path;
@@ -33,13 +35,63 @@
 	public static void main(String[] args) throws Exception {
 		MapTagsToFileRevisions m = new MapTagsToFileRevisions();
 		System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory());
-		m.main();
+//		Pattern p = Pattern.compile("^doc/[^/]*?\\.[0-9]\\.(x|ht)ml");
+//		System.out.println(p.matcher("doc/asd.2.xml").matches());
+//		System.out.println(p.matcher("doc/zxc.6.html").matches());
+		m.collectTagsPerFile();
+//		m.manifestWalk();
 		m = null;
 		System.gc();
 		System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory());
 	}
-	
-	private void main() throws HgException, CancelledException {
+
+	private void changelogWalk() throws Exception {
+		final long start = System.currentTimeMillis();
+		final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython"));
+		repository.getChangelog().all(new HgChangelog.Inspector() {
+			public int xx = 0;
+			
+			public void next(int revisionNumber, Nodeid nodeid, RawChangeset cset) {
+				if (xx+revisionNumber < 0) {
+					System.out.println(xx);
+					System.out.println(revisionNumber);
+				}
+				xx += revisionNumber;
+			}
+		});
+		// cpython: 17 seconds, mem  132,9 -> 129,0 -> 131,7
+		// cpyhton: 13 seconds. Of that, cumulative Patch.apply takes 8.8 seconds, RevlogStream.Inspector.next - 1.8
+		System.out.printf("Total time: %d ms\n", System.currentTimeMillis() - start);
+		System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory());
+	}
+
+	private void manifestWalk() throws Exception {
+		final long start = System.currentTimeMillis();
+		final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython"));
+		repository.getManifest().walk(0, 10000, new HgManifest.Inspector() {
+
+			public boolean begin(int mainfestRevision, Nodeid nid, int changelogRevision) {
+				return true;
+			}
+
+			public boolean next(Nodeid nid, String fname, String flags) {
+				return true;
+			}
+
+			public boolean end(int manifestRevision) {
+				return true;
+			}
+		});
+		// cpython: 1,1 sec for 0..1000, 43 sec for 0..10000, 115 sec for 0..20000 (Pool with HashMap)
+		// 2,4 sec for 1000..2000
+		// cpython -r 1000: 484 files, -r 2000: 1015 files. Iteration 1000..2000; fnamePool.size:1019 nodeidPool.size:2989
+		// nodeidPool for two subsequent revisions only: 840. 37 sec for 0..10000. 99 sec for 0..20k
+		// 0..10000 fnamePool: hits:15989152, misses:3020
+		System.out.printf("Total time: %d ms\n", System.currentTimeMillis() - start);
+		System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory());
+	}
+
+	private void collectTagsPerFile() throws HgException, CancelledException {
 		final long start = System.currentTimeMillis();
 		final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython"));
 		final HgTags tags = repository.getTags();
@@ -71,6 +123,31 @@
 		}
 		System.out.printf("Prepared tag revisions to analyze: %d ms\n", System.currentTimeMillis() - start);
 		//
+		final int[] counts = new int[2];
+		HgManifest.Inspector emptyInsp = new HgManifest.Inspector() {
+			public boolean begin(int mainfestRevision, Nodeid nid, int changelogRevision) {
+				counts[0]++;
+				return true;
+			}
+			public boolean next(Nodeid nid, String fname, String flags) {
+				counts[1]++;
+				return true;
+			}
+			public boolean end(int manifestRevision) {
+				return true;
+			}
+		};
+//		final long start0 = System.currentTimeMillis();
+//		repository.getManifest().walk(emptyInsp, tagLocalRevs[0]); // warm-up
+//		final long start1 = System.currentTimeMillis();
+//		repository.getManifest().walk(emptyInsp, tagLocalRevs[0]); // warm-up
+//		counts[0] = counts[1] = 0;
+//		final long start2 = System.currentTimeMillis();
+//		repository.getManifest().walk(emptyInsp, tagLocalRevs);
+//		// No-op iterate over selected revisions: 11719 ms (revs:189, files: 579652), warm-up: 218 ms.  Cache built: 25281 ms
+//		// No-op iterate over selected revisions: 11719 ms (revs:189, files: 579652), warm-up1: 234 ms, warm-up2: 16 ms.  Cache built: 25375 ms
+//		System.out.printf("No-op iterate over selected revisions: %d ms (revs:%d, files: %d), warm-up1: %d ms, warm-up2: %d ms \n", System.currentTimeMillis() - start2, counts[0], counts[1], start1-start0, start2-start1);
+		//
 		repository.getManifest().walk(new HgManifest.Inspector() {
 			private int[] tagIndexAtRev = new int[4]; // it's unlikely there would be a lot of tags associated with a given cset