diff src/org/tmatesoft/hg/repo/HgStatusCollector.java @ 195:c9b305df0b89

Optimization: use ParentWalker to get changeset's parents, if possible. Do not keep duplicating nodeids and strings in manifest revisions
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Fri, 15 Apr 2011 05:17:44 +0200
parents d5268ca7715b
children 3a7696fb457c
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/repo/HgStatusCollector.java	Fri Apr 15 03:35:08 2011 +0200
+++ b/src/org/tmatesoft/hg/repo/HgStatusCollector.java	Fri Apr 15 05:17:44 2011 +0200
@@ -25,11 +25,13 @@
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.SortedMap;
 import java.util.TreeMap;
 import java.util.TreeSet;
 
 import org.tmatesoft.hg.core.HgDataStreamException;
 import org.tmatesoft.hg.core.Nodeid;
+import org.tmatesoft.hg.internal.Pool;
 import org.tmatesoft.hg.util.Path;
 import org.tmatesoft.hg.util.PathPool;
 import org.tmatesoft.hg.util.PathRewrite;
@@ -44,13 +46,23 @@
 public class HgStatusCollector {
 
 	private final HgRepository repo;
-	private final Map<Integer, ManifestRevisionInspector> cache; // sparse array, in fact
+	private final SortedMap<Integer, ManifestRevisionInspector> cache; // sparse array, in fact
+	// with cpython repository, ~70 000 changes, complete Log (direct out, no reverse) output 
+	// no cache limit, no nodeids and fname caching - OOME on changeset 1035
+	// no cache limit, but with cached nodeids and filenames - 1730+
+	// cache limit 100 - 19+ minutes to process 10000, and still working (too long, stopped)
+	private final int cacheMaxSize = 100; // do not keep too much manifest revisions
 	private PathPool pathPool;
+	private final Pool<Nodeid> cacheNodes;
+	private final Pool<String> cacheFilenames; // XXX in fact, need to think if use of PathPool directly instead is better solution
+	
 
 	public HgStatusCollector(HgRepository hgRepo) {
 		this.repo = hgRepo;
 		cache = new TreeMap<Integer, ManifestRevisionInspector>();
-		ManifestRevisionInspector emptyFakeState = new ManifestRevisionInspector();
+		cacheNodes = new Pool<Nodeid>();
+		cacheFilenames = new Pool<String>();
+		ManifestRevisionInspector emptyFakeState = new ManifestRevisionInspector(null, null);
 		emptyFakeState.begin(-1, null);
 		emptyFakeState.end(-1); // FIXME HgRepo.TIP == -1 as well, need to distinguish fake "prior to first" revision from "the very last" 
 		cache.put(-1, emptyFakeState);
@@ -63,7 +75,11 @@
 	private ManifestRevisionInspector get(int rev) {
 		ManifestRevisionInspector i = cache.get(rev);
 		if (i == null) {
-			i = new ManifestRevisionInspector();
+			if (cache.size() > cacheMaxSize) {
+				// assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary
+				cache.remove(cache.firstKey());
+			}
+			i = new ManifestRevisionInspector(cacheNodes, cacheFilenames);
 			cache.put(rev, i);
 			repo.getManifest().walk(rev, rev, i);
 		}
@@ -130,7 +146,7 @@
 				private ManifestRevisionInspector delegate;
 
 				public boolean begin(int revision, Nodeid nid) {
-					cache.put(revision, delegate = new ManifestRevisionInspector());
+					cache.put(revision, delegate = new ManifestRevisionInspector(cacheNodes, cacheFilenames));
 					delegate.begin(revision, nid);
 					return true;
 				}
@@ -342,12 +358,18 @@
 			return l;
 		}
 	}
-
+	
 	/*package-local*/ static final class ManifestRevisionInspector implements HgManifest.Inspector {
 		private final TreeMap<String, Nodeid> idsMap;
 		private final TreeMap<String, String> flagsMap;
+		private final Pool<Nodeid> idsPool;
+		private final Pool<String> namesPool; 
 
-		public ManifestRevisionInspector() {
+		// optional pools for effective management of nodeids and filenames (they are likely
+		// to be duplicated among different manifest revisions
+		public ManifestRevisionInspector(Pool<Nodeid> nodeidPool, Pool<String> filenamePool) {
+			idsPool = nodeidPool;
+			namesPool = filenamePool;
 			idsMap = new TreeMap<String, Nodeid>();
 			flagsMap = new TreeMap<String, String>();
 		}
@@ -367,6 +389,12 @@
 		//
 
 		public boolean next(Nodeid nid, String fname, String flags) {
+			if (namesPool != null) {
+				fname = namesPool.unify(fname);
+			}
+			if (idsPool != null) {
+				nid = idsPool.unify(nid);
+			}
 			idsMap.put(fname, nid);
 			flagsMap.put(fname, flags);
 			return true;