changeset 195:c9b305df0b89

Optimization: use ParentWalker to get changeset's parents, if possible. Do not keep duplicating nodeids and strings in manifest revisions
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Fri, 15 Apr 2011 05:17:44 +0200
parents 344e8d7e4d6e
children e2115da4cf6a
files src/org/tmatesoft/hg/core/ChangesetTransformer.java src/org/tmatesoft/hg/core/HgChangeset.java src/org/tmatesoft/hg/core/HgIncomingCommand.java src/org/tmatesoft/hg/core/HgLogCommand.java src/org/tmatesoft/hg/core/HgOutgoingCommand.java src/org/tmatesoft/hg/internal/Pool.java src/org/tmatesoft/hg/repo/HgStatusCollector.java src/org/tmatesoft/hg/repo/HgWorkingCopyStatusCollector.java
diffstat 8 files changed, 146 insertions(+), 26 deletions(-) [+]
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/core/ChangesetTransformer.java	Fri Apr 15 03:35:08 2011 +0200
+++ b/src/org/tmatesoft/hg/core/ChangesetTransformer.java	Fri Apr 15 05:17:44 2011 +0200
@@ -36,7 +36,8 @@
 	private final HgChangeset changeset;
 	private Set<String> branches;
 
-	public ChangesetTransformer(HgRepository hgRepo, HgLogCommand.Handler delegate) {
+	// repo and delegate can't be null, parent walker can
+	public ChangesetTransformer(HgRepository hgRepo, HgLogCommand.Handler delegate, HgChangelog.ParentWalker pw) {
 		if (hgRepo == null || delegate == null) {
 			throw new IllegalArgumentException();
 		}
@@ -45,6 +46,7 @@
 		PathPool pp = new PathPool(new PathRewrite.Empty());
 		statusCollector.setPathPool(pp);
 		changeset = new HgChangeset(statusCollector, pp);
+		changeset.setParentHelper(pw);
 		handler = delegate;
 	}
 	
--- a/src/org/tmatesoft/hg/core/HgChangeset.java	Fri Apr 15 03:35:08 2011 +0200
+++ b/src/org/tmatesoft/hg/core/HgChangeset.java	Fri Apr 15 05:17:44 2011 +0200
@@ -16,12 +16,15 @@
  */
 package org.tmatesoft.hg.core;
 
+import static org.tmatesoft.hg.core.Nodeid.NULL;
+
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 
 import org.tmatesoft.hg.core.HgLogCommand.FileRevision;
 import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
+import org.tmatesoft.hg.repo.HgChangelog;
 import org.tmatesoft.hg.repo.HgRepository;
 import org.tmatesoft.hg.repo.HgStatusCollector;
 import org.tmatesoft.hg.util.Path;
@@ -39,6 +42,8 @@
 	private final HgStatusCollector statusHelper;
 	private final Path.Source pathHelper;
 
+	private HgChangelog.ParentWalker parentHelper;
+
 	//
 	private RawChangeset changeset;
 	private Nodeid nodeid;
@@ -47,6 +52,7 @@
 	private List<FileRevision> modifiedFiles, addedFiles;
 	private List<Path> deletedFiles;
 	private int revNumber;
+	private byte[] parent1, parent2;
 
 	// XXX consider CommandContext with StatusCollector, PathPool etc. Commands optionally get CC through a cons or create new
 	// and pass it around
@@ -54,15 +60,26 @@
 		statusHelper = statusCollector;
 		pathHelper = pathFactory;
 	}
-	
-	/*package-local*/
-	void init(int localRevNumber, Nodeid nid, RawChangeset rawChangeset) {
+
+	/*package-local*/ void init(int localRevNumber, Nodeid nid, RawChangeset rawChangeset) {
 		revNumber = localRevNumber;
 		nodeid = nid;
 		changeset = rawChangeset;
 		modifiedFiles = addedFiles = null;
 		deletedFiles = null;
+		parent1 = parent2 = null;
+		// keep references to parentHelper, statusHelper and pathHelper
 	}
+
+	/*package-local*/ void setParentHelper(HgChangelog.ParentWalker pw) {
+		parentHelper = pw;
+		if (parentHelper != null) {
+			if (parentHelper.getRepo() != statusHelper.getRepo()) {
+				throw new IllegalArgumentException();
+			}
+		}
+	}
+
 	public int getRevision() {
 		return revNumber;
 	}
@@ -119,21 +136,33 @@
 	}
 
 	public boolean isMerge() {
-		return !Nodeid.NULL.equals(getSecondParentRevision());
+		// p1 == -1 and p2 != -1 is legitimate case
+		return !NULL.equals(getFirstParentRevision()) && !NULL.equals(getSecondParentRevision()); 
 	}
 	
 	public Nodeid getFirstParentRevision() {
-		// XXX may read once for both p1 and p2 
-		// or use ParentWalker to minimize reads even more.
-		byte[] p1 = new byte[20];
-		statusHelper.getRepo().getChangelog().parents(revNumber, new int[2], p1, null);
-		return Nodeid.fromBinary(p1, 0);
+		if (parentHelper != null) {
+			return parentHelper.safeFirstParent(nodeid);
+		}
+		// read once for both p1 and p2
+		if (parent1 == null) {
+			parent1 = new byte[20];
+			parent2 = new byte[20];
+			statusHelper.getRepo().getChangelog().parents(revNumber, new int[2], parent1, parent2);
+		}
+		return Nodeid.fromBinary(parent1, 0);
 	}
 	
 	public Nodeid getSecondParentRevision() {
-		byte[] p2 = new byte[20];
-		statusHelper.getRepo().getChangelog().parents(revNumber, new int[2], null, p2);
-		return Nodeid.fromBinary(p2, 0);
+		if (parentHelper != null) {
+			return parentHelper.safeSecondParent(nodeid);
+		}
+		if (parent2 == null) {
+			parent1 = new byte[20];
+			parent2 = new byte[20];
+			statusHelper.getRepo().getChangelog().parents(revNumber, new int[2], parent1, parent2);
+		}
+		return Nodeid.fromBinary(parent2, 0);
 	}
 
 	@Override
--- a/src/org/tmatesoft/hg/core/HgIncomingCommand.java	Fri Apr 15 03:35:08 2011 +0200
+++ b/src/org/tmatesoft/hg/core/HgIncomingCommand.java	Fri Apr 15 05:17:44 2011 +0200
@@ -133,7 +133,7 @@
 				private final HgChangelog changelog;
 				
 				{
-					transformer = new ChangesetTransformer(localRepo, handler);
+					transformer = new ChangesetTransformer(localRepo, handler, getParentHelper());
 					transformer.limitBranches(branches);
 					parentHelper = getParentHelper();
 					changelog = localRepo.getChangelog();
--- a/src/org/tmatesoft/hg/core/HgLogCommand.java	Fri Apr 15 03:35:08 2011 +0200
+++ b/src/org/tmatesoft/hg/core/HgLogCommand.java	Fri Apr 15 05:17:44 2011 +0200
@@ -59,6 +59,7 @@
 	private Path file;
 	private boolean followHistory; // makes sense only when file != null
 	private ChangesetTransformer csetTransform;
+	private HgChangelog.ParentWalker parentHelper;
 	
 	public HgLogCommand(HgRepository hgRepo) {
 		repo = hgRepo;
@@ -184,9 +185,13 @@
 		}
 		try {
 			count = 0;
+			HgChangelog.ParentWalker pw = parentHelper; // leave it uninitialized unless we iterate whole repo
+			if (file == null) {
+				pw = getParentHelper();
+			}
 			// ChangesetTransfrom creates a blank PathPool, and #file(String, boolean) above 
 			// may utilize it as well. CommandContext? How about StatusCollector there as well?
-			csetTransform = new ChangesetTransformer(repo, handler);
+			csetTransform = new ChangesetTransformer(repo, handler, pw);
 			if (file == null) {
 				repo.getChangelog().range(startRev, endRev, this);
 			} else {
@@ -244,6 +249,15 @@
 		count++;
 		csetTransform.next(revisionNumber, nodeid, cset);
 	}
+	
+	private HgChangelog.ParentWalker getParentHelper() {
+		if (parentHelper == null) {
+			parentHelper = repo.getChangelog().new ParentWalker();
+			parentHelper.init();
+		}
+		return parentHelper;
+	}
+
 
 	public interface Handler {
 		/**
--- a/src/org/tmatesoft/hg/core/HgOutgoingCommand.java	Fri Apr 15 03:35:08 2011 +0200
+++ b/src/org/tmatesoft/hg/core/HgOutgoingCommand.java	Fri Apr 15 05:17:44 2011 +0200
@@ -38,6 +38,7 @@
 	private HgRemoteRepository remoteRepo;
 	private boolean includeSubrepo;
 	private RepositoryComparator comparator;
+	private HgChangelog.ParentWalker parentHelper;
 	private Set<String> branches;
 
 	public HgOutgoingCommand(HgRepository hgRepo) {
@@ -104,7 +105,7 @@
 		if (handler == null) {
 			throw new IllegalArgumentException("Delegate can't be null");
 		}
-		ChangesetTransformer inspector = new ChangesetTransformer(localRepo, handler);
+		ChangesetTransformer inspector = new ChangesetTransformer(localRepo, handler, getParentHelper());
 		inspector.limitBranches(branches);
 		getComparator(handler).visitLocalOnlyRevisions(inspector);
 	}
@@ -114,11 +115,18 @@
 			throw new IllegalArgumentException("Shall specify remote repository to compare against");
 		}
 		if (comparator == null) {
-			HgChangelog.ParentWalker pw = localRepo.getChangelog().new ParentWalker();
-			pw.init();
-			comparator = new RepositoryComparator(pw, remoteRepo);
+			comparator = new RepositoryComparator(getParentHelper(), remoteRepo);
 			comparator.compare(context);
 		}
 		return comparator;
 	}
+	
+	private HgChangelog.ParentWalker getParentHelper() {
+		if (parentHelper == null) {
+			parentHelper = localRepo.getChangelog().new ParentWalker();
+			parentHelper.init();
+		}
+		return parentHelper;
+	}
+
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/tmatesoft/hg/internal/Pool.java	Fri Apr 15 05:17:44 2011 +0200
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2011 TMate Software Ltd
+ *  
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.internal;
+
+import java.util.HashMap;
+
+/**
+ * Instance pooling.
+ * 
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public class Pool<T> {
+	private final HashMap<T,T> unify = new HashMap<T, T>();
+	
+	public T unify(T t) {
+		T rv = unify.get(t);
+		if (rv == null) {
+			// first time we see a new value
+			unify.put(t, t);
+			rv = t;
+		}
+		return rv;
+	}
+}
\ No newline at end of file
--- a/src/org/tmatesoft/hg/repo/HgStatusCollector.java	Fri Apr 15 03:35:08 2011 +0200
+++ b/src/org/tmatesoft/hg/repo/HgStatusCollector.java	Fri Apr 15 05:17:44 2011 +0200
@@ -25,11 +25,13 @@
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.SortedMap;
 import java.util.TreeMap;
 import java.util.TreeSet;
 
 import org.tmatesoft.hg.core.HgDataStreamException;
 import org.tmatesoft.hg.core.Nodeid;
+import org.tmatesoft.hg.internal.Pool;
 import org.tmatesoft.hg.util.Path;
 import org.tmatesoft.hg.util.PathPool;
 import org.tmatesoft.hg.util.PathRewrite;
@@ -44,13 +46,23 @@
 public class HgStatusCollector {
 
 	private final HgRepository repo;
-	private final Map<Integer, ManifestRevisionInspector> cache; // sparse array, in fact
+	private final SortedMap<Integer, ManifestRevisionInspector> cache; // sparse array, in fact
+	// with cpython repository, ~70 000 changes, complete Log (direct out, no reverse) output 
+	// no cache limit, no nodeids and fname caching - OOME on changeset 1035
+	// no cache limit, but with cached nodeids and filenames - 1730+
+	// cache limit 100 - 19+ minutes to process 10000, and still working (too long, stopped)
+	private final int cacheMaxSize = 100; // do not keep too much manifest revisions
 	private PathPool pathPool;
+	private final Pool<Nodeid> cacheNodes;
+	private final Pool<String> cacheFilenames; // XXX in fact, need to think if use of PathPool directly instead is better solution
+	
 
 	public HgStatusCollector(HgRepository hgRepo) {
 		this.repo = hgRepo;
 		cache = new TreeMap<Integer, ManifestRevisionInspector>();
-		ManifestRevisionInspector emptyFakeState = new ManifestRevisionInspector();
+		cacheNodes = new Pool<Nodeid>();
+		cacheFilenames = new Pool<String>();
+		ManifestRevisionInspector emptyFakeState = new ManifestRevisionInspector(null, null);
 		emptyFakeState.begin(-1, null);
 		emptyFakeState.end(-1); // FIXME HgRepo.TIP == -1 as well, need to distinguish fake "prior to first" revision from "the very last" 
 		cache.put(-1, emptyFakeState);
@@ -63,7 +75,11 @@
 	private ManifestRevisionInspector get(int rev) {
 		ManifestRevisionInspector i = cache.get(rev);
 		if (i == null) {
-			i = new ManifestRevisionInspector();
+			if (cache.size() > cacheMaxSize) {
+				// assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary
+				cache.remove(cache.firstKey());
+			}
+			i = new ManifestRevisionInspector(cacheNodes, cacheFilenames);
 			cache.put(rev, i);
 			repo.getManifest().walk(rev, rev, i);
 		}
@@ -130,7 +146,7 @@
 				private ManifestRevisionInspector delegate;
 
 				public boolean begin(int revision, Nodeid nid) {
-					cache.put(revision, delegate = new ManifestRevisionInspector());
+					cache.put(revision, delegate = new ManifestRevisionInspector(cacheNodes, cacheFilenames));
 					delegate.begin(revision, nid);
 					return true;
 				}
@@ -342,12 +358,18 @@
 			return l;
 		}
 	}
-
+	
 	/*package-local*/ static final class ManifestRevisionInspector implements HgManifest.Inspector {
 		private final TreeMap<String, Nodeid> idsMap;
 		private final TreeMap<String, String> flagsMap;
+		private final Pool<Nodeid> idsPool;
+		private final Pool<String> namesPool; 
 
-		public ManifestRevisionInspector() {
+		// optional pools for effective management of nodeids and filenames (they are likely
+		// to be duplicated among different manifest revisions
+		public ManifestRevisionInspector(Pool<Nodeid> nodeidPool, Pool<String> filenamePool) {
+			idsPool = nodeidPool;
+			namesPool = filenamePool;
 			idsMap = new TreeMap<String, Nodeid>();
 			flagsMap = new TreeMap<String, String>();
 		}
@@ -367,6 +389,12 @@
 		//
 
 		public boolean next(Nodeid nid, String fname, String flags) {
+			if (namesPool != null) {
+				fname = namesPool.unify(fname);
+			}
+			if (idsPool != null) {
+				nid = idsPool.unify(nid);
+			}
 			idsMap.put(fname, nid);
 			flagsMap.put(fname, flags);
 			return true;
--- a/src/org/tmatesoft/hg/repo/HgWorkingCopyStatusCollector.java	Fri Apr 15 03:35:08 2011 +0200
+++ b/src/org/tmatesoft/hg/repo/HgWorkingCopyStatusCollector.java	Fri Apr 15 05:17:44 2011 +0200
@@ -113,7 +113,7 @@
 			if (baseRevisionCollector != null) {
 				collect = baseRevisionCollector.raw(baseRevision);
 			} else {
-				collect = new HgStatusCollector.ManifestRevisionInspector();
+				collect = new HgStatusCollector.ManifestRevisionInspector(null, null);
 				repo.getManifest().walk(baseRevision, baseRevision, collect);
 			}
 			baseRevFiles = new TreeSet<String>(collect.files());