changeset 656:a937e63b6e02

Performance: rebuild information about branches takes too long (my improvement: 3 times, 11-15 s to less than 4 sec)
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Thu, 04 Jul 2013 18:40:03 +0200
parents bcbcc318f250
children 6334b0267103
files cmdline/org/tmatesoft/hg/console/Main.java src/org/tmatesoft/hg/internal/IntMap.java src/org/tmatesoft/hg/repo/HgBranches.java src/org/tmatesoft/hg/repo/HgChangelog.java src/org/tmatesoft/hg/repo/HgParentChildMap.java test/org/tmatesoft/hg/test/TestRevisionMaps.java
diffstat 6 files changed, 114 insertions(+), 104 deletions(-) [+]
line wrap: on
line diff
--- a/cmdline/org/tmatesoft/hg/console/Main.java	Thu Jul 04 18:36:38 2013 +0200
+++ b/cmdline/org/tmatesoft/hg/console/Main.java	Thu Jul 04 18:40:03 2013 +0200
@@ -103,7 +103,7 @@
 
 	public static void main(String[] args) throws Exception {
 		Main m = new Main(args);
-		m.checkFileSneakerPerformance();
+//		m.checkFileSneakerPerformance();
 //		m.testRevert();
 //		m.testCheckout();
 //		m.tryExtensions();
@@ -119,7 +119,7 @@
 //		m.testEffectiveFileLog();
 //		m.testMergeState();
 //		m.testFileStatus();
-//		m.dumpBranches();
+		m.dumpBranches();
 //		m.inflaterLengthException();
 //		m.dumpIgnored();
 //		m.dumpDirstate();
--- a/src/org/tmatesoft/hg/internal/IntMap.java	Thu Jul 04 18:36:38 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/IntMap.java	Thu Jul 04 18:40:03 2013 +0200
@@ -17,6 +17,7 @@
 package org.tmatesoft.hg.internal;
 
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -216,6 +217,13 @@
 		}
 		return map;
 	}
+	
+	public Collection<V> values() {
+		@SuppressWarnings("unchecked")
+		V[] rv = (V[]) new Object[size];
+		System.arraycopy(values, 0, rv, 0, size);
+		return Arrays.<V>asList(rv);
+	}
 
 	// copy of Arrays.binarySearch, with upper search limit as argument
 	private static int binarySearch(int[] a, int high, int key) {
--- a/src/org/tmatesoft/hg/repo/HgBranches.java	Thu Jul 04 18:36:38 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgBranches.java	Thu Jul 04 18:40:03 2013 +0200
@@ -27,8 +27,8 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collections;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
 import java.util.LinkedList;
@@ -121,7 +121,7 @@
 		}
 		return -1; // deliberately not lastInCache, to avoid anything but -1 when 1st line was read and there's error is in lines 2..end
 	}
-
+	
 	void collect(final ProgressSupport ps) throws HgRuntimeException {
 		branches.clear();
 		final HgRepository repo = internalRepo.getRepo();
@@ -133,91 +133,68 @@
 			final HgParentChildMap<HgChangelog> pw = new HgParentChildMap<HgChangelog>(repo.getChangelog());
 			pw.init();
 			ps.worked(repo.getChangelog().getRevisionCount());
+			//
 			// first revision branch found at
 			final HashMap<String, Nodeid> branchStart = new HashMap<String, Nodeid>();
-			// last revision seen for the branch
-			final HashMap<String, Nodeid> branchLastSeen = new HashMap<String, Nodeid>();
 			// revisions from the branch that have no children at all
 			final HashMap<String, List<Nodeid>> branchHeads = new HashMap<String, List<Nodeid>>();
-			// revisions that are immediate children of a node from a given branch
-			// after iteration, there are some revisions left in this map (children of a branch last revision
-			// that doesn't belong to the branch. No use of this now, perhaps can deduce isInactive (e.g.those 
-			// branches that have non-empty candidates are inactive if all their heads are roots for those left)
-			final HashMap<String, List<Nodeid>> branchHeadCandidates = new HashMap<String, List<Nodeid>>();
 			HgChangelog.Inspector insp = new HgChangelog.Inspector() {
 				
+				private final ArrayList<Nodeid> parents = new ArrayList<Nodeid>(3);
+				
 				public void next(int revisionNumber, Nodeid nodeid, RawChangeset cset) {
 					String branchName = cset.branch();
+					List<Nodeid> _branchHeads;
+					// there are chances (with --force key) branch can get more than one start
+					// revision. Neither BranchInfo nor this code support this scenario at the moment. 
 					if (!branchStart.containsKey(branchName)) {
 						branchStart.put(branchName, nodeid);
-						branchHeads.put(branchName, new LinkedList<Nodeid>());
-						branchHeadCandidates.put(branchName, new LinkedList<Nodeid>());
+						branchHeads.put(branchName, _branchHeads = new LinkedList<Nodeid>());
 					} else {
-						final List<Nodeid> headCandidates = branchHeadCandidates.get(branchName);
-						if (headCandidates.remove(nodeid)) {
-							// likely we don't need to keep parent anymore, as we found at least 1 child thereof to be at the same branch
-							// however, it's possible the child we found is a result of an earlier fork, and revision in the 
-							// branchLastSeen is 'parallel' head, which needs to be kept
-							Nodeid lastSeenInBranch = branchLastSeen.get(branchName);
-							// check if current revision is on descendant line. Seems direct parents check is enough
-							if (pw.safeFirstParent(nodeid).equals(lastSeenInBranch) || pw.safeSecondParent(nodeid).equals(lastSeenInBranch)) {
-								branchLastSeen.remove(branchName);
+						_branchHeads = branchHeads.get(branchName);
+						if (_branchHeads == null) {
+							branchHeads.put(branchName, _branchHeads = new LinkedList<Nodeid>());
+						}
+					}
+					// so far present node is the best candidate for head
+					_branchHeads.add(nodeid);
+					parents.clear();
+					// parents of this node, however, cease to be heads (if they are from this branch)
+					pw.appendParentsOf(nodeid, parents);
+					_branchHeads.removeAll(parents);
+					ps.worked(1);
+				}
+			};
+			// XXX alternatively may iterate with pw.all().subList(lastCached)
+			// but need an effective way to find out branch of particular changeset
+			repo.getChangelog().range(lastCached == -1 ? 0 : lastCached+1, HgRepository.TIP, insp);
+			//
+			// build BranchInfo, based on found and cached 
+			for (String bn : branchStart.keySet()) {
+				BranchInfo bi = branches.get(bn);
+				if (bi != null) {
+					// combine heads found so far with those cached 
+					LinkedHashSet<Nodeid> oldHeads = new LinkedHashSet<Nodeid>(bi.getHeads());
+					// expect size of both oldHeads and newHeads sets to be small, so the nested for-loops are hence acceptable.
+					for (Nodeid newHead : branchHeads.get(bn)) {
+						for (Iterator<Nodeid> it = oldHeads.iterator(); it.hasNext();) {
+							if (pw.isChild(it.next(), newHead)) {
+								it.remove();
 							}
 						}
 					}
-					List<Nodeid> immediateChildren = pw.directChildren(nodeid);
-					if (immediateChildren.size() > 0) {
-						// 1) children may be in another branch
-						// and unless we later came across another element from this branch,
-						// we need to record all these as potential heads
-						//
-						// 2) head1 with children in different branch, and head2 in this branch without children
-						branchLastSeen.put(branchName, nodeid);
-						branchHeadCandidates.get(branchName).addAll(immediateChildren);
-					} else {
-						// no more children known for this node, it's (one of the) head of the branch
-						branchHeads.get(branchName).add(nodeid);
-					}
-					ps.worked(1);
-				}
-			}; 
-			repo.getChangelog().range(lastCached == -1 ? 0 : lastCached+1, HgRepository.TIP, insp);
-			// those last seen revisions from the branch that had no children from the same branch are heads.
-			for (String bn : branchLastSeen.keySet()) {
-				// these are inactive branches? - there were children, but not from the same branch?
-				branchHeads.get(bn).add(branchLastSeen.get(bn));
-			}
-			for (String bn : branchStart.keySet()) {
-				BranchInfo bi = branches.get(bn);
-				if (bi != null) {
-					// although heads from cache shall not intersect with heads after lastCached,
-					// use of LHS doesn't hurt (and makes sense e.g. if cache is not completely correct in my tests) 
-					LinkedHashSet<Nodeid> heads = new LinkedHashSet<Nodeid>(bi.getHeads());
-					for (Nodeid oldHead : bi.getHeads()) {
-						// XXX perhaps, need pw.canReach(Nodeid from, Collection<Nodeid> to)
-						List<Nodeid> newChildren = pw.childrenOf(Collections.singletonList(oldHead));
-						if (!newChildren.isEmpty()) {
-							// likely not a head any longer,
-							// check if any new head can be reached from old one, and, if yes,
-							// do not consider that old head as head.
-							for (Nodeid newHead : branchHeads.get(bn)) {
-								if (newChildren.contains(newHead)) {
-									heads.remove(oldHead);
-									break;
-								}
-							}
-						} // else - oldHead still head for the branch
-					}
-					heads.addAll(branchHeads.get(bn));
-					bi = new BranchInfo(bn, bi.getStart(), heads.toArray(new Nodeid[0]));
+					oldHeads.addAll(branchHeads.get(bn));
+					assert oldHeads.size() > 0;
+					bi = new BranchInfo(bn, bi.getStart(), oldHeads.toArray(new Nodeid[oldHeads.size()]));
 				} else {
 					Nodeid[] heads = branchHeads.get(bn).toArray(new Nodeid[0]);
 					bi = new BranchInfo(bn, branchStart.get(bn), heads);
 				}
 				branches.put(bn, bi);
 			}
-		}
+		} // !cacheActual
 		final HgChangelog clog = repo.getChangelog();
+		/// FIXME use HgParentChildMap if available (need to decide how to get HgRevisionMap and HgParentChildMap to a common denominator)
 		final HgRevisionMap<HgChangelog> rmap = new HgRevisionMap<HgChangelog>(clog).init();
 		for (BranchInfo bi : branches.values()) {
 			bi.validate(clog, rmap);
@@ -388,6 +365,7 @@
 		}
 //		public Nodeid getTip() {
 //		}
+		// XXX Not public as there are chances for few possible branch starts, and I need to decide how to handle that
 		/*public*/ Nodeid getStart() {
 			// first node where branch appears
 			return start;
--- a/src/org/tmatesoft/hg/repo/HgChangelog.java	Thu Jul 04 18:36:38 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgChangelog.java	Thu Jul 04 18:40:03 2013 +0200
@@ -305,23 +305,7 @@
 			// unixTime is local time, and timezone records difference of the local time to UTC.
 			Date _time = new Date(unixTime * 1000);
 			String _extras = space2 < _timeString.length() ? _timeString.substring(space2 + 1) : null;
-			Map<String, String> _extrasMap;
-			final String extras_branch_key = "branch";
-			if (_extras == null || _extras.trim().length() == 0) {
-				_extrasMap = Collections.singletonMap(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME);
-			} else {
-				_extrasMap = new HashMap<String, String>();
-				for (String pair : _extras.split("\00")) {
-					pair = decode(pair);
-					int eq = pair.indexOf(':');
-					_extrasMap.put(pair.substring(0, eq), pair.substring(eq + 1));
-				}
-				if (!_extrasMap.containsKey(extras_branch_key)) {
-					_extrasMap.put(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME);
-				}
-				_extrasMap = Collections.unmodifiableMap(_extrasMap);
-			}
-
+			Map<String, String> _extrasMap = parseExtras(_extras);
 			//
 			int lastStart = breakIndex3 + 1;
 			int breakIndex4 = indexOf(data, lineBreak, lastStart, bufferEndIndex);
@@ -329,6 +313,8 @@
 			if (breakIndex4 > lastStart) {
 				// if breakIndex4 == lastStart, we already found \n\n and hence there are no files (e.g. merge revision)
 				_files = new ArrayList<String>(5);
+				// TODO pool file names
+				// TODO encoding of filenames?
 				while (breakIndex4 != -1 && breakIndex4 + 1 < bufferEndIndex) {
 					_files.add(new String(data, lastStart, breakIndex4 - lastStart));
 					lastStart = breakIndex4 + 1;
@@ -364,6 +350,34 @@
 			this.extras = _extrasMap;
 		}
 
+		private Map<String, String> parseExtras(String _extras) {
+			final String extras_branch_key = "branch";
+			_extras = _extras == null ? null : _extras.trim();
+			if (_extras == null || _extras.length() == 0) {
+				return Collections.singletonMap(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME);
+			}
+			Map<String, String> _extrasMap = new HashMap<String, String>();
+			int lastIndex = 0;
+			do {
+				String pair;
+				int sp = _extras.indexOf('\0', lastIndex);
+				if (sp == -1) {
+					sp = _extras.length();
+				}
+				if (sp > lastIndex) {
+					pair = _extras.substring(lastIndex, sp);
+					pair = decode(pair);
+					int eq = pair.indexOf(':');
+					_extrasMap.put(pair.substring(0, eq), pair.substring(eq + 1));
+					lastIndex = sp + 1;
+				}
+			} while (lastIndex < _extras.length());
+			if (!_extrasMap.containsKey(extras_branch_key)) {
+				_extrasMap.put(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME);
+			}
+			return Collections.unmodifiableMap(_extrasMap);
+		}
+
 		private static int indexOf(byte[] src, byte what, int startOffset, int endIndex) {
 			for (int i = startOffset; i < endIndex; i++) {
 				if (src[i] == what) {
--- a/src/org/tmatesoft/hg/repo/HgParentChildMap.java	Thu Jul 04 18:36:38 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgParentChildMap.java	Thu Jul 04 18:40:03 2013 +0200
@@ -28,6 +28,7 @@
 import java.util.List;
 
 import org.tmatesoft.hg.core.Nodeid;
+import org.tmatesoft.hg.internal.IntMap;
 import org.tmatesoft.hg.repo.Revlog.ParentInspector;
 
 /**
@@ -67,7 +68,8 @@
 	private Nodeid[] firstParent; // parents by natural order (i.e. firstParent[A] is parent of revision with index A)
 	private Nodeid[] secondParent;
 	private final T revlog;
-	private BitSet heads; // 1 indicates revision got children
+	private IntMap<Nodeid> heads;
+	private BitSet headsBitSet; // 1 indicates revision got children, != null only during init;
 
 
 	public HgParentChildMap(T owner) {
@@ -86,11 +88,11 @@
 		sequential[ix] = sorted[ix] = revision;
 		if (parent1Revision != -1) {
 			firstParent[ix] = sequential[parent1Revision];
-			heads.set(parent1Revision);
+			headsBitSet.set(parent1Revision);
 		}
 		if (parent2Revision != -1) { // revlog of DataAccess.java has p2 set when p1 is -1
 			secondParent[ix] = sequential[parent2Revision];
-			heads.set(parent2Revision);
+			headsBitSet.set(parent2Revision);
 		}
 	}
 	
@@ -104,12 +106,13 @@
 		firstParent = new Nodeid[revisionCount];
 		// TODO [post 1.1] Branches/merges are less frequent, and most of secondParent would be -1/null, hence 
 		// IntMap might be better alternative here, but need to carefully analyze (test) whether this brings
-		// real improvement (IntMap has 2n capacity, and element lookup is log(n) instead of array's constant)
+		// real improvement (IntMap has 2n capacity, and element lookup is log(n) instead of array's constant).
+		// FWIW: in cpython's repo, with 70k+ revisions, there are 2618 values in secondParent 
 		secondParent = new Nodeid[revisionCount];
 		//
 		sequential = new Nodeid[revisionCount];
 		sorted = new Nodeid[revisionCount];
-		heads = new BitSet(revisionCount);
+		headsBitSet = new BitSet(revisionCount);
 		revlog.indexWalk(0, TIP, this);
 		Arrays.sort(sorted);
 		// FIXME use ArrayHelper instead
@@ -120,6 +123,19 @@
 			assertSortedIndex(x);
 			sorted2natural[x] = i;
 		}
+		// no reason to keep BitSet, number of heads is usually small
+		IntMap<Nodeid> _heads = new IntMap<Nodeid>(headsBitSet.size() - headsBitSet.cardinality());
+		int index = 0;
+		while (index < sequential.length) {
+			index = headsBitSet.nextClearBit(index);
+			// nextClearBit(length-1) gives length when bit is set,
+			// however, last revision can't be a parent of any other, and
+			// the last bit would be always 0, and no AIOOBE 
+			_heads.put(index, sequential[index]);
+			index++;
+		} 
+		headsBitSet = null;
+		heads = _heads;
 	}
 	
 	private void assertSortedIndex(int x) {
@@ -215,6 +231,8 @@
 		return result;
 	}
 	
+	public long AAA = 0;
+	
 	/**
 	 * @return revisions that have supplied revision as their immediate parent
 	 */
@@ -226,8 +244,9 @@
 		if (!hasChildren(start)) {
 			return Collections.emptyList();
 		}
-		LinkedList<Nodeid> result = new LinkedList<Nodeid>();
+		ArrayList<Nodeid> result = new ArrayList<Nodeid>(5);
 		for (int i = start + 1; i < sequential.length; i++) {
+			AAA++;
 			if (nid == firstParent[i] || nid == secondParent[i]) {
 				result.add(sequential[i]);
 			}
@@ -291,20 +310,10 @@
 	 * @return elements of this map that do not have a child recorded therein.
 	 */
 	public Collection<Nodeid> heads() {
-		ArrayList<Nodeid> result = new ArrayList<Nodeid>();
-		int index = 0;
-		do {
-			index = heads.nextClearBit(index);
-			// nextClearBit(length-1) gives length when bit is set,
-			// however, last revision can't be a parent of any other, and
-			// the last bit would be always 0, and no AIOOBE 
-			result.add(sequential[index]);
-			index++;
-		} while (index < sequential.length);
-		return result;
+		return heads.values();
 	}
 
 	private boolean hasChildren(int sequentialIndex) {
-		return heads.get(sequentialIndex);
+		return !heads.containsKey(sequentialIndex);
 	}
 }
\ No newline at end of file
--- a/test/org/tmatesoft/hg/test/TestRevisionMaps.java	Thu Jul 04 18:36:38 2013 +0200
+++ b/test/org/tmatesoft/hg/test/TestRevisionMaps.java	Thu Jul 04 18:40:03 2013 +0200
@@ -16,6 +16,7 @@
  */
 package org.tmatesoft.hg.test;
 
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashSet;
@@ -84,7 +85,7 @@
 			
 		}
 		// heads
-		errorCollector.assertEquals(Arrays.asList(allRevs[7], allRevs[9]), parentHelper.heads());
+		errorCollector.assertEquals(Arrays.asList(allRevs[7], allRevs[9]), new ArrayList<Nodeid>(parentHelper.heads()));
 		// isChild
 		errorCollector.assertTrue(parentHelper.isChild(allRevs[1], allRevs[9]));
 		errorCollector.assertTrue(parentHelper.isChild(allRevs[0], allRevs[7]));