changeset 236:883300108179

Speed up branches calculation when cached branch information is available
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Thu, 09 Jun 2011 06:13:43 +0200 (2011-06-09)
parents fd845a53f53d
children 6e1373b54e9b
files cmdline/org/tmatesoft/hg/console/Main.java src/org/tmatesoft/hg/repo/HgBranches.java src/org/tmatesoft/hg/repo/HgChangelog.java
diffstat 3 files changed, 216 insertions(+), 48 deletions(-) [+]
line wrap: on
line diff
--- a/cmdline/org/tmatesoft/hg/console/Main.java	Tue Jun 07 04:54:13 2011 +0200
+++ b/cmdline/org/tmatesoft/hg/console/Main.java	Thu Jun 09 06:13:43 2011 +0200
@@ -45,6 +45,7 @@
 import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
 import org.tmatesoft.hg.util.Pair;
 import org.tmatesoft.hg.util.Path;
+import org.tmatesoft.hg.util.ProgressSupport;
 
 /**
  * Various debug dumps. 
@@ -69,12 +70,12 @@
 
 	public static void main(String[] args) throws Exception {
 		Main m = new Main(args);
-		m.testParents();
+//		m.testParents();
 //		m.testEffectiveFileLog();
 //		m.testCatAtCsetRevision();
 //		m.testMergeState();
 //		m.testFileStatus();
-//		m.dumpBranches();
+		m.dumpBranches();
 //		m.inflaterLengthException();
 //		m.dumpIgnored();
 //		m.dumpDirstate();
@@ -153,18 +154,30 @@
 		wcsc.walk(TIP, new StatusDump());
 	}
 	
+	/*
+	 * Straightforward approach to collect branches, no use of branchheads.cache
+	 * First, single run - 18 563
+	 * 10 runs (after 1 warm up) of HgBranches.collect took 167391 ms, ~17 seconds per run.
+	 */
 	private void dumpBranches() {
+		final long start0 = System.currentTimeMillis();
 		HgBranches b = hgRepo.getBranches();
+		System.out.println("1:" + (System.currentTimeMillis() - start0));
 		for (HgBranches.BranchInfo bi : b.getAllBranches()) {
 			System.out.print(bi.getName());
-			if (bi.isClosed()) {
-				System.out.print("!");
-			}
-			System.out.print(" ");
-			System.out.print(bi.getStart());
+//			if (bi.isClosed()) {
+//				System.out.print("!");
+//			}
+//			System.out.print(" ");
+//			System.out.print(bi.getStart());
 			System.out.print(" ");
 			System.out.println(bi.getHeads());
 		}
+//		final long start = System.currentTimeMillis();
+//		for (int i = 0; i < 10; i++) {
+//			b.collect(ProgressSupport.Factory.get(null));
+//		}
+//		System.out.println("10:" + (System.currentTimeMillis() - start));
 	}
 	
 	private void inflaterLengthException() throws Exception {
--- a/src/org/tmatesoft/hg/repo/HgBranches.java	Tue Jun 07 04:54:13 2011 +0200
+++ b/src/org/tmatesoft/hg/repo/HgBranches.java	Thu Jun 09 06:13:43 2011 +0200
@@ -16,15 +16,21 @@
  */
 package org.tmatesoft.hg.repo;
 
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.LinkedHashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
+import java.util.regex.Pattern;
 
 import org.tmatesoft.hg.core.Nodeid;
 import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
@@ -44,51 +50,187 @@
 		repo = hgRepo;
 	}
 
+	private int readCache() {
+		File branchheadsCache = new File(repo.getRepositoryRoot(), "branchheads.cache");
+		int lastInCache = -1;
+		if (!branchheadsCache.canRead()) {
+			return lastInCache;
+		}
+		BufferedReader br = null;
+		final Pattern spacePattern = Pattern.compile(" ");
+		try {
+			br = new BufferedReader(new FileReader(branchheadsCache));
+			String line = br.readLine();
+			if (line == null || line.trim().length() == 0) {
+				return lastInCache;
+			}
+			String[] cacheIdentity = spacePattern.split(line.trim());
+			lastInCache = Integer.parseInt(cacheIdentity[1]);
+			// XXX may want to check if nodeid of cset from repo.getChangelog() of lastInCache index match cacheIdentity[0]
+			//
+			while ((line = br.readLine()) != null) {
+				String[] elements = line.trim().split(" ");
+				if (elements.length < 2) {
+					// bad entry
+					continue;
+				}
+				Nodeid[] branchHeads = new Nodeid[elements.length - 1];
+				for (int i = 0; i < elements.length - 1; i++) {
+					branchHeads[i] = Nodeid.fromAscii(elements[i]);
+				}
+				// I assume split returns substrings of the original string, hence copy of a branch name
+				String branchName = new String(elements[elements.length-1]);
+				BranchInfo bi = new BranchInfo(branchName, branchHeads);
+				branches.put(branchName, bi);
+			}
+			return lastInCache;
+		} catch (IOException ex) {
+			ex.printStackTrace(); // XXX log error, but otherwise do nothing 
+		} finally {
+			if (br != null) {
+				try {
+					br.close();
+				} catch (IOException ex) {
+					ex.printStackTrace(); // ignore
+				}
+			}
+		}
+		return -1; // deliberately not lastInCache, to avoid anything but -1 when 1st line was read and there's error is in lines 2..end
+	}
+
 	void collect(final ProgressSupport ps) {
+		branches.clear();
 		ps.start(1 + repo.getChangelog().getRevisionCount() * 2);
-		final HgChangelog.ParentWalker pw = repo.getChangelog().new ParentWalker();
-		pw.init();
-		ps.worked(repo.getChangelog().getRevisionCount());
-		final HashMap<String, Nodeid> branchStart = new HashMap<String, Nodeid>();
-		final HashMap<String, Nodeid> branchLastSeen = new HashMap<String, Nodeid>();
-		final HashMap<String, List<Nodeid>> branchHeads = new HashMap<String, List<Nodeid>>();
-		final HashSet<String> closedBranches = new HashSet<String>();
-		HgChangelog.Inspector insp = new HgChangelog.Inspector() {
-			
-			public void next(int revisionNumber, Nodeid nodeid, RawChangeset cset) {
-				String branchName = cset.branch();
-				if (!branchStart.containsKey(branchName)) {
-					branchStart.put(branchName, nodeid);
-					branchHeads.put(branchName, new LinkedList<Nodeid>());
-				}
-				branchLastSeen.remove(branchName);
-				if ("1".equals(cset.extras().get("close"))) {
-					branchHeads.get(branchName).add(nodeid); // XXX what if it still has children?
-					closedBranches.add(branchName);
-				} else {
-					if (pw.hasChildren(nodeid)) {
-						// children may be in another branch
-						// and unless we later came across another element from this branch,
-						// we need to record all these as valid heads
-						// XXX what about next case: head1 with children in different branch, and head2 without children
-						// head1 would get lost
-						branchLastSeen.put(branchName, nodeid);
-					} else {
-						// no more children known for this node, it's (one of the) head of the branch
-						branchHeads.get(branchName).add(nodeid);
+		//
+		int lastCached = readCache();
+		/*
+		 * Next code was supposed to fill missing aspects of the BranchInfo, but is too slow
+		 * 
+		if (lastCached != -1 && lastCached <= repo.getChangelog().getLastRevision()) {
+			LinkedList<BranchInfo> incompleteBranches = new LinkedList<HgBranches.BranchInfo>(branches.values());
+			for (BranchInfo bi : incompleteBranches) {
+				LinkedList<Nodeid> closedHeads = new LinkedList<Nodeid>();
+				for (Nodeid h : bi.getHeads()) {
+					if ("1".equals(repo.getChangelog().changeset(h).extras().get("close"))) {
+						closedHeads.add(h);
 					}
 				}
-				ps.worked(1);
+				HashSet<Nodeid> earliest = new HashSet<Nodeid>(bi.getHeads());
+				HashSet<Nodeid> visited = new HashSet<Nodeid>();
+				ArrayList<Nodeid> parents = new ArrayList<Nodeid>(2);
+				HashSet<Nodeid> candidate = new HashSet<Nodeid>();
+				do {
+					candidate.clear();
+					for (Nodeid e : earliest) {
+						parents.clear();
+						if (pw.appendParentsOf(e, parents)) {
+							// at least one parent
+							Nodeid p1 = parents.get(0);
+							if (p1 != null && !visited.contains(p1) && bi.getName().equals(repo.getChangelog().changeset(p1).branch())) {
+								visited.add(p1);
+								candidate.add(p1);
+							}
+							Nodeid p2 = parents.size() > 1 ? parents.get(1) : null;
+							if (p2 != null && !visited.contains(p2) && bi.getName().equals(repo.getChangelog().changeset(p2).branch())) {
+								visited.add(p2);
+								candidate.add(p2);
+							}
+						}
+					}
+					if (!candidate.isEmpty()) {
+						earliest.clear();
+						earliest.addAll(candidate);
+					}
+				} while (!candidate.isEmpty());
+				// earliest can't be empty, we've started with non-empty heads.
+				Nodeid first = null;
+				if (earliest.size() == 1) {
+					first = earliest.iterator().next();
+				} else {
+					int earliestRevNum = Integer.MAX_VALUE;
+					for (Nodeid e : earliest) {
+						int x = repo.getChangelog().getLocalRevision(e);
+						if (x < earliestRevNum) {
+							earliestRevNum = x;
+							first = e;
+						}
+					}
+				}
+				assert first != null;
+				System.out.println("Updated branch " + bi.getName());
+				branches.put(bi.getName(), new BranchInfo(bi.getName(), first, bi.getHeads().toArray(new Nodeid[0]), closedHeads.size() == bi.getHeads().size()));
 			}
-		}; 
-		repo.getChangelog().all(insp);
-		for (String bn : branchLastSeen.keySet()) {
-			branchHeads.get(bn).add(branchLastSeen.get(bn));
 		}
-		for (String bn : branchStart.keySet()) {
-			Nodeid[] heads = branchHeads.get(bn).toArray(new Nodeid[0]);
-			BranchInfo bi = new BranchInfo(bn, branchStart.get(bn), heads, closedBranches.contains(bn));
-			branches.put(bn, bi);
+		*/
+		if (lastCached != repo.getChangelog().getLastRevision()) {
+			final HgChangelog.ParentWalker pw = repo.getChangelog().new ParentWalker();
+			pw.init();
+			ps.worked(repo.getChangelog().getRevisionCount());
+			final HashMap<String, Nodeid> branchStart = new HashMap<String, Nodeid>();
+			final HashMap<String, Nodeid> branchLastSeen = new HashMap<String, Nodeid>();
+			final HashMap<String, List<Nodeid>> branchHeads = new HashMap<String, List<Nodeid>>();
+			final HashSet<String> closedBranches = new HashSet<String>();
+			HgChangelog.Inspector insp = new HgChangelog.Inspector() {
+				
+				public void next(int revisionNumber, Nodeid nodeid, RawChangeset cset) {
+					String branchName = cset.branch();
+					if (!branchStart.containsKey(branchName)) {
+						branchStart.put(branchName, nodeid);
+						branchHeads.put(branchName, new LinkedList<Nodeid>());
+					}
+					branchLastSeen.remove(branchName);
+					if ("1".equals(cset.extras().get("close"))) {
+						branchHeads.get(branchName).add(nodeid); // XXX what if it still has children?
+								closedBranches.add(branchName);
+					} else {
+						if (pw.hasChildren(nodeid)) {
+							// children may be in another branch
+							// and unless we later came across another element from this branch,
+							// we need to record all these as valid heads
+							// XXX what about next case: head1 with children in different branch, and head2 without children
+							// head1 would get lost
+							branchLastSeen.put(branchName, nodeid);
+						} else {
+							// no more children known for this node, it's (one of the) head of the branch
+							branchHeads.get(branchName).add(nodeid);
+						}
+					}
+					ps.worked(1);
+				}
+			}; 
+			repo.getChangelog().range(lastCached == -1 ? 0 : lastCached+1, HgRepository.TIP, insp);
+			for (String bn : branchLastSeen.keySet()) {
+				branchHeads.get(bn).add(branchLastSeen.get(bn));
+			}
+			for (String bn : branchStart.keySet()) {
+				BranchInfo bi = branches.get(bn);
+				if (bi != null) {
+					// although heads from cache shall not intersect with heads after lastCached,
+					// use of LHS doesn't hurt (and makes sense e.g. if cache is not completely correct in my tests) 
+					LinkedHashSet<Nodeid> heads = new LinkedHashSet<Nodeid>(bi.getHeads());
+					for (Nodeid oldHead : bi.getHeads()) {
+						// XXX perhaps, need pw.canReach(Nodeid from, Collection<Nodeid> to)
+						List<Nodeid> newChildren = pw.childrenOf(Collections.singletonList(oldHead));
+						if (!newChildren.isEmpty()) {
+							// likely not a head any longer,
+							// check if any new head can be reached from old one, and, if yes,
+							// do not consider that old head as head.
+							for (Nodeid newHead : branchHeads.get(bn)) {
+								if (newChildren.contains(newHead)) {
+									heads.remove(oldHead);
+									break;
+								}
+							}
+						} // else - oldHead still head for the branch
+					}
+					heads.addAll(branchHeads.get(bn));
+					bi = new BranchInfo(bn, bi.getStart(), heads.toArray(new Nodeid[0]), bi.isClosed() && closedBranches.contains(bn));
+				} else {
+					Nodeid[] heads = branchHeads.get(bn).toArray(new Nodeid[0]);
+					bi = new BranchInfo(bn, branchStart.get(bn), heads, closedBranches.contains(bn));
+				}
+				branches.put(bn, bi);
+			}
 		}
 		ps.done();
 	}
@@ -108,17 +250,25 @@
 		private final boolean closed;
 		private final Nodeid start;
 
+		// XXX in fact, few but not all branchHeads might be closed, and isClosed for whole branch is not
+		// possible to determine.
 		BranchInfo(String branchName, Nodeid first, Nodeid[] branchHeads, boolean isClosed) {
 			name = branchName;
 			start = first;
 			heads = Collections.unmodifiableList(new ArrayList<Nodeid>(Arrays.asList(branchHeads)));
 			closed = isClosed;
 		}
+		
+		// incomplete branch, there's not enough information at the time of creation. shall be replaced with
+		// proper BI in #collect()
+		BranchInfo(String branchName, Nodeid[] branchHeads) {
+			this(branchName, Nodeid.NULL, branchHeads, false);
+		}
 
 		public String getName() {
 			return name;
 		}
-		public boolean isClosed() {
+		/*public*/ boolean isClosed() {
 			return closed;
 		}
 		public List<Nodeid> getHeads() {
@@ -126,7 +276,7 @@
 		}
 //		public Nodeid getTip() {
 //		}
-		public Nodeid getStart() {
+		/*public*/ Nodeid getStart() {
 			// first node where branch appears
 			return start;
 		}
--- a/src/org/tmatesoft/hg/repo/HgChangelog.java	Tue Jun 07 04:54:13 2011 +0200
+++ b/src/org/tmatesoft/hg/repo/HgChangelog.java	Thu Jun 09 06:13:43 2011 +0200
@@ -81,6 +81,11 @@
 		Arrays.sort(revisions);
 		content.iterate(revisions[0], revisions[revisions.length - 1], true, i);
 	}
+	
+	public RawChangeset changeset(Nodeid nid) {
+		int x = getLocalRevision(nid);
+		return range(x, x).get(0);
+	}
 
 	public interface Inspector {
 		// TODO describe whether cset is new instance each time