changeset 600:46f29b73e51e

Utilize RevisionLookup to speed-up getRevisionIndex of both manifest and changelog
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Fri, 03 May 2013 17:03:31 +0200 (2013-05-03)
parents 55b7987c1796
children 8143c1f77d45
files src/org/tmatesoft/hg/internal/RevisionLookup.java src/org/tmatesoft/hg/repo/HgChangelog.java src/org/tmatesoft/hg/repo/HgDataFile.java src/org/tmatesoft/hg/repo/HgManifest.java src/org/tmatesoft/hg/repo/Revlog.java
diffstat 5 files changed, 143 insertions(+), 28 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/tmatesoft/hg/internal/RevisionLookup.java	Fri May 03 17:03:31 2013 +0200
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2013 TMate Software Ltd
+ *  
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.internal;
+
+import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION;
+
+import java.util.Arrays;
+
+import org.tmatesoft.hg.core.Nodeid;
+import org.tmatesoft.hg.repo.HgRevisionMap;
+
+/**
+ * Lite alternative to {@link HgRevisionMap}, to speed up nodeid to index conversion without consuming too much memory.
+ * E.g. for a 100k revisions, {@link HgRevisionMap} consumes 3 * (N * sizeof(int)) for indexes plus 48 bytes per 
+ * Nodeid instance, total (12+48)*N = 6 mb of memory. {RevisionLookup} instead keeps only Nodeid hashes, (N * sizeof(int) = 400 kb),
+ * but is slower in lookup, O(N/2) to find potential match plus disk read operatin (or few, in an unlikely case of hash collisions).
+ * 
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public class RevisionLookup implements RevlogStream.Inspector {
+	
+	private final RevlogStream content;
+	private int[] nodeidHashes;
+
+	public RevisionLookup(RevlogStream stream) {
+		assert stream != null;
+		content = stream;
+	}
+	
+	public static RevisionLookup createFor(RevlogStream stream) {
+		RevisionLookup rv = new RevisionLookup(stream);
+		int revCount = stream.revisionCount();
+		rv.prepare(revCount);
+		if (revCount > 0) {
+			stream.iterate(0, revCount - 1, false, rv);
+		}
+		return rv;
+	}
+
+	public void prepare(int count) {
+		nodeidHashes = new int[count];
+		Arrays.fill(nodeidHashes, BAD_REVISION);
+	}
+	public void next(int index, byte[] nodeid) {
+		nodeidHashes[index] = Nodeid.hashCode(nodeid);
+	}
+	public void next(int index, Nodeid nodeid) {
+		nodeidHashes[index] = nodeid.hashCode();
+	}
+	public int findIndex(Nodeid nodeid) {
+		final int hash = nodeid.hashCode();
+		for (int i = 0; i < nodeidHashes.length; i++) {
+			if (nodeidHashes[i] == hash) {
+				byte[] nodeidAtI = content.nodeid(i);
+				if (nodeid.equalsTo(nodeidAtI)) {
+					return i;
+				}
+			}
+			// else: false match (only 4 head bytes matched, continue loop
+		}
+		return BAD_REVISION;
+	}
+
+	public void next(int revisionIndex, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) {
+		next(revisionIndex, nodeid);
+	}
+}
--- a/src/org/tmatesoft/hg/repo/HgChangelog.java	Fri May 03 15:29:26 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgChangelog.java	Fri May 03 17:03:31 2013 +0200
@@ -50,7 +50,7 @@
 public final class HgChangelog extends Revlog {
 
 	/* package-local */HgChangelog(HgRepository hgRepo, RevlogStream content) {
-		super(hgRepo, content);
+		super(hgRepo, content, true);
 	}
 
 	public void all(final HgChangelog.Inspector inspector) throws HgInvalidRevisionException, HgInvalidControlFileException {
--- a/src/org/tmatesoft/hg/repo/HgDataFile.java	Fri May 03 15:29:26 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgDataFile.java	Fri May 03 17:03:31 2013 +0200
@@ -67,7 +67,7 @@
 	private Metadata metadata; // get initialized on first access to file content.
 	
 	/*package-local*/HgDataFile(HgRepository hgRepo, Path filePath, RevlogStream content) {
-		super(hgRepo, content);
+		super(hgRepo, content, false);
 		path = filePath;
 	}
 
--- a/src/org/tmatesoft/hg/repo/HgManifest.java	Fri May 03 15:29:26 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgManifest.java	Fri May 03 17:03:31 2013 +0200
@@ -35,6 +35,7 @@
 import org.tmatesoft.hg.internal.IntVector;
 import org.tmatesoft.hg.internal.IterateControlMediator;
 import org.tmatesoft.hg.internal.Lifecycle;
+import org.tmatesoft.hg.internal.RevisionLookup;
 import org.tmatesoft.hg.internal.RevlogStream;
 import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
 import org.tmatesoft.hg.util.CancelSupport;
@@ -123,7 +124,7 @@
 	}
 
 	/*package-local*/ HgManifest(HgRepository hgRepo, RevlogStream content, EncodingHelper eh) {
-		super(hgRepo, content);
+		super(hgRepo, content, true);
 		encodingHelper = eh;
 		pathFactory = hgRepo.getSessionContext().getPathFactory();
 	}
@@ -244,8 +245,14 @@
 		}
 		// revisionNumber == TIP is processed by RevisionMapper 
 		if (revisionMap == null) {
-			revisionMap = new RevisionMapper(getRepo());
+			revisionMap = new RevisionMapper(super.revisionLookup == null);
 			content.iterate(0, TIP, false, revisionMap);
+			revisionMap.fixReusedManifests();
+			if (super.useRevisionLookup && super.revisionLookup == null) {
+				// reuse RevisionLookup if there's none yet
+				super.revisionLookup = revisionMap.manifestNodeids;
+			}
+			revisionMap.manifestNodeids = null;
 		}
 		return revisionMap.at(changesetRevisionIndex);
 	}
@@ -572,18 +579,19 @@
 		}
 	}
 	
-	private static class RevisionMapper implements RevlogStream.Inspector, Lifecycle {
+	private class RevisionMapper implements RevlogStream.Inspector, Lifecycle {
 		
 		private final int changelogRevisionCount;
 		private int[] changelog2manifest;
-		private final HgRepository repo;
-		private int[] manifestNodeidHashes; // first 4 bytes of manifest nodeid at corresponding index
+		RevisionLookup manifestNodeids;
 
-		public RevisionMapper(HgRepository hgRepo) {
-			repo = hgRepo;
-			changelogRevisionCount = repo.getChangelog().getRevisionCount();
+		private RevisionMapper(boolean useOwnRevisionLookup) {
+			changelogRevisionCount = HgManifest.this.getRepo().getChangelog().getRevisionCount();
+			if (useOwnRevisionLookup) {
+				manifestNodeids = new RevisionLookup(HgManifest.this.content);
+			}
 		}
-
+		
 		/**
 		 * Get index of manifest revision that corresponds to specified changeset
 		 * @param changesetRevisionIndex non-negative index of changelog revision, or {@link HgRepository#TIP}
@@ -603,7 +611,7 @@
 			return changesetRevisionIndex;
 		}
 
-		// XXX likely can be replaced with Revlog.RevisionInspector
+		// XXX can be replaced with Revlog.RevisionInspector, but I don't want Nodeid instances
 		public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) {
 			if (changelog2manifest != null) {
 				// next assertion is not an error, rather assumption check, which is too development-related to be explicit exception - 
@@ -620,7 +628,9 @@
 					changelog2manifest[linkRevision] = revisionNumber;
 				}
 			}
-			manifestNodeidHashes[revisionNumber] = Nodeid.hashCode(nodeid);
+			if (manifestNodeids != null) {
+				manifestNodeids.next(revisionNumber, nodeid);
+			}
 		}
 		
 		public void start(int count, Callback callback, Object token) {
@@ -633,11 +643,19 @@
 				changelog2manifest = new int[changelogRevisionCount];
 				Arrays.fill(changelog2manifest, BAD_REVISION);
 			}
-			manifestNodeidHashes = new int[count];
+			if (manifestNodeids != null) {
+				manifestNodeids.prepare(count);
+			}
 		}
 
 		public void finish(Object token) {
+			// it's not a nice idea to fix changesets that reuse existing manifest entries from inside
+			// #finish, as the manifest read operation is not complete at the moment.
+		}
+		
+		public void fixReusedManifests() {
 			if (changelog2manifest == null) {
+				// direct, 1-1 mapping of changeset indexes to manifest
 				return;
 			}
 			// I assume there'd be not too many revisions we don't know manifest of
@@ -652,7 +670,7 @@
 				final IntMap<Nodeid> missingCsetToManifest = new IntMap<Nodeid>(undefinedChangelogRevision.size());
 				int[] undefinedClogRevs = undefinedChangelogRevision.toArray();
 				// undefinedChangelogRevision is sorted by the nature it's created
-				repo.getChangelog().rangeInternal(new HgChangelog.Inspector() {
+				HgManifest.this.getRepo().getChangelog().rangeInternal(new HgChangelog.Inspector() {
 					
 					public void next(int revisionIndex, Nodeid nodeid, RawChangeset cset) {
 						missingCsetToManifest.put(revisionIndex, cset.manifest());
@@ -663,25 +681,24 @@
 				for (int u : undefinedClogRevs) {
 					Nodeid manifest = missingCsetToManifest.get(u);
 					if (manifest == null || manifest.isNull()) {
-						repo.getSessionContext().getLog().dump(getClass(), Severity.Warn, "Changeset %d has no associated manifest entry", u);
-						// keep -1 in the changelog2manifest map.
+						HgManifest.this.getRepo().getSessionContext().getLog().dump(getClass(), Severity.Warn, "Changeset %d has no associated manifest entry", u);
+						// keep BAD_REVISION in the changelog2manifest map.
 						continue;
 					}
-					int hash = manifest.hashCode();
-					for (int i = 0; i < manifestNodeidHashes.length; i++) {
-						if (manifestNodeidHashes[i] == hash) {
-							if (manifest.equals(repo.getManifest().getRevision(i))) {
-								changelog2manifest[u] = i;
-								break;
-							}
-							// else false match (only 4 head bytes matched, continue loop
+					if (manifestNodeids != null) {
+						int manifestRevIndex = manifestNodeids.findIndex(manifest);
+						// mimic HgManifest#getRevisionIndex() to keep behavior the same 
+						if (manifestRevIndex == BAD_REVISION) {
+							throw new HgInvalidRevisionException(String.format("Can't find index of revision %s", manifest.shortNotation()), manifest, null);
 						}
+						changelog2manifest[u] = manifestRevIndex;
+					} else {
+						changelog2manifest[u] = HgManifest.this.getRevisionIndex(manifest);
 					}
 				}
 				final long t3 = System.nanoTime();
 				System.out.printf("\tRevisionMapper#finish(), %d missing revs: %d + %d us\n", undefinedChangelogRevision.size(), (t2-t1) / 1000, (t3-t2) / 1000);
 			}
-			manifestNodeidHashes = null;
 		}
 	}
 	
--- a/src/org/tmatesoft/hg/repo/Revlog.java	Fri May 03 15:29:26 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/Revlog.java	Fri May 03 17:03:31 2013 +0200
@@ -30,6 +30,7 @@
 import org.tmatesoft.hg.internal.Experimental;
 import org.tmatesoft.hg.internal.IntMap;
 import org.tmatesoft.hg.internal.Preview;
+import org.tmatesoft.hg.internal.RevisionLookup;
 import org.tmatesoft.hg.internal.RevlogStream;
 import org.tmatesoft.hg.util.Adaptable;
 import org.tmatesoft.hg.util.ByteChannel;
@@ -53,8 +54,10 @@
 
 	private final HgRepository repo;
 	protected final RevlogStream content;
+	protected final boolean useRevisionLookup;
+	protected RevisionLookup revisionLookup;
 
-	protected Revlog(HgRepository hgRepo, RevlogStream contentStream) {
+	protected Revlog(HgRepository hgRepo, RevlogStream contentStream, boolean needRevisionLookup) {
 		if (hgRepo == null) {
 			throw new IllegalArgumentException();
 		}
@@ -63,12 +66,14 @@
 		}
 		repo = hgRepo;
 		content = contentStream;
+		useRevisionLookup = needRevisionLookup;
 	}
 	
 	// invalid Revlog
 	protected Revlog(HgRepository hgRepo) {
 		repo = hgRepo;
 		content = null;
+		useRevisionLookup = false;
 	}
 
 	public final HgRepository getRepo() {
@@ -145,13 +150,24 @@
 	 * @throws HgRuntimeException subclass thereof to indicate issues with the library. <em>Runtime exception</em>
 	 */
 	public final int getRevisionIndex(Nodeid nid) throws HgRuntimeException {
-		int revision = content.findRevisionIndex(nid);
+//		final long t1 = System.nanoTime();
+		int revision;
+		if (useRevisionLookup) {
+			if (revisionLookup == null) {
+				revisionLookup = RevisionLookup.createFor(content);
+			}
+			revision = revisionLookup.findIndex(nid);
+		} else {
+			revision = content.findRevisionIndex(nid);
+		}
 		if (revision == BAD_REVISION) {
 			// using toString() to identify revlog. HgDataFile.toString includes path, HgManifest and HgChangelog instances 
 			// are fine with default (class name)
 			// Perhaps, more tailored description method would be suitable here
 			throw new HgInvalidRevisionException(String.format("Can't find revision %s in %s", nid.shortNotation(), this), nid, null);
 		}
+//		final long t2 = System.nanoTime();
+//		System.out.printf("\tgetRevisionIndex(%s): %d us\n", nid.shortNotation(), (t2-t1)/1000);
 		return revision;
 	}