changeset 243:0e01f9182e16

External cache Nodeid<->int added, Revlog.RevisionMap
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Thu, 23 Jun 2011 16:58:38 +0200
parents ad6a046943be
children 4b661efb9374
files cmdline/org/tmatesoft/hg/console/Main.java src/org/tmatesoft/hg/internal/RevlogStream.java src/org/tmatesoft/hg/repo/Revlog.java
diffstat 3 files changed, 126 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/cmdline/org/tmatesoft/hg/console/Main.java	Thu Jun 23 15:19:07 2011 +0200
+++ b/cmdline/org/tmatesoft/hg/console/Main.java	Thu Jun 23 16:58:38 2011 +0200
@@ -72,10 +72,11 @@
 
 	public static void main(String[] args) throws Exception {
 		Main m = new Main(args);
+		m.testRevisionMap();
 //		m.testSubrepos();
 //		m.testReadWorkingCopy();
 //		m.testParents();
-		m.testEffectiveFileLog();
+//		m.testEffectiveFileLog();
 //		m.testCatAtCsetRevision();
 //		m.testMergeState();
 //		m.testFileStatus();
@@ -89,6 +90,48 @@
 //		m.dumpCompleteManifestHigh();
 //		m.bunchOfTests();
 	}
+	
+	/*
+	 * cpython repo with 70715 revisions.
+	 	3 revisions - 80 ms vs 250 ms (250ms init)
+		4 revisions - 110 ms vs 265 ms (265 ms init)
+		5 revisions - 94 vs 266.
+		complete iteration in changelog.getLocalRevision(tipNodeid) takes 47 ms
+		compared to complete iteration inside RevisionMap.init() of 171 ms.
+		The only difference is latter instantiates Nodeids, while former compares binary content as is.
+		Hence, with 20-30 ms per regular getLocalRevision, it pays off to use RevisionMap with at least 15-20
+		queries 
+	 */
+	private void testRevisionMap() throws Exception {
+		HgChangelog changelog = hgRepo.getChangelog();
+		HgChangelog.RevisionMap rmap = changelog.new RevisionMap().init(); // warm-up, ensure complete file read
+		int tip = changelog.getLastRevision();
+		// take 5 arbitrary revisions at 0, 1/4, 2/4, 3/4 and 4/4 
+		final Nodeid[] revs = new Nodeid[5];
+		revs[4] = changelog.getRevision(0);
+		revs[3] = changelog.getRevision(tip / 4);
+		revs[2] = changelog.getRevision(tip / 2);
+		revs[1] = changelog.getRevision(tip / 4 + tip / 2);
+		revs[0] = changelog.getRevision(tip);
+		long start = System.currentTimeMillis();
+		for (int i = 0; i < revs.length; i++) {
+			final int localRev = changelog.getLocalRevision(revs[i]);
+			System.out.printf("%d:%s\n", localRev, revs[i]);
+		}
+		System.out.println(System.currentTimeMillis() - start);
+		System.out.println();
+		//
+		start = System.currentTimeMillis();
+		rmap = changelog.new RevisionMap().init();
+		long s2 = System.currentTimeMillis();
+		for (int i = 0; i < revs.length; i++) {
+			final int localRev = rmap.localRevision(revs[i]);
+			System.out.printf("%d:%s\n", localRev, revs[i]);
+		}
+		System.out.println(System.currentTimeMillis() - start);
+		System.out.printf("\t from that, init took %d ms\n", s2 - start);
+		
+	}
 
 	private void testSubrepos() throws Exception {
 		for (HgSubrepoLocation l : hgRepo.getSubrepositories()) {
--- a/src/org/tmatesoft/hg/internal/RevlogStream.java	Thu Jun 23 15:19:07 2011 +0200
+++ b/src/org/tmatesoft/hg/internal/RevlogStream.java	Thu Jun 23 16:58:38 2011 +0200
@@ -167,7 +167,7 @@
 				daIndex.skip(inline ? 12 + compressedLen : 12);
 			}
 		} catch (IOException ex) {
-			ex.printStackTrace(); // log error. FIXME better handling
+			ex.printStackTrace(); // log error. FIXME better handling. Perhaps, shall return BAD_REVISION here as well?
 			throw new IllegalStateException(ex);
 		} finally {
 			daIndex.done();
@@ -371,7 +371,6 @@
 		}
 		
 		public boolean range(int start, int end) throws IOException {
-//			System.out.printf("RevlogStream.ReaderN1.range(): [%d, %d]\n", start, end);
 			byte[] nodeidBuf = new byte[20];
 			int i;
 			boolean extraReadsToBaseRev = false; // to indicate we read revision prior to start. XXX not sure can't do without
--- a/src/org/tmatesoft/hg/repo/Revlog.java	Thu Jun 23 15:19:07 2011 +0200
+++ b/src/org/tmatesoft/hg/repo/Revlog.java	Thu Jun 23 16:58:38 2011 +0200
@@ -16,6 +16,7 @@
  */
 package org.tmatesoft.hg.repo;
 
+import static org.tmatesoft.hg.core.Nodeid.NULL;
 import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION;
 import static org.tmatesoft.hg.repo.HgRepository.TIP;
 
@@ -87,6 +88,15 @@
 		return Nodeid.fromBinary(content.nodeid(revision), 0);
 	}
 
+	/**
+	 * Get local revision number (index) of the specified revision.
+	 * 
+	 * For occasional queries, this method works with decent performance, despite its O(n/2) approach.
+	 * Alternatively, if you need to perform multiple queries (e.g. at least 15-20), {@link RevisionMap} may come handy.
+	 *   
+	 * @param nid revision to look up 
+	 * @return revision index, or {@link HgRepository#BAD_REVISION} if specified revision not found. 
+	 */
 	public final int getLocalRevision(Nodeid nid) {
 		int revision = content.findLocalRevisionNumber(nid);
 		if (revision == BAD_REVISION) {
@@ -229,6 +239,7 @@
 				public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) {
 					if (ix != revisionNumber) {
 						// XXX temp code, just to make sure I understand what's going on here
+						// FIXME check against cpython or another tool-mangled repository
 						throw new IllegalStateException();
 					}
 					if (parent1Revision >= revisionNumber || parent2Revision >= revisionNumber) {
@@ -370,6 +381,76 @@
 		}
 	}
 
+	/**
+	 * Effective int to Nodeid and vice versa translation. It's advised to use this class instead of 
+	 * multiple {@link Revlog#getLocalRevision(Nodeid)} calls.
+	 * 
+	 * getLocalRevision(Nodeid) with straightforward lookup approach performs O(n/2)
+	 * #localRevision() is log(n), plus initialization is O(n) 
+	 */
+	public final class RevisionMap {
+		/*
+		 * in fact, initialization is much slower as it instantiates Nodeids, while #getLocalRevision
+		 * compares directly against byte buffer. Measuring cpython with 70k+ gives 3 times difference (47 vs 171)
+		 * for complete changelog iteration. 
+		 */
+		
+		/*
+		 * XXX 3 * (x * 4) bytes. Can I do better? 
+		 */
+		private Nodeid[] sequential; // natural repository order, childrenOf rely on ordering
+		private Nodeid[] sorted; // for binary search
+		private int[] sorted2natural;
+
+		public RevisionMap() {
+		}
+		
+		public HgRepository getRepo() {
+			return Revlog.this.getRepo();
+		}
+
+		/**
+		 * @return <code>this</code> for convenience.
+		 */
+		public RevisionMap init(/*XXX Pool<Nodeid> to reuse nodeids, if possible. */) {
+			// XXX HgRepository.register((RepoChangeListener) this); // listen to changes in repo, re-init if needed?
+			final int revisionCount = Revlog.this.content.revisionCount();
+			sequential = new Nodeid[revisionCount];
+			sorted = new Nodeid[revisionCount];
+			sorted2natural = new int[revisionCount];
+			RevlogStream.Inspector insp = new RevlogStream.Inspector() {
+				
+				public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) {
+					final Nodeid nid = new Nodeid(nodeid, true);
+					sequential[revisionNumber] = sorted[revisionNumber] = nid;
+				}
+			};
+			Revlog.this.content.iterate(0, TIP, false, insp);
+			Arrays.sort(sorted);
+			for (int i = 0; i < revisionCount; i++) {
+				Nodeid n = sequential[i];
+				int x = Arrays.binarySearch(sorted, n);
+				sorted2natural[x] = i;
+			}
+			return this;
+		}
+		
+		public Nodeid revision(int localRevision) {
+			return sequential[localRevision];
+		}
+		public int localRevision(Nodeid revision) {
+			if (revision == null || NULL.equals(revision)) {
+				return BAD_REVISION;
+			}
+			int x = Arrays.binarySearch(sorted, revision);
+			if (x < 0) {
+				return BAD_REVISION;
+			}
+			return sorted2natural[x];
+		}
+	}
+
+
 	protected static class ContentPipe implements RevlogStream.Inspector, CancelSupport {
 		private final ByteChannel sink;
 		private final CancelSupport cancelSupport;