changeset 598:d29d9dc6c128

Utilize the fact nodeids are very different and are read anyway to speed up reverse lookup
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Fri, 03 May 2013 15:19:18 +0200
parents c895b5556937
children 55b7987c1796
files src/org/tmatesoft/hg/core/Nodeid.java src/org/tmatesoft/hg/repo/HgManifest.java
diffstat 2 files changed, 56 insertions(+), 21 deletions(-) [+]
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/core/Nodeid.java	Fri May 03 14:10:40 2013 +0200
+++ b/src/org/tmatesoft/hg/core/Nodeid.java	Fri May 03 15:19:18 2013 +0200
@@ -33,11 +33,21 @@
  *
  */
 public final class Nodeid implements Comparable<Nodeid> {
-	
+
+	/**
+	 * Length of the nodeid in bytes
+	 */
+	public static final int SIZE = 20;
+
+	/**
+	 * Length of nodeid string representation, in bytes
+	 */
+	public static final int SIZE_ASCII = 40;
+
 	/**
 	 * <b>nullid</b>, empty root revision.
 	 */
-	public static final Nodeid NULL = new Nodeid(new byte[20], false);
+	public static final Nodeid NULL = new Nodeid(new byte[SIZE], false);
 
 	private final byte[] binaryData; 
 
@@ -49,7 +59,7 @@
 	public Nodeid(byte[] binaryRepresentation, boolean shallClone) {
 		// 5 int fields => 32 bytes
 		// byte[20] => 48 bytes (16 bytes is Nodeid with one field, 32 bytes for byte[20] 
-		if (binaryRepresentation == null || binaryRepresentation.length != 20) {
+		if (binaryRepresentation == null || binaryRepresentation.length != SIZE) {
 			throw new HgBadNodeidFormatException(String.format("Bad value: %s", String.valueOf(binaryRepresentation)));
 		}
 		/*
@@ -69,8 +79,18 @@
 
 	@Override
 	public int hashCode() {
+		return hashCode(binaryData);
+	}
+	
+	/**
+	 * Handy alternative to calculate hashcode without need to get {@link Nodeid} instance
+	 * @param binaryNodeid array of exactly 20 bytes
+	 * @return same value as <code>new Nodeid(binaryNodeid, false).hashCode()</code>
+	 */
+	public static int hashCode(byte[] binaryNodeid) {
+		assert binaryNodeid.length == SIZE;
 		// digest (part thereof) seems to be nice candidate for the hashCode
-		byte[] b = binaryData;
+		byte[] b = binaryNodeid;
 		return b[0] << 24 | (b[1] & 0xFF) << 16 | (b[2] & 0xFF) << 8 | (b[3] & 0xFF);
 	}
 	
@@ -93,7 +113,7 @@
 		if (this == o) {
 			return 0;
 		}
-		for (int i = 0; i < 20; i++) {
+		for (int i = 0; i < SIZE; i++) {
 			if (binaryData[i] != o.binaryData[i]) {
 				// if we need truly ascending sort, need to respect byte sign 
 				// return (binaryData[i] & 0xFF) < (o.binaryData[i] & 0xFF) ? -1 : 1;
@@ -121,7 +141,7 @@
 		if (this == NULL) {
 			return true;
 		}
-		for (int i = 0; i < 20; i++) {
+		for (int i = 0; i < SIZE; i++) {
 			if (this.binaryData[i] != 0) {
 				return false;
 			}
@@ -143,19 +163,19 @@
 	 * @throws HgBadNodeidFormatException custom {@link IllegalArgumentException} subclass when arguments don't select 20 bytes
 	 */
 	public static Nodeid fromBinary(byte[] binaryRepresentation, int offset) {
-		if (binaryRepresentation == null || binaryRepresentation.length - offset < 20) {
+		if (binaryRepresentation == null || binaryRepresentation.length - offset < SIZE) {
 			throw new HgBadNodeidFormatException(String.format("Bad value: %s", String.valueOf(binaryRepresentation)));
 		}
 		int i = 0;
-		while (i < 20 && binaryRepresentation[offset+i] == 0) i++;
-		if (i == 20) {
+		while (i < SIZE && binaryRepresentation[offset+i] == 0) i++;
+		if (i == SIZE) {
 			return NULL;
 		}
-		if (offset == 0 && binaryRepresentation.length == 20) {
+		if (offset == 0 && binaryRepresentation.length == SIZE) {
 			return new Nodeid(binaryRepresentation, true);
 		}
-		byte[] b = new byte[20]; // create new instance if no other reasonable guesses possible
-		System.arraycopy(binaryRepresentation, offset, b, 0, 20);
+		byte[] b = new byte[SIZE]; // create new instance if no other reasonable guesses possible
+		System.arraycopy(binaryRepresentation, offset, b, 0, SIZE);
 		return new Nodeid(b, false);
 	}
 
@@ -167,11 +187,11 @@
 	 * @throws HgBadNodeidFormatException custom {@link IllegalArgumentException} subclass when argument doesn't match encoded form of 20-bytes sha1 digest. 
 	 */
 	public static Nodeid fromAscii(String asciiRepresentation) throws HgBadNodeidFormatException {
-		if (asciiRepresentation.length() != 40) {
+		if (asciiRepresentation.length() != SIZE_ASCII) {
 			throw new HgBadNodeidFormatException(String.format("Bad value: %s", asciiRepresentation));
 		}
 		// XXX is better impl for String possible?
-		return fromAscii(asciiRepresentation.toCharArray(), 0, 40);
+		return fromAscii(asciiRepresentation.toCharArray(), 0, SIZE_ASCII);
 	}
 	
 	/**
@@ -179,11 +199,11 @@
 	 * @throws HgBadNodeidFormatException custom {@link IllegalArgumentException} subclass when bytes are not hex digits or number of bytes != 40 (160 bits) 
 	 */
 	public static Nodeid fromAscii(byte[] asciiRepresentation, int offset, int length) throws HgBadNodeidFormatException {
-		if (length != 40) {
-			throw new HgBadNodeidFormatException(String.format("Expected 40 hex characters for nodeid, not %d", length));
+		if (length != SIZE_ASCII) {
+			throw new HgBadNodeidFormatException(String.format("Expected %d hex characters for nodeid, not %d", SIZE_ASCII, length));
 		}
 		try {
-			byte[] data = new byte[20];
+			byte[] data = new byte[SIZE];
 			boolean zeroBytes = DigestHelper.ascii2bin(asciiRepresentation, offset, length, data);
 			if (zeroBytes) {
 				return NULL;
--- a/src/org/tmatesoft/hg/repo/HgManifest.java	Fri May 03 14:10:40 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgManifest.java	Fri May 03 15:19:18 2013 +0200
@@ -577,6 +577,7 @@
 		private final int changelogRevisionCount;
 		private int[] changelog2manifest;
 		private final HgRepository repo;
+		private int[] manifestNodeidHashes; // first 4 bytes of manifest nodeid at corresponding index
 
 		public RevisionMapper(HgRepository hgRepo) {
 			repo = hgRepo;
@@ -619,6 +620,7 @@
 					changelog2manifest[linkRevision] = revisionNumber;
 				}
 			}
+			manifestNodeidHashes[revisionNumber] = Nodeid.hashCode(nodeid);
 		}
 		
 		public void start(int count, Callback callback, Object token) {
@@ -631,6 +633,7 @@
 				changelog2manifest = new int[changelogRevisionCount];
 				Arrays.fill(changelog2manifest, BAD_REVISION);
 			}
+			manifestNodeidHashes = new int[count];
 		}
 
 		public void finish(Object token) {
@@ -645,6 +648,7 @@
 				}
 			}
 			if (undefinedChangelogRevision.size() > 0) {
+				final long t1 = System.nanoTime();
 				final IntMap<Nodeid> missingCsetToManifest = new IntMap<Nodeid>(undefinedChangelogRevision.size());
 				int[] undefinedClogRevs = undefinedChangelogRevision.toArray();
 				// undefinedChangelogRevision is sorted by the nature it's created
@@ -655,18 +659,29 @@
 					}
 				}, undefinedClogRevs);
 				assert missingCsetToManifest.size() == undefinedChangelogRevision.size();
+				final long t2 = System.nanoTime();
 				for (int u : undefinedClogRevs) {
 					Nodeid manifest = missingCsetToManifest.get(u);
-					// TODO calculate those missing effectively (e.g. cache and sort nodeids to speed lookup
-					// right away in the #next (may refactor ParentWalker's sequential and sorted into dedicated helper and reuse here)
 					if (manifest == null || manifest.isNull()) {
 						repo.getSessionContext().getLog().dump(getClass(), Severity.Warn, "Changeset %d has no associated manifest entry", u);
 						// keep -1 in the changelog2manifest map.
-					} else {
-						changelog2manifest[u] = repo.getManifest().getRevisionIndex(manifest);
+						continue;
+					}
+					int hash = manifest.hashCode();
+					for (int i = 0; i < manifestNodeidHashes.length; i++) {
+						if (manifestNodeidHashes[i] == hash) {
+							if (manifest.equals(repo.getManifest().getRevision(i))) {
+								changelog2manifest[u] = i;
+								break;
+							}
+							// else false match (only 4 head bytes matched, continue loop
+						}
 					}
 				}
+				final long t3 = System.nanoTime();
+				System.out.printf("\tRevisionMapper#finish(), %d missing revs: %d + %d us\n", undefinedChangelogRevision.size(), (t2-t1) / 1000, (t3-t2) / 1000);
 			}
+			manifestNodeidHashes = null;
 		}
 	}