changeset 264:6bb5e7ed051a

Optimize memory usage (reduce number of objects instantiated) when pooling file names and nodeids during manifest parsing
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Fri, 19 Aug 2011 03:36:25 +0200 (2011-08-19)
parents 31f67be94e71
children 3dd953c65619
files src/org/tmatesoft/hg/internal/Pool.java src/org/tmatesoft/hg/internal/Pool2.java src/org/tmatesoft/hg/internal/RevlogStream.java src/org/tmatesoft/hg/repo/HgManifest.java src/org/tmatesoft/hg/util/SparseSet.java
diffstat 5 files changed, 259 insertions(+), 93 deletions(-) [+]
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/internal/Pool.java	Thu Aug 18 18:06:44 2011 +0200
+++ b/src/org/tmatesoft/hg/internal/Pool.java	Fri Aug 19 03:36:25 2011 +0200
@@ -18,8 +18,6 @@
 
 import java.util.HashMap;
 
-import org.tmatesoft.hg.util.SparseSet;
-
 /**
  * Instance pooling.
  * 
@@ -28,7 +26,6 @@
  */
 public class Pool<T> {
 	private final HashMap<T,T> unify;
-//	private final SparseSet<T> unify = new SparseSet<T>();
 	
 	public Pool() {
 		unify = new HashMap<T, T>();
@@ -73,9 +70,9 @@
 		StringBuilder sb = new StringBuilder();
 		sb.append(Pool.class.getSimpleName());
 		sb.append('<');
-//		if (!unify.isEmpty()) {
-//			sb.append(unify.keySet().iterator().next().getClass().getName());
-//		}
+		if (!unify.isEmpty()) {
+			sb.append(unify.keySet().iterator().next().getClass().getName());
+		}
 		sb.append('>');
 		sb.append(':');
 		sb.append(unify.size());
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/tmatesoft/hg/internal/Pool2.java	Fri Aug 19 03:36:25 2011 +0200
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2011 TMate Software Ltd
+ *  
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.internal;
+
+import org.tmatesoft.hg.util.SparseSet;
+
+/**
+ * Instance pooling backed by {@link SparseSet} instead of a HashMap.
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public class Pool2<T> {
+	private final SparseSet<T> unify = new SparseSet<T>();
+	
+	public Pool2() {
+	}
+	
+	public Pool2(int sizeHint) {
+	}
+	
+	public T unify(T t) {
+		T rv = unify.get(t);
+		if (rv == null) {
+			// first time we see a new value
+			unify.put(t);
+			rv = t;
+		}
+		return rv;
+	}
+	
+	public boolean contains(T t) {
+		return unify.get(t) != null;
+	}
+	
+	public void record(T t) {
+		unify.put(t);
+	}
+	
+	public void clear() {
+		unify.clear();
+	}
+
+	public int size() {
+		return unify.size();
+	}
+	
+	public void x() {
+		unify.dump();
+	}
+
+	@Override
+	public String toString() {
+		StringBuilder sb = new StringBuilder();
+		sb.append(Pool2.class.getSimpleName());
+		sb.append('@');
+		sb.append(Integer.toString(System.identityHashCode(this)));
+		sb.append(' ');
+		sb.append(unify.toString());
+		return sb.toString();
+	}
+}
--- a/src/org/tmatesoft/hg/internal/RevlogStream.java	Thu Aug 18 18:06:44 2011 +0200
+++ b/src/org/tmatesoft/hg/internal/RevlogStream.java	Fri Aug 19 03:36:25 2011 +0200
@@ -339,7 +339,7 @@
 		private int lastRevisionRead = BAD_REVISION;
 		private DataAccess lastUserData;
 		// next are to track two major bottlenecks - patch application and actual time spent in inspector 
-//		private long applyTime, inspectorTime;
+//		private long applyTime, inspectorTime; // TIMING
 
 
 		public ReaderN1(boolean needData, Inspector insp) {
@@ -357,7 +357,7 @@
 				cb = new Lifecycle.BasicCallback();
 				((Lifecycle) inspector).start(totalWork, cb, cb);
 			}
-//			applyTime = inspectorTime = 0;
+//			applyTime = inspectorTime = 0; // TIMING
 		}
 
 		public void finish() {
@@ -372,7 +372,7 @@
 			if (daData != null) {
 				daData.done();
 			}
-//			System.out.printf("applyTime:%d ms, inspectorTime: %d ms\n", applyTime, inspectorTime);
+//			System.out.printf("applyTime:%d ms, inspectorTime: %d ms\n", applyTime, inspectorTime); // TIMING
 		}
 
 		public boolean range(int start, int end) throws IOException {
@@ -468,9 +468,9 @@
 						// however, actual userDataAccess and lastUserData may share Inflater object, which needs to be reset
 						// Alternatively, userDataAccess.done() above may be responsible to reset Inflater (if it's InflaterDataAccess)
 						lastUserData.reset();
-//						final long startMeasuring = System.currentTimeMillis();
+//						final long startMeasuring = System.currentTimeMillis(); // TIMING
 						byte[] userData = apply(lastUserData, actualLen, patches);
-//						applyTime += (System.currentTimeMillis() - startMeasuring);
+//						applyTime += (System.currentTimeMillis() - startMeasuring); // TIMING
 						patches.clear(); // do not keep any reference, allow PatchRecord to be gc'd
 						userDataAccess = new ByteArrayDataAccess(userData);
 					}
@@ -480,9 +480,9 @@
 					}
 				}
 				if (!extraReadsToBaseRev || i >= start) {
-//					final long startMeasuring = System.currentTimeMillis();
+//					final long startMeasuring = System.currentTimeMillis(); // TIMING
 					inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess);
-//					inspectorTime += (System.currentTimeMillis() - startMeasuring);
+//					inspectorTime += (System.currentTimeMillis() - startMeasuring); // TIMING
 				}
 				if (cb != null) {
 					if (cb.isStopped()) {
--- a/src/org/tmatesoft/hg/repo/HgManifest.java	Thu Aug 18 18:06:44 2011 +0200
+++ b/src/org/tmatesoft/hg/repo/HgManifest.java	Fri Aug 19 03:36:25 2011 +0200
@@ -29,6 +29,7 @@
 import org.tmatesoft.hg.internal.Experimental;
 import org.tmatesoft.hg.internal.Lifecycle;
 import org.tmatesoft.hg.internal.Pool;
+import org.tmatesoft.hg.internal.Pool2;
 import org.tmatesoft.hg.internal.RevlogStream;
 import org.tmatesoft.hg.util.Path;
 
@@ -149,19 +150,20 @@
 		boolean end(int manifestRevision);
 	}
 
-	private static class ManifestParser implements RevlogStream.Inspector {
+	private static class ManifestParser implements RevlogStream.Inspector/*, Lifecycle*/ {
 		private boolean gtg = true; // good to go
 		private final Inspector inspector;
-		private Pool<Nodeid> nodeidPool;
-		private final Pool<String> fnamePool;
+		private Pool2<Nodeid> nodeidPool, thisRevPool;
+		private final Pool2<String> fnamePool;
 		private final Pool<String> flagsPool;
 		
 		public ManifestParser(Inspector delegate) {
 			assert delegate != null;
 			inspector = delegate;
-			nodeidPool = new Pool<Nodeid>();
-			fnamePool = new Pool<String>();
+			nodeidPool = new Pool2<Nodeid>();
+			fnamePool = new Pool2<String>();
 			flagsPool = new Pool<String>();
+			thisRevPool = new Pool2<Nodeid>();
 		}
 		
 		public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) {
@@ -170,7 +172,6 @@
 			}
 			try {
 				gtg = gtg && inspector.begin(revisionNumber, new Nodeid(nodeid, true), linkRevision);
-				Pool<Nodeid> thisRevPool = new Pool<Nodeid>(nodeidPool.size()); // supply hint to minimize map resize/rehash
 				String fname = null;
 				String flags = null;
 				Nodeid nid = null;
@@ -216,11 +217,22 @@
 				// (next manifest is likely to refer to most of them, although in specific cases 
 				// like commit in another branch a lot may be useless)
 				nodeidPool.clear();
+				Pool2<Nodeid> t = nodeidPool;
 				nodeidPool = thisRevPool;
+				thisRevPool = t;
 			} catch (IOException ex) {
 				throw new HgBadStateException(ex);
 			}
 		}
+//
+//		public void start(int count, Callback callback, Object token) {
+//		}
+//
+//		public void finish(Object token) {
+//			System.out.println(fnamePool);
+//			System.out.println(nodeidPool);
+//			System.out.printf("Free mem once parse done: %,d\n", Runtime.getRuntime().freeMemory());
+//		}
 	}
 	
 	private static class RevisionMapper implements RevlogStream.Inspector, Lifecycle {
--- a/src/org/tmatesoft/hg/util/SparseSet.java	Thu Aug 18 18:06:44 2011 +0200
+++ b/src/org/tmatesoft/hg/util/SparseSet.java	Fri Aug 19 03:36:25 2011 +0200
@@ -16,6 +16,8 @@
  */
 package org.tmatesoft.hg.util;
 
+import java.util.Arrays;
+
 import org.tmatesoft.hg.internal.Experimental;
 
 /**
@@ -47,78 +49,47 @@
 		ss.dump();
 	}
 
-	private static class IndexBranch {
-		private final LeafBranch[] leafs = new LeafBranch[64];
-	}
-	private static class LeafBranch {
-		private final Object[] data = new Object[64];
-	}
+	@SuppressWarnings("unused")
+	private static final int MASK_8BIT = 0xFF, MASK_7BIT = 0x7F, MASK_6BIT = 0x3F, MASK_5BIT = 0x1F, MASK_4BIT = 0x0F;
+	private static final int I1_SHIFT = 15, I2_SHIFT = 6, I3_SHIFT = 0;
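+	// bits per index as selected by the masks below: 5, 4, 4 (shifts are unchanged from the former 6, 6, 6 variant)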
+	private static final int I1_MASK = MASK_5BIT, I2_MASK = MASK_4BIT, I3_MASK = MASK_4BIT;
 
 	private final int[] fixups = new int[] {0x1, 0x10, 0xA, 0xD, 0x1F }; // rehash attempts
-	private final IndexBranch[] level2 = new IndexBranch[64];
+	private final IndexBranch[] level2 = new IndexBranch[I1_MASK + 1];
 	private int size = 0;
+	
+
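+	// statistics of how elements got placed (direct slot, neighbour slot, or i1/i2 fixup), reported by toString()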
+	int directPut, neighborPut;
+	int[] fixupPut1 = new int[fixups.length], fixupPut2 = new int[fixups.length];;
 
 	public void put(T o) {
-		int hash = o.hashCode();
-		//
-		// 8 bits per level
-//		int i1 = (hash >>> 24) & 0xFF, i2 = (hash >>> 16) & 0xFF , i3 = (hash >>> 8) & 0xFF, i4 = hash & 0xFF;
-		//
-		// 10, 8, 8 and 6 bits
-//		final int i1 = (hash >>> 22) & 0x3FF, i2 = (hash >>> 14) & 0xFF , i3 = (hash >>> 6) & 0xFF, i4 = hash & 0x3F;
-		//
-		// 8, 6, 6, 6, 6
-		// 10, 6, 6, 6, 4
-		//
-		// 6, 5, 5, 5 = 21 bit
-//		hash = hash ^ (hash >>> 24); // incorporate upper byte we don't use into lower to value it
-//		final int i1 = (hash >>> 18) & 0x3F, i2 = (hash >>> 12) & 0x1F , i3 = (hash >>> 7) & 0x1F, i4 = (hash >>> 2) & 0x1F;
-		// 6, 5, 5
-//		hash = hash ^ (hash >>> 16);
-//		final int i1 = (hash >>> 10) & 0x3F, i2 = (hash >>> 5) & 0x1F , i3 = hash & 0x1F;
-		//
-		// 6, 6, 6
-		final int i1 = (hash >>> 15) & 0x3F, i2 = (hash >>> 6) & 0x3F , i3 = hash & 0x3F;
+		final int hash = hash(o);
+		final int i1 = (hash >>> I1_SHIFT) & I1_MASK, i2 = (hash >>> I2_SHIFT) & I2_MASK, i3 = (hash >>> I3_SHIFT) & I3_MASK;
 		LeafBranch l3 = leafBranchPut(i1, i2);
-		if (l3.data[i3] == null) {
-			l3.data[i3] = o;
+		int res;
+		if ((res = l3.put(i3, o)) != 0) {
 			size++;
+			if (res == 1) {
+				directPut++;
+			} else if (res == 2) {
+				neighborPut++;
+			}
 			return;
 		}
-		int neighbour = (i3+1) & 0x3F; 
-		if (l3.data[neighbour] == null) {
-			l3.data[neighbour] = o;
-			size++;
-			return;
-		}
-		int conflictCount = 0;
-		for (int fixup : fixups) {
-//			if (showConflicts) {
-//				System.out.printf("(fixup: 0x%x) ", fixup);
-//			}
+		for (int i = 0; i < fixups.length; i++) {
+			int fixup = fixups[i];
 			l3 = leafBranchPut(i1 ^ fixup, i2);
-			conflictCount++;
-			if (l3.data[i3] != null) {
-//				if (showConflicts) {
-//					System.out.printf("i1 failed ");
-//				}
-				l3 = leafBranchPut(i1, i2 ^ fixup);
-				conflictCount++;
-//				if (showConflicts) {
-//					System.out.printf("i2 %s ",  (l3.data[i3] == null) ? "ok" : "failed");
-//				}
-//			} else {
-//				if (showConflicts) {
-//					System.out.printf("i1 ok");
-//				}
+			if (l3.putIfEmptyOrSame(i3, o)) {
+				size++;
+				fixupPut1[i]++;
+				return;
 			}
-//			if (showConflicts) {
-//				System.out.println();
-//			}
-			if (l3.data[i3] == null) {
-				l3.data[i3] = o;
-//				System.out.printf("Resolved conflict in %d steps (fixup 0x%X)\n", conflictCount, fixup);
+			l3 = leafBranchPut(i1, i2 ^ fixup);
+			if (l3.putIfEmptyOrSame(i3, o)) {
 				size++;
+				fixupPut2[i]++;
 				return;
 			}
 		}
@@ -127,25 +98,26 @@
 	
 	@SuppressWarnings("unchecked")
 	public T get(T o) {
-		int hash = o.hashCode();
-		//hash = hash ^ (hash >>> 16);
-		final int i1 = (hash >>> 15) & 0x3F, i2 = (hash >>> 6) & 0x3F , i3 = hash & 0x3F;
+		final int hash = hash(o);
+		final int i1 = (hash >>> I1_SHIFT) & I1_MASK, i2 = (hash >>> I2_SHIFT) & I2_MASK, i3 = (hash >>> I3_SHIFT) & I3_MASK;
 		//
 		LeafBranch l3 = leafBranchGet(i1, i2);
-		if (l3 == null || l3.data[i3] == null) {
+		if (l3 == null) {
 			return null;
 		}
-		if (o.equals(l3.data[i3])) {
-			return (T) l3.data[i3];
+		Object c;
+		if ((c = l3.get(i3, o)) != null) {
+			return c == l3 ? null : (T) c;
 		}
-		//
-		int neighbour = (i3+1) & 0x3F; 
-		if (o.equals(l3.data[neighbour])) {
-			return (T) l3.data[neighbour];
+		if ((c = l3.get(i3 ^ 0x1, o)) != null) {
+			return c == l3 ? null : (T) c;
 		}
-
-		//
-		// resolve conflict
+		if ((c = l3.get(i3 ^ 0x2, o)) != null) {
+			return c == l3 ? null : (T) c;
+		}
+		if ((c = l3.get(i3 ^ 0x3, o)) != null) {
+			return c == l3 ? null : (T) c;
+		}
 		for (int fixup : fixups) {
 			Object data = leafValueGet(i1 ^ fixup, i2, i3);
 			if (data == null) {
@@ -181,6 +153,39 @@
 		return l3;
 	}
 
+	// unlike regular collection clear, keeps all allocated arrays to minimize gc/reallocate costs
+	// to force a full clean-up (release the allocated branches as well), use #drop
+	public void clear() {
+		for (int i1 = 0; i1 < level2.length; i1++) {
+			IndexBranch l2 = level2[i1];
+			if (l2 == null) {
+				continue;
+			}
+			for (int i2 = 0; i2 < l2.leafs.length; i2++) {
+				LeafBranch l3 = l2.leafs[i2];
+				if (l3 == null) {
+					continue;
+				}
+				for (int i3 = 0; i3 < l3.data.length; i3++) {
+					l3.data[i3] = null;
+				}
+			}
+		}
+		reset();
+	}
+	
+	public void drop() {
+		reset();
+		for (int i1 = 0; i1 < level2.length; level2[i1++] = null);
+	}
+	
+	private void reset() {
+		size = 0;
+		directPut = neighborPut = 0;
+		Arrays.fill(fixupPut1, 0);
+		Arrays.fill(fixupPut2, 0);
+	}
+
 	private LeafBranch leafBranchGet(int i1, int i2) {
 		IndexBranch l2 = level2[i1];
 		if (l2 == null) {
@@ -200,9 +205,22 @@
 		}
 		return l3.data[i3];
 	}
+	
+	private int hash(Object o) {
+		int h = o.hashCode();
+		// HashMap.newHash()
+		h ^= (h >>> 20) ^ (h >>> 12);
+        return h ^ (h >>> 7) ^ (h >>> 4);
+	}
+
+	@Override
+	public String toString() {
+		return String.format("SparseSet (0x%02X-0x%02X-0x%02X), %d elements. Direct: %d. Resolutions: neighbour: %d, i1: %s. i2: %s", I1_MASK, I2_MASK, I3_MASK, size, directPut, neighborPut, Arrays.toString(fixupPut1), Arrays.toString(fixupPut2));
+	}
 
 	public void dump() {
 		int count = 0;
+		System.out.println(toString());
 		for (int i = 0; i < level2.length; i++) {
 			IndexBranch l2 = level2[i];
 			if (l2 == null) {
@@ -222,6 +240,70 @@
 				}
 			}
 		}
-		System.out.printf("Total: %d elements", count);
+		System.out.printf("Total: %d elements\n", count);
 	}
+
+	private static class IndexBranch {
+		private final LeafBranch[] leafs = new LeafBranch[64];
+	}
+	
+	private static final class LeafBranch {
+		public final Object[] data = new Object[64];
+
+		public int put(int ix, Object d) {
+			if (putIfEmptyOrSame(ix, d)) {
+				return 1;
+			}
+			// try neighbour elements
+			if (putIfEmptyOrSame(ix ^ 0x1, d) || putIfEmptyOrSame(ix ^ 0x2, d) || putIfEmptyOrSame(ix ^ 0x3, d)) {
+				return 2;
+			}
+			return 0;
+		}
+
+		public boolean putIfEmptyOrSame(int ix, Object d) {
+			if (data[ix] == null || data[ix].equals(d)) {
+				data[ix] = d;
+				return true;
+			}
+			return false;
+		}
+
+		/**
+		 * <code>null</code> result indicates further checks make sense
+		 * @return <code>this</code> if there's no entry at all, <code>null</code> if entry doesn't match, or entry value itself otherwise
+		 */
+		public Object get(int ix, Object o) {
+			if (data[ix] == null) {
+				return this;
+			}
+			if (data[ix].equals(o)) {
+				return data[ix];
+			}
+			return null;
+		}
+	}
+
+	//
+	// 8 bits per level
+//	int i1 = (hash >>> 24) & 0xFF, i2 = (hash >>> 16) & 0xFF , i3 = (hash >>> 8) & 0xFF, i4 = hash & 0xFF;
+	//
+	// 10, 8, 8 and 6 bits
+//	final int i1 = (hash >>> 22) & 0x3FF, i2 = (hash >>> 14) & 0xFF , i3 = (hash >>> 6) & 0xFF, i4 = hash & 0x3F;
+	//
+	// 8, 6, 6, 6, 6
+	// 10, 6, 6, 6, 4
+	//
+	// 6, 5, 5, 5 = 21 bit
+//	hash = hash ^ (hash >>> 24); // incorporate upper byte we don't use into lower to value it
+//final int i1 = (hash >>> 18) & 0x3F, i2 = (hash >>> 12) & 0x1F , i3 = (hash >>> 7) & 0x1F, i4 = (hash >>> 2) & 0x1F;
+// 6, 5, 5
+//hash = hash ^ (hash >>> 16);
+//final int i1 = (hash >>> 10) & 0x3F, i2 = (hash >>> 5) & 0x1F , i3 = hash & 0x1F;
+//
+// 6, 6, 6
+//final int i1 = (hash >>> 15) & 0x3F, i2 = (hash >>> 6) & 0x3F , i3 = hash & 0x3F;
+//
+// 8, 5, 5
+
 }