Mercurial > hg4j
changeset 264:6bb5e7ed051a
Optimize memory usage (reduce number of objects instantiated) when pooling file names and nodeids during manifest parsing
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Fri, 19 Aug 2011 03:36:25 +0200 |
parents | 31f67be94e71 |
children | 3dd953c65619 |
files | src/org/tmatesoft/hg/internal/Pool.java src/org/tmatesoft/hg/internal/Pool2.java src/org/tmatesoft/hg/internal/RevlogStream.java src/org/tmatesoft/hg/repo/HgManifest.java src/org/tmatesoft/hg/util/SparseSet.java |
diffstat | 5 files changed, 259 insertions(+), 93 deletions(-) [+] |
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/internal/Pool.java Thu Aug 18 18:06:44 2011 +0200 +++ b/src/org/tmatesoft/hg/internal/Pool.java Fri Aug 19 03:36:25 2011 +0200 @@ -18,8 +18,6 @@ import java.util.HashMap; -import org.tmatesoft.hg.util.SparseSet; - /** * Instance pooling. * @@ -28,7 +26,6 @@ */ public class Pool<T> { private final HashMap<T,T> unify; -// private final SparseSet<T> unify = new SparseSet<T>(); public Pool() { unify = new HashMap<T, T>(); @@ -73,9 +70,9 @@ StringBuilder sb = new StringBuilder(); sb.append(Pool.class.getSimpleName()); sb.append('<'); -// if (!unify.isEmpty()) { -// sb.append(unify.keySet().iterator().next().getClass().getName()); -// } + if (!unify.isEmpty()) { + sb.append(unify.keySet().iterator().next().getClass().getName()); + } sb.append('>'); sb.append(':'); sb.append(unify.size());
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/org/tmatesoft/hg/internal/Pool2.java Fri Aug 19 03:36:25 2011 +0200 @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2011 TMate Software Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * For information on how to redistribute this software under + * the terms of a license other than GNU General Public License + * contact TMate Software at support@hg4j.com + */ +package org.tmatesoft.hg.internal; + +import org.tmatesoft.hg.util.SparseSet; + +/** + * + * @author Artem Tikhomirov + * @author TMate Software Ltd. + */ +public class Pool2<T> { + private final SparseSet<T> unify = new SparseSet<T>(); + + public Pool2() { + } + + public Pool2(int sizeHint) { + } + + public T unify(T t) { + T rv = unify.get(t); + if (rv == null) { + // first time we see a new value + unify.put(t); + rv = t; + } + return rv; + } + + public boolean contains(T t) { + return unify.get(t) != null; + } + + public void record(T t) { + unify.put(t); + } + + public void clear() { + unify.clear(); + } + + public int size() { + return unify.size(); + } + + public void x() { + unify.dump(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(Pool2.class.getSimpleName()); + sb.append('@'); + sb.append(Integer.toString(System.identityHashCode(this))); + sb.append(' '); + sb.append(unify.toString()); + return sb.toString(); + } +}
--- a/src/org/tmatesoft/hg/internal/RevlogStream.java Thu Aug 18 18:06:44 2011 +0200 +++ b/src/org/tmatesoft/hg/internal/RevlogStream.java Fri Aug 19 03:36:25 2011 +0200 @@ -339,7 +339,7 @@ private int lastRevisionRead = BAD_REVISION; private DataAccess lastUserData; // next are to track two major bottlenecks - patch application and actual time spent in inspector -// private long applyTime, inspectorTime; +// private long applyTime, inspectorTime; // TIMING public ReaderN1(boolean needData, Inspector insp) { @@ -357,7 +357,7 @@ cb = new Lifecycle.BasicCallback(); ((Lifecycle) inspector).start(totalWork, cb, cb); } -// applyTime = inspectorTime = 0; +// applyTime = inspectorTime = 0; // TIMING } public void finish() { @@ -372,7 +372,7 @@ if (daData != null) { daData.done(); } -// System.out.printf("applyTime:%d ms, inspectorTime: %d ms\n", applyTime, inspectorTime); +// System.out.printf("applyTime:%d ms, inspectorTime: %d ms\n", applyTime, inspectorTime); // TIMING } public boolean range(int start, int end) throws IOException { @@ -468,9 +468,9 @@ // however, actual userDataAccess and lastUserData may share Inflater object, which needs to be reset // Alternatively, userDataAccess.done() above may be responsible to reset Inflater (if it's InflaterDataAccess) lastUserData.reset(); -// final long startMeasuring = System.currentTimeMillis(); +// final long startMeasuring = System.currentTimeMillis(); // TIMING byte[] userData = apply(lastUserData, actualLen, patches); -// applyTime += (System.currentTimeMillis() - startMeasuring); +// applyTime += (System.currentTimeMillis() - startMeasuring); // TIMING patches.clear(); // do not keep any reference, allow PatchRecord to be gc'd userDataAccess = new ByteArrayDataAccess(userData); } @@ -480,9 +480,9 @@ } } if (!extraReadsToBaseRev || i >= start) { -// final long startMeasuring = System.currentTimeMillis(); +// final long startMeasuring = System.currentTimeMillis(); // TIMING inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); -// inspectorTime += (System.currentTimeMillis() - startMeasuring); +// inspectorTime += (System.currentTimeMillis() - startMeasuring); // TIMING } if (cb != null) { if (cb.isStopped()) {
--- a/src/org/tmatesoft/hg/repo/HgManifest.java Thu Aug 18 18:06:44 2011 +0200 +++ b/src/org/tmatesoft/hg/repo/HgManifest.java Fri Aug 19 03:36:25 2011 +0200 @@ -29,6 +29,7 @@ import org.tmatesoft.hg.internal.Experimental; import org.tmatesoft.hg.internal.Lifecycle; import org.tmatesoft.hg.internal.Pool; +import org.tmatesoft.hg.internal.Pool2; import org.tmatesoft.hg.internal.RevlogStream; import org.tmatesoft.hg.util.Path; @@ -149,19 +150,20 @@ boolean end(int manifestRevision); } - private static class ManifestParser implements RevlogStream.Inspector { + private static class ManifestParser implements RevlogStream.Inspector/*, Lifecycle*/ { private boolean gtg = true; // good to go private final Inspector inspector; - private Pool<Nodeid> nodeidPool; - private final Pool<String> fnamePool; + private Pool2<Nodeid> nodeidPool, thisRevPool; + private final Pool2<String> fnamePool; private final Pool<String> flagsPool; public ManifestParser(Inspector delegate) { assert delegate != null; inspector = delegate; - nodeidPool = new Pool<Nodeid>(); - fnamePool = new Pool<String>(); + nodeidPool = new Pool2<Nodeid>(); + fnamePool = new Pool2<String>(); flagsPool = new Pool<String>(); + thisRevPool = new Pool2<Nodeid>(); } public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { @@ -170,7 +172,6 @@ } try { gtg = gtg && inspector.begin(revisionNumber, new Nodeid(nodeid, true), linkRevision); - Pool<Nodeid> thisRevPool = new Pool<Nodeid>(nodeidPool.size()); // supply hint to minimize map resize/rehash String fname = null; String flags = null; Nodeid nid = null; @@ -216,11 +217,22 @@ // (next manifest is likely to refer to most of them, although in specific cases // like commit in another branch a lot may be useless) nodeidPool.clear(); + Pool2<Nodeid> t = nodeidPool; nodeidPool = thisRevPool; + thisRevPool = t; } catch (IOException ex) { throw new HgBadStateException(ex); } } +// +// public void start(int count, Callback callback, Object token) { +// } +// +// public void finish(Object token) { +// System.out.println(fnamePool); +// System.out.println(nodeidPool); +// System.out.printf("Free mem once parse done: %,d\n", Runtime.getRuntime().freeMemory()); +// } } private static class RevisionMapper implements RevlogStream.Inspector, Lifecycle {
--- a/src/org/tmatesoft/hg/util/SparseSet.java Thu Aug 18 18:06:44 2011 +0200 +++ b/src/org/tmatesoft/hg/util/SparseSet.java Fri Aug 19 03:36:25 2011 +0200 @@ -16,6 +16,8 @@ */ package org.tmatesoft.hg.util; +import java.util.Arrays; + import org.tmatesoft.hg.internal.Experimental; /** @@ -47,78 +49,47 @@ ss.dump(); } - private static class IndexBranch { - private final LeafBranch[] leafs = new LeafBranch[64]; - } - private static class LeafBranch { - private final Object[] data = new Object[64]; - } + @SuppressWarnings("unused") + private static final int MASK_8BIT = 0xFF, MASK_7BIT = 0x7F, MASK_6BIT = 0x3F, MASK_5BIT = 0x1F, MASK_4BIT = 0x0F; + private static final int I1_SHIFT = 15, I2_SHIFT = 6, I3_SHIFT = 0; + // 6, 5, 5 + private static final int I1_MASK = MASK_5BIT, I2_MASK = MASK_4BIT, I3_MASK = MASK_4BIT; private final int[] fixups = new int[] {0x1, 0x10, 0xA, 0xD, 0x1F }; // rehash attempts - private final IndexBranch[] level2 = new IndexBranch[64]; + private final IndexBranch[] level2 = new IndexBranch[I1_MASK + 1]; private int size = 0; + + + // + int directPut, neighborPut; + int[] fixupPut1 = new int[fixups.length], fixupPut2 = new int[fixups.length];; public void put(T o) { - int hash = o.hashCode(); - // - // 8 bits per level -// int i1 = (hash >>> 24) & 0xFF, i2 = (hash >>> 16) & 0xFF , i3 = (hash >>> 8) & 0xFF, i4 = hash & 0xFF; - // - // 10, 8, 8 and 6 bits -// final int i1 = (hash >>> 22) & 0x3FF, i2 = (hash >>> 14) & 0xFF , i3 = (hash >>> 6) & 0xFF, i4 = hash & 0x3F; - // - // 8, 6, 6, 6, 6 - // 10, 6, 6, 6, 4 - // - // 6, 5, 5, 5 = 21 bit -// hash = hash ^ (hash >>> 24); // incorporate upper byte we don't use into lower to value it -// final int i1 = (hash >>> 18) & 0x3F, i2 = (hash >>> 12) & 0x1F , i3 = (hash >>> 7) & 0x1F, i4 = (hash >>> 2) & 0x1F; - // 6, 5, 5 -// hash = hash ^ (hash >>> 16); -// final int i1 = (hash >>> 10) & 0x3F, i2 = (hash >>> 5) & 0x1F , i3 = hash & 0x1F; - // - // 6, 6, 6 - final int i1 = (hash >>> 15) & 0x3F, i2 = (hash >>> 6) & 0x3F , i3 = hash & 0x3F; + final int hash = hash(o); + final int i1 = (hash >>> I1_SHIFT) & I1_MASK, i2 = (hash >>> I2_SHIFT) & I2_MASK, i3 = (hash >>> I3_SHIFT) & I3_MASK; LeafBranch l3 = leafBranchPut(i1, i2); - if (l3.data[i3] == null) { - l3.data[i3] = o; + int res; + if ((res = l3.put(i3, o)) != 0) { size++; + if (res == 1) { + directPut++; + } else if (res == 2) { + neighborPut++; + } return; } - int neighbour = (i3+1) & 0x3F; - if (l3.data[neighbour] == null) { - l3.data[neighbour] = o; - size++; - return; - } - int conflictCount = 0; - for (int fixup : fixups) { -// if (showConflicts) { -// System.out.printf("(fixup: 0x%x) ", fixup); -// } + for (int i = 0; i < fixups.length; i++) { + int fixup = fixups[i]; l3 = leafBranchPut(i1 ^ fixup, i2); - conflictCount++; - if (l3.data[i3] != null) { -// if (showConflicts) { -// System.out.printf("i1 failed "); -// } - l3 = leafBranchPut(i1, i2 ^ fixup); - conflictCount++; -// if (showConflicts) { -// System.out.printf("i2 %s ", (l3.data[i3] == null) ? "ok" : "failed"); -// } -// } else { -// if (showConflicts) { -// System.out.printf("i1 ok"); -// } + if (l3.putIfEmptyOrSame(i3, o)) { + size++; + fixupPut1[i]++; + return; } -// if (showConflicts) { -// System.out.println(); -// } - if (l3.data[i3] == null) { - l3.data[i3] = o; -// System.out.printf("Resolved conflict in %d steps (fixup 0x%X)\n", conflictCount, fixup); + l3 = leafBranchPut(i1, i2 ^ fixup); + if (l3.putIfEmptyOrSame(i3, o)) { size++; + fixupPut2[i]++; return; } } @@ -127,25 +98,26 @@ @SuppressWarnings("unchecked") public T get(T o) { - int hash = o.hashCode(); - //hash = hash ^ (hash >>> 16); - final int i1 = (hash >>> 15) & 0x3F, i2 = (hash >>> 6) & 0x3F , i3 = hash & 0x3F; + final int hash = hash(o); + final int i1 = (hash >>> I1_SHIFT) & I1_MASK, i2 = (hash >>> I2_SHIFT) & I2_MASK, i3 = (hash >>> I3_SHIFT) & I3_MASK; // LeafBranch l3 = leafBranchGet(i1, i2); - if (l3 == null || l3.data[i3] == null) { + if (l3 == null) { return null; } - if (o.equals(l3.data[i3])) { - return (T) l3.data[i3]; + Object c; + if ((c = l3.get(i3, o)) != null) { + return c == l3 ? null : (T) c; } - // - int neighbour = (i3+1) & 0x3F; - if (o.equals(l3.data[neighbour])) { - return (T) l3.data[neighbour]; + if ((c = l3.get(i3 ^ 0x1, o)) != null) { + return c == l3 ? null : (T) c; } - - // - // resolve conflict + if ((c = l3.get(i3 ^ 0x2, o)) != null) { + return c == l3 ? null : (T) c; + } + if ((c = l3.get(i3 ^ 0x3, o)) != null) { + return c == l3 ? null : (T) c; + } for (int fixup : fixups) { Object data = leafValueGet(i1 ^ fixup, i2, i3); if (data == null) { @@ -181,6 +153,39 @@ return l3; } + // unlike regular collection clear, keeps all allocated arrays to minimize gc/reallocate costs + // do force clean, use #drop + public void clear() { + for (int i1 = 0; i1 < level2.length; i1++) { + IndexBranch l2 = level2[i1]; + if (l2 == null) { + continue; + } + for (int i2 = 0; i2 < l2.leafs.length; i2++) { + LeafBranch l3 = l2.leafs[i2]; + if (l3 == null) { + continue; + } + for (int i3 = 0; i3 < l3.data.length; i3++) { + l3.data[i3] = null; + } + } + } + reset(); + } + + public void drop() { + reset(); + for (int i1 = 0; i1 < level2.length; level2[i1++] = null); + } + + private void reset() { + size = 0; + directPut = neighborPut = 0; + Arrays.fill(fixupPut1, 0); + Arrays.fill(fixupPut2, 0); + } + private LeafBranch leafBranchGet(int i1, int i2) { IndexBranch l2 = level2[i1]; if (l2 == null) { @@ -200,9 +205,22 @@ } return l3.data[i3]; } + + private int hash(Object o) { + int h = o.hashCode(); + // HashMap.newHash() + h ^= (h >>> 20) ^ (h >>> 12); + return h ^ (h >>> 7) ^ (h >>> 4); + } + + @Override + public String toString() { + return String.format("SparseSet (0x%02X-0x%02X-0x%02X), %d elements. Direct: %d. Resolutions: neighbour: %d, i1: %s. i2: %s", I1_MASK, I2_MASK, I3_MASK, size, directPut, neighborPut, Arrays.toString(fixupPut1), Arrays.toString(fixupPut2)); + } public void dump() { int count = 0; + System.out.println(toString()); for (int i = 0; i < level2.length; i++) { IndexBranch l2 = level2[i]; if (l2 == null) { @@ -222,6 +240,70 @@ } } } - System.out.printf("Total: %d elements", count); + System.out.printf("Total: %d elements\n", count); } + + private static class IndexBranch { + private final LeafBranch[] leafs = new LeafBranch[64]; + } + + private static final class LeafBranch { + public final Object[] data = new Object[64]; + + public int put(int ix, Object d) { + if (putIfEmptyOrSame(ix, d)) { + return 1; + } + // try neighbour elements + if (putIfEmptyOrSame(ix ^ 0x1, d) || putIfEmptyOrSame(ix ^ 0x2, d) || putIfEmptyOrSame(ix ^ 0x3, d)) { + return 2; + } + return 0; + } + + public boolean putIfEmptyOrSame(int ix, Object d) { + if (data[ix] == null || data[ix].equals(d)) { + data[ix] = d; + return true; + } + return false; + } + + /** + * <code>null</code> result indicates further checks make sense + * @return <code>this</code> if there's no entry at all, <code>null</code> if entry doesn't match, or entry value itself otherwise + */ + public Object get(int ix, Object o) { + if (data[ix] == null) { + return this; + } + if (data[ix].equals(o)) { + return data[ix]; + } + return null; + } + } + + // + // 8 bits per level +// int i1 = (hash >>> 24) & 0xFF, i2 = (hash >>> 16) & 0xFF , i3 = (hash >>> 8) & 0xFF, i4 = hash & 0xFF; + // + // 10, 8, 8 and 6 bits +// final int i1 = (hash >>> 22) & 0x3FF, i2 = (hash >>> 14) & 0xFF , i3 = (hash >>> 6) & 0xFF, i4 = hash & 0x3F; + // + // 8, 6, 6, 6, 6 + // 10, 6, 6, 6, 4 + // + // 6, 5, 5, 5 = 21 bit +// hash = hash ^ (hash >>> 24); // incorporate upper byte we don't use into lower to value it +//final int i1 = (hash >>> 18) & 0x3F, i2 = (hash >>> 12) & 0x1F , i3 = (hash >>> 7) & 0x1F, i4 = (hash >>> 2) & 0x1F; +// 6, 5, 5 +//hash = hash ^ (hash >>> 16); +//final int i1 = (hash >>> 10) & 0x3F, i2 = (hash >>> 5) & 0x1F , i3 = hash & 0x1F; +// +// 6, 6, 6 +//final int i1 = (hash >>> 15) & 0x3F, i2 = (hash >>> 6) & 0x3F , i3 = hash & 0x3F; +// +// 8, 5, 5 + }