Mercurial > hg4j
diff src/org/tmatesoft/hg/util/SparseSet.java @ 264:6bb5e7ed051a
Optimize memory usage (reduce number of objects instantiated) when pooling file names and nodeids during manifest parsing
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Fri, 19 Aug 2011 03:36:25 +0200 |
parents | 61cb6724ff36 |
children | 3dd953c65619 |
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/util/SparseSet.java Thu Aug 18 18:06:44 2011 +0200 +++ b/src/org/tmatesoft/hg/util/SparseSet.java Fri Aug 19 03:36:25 2011 +0200 @@ -16,6 +16,8 @@ */ package org.tmatesoft.hg.util; +import java.util.Arrays; + import org.tmatesoft.hg.internal.Experimental; /** @@ -47,78 +49,47 @@ ss.dump(); } - private static class IndexBranch { - private final LeafBranch[] leafs = new LeafBranch[64]; - } - private static class LeafBranch { - private final Object[] data = new Object[64]; - } + @SuppressWarnings("unused") + private static final int MASK_8BIT = 0xFF, MASK_7BIT = 0x7F, MASK_6BIT = 0x3F, MASK_5BIT = 0x1F, MASK_4BIT = 0x0F; + private static final int I1_SHIFT = 15, I2_SHIFT = 6, I3_SHIFT = 0; + // 6, 5, 5 + private static final int I1_MASK = MASK_5BIT, I2_MASK = MASK_4BIT, I3_MASK = MASK_4BIT; private final int[] fixups = new int[] {0x1, 0x10, 0xA, 0xD, 0x1F }; // rehash attempts - private final IndexBranch[] level2 = new IndexBranch[64]; + private final IndexBranch[] level2 = new IndexBranch[I1_MASK + 1]; private int size = 0; + + + // + int directPut, neighborPut; + int[] fixupPut1 = new int[fixups.length], fixupPut2 = new int[fixups.length];; public void put(T o) { - int hash = o.hashCode(); - // - // 8 bits per level -// int i1 = (hash >>> 24) & 0xFF, i2 = (hash >>> 16) & 0xFF , i3 = (hash >>> 8) & 0xFF, i4 = hash & 0xFF; - // - // 10, 8, 8 and 6 bits -// final int i1 = (hash >>> 22) & 0x3FF, i2 = (hash >>> 14) & 0xFF , i3 = (hash >>> 6) & 0xFF, i4 = hash & 0x3F; - // - // 8, 6, 6, 6, 6 - // 10, 6, 6, 6, 4 - // - // 6, 5, 5, 5 = 21 bit -// hash = hash ^ (hash >>> 24); // incorporate upper byte we don't use into lower to value it -// final int i1 = (hash >>> 18) & 0x3F, i2 = (hash >>> 12) & 0x1F , i3 = (hash >>> 7) & 0x1F, i4 = (hash >>> 2) & 0x1F; - // 6, 5, 5 -// hash = hash ^ (hash >>> 16); -// final int i1 = (hash >>> 10) & 0x3F, i2 = (hash >>> 5) & 0x1F , i3 = hash & 0x1F; - // - // 6, 6, 6 - final int i1 = (hash >>> 15) & 0x3F, i2 = (hash >>> 6) & 0x3F , i3 = hash & 0x3F; + final int hash = hash(o); + final int i1 = (hash >>> I1_SHIFT) & I1_MASK, i2 = (hash >>> I2_SHIFT) & I2_MASK, i3 = (hash >>> I3_SHIFT) & I3_MASK; LeafBranch l3 = leafBranchPut(i1, i2); - if (l3.data[i3] == null) { - l3.data[i3] = o; + int res; + if ((res = l3.put(i3, o)) != 0) { size++; + if (res == 1) { + directPut++; + } else if (res == 2) { + neighborPut++; + } return; } - int neighbour = (i3+1) & 0x3F; - if (l3.data[neighbour] == null) { - l3.data[neighbour] = o; - size++; - return; - } - int conflictCount = 0; - for (int fixup : fixups) { -// if (showConflicts) { -// System.out.printf("(fixup: 0x%x) ", fixup); -// } + for (int i = 0; i < fixups.length; i++) { + int fixup = fixups[i]; l3 = leafBranchPut(i1 ^ fixup, i2); - conflictCount++; - if (l3.data[i3] != null) { -// if (showConflicts) { -// System.out.printf("i1 failed "); -// } - l3 = leafBranchPut(i1, i2 ^ fixup); - conflictCount++; -// if (showConflicts) { -// System.out.printf("i2 %s ", (l3.data[i3] == null) ? "ok" : "failed"); -// } -// } else { -// if (showConflicts) { -// System.out.printf("i1 ok"); -// } + if (l3.putIfEmptyOrSame(i3, o)) { + size++; + fixupPut1[i]++; + return; } -// if (showConflicts) { -// System.out.println(); -// } - if (l3.data[i3] == null) { - l3.data[i3] = o; -// System.out.printf("Resolved conflict in %d steps (fixup 0x%X)\n", conflictCount, fixup); + l3 = leafBranchPut(i1, i2 ^ fixup); + if (l3.putIfEmptyOrSame(i3, o)) { size++; + fixupPut2[i]++; return; } } @@ -127,25 +98,26 @@ @SuppressWarnings("unchecked") public T get(T o) { - int hash = o.hashCode(); - //hash = hash ^ (hash >>> 16); - final int i1 = (hash >>> 15) & 0x3F, i2 = (hash >>> 6) & 0x3F , i3 = hash & 0x3F; + final int hash = hash(o); + final int i1 = (hash >>> I1_SHIFT) & I1_MASK, i2 = (hash >>> I2_SHIFT) & I2_MASK, i3 = (hash >>> I3_SHIFT) & I3_MASK; // LeafBranch l3 = leafBranchGet(i1, i2); - if (l3 == null || l3.data[i3] == null) { + if (l3 == null) { return null; } - if (o.equals(l3.data[i3])) { - return (T) l3.data[i3]; + Object c; + if ((c = l3.get(i3, o)) != null) { + return c == l3 ? null : (T) c; } - // - int neighbour = (i3+1) & 0x3F; - if (o.equals(l3.data[neighbour])) { - return (T) l3.data[neighbour]; + if ((c = l3.get(i3 ^ 0x1, o)) != null) { + return c == l3 ? null : (T) c; } - - // - // resolve conflict + if ((c = l3.get(i3 ^ 0x2, o)) != null) { + return c == l3 ? null : (T) c; + } + if ((c = l3.get(i3 ^ 0x3, o)) != null) { + return c == l3 ? null : (T) c; + } for (int fixup : fixups) { Object data = leafValueGet(i1 ^ fixup, i2, i3); if (data == null) { @@ -181,6 +153,39 @@ return l3; } + // unlike regular collection clear, keeps all allocated arrays to minimize gc/reallocate costs + // do force clean, use #drop + public void clear() { + for (int i1 = 0; i1 < level2.length; i1++) { + IndexBranch l2 = level2[i1]; + if (l2 == null) { + continue; + } + for (int i2 = 0; i2 < l2.leafs.length; i2++) { + LeafBranch l3 = l2.leafs[i2]; + if (l3 == null) { + continue; + } + for (int i3 = 0; i3 < l3.data.length; i3++) { + l3.data[i3] = null; + } + } + } + reset(); + } + + public void drop() { + reset(); + for (int i1 = 0; i1 < level2.length; level2[i1++] = null); + } + + private void reset() { + size = 0; + directPut = neighborPut = 0; + Arrays.fill(fixupPut1, 0); + Arrays.fill(fixupPut2, 0); + } + private LeafBranch leafBranchGet(int i1, int i2) { IndexBranch l2 = level2[i1]; if (l2 == null) { @@ -200,9 +205,22 @@ } return l3.data[i3]; } + + private int hash(Object o) { + int h = o.hashCode(); + // HashMap.newHash() + h ^= (h >>> 20) ^ (h >>> 12); + return h ^ (h >>> 7) ^ (h >>> 4); + } + + @Override + public String toString() { + return String.format("SparseSet (0x%02X-0x%02X-0x%02X), %d elements. Direct: %d. Resolutions: neighbour: %d, i1: %s. i2: %s", I1_MASK, I2_MASK, I3_MASK, size, directPut, neighborPut, Arrays.toString(fixupPut1), Arrays.toString(fixupPut2)); + } public void dump() { int count = 0; + System.out.println(toString()); for (int i = 0; i < level2.length; i++) { IndexBranch l2 = level2[i]; if (l2 == null) { @@ -222,6 +240,70 @@ } } } - System.out.printf("Total: %d elements", count); + System.out.printf("Total: %d elements\n", count); } + + private static class IndexBranch { + private final LeafBranch[] leafs = new LeafBranch[64]; + } + + private static final class LeafBranch { + public final Object[] data = new Object[64]; + + public int put(int ix, Object d) { + if (putIfEmptyOrSame(ix, d)) { + return 1; + } + // try neighbour elements + if (putIfEmptyOrSame(ix ^ 0x1, d) || putIfEmptyOrSame(ix ^ 0x2, d) || putIfEmptyOrSame(ix ^ 0x3, d)) { + return 2; + } + return 0; + } + + public boolean putIfEmptyOrSame(int ix, Object d) { + if (data[ix] == null || data[ix].equals(d)) { + data[ix] = d; + return true; + } + return false; + } + + /** + * <code>null</code> result indicates further checks make sense + * @return <code>this</code> if there's no entry at all, <code>null</code> if entry doesn't match, or entry value itself otherwise + */ + public Object get(int ix, Object o) { + if (data[ix] == null) { + return this; + } + if (data[ix].equals(o)) { + return data[ix]; + } + return null; + } + } + + // + // 8 bits per level +// int i1 = (hash >>> 24) & 0xFF, i2 = (hash >>> 16) & 0xFF , i3 = (hash >>> 8) & 0xFF, i4 = hash & 0xFF; + // + // 10, 8, 8 and 6 bits +// final int i1 = (hash >>> 22) & 0x3FF, i2 = (hash >>> 14) & 0xFF , i3 = (hash >>> 6) & 0xFF, i4 = hash & 0x3F; + // + // 8, 6, 6, 6, 6 + // 10, 6, 6, 6, 4 + // + // 6, 5, 5, 5 = 21 bit +// hash = hash ^ (hash >>> 24); // incorporate upper byte we don't use into lower to value it +//final int i1 = (hash >>> 18) & 0x3F, i2 = (hash >>> 12) & 0x1F , i3 = (hash >>> 7) & 0x1F, i4 = (hash >>> 2) & 0x1F; +// 6, 5, 5 +//hash = hash ^ (hash >>> 16); +//final int i1 = (hash >>> 10) & 0x3F, i2 = (hash >>> 5) & 0x1F , i3 = hash & 0x1F; +// +// 6, 6, 6 +//final int i1 = (hash >>> 15) & 0x3F, i2 = (hash >>> 6) & 0x3F , i3 = hash & 0x3F; +// +// 8, 5, 5 + }