Mercurial > jhg
changeset 304:85b8efde5586
Use memory-friendly set implementation to canonicalize filenames and nodeids
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> | 
|---|---|
| date | Wed, 21 Sep 2011 18:26:16 +0200 | 
| parents | 2ffcbf360fd5 | 
| children | ae8d116f4ee2 | 
| files | src/org/tmatesoft/hg/core/Nodeid.java src/org/tmatesoft/hg/internal/Pool2.java src/org/tmatesoft/hg/repo/HgManifest.java src/org/tmatesoft/hg/util/DirectHashSet.java src/org/tmatesoft/hg/util/SparseSet.java test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java | 
| diffstat | 6 files changed, 150 insertions(+), 325 deletions(-) [+] | 
line wrap: on
 line diff
--- a/src/org/tmatesoft/hg/core/Nodeid.java Tue Sep 20 04:43:39 2011 +0200 +++ b/src/org/tmatesoft/hg/core/Nodeid.java Wed Sep 21 18:26:16 2011 +0200 @@ -76,8 +76,11 @@ @Override public boolean equals(Object o) { + if (o == this) { + return true; + } if (o instanceof Nodeid) { - return this == o || equalsTo(((Nodeid) o).binaryData); + return equalsTo(((Nodeid) o).binaryData); } return false; }
--- a/src/org/tmatesoft/hg/internal/Pool2.java Tue Sep 20 04:43:39 2011 +0200 +++ b/src/org/tmatesoft/hg/internal/Pool2.java Wed Sep 21 18:26:16 2011 +0200 @@ -16,7 +16,8 @@ */ package org.tmatesoft.hg.internal; -import org.tmatesoft.hg.util.SparseSet; +import org.tmatesoft.hg.util.DirectHashSet; + /** * @@ -24,7 +25,7 @@ * @author TMate Software Ltd. */ public class Pool2<T> { - private final SparseSet<T> unify = new SparseSet<T>(); + private final DirectHashSet<T> unify = new DirectHashSet<T>(); public Pool2() { } @@ -57,10 +58,6 @@ public int size() { return unify.size(); } - - public void x() { - unify.dump(); - } @Override public String toString() {
--- a/src/org/tmatesoft/hg/repo/HgManifest.java Tue Sep 20 04:43:39 2011 +0200 +++ b/src/org/tmatesoft/hg/repo/HgManifest.java Wed Sep 21 18:26:16 2011 +0200 @@ -29,7 +29,7 @@ import org.tmatesoft.hg.internal.DigestHelper; import org.tmatesoft.hg.internal.Experimental; import org.tmatesoft.hg.internal.Lifecycle; -import org.tmatesoft.hg.internal.Pool; +import org.tmatesoft.hg.internal.Pool2; import org.tmatesoft.hg.internal.RevlogStream; import org.tmatesoft.hg.util.Path; @@ -281,17 +281,17 @@ private boolean gtg = true; // good to go private final Inspector inspector; private final Inspector2 inspector2; - private Pool<Nodeid> nodeidPool, thisRevPool; - private final Pool<PathProxy> fnamePool; + private Pool2<Nodeid> nodeidPool, thisRevPool; + private final Pool2<PathProxy> fnamePool; private byte[] nodeidLookupBuffer = new byte[20]; // get reassigned each time new Nodeid is added to pool public ManifestParser(Inspector delegate) { assert delegate != null; inspector = delegate; inspector2 = delegate instanceof Inspector2 ? (Inspector2) delegate : null; - nodeidPool = new Pool<Nodeid>(); - fnamePool = new Pool<PathProxy>(); - thisRevPool = new Pool<Nodeid>(); + nodeidPool = new Pool2<Nodeid>(); + fnamePool = new Pool2<PathProxy>(); + thisRevPool = new Pool2<Nodeid>(); } public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { @@ -354,7 +354,7 @@ // (next manifest is likely to refer to most of them, although in specific cases // like commit in another branch a lot may be useless) nodeidPool.clear(); - Pool<Nodeid> t = nodeidPool; + Pool2<Nodeid> t = nodeidPool; nodeidPool = thisRevPool; thisRevPool = t; } catch (IOException ex) {
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/org/tmatesoft/hg/util/DirectHashSet.java Wed Sep 21 18:26:16 2011 +0200 @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2011 TMate Software Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * For information on how to redistribute this software under + * the terms of a license other than GNU General Public License + * contact TMate Software at support@hg4j.com + */ +package org.tmatesoft.hg.util; + +import org.tmatesoft.hg.internal.Experimental; + +/** + * Memory-friendly alternative to HashSet. With slightly worse performance than that of HashSet, uses n * sizeof(HashMap.Entry) less memory + * (i.e. for set of 50k elements saves more than 1 Mb of memory). Besides, elements of this set can be obtained (not only queried for presence) - + * the option essential for canonical mappings (aka Pool) + * + * @author Artem Tikhomirov + * @author TMate Software Ltd. + */ +@Experimental +public class DirectHashSet<T> { + + private Object[] table; + private int size; + private int threshold; + + public DirectHashSet() { + this (16); + } + + public DirectHashSet(int capacity) { + int result = 2; + while (result < capacity) { + result <<= 1; + } + table = new Object[result]; + threshold = result - (result >>> 2); + } + + /** + * Add element to the set. + * @param o element, shall not be <code>null</code> + * @return previous element from the set equal to the argument + */ + @SuppressWarnings("unchecked") + public T put(T o) { + final int h = hash(o); + final int mask = table.length - 1; + int i = h & mask; + Object t; + while ((t = table[i]) != null) { + if (t.equals(o)) { + table[i] = o; + return (T) t; + } + i = (i+1) & mask; + } + table[i] = o; + if (++size >= threshold) { + resize(); + } + return null; + } + + /** + * Query set for the element. + * @param o element, shall not be <code>null</code> + * @return element from the set, if present + */ + @SuppressWarnings("unchecked") + public T get(T o) { + final int h = hash(o); + final int mask = table.length - 1; + int i = h & mask; + Object t; + while ((t = table[i]) != null) { + if (t == o || t.equals(o)) { + return (T) t; + } + i = (i+1) & mask; + } + return null; + } + + public int size() { + return size; + } + + public void clear() { + Object[] t = table; + for (int i = 0, top = t.length; i < top; i++) { + t[i] = null; + } + size = 0; + } + + private void resize() { + final int newSize = table.length << 1; + final int newMask = newSize - 1; + Object[] newTable = new Object[newSize]; + for (int i = 0, size = table.length; i < size; i++) { + Object t = table[i]; + if (t != null) { + table[i] = null; + int x = hash(t) & newMask; + while (newTable[x] != null) { + x = (x+1) & newMask; + } + newTable[x] = t; + } + } + table = newTable; + threshold = newSize - (newSize >>> 2); + } + + private static int hash(Object o) { + int h = o.hashCode(); +// return h; + // HashMap.newHash() + h ^= (h >>> 20) ^ (h >>> 12); + return h ^ (h >>> 7) ^ (h >>> 4); + } + +}
--- a/src/org/tmatesoft/hg/util/SparseSet.java Tue Sep 20 04:43:39 2011 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,309 +0,0 @@ -/* - * Copyright (c) 2011 TMate Software Ltd - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * For information on how to redistribute this software under - * the terms of a license other than GNU General Public License - * contact TMate Software at support@hg4j.com - */ -package org.tmatesoft.hg.util; - -import java.util.Arrays; - -import org.tmatesoft.hg.internal.Experimental; - -/** - * WORK IN PROGRESS, DO NOT USE - * Memory-friendly alternative to HashMap-backed Pool. Set where object can be obtained (not only queried for presence) - * - * cpython repo, use of HashMap Pool results in ~6 Mb of Map.Entry and Map.Entry[], - * while use of SparseSet result in 2 Mb. - * - * @author Artem Tikhomirov - * @author TMate Software Ltd. - */ -@Experimental(reason="Requires tuning to accomodate to collection size. Present state (6-6-6) is too much for a lot of uses") -public class SparseSet<T> { - - public static void main(String[] args) { - SparseSet<String> ss = new SparseSet<String>(); - String one = Integer.toString(156), two = Integer.toString(1024), three = Integer.toString(1123123); - ss.put(one); - ss.put(two); - ss.put(three); - System.out.println(one == ss.get(one)); - System.out.println(two == ss.get(two)); - System.out.println(three == ss.get(three)); - System.out.println(null == ss.get("one")); - System.out.println(one == ss.get(Integer.toString(156))); - System.out.println(two == ss.get(Integer.toString(1024))); - System.out.println(three == ss.get(Integer.toString(1123123))); - ss.dump(); - } - - @SuppressWarnings("unused") - private static final int MASK_8BIT = 0xFF, MASK_7BIT = 0x7F, MASK_6BIT = 0x3F, MASK_5BIT = 0x1F, MASK_4BIT = 0x0F; - private static final int I1_SHIFT = 15, I2_SHIFT = 6, I3_SHIFT = 0; - // 6, 5, 5 - private static final int I1_MASK = MASK_7BIT, I2_MASK = MASK_4BIT, I3_MASK = MASK_4BIT; - - private final int[] fixups = new int[] {0x1, 0x10, 0xA, 0xD, 0x1F }; // rehash attempts - private final IndexBranch[] level2 = new IndexBranch[I1_MASK + 1]; - private int size = 0; - - - // - int directPut, neighborPut; - int[] fixupPut1 = new int[fixups.length], fixupPut2 = new int[fixups.length];; - - public void put(T o) { - final int hash = hash(o); - final int i1 = (hash >>> I1_SHIFT) & I1_MASK, i2 = (hash >>> I2_SHIFT) & I2_MASK, i3 = (hash >>> I3_SHIFT) & I3_MASK; - LeafBranch l3 = leafBranchPut(i1, i2); - int res; - if ((res = l3.put(i3, o)) != 0) { - size++; - if (res == 1) { - directPut++; - } else if (res == 2) { - neighborPut++; - } - return; - } - for (int i = 0; i < fixups.length; i++) { - int fixup = fixups[i]; - l3 = leafBranchPut(i1 ^ fixup, i2); - if (l3.putIfEmptyOrSame(i3, o)) { - size++; - fixupPut1[i]++; - return; - } - l3 = leafBranchPut(i1, i2 ^ fixup); - if (l3.putIfEmptyOrSame(i3, o)) { - size++; - fixupPut2[i]++; - return; - } - } - throw new IllegalStateException(String.valueOf(o)); - } - - @SuppressWarnings("unchecked") - public T get(T o) { - final int hash = hash(o); - final int i1 = (hash >>> I1_SHIFT) & I1_MASK, i2 = (hash >>> I2_SHIFT) & I2_MASK, i3 = (hash >>> I3_SHIFT) & I3_MASK; - // - LeafBranch l3 = leafBranchGet(i1, i2); - if (l3 == null) { - return null; - } - Object c; - if ((c = l3.get(i3, o)) != null) { - return c == l3 ? null : (T) c; - } - if ((c = l3.get(i3 ^ 0x1, o)) != null) { - return c == l3 ? null : (T) c; - } - if ((c = l3.get(i3 ^ 0x2, o)) != null) { - return c == l3 ? null : (T) c; - } - if ((c = l3.get(i3 ^ 0x3, o)) != null) { - return c == l3 ? null : (T) c; - } - for (int fixup : fixups) { - Object data = leafValueGet(i1 ^ fixup, i2, i3); - if (data == null) { - return null; - } - if (o.equals(data)) { - return (T)data; - } - data = leafValueGet(i1, i2 ^ fixup, i3); - if (data == null) { - return null; - } - if (o.equals(data)) { - return (T)data; - } - } - dump(); - throw new IllegalStateException(String.format("[%d,%d,%d] hash: 0x%X, hash2: 0x%X, %s", i1, i2, i3, o.hashCode(), hash, o)); - } - - public int size() { - return size; - } - private LeafBranch leafBranchPut(int i1, int i2) { - IndexBranch l2 = level2[i1]; - if (l2 == null) { - level2[i1] = l2 = new IndexBranch(); - } - LeafBranch l3 = l2.leafs[i2]; - if (l3 == null) { - l2.leafs[i2] = l3 = new LeafBranch(); - } - return l3; - } - - // unlike regular collection clear, keeps all allocated arrays to minimize gc/reallocate costs - // do force clean, use #drop - public void clear() { - for (int i1 = 0; i1 < level2.length; i1++) { - IndexBranch l2 = level2[i1]; - if (l2 == null) { - continue; - } - for (int i2 = 0; i2 < l2.leafs.length; i2++) { - LeafBranch l3 = l2.leafs[i2]; - if (l3 == null) { - continue; - } - for (int i3 = 0; i3 < l3.data.length; i3++) { - l3.data[i3] = null; - } - } - } - reset(); - } - - public void drop() { - reset(); - for (int i1 = 0; i1 < level2.length; level2[i1++] = null); - } - - private void reset() { - size = 0; - directPut = neighborPut = 0; - Arrays.fill(fixupPut1, 0); - Arrays.fill(fixupPut2, 0); - } - - private LeafBranch leafBranchGet(int i1, int i2) { - IndexBranch l2 = level2[i1]; - if (l2 == null) { - return null; - } - return l2.leafs[i2]; - } - - private Object leafValueGet(int i1, int i2, int i3) { - IndexBranch l2 = level2[i1]; - if (l2 == null) { - return null; - } - LeafBranch l3 = l2.leafs[i2]; - if (l3 == null) { - return null; - } - return l3.data[i3]; - } - - private int hash(Object o) { - int h = o.hashCode(); - // HashMap.newHash() - h ^= (h >>> 20) ^ (h >>> 12); - return h ^ (h >>> 7) ^ (h >>> 4); - } - - @Override - public String toString() { - return String.format("SparseSet (0x%02X-0x%02X-0x%02X), %d elements. Direct: %d. Resolutions: neighbour: %d, i1: %s. i2: %s", I1_MASK, I2_MASK, I3_MASK, size, directPut, neighborPut, Arrays.toString(fixupPut1), Arrays.toString(fixupPut2)); - } - - public void dump() { - int count = 0; - System.out.println(toString()); - for (int i = 0; i < level2.length; i++) { - IndexBranch l2 = level2[i]; - if (l2 == null) { - continue; - } - for (int j = 0; j < l2.leafs.length; j++) { - LeafBranch l3 = l2.leafs[j]; - if (l3 == null) { - continue; - } - for (int k = 0; k < l3.data.length; k++) { - Object d = l3.data[k]; - if (d != null) { - System.out.printf("[%3d,%3d,%3d] %s\n", i,j,k,d); - count++; - } - } - } - } - System.out.printf("Total: %d elements\n", count); - } - - private static class IndexBranch { - private final LeafBranch[] leafs = new LeafBranch[64]; - } - - private static final class LeafBranch { - public final Object[] data = new Object[64]; - - public int put(int ix, Object d) { - if (putIfEmptyOrSame(ix, d)) { - return 1; - } - // try neighbour elements - if (putIfEmptyOrSame(ix ^ 0x1, d) || putIfEmptyOrSame(ix ^ 0x2, d) || putIfEmptyOrSame(ix ^ 0x3, d)) { - return 2; - } - return 0; - } - - public boolean putIfEmptyOrSame(int ix, Object d) { - if (data[ix] == null || data[ix].equals(d)) { - data[ix] = d; - return true; - } - return false; - } - - /** - * <code>null</code> result indicates further checks make sense - * @return <code>this</code> if there's no entry at all, <code>null</code> if entry doesn't match, or entry value itself otherwise - */ - public Object get(int ix, Object o) { - if (data[ix] == null) { - return this; - } - if (data[ix].equals(o)) { - return data[ix]; - } - return null; - } - } - - // - // 8 bits per level -// int i1 = (hash >>> 24) & 0xFF, i2 = (hash >>> 16) & 0xFF , i3 = (hash >>> 8) & 0xFF, i4 = hash & 0xFF; - // - // 10, 8, 8 and 6 bits -// final int i1 = (hash >>> 22) & 0x3FF, i2 = (hash >>> 14) & 0xFF , i3 = (hash >>> 6) & 0xFF, i4 = hash & 0x3F; - // - // 8, 6, 6, 6, 6 - // 10, 6, 6, 6, 4 - // - // 6, 5, 5, 5 = 21 bit -// hash = hash ^ (hash >>> 24); // incorporate upper byte we don't use into lower to value it -//final int i1 = (hash >>> 18) & 0x3F, i2 = (hash >>> 12) & 0x1F , i3 = (hash >>> 7) & 0x1F, i4 = (hash >>> 2) & 0x1F; -// 6, 5, 5 -//hash = hash ^ (hash >>> 16); -//final int i1 = (hash >>> 10) & 0x3F, i2 = (hash >>> 5) & 0x1F , i3 = hash & 0x1F; -// -// 6, 6, 6 -//final int i1 = (hash >>> 15) & 0x3F, i2 = (hash >>> 6) & 0x3F , i3 = hash & 0x3F; -// -// 8, 5, 5 - -}
--- a/test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java Tue Sep 20 04:43:39 2011 +0200 +++ b/test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java Wed Sep 21 18:26:16 2011 +0200 @@ -39,8 +39,8 @@ // Pattern p = Pattern.compile("^doc/[^/]*?\\.[0-9]\\.(x|ht)ml"); // System.out.println(p.matcher("doc/asd.2.xml").matches()); // System.out.println(p.matcher("doc/zxc.6.html").matches()); - m.collectTagsPerFile(); -// m.manifestWalk(); +// m.collectTagsPerFile(); + m.manifestWalk(); m = null; System.gc(); System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory()); @@ -92,6 +92,7 @@ // // With Pool<StringProxy> for fname and flags, Nodeid's ascii2bin through local array, overall byte[] iteration, // 0..10k is 34 seconds now + // Another run, 23 seconds now, seems nothing has been changed. Switched to Pool2 with DirectHashSet: 22,5 seconds System.out.printf("Total time: %d ms\n", System.currentTimeMillis() - start); System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory()); }
