/*
 * Copyright (c) 2011 TMate Software Ltd
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * For information on how to redistribute this software under
 * the terms of a license other than GNU General Public License
 * contact TMate Software at support@hg4j.com
 */
package org.tmatesoft.hg.util;

import java.util.Arrays;

import org.tmatesoft.hg.internal.Experimental;

/**
 * WORK IN PROGRESS, DO NOT USE
 * <p>
 * Memory-friendly alternative to HashMap-backed Pool. Set where object can be obtained (not only queried for presence).
 * <p>
 * cpython repo, use of HashMap Pool results in ~6 Mb of Map.Entry and Map.Entry[],
 * while use of SparseSet result in 2 Mb.
 * <p>
 * Elements live in a fixed three-level structure (index branches, leaf branches, data slots) addressed by
 * three bit fields of the element's hash. Collisions are resolved by probing a few neighbouring slots of
 * the same leaf and then a fixed list of alternative branches ({@link #fixups}); when all of those are
 * taken, {@link #put(Object)} fails. There is no removal of individual elements.
 *
 * @param <T> type of the elements kept in the set
 * @author Artem Tikhomirov
 * @author TMate Software Ltd.
 */
@Experimental(reason="Requires tuning to accomodate to collection size. Present state (6-6-6) is too much for a lot of uses")
public class SparseSet<T> {

    /** Smoke test: stored instances have to come back by identity, for the same as well as for an equal key. */
    public static void main(String[] args) {
        SparseSet<String> ss = new SparseSet<String>();
        String one = Integer.toString(156), two = Integer.toString(1024), three = Integer.toString(1123123);
        ss.put(one);
        ss.put(two);
        ss.put(three);
        // reference comparison is intentional, the set has to hand out the very instance it keeps
        System.out.println(one == ss.get(one));
        System.out.println(two == ss.get(two));
        System.out.println(three == ss.get(three));
        System.out.println(null == ss.get("one"));
        System.out.println(one == ss.get(Integer.toString(156)));
        System.out.println(two == ss.get(Integer.toString(1024)));
        System.out.println(three == ss.get(Integer.toString(1123123)));
        ss.dump();
    }

    @SuppressWarnings("unused")
    private static final int MASK_8BIT = 0xFF, MASK_7BIT = 0x7F, MASK_6BIT = 0x3F, MASK_5BIT = 0x1F, MASK_4BIT = 0x0F;
    private static final int I1_SHIFT = 15, I2_SHIFT = 6, I3_SHIFT = 0;
    // 5, 4 and 4 bits of the hash are used for the first, second and third level index respectively
    private static final int I1_MASK = MASK_5BIT, I2_MASK = MASK_4BIT, I3_MASK = MASK_4BIT;

    // Capacity of every second- and third-level array. Deliberately not derived from I2_MASK/I3_MASK:
    // (i2 ^ fixup) may be as large as 0x1F and therefore exceeds I2_MASK.
    private static final int BRANCH_SIZE = 64;

    // slots (i3 ^ 1) .. (i3 ^ NEIGHBOURS) of the same leaf are tried when slot i3 is taken by another element
    private static final int NEIGHBOURS = 3;

    // outcomes of LeafBranch#putIfEmptyOrSame
    private static final int NO_ROOM = 0, ADDED = 1, REPLACED = 2;

    private final int[] fixups = new int[] {0x1, 0x10, 0xA, 0xD, 0x1F }; // rehash attempts
    private final IndexBranch[] level2 = new IndexBranch[I1_MASK + 1];
    private int size = 0;

    //
    // Statistics: number of successful put() calls per way the slot was found.
    // Unlike size, these count replacements of an equal element, too.
    int directPut, neighborPut;
    int[] fixupPut1 = new int[fixups.length], fixupPut2 = new int[fixups.length];

    /**
     * Stores the object. If an equal object is already present, it is replaced with the supplied instance
     * and the size of the set does not change.
     *
     * @param o element to store, shall not be <code>null</code>
     * @throws NullPointerException if the argument is <code>null</code>
     * @throws IllegalStateException if every candidate slot for the element is occupied by other elements
     */
    public void put(T o) {
        final int hash = hash(o);
        final int i1 = (hash >>> I1_SHIFT) & I1_MASK, i2 = (hash >>> I2_SHIFT) & I2_MASK, i3 = (hash >>> I3_SHIFT) & I3_MASK;
        final LeafBranch l3 = leafBranchPut(i1, i2);
        int outcome = l3.putIfEmptyOrSame(i3, o);
        if (outcome != NO_ROOM) {
            directPut++;
            countIfAdded(outcome);
            return;
        }
        // try neighbour elements
        for (int neighbour = 1; neighbour <= NEIGHBOURS; neighbour++) {
            outcome = l3.putIfEmptyOrSame(i3 ^ neighbour, o);
            if (outcome != NO_ROOM) {
                neighborPut++;
                countIfAdded(outcome);
                return;
            }
        }
        // try the same slot in alternative branches: first with level 1 index altered, then level 2
        for (int i = 0; i < fixups.length; i++) {
            final int fixup = fixups[i];
            outcome = leafBranchPut(i1 ^ fixup, i2).putIfEmptyOrSame(i3, o);
            if (outcome != NO_ROOM) {
                fixupPut1[i]++;
                countIfAdded(outcome);
                return;
            }
            outcome = leafBranchPut(i1, i2 ^ fixup).putIfEmptyOrSame(i3, o);
            if (outcome != NO_ROOM) {
                fixupPut2[i]++;
                countIfAdded(outcome);
                return;
            }
        }
        throw new IllegalStateException(String.valueOf(o));
    }

    // size reflects occupied slots only, replacement of an equal element doesn't add to it
    private void countIfAdded(int outcome) {
        if (outcome == ADDED) {
            size++;
        }
    }

    /**
     * Looks up the instance kept in the set that is equal to the argument. Candidate slots are visited
     * in the same order {@link #put(Object)} fills them, hence the first empty slot ends the search.
     *
     * @param o object to look up, shall not be <code>null</code>
     * @return stored instance equal to the argument, or <code>null</code> if there is none
     * @throws NullPointerException if the argument is <code>null</code>
     */
    @SuppressWarnings("unchecked")
    public T get(T o) {
        final int hash = hash(o);
        final int i1 = (hash >>> I1_SHIFT) & I1_MASK, i2 = (hash >>> I2_SHIFT) & I2_MASK, i3 = (hash >>> I3_SHIFT) & I3_MASK;
        //
        final LeafBranch l3 = leafBranchGet(i1, i2);
        if (l3 == null) {
            return null;
        }
        // neighbour == 0 is the direct slot
        for (int neighbour = 0; neighbour <= NEIGHBOURS; neighbour++) {
            Object c = l3.get(i3 ^ neighbour, o);
            if (c != null) {
                // the leaf itself is a marker for an empty slot
                return c == l3 ? null : (T) c;
            }
        }
        for (int fixup : fixups) {
            Object data = leafValueGet(i1 ^ fixup, i2, i3);
            if (data == null) {
                return null;
            }
            if (o.equals(data)) {
                return (T) data;
            }
            data = leafValueGet(i1, i2 ^ fixup, i3);
            if (data == null) {
                return null;
            }
            if (o.equals(data)) {
                return (T) data;
            }
        }
        // Every slot put() could have used holds some other element, hence the one asked for is not here.
        return null;
    }

    /**
     * @return number of elements in the set
     */
    public int size() {
        return size;
    }

    // finds the leaf for the given indexes, allocating missing branches along the way
    private LeafBranch leafBranchPut(int i1, int i2) {
        IndexBranch l2 = level2[i1];
        if (l2 == null) {
            level2[i1] = l2 = new IndexBranch();
        }
        LeafBranch l3 = l2.leafs[i2];
        if (l3 == null) {
            l2.leafs[i2] = l3 = new LeafBranch();
        }
        return l3;
    }

    // unlike regular collection clear, keeps all allocated arrays to minimize gc/reallocate costs
    // do force clean, use #drop
    public void clear() {
        for (IndexBranch l2 : level2) {
            if (l2 == null) {
                continue;
            }
            for (LeafBranch l3 : l2.leafs) {
                if (l3 != null) {
                    Arrays.fill(l3.data, null);
                }
            }
        }
        reset();
    }

    /**
     * Removes all elements and releases the branches allocated so far.
     */
    public void drop() {
        reset();
        Arrays.fill(level2, null);
    }

    // zeroes element count and statistics, doesn't touch the data
    private void reset() {
        size = 0;
        directPut = neighborPut = 0;
        Arrays.fill(fixupPut1, 0);
        Arrays.fill(fixupPut2, 0);
    }

    // read-only counterpart of leafBranchPut, null if the leaf has not been allocated
    private LeafBranch leafBranchGet(int i1, int i2) {
        IndexBranch l2 = level2[i1];
        if (l2 == null) {
            return null;
        }
        return l2.leafs[i2];
    }

    // value of the slot, null if the slot is empty or its branch has not been allocated
    private Object leafValueGet(int i1, int i2, int i3) {
        LeafBranch l3 = leafBranchGet(i1, i2);
        if (l3 == null) {
            return null;
        }
        return l3.data[i3];
    }

    private static int hash(Object o) {
        int h = o.hashCode();
        // HashMap.newHash()
        h ^= (h >>> 20) ^ (h >>> 12);
        return h ^ (h >>> 7) ^ (h >>> 4);
    }

    @Override
    public String toString() {
        return String.format("SparseSet (0x%02X-0x%02X-0x%02X), %d elements. Direct: %d. Resolutions: neighbour: %d, i1: %s. i2: %s", I1_MASK, I2_MASK, I3_MASK, size, directPut, neighborPut, Arrays.toString(fixupPut1), Arrays.toString(fixupPut2));
    }

    /**
     * Prints statistics, every element along with its position and total number of elements found to standard output.
     */
    public void dump() {
        int count = 0;
        System.out.println(toString());
        for (int i = 0; i < level2.length; i++) {
            IndexBranch l2 = level2[i];
            if (l2 == null) {
                continue;
            }
            for (int j = 0; j < l2.leafs.length; j++) {
                LeafBranch l3 = l2.leafs[j];
                if (l3 == null) {
                    continue;
                }
                for (int k = 0; k < l3.data.length; k++) {
                    Object d = l3.data[k];
                    if (d != null) {
                        System.out.printf("[%3d,%3d,%3d] %s\n", i,j,k,d);
                        count++;
                    }
                }
            }
        }
        System.out.printf("Total: %d elements\n", count);
    }

    // second level of the structure, holds leaf branches
    private static class IndexBranch {
        private final LeafBranch[] leafs = new LeafBranch[BRANCH_SIZE];
    }

    // third level of the structure, holds the elements
    private static final class LeafBranch {
        public final Object[] data = new Object[BRANCH_SIZE];

        /**
         * Stores the object if the slot is empty or holds an equal object (which gets replaced).
         * @return ADDED if the slot was empty, REPLACED if it held an equal object, NO_ROOM (nothing stored) otherwise
         */
        public int putIfEmptyOrSame(int ix, Object d) {
            final Object present = data[ix];
            if (present == null) {
                data[ix] = d;
                return ADDED;
            }
            if (present.equals(d)) {
                data[ix] = d;
                return REPLACED;
            }
            return NO_ROOM;
        }

        /**
         * null result indicates further checks make sense
         * @return <code>this</code> if there's no entry at all, <code>null</code> if entry doesn't match, or entry value itself otherwise
         */
        public Object get(int ix, Object o) {
            final Object present = data[ix];
            if (present == null) {
                return this;
            }
            return present.equals(o) ? present : null;
        }
    }

    //
    // Index layouts tried so far:
    //
    // 8 bits per level
    // int i1 = (hash >>> 24) & 0xFF, i2 = (hash >>> 16) & 0xFF , i3 = (hash >>> 8) & 0xFF, i4 = hash & 0xFF;
    //
    // 10, 8, 8 and 6 bits
    // final int i1 = (hash >>> 22) & 0x3FF, i2 = (hash >>> 14) & 0xFF , i3 = (hash >>> 6) & 0xFF, i4 = hash & 0x3F;
    //
    // 8, 6, 6, 6, 6
    // 10, 6, 6, 6, 4
    //
    // 6, 5, 5, 5 = 21 bit
    // hash = hash ^ (hash >>> 24); // incorporate upper byte we don't use into lower to value it
    //final int i1 = (hash >>> 18) & 0x3F, i2 = (hash >>> 12) & 0x1F , i3 = (hash >>> 7) & 0x1F, i4 = (hash >>> 2) & 0x1F;
    // 6, 5, 5
    //hash = hash ^ (hash >>> 16);
    //final int i1 = (hash >>> 10) & 0x3F, i2 = (hash >>> 5) & 0x1F , i3 = hash & 0x1F;
    //
    // 6, 6, 6
    //final int i1 = (hash >>> 15) & 0x3F, i2 = (hash >>> 6) & 0x3F , i3 = hash & 0x3F;
    //
    // 8, 5, 5

}