Mercurial > jhg
view src/org/tmatesoft/hg/util/SparseSet.java @ 265:3dd953c65619
Generous defaults for SparseSet not to fail on big manifests
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Fri, 19 Aug 2011 04:02:48 +0200 |
parents | 6bb5e7ed051a |
children |
line wrap: on
line source
/* * Copyright (c) 2011 TMate Software Ltd * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * For information on how to redistribute this software under * the terms of a license other than GNU General Public License * contact TMate Software at support@hg4j.com */ package org.tmatesoft.hg.util; import java.util.Arrays; import org.tmatesoft.hg.internal.Experimental; /** * WORK IN PROGRESS, DO NOT USE * Memory-friendly alternative to HashMap-backed Pool. Set where object can be obtained (not only queried for presence) * * cpython repo, use of HashMap Pool results in ~6 Mb of Map.Entry and Map.Entry[], * while use of SparseSet result in 2 Mb. * * @author Artem Tikhomirov * @author TMate Software Ltd. */ @Experimental(reason="Requires tuning to accomodate to collection size. Present state (6-6-6) is too much for a lot of uses") public class SparseSet<T> { public static void main(String[] args) { SparseSet<String> ss = new SparseSet<String>(); String one = Integer.toString(156), two = Integer.toString(1024), three = Integer.toString(1123123); ss.put(one); ss.put(two); ss.put(three); System.out.println(one == ss.get(one)); System.out.println(two == ss.get(two)); System.out.println(three == ss.get(three)); System.out.println(null == ss.get("one")); System.out.println(one == ss.get(Integer.toString(156))); System.out.println(two == ss.get(Integer.toString(1024))); System.out.println(three == ss.get(Integer.toString(1123123))); ss.dump(); } @SuppressWarnings("unused") private static final int MASK_8BIT = 0xFF, MASK_7BIT = 0x7F, MASK_6BIT = 0x3F, MASK_5BIT = 0x1F, MASK_4BIT = 0x0F; private static final int I1_SHIFT = 15, I2_SHIFT = 6, I3_SHIFT = 0; // 6, 5, 5 private static final int I1_MASK = MASK_7BIT, I2_MASK = MASK_4BIT, I3_MASK = MASK_4BIT; private final int[] fixups = new int[] {0x1, 0x10, 0xA, 0xD, 0x1F }; // rehash attempts private final IndexBranch[] level2 = new IndexBranch[I1_MASK + 1]; private int size = 0; // int directPut, neighborPut; int[] fixupPut1 = new int[fixups.length], fixupPut2 = new int[fixups.length];; public void put(T o) { final int hash = hash(o); final int i1 = (hash >>> I1_SHIFT) & I1_MASK, i2 = (hash >>> I2_SHIFT) & I2_MASK, i3 = (hash >>> I3_SHIFT) & I3_MASK; LeafBranch l3 = leafBranchPut(i1, i2); int res; if ((res = l3.put(i3, o)) != 0) { size++; if (res == 1) { directPut++; } else if (res == 2) { neighborPut++; } return; } for (int i = 0; i < fixups.length; i++) { int fixup = fixups[i]; l3 = leafBranchPut(i1 ^ fixup, i2); if (l3.putIfEmptyOrSame(i3, o)) { size++; fixupPut1[i]++; return; } l3 = leafBranchPut(i1, i2 ^ fixup); if (l3.putIfEmptyOrSame(i3, o)) { size++; fixupPut2[i]++; return; } } throw new IllegalStateException(String.valueOf(o)); } @SuppressWarnings("unchecked") public T get(T o) { final int hash = hash(o); final int i1 = (hash >>> I1_SHIFT) & I1_MASK, i2 = (hash >>> I2_SHIFT) & I2_MASK, i3 = (hash >>> I3_SHIFT) & I3_MASK; // LeafBranch l3 = leafBranchGet(i1, i2); if (l3 == null) { return null; } Object c; if ((c = l3.get(i3, o)) != null) { return c == l3 ? null : (T) c; } if ((c = l3.get(i3 ^ 0x1, o)) != null) { return c == l3 ? null : (T) c; } if ((c = l3.get(i3 ^ 0x2, o)) != null) { return c == l3 ? null : (T) c; } if ((c = l3.get(i3 ^ 0x3, o)) != null) { return c == l3 ? null : (T) c; } for (int fixup : fixups) { Object data = leafValueGet(i1 ^ fixup, i2, i3); if (data == null) { return null; } if (o.equals(data)) { return (T)data; } data = leafValueGet(i1, i2 ^ fixup, i3); if (data == null) { return null; } if (o.equals(data)) { return (T)data; } } dump(); throw new IllegalStateException(String.format("[%d,%d,%d] hash: 0x%X, hash2: 0x%X, %s", i1, i2, i3, o.hashCode(), hash, o)); } public int size() { return size; } private LeafBranch leafBranchPut(int i1, int i2) { IndexBranch l2 = level2[i1]; if (l2 == null) { level2[i1] = l2 = new IndexBranch(); } LeafBranch l3 = l2.leafs[i2]; if (l3 == null) { l2.leafs[i2] = l3 = new LeafBranch(); } return l3; } // unlike regular collection clear, keeps all allocated arrays to minimize gc/reallocate costs // do force clean, use #drop public void clear() { for (int i1 = 0; i1 < level2.length; i1++) { IndexBranch l2 = level2[i1]; if (l2 == null) { continue; } for (int i2 = 0; i2 < l2.leafs.length; i2++) { LeafBranch l3 = l2.leafs[i2]; if (l3 == null) { continue; } for (int i3 = 0; i3 < l3.data.length; i3++) { l3.data[i3] = null; } } } reset(); } public void drop() { reset(); for (int i1 = 0; i1 < level2.length; level2[i1++] = null); } private void reset() { size = 0; directPut = neighborPut = 0; Arrays.fill(fixupPut1, 0); Arrays.fill(fixupPut2, 0); } private LeafBranch leafBranchGet(int i1, int i2) { IndexBranch l2 = level2[i1]; if (l2 == null) { return null; } return l2.leafs[i2]; } private Object leafValueGet(int i1, int i2, int i3) { IndexBranch l2 = level2[i1]; if (l2 == null) { return null; } LeafBranch l3 = l2.leafs[i2]; if (l3 == null) { return null; } return l3.data[i3]; } private int hash(Object o) { int h = o.hashCode(); // HashMap.newHash() h ^= (h >>> 20) ^ (h >>> 12); return h ^ (h >>> 7) ^ (h >>> 4); } @Override public String toString() { return String.format("SparseSet (0x%02X-0x%02X-0x%02X), %d elements. Direct: %d. Resolutions: neighbour: %d, i1: %s. i2: %s", I1_MASK, I2_MASK, I3_MASK, size, directPut, neighborPut, Arrays.toString(fixupPut1), Arrays.toString(fixupPut2)); } public void dump() { int count = 0; System.out.println(toString()); for (int i = 0; i < level2.length; i++) { IndexBranch l2 = level2[i]; if (l2 == null) { continue; } for (int j = 0; j < l2.leafs.length; j++) { LeafBranch l3 = l2.leafs[j]; if (l3 == null) { continue; } for (int k = 0; k < l3.data.length; k++) { Object d = l3.data[k]; if (d != null) { System.out.printf("[%3d,%3d,%3d] %s\n", i,j,k,d); count++; } } } } System.out.printf("Total: %d elements\n", count); } private static class IndexBranch { private final LeafBranch[] leafs = new LeafBranch[64]; } private static final class LeafBranch { public final Object[] data = new Object[64]; public int put(int ix, Object d) { if (putIfEmptyOrSame(ix, d)) { return 1; } // try neighbour elements if (putIfEmptyOrSame(ix ^ 0x1, d) || putIfEmptyOrSame(ix ^ 0x2, d) || putIfEmptyOrSame(ix ^ 0x3, d)) { return 2; } return 0; } public boolean putIfEmptyOrSame(int ix, Object d) { if (data[ix] == null || data[ix].equals(d)) { data[ix] = d; return true; } return false; } /** * <code>null</code> result indicates further checks make sense * @return <code>this</code> if there's no entry at all, <code>null</code> if entry doesn't match, or entry value itself otherwise */ public Object get(int ix, Object o) { if (data[ix] == null) { return this; } if (data[ix].equals(o)) { return data[ix]; } return null; } } // // 8 bits per level // int i1 = (hash >>> 24) & 0xFF, i2 = (hash >>> 16) & 0xFF , i3 = (hash >>> 8) & 0xFF, i4 = hash & 0xFF; // // 10, 8, 8 and 6 bits // final int i1 = (hash >>> 22) & 0x3FF, i2 = (hash >>> 14) & 0xFF , i3 = (hash >>> 6) & 0xFF, i4 = hash & 0x3F; // // 8, 6, 6, 6, 6 // 10, 6, 6, 6, 4 // // 6, 5, 5, 5 = 21 bit // hash = hash ^ (hash >>> 24); // incorporate upper byte we don't use into lower to value it //final int i1 = (hash >>> 18) & 0x3F, i2 = (hash >>> 12) & 0x1F , i3 = (hash >>> 7) & 0x1F, i4 = (hash >>> 2) & 0x1F; // 6, 5, 5 //hash = hash ^ (hash >>> 16); //final int i1 = (hash >>> 10) & 0x3F, i2 = (hash >>> 5) & 0x1F , i3 = hash & 0x1F; // // 6, 6, 6 //final int i1 = (hash >>> 15) & 0x3F, i2 = (hash >>> 6) & 0x3F , i3 = hash & 0x3F; // // 8, 5, 5 }