comparison src/org/tmatesoft/hg/util/SparseSet.java @ 264:6bb5e7ed051a

Optimize memory usage (reduce number of objects instantiated) when pooling file names and nodeids during manifest parsing
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Fri, 19 Aug 2011 03:36:25 +0200
parents 61cb6724ff36
children 3dd953c65619
comparison
equal deleted inserted replaced
263:31f67be94e71 264:6bb5e7ed051a
14 * the terms of a license other than GNU General Public License 14 * the terms of a license other than GNU General Public License
15 * contact TMate Software at support@hg4j.com 15 * contact TMate Software at support@hg4j.com
16 */ 16 */
17 package org.tmatesoft.hg.util; 17 package org.tmatesoft.hg.util;
18 18
19 import java.util.Arrays;
20
19 import org.tmatesoft.hg.internal.Experimental; 21 import org.tmatesoft.hg.internal.Experimental;
20 22
21 /** 23 /**
22 * WORK IN PROGRESS, DO NOT USE 24 * WORK IN PROGRESS, DO NOT USE
23 * Memory-friendly alternative to HashMap-backed Pool. Set where object can be obtained (not only queried for presence) 25 * Memory-friendly alternative to HashMap-backed Pool. Set where object can be obtained (not only queried for presence)
45 System.out.println(two == ss.get(Integer.toString(1024))); 47 System.out.println(two == ss.get(Integer.toString(1024)));
46 System.out.println(three == ss.get(Integer.toString(1123123))); 48 System.out.println(three == ss.get(Integer.toString(1123123)));
47 ss.dump(); 49 ss.dump();
48 } 50 }
49 51
50 private static class IndexBranch { 52 @SuppressWarnings("unused")
51 private final LeafBranch[] leafs = new LeafBranch[64]; 53 private static final int MASK_8BIT = 0xFF, MASK_7BIT = 0x7F, MASK_6BIT = 0x3F, MASK_5BIT = 0x1F, MASK_4BIT = 0x0F;
52 } 54 private static final int I1_SHIFT = 15, I2_SHIFT = 6, I3_SHIFT = 0;
53 private static class LeafBranch { 55 // 6, 5, 5
54 private final Object[] data = new Object[64]; 56 private static final int I1_MASK = MASK_5BIT, I2_MASK = MASK_4BIT, I3_MASK = MASK_4BIT;
55 }
56 57
57 private final int[] fixups = new int[] {0x1, 0x10, 0xA, 0xD, 0x1F }; // rehash attempts 58 private final int[] fixups = new int[] {0x1, 0x10, 0xA, 0xD, 0x1F }; // rehash attempts
58 private final IndexBranch[] level2 = new IndexBranch[64]; 59 private final IndexBranch[] level2 = new IndexBranch[I1_MASK + 1];
59 private int size = 0; 60 private int size = 0;
61
62
63 //
64 int directPut, neighborPut;
65 int[] fixupPut1 = new int[fixups.length], fixupPut2 = new int[fixups.length];;
60 66
61 public void put(T o) { 67 public void put(T o) {
62 int hash = o.hashCode(); 68 final int hash = hash(o);
63 // 69 final int i1 = (hash >>> I1_SHIFT) & I1_MASK, i2 = (hash >>> I2_SHIFT) & I2_MASK, i3 = (hash >>> I3_SHIFT) & I3_MASK;
64 // 8 bits per level
65 // int i1 = (hash >>> 24) & 0xFF, i2 = (hash >>> 16) & 0xFF , i3 = (hash >>> 8) & 0xFF, i4 = hash & 0xFF;
66 //
67 // 10, 8, 8 and 6 bits
68 // final int i1 = (hash >>> 22) & 0x3FF, i2 = (hash >>> 14) & 0xFF , i3 = (hash >>> 6) & 0xFF, i4 = hash & 0x3F;
69 //
70 // 8, 6, 6, 6, 6
71 // 10, 6, 6, 6, 4
72 //
73 // 6, 5, 5, 5 = 21 bit
74 // hash = hash ^ (hash >>> 24); // incorporate upper byte we don't use into lower to value it
75 // final int i1 = (hash >>> 18) & 0x3F, i2 = (hash >>> 12) & 0x1F , i3 = (hash >>> 7) & 0x1F, i4 = (hash >>> 2) & 0x1F;
76 // 6, 5, 5
77 // hash = hash ^ (hash >>> 16);
78 // final int i1 = (hash >>> 10) & 0x3F, i2 = (hash >>> 5) & 0x1F , i3 = hash & 0x1F;
79 //
80 // 6, 6, 6
81 final int i1 = (hash >>> 15) & 0x3F, i2 = (hash >>> 6) & 0x3F , i3 = hash & 0x3F;
82 LeafBranch l3 = leafBranchPut(i1, i2); 70 LeafBranch l3 = leafBranchPut(i1, i2);
83 if (l3.data[i3] == null) { 71 int res;
84 l3.data[i3] = o; 72 if ((res = l3.put(i3, o)) != 0) {
85 size++; 73 size++;
74 if (res == 1) {
75 directPut++;
76 } else if (res == 2) {
77 neighborPut++;
78 }
86 return; 79 return;
87 } 80 }
88 int neighbour = (i3+1) & 0x3F; 81 for (int i = 0; i < fixups.length; i++) {
89 if (l3.data[neighbour] == null) { 82 int fixup = fixups[i];
90 l3.data[neighbour] = o;
91 size++;
92 return;
93 }
94 int conflictCount = 0;
95 for (int fixup : fixups) {
96 // if (showConflicts) {
97 // System.out.printf("(fixup: 0x%x) ", fixup);
98 // }
99 l3 = leafBranchPut(i1 ^ fixup, i2); 83 l3 = leafBranchPut(i1 ^ fixup, i2);
100 conflictCount++; 84 if (l3.putIfEmptyOrSame(i3, o)) {
101 if (l3.data[i3] != null) {
102 // if (showConflicts) {
103 // System.out.printf("i1 failed ");
104 // }
105 l3 = leafBranchPut(i1, i2 ^ fixup);
106 conflictCount++;
107 // if (showConflicts) {
108 // System.out.printf("i2 %s ", (l3.data[i3] == null) ? "ok" : "failed");
109 // }
110 // } else {
111 // if (showConflicts) {
112 // System.out.printf("i1 ok");
113 // }
114 }
115 // if (showConflicts) {
116 // System.out.println();
117 // }
118 if (l3.data[i3] == null) {
119 l3.data[i3] = o;
120 // System.out.printf("Resolved conflict in %d steps (fixup 0x%X)\n", conflictCount, fixup);
121 size++; 85 size++;
86 fixupPut1[i]++;
87 return;
88 }
89 l3 = leafBranchPut(i1, i2 ^ fixup);
90 if (l3.putIfEmptyOrSame(i3, o)) {
91 size++;
92 fixupPut2[i]++;
122 return; 93 return;
123 } 94 }
124 } 95 }
125 throw new IllegalStateException(String.valueOf(o)); 96 throw new IllegalStateException(String.valueOf(o));
126 } 97 }
127 98
128 @SuppressWarnings("unchecked") 99 @SuppressWarnings("unchecked")
129 public T get(T o) { 100 public T get(T o) {
130 int hash = o.hashCode(); 101 final int hash = hash(o);
131 //hash = hash ^ (hash >>> 16); 102 final int i1 = (hash >>> I1_SHIFT) & I1_MASK, i2 = (hash >>> I2_SHIFT) & I2_MASK, i3 = (hash >>> I3_SHIFT) & I3_MASK;
132 final int i1 = (hash >>> 15) & 0x3F, i2 = (hash >>> 6) & 0x3F , i3 = hash & 0x3F;
133 // 103 //
134 LeafBranch l3 = leafBranchGet(i1, i2); 104 LeafBranch l3 = leafBranchGet(i1, i2);
135 if (l3 == null || l3.data[i3] == null) { 105 if (l3 == null) {
136 return null; 106 return null;
137 } 107 }
138 if (o.equals(l3.data[i3])) { 108 Object c;
139 return (T) l3.data[i3]; 109 if ((c = l3.get(i3, o)) != null) {
140 } 110 return c == l3 ? null : (T) c;
141 // 111 }
142 int neighbour = (i3+1) & 0x3F; 112 if ((c = l3.get(i3 ^ 0x1, o)) != null) {
143 if (o.equals(l3.data[neighbour])) { 113 return c == l3 ? null : (T) c;
144 return (T) l3.data[neighbour]; 114 }
145 } 115 if ((c = l3.get(i3 ^ 0x2, o)) != null) {
146 116 return c == l3 ? null : (T) c;
147 // 117 }
148 // resolve conflict 118 if ((c = l3.get(i3 ^ 0x3, o)) != null) {
119 return c == l3 ? null : (T) c;
120 }
149 for (int fixup : fixups) { 121 for (int fixup : fixups) {
150 Object data = leafValueGet(i1 ^ fixup, i2, i3); 122 Object data = leafValueGet(i1 ^ fixup, i2, i3);
151 if (data == null) { 123 if (data == null) {
152 return null; 124 return null;
153 } 125 }
179 l2.leafs[i2] = l3 = new LeafBranch(); 151 l2.leafs[i2] = l3 = new LeafBranch();
180 } 152 }
181 return l3; 153 return l3;
182 } 154 }
183 155
156 // unlike a regular collection clear, keeps all allocated arrays to minimize gc/reallocation costs
157 // to force a full cleanup (releasing the allocated arrays as well), use #drop
158 public void clear() {
159 for (int i1 = 0; i1 < level2.length; i1++) {
160 IndexBranch l2 = level2[i1];
161 if (l2 == null) {
162 continue;
163 }
164 for (int i2 = 0; i2 < l2.leafs.length; i2++) {
165 LeafBranch l3 = l2.leafs[i2];
166 if (l3 == null) {
167 continue;
168 }
169 for (int i3 = 0; i3 < l3.data.length; i3++) {
170 l3.data[i3] = null;
171 }
172 }
173 }
174 reset();
175 }
176
177 public void drop() {
178 reset();
179 for (int i1 = 0; i1 < level2.length; level2[i1++] = null);
180 }
181
182 private void reset() {
183 size = 0;
184 directPut = neighborPut = 0;
185 Arrays.fill(fixupPut1, 0);
186 Arrays.fill(fixupPut2, 0);
187 }
188
184 private LeafBranch leafBranchGet(int i1, int i2) { 189 private LeafBranch leafBranchGet(int i1, int i2) {
185 IndexBranch l2 = level2[i1]; 190 IndexBranch l2 = level2[i1];
186 if (l2 == null) { 191 if (l2 == null) {
187 return null; 192 return null;
188 } 193 }
198 if (l3 == null) { 203 if (l3 == null) {
199 return null; 204 return null;
200 } 205 }
201 return l3.data[i3]; 206 return l3.data[i3];
202 } 207 }
208
209 private int hash(Object o) {
210 int h = o.hashCode();
211 // HashMap.newHash()
212 h ^= (h >>> 20) ^ (h >>> 12);
213 return h ^ (h >>> 7) ^ (h >>> 4);
214 }
215
216 @Override
217 public String toString() {
218 return String.format("SparseSet (0x%02X-0x%02X-0x%02X), %d elements. Direct: %d. Resolutions: neighbour: %d, i1: %s. i2: %s", I1_MASK, I2_MASK, I3_MASK, size, directPut, neighborPut, Arrays.toString(fixupPut1), Arrays.toString(fixupPut2));
219 }
203 220
204 public void dump() { 221 public void dump() {
205 int count = 0; 222 int count = 0;
223 System.out.println(toString());
206 for (int i = 0; i < level2.length; i++) { 224 for (int i = 0; i < level2.length; i++) {
207 IndexBranch l2 = level2[i]; 225 IndexBranch l2 = level2[i];
208 if (l2 == null) { 226 if (l2 == null) {
209 continue; 227 continue;
210 } 228 }
220 count++; 238 count++;
221 } 239 }
222 } 240 }
223 } 241 }
224 } 242 }
225 System.out.printf("Total: %d elements", count); 243 System.out.printf("Total: %d elements\n", count);
226 } 244 }
245
246 private static class IndexBranch {
247 private final LeafBranch[] leafs = new LeafBranch[64];
248 }
249
250 private static final class LeafBranch {
251 public final Object[] data = new Object[64];
252
253 public int put(int ix, Object d) {
254 if (putIfEmptyOrSame(ix, d)) {
255 return 1;
256 }
257 // try neighbour elements
258 if (putIfEmptyOrSame(ix ^ 0x1, d) || putIfEmptyOrSame(ix ^ 0x2, d) || putIfEmptyOrSame(ix ^ 0x3, d)) {
259 return 2;
260 }
261 return 0;
262 }
263
264 public boolean putIfEmptyOrSame(int ix, Object d) {
265 if (data[ix] == null || data[ix].equals(d)) {
266 data[ix] = d;
267 return true;
268 }
269 return false;
270 }
271
272 /**
273 * A <code>null</code> result tells the caller that probing other slots still makes sense.
274 * @return <code>this</code> if the slot is empty, <code>null</code> if the slot holds an entry that doesn't match, or the stored entry itself otherwise
275 */
276 public Object get(int ix, Object o) {
277 if (data[ix] == null) {
278 return this;
279 }
280 if (data[ix].equals(o)) {
281 return data[ix];
282 }
283 return null;
284 }
285 }
286
287 //
288 // 8 bits per level
289 // int i1 = (hash >>> 24) & 0xFF, i2 = (hash >>> 16) & 0xFF , i3 = (hash >>> 8) & 0xFF, i4 = hash & 0xFF;
290 //
291 // 10, 8, 8 and 6 bits
292 // final int i1 = (hash >>> 22) & 0x3FF, i2 = (hash >>> 14) & 0xFF , i3 = (hash >>> 6) & 0xFF, i4 = hash & 0x3F;
293 //
294 // 8, 6, 6, 6, 6
295 // 10, 6, 6, 6, 4
296 //
297 // 6, 5, 5, 5 = 21 bit
298 // hash = hash ^ (hash >>> 24); // incorporate upper byte we don't use into lower to value it
299 //final int i1 = (hash >>> 18) & 0x3F, i2 = (hash >>> 12) & 0x1F , i3 = (hash >>> 7) & 0x1F, i4 = (hash >>> 2) & 0x1F;
300 // 6, 5, 5
301 //hash = hash ^ (hash >>> 16);
302 //final int i1 = (hash >>> 10) & 0x3F, i2 = (hash >>> 5) & 0x1F , i3 = hash & 0x1F;
303 //
304 // 6, 6, 6
305 //final int i1 = (hash >>> 15) & 0x3F, i2 = (hash >>> 6) & 0x3F , i3 = hash & 0x3F;
306 //
307 // 8, 5, 5
308
227 } 309 }