diff src/org/tmatesoft/hg/util/DirectHashSet.java @ 304:85b8efde5586

Use memory-friendly set implementation to canonicalize filenames and nodeids
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 21 Sep 2011 18:26:16 +0200
parents
children 51d682cf9cdc
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/tmatesoft/hg/util/DirectHashSet.java	Wed Sep 21 18:26:16 2011 +0200
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2011 TMate Software Ltd
+ *  
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.util;
+
+import org.tmatesoft.hg.internal.Experimental;
+
+/**
+ * Memory-friendly alternative to HashSet. With slightly worse performance than that of HashSet, uses n * sizeof(HashMap.Entry) less memory 
+ * (i.e. for set of 50k elements saves more than 1 Mb of memory). Besides, elements of this set can be obtained (not only queried for presence) -  
+ * the option essential for canonical mappings (aka Pool)
+ * 
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+@Experimental
+public class DirectHashSet<T> {
+	
+	private Object[] table;
+	private int size;
+	private int threshold;
+
+	public DirectHashSet() {
+		this (16);
+	}
+
+	public DirectHashSet(int capacity) {
+        int result = 2;
+        while (result < capacity) {
+        	result <<= 1;
+        }
+		table = new Object[result];
+		threshold = result - (result >>> 2);
+	}
+
+	/**
+	 * Add element to the set.
+	 * @param o element, shall not be <code>null</code>
+	 * @return previous element from the set equal to the argument
+	 */
+	@SuppressWarnings("unchecked")
+	public T put(T o) {
+		final int h = hash(o);
+		final int mask = table.length - 1;
+		int i = h & mask;
+		Object t;
+		while ((t = table[i]) != null) {
+			if (t.equals(o)) {
+				table[i] = o;
+				return (T) t;
+			}
+			i = (i+1) & mask;
+		}
+		table[i] = o;
+		if (++size >= threshold) {
+			resize();
+		}
+		return null;
+	}
+	
+	/**
+	 * Query set for the element.
+	 * @param o element, shall not be <code>null</code>
+	 * @return element from the set, if present
+	 */
+	@SuppressWarnings("unchecked")
+	public T get(T o) {
+		final int h = hash(o);
+		final int mask = table.length - 1;
+		int i = h & mask;
+		Object t;
+		while ((t = table[i]) != null) {
+			if (t == o || t.equals(o)) {
+				return (T) t;
+			}
+			i = (i+1) & mask;
+		}
+		return null;
+	}
+	
+	public int size() {
+		return size;
+	}
+	
+	public void clear() {
+		Object[] t = table;
+		for (int i = 0, top = t.length; i < top; i++) {
+			t[i] = null;
+		}
+		size = 0;
+	}
+	
+	private void resize() {
+		final int newSize = table.length << 1;
+		final int newMask = newSize - 1;
+		Object[] newTable = new Object[newSize];
+		for (int i = 0, size = table.length; i < size; i++) {
+			Object t = table[i];
+			if (t != null) {
+				table[i] = null;
+				int x = hash(t) & newMask;
+				while (newTable[x] != null) {
+					x = (x+1) & newMask;
+				}
+				newTable[x] = t;
+			}
+		}
+		table = newTable;
+		threshold = newSize - (newSize >>> 2);
+	}
+
+	private static int hash(Object o) {
+		int h = o.hashCode();
+//		return h;
+		// HashMap.newHash()
+		h ^= (h >>> 20) ^ (h >>> 12);
+        return h ^ (h >>> 7) ^ (h >>> 4);
+	}
+
+}