changeset 307:2f2ab5c27f41

Collect sort reverse indexes along with array sorting
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Sat, 24 Sep 2011 04:06:27 +0200 (2011-09-24)
parents 971baa95fb07
children 3f40262153a4
files src/org/tmatesoft/hg/internal/ArrayHelper.java src/org/tmatesoft/hg/repo/Revlog.java test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java
diffstat 3 files changed, 190 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/tmatesoft/hg/internal/ArrayHelper.java	Sat Sep 24 04:06:27 2011 +0200
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2011 TMate Software Ltd
+ *  
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.internal;
+
+/**
+ * 
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public class ArrayHelper {
+	private int[] reverse;
+
+	@SuppressWarnings("unchecked")
+	public void sort(Comparable<?>[] a) {
+//		Object[] aux = (Object[]) a.clone();
+		reverse = new int[a.length];
+		sort1((Comparable<Object>[])a, 0, a.length);
+	}
+	
+    private void sort1(Comparable<Object> x[], int off, int len) {
+    	// Insertion sort on smallest arrays
+    	if (len < 7) {
+    	    for (int i=off; i<len+off; i++)
+    			for (int j=i; j>off && x[j-1].compareTo(x[j]) > 0; j--)
+    			    swap(x, j, j-1);
+    	    return;
+    	}
+
+    	// Choose a partition element, v
+    	int m = off + (len >> 1);       // Small arrays, middle element
+    	if (len > 7) {
+    	    int l = off;
+    	    int n = off + len - 1;
+    	    if (len > 40) {        // Big arrays, pseudomedian of 9
+    			int s = len/8;
+	    		l = med3(x, l,     l+s, l+2*s);
+	    		m = med3(x, m-s,   m,   m+s);
+	    		n = med3(x, n-2*s, n-s, n);
+    	    }
+    	    m = med3(x, l, m, n); // Mid-size, med of 3
+    	}
+    	Comparable<Object> v = x[m];
+
+    	// Establish Invariant: v* (<v)* (>v)* v*
+    	int a = off, b = a, c = off + len - 1, d = c;
+    	while(true) {
+    	    while (b <= c && x[b].compareTo(v) <= 0) {
+    			if (x[b] == v)
+    			    swap(x, a++, b);
+    			b++;
+    	    }
+    	    while (c >= b && x[c].compareTo(v) >= 0) {
+    			if (x[c] == v)
+    			    swap(x, c, d--);
+    			c--;
+    	    }
+    	    if (b > c)
+    			break;
+    	    swap(x, b++, c--);
+    	}
+
+    	// Swap partition elements back to middle
+    	int s, n = off + len;
+    	s = Math.min(a-off, b-a  );  vecswap(x, off, b-s, s);
+    	s = Math.min(d-c,   n-d-1);  vecswap(x, b,   n-s, s);
+
+    	// Recursively sort non-partition-elements
+    	if ((s = b-a) > 1)
+    	    sort1(x, off, s);
+    	if ((s = d-c) > 1)
+    	    sort1(x, n-s, s);
+    }
+
+    /**
+     * Swaps x[a .. (a+n-1)] with x[b .. (b+n-1)].
+     */
+    private void vecswap(Object[] x, int a, int b, int n) {
+		for (int i=0; i<n; i++, a++, b++) {
+		    swap(x, a, b);
+		}
+    }
+
+    /**
+     * Returns the index of the median of the three indexed integers.
+     */
+    private static int med3(Comparable<Object>[] x, int a, int b, int c) {
+	return (x[a].compareTo(x[b]) < 0 ?
+		(x[b].compareTo(x[c]) < 0 ? b : x[a].compareTo(x[c]) < 0 ? c : a) :
+		(x[b].compareTo(x[c]) > 0 ? b : x[a].compareTo(x[c]) > 0 ? c : a));
+    }
+
+
+	/**
+	 * @return the reverse
+	 */
+	public int[] getReverse() {
+		return reverse;
+	}
+
+	/**
+	 * Swaps x[a] with x[b].
+	 */
+	private void swap(Object[] x, int a, int b) {
+		Object t = x[a];
+		x[a] = x[b];
+		x[b] = t;
+		int z1 = reverse[a] != 0 ? reverse[a] : a+1;
+		int z2 = reverse[b] != 0 ? reverse[b] : b+1;
+		reverse[b] = z1;
+		reverse[a] = z2;
+	}
+}
--- a/src/org/tmatesoft/hg/repo/Revlog.java	Thu Sep 22 04:05:41 2011 +0200
+++ b/src/org/tmatesoft/hg/repo/Revlog.java	Sat Sep 24 04:06:27 2011 +0200
@@ -30,6 +30,7 @@
 import org.tmatesoft.hg.core.HgBadStateException;
 import org.tmatesoft.hg.core.HgException;
 import org.tmatesoft.hg.core.Nodeid;
+import org.tmatesoft.hg.internal.ArrayHelper;
 import org.tmatesoft.hg.internal.DataAccess;
 import org.tmatesoft.hg.internal.RevlogStream;
 import org.tmatesoft.hg.util.ByteChannel;
@@ -416,7 +417,6 @@
 			final int revisionCount = Revlog.this.content.revisionCount();
 			sequential = new Nodeid[revisionCount];
 			sorted = new Nodeid[revisionCount];
-			sorted2natural = new int[revisionCount];
 			RevlogStream.Inspector insp = new RevlogStream.Inspector() {
 				
 				public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) {
@@ -425,12 +425,12 @@
 				}
 			};
 			Revlog.this.content.iterate(0, TIP, false, insp);
-			Arrays.sort(sorted);
-			for (int i = 0; i < revisionCount; i++) {
-				Nodeid n = sequential[i];
-				int x = Arrays.binarySearch(sorted, n);
-				sorted2natural[x] = i;
-			}
+			// next is alternative to Arrays.sort(sorted), and build sorted2natural looking up each element of sequential in sorted.
+			// the way sorted2natural was build is O(n*log n).  
+			final ArrayHelper ah = new ArrayHelper();
+			ah.sort(sorted);
+			// note, values in ArrayHelper#getReversed are 1-based indexes, not 0-based 
+			sorted2natural = ah.getReverse();
 			return this;
 		}
 		
@@ -445,7 +445,7 @@
 			if (x < 0) {
 				return BAD_REVISION;
 			}
-			return sorted2natural[x];
+			return sorted2natural[x]-1;
 		}
 	}
 
--- a/test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java	Thu Sep 22 04:05:41 2011 +0200
+++ b/test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java	Sat Sep 24 04:06:27 2011 +0200
@@ -14,6 +14,7 @@
 import org.tmatesoft.hg.core.HgException;
 import org.tmatesoft.hg.core.HgLogCommand;
 import org.tmatesoft.hg.core.Nodeid;
+import org.tmatesoft.hg.internal.ArrayHelper;
 import org.tmatesoft.hg.repo.HgChangelog;
 import org.tmatesoft.hg.repo.HgDataFile;
 import org.tmatesoft.hg.repo.HgLookup;
@@ -36,19 +37,69 @@
 	public static void main(String[] args) throws Exception {
 		MapTagsToFileRevisions m = new MapTagsToFileRevisions();
 		System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory());
-//		Pattern p = Pattern.compile("^doc/[^/]*?\\.[0-9]\\.(x|ht)ml");
-//		System.out.println(p.matcher("doc/asd.2.xml").matches());
-//		System.out.println(p.matcher("doc/zxc.6.html").matches());
 //		m.collectTagsPerFile();
-		m.manifestWalk();
+//		m.manifestWalk();
+//		m.changelogWalk();
+		m.revisionMap();
 		m = null;
 		System.gc();
 		System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory());
 	}
 
+	/*
+	 * Each 5000 revisions from cpython, total 15 revisions
+	 * Direct clog.getLocalRevision: ~260 ms
+	 * RevisionMap.localRevision: ~265 ms (almost 100% in #init())
+	 * each 1000'th revision, total 71 revision: 1 230 vs 270
+	 * each 2000'th revision, total 36 revision: 620 vs 270
+	 * each 3000'th revision, total 24 revision: 410 vs 275
+	 */
+	private void revisionMap() throws Exception {
+		ArrayHelper ah = new ArrayHelper();
+		final List<String> initial = Arrays.asList("d", "w", "k", "b", "c", "i", "a", "r", "e", "h");
+		String[] a = (String[]) initial.toArray(); 
+		ah.sort(a);
+		System.out.println(Arrays.toString(initial.toArray()));
+		System.out.println(Arrays.toString(a));
+		System.out.println(Arrays.toString(ah.getReverse()));
+		Object[] rebuilt = new Object[a.length];
+		for (int i = 0; i < a.length; i++) {
+			int indexInOriginal = ah.getReverse()[i];
+			rebuilt[indexInOriginal-1] = a[i];
+		}
+		System.out.println(Arrays.toString(rebuilt));
+		//
+		final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython"));
+		final HgChangelog clog = repository.getChangelog();
+		ArrayList<Nodeid> revisions = new ArrayList<Nodeid>();
+		final int step = 5000;
+		for (int i = 0, top = clog.getLastRevision(); i < top; i += step) {
+			revisions.add(clog.getRevision(i));
+		}
+		final long s1 = System.nanoTime();
+		for (Nodeid n : revisions) {
+			int r = clog.getLocalRevision(n);
+			if (r % step != 0) {
+				throw new IllegalStateException(Integer.toString(r));
+			}
+		}
+		System.out.printf("Direct lookup of %d revisions took %,d ns\n", revisions.size(), System.nanoTime() - s1);
+		HgChangelog.RevisionMap rmap = clog.new RevisionMap();
+		final long s2 = System.nanoTime();
+		rmap.init();
+		final long s3 = System.nanoTime();
+		for (Nodeid n : revisions) {
+			int r = rmap.localRevision(n);
+			if (r % step != 0) {
+				throw new IllegalStateException(Integer.toString(r));
+			}
+		}
+		System.out.printf("RevisionMap time: %d ms, of that init() %,d ns\n", (System.nanoTime() - s2) / 1000000, s3 - s2);
+	}
+
 	private void changelogWalk() throws Exception {
+		final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython"));
 		final long start = System.currentTimeMillis();
-		final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython"));
 		repository.getChangelog().all(new HgChangelog.Inspector() {
 			public int xx = 0;