Mercurial > jhg
changeset 307:2f2ab5c27f41
Collect sort reverse indexes along with array sorting
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Sat, 24 Sep 2011 04:06:27 +0200 |
parents | 971baa95fb07 |
children | 3f40262153a4 |
files | src/org/tmatesoft/hg/internal/ArrayHelper.java src/org/tmatesoft/hg/repo/Revlog.java test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java |
diffstat | 3 files changed, 190 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/org/tmatesoft/hg/internal/ArrayHelper.java Sat Sep 24 04:06:27 2011 +0200 @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2011 TMate Software Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * For information on how to redistribute this software under + * the terms of a license other than GNU General Public License + * contact TMate Software at support@hg4j.com + */ +package org.tmatesoft.hg.internal; + +/** + * + * @author Artem Tikhomirov + * @author TMate Software Ltd. + */ +public class ArrayHelper { + private int[] reverse; + + @SuppressWarnings("unchecked") + public void sort(Comparable<?>[] a) { +// Object[] aux = (Object[]) a.clone(); + reverse = new int[a.length]; + sort1((Comparable<Object>[])a, 0, a.length); + } + + private void sort1(Comparable<Object> x[], int off, int len) { + // Insertion sort on smallest arrays + if (len < 7) { + for (int i=off; i<len+off; i++) + for (int j=i; j>off && x[j-1].compareTo(x[j]) > 0; j--) + swap(x, j, j-1); + return; + } + + // Choose a partition element, v + int m = off + (len >> 1); // Small arrays, middle element + if (len > 7) { + int l = off; + int n = off + len - 1; + if (len > 40) { // Big arrays, pseudomedian of 9 + int s = len/8; + l = med3(x, l, l+s, l+2*s); + m = med3(x, m-s, m, m+s); + n = med3(x, n-2*s, n-s, n); + } + m = med3(x, l, m, n); // Mid-size, med of 3 + } + Comparable<Object> v = x[m]; + + // Establish Invariant: v* (<v)* (>v)* v* + int a = off, b = a, c = off + len - 1, d = c; + while(true) { + while (b <= c && x[b].compareTo(v) <= 0) { + if (x[b] == v) + swap(x, a++, b); + b++; + } + while (c >= b && x[c].compareTo(v) >= 0) { + if (x[c] == v) + swap(x, c, d--); + c--; + } + if (b > c) + break; + swap(x, b++, c--); + } + + // Swap partition elements back to middle + int s, n = off + len; + s = Math.min(a-off, b-a ); vecswap(x, off, b-s, s); + s = Math.min(d-c, n-d-1); vecswap(x, b, n-s, s); + + // Recursively sort non-partition-elements + if ((s = b-a) > 1) + sort1(x, off, s); + if ((s = d-c) > 1) + sort1(x, n-s, s); + } + + /** + * Swaps x[a .. (a+n-1)] with x[b .. (b+n-1)]. + */ + private void vecswap(Object[] x, int a, int b, int n) { + for (int i=0; i<n; i++, a++, b++) { + swap(x, a, b); + } + } + + /** + * Returns the index of the median of the three indexed integers. + */ + private static int med3(Comparable<Object>[] x, int a, int b, int c) { + return (x[a].compareTo(x[b]) < 0 ? + (x[b].compareTo(x[c]) < 0 ? b : x[a].compareTo(x[c]) < 0 ? c : a) : + (x[b].compareTo(x[c]) > 0 ? b : x[a].compareTo(x[c]) > 0 ? c : a)); + } + + + /** + * @return the reverse + */ + public int[] getReverse() { + return reverse; + } + + /** + * Swaps x[a] with x[b]. + */ + private void swap(Object[] x, int a, int b) { + Object t = x[a]; + x[a] = x[b]; + x[b] = t; + int z1 = reverse[a] != 0 ? reverse[a] : a+1; + int z2 = reverse[b] != 0 ? reverse[b] : b+1; + reverse[b] = z1; + reverse[a] = z2; + } +}
--- a/src/org/tmatesoft/hg/repo/Revlog.java Thu Sep 22 04:05:41 2011 +0200 +++ b/src/org/tmatesoft/hg/repo/Revlog.java Sat Sep 24 04:06:27 2011 +0200 @@ -30,6 +30,7 @@ import org.tmatesoft.hg.core.HgBadStateException; import org.tmatesoft.hg.core.HgException; import org.tmatesoft.hg.core.Nodeid; +import org.tmatesoft.hg.internal.ArrayHelper; import org.tmatesoft.hg.internal.DataAccess; import org.tmatesoft.hg.internal.RevlogStream; import org.tmatesoft.hg.util.ByteChannel; @@ -416,7 +417,6 @@ final int revisionCount = Revlog.this.content.revisionCount(); sequential = new Nodeid[revisionCount]; sorted = new Nodeid[revisionCount]; - sorted2natural = new int[revisionCount]; RevlogStream.Inspector insp = new RevlogStream.Inspector() { public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) { @@ -425,12 +425,12 @@ } }; Revlog.this.content.iterate(0, TIP, false, insp); - Arrays.sort(sorted); - for (int i = 0; i < revisionCount; i++) { - Nodeid n = sequential[i]; - int x = Arrays.binarySearch(sorted, n); - sorted2natural[x] = i; - } + // next is alternative to Arrays.sort(sorted), and build sorted2natural looking up each element of sequential in sorted. + // the way sorted2natural was build is O(n*log n). + final ArrayHelper ah = new ArrayHelper(); + ah.sort(sorted); + // note, values in ArrayHelper#getReversed are 1-based indexes, not 0-based + sorted2natural = ah.getReverse(); return this; } @@ -445,7 +445,7 @@ if (x < 0) { return BAD_REVISION; } - return sorted2natural[x]; + return sorted2natural[x]-1; } }
--- a/test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java Thu Sep 22 04:05:41 2011 +0200 +++ b/test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java Sat Sep 24 04:06:27 2011 +0200 @@ -14,6 +14,7 @@ import org.tmatesoft.hg.core.HgException; import org.tmatesoft.hg.core.HgLogCommand; import org.tmatesoft.hg.core.Nodeid; +import org.tmatesoft.hg.internal.ArrayHelper; import org.tmatesoft.hg.repo.HgChangelog; import org.tmatesoft.hg.repo.HgDataFile; import org.tmatesoft.hg.repo.HgLookup; @@ -36,19 +37,69 @@ public static void main(String[] args) throws Exception { MapTagsToFileRevisions m = new MapTagsToFileRevisions(); System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory()); -// Pattern p = Pattern.compile("^doc/[^/]*?\\.[0-9]\\.(x|ht)ml"); -// System.out.println(p.matcher("doc/asd.2.xml").matches()); -// System.out.println(p.matcher("doc/zxc.6.html").matches()); // m.collectTagsPerFile(); - m.manifestWalk(); +// m.manifestWalk(); +// m.changelogWalk(); + m.revisionMap(); m = null; System.gc(); System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory()); } + /* + * Each 5000 revisions from cpython, total 15 revisions + * Direct clog.getLocalRevision: ~260 ms + * RevisionMap.localRevision: ~265 ms (almost 100% in #init()) + * each 1000'th revision, total 71 revision: 1 230 vs 270 + * each 2000'th revision, total 36 revision: 620 vs 270 + * each 3000'th revision, total 24 revision: 410 vs 275 + */ + private void revisionMap() throws Exception { + ArrayHelper ah = new ArrayHelper(); + final List<String> initial = Arrays.asList("d", "w", "k", "b", "c", "i", "a", "r", "e", "h"); + String[] a = (String[]) initial.toArray(); + ah.sort(a); + System.out.println(Arrays.toString(initial.toArray())); + System.out.println(Arrays.toString(a)); + System.out.println(Arrays.toString(ah.getReverse())); + Object[] rebuilt = new Object[a.length]; + for (int i = 0; i < a.length; i++) { + int indexInOriginal = ah.getReverse()[i]; + rebuilt[indexInOriginal-1] = a[i]; + } + System.out.println(Arrays.toString(rebuilt)); + // + final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython")); + final HgChangelog clog = repository.getChangelog(); + ArrayList<Nodeid> revisions = new ArrayList<Nodeid>(); + final int step = 5000; + for (int i = 0, top = clog.getLastRevision(); i < top; i += step) { + revisions.add(clog.getRevision(i)); + } + final long s1 = System.nanoTime(); + for (Nodeid n : revisions) { + int r = clog.getLocalRevision(n); + if (r % step != 0) { + throw new IllegalStateException(Integer.toString(r)); + } + } + System.out.printf("Direct lookup of %d revisions took %,d ns\n", revisions.size(), System.nanoTime() - s1); + HgChangelog.RevisionMap rmap = clog.new RevisionMap(); + final long s2 = System.nanoTime(); + rmap.init(); + final long s3 = System.nanoTime(); + for (Nodeid n : revisions) { + int r = rmap.localRevision(n); + if (r % step != 0) { + throw new IllegalStateException(Integer.toString(r)); + } + } + System.out.printf("RevisionMap time: %d ms, of that init() %,d ns\n", (System.nanoTime() - s2) / 1000000, s3 - s2); + } + private void changelogWalk() throws Exception { + final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython")); final long start = System.currentTimeMillis(); - final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython")); repository.getChangelog().all(new HgChangelog.Inspector() { public int xx = 0;