# HG changeset patch # User Artem Tikhomirov # Date 1372962465 -7200 # Node ID 6334b026710381f62e2ab64c97d868542f4e92af # Parent a937e63b6e02bf7a9cad26fbd5708ff968d811af ParentChildMap can supply RevisionMap. Refactor ArrayHelper to keep most of sorted/reverse index magic inside diff -r a937e63b6e02 -r 6334b0267103 src/org/tmatesoft/hg/internal/ArrayHelper.java --- a/src/org/tmatesoft/hg/internal/ArrayHelper.java Thu Jul 04 18:40:03 2013 +0200 +++ b/src/org/tmatesoft/hg/internal/ArrayHelper.java Thu Jul 04 20:27:45 2013 +0200 @@ -16,40 +16,107 @@ */ package org.tmatesoft.hg.internal; +import java.util.Arrays; + /** * Internal alternative to Arrays.sort to build reversed index along with sorting + * and to perform lookup (binary search) without sorted array, using reversed index. * * @author Artem Tikhomirov * @author TMate Software Ltd. */ -public class ArrayHelper { - private int[] reverse; +public final class ArrayHelper> { + private int[] reverse; // aka sorted2natural + private final T[] data; + private T[] sorted; + + public ArrayHelper(T[] _data) { + assert _data != null; + data = _data; + } - @SuppressWarnings("unchecked") - public void sort(Comparable[] a) { -// Object[] aux = (Object[]) a.clone(); - reverse = new int[a.length]; - sort1((Comparable[])a, 0, a.length); + /** + * Sort data this helper wraps, possibly using supplied array (optional) + * to keep sorted elements + * @param sortDest array to keep sorted values at, or null + * @param sortDestIsEmpty false when sortDest already contains copy of data to be sorted + * @param keepSorted true to save sorted array for future use (e.g. in + */ + public void sort(T[] sortDest, boolean sortDestIsEmpty, boolean keepSorted) { + if (sortDest != null) { + assert sortDest.length >= data.length; + if (sortDestIsEmpty) { + System.arraycopy(data, 0, sortDest, 0, data.length); + } + sorted = sortDest; + } else { + sorted = data.clone(); + } + reverse = new int[data.length]; for (int i = 0; i < reverse.length; i++) { - // element that was not moved don't have an index in reverse. - // perhaps, can do it inside sort alg? - // Alternatively, may start with filling reverse[] array with initial indexes and - // avoid != 0 comparisons in #swap altogether? - if (reverse[i] == 0) { - reverse[i] = i+1; - } + // initial reverse indexes, so that elements that do + // not move during sort got correct indexes + reverse[i] = i; } + sort1(0, data.length); + if (!keepSorted) { + sorted = null; + } + } + + /** + * @return all reverse indexes + */ + public int[] getReverseIndexes() { + return reverse; + } + + public int getReverseIndex(int sortedIndex) { + return reverse[sortedIndex]; + } + + public T get(int index) { + return data[index]; + } + + public T[] getData() { + return data; + } + + /** + * Look up sorted index of the value, using sort information + * @return same value as {@link Arrays#binarySearch(Object[], Object)} does + */ + public int binarySearchSorted(T value) { + if (sorted != null) { + return Arrays.binarySearch(sorted, 0, data.length, value); + } + return binarySearchWithReverse(0, data.length, value); + } + + /** + * Look up index of the value in the original array. + * @return index in original data, or defaultValue if value not found + */ + public int binarySearch(T value, int defaultValue) { + int x = binarySearchSorted(value); + if (x < 0) { + return defaultValue; + } + return reverse[x]; } /** * Slightly modified version of Arrays.sort1(int[], int, int) quicksort alg (just to deal with Object[]) */ - private void sort1(Comparable x[], int off, int len) { + private void sort1(int off, int len) { + @SuppressWarnings("unchecked") + Comparable[] x = (Comparable[]) sorted; // Insertion sort on smallest arrays if (len < 7) { for (int i=off; ioff && x[j-1].compareTo(x[j]) > 0; j--) - swap(x, j, j-1); + swap(j, j-1); return; } @@ -60,11 +127,11 @@ int n = off + len - 1; if (len > 40) { // Big arrays, pseudomedian of 9 int s = len/8; - l = med3(x, l, l+s, l+2*s); - m = med3(x, m-s, m, m+s); - n = med3(x, n-2*s, n-s, n); + l = med3(l, l+s, l+2*s); + m = med3(m-s, m, m+s); + n = med3(n-2*s, n-s, n); } - m = med3(x, l, m, n); // Mid-size, med of 3 + m = med3(l, m, n); // Mid-size, med of 3 } Comparable v = x[m]; @@ -73,67 +140,84 @@ while(true) { while (b <= c && x[b].compareTo(v) <= 0) { if (x[b] == v) - swap(x, a++, b); + swap(a++, b); b++; } while (c >= b && x[c].compareTo(v) >= 0) { if (x[c] == v) - swap(x, c, d--); + swap(c, d--); c--; } if (b > c) break; - swap(x, b++, c--); + swap(b++, c--); } // Swap partition elements back to middle int s, n = off + len; - s = Math.min(a-off, b-a ); vecswap(x, off, b-s, s); - s = Math.min(d-c, n-d-1); vecswap(x, b, n-s, s); + s = Math.min(a-off, b-a ); vecswap(off, b-s, s); + s = Math.min(d-c, n-d-1); vecswap(b, n-s, s); // Recursively sort non-partition-elements if ((s = b-a) > 1) - sort1(x, off, s); + sort1(off, s); if ((s = d-c) > 1) - sort1(x, n-s, s); + sort1(n-s, s); } /** * Swaps x[a .. (a+n-1)] with x[b .. (b+n-1)]. */ - private void vecswap(Object[] x, int a, int b, int n) { + private void vecswap(int a, int b, int n) { for (int i=0; i[] x, int a, int b, int c) { - return (x[a].compareTo(x[b]) < 0 ? - (x[b].compareTo(x[c]) < 0 ? b : x[a].compareTo(x[c]) < 0 ? c : a) : - (x[b].compareTo(x[c]) > 0 ? b : x[a].compareTo(x[c]) > 0 ? c : a)); + private int med3(int a, int b, int c) { + @SuppressWarnings("unchecked") + Comparable[] x = (Comparable[]) sorted; + return (x[a].compareTo(x[b]) < 0 ? + (x[b].compareTo(x[c]) < 0 ? b : x[a].compareTo(x[c]) < 0 ? c : a) : + (x[b].compareTo(x[c]) > 0 ? b : x[a].compareTo(x[c]) > 0 ? c : a)); } - - /** - * @return the reverse - */ - public int[] getReverse() { - return reverse; - } - - /** + /** * Swaps x[a] with x[b]. */ - private void swap(Object[] x, int a, int b) { + private void swap(int a, int b) { + Object[] x = sorted; Object t = x[a]; x[a] = x[b]; x[b] = t; - int z1 = reverse[a] != 0 ? reverse[a] : a+1; - int z2 = reverse[b] != 0 ? reverse[b] : b+1; + int z1 = reverse[a]; + int z2 = reverse[b]; reverse[b] = z1; reverse[a] = z2; } + + // copied from Arrays.binarySearch0, update to be instance method and to use reverse indexes + private int binarySearchWithReverse(int fromIndex, int toIndex, T key) { + int low = fromIndex; + int high = toIndex - 1; + + while (low <= high) { + int mid = (low + high) >>> 1; + // data[reverse[x]] gives sorted value at index x + T midVal = data[reverse[mid]]; + int cmp = midVal.compareTo(key); + + if (cmp < 0) + low = mid + 1; + else if (cmp > 0) + high = mid - 1; + else + return mid; // key found + } + return -(low + 1); // key not found. + } + } diff -r a937e63b6e02 -r 6334b0267103 src/org/tmatesoft/hg/repo/HgBranches.java --- a/src/org/tmatesoft/hg/repo/HgBranches.java Thu Jul 04 18:40:03 2013 +0200 +++ b/src/org/tmatesoft/hg/repo/HgBranches.java Thu Jul 04 20:27:45 2013 +0200 @@ -46,7 +46,8 @@ import org.tmatesoft.hg.util.ProgressSupport; /** - * + * Access information about branches in the repository + * * @author Artem Tikhomirov * @author TMate Software Ltd. */ @@ -125,14 +126,17 @@ void collect(final ProgressSupport ps) throws HgRuntimeException { branches.clear(); final HgRepository repo = internalRepo.getRepo(); - ps.start(1 + repo.getChangelog().getRevisionCount() * 2); + final HgChangelog clog = repo.getChangelog(); + final HgRevisionMap rmap; + ps.start(1 + clog.getRevisionCount() * 2); // int lastCached = readCache(); - isCacheActual = lastCached == repo.getChangelog().getLastRevision(); + isCacheActual = lastCached == clog.getLastRevision(); if (!isCacheActual) { - final HgParentChildMap pw = new HgParentChildMap(repo.getChangelog()); + // XXX need a way to share HgParentChildMap + final HgParentChildMap pw = new HgParentChildMap(clog); pw.init(); - ps.worked(repo.getChangelog().getRevisionCount()); + ps.worked(clog.getRevisionCount()); // // first revision branch found at final HashMap branchStart = new HashMap(); @@ -167,7 +171,7 @@ }; // XXX alternatively may iterate with pw.all().subList(lastCached) // but need an effective way to find out branch of particular changeset - repo.getChangelog().range(lastCached == -1 ? 0 : lastCached+1, HgRepository.TIP, insp); + clog.range(lastCached == -1 ? 0 : lastCached+1, HgRepository.TIP, insp); // // build BranchInfo, based on found and cached for (String bn : branchStart.keySet()) { @@ -192,10 +196,10 @@ } branches.put(bn, bi); } - } // !cacheActual - final HgChangelog clog = repo.getChangelog(); - /// FIXME use HgParentChildMap if available (need to decide how to get HgRevisionMap and HgParentChildMap to a common denominator) - final HgRevisionMap rmap = new HgRevisionMap(clog).init(); + rmap = pw.getRevisionMap(); + } else { // !cacheActual + rmap = new HgRevisionMap(clog).init(); + } for (BranchInfo bi : branches.values()) { bi.validate(clog, rmap); } diff -r a937e63b6e02 -r 6334b0267103 src/org/tmatesoft/hg/repo/HgParentChildMap.java --- a/src/org/tmatesoft/hg/repo/HgParentChildMap.java Thu Jul 04 18:40:03 2013 +0200 +++ b/src/org/tmatesoft/hg/repo/HgParentChildMap.java Thu Jul 04 20:27:45 2013 +0200 @@ -28,6 +28,7 @@ import java.util.List; import org.tmatesoft.hg.core.Nodeid; +import org.tmatesoft.hg.internal.ArrayHelper; import org.tmatesoft.hg.internal.IntMap; import org.tmatesoft.hg.repo.Revlog.ParentInspector; @@ -62,14 +63,15 @@ // IMPORTANT: Nodeid instances shall be shared between all arrays + private final T revlog; private Nodeid[] sequential; // natural repository order, childrenOf rely on ordering - private Nodeid[] sorted; // for binary search - private int[] sorted2natural; // indexes in sorted to indexes in sequential + private Nodeid[] sorted; // for binary search, just an origin of the actual value in use, the one inside seqWrapper private Nodeid[] firstParent; // parents by natural order (i.e. firstParent[A] is parent of revision with index A) private Nodeid[] secondParent; - private final T revlog; private IntMap heads; private BitSet headsBitSet; // 1 indicates revision got children, != null only during init; + private HgRevisionMap revisionIndexMap; + private ArrayHelper seqWrapper; public HgParentChildMap(T owner) { @@ -111,18 +113,13 @@ secondParent = new Nodeid[revisionCount]; // sequential = new Nodeid[revisionCount]; - sorted = new Nodeid[revisionCount]; + sorted = new Nodeid[revisionCount]; headsBitSet = new BitSet(revisionCount); revlog.indexWalk(0, TIP, this); - Arrays.sort(sorted); - // FIXME use ArrayHelper instead - sorted2natural = new int[revisionCount]; - for (int i = 0; i < revisionCount; i++) { - Nodeid n = sequential[i]; - int x = Arrays.binarySearch(sorted, n); - assertSortedIndex(x); - sorted2natural[x] = i; - } + seqWrapper = new ArrayHelper(sequential); + // HgRevisionMap doesn't keep sorted, try alternative here. + // reference this.sorted (not only from ArrayHelper) helps to track ownership in hprof/mem dumps + seqWrapper.sort(sorted, false, true); // no reason to keep BitSet, number of heads is usually small IntMap _heads = new IntMap(headsBitSet.size() - headsBitSet.cardinality()); int index = 0; @@ -151,16 +148,16 @@ * @return true if revision matches any revision in this revlog */ public boolean knownNode(Nodeid nid) { - return Arrays.binarySearch(sorted, nid) >= 0; + return seqWrapper.binarySearchSorted(nid) >= 0; } /** * null if none. only known nodes (as per #knownNode) are accepted as arguments */ public Nodeid firstParent(Nodeid nid) { - int x = Arrays.binarySearch(sorted, nid); + int x = seqWrapper.binarySearchSorted(nid); assertSortedIndex(x); - int i = sorted2natural[x]; + int i = seqWrapper.getReverseIndex(x); return firstParent[i]; } @@ -171,9 +168,9 @@ } public Nodeid secondParent(Nodeid nid) { - int x = Arrays.binarySearch(sorted, nid); + int x = seqWrapper.binarySearchSorted(nid); assertSortedIndex(x); - int i = sorted2natural[x]; + int i = seqWrapper.getReverseIndex(x); return secondParent[i]; } @@ -183,9 +180,9 @@ } public boolean appendParentsOf(Nodeid nid, Collection c) { - int x = Arrays.binarySearch(sorted, nid); + int x = seqWrapper.binarySearchSorted(nid); assertSortedIndex(x); - int i = sorted2natural[x]; + int i = seqWrapper.getReverseIndex(x); Nodeid p1 = firstParent[i]; boolean modified = false; if (p1 != null) { @@ -214,9 +211,9 @@ // first, find earliest index of roots in question, as there's no sense // to check children among nodes prior to branch's root node for (Nodeid r : roots) { - int x = Arrays.binarySearch(sorted, r); + int x = seqWrapper.binarySearchSorted(r); assertSortedIndex(x); - int i = sorted2natural[x]; + int i = seqWrapper.getReverseIndex(x); if (i < earliestRevision) { earliestRevision = i; } @@ -231,22 +228,19 @@ return result; } - public long AAA = 0; - /** * @return revisions that have supplied revision as their immediate parent */ public List directChildren(Nodeid nid) { - int x = Arrays.binarySearch(sorted, nid); + int x = seqWrapper.binarySearchSorted(nid); assertSortedIndex(x); - nid = sorted[x]; // canonical instance - int start = sorted2natural[x]; + int start = seqWrapper.getReverseIndex(x); + nid = sequential[start]; // canonical instance if (!hasChildren(start)) { return Collections.emptyList(); } ArrayList result = new ArrayList(5); for (int i = start + 1; i < sequential.length; i++) { - AAA++; if (nid == firstParent[i] || nid == secondParent[i]) { result.add(sequential[i]); } @@ -259,9 +253,9 @@ * @return true if there's any node in this revlog that has specified node as one of its parents. */ public boolean hasChildren(Nodeid nid) { - int x = Arrays.binarySearch(sorted, nid); + int x = seqWrapper.binarySearchSorted(nid); assertSortedIndex(x); - int i = sorted2natural[x]; + int i = seqWrapper.getReverseIndex(x); return hasChildren(i); } @@ -280,19 +274,19 @@ * @return true if wannaBeChild is among children of root */ public boolean isChild(Nodeid root, Nodeid wannaBeChild) { - int x = Arrays.binarySearch(sorted, root); + int x = seqWrapper.binarySearchSorted(root); assertSortedIndex(x); - root = sorted[x]; // canonical instance - final int start = sorted2natural[x]; + final int start = seqWrapper.getReverseIndex(x); + root = sequential[start]; // canonical instance if (!hasChildren(start)) { return false; // root got no children at all } - int y = Arrays.binarySearch(sorted, wannaBeChild); + int y = seqWrapper.binarySearchSorted(wannaBeChild); if (y < 0) { return false; // not found } - wannaBeChild = sorted[y]; // canonicalize - final int end = sorted2natural[y]; + final int end = seqWrapper.getReverseIndex(y); + wannaBeChild = sequential[end]; // canonicalize if (end <= start) { return false; // potential child was in repository earlier than root } @@ -312,6 +306,17 @@ public Collection heads() { return heads.values(); } + + /** + * @return map of revision to indexes + */ + public HgRevisionMap getRevisionMap() { + if (revisionIndexMap == null) { + revisionIndexMap = new HgRevisionMap(revlog); + revisionIndexMap.init(seqWrapper); + } + return revisionIndexMap; + } private boolean hasChildren(int sequentialIndex) { return !heads.containsKey(sequentialIndex); diff -r a937e63b6e02 -r 6334b0267103 src/org/tmatesoft/hg/repo/HgRevisionMap.java --- a/src/org/tmatesoft/hg/repo/HgRevisionMap.java Thu Jul 04 18:40:03 2013 +0200 +++ b/src/org/tmatesoft/hg/repo/HgRevisionMap.java Thu Jul 04 20:27:45 2013 +0200 @@ -19,8 +19,6 @@ import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION; import static org.tmatesoft.hg.repo.HgRepository.TIP; -import java.util.Arrays; - import org.tmatesoft.hg.core.Nodeid; import org.tmatesoft.hg.internal.ArrayHelper; import org.tmatesoft.hg.repo.Revlog.RevisionInspector; @@ -60,15 +58,14 @@ * for complete changelog iteration. */ + private final T revlog; /* * XXX 3 * (x * 4) bytes. Can I do better? * It seems, yes. Don't need to keep sorted, always can emulate it with indirect access to sequential through sorted2natural. * i.e. instead sorted[mid].compareTo(toFind), do sequential[sorted2natural[mid]].compareTo(toFind) */ - private Nodeid[] sequential; // natural repository order, childrenOf rely on ordering - private Nodeid[] sorted; // for binary search - private int[] sorted2natural; - private final T revlog; + private Nodeid[] sequential; // natural repository order + private ArrayHelper seqWrapper; public HgRevisionMap(T owner) { revlog = owner; @@ -79,7 +76,7 @@ } public void next(int revisionIndex, Nodeid revision, int linkedRevision) { - sequential[revisionIndex] = sorted[revisionIndex] = revision; + sequential[revisionIndex] = revision; } /** @@ -89,28 +86,29 @@ // XXX HgRepository.register((RepoChangeListener) this); // listen to changes in repo, re-init if needed? final int revisionCount = revlog.getRevisionCount(); sequential = new Nodeid[revisionCount]; - sorted = new Nodeid[revisionCount]; revlog.indexWalk(0, TIP, this); // next is alternative to Arrays.sort(sorted), and build sorted2natural looking up each element of sequential in sorted. // the way sorted2natural was build is O(n*log n). - final ArrayHelper ah = new ArrayHelper(); - ah.sort(sorted); - // note, values in ArrayHelper#getReversed are 1-based indexes, not 0-based - sorted2natural = ah.getReverse(); + seqWrapper = new ArrayHelper(sequential); + seqWrapper.sort(null, true, false); return this; } + + /* friendly initializer to use from HgParentChildMap + /*package*/ void init(ArrayHelper _seqWrapper) { + assert _seqWrapper.getData().length == revlog.getRevisionCount(); + sequential = _seqWrapper.getData(); + seqWrapper = _seqWrapper; + } public Nodeid revision(int revisionIndex) { return sequential[revisionIndex]; } + public int revisionIndex(Nodeid revision) { if (revision == null || revision.isNull()) { return BAD_REVISION; } - int x = Arrays.binarySearch(sorted, revision); - if (x < 0) { - return BAD_REVISION; - } - return sorted2natural[x]-1; + return seqWrapper.binarySearch(revision, BAD_REVISION); } } \ No newline at end of file diff -r a937e63b6e02 -r 6334b0267103 test/org/tmatesoft/hg/test/TestAuxUtilities.java --- a/test/org/tmatesoft/hg/test/TestAuxUtilities.java Thu Jul 04 18:40:03 2013 +0200 +++ b/test/org/tmatesoft/hg/test/TestAuxUtilities.java Thu Jul 04 20:27:45 2013 +0200 @@ -23,6 +23,7 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.Arrays; import org.junit.Assert; import org.junit.Rule; @@ -62,25 +63,31 @@ @Test public void testArrayHelper() { String[] initial = {"d", "w", "k", "b", "c", "i", "a", "r", "e", "h" }; - ArrayHelper ah = new ArrayHelper(); + ArrayHelper ah = new ArrayHelper(initial); String[] result = initial.clone(); - ah.sort(result); - String[] restored = restore(result, ah.getReverse()); + ah.sort(result, false, false); + String[] restored = restore(result, ah.getReverseIndexes()); assertArrayEquals(initial, restored); // // few elements are on the right place from the very start and do not shift during sort. // make sure for them we've got correct reversed indexes as well initial = new String[] {"d", "h", "c", "b", "k", "i", "a", "r", "e", "w" }; - ah.sort(result = initial.clone()); - restored = restore(result, ah.getReverse()); + ah = new ArrayHelper(initial); + ah.sort(result = new String[initial.length], true, true); + restored = restore(result, ah.getReverseIndexes()); assertArrayEquals(initial, restored); + for (int i = 0; i < initial.length; i++) { + String s = initial[i]; + errorCollector.assertEquals(i, ah.binarySearch(s, -1)); + errorCollector.assertEquals(Arrays.binarySearch(result, s), ah.binarySearchSorted(s)); + } } private static String[] restore(String[] sorted, int[] sortReverse) { String[] rebuilt = new String[sorted.length]; for (int i = 0; i < sorted.length; i++) { int indexInOriginal = sortReverse[i]; - rebuilt[indexInOriginal-1] = sorted[i]; + rebuilt[indexInOriginal] = sorted[i]; } return rebuilt; } diff -r a937e63b6e02 -r 6334b0267103 test/org/tmatesoft/hg/test/TestRevisionMaps.java --- a/test/org/tmatesoft/hg/test/TestRevisionMaps.java Thu Jul 04 18:40:03 2013 +0200 +++ b/test/org/tmatesoft/hg/test/TestRevisionMaps.java Thu Jul 04 20:27:45 2013 +0200 @@ -28,6 +28,7 @@ import org.tmatesoft.hg.repo.HgChangelog; import org.tmatesoft.hg.repo.HgParentChildMap; import org.tmatesoft.hg.repo.HgRepository; +import org.tmatesoft.hg.repo.HgRevisionMap; /** * @@ -101,4 +102,29 @@ errorCollector.assertEquals(Collections.emptyList(), parentHelper.directChildren(allRevs[7])); } + @Test + public void testRevisionMap() throws HgException { + // XXX this test may benefit from external huge repository + final HgRepository repo = Configuration.get().find("test-annotate"); + Nodeid[] allRevs = RepoUtils.allRevisions(repo); + final HgChangelog clog = repo.getChangelog(); + final HgRevisionMap rmap = new HgRevisionMap(clog).init(); + doTestRevisionMap(allRevs, rmap); + } + + @Test + public void testRevisionMapFromParentChildMap() throws HgException { + final HgRepository repo = Configuration.get().find("test-annotate"); + Nodeid[] allRevs = RepoUtils.allRevisions(repo); + HgParentChildMap parentHelper = new HgParentChildMap(repo.getChangelog()); + parentHelper.init(); + doTestRevisionMap(allRevs, parentHelper.getRevisionMap()); + } + + private void doTestRevisionMap(Nodeid[] allRevs, HgRevisionMap rmap) { + for (int i = 0; i < allRevs.length; i++) { + errorCollector.assertEquals(i, rmap.revisionIndex(allRevs[i])); + errorCollector.assertEquals(allRevs[i], rmap.revision(i)); + } + } }