view src/org/tmatesoft/hg/repo/HgParentChildMap.java @ 709:497e697636fc

Report merged lines as changed block if possible, not as a sequence of added/deleted blocks. To facilitate access to merge parent lines AddBlock got mergeLineAt() method that reports index of the line in the second parent (if any), while insertedAt() has been changed to report index in the first parent always
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 21 Aug 2013 16:23:27 +0200
parents 19f5167c2155
children
line wrap: on
line source
/*
 * Copyright (c) 2011-2013 TMate Software Ltd
 *  
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * For information on how to redistribute this software under
 * the terms of a license other than GNU General Public License
 * contact TMate Software at support@hg4j.com
 */
package org.tmatesoft.hg.repo;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;

import org.tmatesoft.hg.core.Nodeid;
import org.tmatesoft.hg.internal.ArrayHelper;
import org.tmatesoft.hg.internal.IntMap;
import org.tmatesoft.hg.repo.Revlog.ParentInspector;

/**
 * Helper class to deal with parent-child relationship between revisions <i>en masse</i>.
 * Works in terms of {@link Nodeid nodeids}, there's no need to deal with revision indexes. 
 * For a given revision, answers questions like "who's my parent and what are my immediate children".
 * 
 * <p>Comes handy when multiple revisions are analyzed and distinct {@link Revlog#parents(int, int[], byte[], byte[])} 
 * queries are ineffective. 
 * 
 * <p>Next code snippet shows typical use: 
 * <pre>
 *   HgChangelog clog = repo.getChangelog();
 *   ParentWalker&lt;HgChangelog> pw = new ParentWalker&lt;HgChangelog>(clog);
 *   pw.init();
 *   
 *   Nodeid me = Nodeid.fromAscii("...");
 *   List<Nodei> immediateChildren = pw.directChildren(me);
 * </pre>
 * 
 * <p>Note, this map represents a snapshot of repository state at specific point, and is not automatically
 * updated/refreshed along with repository changes. I.e. any revision committed after this map was initialized
 * won't be recognized as known.
 * 
 * <p> Perhaps, later may add alternative way to access (and reuse) map instance, Revlog#getParentWalker(), 
 * that instantiates and initializes ParentWalker, and keep SoftReference to allow its reuse.
 * 
 * @see HgRevisionMap
 * 
 * @author Artem Tikhomirov
 * @author TMate Software Ltd.
 */
public final class HgParentChildMap<T extends Revlog> implements ParentInspector {

	// IMPORTANT: Nodeid instances shall be shared between all arrays

	private final T revlog;
	private Nodeid[] sequential; // natural repository order, childrenOf rely on ordering
	private Nodeid[] sorted; // for binary search, just an origin of the actual value in use, the one inside seqWrapper
	private Nodeid[] firstParent; // parents by natural order (i.e. firstParent[A] is parent of revision with index A)
	private Nodeid[] secondParent;
	private IntMap<Nodeid> heads;
	private BitSet headsBitSet; // 1 indicates revision got children, != null only during init;
	private HgRevisionMap<T> revisionIndexMap;
	private ArrayHelper<Nodeid> seqWrapper; 


	public HgParentChildMap(T owner) {
		revlog = owner;
	}
	
	public HgRepository getRepo() {
		return revlog.getRepo();
	}
	
	public void next(int revisionNumber, Nodeid revision, int parent1Revision, int parent2Revision, Nodeid nidParent1, Nodeid nidParent2) {
		if (parent1Revision >= revisionNumber || parent2Revision >= revisionNumber) {
			throw new IllegalStateException(); // sanity, revisions are sequential
		}
		int ix = revisionNumber;
		sequential[ix] = sorted[ix] = revision;
		if (parent1Revision != -1) {
			firstParent[ix] = sequential[parent1Revision];
			headsBitSet.set(parent1Revision);
		}
		if (parent2Revision != -1) { // revlog of DataAccess.java has p2 set when p1 is -1
			secondParent[ix] = sequential[parent2Revision];
			headsBitSet.set(parent2Revision);
		}
	}
	
	/**
	 * Prepare (initialize or update) the map. Once {@link HgParentChildMap} was initialized, it keeps snapshot
	 * of repository state. New revisions committed to the repository are not visible. To update the map, call 
	 * {@link #init()} once again, it tries to refresh in effective way, and to bring in only relevant changes.
	 *  
	 * @throws HgInvalidControlFileException if failed to access revlog index/data entry. <em>Runtime exception</em>
	 * @throws HgRuntimeException subclass thereof to indicate other issues with the library. <em>Runtime exception</em>
	 */
	public void init() throws HgRuntimeException {
		final int revisionCount = revlog.getRevisionCount();
		Nodeid[] oldSequential = null, oldFirstParent = null, oldSecondParent = null, oldSorted = null;
		if (sequential != null && sequential.length > 0 && sequential.length < revisionCount) {
			int lastRecordedRevIndex = sequential.length-1;
			if (sequential[lastRecordedRevIndex].equals(revlog.getRevision(lastRecordedRevIndex))) {
				oldSequential = sequential;
				oldFirstParent = firstParent;
				oldSecondParent = secondParent;
				oldSorted = sorted;
				// not sure if there's a benefit in keeping sorted. assume quite some of them
				// might end up on the same place and thus minimize rearrangements
			}
		}
		firstParent = new Nodeid[revisionCount];
		// TODO [post 1.1] Branches/merges are less frequent, and most of secondParent would be -1/null, hence 
		// IntMap might be better alternative here, but need to carefully analyze (test) whether this brings
		// real improvement (IntMap has 2n capacity, and element lookup is log(n) instead of array's constant).
		// FWIW: in cpython's repo, with 70k+ revisions, there are 2618 values in secondParent 
		secondParent = new Nodeid[revisionCount];
		//
		sequential = new Nodeid[revisionCount];
		sorted = new Nodeid[revisionCount];
		headsBitSet = new BitSet(revisionCount);
		if (oldSequential != null) {
			assert oldFirstParent.length == oldSequential.length;
			assert oldSecondParent.length == oldSequential.length;
			assert oldSorted.length == oldSequential.length;
			System.arraycopy(oldSequential, 0, sequential, 0, oldSequential.length);
			System.arraycopy(oldFirstParent, 0, firstParent, 0, oldFirstParent.length);
			System.arraycopy(oldSecondParent, 0, secondParent, 0, oldSecondParent.length);
			System.arraycopy(oldSorted, 0, sorted, 0, oldSorted.length);
			// restore old heads so that new one are calculated correctly
			headsBitSet.set(0, oldSequential.length);
			for (int headIndex : heads.keys()) {
				headsBitSet.clear(headIndex);
			}
		}
		revlog.indexWalk(oldSequential == null ? 0 : oldSequential.length, revisionCount-1, this);
		seqWrapper = new ArrayHelper<Nodeid>(sequential);
		// HgRevisionMap doesn't keep sorted, try alternative here.
		// reference this.sorted (not only from ArrayHelper) helps to track ownership in hprof/mem dumps
		seqWrapper.sort(sorted, false, true);
		// no reason to keep BitSet, number of heads is usually small
		IntMap<Nodeid> _heads = new IntMap<Nodeid>(revisionCount - headsBitSet.cardinality());
		int index = 0;
		while (index < sequential.length) {
			index = headsBitSet.nextClearBit(index);
			// nextClearBit(length-1) gives length when bit is set,
			// however, last revision can't be a parent of any other, and
			// the last bit would be always 0, and no AIOOBE 
			_heads.put(index, sequential[index]);
			index++;
		} 
		headsBitSet = null;
		heads = _heads;
	}
	
	private static void assertSortedIndex(int x) {
		if (x < 0) {
			throw new HgInvalidStateException(String.format("Bad index %d", x));
		}
	}
	
	/**
	 * Tells whether supplied revision is from the walker's associated revlog.
	 * Note, {@link Nodeid#NULL}, although implicitly present as parent of a first revision, is not recognized as known. 
	 * @param nid revision to check, not <code>null</code>
	 * @return <code>true</code> if revision matches any revision in this revlog
	 */
	public boolean knownNode(Nodeid nid) {
		return seqWrapper.binarySearchSorted(nid) >= 0;
	}

	/**
	 * null if none. only known nodes (as per #knownNode) are accepted as arguments
	 */
	public Nodeid firstParent(Nodeid nid) {
		int x = seqWrapper.binarySearchSorted(nid);
		assertSortedIndex(x);
		int i = seqWrapper.getReverseIndex(x);
		return firstParent[i];
	}

	// never null, Nodeid.NULL if none known
	public Nodeid safeFirstParent(Nodeid nid) {
		Nodeid rv = firstParent(nid);
		return rv == null ? Nodeid.NULL : rv;
	}
	
	public Nodeid secondParent(Nodeid nid) {
		int x = seqWrapper.binarySearchSorted(nid);
		assertSortedIndex(x);
		int i = seqWrapper.getReverseIndex(x);
		return secondParent[i];
	}

	public Nodeid safeSecondParent(Nodeid nid) {
		Nodeid rv = secondParent(nid);
		return rv == null ? Nodeid.NULL : rv;
	}

	public boolean appendParentsOf(Nodeid nid, Collection<Nodeid> c) {
		int x = seqWrapper.binarySearchSorted(nid);
		assertSortedIndex(x);
		int i = seqWrapper.getReverseIndex(x);
		Nodeid p1 = firstParent[i];
		boolean modified = false;
		if (p1 != null) {
			modified = c.add(p1);
		}
		Nodeid p2 = secondParent[i];
		if (p2 != null) {
			modified = c.add(p2) || modified;
		}
		return modified;
	}

	// XXX alternative (and perhaps more reliable) approach would be to make a copy of allNodes and remove 
	// nodes, their parents and so on.
	
	// @return ordered collection of all children rooted at supplied nodes. Nodes shall not be descendants of each other!
	// Nodeids shall belong to this revlog
	public List<Nodeid> childrenOf(Collection<Nodeid> roots) {
		if (roots.isEmpty()) {
			return Collections.emptyList();
		}
		HashSet<Nodeid> parents = new HashSet<Nodeid>();
		LinkedList<Nodeid> result = new LinkedList<Nodeid>();
		int earliestRevision = Integer.MAX_VALUE;
		assert sequential.length == firstParent.length && firstParent.length == secondParent.length;
		// first, find earliest index of roots in question, as there's  no sense 
		// to check children among nodes prior to branch's root node
		for (Nodeid r : roots) {
			int x = seqWrapper.binarySearchSorted(r);
			assertSortedIndex(x);
			int i = seqWrapper.getReverseIndex(x);
			if (i < earliestRevision) {
				earliestRevision = i;
			}
			parents.add(sequential[i]); // add canonical instance in hope equals() is bit faster when can do a ==
		}
		for (int i = earliestRevision + 1; i < sequential.length; i++) {
			if (parents.contains(firstParent[i]) || parents.contains(secondParent[i])) {
				parents.add(sequential[i]); // to find next child
				result.add(sequential[i]);
			}
		}
		return result;
	}
	
	/**
	 * @return revisions that have supplied revision as their immediate parent
	 */
	public List<Nodeid> directChildren(Nodeid nid) {
		int x = seqWrapper.binarySearchSorted(nid);
		assertSortedIndex(x);
		int start = seqWrapper.getReverseIndex(x);
		nid = sequential[start]; // canonical instance
		if (!hasChildren(start)) {
			return Collections.emptyList();
		}
		ArrayList<Nodeid> result = new ArrayList<Nodeid>(5);
		for (int i = start + 1; i < sequential.length; i++) {
			if (nid == firstParent[i] || nid == secondParent[i]) {
				result.add(sequential[i]);
			}
		}
		return result;
	}
	
	/**
	 * @param nid possibly parent node, shall be {@link #knownNode(Nodeid) known} in this revlog.
	 * @return <code>true</code> if there's any node in this revlog that has specified node as one of its parents. 
	 */
	public boolean hasChildren(Nodeid nid) {
		int x = seqWrapper.binarySearchSorted(nid);
		assertSortedIndex(x);
		int i = seqWrapper.getReverseIndex(x);
		return hasChildren(i);
	}

	/**
	 * @return all revisions this map knows about
	 */
	public List<Nodeid> all() {
		return Arrays.asList(sequential);
	}

	/**
	 * Find out whether a given node is among descendants of another.
	 * 
	 * @param root revision to check for being (grand-)*parent of a child
	 * @param wannaBeChild candidate descendant revision
	 * @return <code>true</code> if <code>wannaBeChild</code> is among children of <code>root</code>
	 */
	public boolean isChild(Nodeid root, Nodeid wannaBeChild) {
		int x = seqWrapper.binarySearchSorted(root);
		assertSortedIndex(x);
		final int start = seqWrapper.getReverseIndex(x);
		root = sequential[start]; // canonical instance
		if (!hasChildren(start)) {
			return false; // root got no children at all
		}
		int y = seqWrapper.binarySearchSorted(wannaBeChild);
		if (y < 0) {
			return false; // not found
		}
		final int end = seqWrapper.getReverseIndex(y);
		wannaBeChild = sequential[end]; // canonicalize
		if (end <= start) {
			return false; // potential child was in repository earlier than root
		}
		HashSet<Nodeid> parents = new HashSet<Nodeid>();
		parents.add(root);
		for (int i = start + 1; i < end; i++) {
			if (parents.contains(firstParent[i]) || parents.contains(secondParent[i])) {
				parents.add(sequential[i]); // collect ancestors line
			}
		}
		return parents.contains(firstParent[end]) || parents.contains(secondParent[end]);
	}
	
	/**
	 * @return elements of this map that do not have a child recorded therein.
	 */
	public Collection<Nodeid> heads() {
		return heads.values();
	}
	
	/**
	 * @return map of revision to indexes
	 */
	public HgRevisionMap<T> getRevisionMap() {
		if (revisionIndexMap == null) {
			revisionIndexMap = new HgRevisionMap<T>(revlog);
			revisionIndexMap.init(seqWrapper);
		}
		return revisionIndexMap;
	}
	
	/**
	 * @return common ancestor of two revisions
	 */
	public Nodeid ancestor(Nodeid r1, Nodeid r2) {
		if (r1.equals(r2)) {
			return r1;
		}
		BitSet a1 = buildAncestors(r1);
		BitSet a2 = buildAncestors(r2);
		// BitSet.and() trims to shorter bitset, it's ok as we are not 
		// interested in bits that are part of one bitset only
		a1.and(a2);
		final int cardinality = a1.cardinality();
		if (cardinality == 1) {
			return sequential[a1.nextSetBit(0)];
		}
		assert cardinality > 0; // every revision is child of at least rev0
		final int length = sequential.length;
		int index = length / 2;
		int lastBitSet = -1;
		do {
			int nextSetBit = a1.nextSetBit(index);
			int nextIndex;
			if (nextSetBit == -1) {
				assert lastBitSet == -1 || lastBitSet <= index;
				nextIndex = index - (index - (lastBitSet == -1 ? 0 : lastBitSet)) / 2;
			} else {
				lastBitSet = nextSetBit;
				nextIndex = lastBitSet + (length - lastBitSet) / 2;
			}
			if (nextIndex == index) {
				break;
			}
			index = nextIndex;
		} while (true);
		if (lastBitSet == -1) {
			assert false; // likely error in the algorithm above (e.g. can't reach index==0)
			return sequential[0];
		}
		return sequential[lastBitSet];
	}

	private boolean hasChildren(int sequentialIndex) {
		return !heads.containsKey(sequentialIndex);
	}
	
	private BitSet buildAncestors(Nodeid nid) {
		int x = seqWrapper.binarySearchSorted(nid);
		assertSortedIndex(x);
		int i = seqWrapper.getReverseIndex(x);
		BitSet rv = new BitSet(sequential.length);
		HashSet<Nodeid> ancestors = new HashSet<Nodeid>();
		ancestors.add(nid);
		do {
			if (ancestors.contains(sequential[i])) {
				rv.set(i);
				if(firstParent[i] != null) {
					ancestors.add(firstParent[i]);
				}
				if (secondParent[i] != null) {
					ancestors.add(secondParent[i]);
				}
			}
			i--;
		} while (i >= 0);
		return rv;
	}
}