Mercurial > hg4j
view src/org/tmatesoft/hg/internal/RepositoryComparator.java @ 709:497e697636fc
Report merged lines as changed block if possible, not as a sequence of added/deleted blocks. To facilitate access to merge parent lines AddBlock got mergeLineAt() method that reports index of the line in the second parent (if any), while insertedAt() has been changed to report index in the first parent always
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Wed, 21 Aug 2013 16:23:27 +0200 |
parents | 822f3a83ff57 |
children |
line wrap: on
line source
/* * Copyright (c) 2011-2013 TMate Software Ltd * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * For information on how to redistribute this software under * the terms of a license other than GNU General Public License * contact TMate Software at support@hg4j.com */ package org.tmatesoft.hg.internal; import static org.tmatesoft.hg.core.Nodeid.NULL; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; import java.util.ListIterator; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import org.tmatesoft.hg.core.HgRemoteConnectionException; import org.tmatesoft.hg.core.Nodeid; import org.tmatesoft.hg.repo.HgChangelog; import org.tmatesoft.hg.repo.HgInvalidStateException; import org.tmatesoft.hg.repo.HgParentChildMap; import org.tmatesoft.hg.repo.HgRemoteRepository; import org.tmatesoft.hg.repo.HgRemoteRepository.Range; import org.tmatesoft.hg.repo.HgRemoteRepository.RemoteBranch; import org.tmatesoft.hg.util.CancelSupport; import org.tmatesoft.hg.util.CancelledException; import org.tmatesoft.hg.util.ProgressSupport; /** * * @author Artem Tikhomirov * @author TMate Software Ltd. */ public class RepositoryComparator { private final boolean debug = Boolean.parseBoolean(System.getProperty("hg4j.remote.debug")); private final HgParentChildMap<HgChangelog> localRepo; private final HgRemoteRepository remoteRepo; private List<Nodeid> common; private List<Nodeid> remoteHeads; public RepositoryComparator(HgParentChildMap<HgChangelog> pwLocal, HgRemoteRepository hgRemote) { localRepo = pwLocal; remoteRepo = hgRemote; } public RepositoryComparator compare(ProgressSupport progressSupport, CancelSupport cancelSupport) throws HgRemoteConnectionException, CancelledException { cancelSupport.checkCancelled(); progressSupport.start(10); common = Collections.unmodifiableList(findCommonWithRemote()); // sanity check for (Nodeid n : common) { if (!localRepo.knownNode(n)) { throw new HgInvalidStateException("Unknown node reported as common:" + n); } } progressSupport.done(); return this; } public List<Nodeid> getCommon() { if (common == null) { throw new HgInvalidStateException("Call #compare(Object) first"); } return common; } public List<Nodeid> getRemoteHeads() { assert remoteHeads != null; return remoteHeads; } /** * @return revisions that are children of common entries, i.e. revisions that are present on the local server and not on remote. */ public List<Nodeid> getLocalOnlyRevisions() { final List<Nodeid> c = getCommon(); if (c.isEmpty()) { return localRepo.all(); } else { final RevisionSet rsCommon = new RevisionSet(c); final RevisionSet localHeads = new RevisionSet(localRepo.heads()); final List<Nodeid> commonChildren = localRepo.childrenOf(c); final RevisionSet rsCommonChildren = new RevisionSet(commonChildren); // check if there's any revision in the repository that doesn't trace to common // e.g. branches from one of common ancestors RevisionSet headsNotFromCommon = localHeads.subtract(rsCommonChildren).subtract(rsCommon); if (headsNotFromCommon.isEmpty()) { return commonChildren; } RevisionSet all = new RevisionSet(localRepo.all()); // need outgoing := ancestors(missing) - ancestors(common): RevisionSet rsAncestors = all.ancestors(headsNotFromCommon, localRepo); // #ancestors gives only parents, we need terminating children as well rsAncestors = rsAncestors.union(headsNotFromCommon); final RevisionSet rsAncestorsCommon = all.ancestors(rsCommon, localRepo); RevisionSet outgoing = rsAncestors.subtract(rsAncestorsCommon).subtract(rsCommon); // outgoing keeps children that spined off prior to common revisions return outgoing.union(rsCommonChildren).asList(); } } private List<Nodeid> findCommonWithRemote() throws HgRemoteConnectionException { remoteHeads = remoteRepo.heads(); LinkedList<Nodeid> resultCommon = new LinkedList<Nodeid>(); // these remotes are known in local LinkedList<Nodeid> toQuery = new LinkedList<Nodeid>(); // these need further queries to find common for (Nodeid rh : remoteHeads) { if (localRepo.knownNode(rh)) { resultCommon.add(rh); } else { toQuery.add(rh); } } if (toQuery.isEmpty()) { return resultCommon; } LinkedList<RemoteBranch> checkUp2Head = new LinkedList<RemoteBranch>(); // branch.root and branch.head are of interest only. // these are branches with unknown head but known root, which might not be the last common known, // i.e. there might be children changeset that are also available at remote, [..?..common-head..remote-head] - need to // scroll up to common head. while (!toQuery.isEmpty()) { List<RemoteBranch> remoteBranches = remoteRepo.branches(toQuery); //head, root, first parent, second parent toQuery.clear(); while(!remoteBranches.isEmpty()) { RemoteBranch rb = remoteBranches.remove(0); // I assume branches remote call gives branches with head equal to what I pass there, i.e. // that I don't need to check whether rb.head is unknown. if (localRepo.knownNode(rb.root)) { // we known branch start, common head is somewhere in its descendants line checkUp2Head.add(rb); } else { // dig deeper in the history, if necessary if (!rb.p1.isNull() && !localRepo.knownNode(rb.p1)) { toQuery.add(rb.p1); } if (!rb.p2.isNull() && !localRepo.knownNode(rb.p2)) { toQuery.add(rb.p2); } } } } // can't check nodes between checkUp2Head element and local heads, remote might have distinct descendants sequence for (RemoteBranch rb : checkUp2Head) { // rb.root is known locally List<Nodeid> remoteRevisions = remoteRepo.between(rb.head, rb.root); if (remoteRevisions.isEmpty()) { // head is immediate child resultCommon.add(rb.root); } else { // between gives result from head to root, I'd like to go in reverse direction Collections.reverse(remoteRevisions); Nodeid root = rb.root; while(!remoteRevisions.isEmpty()) { Nodeid n = remoteRevisions.remove(0); if (localRepo.knownNode(n)) { if (remoteRevisions.isEmpty()) { // this is the last known node before an unknown resultCommon.add(n); break; } if (remoteRevisions.size() == 1) { // there's only one left between known n and unknown head // this check is to save extra between query, not really essential Nodeid last = remoteRevisions.remove(0); resultCommon.add(localRepo.knownNode(last) ? last : n); break; } // might get handy for next between query, to narrow search down root = n; } else { remoteRevisions = remoteRepo.between(n, root); Collections.reverse(remoteRevisions); if (remoteRevisions.isEmpty()) { resultCommon.add(root); } } } } } // TODO ensure unique elements in the list return resultCommon; } // somewhat similar to Outgoing.findCommonWithRemote() public List<BranchChain> calculateMissingBranches() throws HgRemoteConnectionException { List<Nodeid> remoteHeads = remoteRepo.heads(); LinkedList<Nodeid> common = new LinkedList<Nodeid>(); // these remotes are known in local LinkedHashSet<Nodeid> toQuery = new LinkedHashSet<Nodeid>(); // these need further queries to find common for (Nodeid rh : remoteHeads) { if (localRepo.knownNode(rh)) { common.add(rh); } else { toQuery.add(rh); } } if (toQuery.isEmpty()) { return Collections.emptyList(); // no incoming changes } LinkedList<BranchChain> branches2load = new LinkedList<BranchChain>(); // return value // detailed comments are in Outgoing.findCommonWithRemote LinkedList<RemoteBranch> checkUp2Head = new LinkedList<RemoteBranch>(); // records relation between branch head and its parent branch, if any HashMap<Nodeid, BranchChain> head2chain = new HashMap<Nodeid, BranchChain>(); while (!toQuery.isEmpty()) { List<RemoteBranch> remoteBranches = remoteRepo.branches(new ArrayList<Nodeid>(toQuery)); //head, root, first parent, second parent toQuery.clear(); while(!remoteBranches.isEmpty()) { RemoteBranch rb = remoteBranches.remove(0); BranchChain chainElement = head2chain.get(rb.head); if (chainElement == null) { chainElement = new BranchChain(rb.head); // record this unknown branch to download later branches2load.add(chainElement); // the only chance we'll need chainElement in the head2chain is when we know this branch's root head2chain.put(rb.head, chainElement); } if (localRepo.knownNode(rb.root)) { // we known branch start, common head is somewhere in its descendants line checkUp2Head.add(rb); } else { chainElement.branchRoot = rb.root; // dig deeper in the history, if necessary boolean hasP1 = !rb.p1.isNull(), hasP2 = !rb.p2.isNull(); if (hasP1 && !localRepo.knownNode(rb.p1)) { toQuery.add(rb.p1); // we might have seen parent node already, and recorded it as a branch chain // we shall reuse existing BC to get it completely initialized (head2chain map // on second put with the same key would leave first BC uninitialized. // It seems there's no reason to be afraid (XXX although shall double-check) // that BC's chain would get corrupt (its p1 and p2 fields assigned twice with different values) // as parents are always the same (and likely, BC that is common would be the last unknown) BranchChain bc = head2chain.get(rb.p1); if (bc == null) { head2chain.put(rb.p1, bc = new BranchChain(rb.p1)); } chainElement.p1 = bc; } if (hasP2 && !localRepo.knownNode(rb.p2)) { toQuery.add(rb.p2); BranchChain bc = head2chain.get(rb.p2); if (bc == null) { head2chain.put(rb.p2, bc = new BranchChain(rb.p2)); } chainElement.p2 = bc; } if (!hasP1 && !hasP2) { // special case, when we do incoming against blank repository, chainElement.branchRoot // is first unknown element (revision 0). We need to add another fake BranchChain // to fill the promise that terminal BranchChain has branchRoot that is known both locally and remotely BranchChain fake = new BranchChain(NULL); fake.branchRoot = NULL; chainElement.p1 = chainElement.p2 = fake; } } } } for (RemoteBranch rb : checkUp2Head) { Nodeid h = rb.head; Nodeid r = rb.root; int watchdog = 1000; assert head2chain.containsKey(h); BranchChain bc = head2chain.get(h); assert bc != null : h.toString(); // if we know branch root locally, there could be no parent branch chain elements. assert bc.p1 == null; assert bc.p2 == null; do { List<Nodeid> between = remoteRepo.between(h, r); if (between.isEmpty()) { bc.branchRoot = r; break; } else { Collections.reverse(between); for (Nodeid n : between) { if (localRepo.knownNode(n)) { r = n; } else { h = n; break; } } Nodeid lastInBetween = between.get(between.size() - 1); if (r.equals(lastInBetween)) { bc.branchRoot = r; break; } else if (h.equals(lastInBetween)) { // the only chance for current head pointer to point to the sequence tail // is when r is second from the between list end (iow, head,1,[2],4,8...,root) bc.branchRoot = r; break; } } } while(--watchdog > 0); if (watchdog == 0) { throw new HgInvalidStateException(String.format("Can't narrow down branch [%s, %s]", rb.head.shortNotation(), rb.root.shortNotation())); } } if (debug) { System.out.println("calculateMissingBranches:"); for (BranchChain bc : branches2load) { bc.dump(); } } return branches2load; } // root and head (and all between) are unknown for each chain element but last (terminal), which has known root (revision // known to be locally and at remote server // alternative would be to keep only unknown elements (so that promise of calculateMissingBranches would be 100% true), but that // seems to complicate the method, while being useful only for the case when we ask incoming for an empty repository (i.e. // where branch chain return all nodes, -1..tip. public static final class BranchChain { // when we construct a chain, we know head which is missing locally, hence init it right away. // as for root (branch unknown start), we might happen to have one locally, and need further digging to find out right branch start public final Nodeid branchHead; public Nodeid branchRoot; // either of these can be null, or both. // although RemoteBranch has either both parents null, or both non-null, when we construct a chain // we might encounter that we locally know one of branch's parent, hence in the chain corresponding field will be blank. public BranchChain p1; public BranchChain p2; public BranchChain(Nodeid head) { assert head != null; branchHead = head; } public boolean isTerminal() { return p1 == null && p2 == null; // either can be null, see comment above. Terminal is only when no way to descent } // true when this BranchChain is a branch that spans up to very start of the repository // Thus, the only common revision is NULL, recorded in a fake BranchChain object shared between p1 and p2 /*package-local*/ boolean isRepoStart() { return p1 == p2 && p1 != null && p1.branchHead == p1.branchRoot && p1.branchHead.isNull(); } @Override public String toString() { return String.format("BranchChain [root:%s, head:%s]", branchRoot, branchHead); } void dump() { System.out.println(toString()); internalDump(" "); } private void internalDump(String prefix) { if (p1 != null) { System.out.println(prefix + p1.toString()); } else if (p2 != null) { System.out.println(prefix + "NONE?!"); } if (p2 != null) { System.out.println(prefix + p2.toString()); } else if (p1 != null) { System.out.println(prefix + "NONE?!"); } prefix += " "; if (p1 != null) { p1.internalDump(prefix); } if (p2 != null) { p2.internalDump(prefix); } } } /** * @return list of nodeids from branchRoot to branchHead, inclusive. IOW, first element of the list is always root of the branch */ public List<Nodeid> completeBranch(final Nodeid branchRoot, final Nodeid branchHead) throws HgRemoteConnectionException { class DataEntry { public final Nodeid queryHead; public final int headIndex; public List<Nodeid> entries; public DataEntry(Nodeid head, int index, List<Nodeid> data) { queryHead = head; headIndex = index; entries = data; } }; List<Nodeid> initial = remoteRepo.between(branchHead, branchRoot); Nodeid[] result = new Nodeid[1 + (1 << initial.size())]; result[0] = branchHead; int rootIndex = -1; // index in the result, where to place branche's root. if (initial.isEmpty()) { rootIndex = 1; } else if (initial.size() == 1) { rootIndex = 2; } LinkedList<DataEntry> datas = new LinkedList<DataEntry>(); // DataEntry in datas has entries list filled with 'between' data, whereas // DataEntry in toQuery keeps only nodeid and its index, with entries to be initialized before // moving to datas. LinkedList<DataEntry> toQuery = new LinkedList<DataEntry>(); // datas.add(new DataEntry(branchHead, 0, initial)); int totalQueries = 1; HashSet<Nodeid> queried = new HashSet<Nodeid>(); while(!datas.isEmpty()) { // keep record of those planned to be queried next time we call between() // although may keep these in queried, if really don't want separate collection HashSet<Nodeid> scheduled = new HashSet<Nodeid>(); do { DataEntry de = datas.removeFirst(); // populate result with discovered elements between de.qiueryRoot and branch's head for (int i = 1, j = 0; j < de.entries.size(); i = i << 1, j++) { int idx = de.headIndex + i; result[idx] = de.entries.get(j); } // form next query entries from new unknown elements if (de.entries.size() > 1) { /* when entries has only one element, it means de.queryRoot was at head-2 position, and thus * no new information can be obtained. E.g. when it's 2, it might be case of [0..4] query with * [1,2] result, and we need one more query to get element 3. */ for (int i =1, j = 0; j < de.entries.size(); i = i<<1, j++) { int idx = de.headIndex + i; Nodeid x = de.entries.get(j); if (!queried.contains(x) && !scheduled.contains(x) && (rootIndex == -1 || rootIndex - de.headIndex > 1)) { /*queries for elements right before head is senseless, but unless we know head's index, do it anyway*/ toQuery.add(new DataEntry(x, idx, null)); scheduled.add(x); } } } } while (!datas.isEmpty()); if (!toQuery.isEmpty()) { totalQueries++; } // for each query, create an between request range, keep record Range->DataEntry to know range's start index LinkedList<HgRemoteRepository.Range> betweenBatch = new LinkedList<HgRemoteRepository.Range>(); HashMap<HgRemoteRepository.Range, DataEntry> rangeToEntry = new HashMap<HgRemoteRepository.Range, DataEntry>(); for (DataEntry de : toQuery) { queried.add(de.queryHead); HgRemoteRepository.Range r = new HgRemoteRepository.Range(branchRoot, de.queryHead); betweenBatch.add(r); rangeToEntry.put(r, de); } if (!betweenBatch.isEmpty()) { Map<Range, List<Nodeid>> between = remoteRepo.between(betweenBatch); for (Entry<Range, List<Nodeid>> e : between.entrySet()) { DataEntry de = rangeToEntry.get(e.getKey()); assert de != null; de.entries = e.getValue(); if (rootIndex == -1 && de.entries.size() == 1) { // returned sequence of length 1 means we used element from [head-2] as root int numberOfElementsExcludingRootAndHead = de.headIndex + 1; rootIndex = numberOfElementsExcludingRootAndHead + 1; if (debug) { System.out.printf("On query %d found out exact number of missing elements: %d\n", totalQueries, numberOfElementsExcludingRootAndHead); } } datas.add(de); // queue up to record result and construct further requests } betweenBatch.clear(); rangeToEntry.clear(); } toQuery.clear(); } if (rootIndex == -1) { throw new HgInvalidStateException("Shall not happen, provided between output is correct"); } result[rootIndex] = branchRoot; boolean resultOk = true; LinkedList<Nodeid> fromRootToHead = new LinkedList<Nodeid>(); IntVector missing = new IntVector(); for (int i = 0; i <= rootIndex; i++) { Nodeid n = result[i]; if (n == null) { missing.add(i); resultOk = false; } fromRootToHead.addFirst(n); // reverse order } if (debug) { System.out.println("Total queries:" + totalQueries); } if (!resultOk) { assert missing.size() > 0; // TODO post-1.0 perhaps, there's better alternative than HgInvalidStateException, e.g. HgDataFormatException? throw new HgInvalidStateException(String.format("Missing elements with indexes: %s", Arrays.toString(missing.toArray()))); } return fromRootToHead; } /** * returns in order from branch root to head * for a non-empty BranchChain, shall return modifiable list */ public List<Nodeid> visitBranches(BranchChain bc) throws HgRemoteConnectionException { if (bc == null) { return Collections.emptyList(); } List<Nodeid> mine = completeBranch(bc.branchRoot, bc.branchHead); if (bc.isTerminal() || bc.isRepoStart()) { return mine; } List<Nodeid> parentBranch1 = visitBranches(bc.p1); List<Nodeid> parentBranch2 = visitBranches(bc.p2); // merge LinkedList<Nodeid> merged = new LinkedList<Nodeid>(); ListIterator<Nodeid> i1 = parentBranch1.listIterator(), i2 = parentBranch2.listIterator(); while (i1.hasNext() && i2.hasNext()) { Nodeid n1 = i1.next(); Nodeid n2 = i2.next(); if (n1.equals(n2)) { merged.addLast(n1); } else { // first different => add both, and continue adding both tails sequentially merged.add(n2); merged.add(n1); break; } } // copy rest of second parent branch while (i2.hasNext()) { merged.add(i2.next()); } // copy rest of first parent branch while (i1.hasNext()) { merged.add(i1.next()); } // ArrayList<Nodeid> rv = new ArrayList<Nodeid>(mine.size() + merged.size()); rv.addAll(merged); rv.addAll(mine); return rv; } public void collectKnownRoots(BranchChain bc, Set<Nodeid> result) { if (bc == null) { return; } if (bc.isTerminal()) { result.add(bc.branchRoot); return; } if (bc.isRepoStart()) { return; } collectKnownRoots(bc.p1, result); collectKnownRoots(bc.p2, result); } }