# HG changeset patch # User Artem Tikhomirov # Date 1373049765 -7200 # Node ID a5cf64f2e7e42180c03754b4c32f167d17702f3d # Parent 2f33f102a8fa59274a27ebbe1c2903cecac6c5d5 Poor performance when reading/collecting branch information. Respect new cache location for recent mercurial revisions. Use different algorithm to build branch cache diff -r 2f33f102a8fa -r a5cf64f2e7e4 src/org/tmatesoft/hg/repo/HgBranches.java --- a/src/org/tmatesoft/hg/repo/HgBranches.java Tue Jun 11 16:31:42 2013 +0200 +++ b/src/org/tmatesoft/hg/repo/HgBranches.java Fri Jul 05 20:42:45 2013 +0200 @@ -27,8 +27,8 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.LinkedList; @@ -46,7 +46,8 @@ import org.tmatesoft.hg.util.ProgressSupport; /** - * + * Access information about branches in the repository + * * @author Artem Tikhomirov * @author TMate Software Ltd. */ @@ -125,91 +126,69 @@ void collect(final ProgressSupport ps) throws HgRuntimeException { branches.clear(); final HgRepository repo = internalRepo.getRepo(); - ps.start(1 + repo.getChangelog().getRevisionCount() * 2); + final HgChangelog clog = repo.getChangelog(); + ps.start(1 + clog.getRevisionCount() * 2); // int lastCached = readCache(); - isCacheActual = lastCached == repo.getChangelog().getLastRevision(); + isCacheActual = lastCached == clog.getLastRevision(); if (!isCacheActual) { - final HgParentChildMap pw = new HgParentChildMap(repo.getChangelog()); + // XXX need a way to share HgParentChildMap + final HgParentChildMap pw = new HgParentChildMap(clog); pw.init(); - ps.worked(repo.getChangelog().getRevisionCount()); + ps.worked(clog.getRevisionCount()); + // // first revision branch found at final HashMap branchStart = new HashMap(); - // last revision seen for the branch - final HashMap branchLastSeen = new HashMap(); // revisions from the branch that have no children at all final HashMap> branchHeads = new HashMap>(); - // revisions that are immediate children of a node from a given branch - // after iteration, there are some revisions left in this map (children of a branch last revision - // that doesn't belong to the branch. No use of this now, perhaps can deduce isInactive (e.g.those - // branches that have non-empty candidates are inactive if all their heads are roots for those left) - final HashMap> branchHeadCandidates = new HashMap>(); HgChangelog.Inspector insp = new HgChangelog.Inspector() { + private final ArrayList parents = new ArrayList(3); + public void next(int revisionNumber, Nodeid nodeid, RawChangeset cset) { String branchName = cset.branch(); + List _branchHeads; + // there are chances (with --force key) branch can get more than one start + // revision. Neither BranchInfo nor this code support this scenario at the moment. if (!branchStart.containsKey(branchName)) { branchStart.put(branchName, nodeid); - branchHeads.put(branchName, new LinkedList()); - branchHeadCandidates.put(branchName, new LinkedList()); + branchHeads.put(branchName, _branchHeads = new LinkedList()); } else { - final List headCandidates = branchHeadCandidates.get(branchName); - if (headCandidates.remove(nodeid)) { - // likely we don't need to keep parent anymore, as we found at least 1 child thereof to be at the same branch - // however, it's possible the child we found is a result of an earlier fork, and revision in the - // branchLastSeen is 'parallel' head, which needs to be kept - Nodeid lastSeenInBranch = branchLastSeen.get(branchName); - // check if current revision is on descendant line. Seems direct parents check is enough - if (pw.safeFirstParent(nodeid).equals(lastSeenInBranch) || pw.safeSecondParent(nodeid).equals(lastSeenInBranch)) { - branchLastSeen.remove(branchName); + _branchHeads = branchHeads.get(branchName); + if (_branchHeads == null) { + branchHeads.put(branchName, _branchHeads = new LinkedList()); + } + } + // so far present node is the best candidate for head + _branchHeads.add(nodeid); + parents.clear(); + // parents of this node, however, cease to be heads (if they are from this branch) + pw.appendParentsOf(nodeid, parents); + _branchHeads.removeAll(parents); + ps.worked(1); + } + }; + // XXX alternatively may iterate with pw.all().subList(lastCached) + // but need an effective way to find out branch of particular changeset + clog.range(lastCached == -1 ? 0 : lastCached+1, HgRepository.TIP, insp); + // + // build BranchInfo, based on found and cached + for (String bn : branchStart.keySet()) { + BranchInfo bi = branches.get(bn); + if (bi != null) { + // combine heads found so far with those cached + LinkedHashSet oldHeads = new LinkedHashSet(bi.getHeads()); + // expect size of both oldHeads and newHeads sets to be small, and for x for hence acceptable. + for (Nodeid newHead : branchHeads.get(bn)) { + for (Iterator it = oldHeads.iterator(); it.hasNext();) { + if (pw.isChild(it.next(), newHead)) { + it.remove(); } } } - List immediateChildren = pw.directChildren(nodeid); - if (immediateChildren.size() > 0) { - // 1) children may be in another branch - // and unless we later came across another element from this branch, - // we need to record all these as potential heads - // - // 2) head1 with children in different branch, and head2 in this branch without children - branchLastSeen.put(branchName, nodeid); - branchHeadCandidates.get(branchName).addAll(immediateChildren); - } else { - // no more children known for this node, it's (one of the) head of the branch - branchHeads.get(branchName).add(nodeid); - } - ps.worked(1); - } - }; - repo.getChangelog().range(lastCached == -1 ? 0 : lastCached+1, HgRepository.TIP, insp); - // those last seen revisions from the branch that had no children from the same branch are heads. - for (String bn : branchLastSeen.keySet()) { - // these are inactive branches? - there were children, but not from the same branch? - branchHeads.get(bn).add(branchLastSeen.get(bn)); - } - for (String bn : branchStart.keySet()) { - BranchInfo bi = branches.get(bn); - if (bi != null) { - // although heads from cache shall not intersect with heads after lastCached, - // use of LHS doesn't hurt (and makes sense e.g. if cache is not completely correct in my tests) - LinkedHashSet heads = new LinkedHashSet(bi.getHeads()); - for (Nodeid oldHead : bi.getHeads()) { - // XXX perhaps, need pw.canReach(Nodeid from, Collection to) - List newChildren = pw.childrenOf(Collections.singletonList(oldHead)); - if (!newChildren.isEmpty()) { - // likely not a head any longer, - // check if any new head can be reached from old one, and, if yes, - // do not consider that old head as head. - for (Nodeid newHead : branchHeads.get(bn)) { - if (newChildren.contains(newHead)) { - heads.remove(oldHead); - break; - } - } - } // else - oldHead still head for the branch - } - heads.addAll(branchHeads.get(bn)); - bi = new BranchInfo(bn, bi.getStart(), heads.toArray(new Nodeid[0])); + oldHeads.addAll(branchHeads.get(bn)); + assert oldHeads.size() > 0; + bi = new BranchInfo(bn, bi.getStart(), oldHeads.toArray(new Nodeid[oldHeads.size()])); } else { Nodeid[] heads = branchHeads.get(bn).toArray(new Nodeid[0]); bi = new BranchInfo(bn, branchStart.get(bn), heads); @@ -217,7 +196,6 @@ branches.put(bn, bi); } } - final HgChangelog clog = repo.getChangelog(); final HgRevisionMap rmap = new HgRevisionMap(clog).init(); for (BranchInfo bi : branches.values()) { bi.validate(clog, rmap); @@ -275,6 +253,11 @@ private File getCacheFile() { // prior to 1.8 used to be .hg/branchheads.cache + // since 2.5 there is filter suffix + File f = internalRepo.getFileFromRepoDir("cache/branchheads-base"); + if (f.exists()) { + return f; + } return internalRepo.getFileFromRepoDir("cache/branchheads"); } @@ -388,6 +371,7 @@ } // public Nodeid getTip() { // } + // XXX Not public as there are chances for few possible branch starts, and I need to decide how to handle that /*public*/ Nodeid getStart() { // first node where branch appears return start; diff -r 2f33f102a8fa -r a5cf64f2e7e4 src/org/tmatesoft/hg/repo/HgParentChildMap.java --- a/src/org/tmatesoft/hg/repo/HgParentChildMap.java Tue Jun 11 16:31:42 2013 +0200 +++ b/src/org/tmatesoft/hg/repo/HgParentChildMap.java Fri Jul 05 20:42:45 2013 +0200 @@ -244,4 +244,35 @@ } return false; } + + /** + * Find out whether a given node is among descendants of another. + * + * @param root revision to check for being (grand-)*parent of a child + * @param wannaBeChild candidate descendant revision + * @return true if wannaBeChild is among children of root + */ + public boolean isChild(Nodeid root, Nodeid wannaBeChild) { + int x = Arrays.binarySearch(sorted, root); + assertSortedIndex(x); + root = sorted[x]; // canonical instance + final int start = sorted2natural[x]; + int y = Arrays.binarySearch(sorted, wannaBeChild); + if (y < 0) { + return false; // not found + } + wannaBeChild = sorted[y]; // canonicalize + final int end = sorted2natural[y]; + if (end <= start) { + return false; // potential child was in repository earlier than root + } + HashSet parents = new HashSet(); + parents.add(root); + for (int i = start + 1; i < end; i++) { + if (parents.contains(firstParent[i]) || parents.contains(secondParent[i])) { + parents.add(sequential[i]); // collect ancestors line + } + } + return parents.contains(firstParent[end]) || parents.contains(secondParent[end]); + } } \ No newline at end of file