# HG changeset patch # User Artem Tikhomirov # Date 1302837464 -7200 # Node ID c9b305df0b890b6657285abb5ddbdeb902e5037a # Parent 344e8d7e4d6eb769799d321bcbbe555f64e577db Optimization: use ParentWalker to get changeset's parents, if possible. Do not keep duplicating nodeids and strings in manifest revisions diff -r 344e8d7e4d6e -r c9b305df0b89 src/org/tmatesoft/hg/core/ChangesetTransformer.java --- a/src/org/tmatesoft/hg/core/ChangesetTransformer.java Fri Apr 15 03:35:08 2011 +0200 +++ b/src/org/tmatesoft/hg/core/ChangesetTransformer.java Fri Apr 15 05:17:44 2011 +0200 @@ -36,7 +36,8 @@ private final HgChangeset changeset; private Set branches; - public ChangesetTransformer(HgRepository hgRepo, HgLogCommand.Handler delegate) { + // repo and delegate can't be null, parent walker can + public ChangesetTransformer(HgRepository hgRepo, HgLogCommand.Handler delegate, HgChangelog.ParentWalker pw) { if (hgRepo == null || delegate == null) { throw new IllegalArgumentException(); } @@ -45,6 +46,7 @@ PathPool pp = new PathPool(new PathRewrite.Empty()); statusCollector.setPathPool(pp); changeset = new HgChangeset(statusCollector, pp); + changeset.setParentHelper(pw); handler = delegate; } diff -r 344e8d7e4d6e -r c9b305df0b89 src/org/tmatesoft/hg/core/HgChangeset.java --- a/src/org/tmatesoft/hg/core/HgChangeset.java Fri Apr 15 03:35:08 2011 +0200 +++ b/src/org/tmatesoft/hg/core/HgChangeset.java Fri Apr 15 05:17:44 2011 +0200 @@ -16,12 +16,15 @@ */ package org.tmatesoft.hg.core; +import static org.tmatesoft.hg.core.Nodeid.NULL; + import java.util.ArrayList; import java.util.Collections; import java.util.List; import org.tmatesoft.hg.core.HgLogCommand.FileRevision; import org.tmatesoft.hg.repo.HgChangelog.RawChangeset; +import org.tmatesoft.hg.repo.HgChangelog; import org.tmatesoft.hg.repo.HgRepository; import org.tmatesoft.hg.repo.HgStatusCollector; import org.tmatesoft.hg.util.Path; @@ -39,6 +42,8 @@ private final HgStatusCollector statusHelper; private final Path.Source pathHelper; + private HgChangelog.ParentWalker parentHelper; + // private RawChangeset changeset; private Nodeid nodeid; @@ -47,6 +52,7 @@ private List modifiedFiles, addedFiles; private List deletedFiles; private int revNumber; + private byte[] parent1, parent2; // XXX consider CommandContext with StatusCollector, PathPool etc. Commands optionally get CC through a cons or create new // and pass it around @@ -54,15 +60,26 @@ statusHelper = statusCollector; pathHelper = pathFactory; } - - /*package-local*/ - void init(int localRevNumber, Nodeid nid, RawChangeset rawChangeset) { + + /*package-local*/ void init(int localRevNumber, Nodeid nid, RawChangeset rawChangeset) { revNumber = localRevNumber; nodeid = nid; changeset = rawChangeset; modifiedFiles = addedFiles = null; deletedFiles = null; + parent1 = parent2 = null; + // keep references to parentHelper, statusHelper and pathHelper } + + /*package-local*/ void setParentHelper(HgChangelog.ParentWalker pw) { + parentHelper = pw; + if (parentHelper != null) { + if (parentHelper.getRepo() != statusHelper.getRepo()) { + throw new IllegalArgumentException(); + } + } + } + public int getRevision() { return revNumber; } @@ -119,21 +136,33 @@ } public boolean isMerge() { - return !Nodeid.NULL.equals(getSecondParentRevision()); + // p1 == -1 and p2 != -1 is legitimate case + return !NULL.equals(getFirstParentRevision()) && !NULL.equals(getSecondParentRevision()); } public Nodeid getFirstParentRevision() { - // XXX may read once for both p1 and p2 - // or use ParentWalker to minimize reads even more. - byte[] p1 = new byte[20]; - statusHelper.getRepo().getChangelog().parents(revNumber, new int[2], p1, null); - return Nodeid.fromBinary(p1, 0); + if (parentHelper != null) { + return parentHelper.safeFirstParent(nodeid); + } + // read once for both p1 and p2 + if (parent1 == null) { + parent1 = new byte[20]; + parent2 = new byte[20]; + statusHelper.getRepo().getChangelog().parents(revNumber, new int[2], parent1, parent2); + } + return Nodeid.fromBinary(parent1, 0); } public Nodeid getSecondParentRevision() { - byte[] p2 = new byte[20]; - statusHelper.getRepo().getChangelog().parents(revNumber, new int[2], null, p2); - return Nodeid.fromBinary(p2, 0); + if (parentHelper != null) { + return parentHelper.safeSecondParent(nodeid); + } + if (parent2 == null) { + parent1 = new byte[20]; + parent2 = new byte[20]; + statusHelper.getRepo().getChangelog().parents(revNumber, new int[2], parent1, parent2); + } + return Nodeid.fromBinary(parent2, 0); } @Override diff -r 344e8d7e4d6e -r c9b305df0b89 src/org/tmatesoft/hg/core/HgIncomingCommand.java --- a/src/org/tmatesoft/hg/core/HgIncomingCommand.java Fri Apr 15 03:35:08 2011 +0200 +++ b/src/org/tmatesoft/hg/core/HgIncomingCommand.java Fri Apr 15 05:17:44 2011 +0200 @@ -133,7 +133,7 @@ private final HgChangelog changelog; { - transformer = new ChangesetTransformer(localRepo, handler); + transformer = new ChangesetTransformer(localRepo, handler, getParentHelper()); transformer.limitBranches(branches); parentHelper = getParentHelper(); changelog = localRepo.getChangelog(); diff -r 344e8d7e4d6e -r c9b305df0b89 src/org/tmatesoft/hg/core/HgLogCommand.java --- a/src/org/tmatesoft/hg/core/HgLogCommand.java Fri Apr 15 03:35:08 2011 +0200 +++ b/src/org/tmatesoft/hg/core/HgLogCommand.java Fri Apr 15 05:17:44 2011 +0200 @@ -59,6 +59,7 @@ private Path file; private boolean followHistory; // makes sense only when file != null private ChangesetTransformer csetTransform; + private HgChangelog.ParentWalker parentHelper; public HgLogCommand(HgRepository hgRepo) { repo = hgRepo; @@ -184,9 +185,13 @@ } try { count = 0; + HgChangelog.ParentWalker pw = parentHelper; // leave it uninitialized unless we iterate whole repo + if (file == null) { + pw = getParentHelper(); + } // ChangesetTransfrom creates a blank PathPool, and #file(String, boolean) above // may utilize it as well. CommandContext? How about StatusCollector there as well? - csetTransform = new ChangesetTransformer(repo, handler); + csetTransform = new ChangesetTransformer(repo, handler, pw); if (file == null) { repo.getChangelog().range(startRev, endRev, this); } else { @@ -244,6 +249,15 @@ count++; csetTransform.next(revisionNumber, nodeid, cset); } + + private HgChangelog.ParentWalker getParentHelper() { + if (parentHelper == null) { + parentHelper = repo.getChangelog().new ParentWalker(); + parentHelper.init(); + } + return parentHelper; + } + public interface Handler { /** diff -r 344e8d7e4d6e -r c9b305df0b89 src/org/tmatesoft/hg/core/HgOutgoingCommand.java --- a/src/org/tmatesoft/hg/core/HgOutgoingCommand.java Fri Apr 15 03:35:08 2011 +0200 +++ b/src/org/tmatesoft/hg/core/HgOutgoingCommand.java Fri Apr 15 05:17:44 2011 +0200 @@ -38,6 +38,7 @@ private HgRemoteRepository remoteRepo; private boolean includeSubrepo; private RepositoryComparator comparator; + private HgChangelog.ParentWalker parentHelper; private Set branches; public HgOutgoingCommand(HgRepository hgRepo) { @@ -104,7 +105,7 @@ if (handler == null) { throw new IllegalArgumentException("Delegate can't be null"); } - ChangesetTransformer inspector = new ChangesetTransformer(localRepo, handler); + ChangesetTransformer inspector = new ChangesetTransformer(localRepo, handler, getParentHelper()); inspector.limitBranches(branches); getComparator(handler).visitLocalOnlyRevisions(inspector); } @@ -114,11 +115,18 @@ throw new IllegalArgumentException("Shall specify remote repository to compare against"); } if (comparator == null) { - HgChangelog.ParentWalker pw = localRepo.getChangelog().new ParentWalker(); - pw.init(); - comparator = new RepositoryComparator(pw, remoteRepo); + comparator = new RepositoryComparator(getParentHelper(), remoteRepo); comparator.compare(context); } return comparator; } + + private HgChangelog.ParentWalker getParentHelper() { + if (parentHelper == null) { + parentHelper = localRepo.getChangelog().new ParentWalker(); + parentHelper.init(); + } + return parentHelper; + } + } diff -r 344e8d7e4d6e -r c9b305df0b89 src/org/tmatesoft/hg/internal/Pool.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/org/tmatesoft/hg/internal/Pool.java Fri Apr 15 05:17:44 2011 +0200 @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2011 TMate Software Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * For information on how to redistribute this software under + * the terms of a license other than GNU General Public License + * contact TMate Software at support@hg4j.com + */ +package org.tmatesoft.hg.internal; + +import java.util.HashMap; + +/** + * Instance pooling. + * + * @author Artem Tikhomirov + * @author TMate Software Ltd. + */ +public class Pool { + private final HashMap unify = new HashMap(); + + public T unify(T t) { + T rv = unify.get(t); + if (rv == null) { + // first time we see a new value + unify.put(t, t); + rv = t; + } + return rv; + } +} \ No newline at end of file diff -r 344e8d7e4d6e -r c9b305df0b89 src/org/tmatesoft/hg/repo/HgStatusCollector.java --- a/src/org/tmatesoft/hg/repo/HgStatusCollector.java Fri Apr 15 03:35:08 2011 +0200 +++ b/src/org/tmatesoft/hg/repo/HgStatusCollector.java Fri Apr 15 05:17:44 2011 +0200 @@ -25,11 +25,13 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.SortedMap; import java.util.TreeMap; import java.util.TreeSet; import org.tmatesoft.hg.core.HgDataStreamException; import org.tmatesoft.hg.core.Nodeid; +import org.tmatesoft.hg.internal.Pool; import org.tmatesoft.hg.util.Path; import org.tmatesoft.hg.util.PathPool; import org.tmatesoft.hg.util.PathRewrite; @@ -44,13 +46,23 @@ public class HgStatusCollector { private final HgRepository repo; - private final Map cache; // sparse array, in fact + private final SortedMap cache; // sparse array, in fact + // with cpython repository, ~70 000 changes, complete Log (direct out, no reverse) output + // no cache limit, no nodeids and fname caching - OOME on changeset 1035 + // no cache limit, but with cached nodeids and filenames - 1730+ + // cache limit 100 - 19+ minutes to process 10000, and still working (too long, stopped) + private final int cacheMaxSize = 100; // do not keep too much manifest revisions private PathPool pathPool; + private final Pool cacheNodes; + private final Pool cacheFilenames; // XXX in fact, need to think if use of PathPool directly instead is better solution + public HgStatusCollector(HgRepository hgRepo) { this.repo = hgRepo; cache = new TreeMap(); - ManifestRevisionInspector emptyFakeState = new ManifestRevisionInspector(); + cacheNodes = new Pool(); + cacheFilenames = new Pool(); + ManifestRevisionInspector emptyFakeState = new ManifestRevisionInspector(null, null); emptyFakeState.begin(-1, null); emptyFakeState.end(-1); // FIXME HgRepo.TIP == -1 as well, need to distinguish fake "prior to first" revision from "the very last" cache.put(-1, emptyFakeState); @@ -63,7 +75,11 @@ private ManifestRevisionInspector get(int rev) { ManifestRevisionInspector i = cache.get(rev); if (i == null) { - i = new ManifestRevisionInspector(); + if (cache.size() > cacheMaxSize) { + // assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary + cache.remove(cache.firstKey()); + } + i = new ManifestRevisionInspector(cacheNodes, cacheFilenames); cache.put(rev, i); repo.getManifest().walk(rev, rev, i); } @@ -130,7 +146,7 @@ private ManifestRevisionInspector delegate; public boolean begin(int revision, Nodeid nid) { - cache.put(revision, delegate = new ManifestRevisionInspector()); + cache.put(revision, delegate = new ManifestRevisionInspector(cacheNodes, cacheFilenames)); delegate.begin(revision, nid); return true; } @@ -342,12 +358,18 @@ return l; } } - + /*package-local*/ static final class ManifestRevisionInspector implements HgManifest.Inspector { private final TreeMap idsMap; private final TreeMap flagsMap; + private final Pool idsPool; + private final Pool namesPool; - public ManifestRevisionInspector() { + // optional pools for effective management of nodeids and filenames (they are likely + // to be duplicated among different manifest revisions + public ManifestRevisionInspector(Pool nodeidPool, Pool filenamePool) { + idsPool = nodeidPool; + namesPool = filenamePool; idsMap = new TreeMap(); flagsMap = new TreeMap(); } @@ -367,6 +389,12 @@ // public boolean next(Nodeid nid, String fname, String flags) { + if (namesPool != null) { + fname = namesPool.unify(fname); + } + if (idsPool != null) { + nid = idsPool.unify(nid); + } idsMap.put(fname, nid); flagsMap.put(fname, flags); return true; diff -r 344e8d7e4d6e -r c9b305df0b89 src/org/tmatesoft/hg/repo/HgWorkingCopyStatusCollector.java --- a/src/org/tmatesoft/hg/repo/HgWorkingCopyStatusCollector.java Fri Apr 15 03:35:08 2011 +0200 +++ b/src/org/tmatesoft/hg/repo/HgWorkingCopyStatusCollector.java Fri Apr 15 05:17:44 2011 +0200 @@ -113,7 +113,7 @@ if (baseRevisionCollector != null) { collect = baseRevisionCollector.raw(baseRevision); } else { - collect = new HgStatusCollector.ManifestRevisionInspector(); + collect = new HgStatusCollector.ManifestRevisionInspector(null, null); repo.getManifest().walk(baseRevision, baseRevision, collect); } baseRevFiles = new TreeSet(collect.files());