kitaev@213: /* kitaev@213: * Copyright (c) 2011 TMate Software Ltd kitaev@213: * kitaev@213: * This program is free software; you can redistribute it and/or modify kitaev@213: * it under the terms of the GNU General Public License as published by kitaev@213: * the Free Software Foundation; version 2 of the License. kitaev@213: * kitaev@213: * This program is distributed in the hope that it will be useful, kitaev@213: * but WITHOUT ANY WARRANTY; without even the implied warranty of kitaev@213: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the kitaev@213: * GNU General Public License for more details. kitaev@213: * kitaev@213: * For information on how to redistribute this software under kitaev@213: * the terms of a license other than GNU General Public License kitaev@213: * contact TMate Software at support@hg4j.com kitaev@213: */ kitaev@213: package org.tmatesoft.hg.repo; kitaev@213: kitaev@213: import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION; kitaev@213: import static org.tmatesoft.hg.repo.HgRepository.TIP; kitaev@213: kitaev@213: import java.util.Collection; kitaev@213: import java.util.Collections; kitaev@213: import java.util.LinkedHashMap; kitaev@213: import java.util.LinkedList; kitaev@213: import java.util.List; kitaev@213: import java.util.Map; kitaev@213: import java.util.SortedMap; kitaev@213: import java.util.TreeMap; kitaev@213: import java.util.TreeSet; kitaev@213: kitaev@213: import org.tmatesoft.hg.core.HgDataStreamException; kitaev@213: import org.tmatesoft.hg.core.Nodeid; kitaev@213: import org.tmatesoft.hg.internal.Pool; kitaev@213: import org.tmatesoft.hg.util.Path; kitaev@213: import org.tmatesoft.hg.util.PathPool; kitaev@213: import org.tmatesoft.hg.util.PathRewrite; kitaev@213: kitaev@213: kitaev@213: /** kitaev@213: * RevisionWalker? kitaev@213: * kitaev@213: * @author Artem Tikhomirov kitaev@213: * @author TMate Software Ltd. kitaev@213: */ kitaev@213: public class HgStatusCollector { kitaev@213: kitaev@213: private final HgRepository repo; kitaev@213: private final SortedMap cache; // sparse array, in fact kitaev@213: // with cpython repository, ~70 000 changes, complete Log (direct out, no reverse) output kitaev@213: // no cache limit, no nodeids and fname caching - OOME on changeset 1035 kitaev@213: // no cache limit, but with cached nodeids and filenames - 1730+ kitaev@213: // cache limit 100 - 19+ minutes to process 10000, and still working (too long, stopped) kitaev@213: private final int cacheMaxSize = 50; // do not keep too much manifest revisions kitaev@213: private PathPool pathPool; kitaev@213: private final Pool cacheNodes; kitaev@213: private final Pool cacheFilenames; // XXX in fact, need to think if use of PathPool directly instead is better solution kitaev@213: private final ManifestRevisionInspector emptyFakeState; kitaev@213: kitaev@213: kitaev@213: public HgStatusCollector(HgRepository hgRepo) { kitaev@213: this.repo = hgRepo; kitaev@213: cache = new TreeMap(); kitaev@213: cacheNodes = new Pool(); kitaev@213: cacheFilenames = new Pool(); kitaev@213: kitaev@213: emptyFakeState = new ManifestRevisionInspector(null, null); kitaev@213: emptyFakeState.begin(-1, null); kitaev@213: emptyFakeState.end(-1); kitaev@213: } kitaev@213: kitaev@213: public HgRepository getRepo() { kitaev@213: return repo; kitaev@213: } kitaev@213: kitaev@213: private ManifestRevisionInspector get(int rev) { kitaev@213: ManifestRevisionInspector i = cache.get(rev); kitaev@213: if (i == null) { kitaev@213: if (rev == -1) { kitaev@213: return emptyFakeState; kitaev@213: } kitaev@213: while (cache.size() > cacheMaxSize) { kitaev@213: // assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary kitaev@213: cache.remove(cache.firstKey()); kitaev@213: } kitaev@213: i = new ManifestRevisionInspector(cacheNodes, cacheFilenames); kitaev@213: cache.put(rev, i); kitaev@213: repo.getManifest().walk(rev, rev, i); kitaev@213: } kitaev@213: return i; kitaev@213: } kitaev@213: kitaev@213: private boolean cached(int revision) { kitaev@213: return cache.containsKey(revision) || revision == -1; kitaev@213: } kitaev@213: kitaev@213: private void initCacheRange(int minRev, int maxRev) { kitaev@213: while (cache.size() > cacheMaxSize) { kitaev@213: // assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary kitaev@213: cache.remove(cache.firstKey()); kitaev@213: } kitaev@213: repo.getManifest().walk(minRev, maxRev, new HgManifest.Inspector() { kitaev@213: private ManifestRevisionInspector delegate; kitaev@213: private boolean cacheHit; // range may include revisions we already know about, do not re-create them kitaev@213: kitaev@213: public boolean begin(int revision, Nodeid nid) { kitaev@213: assert delegate == null; kitaev@213: if (cache.containsKey(revision)) { // don't need to check emptyFakeState hit as revision never -1 here kitaev@213: cacheHit = true; kitaev@213: } else { kitaev@213: cache.put(revision, delegate = new ManifestRevisionInspector(cacheNodes, cacheFilenames)); kitaev@213: // cache may grow bigger than max size here, but it's ok as present simplistic cache clearing mechanism may kitaev@213: // otherwise remove entries we just added kitaev@213: delegate.begin(revision, nid); kitaev@213: cacheHit = false; kitaev@213: } kitaev@213: return true; kitaev@213: } kitaev@213: kitaev@213: public boolean next(Nodeid nid, String fname, String flags) { kitaev@213: if (!cacheHit) { kitaev@213: delegate.next(nid, fname, flags); kitaev@213: } kitaev@213: return true; kitaev@213: } kitaev@213: kitaev@213: public boolean end(int revision) { kitaev@213: if (!cacheHit) { kitaev@213: delegate.end(revision); kitaev@213: } kitaev@213: cacheHit = false; kitaev@213: delegate = null; kitaev@213: return true; kitaev@213: } kitaev@213: }); kitaev@213: } kitaev@213: kitaev@213: /*package-local*/ ManifestRevisionInspector raw(int rev) { kitaev@213: return get(rev); kitaev@213: } kitaev@213: /*package-local*/ PathPool getPathPool() { kitaev@213: if (pathPool == null) { kitaev@213: pathPool = new PathPool(new PathRewrite.Empty()); kitaev@213: } kitaev@213: return pathPool; kitaev@213: } kitaev@213: kitaev@213: /** kitaev@213: * Allows sharing of a common path cache kitaev@213: */ kitaev@213: public void setPathPool(PathPool pathPool) { kitaev@213: this.pathPool = pathPool; kitaev@213: } kitaev@213: kitaev@213: kitaev@213: // hg status --change kitaev@213: public void change(int rev, HgStatusInspector inspector) { kitaev@213: int[] parents = new int[2]; kitaev@213: repo.getChangelog().parents(rev, parents, null, null); kitaev@213: walk(parents[0], rev, inspector); kitaev@213: } kitaev@213: kitaev@213: // I assume revision numbers are the same for changelog and manifest - here kitaev@213: // user would like to pass changelog revision numbers, and I use them directly to walk manifest. kitaev@213: // if this assumption is wrong, fix this (lookup manifest revisions from changeset). kitaev@213: // rev1 and rev2 may be -1 to indicate comparison to empty repository kitaev@213: // argument order matters kitaev@213: public void walk(int rev1, int rev2, HgStatusInspector inspector) { kitaev@213: if (rev1 == rev2) { kitaev@213: throw new IllegalArgumentException(); kitaev@213: } kitaev@213: if (inspector == null) { kitaev@213: throw new IllegalArgumentException(); kitaev@213: } kitaev@213: if (inspector instanceof Record) { kitaev@213: ((Record) inspector).init(rev1, rev2, this); kitaev@213: } kitaev@213: final int lastManifestRevision = repo.getManifest().getLastRevision(); kitaev@213: if (rev1 == TIP) { kitaev@213: rev1 = lastManifestRevision; kitaev@213: } kitaev@213: if (rev2 == TIP) { kitaev@213: rev2 = lastManifestRevision; kitaev@213: } kitaev@213: // in fact, rev1 and rev2 are often next (or close) to each other, kitaev@213: // thus, we can optimize Manifest reads here (manifest.walk(rev1, rev2)) kitaev@213: ManifestRevisionInspector r1, r2 ; kitaev@213: boolean need1 = !cached(rev1), need2 = !cached(rev2); kitaev@213: if (need1 || need2) { kitaev@213: int minRev, maxRev; kitaev@213: if (need1 && need2 && Math.abs(rev1 - rev2) < 5 /*subjective equivalent of 'close enough'*/) { kitaev@213: minRev = rev1 < rev2 ? rev1 : rev2; kitaev@213: maxRev = minRev == rev1 ? rev2 : rev1; kitaev@213: if (minRev > 0) { kitaev@213: minRev--; // expand range a bit kitaev@213: } kitaev@213: initCacheRange(minRev, maxRev); kitaev@213: need1 = need2 = false; kitaev@213: } kitaev@213: // either both unknown and far from each other, or just one of them. kitaev@213: // read with neighbors to save potential subsequent calls for neighboring elements kitaev@213: // XXX perhaps, if revlog.baseRevision is cheap, shall expand minRev up to baseRevision kitaev@213: // which going to be read anyway kitaev@213: if (need1) { kitaev@213: minRev = rev1; kitaev@213: maxRev = rev1 < lastManifestRevision-5 ? rev1+5 : lastManifestRevision; kitaev@213: initCacheRange(minRev, maxRev); kitaev@213: } kitaev@213: if (need2) { kitaev@213: minRev = rev2; kitaev@213: maxRev = rev2 < lastManifestRevision-5 ? rev2+5 : lastManifestRevision; kitaev@213: initCacheRange(minRev, maxRev); kitaev@213: } kitaev@213: } kitaev@213: r1 = get(rev1); kitaev@213: r2 = get(rev2); kitaev@213: kitaev@213: PathPool pp = getPathPool(); kitaev@213: kitaev@213: TreeSet r1Files = new TreeSet(r1.files()); kitaev@213: for (String fname : r2.files()) { kitaev@213: if (r1Files.remove(fname)) { kitaev@213: Nodeid nidR1 = r1.nodeid(fname); kitaev@213: Nodeid nidR2 = r2.nodeid(fname); kitaev@213: String flagsR1 = r1.flags(fname); kitaev@213: String flagsR2 = r2.flags(fname); kitaev@213: if (nidR1.equals(nidR2) && ((flagsR2 == null && flagsR1 == null) || flagsR2.equals(flagsR1))) { kitaev@213: inspector.clean(pp.path(fname)); kitaev@213: } else { kitaev@213: inspector.modified(pp.path(fname)); kitaev@213: } kitaev@213: } else { kitaev@213: try { kitaev@213: Path copyTarget = pp.path(fname); kitaev@213: Path copyOrigin = getOriginIfCopy(repo, copyTarget, r1Files, rev1); kitaev@213: if (copyOrigin != null) { kitaev@213: inspector.copied(pp.path(copyOrigin) /*pipe through pool, just in case*/, copyTarget); kitaev@213: } else { kitaev@213: inspector.added(copyTarget); kitaev@213: } kitaev@213: } catch (HgDataStreamException ex) { kitaev@213: ex.printStackTrace(); kitaev@213: // FIXME perhaps, shall record this exception to dedicated mediator and continue kitaev@213: // for a single file not to be irresolvable obstacle for a status operation kitaev@213: } kitaev@213: } kitaev@213: } kitaev@213: for (String left : r1Files) { kitaev@213: inspector.removed(pp.path(left)); kitaev@213: } kitaev@213: } kitaev@213: kitaev@213: public Record status(int rev1, int rev2) { kitaev@213: Record rv = new Record(); kitaev@213: walk(rev1, rev2, rv); kitaev@213: return rv; kitaev@213: } kitaev@213: kitaev@213: /*package-local*/static Path getOriginIfCopy(HgRepository hgRepo, Path fname, Collection originals, int originalChangelogRevision) throws HgDataStreamException { kitaev@213: HgDataFile df = hgRepo.getFileNode(fname); kitaev@213: while (df.isCopy()) { kitaev@213: Path original = df.getCopySourceName(); kitaev@213: if (originals.contains(original.toString())) { kitaev@213: df = hgRepo.getFileNode(original); kitaev@213: int changelogRevision = df.getChangesetLocalRevision(0); kitaev@213: if (changelogRevision <= originalChangelogRevision) { kitaev@213: // copy/rename source was known prior to rev1 kitaev@213: // (both r1Files.contains is true and original was created earlier than rev1) kitaev@213: // without r1Files.contains changelogRevision <= rev1 won't suffice as the file kitaev@213: // might get removed somewhere in between (changelogRevision < R < rev1) kitaev@213: return original; kitaev@213: } kitaev@213: break; // copy/rename done later kitaev@213: } kitaev@213: df = hgRepo.getFileNode(original); // try more steps away kitaev@213: } kitaev@213: return null; kitaev@213: } kitaev@213: kitaev@213: // XXX for r1..r2 status, only modified, added, removed (and perhaps, clean) make sense kitaev@213: // XXX Need to specify whether copy targets are in added or not (@see Inspector#copied above) kitaev@213: public static class Record implements HgStatusInspector { kitaev@213: private List modified, added, removed, clean, missing, unknown, ignored; kitaev@213: private Map copied; kitaev@213: kitaev@213: private int startRev, endRev; kitaev@213: private HgStatusCollector statusHelper; kitaev@213: kitaev@213: // XXX StatusCollector may additionally initialize Record instance to speed lookup of changed file revisions kitaev@213: // here I need access to ManifestRevisionInspector via #raw(). Perhaps, non-static class (to get kitaev@213: // implicit reference to StatusCollector) may be better? kitaev@213: // Since users may want to reuse Record instance we've once created (and initialized), we need to kitaev@213: // ensure functionality is correct for each/any call (#walk checks instanceof Record and fixes it up) kitaev@213: // Perhaps, distinct helper (sc.getRevisionHelper().nodeid(fname)) would be better, just not clear kitaev@213: // how to supply [start..end] values there easily kitaev@213: /*package-local*/void init(int startRevision, int endRevision, HgStatusCollector self) { kitaev@213: startRev = startRevision; kitaev@213: endRev = endRevision; kitaev@213: statusHelper = self; kitaev@213: } kitaev@213: kitaev@213: public Nodeid nodeidBeforeChange(Path fname) { kitaev@213: if (statusHelper == null || startRev == BAD_REVISION) { kitaev@213: return null; kitaev@213: } kitaev@213: if ((modified == null || !modified.contains(fname)) && (removed == null || !removed.contains(fname))) { kitaev@213: return null; kitaev@213: } kitaev@213: return statusHelper.raw(startRev).nodeid(fname.toString()); kitaev@213: } kitaev@213: public Nodeid nodeidAfterChange(Path fname) { kitaev@213: if (statusHelper == null || endRev == BAD_REVISION) { kitaev@213: return null; kitaev@213: } kitaev@213: if ((modified == null || !modified.contains(fname)) && (added == null || !added.contains(fname))) { kitaev@213: return null; kitaev@213: } kitaev@213: return statusHelper.raw(endRev).nodeid(fname.toString()); kitaev@213: } kitaev@213: kitaev@213: public List getModified() { kitaev@213: return proper(modified); kitaev@213: } kitaev@213: kitaev@213: public List getAdded() { kitaev@213: return proper(added); kitaev@213: } kitaev@213: kitaev@213: public List getRemoved() { kitaev@213: return proper(removed); kitaev@213: } kitaev@213: kitaev@213: public Map getCopied() { kitaev@213: if (copied == null) { kitaev@213: return Collections.emptyMap(); kitaev@213: } kitaev@213: return Collections.unmodifiableMap(copied); kitaev@213: } kitaev@213: kitaev@213: public List getClean() { kitaev@213: return proper(clean); kitaev@213: } kitaev@213: kitaev@213: public List getMissing() { kitaev@213: return proper(missing); kitaev@213: } kitaev@213: kitaev@213: public List getUnknown() { kitaev@213: return proper(unknown); kitaev@213: } kitaev@213: kitaev@213: public List getIgnored() { kitaev@213: return proper(ignored); kitaev@213: } kitaev@213: kitaev@213: private List proper(List l) { kitaev@213: if (l == null) { kitaev@213: return Collections.emptyList(); kitaev@213: } kitaev@213: return Collections.unmodifiableList(l); kitaev@213: } kitaev@213: kitaev@213: // kitaev@213: // kitaev@213: kitaev@213: public void modified(Path fname) { kitaev@213: modified = doAdd(modified, fname); kitaev@213: } kitaev@213: kitaev@213: public void added(Path fname) { kitaev@213: added = doAdd(added, fname); kitaev@213: } kitaev@213: kitaev@213: public void copied(Path fnameOrigin, Path fnameAdded) { kitaev@213: if (copied == null) { kitaev@213: copied = new LinkedHashMap(); kitaev@213: } kitaev@213: added(fnameAdded); kitaev@213: copied.put(fnameAdded, fnameOrigin); kitaev@213: } kitaev@213: kitaev@213: public void removed(Path fname) { kitaev@213: removed = doAdd(removed, fname); kitaev@213: } kitaev@213: kitaev@213: public void clean(Path fname) { kitaev@213: clean = doAdd(clean, fname); kitaev@213: } kitaev@213: kitaev@213: public void missing(Path fname) { kitaev@213: missing = doAdd(missing, fname); kitaev@213: } kitaev@213: kitaev@213: public void unknown(Path fname) { kitaev@213: unknown = doAdd(unknown, fname); kitaev@213: } kitaev@213: kitaev@213: public void ignored(Path fname) { kitaev@213: ignored = doAdd(ignored, fname); kitaev@213: } kitaev@213: kitaev@213: private static List doAdd(List l, Path p) { kitaev@213: if (l == null) { kitaev@213: l = new LinkedList(); kitaev@213: } kitaev@213: l.add(p); kitaev@213: return l; kitaev@213: } kitaev@213: } kitaev@213: kitaev@213: /*package-local*/ static final class ManifestRevisionInspector implements HgManifest.Inspector { kitaev@213: private final TreeMap idsMap; kitaev@213: private final TreeMap flagsMap; kitaev@213: private final Pool idsPool; kitaev@213: private final Pool namesPool; kitaev@213: kitaev@213: // optional pools for effective management of nodeids and filenames (they are likely kitaev@213: // to be duplicated among different manifest revisions kitaev@213: public ManifestRevisionInspector(Pool nodeidPool, Pool filenamePool) { kitaev@213: idsPool = nodeidPool; kitaev@213: namesPool = filenamePool; kitaev@213: idsMap = new TreeMap(); kitaev@213: flagsMap = new TreeMap(); kitaev@213: } kitaev@213: kitaev@213: public Collection files() { kitaev@213: return idsMap.keySet(); kitaev@213: } kitaev@213: kitaev@213: public Nodeid nodeid(String fname) { kitaev@213: return idsMap.get(fname); kitaev@213: } kitaev@213: kitaev@213: public String flags(String fname) { kitaev@213: return flagsMap.get(fname); kitaev@213: } kitaev@213: kitaev@213: // kitaev@213: kitaev@213: public boolean next(Nodeid nid, String fname, String flags) { kitaev@213: if (namesPool != null) { kitaev@213: fname = namesPool.unify(fname); kitaev@213: } kitaev@213: if (idsPool != null) { kitaev@213: nid = idsPool.unify(nid); kitaev@213: } kitaev@213: idsMap.put(fname, nid); kitaev@213: if (flags != null) { kitaev@213: // TreeMap$Entry takes 32 bytes. No reason to keep null for such price kitaev@213: // Perhaps, Map> might be better solution kitaev@213: flagsMap.put(fname, flags); kitaev@213: } kitaev@213: return true; kitaev@213: } kitaev@213: kitaev@213: public boolean end(int revision) { kitaev@213: // in fact, this class cares about single revision kitaev@213: return false; kitaev@213: } kitaev@213: kitaev@213: public boolean begin(int revision, Nodeid nid) { kitaev@213: return true; kitaev@213: } kitaev@213: } kitaev@213: kitaev@213: }