/*
 * Copyright (c) 2011 TMate Software Ltd
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * For information on how to redistribute this software under
 * the terms of a license other than GNU General Public License
 * contact TMate Software at support@hg4j.com
 */
package org.tmatesoft.hg.repo;

import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION;
import static org.tmatesoft.hg.repo.HgRepository.TIP;

import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;

import org.tmatesoft.hg.core.HgBadStateException;
import org.tmatesoft.hg.core.HgDataStreamException;
import org.tmatesoft.hg.core.HgException;
import org.tmatesoft.hg.core.HgInvalidControlFileException;
import org.tmatesoft.hg.core.Nodeid;
import org.tmatesoft.hg.internal.IntMap;
import org.tmatesoft.hg.internal.ManifestRevision;
import org.tmatesoft.hg.internal.Pool;
import org.tmatesoft.hg.util.Path;
import org.tmatesoft.hg.util.PathPool;
import org.tmatesoft.hg.util.PathRewrite;


/**
 * Reports file status (clean, modified, added, copied, removed) between two changelog revisions
 * by comparing the manifests of these revisions. Manifest revisions that have been read are kept
 * in a small bounded cache, keyed by changelog revision number.
 *
 * <p>Naming note from the original author: RevisionWalker?
 *
 * @author Artem Tikhomirov
 * @author TMate Software Ltd.
 */
public class HgStatusCollector {

	private final HgRepository repo;
	private final IntMap<ManifestRevision> cache; // sparse array, in fact
	// with cpython repository, ~70 000 changes, complete Log (direct out, no reverse) output
	// no cache limit, no nodeids and fname caching - OOME on changeset 1035
	// no cache limit, but with cached nodeids and filenames - 1730+
	// cache limit 100 - 19+ minutes to process 10000, and still working (too long, stopped)
	private final int cacheMaxSize = 50; // do not keep too much manifest revisions
	private PathPool pathPool;
	// NOTE(review): element types of the two pools are inferred from their use with
	// ManifestRevision (nodeids and file names) - confirm against ManifestRevision's constructor
	private final Pool<Nodeid> cacheNodes;
	private final Pool<Path> cacheFilenames;
	private final ManifestRevision emptyFakeState;
	private Path.Matcher scope = new Path.Matcher.Any();


	/**
	 * @param hgRepo repository to collect status from
	 */
	public HgStatusCollector(HgRepository hgRepo) {
		this.repo = hgRepo;
		cache = new IntMap<ManifestRevision>(cacheMaxSize);
		cacheNodes = new Pool<Nodeid>();
		cacheFilenames = new Pool<Path>();

		emptyFakeState = createEmptyManifestRevision();
	}

	/**
	 * @return repository this collector was created for
	 */
	public HgRepository getRepo() {
		return repo;
	}

	/**
	 * Gives manifest state for the changelog revision, reading it from the repository
	 * unless it's already in the cache.
	 *
	 * @param rev changelog revision number, or -1 for the state of an empty repository
	 * @return manifest state, never <code>null</code>
	 */
	private ManifestRevision get(int rev) {
		ManifestRevision known = cache.get(rev);
		if (known != null) {
			return known;
		}
		if (rev == -1) {
			return emptyFakeState;
		}
		ensureCacheSize();
		ManifestRevision fresh = new ManifestRevision(cacheNodes, cacheFilenames);
		repo.getManifest().walk(rev, rev, fresh);
		// register only after successful read, otherwise a failure in walk() would leave
		// an incomplete manifest in the cache that subsequent calls would trust
		cache.put(rev, fresh);
		return fresh;
	}

	/**
	 * @return <code>true</code> if {@link #get(int)} for the revision needs no repository access
	 */
	private boolean cached(int revision) {
		return cache.containsKey(revision) || revision == -1;
	}

	/**
	 * Drops entries from the start of the cache once it has grown beyond {@link #cacheMaxSize},
	 * leaving room for one new element.
	 */
	private void ensureCacheSize() {
		if (cache.size() > cacheMaxSize) {
			// assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary
			cache.removeFromStart(cache.size() - cacheMaxSize + 1 /* room for new element */);
		}
	}

	/**
	 * Reads manifests for every changelog revision in [minRev..maxRev] (both inclusive) into the cache,
	 * skipping those already cached.
	 */
	private void initCacheRange(int minRev, int maxRev) {
		ensureCacheSize();
		// In fact, walk(minRev, maxRev) doesn't imply
		// there would be maxRev-minRev+1 revisions visited. For example,
		// check cpython repo with 'hg log -r 22418:22420 --debug' and admire
		// manifest revisions 66650, 21683, 21684. Thus, innocent walk(22418,22420) results in 40k+ revisions and OOME
		// Instead, be explicit of what revisions are of interest
		assert minRev <= maxRev;
		int[] revisionsToCollect = new int[maxRev - minRev + 1];
		for (int x = minRev, i = 0; x <= maxRev; i++, x++) {
			revisionsToCollect[i] = x;
		}
		repo.getManifest().walk(new HgManifest.Inspector2() {
			private ManifestRevision delegate;
			private int delegateChangelogRevision; // cache key for the delegate being filled
			private boolean cacheHit; // range may include revisions we already know about, do not re-create them

			public boolean begin(int manifestRevision, Nodeid nid, int changelogRevision) {
				assert delegate == null;
				if (cache.containsKey(changelogRevision)) { // don't need to check emptyFakeState hit as revision never -1 here
					cacheHit = true;
				} else {
					delegate = new ManifestRevision(cacheNodes, cacheFilenames);
					delegateChangelogRevision = changelogRevision;
					delegate.begin(manifestRevision, nid, changelogRevision);
					cacheHit = false;
				}
				return true;
			}

			public boolean next(Nodeid nid, String fname, String flags) {
				throw new HgBadStateException(HgManifest.Inspector2.class.getName());
			}

			public boolean next(Nodeid nid, Path fname, HgManifest.Flags flags) {
				if (!cacheHit) {
					delegate.next(nid, fname, flags);
				}
				return true;
			}

			public boolean end(int revision) {
				if (!cacheHit) {
					delegate.end(revision);
					// publish only complete revisions, so that an aborted walk leaves no partial entry behind.
					// cache may grow bigger than max size here, but it's ok as present simplistic cache clearing mechanism may
					// otherwise remove entries we just added
					cache.put(delegateChangelogRevision, delegate);
				}
				cacheHit = false;
				delegate = null;
				return true;
			}
		}, revisionsToCollect);
	}

	/**
	 * @return manifest state with no files, initialized with revision -1
	 */
	/*package-local*/ static ManifestRevision createEmptyManifestRevision() {
		ManifestRevision fakeEmptyRev = new ManifestRevision(null, null);
		fakeEmptyRev.begin(-1, null, -1);
		fakeEmptyRev.end(-1);
		return fakeEmptyRev;
	}

	/**
	 * Direct access to (possibly cached) manifest state of the changelog revision.
	 */
	/*package-local*/ ManifestRevision raw(int rev) {
		return get(rev);
	}

	/**
	 * @return path pool in use, created on first access unless set with {@link #setPathPool(PathPool)}
	 */
	/*package-local*/ PathPool getPathPool() {
		if (pathPool == null) {
			pathPool = new PathPool(new PathRewrite.Empty());
		}
		return pathPool;
	}

	/**
	 * Allows sharing of a common path cache
	 */
	public void setPathPool(PathPool pathPool) {
		this.pathPool = pathPool;
	}

	/**
	 * Limit activity of the collector to certain sub-tree of the repository.
	 * @param scopeMatcher tells whether collector shall report specific path, can be <code>null</code>
	 */
	public void setScope(Path.Matcher scopeMatcher) {
		// do not assign null, ever
		scope = scopeMatcher == null ? new Path.Matcher.Any() : scopeMatcher;
	}

	/**
	 * Reports changes of the revision relative to its first parent, as in <code>hg status --change</code>.
	 *
	 * @param rev changelog revision number
	 * @param inspector callback to get status notifications
	 */
	public void change(int rev, HgStatusInspector inspector) {
		int[] parents = new int[2];
		repo.getChangelog().parents(rev, parents, null, null);
		walk(parents[0], rev, inspector);
	}

	/**
	 * Reports status of files in <code>rev2</code> relative to <code>rev1</code>; argument order matters.
	 * Files of <code>rev2</code> outside the scope (see {@link #setScope(Path.Matcher)}) are skipped, as are
	 * removed files outside the scope. A failure to find out copy origin of a file is reported
	 * through {@link HgStatusInspector#invalid(Path, Exception)} and doesn't stop the walk.
	 *
	 * @param rev1 changelog revision number, {@link HgRepository#TIP} or -1
	 * @param rev2 changelog revision number, {@link HgRepository#TIP} or -1
	 * @param inspector callback to get status notifications, shall not be <code>null</code>
	 * @throws IllegalArgumentException if revisions are the same or inspector is <code>null</code>
	 */
	// rev1 and rev2 are changelog revision numbers, argument order matters.
	// Either rev1 or rev2 may be -1 to indicate comparison to empty repository (XXX this is due to use of
	// parents in #change(), I believe. Perhaps, need a constant for this? Otherwise this hidden knowledge gets
	// exposed to e.g. Record
	public void walk(int rev1, int rev2, HgStatusInspector inspector) {
		// NOTE(review): the check precedes TIP resolution, hence walk(TIP, <last revision index>) passes it
		if (rev1 == rev2) {
			throw new IllegalArgumentException("Revisions to compare shall differ, got " + rev1 + " for both");
		}
		if (inspector == null) {
			throw new IllegalArgumentException("Status inspector shall not be null");
		}
		final int lastChangelogRevision = repo.getChangelog().getLastRevision();
		if (rev1 == TIP) {
			rev1 = lastChangelogRevision;
		}
		if (rev2 == TIP) {
			rev2 = lastChangelogRevision;
		}
		if (inspector instanceof Record) {
			((Record) inspector).init(rev1, rev2, this);
		}
		// in fact, rev1 and rev2 are often next (or close) to each other,
		// thus, we can optimize Manifest reads here (manifest.walk(rev1, rev2))
		boolean need1 = !cached(rev1), need2 = !cached(rev2);
		if (need1 || need2) {
			int minRev, maxRev;
			if (need1 && need2 && Math.abs(rev1 - rev2) < 5 /*subjective equivalent of 'close enough'*/) {
				minRev = rev1 < rev2 ? rev1 : rev2;
				maxRev = minRev == rev1 ? rev2 : rev1;
				if (minRev > 0) {
					minRev--; // expand range a bit
				}
				initCacheRange(minRev, maxRev);
				need1 = need2 = false;
			}
			// either both unknown and far from each other, or just one of them.
			// read with neighbors to save potential subsequent calls for neighboring elements
			// XXX perhaps, if revlog.baseRevision is cheap, shall expand minRev up to baseRevision
			// which going to be read anyway
			if (need1) {
				minRev = rev1;
				maxRev = rev1 < lastChangelogRevision-5 ? rev1+5 : lastChangelogRevision;
				initCacheRange(minRev, maxRev);
			}
			if (need2) {
				minRev = rev2;
				maxRev = rev2 < lastChangelogRevision-5 ? rev2+5 : lastChangelogRevision;
				initCacheRange(minRev, maxRev);
			}
		}
		final ManifestRevision r1 = get(rev1);
		final ManifestRevision r2 = get(rev2);

		// files of r1 not yet matched against r2; whatever is left after the loop was removed
		TreeSet<Path> r1Files = new TreeSet<Path>(r1.files());
		for (Path r2fname : r2.files()) {
			if (!scope.accept(r2fname)) {
				continue;
			}
			if (r1Files.remove(r2fname)) {
				Nodeid nidR1 = r1.nodeid(r2fname);
				Nodeid nidR2 = r2.nodeid(r2fname);
				HgManifest.Flags flagsR1 = r1.flags(r2fname);
				HgManifest.Flags flagsR2 = r2.flags(r2fname);
				if (nidR1.equals(nidR2) && flagsR2 == flagsR1) {
					inspector.clean(r2fname);
				} else {
					inspector.modified(r2fname);
				}
			} else {
				try {
					Path copyTarget = r2fname;
					Path copyOrigin = getOriginIfCopy(repo, copyTarget, r1Files, rev1);
					if (copyOrigin != null) {
						inspector.copied(getPathPool().path(copyOrigin) /*pipe through pool, just in case*/, copyTarget);
					} else {
						inspector.added(copyTarget);
					}
				} catch (HgException ex) {
					// record exception to a mediator and continue,
					// for a single file not to be irresolvable obstacle for a status operation
					inspector.invalid(r2fname, ex);
				}
			}
		}
		for (Path r1fname : r1Files) {
			if (scope.accept(r1fname)) {
				inspector.removed(r1fname);
			}
		}
	}

	/**
	 * Collects status between two revisions into a new {@link Record}, see {@link #walk(int, int, HgStatusInspector)}.
	 *
	 * @return new record with status values
	 */
	public Record status(int rev1, int rev2) {
		Record rv = new Record();
		walk(rev1, rev2, rv);
		return rv;
	}

	/**
	 * Follows copy/rename history of the file, looking for a source name that is among <code>originals</code>
	 * and whose first file revision belongs to a changeset not later than <code>originalChangelogRevision</code>.
	 *
	 * @param hgRepo repository to look up files in
	 * @param fname file to find copy origin for
	 * @param originals names that are acceptable as origin
	 * @param originalChangelogRevision changelog revision the origin shall be known at
	 * @return name of the origin, or <code>null</code> if the file is not a copy of any of the originals
	 * @throws HgDataStreamException if there's no data file for <code>fname</code> in the repository
	 * @throws HgInvalidControlFileException declared by the original signature; presumably comes from
	 *     the file node calls, which are not visible here
	 */
	/*package-local*/static Path getOriginIfCopy(HgRepository hgRepo, Path fname, Collection<Path> originals, int originalChangelogRevision) throws HgDataStreamException, HgInvalidControlFileException {
		HgDataFile df = hgRepo.getFileNode(fname);
		if (!df.exists()) {
			String msg = String.format("Didn't find file '%s' in the repo. Perhaps, bad storage name conversion?", fname);
			throw new HgDataStreamException(fname, msg, null).setRevisionNumber(originalChangelogRevision);
		}
		while (df.isCopy()) {
			Path original = df.getCopySourceName();
			if (originals.contains(original)) {
				df = hgRepo.getFileNode(original);
				int changelogRevision = df.getChangesetLocalRevision(0);
				if (changelogRevision <= originalChangelogRevision) {
					// copy/rename source was known prior to rev1
					// (both r1Files.contains is true and original was created earlier than rev1)
					// without r1Files.contains changelogRevision <= rev1 won't suffice as the file
					// might get removed somewhere in between (changelogRevision < R < rev1)
					return original;
				}
				break; // copy/rename done later
			}
			df = hgRepo.getFileNode(original); // try more steps away
		}
		return null;
	}

	// XXX for r1..r2 status, only modified, added, removed (and perhaps, clean) make sense
	// XXX Need to specify whether copy targets are in added or not (@see Inspector#copied above)
	/**
	 * Straightforward {@link HgStatusInspector} implementation that collects all status values.
	 *
	 * <p>Naturally, {@link Record Records} originating from {@link HgStatusCollector} would report only modified, added,
	 * removed and clean values, other are available only when using {@link Record} with {@link HgWorkingCopyStatusCollector}.
	 *
	 * <p>Note, this implementation records copied files as added, thus key values in {@link #getCopied()} map are subset of paths
	 * from {@link #getAdded()}.
	 */
	public static class Record implements HgStatusInspector {
		// each collection is created on first use, null means nothing was reported
		private List<Path> modified, added, removed, clean, missing, unknown, ignored;
		private Map<Path, Path> copied; // copy target -> copy origin
		private Map<Path, Exception> failures;

		private int startRev, endRev;
		private HgStatusCollector statusHelper;

		// XXX StatusCollector may additionally initialize Record instance to speed lookup of changed file revisions
		// here I need access to ManifestRevisionInspector via #raw(). Perhaps, non-static class (to get
		// implicit reference to StatusCollector) may be better?
		// Since users may want to reuse Record instance we've once created (and initialized), we need to
		// ensure functionality is correct for each/any call (#walk checks instanceof Record and fixes it up)
		// Perhaps, distinct helper (sc.getRevisionHelper().nodeid(fname)) would be better, just not clear
		// how to supply [start..end] values there easily
		/*package-local*/void init(int startRevision, int endRevision, HgStatusCollector self) {
			startRev = startRevision;
			endRev = endRevision;
			statusHelper = self;
		}

		/**
		 * @return file revision in the start revision for a file recorded as modified or removed,
		 *     <code>null</code> otherwise or when this record was not initialized by a collector
		 */
		public Nodeid nodeidBeforeChange(Path fname) {
			if (statusHelper == null || startRev == BAD_REVISION) {
				return null;
			}
			if ((modified == null || !modified.contains(fname)) && (removed == null || !removed.contains(fname))) {
				return null;
			}
			return statusHelper.raw(startRev).nodeid(fname);
		}

		/**
		 * @return file revision in the end revision for a file recorded as modified or added,
		 *     <code>null</code> otherwise or when this record was not initialized by a collector
		 */
		public Nodeid nodeidAfterChange(Path fname) {
			if (statusHelper == null || endRev == BAD_REVISION) {
				return null;
			}
			if ((modified == null || !modified.contains(fname)) && (added == null || !added.contains(fname))) {
				return null;
			}
			return statusHelper.raw(endRev).nodeid(fname);
		}

		public List<Path> getModified() {
			return proper(modified);
		}

		public List<Path> getAdded() {
			return proper(added);
		}

		public List<Path> getRemoved() {
			return proper(removed);
		}

		/**
		 * Map files from {@link #getAdded()} to their original filenames, if were copied/moved.
		 */
		public Map<Path, Path> getCopied() {
			if (copied == null) {
				return Collections.emptyMap();
			}
			return Collections.unmodifiableMap(copied);
		}

		public List<Path> getClean() {
			return proper(clean);
		}

		public List<Path> getMissing() {
			return proper(missing);
		}

		public List<Path> getUnknown() {
			return proper(unknown);
		}

		public List<Path> getIgnored() {
			return proper(ignored);
		}

		/**
		 * @return files that failed to get their status, mapped to the failure reported for each
		 */
		public Map<Path, Exception> getInvalid() {
			if (failures == null) {
				return Collections.emptyMap();
			}
			return Collections.unmodifiableMap(failures);
		}

		// read-only view of the list, empty list for null
		private static List<Path> proper(List<Path> l) {
			if (l == null) {
				return Collections.emptyList();
			}
			return Collections.unmodifiableList(l);
		}

		//
		//

		public void modified(Path fname) {
			modified = doAdd(modified, fname);
		}

		public void added(Path fname) {
			added = doAdd(added, fname);
		}

		public void copied(Path fnameOrigin, Path fnameAdded) {
			if (copied == null) {
				copied = new LinkedHashMap<Path, Path>();
			}
			added(fnameAdded);
			copied.put(fnameAdded, fnameOrigin);
		}

		public void removed(Path fname) {
			removed = doAdd(removed, fname);
		}

		public void clean(Path fname) {
			clean = doAdd(clean, fname);
		}

		public void missing(Path fname) {
			missing = doAdd(missing, fname);
		}

		public void unknown(Path fname) {
			unknown = doAdd(unknown, fname);
		}

		public void ignored(Path fname) {
			ignored = doAdd(ignored, fname);
		}

		public void invalid(Path fname, Exception ex) {
			if (failures == null) {
				failures = new LinkedHashMap<Path, Exception>();
			}
			failures.put(fname, ex);
		}

		// appends to the list, creating it if needed; callers shall keep the returned list
		private static List<Path> doAdd(List<Path> l, Path p) {
			if (l == null) {
				l = new LinkedList<Path>();
			}
			l.add(p);
			return l;
		}
	}

}