Mercurial > hg4j
view src/org/tmatesoft/hg/repo/HgStatusCollector.java @ 242:ad6a046943be
Improved reading of sparse revisions from a revlog
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Thu, 23 Jun 2011 15:19:07 +0200 |
parents | 1ec6b327a6ac |
children | 3fbfce107f94 |
line wrap: on
line source
/* * Copyright (c) 2011 TMate Software Ltd * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * For information on how to redistribute this software under * the terms of a license other than GNU General Public License * contact TMate Software at support@hg4j.com */ package org.tmatesoft.hg.repo; import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION; import static org.tmatesoft.hg.repo.HgRepository.TIP; import java.util.Collection; import java.util.Collections; import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.SortedMap; import java.util.TreeMap; import java.util.TreeSet; import org.tmatesoft.hg.core.HgDataStreamException; import org.tmatesoft.hg.core.Nodeid; import org.tmatesoft.hg.internal.Pool; import org.tmatesoft.hg.util.Path; import org.tmatesoft.hg.util.PathPool; import org.tmatesoft.hg.util.PathRewrite; /** * RevisionWalker? * * @author Artem Tikhomirov * @author TMate Software Ltd. */ public class HgStatusCollector { private final HgRepository repo; private final SortedMap<Integer, ManifestRevisionInspector> cache; // sparse array, in fact // with cpython repository, ~70 000 changes, complete Log (direct out, no reverse) output // no cache limit, no nodeids and fname caching - OOME on changeset 1035 // no cache limit, but with cached nodeids and filenames - 1730+ // cache limit 100 - 19+ minutes to process 10000, and still working (too long, stopped) private final int cacheMaxSize = 50; // do not keep too much manifest revisions private PathPool pathPool; private final Pool<Nodeid> cacheNodes; private final Pool<String> cacheFilenames; // XXX in fact, need to think if use of PathPool directly instead is better solution private final ManifestRevisionInspector emptyFakeState; private Path.Matcher scope = new Path.Matcher.Any(); public HgStatusCollector(HgRepository hgRepo) { this.repo = hgRepo; cache = new TreeMap<Integer, ManifestRevisionInspector>(); cacheNodes = new Pool<Nodeid>(); cacheFilenames = new Pool<String>(); emptyFakeState = new ManifestRevisionInspector(null, null); emptyFakeState.begin(-1, null, -1); emptyFakeState.end(-1); } public HgRepository getRepo() { return repo; } private ManifestRevisionInspector get(int rev) { ManifestRevisionInspector i = cache.get(rev); if (i == null) { if (rev == -1) { return emptyFakeState; } while (cache.size() > cacheMaxSize) { // assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary cache.remove(cache.firstKey()); } i = new ManifestRevisionInspector(cacheNodes, cacheFilenames); cache.put(rev, i); repo.getManifest().walk(rev, rev, i); } return i; } private boolean cached(int revision) { return cache.containsKey(revision) || revision == -1; } private void initCacheRange(int minRev, int maxRev) { while (cache.size() > cacheMaxSize) { // assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary cache.remove(cache.firstKey()); } repo.getManifest().walk(minRev, maxRev, new HgManifest.Inspector() { private ManifestRevisionInspector delegate; private boolean cacheHit; // range may include revisions we already know about, do not re-create them public boolean begin(int manifestRevision, Nodeid nid, int changelogRevision) { assert delegate == null; if (cache.containsKey(changelogRevision)) { // don't need to check emptyFakeState hit as revision never -1 here cacheHit = true; } else { cache.put(changelogRevision, delegate = new ManifestRevisionInspector(cacheNodes, cacheFilenames)); // cache may grow bigger than max size here, but it's ok as present simplistic cache clearing mechanism may // otherwise remove entries we just added delegate.begin(manifestRevision, nid, changelogRevision); cacheHit = false; } return true; } public boolean next(Nodeid nid, String fname, String flags) { if (!cacheHit) { delegate.next(nid, fname, flags); } return true; } public boolean end(int revision) { if (!cacheHit) { delegate.end(revision); } cacheHit = false; delegate = null; return true; } }); } /*package-local*/ ManifestRevisionInspector raw(int rev) { return get(rev); } /*package-local*/ PathPool getPathPool() { if (pathPool == null) { pathPool = new PathPool(new PathRewrite.Empty()); } return pathPool; } /** * Allows sharing of a common path cache */ public void setPathPool(PathPool pathPool) { this.pathPool = pathPool; } /** * Limit activity of the collector to certain sub-tree of the repository. * @param scopeMatcher tells whether collector shall report specific path, can be <code>null</code> */ public void setScope(Path.Matcher scopeMatcher) { // do not assign null, ever scope = scopeMatcher == null ? new Path.Matcher.Any() : scopeMatcher; } // hg status --change <rev> public void change(int rev, HgStatusInspector inspector) { int[] parents = new int[2]; repo.getChangelog().parents(rev, parents, null, null); walk(parents[0], rev, inspector); } // I assume revision numbers are the same for changelog and manifest - here // user would like to pass changelog revision numbers, and I use them directly to walk manifest. // if this assumption is wrong, fix this (lookup manifest revisions from changeset). // rev1 and rev2 may be -1 to indicate comparison to empty repository // argument order matters public void walk(int rev1, int rev2, HgStatusInspector inspector) { if (rev1 == rev2) { throw new IllegalArgumentException(); } if (inspector == null) { throw new IllegalArgumentException(); } if (inspector instanceof Record) { ((Record) inspector).init(rev1, rev2, this); } final int lastManifestRevision = repo.getChangelog().getLastRevision(); if (rev1 == TIP) { rev1 = lastManifestRevision; } if (rev2 == TIP) { rev2 = lastManifestRevision; } // in fact, rev1 and rev2 are often next (or close) to each other, // thus, we can optimize Manifest reads here (manifest.walk(rev1, rev2)) ManifestRevisionInspector r1, r2 ; boolean need1 = !cached(rev1), need2 = !cached(rev2); if (need1 || need2) { int minRev, maxRev; if (need1 && need2 && Math.abs(rev1 - rev2) < 5 /*subjective equivalent of 'close enough'*/) { minRev = rev1 < rev2 ? rev1 : rev2; maxRev = minRev == rev1 ? rev2 : rev1; if (minRev > 0) { minRev--; // expand range a bit } initCacheRange(minRev, maxRev); need1 = need2 = false; } // either both unknown and far from each other, or just one of them. // read with neighbors to save potential subsequent calls for neighboring elements // XXX perhaps, if revlog.baseRevision is cheap, shall expand minRev up to baseRevision // which going to be read anyway if (need1) { minRev = rev1; maxRev = rev1 < lastManifestRevision-5 ? rev1+5 : lastManifestRevision; initCacheRange(minRev, maxRev); } if (need2) { minRev = rev2; maxRev = rev2 < lastManifestRevision-5 ? rev2+5 : lastManifestRevision; initCacheRange(minRev, maxRev); } } r1 = get(rev1); r2 = get(rev2); PathPool pp = getPathPool(); TreeSet<String> r1Files = new TreeSet<String>(r1.files()); for (String fname : r2.files()) { final Path r2filePath = pp.path(fname); if (!scope.accept(r2filePath)) { continue; } if (r1Files.remove(fname)) { Nodeid nidR1 = r1.nodeid(fname); Nodeid nidR2 = r2.nodeid(fname); String flagsR1 = r1.flags(fname); String flagsR2 = r2.flags(fname); if (nidR1.equals(nidR2) && ((flagsR2 == null && flagsR1 == null) || (flagsR2 != null && flagsR2.equals(flagsR1)))) { inspector.clean(r2filePath); } else { inspector.modified(r2filePath); } } else { try { Path copyTarget = r2filePath; Path copyOrigin = getOriginIfCopy(repo, copyTarget, r1Files, rev1); if (copyOrigin != null) { inspector.copied(pp.path(copyOrigin) /*pipe through pool, just in case*/, copyTarget); } else { inspector.added(copyTarget); } } catch (HgDataStreamException ex) { ex.printStackTrace(); // FIXME perhaps, shall record this exception to dedicated mediator and continue // for a single file not to be irresolvable obstacle for a status operation } } } for (String left : r1Files) { final Path r2filePath = pp.path(left); if (scope.accept(r2filePath)) { inspector.removed(r2filePath); } } } public Record status(int rev1, int rev2) { Record rv = new Record(); walk(rev1, rev2, rv); return rv; } /*package-local*/static Path getOriginIfCopy(HgRepository hgRepo, Path fname, Collection<String> originals, int originalChangelogRevision) throws HgDataStreamException { HgDataFile df = hgRepo.getFileNode(fname); while (df.isCopy()) { Path original = df.getCopySourceName(); if (originals.contains(original.toString())) { df = hgRepo.getFileNode(original); int changelogRevision = df.getChangesetLocalRevision(0); if (changelogRevision <= originalChangelogRevision) { // copy/rename source was known prior to rev1 // (both r1Files.contains is true and original was created earlier than rev1) // without r1Files.contains changelogRevision <= rev1 won't suffice as the file // might get removed somewhere in between (changelogRevision < R < rev1) return original; } break; // copy/rename done later } df = hgRepo.getFileNode(original); // try more steps away } return null; } // XXX for r1..r2 status, only modified, added, removed (and perhaps, clean) make sense // XXX Need to specify whether copy targets are in added or not (@see Inspector#copied above) public static class Record implements HgStatusInspector { private List<Path> modified, added, removed, clean, missing, unknown, ignored; private Map<Path, Path> copied; private int startRev, endRev; private HgStatusCollector statusHelper; // XXX StatusCollector may additionally initialize Record instance to speed lookup of changed file revisions // here I need access to ManifestRevisionInspector via #raw(). Perhaps, non-static class (to get // implicit reference to StatusCollector) may be better? // Since users may want to reuse Record instance we've once created (and initialized), we need to // ensure functionality is correct for each/any call (#walk checks instanceof Record and fixes it up) // Perhaps, distinct helper (sc.getRevisionHelper().nodeid(fname)) would be better, just not clear // how to supply [start..end] values there easily /*package-local*/void init(int startRevision, int endRevision, HgStatusCollector self) { startRev = startRevision; endRev = endRevision; statusHelper = self; } public Nodeid nodeidBeforeChange(Path fname) { if (statusHelper == null || startRev == BAD_REVISION) { return null; } if ((modified == null || !modified.contains(fname)) && (removed == null || !removed.contains(fname))) { return null; } return statusHelper.raw(startRev).nodeid(fname.toString()); } public Nodeid nodeidAfterChange(Path fname) { if (statusHelper == null || endRev == BAD_REVISION) { return null; } if ((modified == null || !modified.contains(fname)) && (added == null || !added.contains(fname))) { return null; } return statusHelper.raw(endRev).nodeid(fname.toString()); } public List<Path> getModified() { return proper(modified); } public List<Path> getAdded() { return proper(added); } public List<Path> getRemoved() { return proper(removed); } public Map<Path,Path> getCopied() { if (copied == null) { return Collections.emptyMap(); } return Collections.unmodifiableMap(copied); } public List<Path> getClean() { return proper(clean); } public List<Path> getMissing() { return proper(missing); } public List<Path> getUnknown() { return proper(unknown); } public List<Path> getIgnored() { return proper(ignored); } private List<Path> proper(List<Path> l) { if (l == null) { return Collections.emptyList(); } return Collections.unmodifiableList(l); } // // public void modified(Path fname) { modified = doAdd(modified, fname); } public void added(Path fname) { added = doAdd(added, fname); } public void copied(Path fnameOrigin, Path fnameAdded) { if (copied == null) { copied = new LinkedHashMap<Path, Path>(); } added(fnameAdded); copied.put(fnameAdded, fnameOrigin); } public void removed(Path fname) { removed = doAdd(removed, fname); } public void clean(Path fname) { clean = doAdd(clean, fname); } public void missing(Path fname) { missing = doAdd(missing, fname); } public void unknown(Path fname) { unknown = doAdd(unknown, fname); } public void ignored(Path fname) { ignored = doAdd(ignored, fname); } private static List<Path> doAdd(List<Path> l, Path p) { if (l == null) { l = new LinkedList<Path>(); } l.add(p); return l; } } /*package-local*/ static final class ManifestRevisionInspector implements HgManifest.Inspector { private final TreeMap<String, Nodeid> idsMap; private final TreeMap<String, String> flagsMap; private final Pool<Nodeid> idsPool; private final Pool<String> namesPool; // optional pools for effective management of nodeids and filenames (they are likely // to be duplicated among different manifest revisions public ManifestRevisionInspector(Pool<Nodeid> nodeidPool, Pool<String> filenamePool) { idsPool = nodeidPool; namesPool = filenamePool; idsMap = new TreeMap<String, Nodeid>(); flagsMap = new TreeMap<String, String>(); } public Collection<String> files() { return idsMap.keySet(); } public Nodeid nodeid(String fname) { return idsMap.get(fname); } public String flags(String fname) { return flagsMap.get(fname); } // public boolean next(Nodeid nid, String fname, String flags) { if (namesPool != null) { fname = namesPool.unify(fname); } if (idsPool != null) { nid = idsPool.unify(nid); } idsMap.put(fname, nid); if (flags != null) { // TreeMap$Entry takes 32 bytes. No reason to keep null for such price // Perhaps, Map<String, Pair<Nodeid, String>> might be better solution flagsMap.put(fname, flags); } return true; } public boolean end(int revision) { // in fact, this class cares about single revision return false; } public boolean begin(int revision, Nodeid nid, int changelogRevision) { return true; } } }