diff hg4j/src/main/java/org/tmatesoft/hg/repo/HgStatusCollector.java @ 213:6ec4af642ba8 gradle

Project uses Gradle for build - actual changes
author Alexander Kitaev <kitaev@gmail.com>
date Tue, 10 May 2011 10:52:53 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hg4j/src/main/java/org/tmatesoft/hg/repo/HgStatusCollector.java	Tue May 10 10:52:53 2011 +0200
@@ -0,0 +1,466 @@
+/*
+ * Copyright (c) 2011 TMate Software Ltd
+ *  
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.repo;
+
+import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION;
+import static org.tmatesoft.hg.repo.HgRepository.TIP;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.tmatesoft.hg.core.HgDataStreamException;
+import org.tmatesoft.hg.core.Nodeid;
+import org.tmatesoft.hg.internal.Pool;
+import org.tmatesoft.hg.util.Path;
+import org.tmatesoft.hg.util.PathPool;
+import org.tmatesoft.hg.util.PathRewrite;
+
+
+/**
+ * RevisionWalker?
+ *
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public class HgStatusCollector {
+
+	private final HgRepository repo;
+	private final SortedMap<Integer, ManifestRevisionInspector> cache; // sparse array, in fact
+	// with cpython repository, ~70 000 changes, complete Log (direct out, no reverse) output 
+	// no cache limit, no nodeids and fname caching - OOME on changeset 1035
+	// no cache limit, but with cached nodeids and filenames - 1730+
+	// cache limit 100 - 19+ minutes to process 10000, and still working (too long, stopped)
+	private final int cacheMaxSize = 50; // do not keep too much manifest revisions
+	private PathPool pathPool;
+	private final Pool<Nodeid> cacheNodes;
+	private final Pool<String> cacheFilenames; // XXX in fact, need to think if use of PathPool directly instead is better solution
+	private final ManifestRevisionInspector emptyFakeState;
+	
+
+	public HgStatusCollector(HgRepository hgRepo) {
+		this.repo = hgRepo;
+		cache = new TreeMap<Integer, ManifestRevisionInspector>();
+		cacheNodes = new Pool<Nodeid>();
+		cacheFilenames = new Pool<String>();
+
+		emptyFakeState = new ManifestRevisionInspector(null, null);
+		emptyFakeState.begin(-1, null);
+		emptyFakeState.end(-1);
+	}
+	
+	public HgRepository getRepo() {
+		return repo;
+	}
+	
+	private ManifestRevisionInspector get(int rev) {
+		ManifestRevisionInspector i = cache.get(rev);
+		if (i == null) {
+			if (rev == -1) {
+				return emptyFakeState;
+			}
+			while (cache.size() > cacheMaxSize) {
+				// assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary
+				cache.remove(cache.firstKey());
+			}
+			i = new ManifestRevisionInspector(cacheNodes, cacheFilenames);
+			cache.put(rev, i);
+			repo.getManifest().walk(rev, rev, i);
+		}
+		return i;
+	}
+
+	private boolean cached(int revision) {
+		return cache.containsKey(revision) || revision == -1;
+	}
+	
+	private void initCacheRange(int minRev, int maxRev) {
+		while (cache.size() > cacheMaxSize) {
+			// assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary
+			cache.remove(cache.firstKey());
+		}
+		repo.getManifest().walk(minRev, maxRev, new HgManifest.Inspector() {
+			private ManifestRevisionInspector delegate;
+			private boolean cacheHit; // range may include revisions we already know about, do not re-create them
+
+			public boolean begin(int revision, Nodeid nid) {
+				assert delegate == null;
+				if (cache.containsKey(revision)) { // don't need to check emptyFakeState hit as revision never -1 here
+					cacheHit = true;
+				} else {
+					cache.put(revision, delegate = new ManifestRevisionInspector(cacheNodes, cacheFilenames));
+					// cache may grow bigger than max size here, but it's ok as present simplistic cache clearing mechanism may
+					// otherwise remove entries we just added
+					delegate.begin(revision, nid);
+					cacheHit = false;
+				}
+				return true;
+			}
+
+			public boolean next(Nodeid nid, String fname, String flags) {
+				if (!cacheHit) {
+					delegate.next(nid, fname, flags);
+				}
+				return true;
+			}
+			
+			public boolean end(int revision) {
+				if (!cacheHit) {
+					delegate.end(revision);
+				}
+				cacheHit = false;				
+				delegate = null;
+				return true;
+			}
+		});
+	}
+	
+	/*package-local*/ ManifestRevisionInspector raw(int rev) {
+		return get(rev);
+	}
+	/*package-local*/ PathPool getPathPool() {
+		if (pathPool == null) {
+			pathPool = new PathPool(new PathRewrite.Empty());
+		}
+		return pathPool;
+	}
+
+	/**
+	 * Allows sharing of a common path cache 
+	 */
+	public void setPathPool(PathPool pathPool) {
+		this.pathPool = pathPool;
+	}
+		
+	
+	// hg status --change <rev>
+	public void change(int rev, HgStatusInspector inspector) {
+		int[] parents = new int[2];
+		repo.getChangelog().parents(rev, parents, null, null);
+		walk(parents[0], rev, inspector);
+	}
+	
+	// I assume revision numbers are the same for changelog and manifest - here 
+	// user would like to pass changelog revision numbers, and I use them directly to walk manifest.
+	// if this assumption is wrong, fix this (lookup manifest revisions from changeset).
+	// rev1 and rev2 may be -1 to indicate comparison to empty repository
+	// argument order matters 
+	public void walk(int rev1, int rev2, HgStatusInspector inspector) {
+		if (rev1 == rev2) {
+			throw new IllegalArgumentException();
+		}
+		if (inspector == null) {
+			throw new IllegalArgumentException();
+		}
+		if (inspector instanceof Record) {
+			((Record) inspector).init(rev1, rev2, this);
+		}
+		final int lastManifestRevision = repo.getManifest().getLastRevision();
+		if (rev1 == TIP) {
+			rev1 = lastManifestRevision;
+		}
+		if (rev2 == TIP) {
+			rev2 = lastManifestRevision; 
+		}
+		// in fact, rev1 and rev2 are often next (or close) to each other,
+		// thus, we can optimize Manifest reads here (manifest.walk(rev1, rev2))
+		ManifestRevisionInspector r1, r2 ;
+		boolean need1 = !cached(rev1), need2 = !cached(rev2);
+		if (need1 || need2) {
+			int minRev, maxRev;
+			if (need1 && need2 && Math.abs(rev1 - rev2) < 5 /*subjective equivalent of 'close enough'*/) {
+				minRev = rev1 < rev2 ? rev1 : rev2;
+				maxRev = minRev == rev1 ? rev2 : rev1;
+				if (minRev > 0) {
+					minRev--; // expand range a bit
+				}
+				initCacheRange(minRev, maxRev);
+				need1 = need2 = false;
+			}
+			// either both unknown and far from each other, or just one of them.
+			// read with neighbors to save potential subsequent calls for neighboring elements
+			// XXX perhaps, if revlog.baseRevision is cheap, shall expand minRev up to baseRevision
+			// which going to be read anyway
+			if (need1) {
+				minRev = rev1;
+				maxRev = rev1 < lastManifestRevision-5 ? rev1+5 : lastManifestRevision;
+				initCacheRange(minRev, maxRev);
+			}
+			if (need2) {
+				minRev = rev2;
+				maxRev = rev2 < lastManifestRevision-5 ? rev2+5 : lastManifestRevision;
+				initCacheRange(minRev, maxRev);
+			}
+		}
+		r1 = get(rev1);
+		r2 = get(rev2);
+
+		PathPool pp = getPathPool();
+
+		TreeSet<String> r1Files = new TreeSet<String>(r1.files());
+		for (String fname : r2.files()) {
+			if (r1Files.remove(fname)) {
+				Nodeid nidR1 = r1.nodeid(fname);
+				Nodeid nidR2 = r2.nodeid(fname);
+				String flagsR1 = r1.flags(fname);
+				String flagsR2 = r2.flags(fname);
+				if (nidR1.equals(nidR2) && ((flagsR2 == null && flagsR1 == null) || flagsR2.equals(flagsR1))) {
+					inspector.clean(pp.path(fname));
+				} else {
+					inspector.modified(pp.path(fname));
+				}
+			} else {
+				try {
+					Path copyTarget = pp.path(fname);
+					Path copyOrigin = getOriginIfCopy(repo, copyTarget, r1Files, rev1);
+					if (copyOrigin != null) {
+						inspector.copied(pp.path(copyOrigin) /*pipe through pool, just in case*/, copyTarget);
+					} else {
+						inspector.added(copyTarget);
+					}
+				} catch (HgDataStreamException ex) {
+					ex.printStackTrace();
+					// FIXME perhaps, shall record this exception to dedicated mediator and continue
+					// for a single file not to be irresolvable obstacle for a status operation
+				}
+			}
+		}
+		for (String left : r1Files) {
+			inspector.removed(pp.path(left));
+		}
+	}
+	
+	public Record status(int rev1, int rev2) {
+		Record rv = new Record();
+		walk(rev1, rev2, rv);
+		return rv;
+	}
+	
+	/*package-local*/static Path getOriginIfCopy(HgRepository hgRepo, Path fname, Collection<String> originals, int originalChangelogRevision) throws HgDataStreamException {
+		HgDataFile df = hgRepo.getFileNode(fname);
+		while (df.isCopy()) {
+			Path original = df.getCopySourceName();
+			if (originals.contains(original.toString())) {
+				df = hgRepo.getFileNode(original);
+				int changelogRevision = df.getChangesetLocalRevision(0);
+				if (changelogRevision <= originalChangelogRevision) {
+					// copy/rename source was known prior to rev1 
+					// (both r1Files.contains is true and original was created earlier than rev1)
+					// without r1Files.contains changelogRevision <= rev1 won't suffice as the file
+					// might get removed somewhere in between (changelogRevision < R < rev1)
+					return original;
+				}
+				break; // copy/rename done later
+			} 
+			df = hgRepo.getFileNode(original); // try more steps away
+		}
+		return null;
+	}
+
+	// XXX for r1..r2 status, only modified, added, removed (and perhaps, clean) make sense
+	// XXX Need to specify whether copy targets are in added or not (@see Inspector#copied above)
+	public static class Record implements HgStatusInspector {
+		private List<Path> modified, added, removed, clean, missing, unknown, ignored;
+		private Map<Path, Path> copied;
+		
+		private int startRev, endRev;
+		private HgStatusCollector statusHelper;
+		
+		// XXX StatusCollector may additionally initialize Record instance to speed lookup of changed file revisions
+		// here I need access to ManifestRevisionInspector via #raw(). Perhaps, non-static class (to get
+		// implicit reference to StatusCollector) may be better?
+		// Since users may want to reuse Record instance we've once created (and initialized), we need to  
+		// ensure functionality is correct for each/any call (#walk checks instanceof Record and fixes it up)
+		// Perhaps, distinct helper (sc.getRevisionHelper().nodeid(fname)) would be better, just not clear
+		// how to supply [start..end] values there easily
+		/*package-local*/void init(int startRevision, int endRevision, HgStatusCollector self) {
+			startRev = startRevision;
+			endRev = endRevision;
+			statusHelper = self;
+		}
+		
+		public Nodeid nodeidBeforeChange(Path fname) {
+			if (statusHelper == null || startRev == BAD_REVISION) {
+				return null;
+			}
+			if ((modified == null || !modified.contains(fname)) && (removed == null || !removed.contains(fname))) {
+				return null;
+			}
+			return statusHelper.raw(startRev).nodeid(fname.toString());
+		}
+		public Nodeid nodeidAfterChange(Path fname) {
+			if (statusHelper == null || endRev == BAD_REVISION) {
+				return null;
+			}
+			if ((modified == null || !modified.contains(fname)) && (added == null || !added.contains(fname))) {
+				return null;
+			}
+			return statusHelper.raw(endRev).nodeid(fname.toString());
+		}
+		
+		public List<Path> getModified() {
+			return proper(modified);
+		}
+
+		public List<Path> getAdded() {
+			return proper(added);
+		}
+
+		public List<Path> getRemoved() {
+			return proper(removed);
+		}
+
+		public Map<Path,Path> getCopied() {
+			if (copied == null) {
+				return Collections.emptyMap();
+			}
+			return Collections.unmodifiableMap(copied);
+		}
+
+		public List<Path> getClean() {
+			return proper(clean);
+		}
+
+		public List<Path> getMissing() {
+			return proper(missing);
+		}
+
+		public List<Path> getUnknown() {
+			return proper(unknown);
+		}
+
+		public List<Path> getIgnored() {
+			return proper(ignored);
+		}
+		
+		private List<Path> proper(List<Path> l) {
+			if (l == null) {
+				return Collections.emptyList();
+			}
+			return Collections.unmodifiableList(l);
+		}
+
+		//
+		//
+		
+		public void modified(Path fname) {
+			modified = doAdd(modified, fname);
+		}
+
+		public void added(Path fname) {
+			added = doAdd(added, fname);
+		}
+
+		public void copied(Path fnameOrigin, Path fnameAdded) {
+			if (copied == null) {
+				copied = new LinkedHashMap<Path, Path>();
+			}
+			added(fnameAdded);
+			copied.put(fnameAdded, fnameOrigin);
+		}
+
+		public void removed(Path fname) {
+			removed = doAdd(removed, fname);
+		}
+
+		public void clean(Path fname) {
+			clean = doAdd(clean, fname);
+		}
+
+		public void missing(Path fname) {
+			missing = doAdd(missing, fname);
+		}
+
+		public void unknown(Path fname) {
+			unknown = doAdd(unknown, fname);
+		}
+
+		public void ignored(Path fname) {
+			ignored = doAdd(ignored, fname);
+		}
+
+		private static List<Path> doAdd(List<Path> l, Path p) {
+			if (l == null) {
+				l = new LinkedList<Path>();
+			}
+			l.add(p);
+			return l;
+		}
+	}
+	
+	/*package-local*/ static final class ManifestRevisionInspector implements HgManifest.Inspector {
+		private final TreeMap<String, Nodeid> idsMap;
+		private final TreeMap<String, String> flagsMap;
+		private final Pool<Nodeid> idsPool;
+		private final Pool<String> namesPool; 
+
+		// optional pools for effective management of nodeids and filenames (they are likely
+		// to be duplicated among different manifest revisions
+		public ManifestRevisionInspector(Pool<Nodeid> nodeidPool, Pool<String> filenamePool) {
+			idsPool = nodeidPool;
+			namesPool = filenamePool;
+			idsMap = new TreeMap<String, Nodeid>();
+			flagsMap = new TreeMap<String, String>();
+		}
+		
+		public Collection<String> files() {
+			return idsMap.keySet();
+		}
+
+		public Nodeid nodeid(String fname) {
+			return idsMap.get(fname);
+		}
+
+		public String flags(String fname) {
+			return flagsMap.get(fname);
+		}
+
+		//
+
+		public boolean next(Nodeid nid, String fname, String flags) {
+			if (namesPool != null) {
+				fname = namesPool.unify(fname);
+			}
+			if (idsPool != null) {
+				nid = idsPool.unify(nid);
+			}
+			idsMap.put(fname, nid);
+			if (flags != null) {
+				// TreeMap$Entry takes 32 bytes. No reason to keep null for such price
+				// Perhaps, Map<String, Pair<Nodeid, String>> might be better solution
+				flagsMap.put(fname, flags);
+			}
+			return true;
+		}
+
+		public boolean end(int revision) {
+			// in fact, this class cares about single revision
+			return false; 
+		}
+
+		public boolean begin(int revision, Nodeid nid) {
+			return true;
+		}
+	}
+
+}