changeset 694:7efabe0cddcf

Speed up (a) file rename history to minimize file reads; (b) file.isCopy(int) to read metadata for few revisions at once (use pattern assumes earlier revisions are likely to be queried, too); (c) HgIgnore.isIgnored by caching matched initial fragments (to substitute more expensive Matcher.matches with cheaper HashMap.contains)
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Mon, 05 Aug 2013 17:42:10 +0200 (2013-08-05)
parents 32b0d19e8aba
children 053bb4397bf9
files src/org/tmatesoft/hg/internal/FileRenameHistory.java src/org/tmatesoft/hg/internal/Metadata.java src/org/tmatesoft/hg/repo/HgDataFile.java src/org/tmatesoft/hg/repo/HgIgnore.java src/org/tmatesoft/hg/repo/HgStatusCollector.java src/org/tmatesoft/hg/repo/HgWorkingCopyStatusCollector.java src/org/tmatesoft/hg/repo/Revlog.java
diffstat 7 files changed, 120 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/internal/FileRenameHistory.java	Mon Aug 05 12:45:36 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/FileRenameHistory.java	Mon Aug 05 17:42:10 2013 +0200
@@ -16,7 +16,10 @@
  */
 package org.tmatesoft.hg.internal;
 
+import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION;
+
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.LinkedList;
 import java.util.List;
@@ -24,6 +27,8 @@
 import org.tmatesoft.hg.core.HgFileRevision;
 import org.tmatesoft.hg.core.HgIterateDirection;
 import org.tmatesoft.hg.repo.HgDataFile;
+import org.tmatesoft.hg.repo.HgRepository;
+import org.tmatesoft.hg.repo.HgRuntimeException;
 
 /**
  * Traces file renames. Quite similar to HgChangesetFileSneaker, although the latter tries different paths
@@ -61,8 +66,9 @@
 		LinkedList<Chunk> chunks = new LinkedList<Chunk>();
 		int chunkStart = 0, chunkEnd = fileRev;
 		int csetChunkEnd = -1, csetChunkStart = -1;
+		BasicRevMap csetMap = new BasicRevMap(0, fileRev).collect(df);
 		while (fileRev >= 0) {
-			int cset = df.getChangesetRevisionIndex(fileRev);
+			int cset = csetMap.changesetAt(fileRev);
 			if (csetChunkEnd == -1) {
 				csetChunkEnd = cset;
 			}
@@ -82,6 +88,7 @@
 				HgFileRevision origin = df.getCopySource(fileRev);
 				df = df.getRepo().getFileNode(origin.getPath());
 				fileRev = chunkEnd = df.getRevisionIndex(origin.getRevision());
+				csetMap = new BasicRevMap(0, fileRev).collect(df);
 				chunkStart = 0;
 				csetChunkEnd = cset - 1; // if df is copy, cset can't be 0
 				csetChunkStart = -1;
@@ -130,7 +137,7 @@
 	/**
 	 * file has changes [firstFileRev..lastFileRev] that have occurred somewhere in [firstCset..lastCset] 
 	 */
-	public static class Chunk {
+	public static final class Chunk {
 		private final HgDataFile df;
 		private final int fileRevFrom;
 		private final int fileRevTo;
@@ -159,4 +166,32 @@
 			return csetTo;
 		}
 	}
+	
+	private static final class BasicRevMap implements HgDataFile.LinkRevisionInspector {
+		private final int[] revs;
+		private final int fromRev;
+		private final int toRev;
+		public BasicRevMap(int startRev, int endRev) {
+			revs = new int[endRev+1]; // for simplicity, just ignore startRev now (it's 0 in local use anyway)
+			fromRev = startRev;
+			toRev = endRev;
+			Arrays.fill(revs, BAD_REVISION);
+		}
+		
+		public BasicRevMap collect(HgDataFile df) {
+			df.indexWalk(fromRev, toRev, this);
+			return this;
+		}
+
+		public void next(int revisionIndex, int linkedRevisionIndex) throws HgRuntimeException {
+			revs[revisionIndex] = linkedRevisionIndex;
+		}
+		
+		/**
+		 * @return {@link HgRepository#BAD_REVISION} if there's no mapping
+		 */
+		public int changesetAt(int rev) {
+			return revs[rev];
+		}
+	}
 }
--- a/src/org/tmatesoft/hg/internal/Metadata.java	Mon Aug 05 12:45:36 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/Metadata.java	Mon Aug 05 17:42:10 2013 +0200
@@ -16,6 +16,7 @@
  */
 package org.tmatesoft.hg.internal;
 
+import static org.tmatesoft.hg.repo.HgRepository.NO_REVISION;
 import static org.tmatesoft.hg.util.LogFacility.Severity.Error;
 
 import java.io.ByteArrayOutputStream;
@@ -50,18 +51,24 @@
 	private final Metadata.Record NONE = new Record(-1, null); // don't want statics
 
 	private final LogFacility log;
+	
+	private int lastRevRead = NO_REVISION;
 
 	public Metadata(SessionContext.Source sessionCtx) {
 		log = sessionCtx.getSessionContext().getLog();
 	}
 	
-	// true when there's metadata for given revision
+	/**
+	 * @return <code>true</code> when there's metadata for given revision
+	 */
 	public boolean known(int revision) {
 		Metadata.Record i = entries.get(revision);
 		return i != null && NONE != i;
 	}
 
-	// true when revision has been checked for metadata presence.
+	/**
+	 * @return <code>true</code> when revision has been checked for metadata presence.
+	 */
 	public boolean checked(int revision) {
 		return entries.containsKey(revision);
 	}
@@ -71,6 +78,14 @@
 		Metadata.Record i = entries.get(revision);
 		return i == NONE;
 	}
+	
+	/**
+	 * Get the greatest revision index visited so far.
+	 * Note, doesn't imply all revisions up to this has been visited.
+	 */
+	public int lastRevisionRead() {
+		return lastRevRead;
+	}
 
 	// mark revision as having no metadata.
 	void recordNone(int revision) {
@@ -98,6 +113,9 @@
 	 */
 	public boolean tryRead(int revisionNumber, DataAccess data) throws IOException, HgInvalidControlFileException {
 		final int daLength = data.length();
+		if (lastRevRead == NO_REVISION || revisionNumber > lastRevRead) {
+			lastRevRead = revisionNumber;
+		}
 		if (daLength < 4 || data.readByte() != 1 || data.readByte() != 10) {
 			recordNone(revisionNumber);
 			return false;
--- a/src/org/tmatesoft/hg/repo/HgDataFile.java	Mon Aug 05 12:45:36 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgDataFile.java	Mon Aug 05 17:42:10 2013 +0200
@@ -63,7 +63,12 @@
 	// slashes, unix-style?
 	// repo location agnostic, just to give info to user, not to access real storage
 	private final Path path;
-	private Metadata metadata; // get initialized on first access to file content.
+	/*
+	 * Get initialized on first access to file content.
+	 * We read metadata starting from rev 0 always, so that Metadata#lastRevisionRead()
+	 * shows the region of file history [0..lastRevisionRead] we know metadata for
+	 */
+	private Metadata metadata;
 	
 	/*package-local*/HgDataFile(HgRepository hgRepo, Path filePath, RevlogStream content) {
 		super(hgRepo, content, false);
@@ -477,12 +482,17 @@
 	}
 	
 	private void checkAndRecordMetadata(int localRev) throws HgRuntimeException {
+		int startRev;
 		if (metadata == null) {
 			metadata = new Metadata(getRepo());
+			startRev = 0; // read from the very beginning with one shot - likely isCopy(localRev-i) will be of interest, too
+		} else {
+			startRev = metadata.lastRevisionRead() + 1;
 		}
+		assert localRev >= startRev; // callers of this method ensure that metadata has been checked beforehand
 		// use MetadataInspector without delegate to process metadata only
 		RevlogStream.Inspector insp = new MetadataInspector(metadata, null);
-		super.content.iterate(localRev, localRev, true, insp);
+		super.content.iterate(startRev, localRev, true, insp);
 	}
 
 	private static class MetadataInspector extends ErrorHandlingInspector implements RevlogStream.Inspector {
@@ -501,17 +511,16 @@
 
 		public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) throws HgRuntimeException {
 			try {
-				if (metadata.tryRead(revisionNumber, data)) {
-					// da is in prepared state (i.e. we consumed all bytes up to metadata end).
-					// However, it's not safe to assume delegate won't call da.reset() for some reason,
-					// and we need to ensure predictable result.
+				final boolean gotMetadata = metadata.tryRead(revisionNumber, data);
+				if (delegate != null) {
 					data.reset();
-					int offset = metadata.dataOffset(revisionNumber);
-					data = new FilterDataAccess(data, offset, data.length() - offset);
-				} else {
-					data.reset();
-				}
-				if (delegate != null) {
+					if (gotMetadata) {
+						// da is in prepared state (i.e. we consumed all bytes up to metadata end).
+						// However, it's not safe to assume delegate won't call da.reset() for some reason,
+						// and we need to ensure predictable result.
+						int offset = metadata.dataOffset(revisionNumber);
+						data = new FilterDataAccess(data, offset, data.length() - offset);
+					}
 					delegate.next(revisionNumber, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeid, data);
 				}
 			} catch (IOException ex) {
--- a/src/org/tmatesoft/hg/repo/HgIgnore.java	Mon Aug 05 12:45:36 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgIgnore.java	Mon Aug 05 17:42:10 2013 +0200
@@ -25,7 +25,9 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
 
@@ -44,11 +46,16 @@
 
 	private List<Pattern> entries;
 	private final PathRewrite globPathHelper;
-	private FileChangeMonitor ignoreFileTracker; 
+	private FileChangeMonitor ignoreFileTracker;
+	// if pattern matches first fragment of a path, it will
+	// match any other path with this fragment, so we can avoid pattern matching
+	// if path starts with one of such fragments (often for e.g. ignored 'bin/' folders)
+	private final Set<String> ignoredFirstFragments;
 
 	HgIgnore(PathRewrite globPathRewrite) {
 		entries = Collections.emptyList();
 		globPathHelper = globPathRewrite;
+		ignoredFirstFragments = new HashSet<String>();
 	}
 
 	/* package-local */ void read(Internals repo) throws HgInvalidControlFileException {
@@ -230,18 +237,28 @@
 	 */
 	public boolean isIgnored(Path path) {
 		String ps = path.toString();
+		int x = ps.indexOf('/');
+		if (x != -1 && ignoredFirstFragments.contains(ps.substring(0, x))) {
+			return true;
+		}
 		for (Pattern p : entries) {
-			int x = ps.indexOf('/'); // reset for each pattern
 			if (p.matcher(ps).find()) {
 				return true;
 			}
-			while (x != -1 && x+1 != ps.length() /*skip very last segment not to check complete string twice*/) {
-				String fragment = ps.substring(0, x);
+		}
+		boolean firstFragment = true;
+		while (x != -1 && x+1 != ps.length() /*skip very last segment not to check complete string twice*/) {
+			String fragment = ps.substring(0, x);
+			for (Pattern p : entries) {
 				if (p.matcher(fragment).matches()) {
+					if (firstFragment) {
+						ignoredFirstFragments.add(new String(fragment));
+					}
 					return true;
 				}
-				x = ps.indexOf('/', x+1);
 			}
+			x = ps.indexOf('/', x+1);
+			firstFragment = false;
 		}
 		return false;
 	}
--- a/src/org/tmatesoft/hg/repo/HgStatusCollector.java	Mon Aug 05 12:45:36 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgStatusCollector.java	Mon Aug 05 17:42:10 2013 +0200
@@ -299,7 +299,6 @@
 
 		final CancelSupport cs = CancelSupport.Factory.get(inspector);
 
-		
 		Collection<Path> allBaseFiles = r1.files();
 		TreeSet<Path> r1Files = new TreeSet<Path>(allBaseFiles);
 		for (Path r2fname : r2.files()) {
--- a/src/org/tmatesoft/hg/repo/HgWorkingCopyStatusCollector.java	Mon Aug 05 12:45:36 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgWorkingCopyStatusCollector.java	Mon Aug 05 17:42:10 2013 +0200
@@ -420,20 +420,11 @@
 			// removed: nothing to report, 
 			if (ds.checkNormal(fname) != null || ds.checkMerged(fname) != null) {
 				try {
-					// FIXME refactor, done numerous time e.g. in TestStatus#testStatusCommand with base = 3
-					ArrayList<Nodeid> parents = new ArrayList<Nodeid>(2);
-					parents.add(ds.parents().first());
-					parents.add(ds.parents().second());
-					parents.remove(Nodeid.NULL);
-					// try either parent if file came through one of them, or both
-					for (Nodeid parent : parents) {
-						int csetIndex = repo.getChangelog().getRevisionIndex(parent);
-						Nodeid fileRev = repo.getManifest().getFileRevision(csetIndex, fname);
-						if (fileRev == null) {
-							continue;
-						}
+					// XXX perhaps, shall take second parent into account if not null, too?
+					Nodeid nidFromDirstate = getDirstateParentManifest().nodeid(fname);
+					if (nidFromDirstate != null) {
 						// see if file revision known in this parent got copied from one of baseRevNames
-						Path origin = HgStatusCollector.getOriginIfCopy(repo, fname, fileRev, collect.files(), baseRevision);
+						Path origin = HgStatusCollector.getOriginIfCopy(repo, fname, nidFromDirstate, collect.files(), baseRevision);
 						if (origin != null) {
 							inspector.copied(getPathPool().mangle(origin), fname);
 							return;
--- a/src/org/tmatesoft/hg/repo/Revlog.java	Mon Aug 05 12:45:36 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/Revlog.java	Mon Aug 05 17:42:10 2013 +0200
@@ -334,6 +334,8 @@
 	 * EXPERIMENTAL CODE, DO NOT USE
 	 * 
 	 * Alternative revlog iteration
+	 * There's an implicit ordering of inspectors. Less inspector needs, earlier its invocation occurs.
+	 * E.g. ParentInspector goes after RevisionInspector. LinkRevisionInspector goes before RevisionInspector
 	 * 
 	 * @param start
 	 * @param end
@@ -349,6 +351,7 @@
 		}
 		final RevisionInspector revisionInsp = Adaptable.Factory.getAdapter(inspector, RevisionInspector.class, null);
 		final ParentInspector parentInsp = Adaptable.Factory.getAdapter(inspector, ParentInspector.class, null);
+		final LinkRevisionInspector linkRevInspector = Adaptable.Factory.getAdapter(inspector, LinkRevisionInspector.class, null);
 		final Nodeid[] allRevisions = parentInsp == null ? null : new Nodeid[end - _start + 1];
 		// next are to build set of parent indexes that are not part of the range iteration
 		// i.e. those parents we need to read separately. See Issue 31 for details.
@@ -360,6 +363,14 @@
 			private int i = 0;
 			
 			public void next(int revisionIndex, int actualLen, int baseRevIndex, int linkRevIndex, int parent1RevIndex, int parent2RevIndex, byte[] nodeid, DataAccess data) throws HgRuntimeException {
+				// FIXME refactor either as chain of RevlogStream.Inspector or an external AnyOf() runner not to keep everything
+				// in a single method
+				if (linkRevInspector != null) {
+					linkRevInspector.next(revisionIndex, linkRevIndex);
+					if (parentInsp == null && revisionInsp == null) {
+						return;
+					}
+				}
 				Nodeid nid = Nodeid.fromBinary(nodeid, 0);
 				if (revisionInsp != null) {
 					revisionInsp.next(revisionIndex, nid, linkRevIndex);
@@ -440,6 +451,11 @@
 	}
 
 	@Experimental
+	public interface LinkRevisionInspector extends Inspector {
+		void next(int revisionIndex, int linkedRevisionIndex) throws HgRuntimeException;
+	}
+
+	@Experimental
 	public interface ParentInspector extends Inspector {
 		// XXX document whether parentX is -1 or a constant (BAD_REVISION? or dedicated?)
 		void next(int revisionIndex, Nodeid revision, int parent1, int parent2, Nodeid nidParent1, Nodeid nidParent2) throws HgRuntimeException;