changeset 254:a620f0663a37

Collect tags for a file - improve performance of 'sparse' manifest reads
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Tue, 16 Aug 2011 04:03:29 +0200
parents 1874d11054e5
children 5a6ab50b4cbf
files cmdline/org/tmatesoft/hg/console/Main.java src/org/tmatesoft/hg/repo/HgManifest.java test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java
diffstat 3 files changed, 174 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/cmdline/org/tmatesoft/hg/console/Main.java	Mon Aug 15 18:59:05 2011 +0200
+++ b/cmdline/org/tmatesoft/hg/console/Main.java	Tue Aug 16 04:03:29 2011 +0200
@@ -18,6 +18,7 @@
 
 import static org.tmatesoft.hg.repo.HgRepository.TIP;
 
+import java.io.File;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
@@ -33,6 +34,7 @@
 import org.tmatesoft.hg.internal.ByteArrayChannel;
 import org.tmatesoft.hg.internal.DigestHelper;
 import org.tmatesoft.hg.internal.PathGlobMatcher;
+import org.tmatesoft.hg.internal.RelativePathRewrite;
 import org.tmatesoft.hg.repo.HgBranches;
 import org.tmatesoft.hg.repo.HgChangelog;
 import org.tmatesoft.hg.repo.HgDataFile;
@@ -46,8 +48,11 @@
 import org.tmatesoft.hg.repo.HgSubrepoLocation.Kind;
 import org.tmatesoft.hg.repo.HgWorkingCopyStatusCollector;
 import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
+import org.tmatesoft.hg.util.FileIterator;
+import org.tmatesoft.hg.util.FileWalker;
 import org.tmatesoft.hg.util.Pair;
 import org.tmatesoft.hg.util.Path;
+import org.tmatesoft.hg.util.PathRewrite;
 
 /**
  * Various debug dumps. 
@@ -72,10 +77,11 @@
 
 	public static void main(String[] args) throws Exception {
 		Main m = new Main(args);
+		m.testTreeTraversal();
 //		m.testRevisionMap();
 //		m.testSubrepos();
 //		m.testReadWorkingCopy();
-		m.testParents();
+//		m.testParents();
 //		m.testEffectiveFileLog();
 //		m.testCatAtCsetRevision();
 //		m.testMergeState();
@@ -91,6 +97,19 @@
 //		m.bunchOfTests();
 	}
 	
+	private void testTreeTraversal() throws Exception {
+		// debug aid: step through every file FileWalker yields for the working directory, then report count and elapsed time
+		final File workingDir = hgRepo.getWorkingDir();
+		final Path.Source pathSource = new Path.SimpleSource(new PathRewrite.Composite(new RelativePathRewrite(workingDir), hgRepo.getToRepoPathHelper()));
+		final FileWalker walker = new FileWalker(workingDir, pathSource);
+		int filesSeen = 0;
+		final long startedAt = System.currentTimeMillis();
+		for (; walker.hasNext(); walker.next()) {
+			filesSeen++;
+		}
+		System.out.printf("Traversal of %d files took %d ms", filesSeen, System.currentTimeMillis() - startedAt);
+	}
+	
 	/*
 	 * cpython repo with 70715 revisions.
 	 	3 revisions - 80 ms vs 250 ms (250ms init)
--- a/src/org/tmatesoft/hg/repo/HgManifest.java	Mon Aug 15 18:59:05 2011 +0200
+++ b/src/org/tmatesoft/hg/repo/HgManifest.java	Tue Aug 16 04:03:29 2011 +0200
@@ -60,6 +60,31 @@
 		content.iterate(start0, end0, true, new ManifestParser(inspector));
 	}
 	
+	/**
+	 * "Sparse" iteration of the manifest, limited to revisions that correspond to the given changesets.
+	 * 
+	 * @param inspector callback to report manifest revisions to, shall not be <code>null</code>
+	 * @param localRevisions local changeset revisions to visit, in any order, shall not be <code>null</code>
+	 * @throws IllegalArgumentException if either argument is <code>null</code>
+	 */
+	public void walk(final Inspector inspector, int... localRevisions) {
+		if (inspector == null || localRevisions == null) {
+			throw new IllegalArgumentException();
+		}
+		// translate changeset revisions into manifest ones, noticing whether the outcome is already in ascending order
+		final int[] manifestRevs = new int[localRevisions.length];
+		boolean ascending = true;
+		for (int i = 0; i < manifestRevs.length; i++) {
+			manifestRevs[i] = fromChangelog(localRevisions[i]);
+			ascending = ascending && (i == 0 || manifestRevs[i-1] <= manifestRevs[i]);
+		}
+		if (!ascending) {
+			// only the translated copy is reordered, caller's array is left intact
+			Arrays.sort(manifestRevs);
+		}
+		content.iterate(manifestRevs, true, new ManifestParser(inspector));
+	}
+	
 	// manifest revision number that corresponds to the given changeset
 	/*package-local*/ int fromChangelog(int revisionNumber) {
 		if (HgInternals.wrongLocalRevision(revisionNumber)) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java	Tue Aug 16 04:03:29 2011 +0200
@@ -0,0 +1,129 @@
+package org.tmatesoft.hg.test;
+
+import java.io.*;
+import java.util.*;
+import java.util.Map.Entry;
+
+import org.tmatesoft.hg.core.*;
+import org.tmatesoft.hg.internal.Pool;
+import org.tmatesoft.hg.repo.*;
+import org.tmatesoft.hg.repo.HgTags.TagInfo;
+import org.tmatesoft.hg.util.*;
+
+/**
+ * @author Marc Strapetz
+ */
+public class MapTagsToFileRevisions {
+
+	// Static =================================================================
+
+	/** Builds a file -> tag -> file revision cache from manifests of tagged changesets, then prints tags for each revision of README. */
+	public static void main(String[] args) throws HgException, CancelledException {
+		final long start = System.currentTimeMillis();
+		final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython"));
+		final HgTags tags = repository.getTags();
+		// build cache
+		final Map<String, Map<TagInfo, Nodeid>> file2tag2rev = new HashMap<String, Map<TagInfo, Nodeid>>();
+		System.out.printf("Collecting manifests for %d tags\n", tags.getTags().size());
+		// effective translation of changeset revisions to their local indexes
+		final HgChangelog.RevisionMap clogrmap = repository.getChangelog().new RevisionMap().init();
+		int[] tagLocalRevs = new int[tags.getTags().size()];
+		int i = 0;
+		for (TagInfo tag : tags.getTags().values()) {
+			tagLocalRevs[i++] = clogrmap.localRevision(tag.revision());
+		}
+		System.out.printf("Found tag revisions to analyze: %d\n", System.currentTimeMillis() - start);
+		repository.getManifest().walk(new HgManifest.Inspector() {
+			private List<String> tagsAtRev;
+			final Pool<String> filenamePool = new Pool<String>();
+			final Pool<Nodeid> nodeidPool = new Pool<Nodeid>();
+
+			public boolean begin(int manifestRevision, Nodeid nid, int changelogRevision) {
+				Nodeid cset = clogrmap.revision(changelogRevision);
+				tagsAtRev = tags.tags(cset);
+				if (tagsAtRev.isEmpty()) {
+					System.out.println("Can't happen, provided we iterate over revisions with tags only");
+				}
+				return true;
+			}
+			
+			public boolean next(Nodeid nid, String fname, String flags) {
+				fname = filenamePool.unify(fname);
+				nid = nodeidPool.unify(nid);
+				Map<TagInfo, Nodeid> m = file2tag2rev.get(fname);
+				if (m == null) {
+					file2tag2rev.put(fname, m = new HashMap<TagInfo, Nodeid>());
+				}
+				for (String tag : tagsAtRev) {
+					m.put(tags.getTags().get(tag), nid);
+				}
+				return true;
+			}
+			
+			public boolean end(int manifestRevision) {
+				return true;
+			}
+			
+		}, tagLocalRevs);
+		System.out.printf("Cache built: %d\n", System.currentTimeMillis() - start);
+		//
+		// look up specific file. This part is fast.
+		final Path targetPath = Path.create("README");
+		HgDataFile fileNode = repository.getFileNode(targetPath);
+		// cache holds no entry for a file absent from every tagged revision; treat that as "no tags" rather than fail with NPE
+		Map<TagInfo, Nodeid> allTagsOfTheFile = file2tag2rev.get(targetPath.toString());
+		if (allTagsOfTheFile == null) {
+			allTagsOfTheFile = Collections.emptyMap();
+		}
+		// TODO if fileNode.isCopy, repeat for each getCopySourceName()
+		for (int localFileRev = 0; localFileRev < fileNode.getRevisionCount(); localFileRev++) {
+			Nodeid fileRev = fileNode.getRevision(localFileRev);
+			int changesetLocalRev = fileNode.getChangesetLocalRevision(localFileRev);
+			List<String> associatedTags = new ArrayList<String>();
+			for (Entry<TagInfo, Nodeid> e : allTagsOfTheFile.entrySet()) {
+				if (fileRev.equals(e.getValue())) {
+					associatedTags.add(e.getKey().name());
+				}
+			}
+			System.out.printf("%3d%7d%s\n", localFileRev, changesetLocalRev, associatedTags);
+		}
+		System.out.printf("Total time: %d", System.currentTimeMillis() - start);
+	}
+
+	/** Alternative approach: looks up README's revision at each tag through the manifest, then prints tags along the file's log. */
+	public static void main2(String[] args) throws HgException, CancelledException {
+		final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython"));
+		final Path targetPath = Path.create("README");
+		final HgTags tags = repository.getTags();
+		final Map<String, HgTags.TagInfo> tagToInfo = tags.getTags();
+		final HgManifest manifest = repository.getManifest();
+		final Map<Nodeid, List<String>> changeSetRevisionToTags = new HashMap<Nodeid, List<String>>();
+		final HgDataFile fileNode = repository.getFileNode(targetPath);
+		for (Entry<String, HgTags.TagInfo> tagEntry : tagToInfo.entrySet()) {
+			final String tagName = tagEntry.getKey();
+			final Nodeid nodeId = tagEntry.getValue().revision();
+			// TODO: This is not correct as we can't be sure that file at the corresponding revision is actually our target file (which may have been renamed, etc.)
+			final Nodeid fileRevision = manifest.getFileRevision(repository.getChangelog().getLocalRevision(nodeId), targetPath);
+			if (fileRevision == null) {
+				continue;
+			}
+			final Nodeid changeSetRevision = fileNode.getChangesetRevision(fileRevision);
+			List<String> revisionTags = changeSetRevisionToTags.get(changeSetRevision);
+			if (revisionTags == null) {
+				revisionTags = new ArrayList<String>();
+				changeSetRevisionToTags.put(changeSetRevision, revisionTags);
+			}
+			revisionTags.add(tagName);
+		}
+
+		final HgLogCommand logCommand = new HgLogCommand(repository);
+		logCommand.file(targetPath, true);
+		logCommand.execute(new HgChangesetHandler() {
+			public void next(HgChangeset changeset) {
+				if (changeset.getAffectedFiles().contains(targetPath)) {
+					System.out.println(changeset.getRevision() + " " + changeSetRevisionToTags.get(changeset.getNodeid()));
+				}
+			}
+		});
+	}
+}
\ No newline at end of file