changeset 326:d42a45a2c9d6

Alternative tag collection approach for a file history
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Tue, 04 Oct 2011 06:28:01 +0200
parents f05c8b1f08c4
children 3f09b8c19142
files src/org/tmatesoft/hg/repo/HgManifest.java test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java
diffstat 2 files changed, 127 insertions(+), 50 deletions(-) [+]
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/repo/HgManifest.java	Mon Oct 03 06:54:43 2011 +0200
+++ b/src/org/tmatesoft/hg/repo/HgManifest.java	Tue Oct 04 06:28:01 2011 +0200
@@ -22,6 +22,8 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
 
 import org.tmatesoft.hg.core.HgBadStateException;
 import org.tmatesoft.hg.core.Nodeid;
@@ -124,19 +126,8 @@
 		if (inspector == null || localRevisions == null) {
 			throw new IllegalArgumentException();
 		}
-		int[] manifestLocalRevs = new int[localRevisions.length];
-		boolean needsSort = false;
-		for (int i = 0; i < localRevisions.length; i++) {
-			final int manifestLocalRev = fromChangelog(localRevisions[i]);
-			manifestLocalRevs[i] = manifestLocalRev;
-			if (i > 0 && manifestLocalRevs[i-1] > manifestLocalRev) {
-				needsSort = true;
-			}
-		}
-		if (needsSort) {
-			Arrays.sort(manifestLocalRevs);
-		}
-		content.iterate(manifestLocalRevs, true, new ManifestParser(inspector));
+		int[] localManifestRevs = toLocalManifestRevisions(localRevisions);
+		content.iterate(localManifestRevs, true, new ManifestParser(inspector));
 	}
 	
 	// manifest revision number that corresponds to the given changeset
@@ -158,15 +149,22 @@
 	/**
 	 * Extracts file revision as it was known at the time of given changeset.
 	 * 
-	 * @param revisionNumber local changeset index 
+	 * @param localChangelogRevision local changeset index 
 	 * @param file path to file in question
 	 * @return file revision or <code>null</code> if manifest at specified revision doesn't list such file
 	 */
-	@Experimental(reason="Perhaps, HgDataFile shall own this method")
-	public Nodeid getFileRevision(int revisionNumber, final Path file) {
-		int rev = fromChangelog(revisionNumber);
-		final Nodeid[] rv = new Nodeid[] { null };
-		content.iterate(rev, rev, true, new RevlogStream.Inspector() {
+	@Experimental(reason="Perhaps, HgDataFile shall own this method, or get a delegate?")
+	public Nodeid getFileRevision(int localChangelogRevision, final Path file) {
+		return getFileRevisions(file, localChangelogRevision).get(localChangelogRevision);
+	}
+	
+	// XXX package-local, IntMap, and HgDataFile getFileRevisionAt(int... localChangelogRevisions)
+	@Experimental(reason="@see #getFileRevision")
+	public Map<Integer, Nodeid> getFileRevisions(final Path file, int... localChangelogRevisions) {
+		// FIXME need tests
+		int[] localManifestRevisions = toLocalManifestRevisions(localChangelogRevisions);
+		final HashMap<Integer,Nodeid> rv = new HashMap<Integer, Nodeid>(localChangelogRevisions.length);
+		content.iterate(localManifestRevisions, true, new RevlogStream.Inspector() {
 			
 			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) {
 				ByteArrayOutputStream bos = new ByteArrayOutputStream();
@@ -181,8 +179,10 @@
 							if (file.toString().equals(fname)) {
 								byte[] nid = new byte[40];  
 								data.readBytes(nid, 0, 40);
-								rv[0] = Nodeid.fromAscii(nid, 0, 40);
+								rv.put(linkRevision, Nodeid.fromAscii(nid, 0, 40));
 								break;
+							} else {
+								data.skip(40);
 							}
 							// else skip to the end of line
 							while (!data.isEmpty() && (b = data.readByte()) != '\n')
@@ -194,9 +194,26 @@
 				}
 			}
 		});
-		return rv[0];
+		return rv;
 	}
-			
+
+
+	private int[] toLocalManifestRevisions(int[] localChangelogRevisions) {
+		int[] localManifestRevs = new int[localChangelogRevisions.length];
+		boolean needsSort = false;
+		for (int i = 0; i < localChangelogRevisions.length; i++) {
+			final int manifestLocalRev = fromChangelog(localChangelogRevisions[i]);
+			localManifestRevs[i] = manifestLocalRev;
+			if (i > 0 && localManifestRevs[i-1] > manifestLocalRev) {
+				needsSort = true;
+			}
+		}
+		if (needsSort) {
+			Arrays.sort(localManifestRevs);
+		}
+		return localManifestRevs;
+	}
+
 	public interface Inspector {
 		boolean begin(int mainfestRevision, Nodeid nid, int changelogRevision);
 		/**
@@ -211,7 +228,7 @@
 	public interface Inspector2 extends Inspector {
 		boolean next(Nodeid nid, Path fname, Flags flags);
 	}
-	
+
 	/**
 	 * When Pool uses Strings directly,
 	 * ManifestParser creates new String instance with new char[] value, and does byte->char conversion.
--- a/test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java	Mon Oct 03 06:54:43 2011 +0200
+++ b/test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java	Tue Oct 04 06:28:01 2011 +0200
@@ -17,15 +17,15 @@
 import org.tmatesoft.hg.core.HgException;
 import org.tmatesoft.hg.core.HgLogCommand;
 import org.tmatesoft.hg.core.Nodeid;
-import org.tmatesoft.hg.internal.ArrayHelper;
+import org.tmatesoft.hg.internal.IntMap;
 import org.tmatesoft.hg.repo.HgChangelog;
+import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
 import org.tmatesoft.hg.repo.HgDataFile;
 import org.tmatesoft.hg.repo.HgLookup;
 import org.tmatesoft.hg.repo.HgManifest;
+import org.tmatesoft.hg.repo.HgManifest.Flags;
 import org.tmatesoft.hg.repo.HgRepository;
 import org.tmatesoft.hg.repo.HgTags;
-import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
-import org.tmatesoft.hg.repo.HgManifest.Flags;
 import org.tmatesoft.hg.repo.HgTags.TagInfo;
 import org.tmatesoft.hg.util.CancelledException;
 import org.tmatesoft.hg.util.Path;
@@ -40,11 +40,11 @@
 	public static void main(String[] args) throws Exception {
 		MapTagsToFileRevisions m = new MapTagsToFileRevisions();
 		System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory());
-//		m.collectTagsPerFile();
+		m.collectTagsPerFile();
 //		m.manifestWalk();
 //		m.changelogWalk();
 //		m.revisionMap();
-		m.buildFile2ChangelogRevisionMap();
+//		m.buildFile2ChangelogRevisionMap();
 		m = null;
 		System.gc();
 		System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory());
@@ -200,6 +200,30 @@
 		System.out.printf("Total time: %d ms\n", System.currentTimeMillis() - start);
 		System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory());
 	}
+	
+	private int[] collectLocalTagRevisions(HgChangelog.RevisionMap clogrmap, TagInfo[] allTags, IntMap<List<TagInfo>> tagLocalRev2TagInfo) {
+		int[] tagLocalRevs = new int[allTags.length];
+		int x = 0;
+		for (int i = 0; i < allTags.length; i++) {
+			final Nodeid tagRevision = allTags[i].revision();
+			final int tagLocalRev = clogrmap.localRevision(tagRevision);
+			if (tagLocalRev != HgRepository.BAD_REVISION) {
+				tagLocalRevs[x++] = tagLocalRev;
+				List<TagInfo> tagsAssociatedWithRevision = tagLocalRev2TagInfo.get(tagLocalRev);
+				if (tagsAssociatedWithRevision == null) {
+					tagLocalRev2TagInfo.put(tagLocalRev, tagsAssociatedWithRevision = new LinkedList<TagInfo>());
+				}
+				tagsAssociatedWithRevision.add(allTags[i]);
+			}
+		}
+		if (x != allTags.length) {
+			// some tags were removed (recorded Nodeid.NULL tagname)
+			int[] copy = new int[x];
+			System.arraycopy(tagLocalRevs, 0, copy, 0, x);
+			tagLocalRevs = copy;
+		}
+		return tagLocalRevs;
+	}
 
 	private void collectTagsPerFile() throws HgException, CancelledException {
 		final long start = System.currentTimeMillis();
@@ -210,33 +234,36 @@
 		//
 		final TagInfo[] allTags = new TagInfo[tags.getTags().size()];
 		tags.getTags().values().toArray(allTags);
+		// effective translation of changeset revisions to their local indexes
+		final HgChangelog.RevisionMap clogrmap = repository.getChangelog().new RevisionMap().init();
+		// map to look up tag by changeset local number
+		final IntMap<List<TagInfo>> tagLocalRev2TagInfo = new IntMap<List<TagInfo>>(allTags.length);
+		System.out.printf("Collecting manifests for %d tags\n", allTags.length);
+		final int[] tagLocalRevs = collectLocalTagRevisions(clogrmap, allTags, tagLocalRev2TagInfo);
+		System.out.printf("Prepared tag revisions to analyze: %d ms\n", System.currentTimeMillis() - start);
+
+		final Path targetPath = Path.create("README");
+		//
+		collectTagsPerFile_Approach_1(clogrmap, tagLocalRevs, allTags, targetPath);
+		System.out.printf("Total time: %d ms\n", System.currentTimeMillis() - start);
+
+		System.out.println("\nApproach 2");
+		collectTagsPerFile_Approach_2(repository, tagLocalRevs, tagLocalRev2TagInfo, allTags, targetPath);
+	}
+		
+	// Approach 1. Build map with all files, their revisions and corresponding tags
+	//
+	private void collectTagsPerFile_Approach_1(final HgChangelog.RevisionMap clogrmap, final int[] tagLocalRevs, final TagInfo[] allTags, Path targetPath) {
+		HgRepository repository = clogrmap.getRepo();
+		final long start = System.currentTimeMillis();
 		// file2rev2tag value is array of revisions, always of allTags.length. Revision index in the array
 		// is index of corresponding TagInfo in allTags;
 		final Map<Path, Nodeid[]> file2rev2tag = new HashMap<Path, Nodeid[]>();
-		System.out.printf("Collecting manifests for %d tags\n", allTags.length);
-		// effective translation of changeset revisions to their local indexes
-		final HgChangelog.RevisionMap clogrmap = repository.getChangelog().new RevisionMap().init();
-		int[] tagLocalRevs = new int[allTags.length];
-		int x = 0;
-		for (int i = 0; i < allTags.length; i++) {
-			final Nodeid tagRevision = allTags[i].revision();
-			final int tagLocalRev = clogrmap.localRevision(tagRevision);
-			if (tagLocalRev != HgRepository.BAD_REVISION) {
-				tagLocalRevs[x++] = tagLocalRev;
-			}
-		}
-		if (x != allTags.length) {
-			// some tags were removed (recorded Nodeid.NULL tagname)
-			int[] copy = new int[x];
-			System.arraycopy(tagLocalRevs, 0, copy, 0, x);
-			tagLocalRevs = copy;
-		}
-		System.out.printf("Prepared tag revisions to analyze: %d ms\n", System.currentTimeMillis() - start);
-		//
 		repository.getManifest().walk(new HgManifest.Inspector2() {
 			private int[] tagIndexAtRev = new int[4]; // it's unlikely there would be a lot of tags associated with a given cset
 
 			public boolean begin(int mainfestRevision, Nodeid nid, int changelogRevision) {
+				// may do better here using tagLocalRev2TagInfo, but need to change a lot, too lazy now
 				Nodeid cset = clogrmap.revision(changelogRevision);
 				Arrays.fill(tagIndexAtRev, -1);
 				for (int i = 0, x = 0; i < allTags.length; i++) {
@@ -286,7 +313,6 @@
 		System.out.printf("Cache built: %d ms\n", System.currentTimeMillis() - start);
 		//
 		// look up specific file. This part is fast.
-		final Path targetPath = Path.create("README");
 		HgDataFile fileNode = repository.getFileNode(targetPath);
 		final Nodeid[] allTagsOfTheFile = file2rev2tag.get(targetPath);
 		// TODO if fileNode.isCopy, repeat for each getCopySourceName()
@@ -301,7 +327,41 @@
 			}
 			System.out.printf("%3d%7d%s\n", localFileRev, changesetLocalRev, associatedTags);
 		}
-		System.out.printf("Total time: %d ms\n", System.currentTimeMillis() - start);
+	}
+	
+	private void collectTagsPerFile_Approach_2(HgRepository repository, final int[] tagLocalRevs, final IntMap<List<TagInfo>> tagLocalRev2TagInfo, TagInfo[] allTags, Path targetPath) {
+		//
+		// Approach 2. No all-file map. Collect file revisions recorded at the time of tagging,
+		// then for each file revision check if it is among those above, and if yes, take corresponding tags
+		HgDataFile fileNode = repository.getFileNode(targetPath);
+		final long start2 = System.nanoTime();
+		final int lastRev = fileNode.getLastRevision();
+		final Map<Integer, Nodeid> fileRevisionAtTagRevision = repository.getManifest().getFileRevisions(targetPath, tagLocalRevs);
+		final long start2a = System.nanoTime();
+		fileNode.walk(0, lastRev, new HgDataFile.RevisionInspector() {
+
+			public void next(int localFileRev, Nodeid fileRevision, int linkedRevision) {
+				int changesetLocalRev = linkedRevision;
+				List<String> associatedTags = new LinkedList<String>();
+				for (int taggetRevision : tagLocalRevs) {
+					// current file revision can't appear in tags that point to earlier changelog revisions (they got own file revision)
+					if (taggetRevision >= changesetLocalRev) {
+						// z points to some changeset with tag
+						Nodeid wasKnownAs = fileRevisionAtTagRevision.get(taggetRevision);
+						if (wasKnownAs.equals(fileRevision)) {
+							// has tag associated with changeset at index z
+							List<TagInfo> tagsAtRev = tagLocalRev2TagInfo.get(taggetRevision);
+							assert tagsAtRev != null;
+							for (TagInfo ti : tagsAtRev) {
+								associatedTags.add(ti.name());
+							}
+						}
+					}
+				}
+				System.out.printf("%3d%7d%s\n", localFileRev, changesetLocalRev, associatedTags);
+			}
+		});
+		System.out.printf("Alternative total time: %d ms, of that init: %d ms\n", (System.nanoTime() - start2)/1000000, (start2a-start2)/1000000);
 		System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory());
 	}