view test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java @ 709:497e697636fc

Report merged lines as changed block if possible, not as a sequence of added/deleted blocks. To facilitate access to merge parent lines AddBlock got mergeLineAt() method that reports index of the line in the second parent (if any), while insertedAt() has been changed to report index in the first parent always
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 21 Aug 2013 16:23:27 +0200
parents 6526d8adbc0f
children
line wrap: on
line source
package org.tmatesoft.hg.test;

import static org.tmatesoft.hg.repo.HgRepository.TIP;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.junit.Assert;
import org.tmatesoft.hg.core.HgCallbackTargetException;
import org.tmatesoft.hg.core.HgChangeset;
import org.tmatesoft.hg.core.HgChangesetHandler;
import org.tmatesoft.hg.core.HgException;
import org.tmatesoft.hg.core.HgLogCommand;
import org.tmatesoft.hg.core.Nodeid;
import org.tmatesoft.hg.internal.IntMap;
import org.tmatesoft.hg.repo.HgChangelog;
import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
import org.tmatesoft.hg.repo.HgDataFile;
import org.tmatesoft.hg.repo.HgLookup;
import org.tmatesoft.hg.repo.HgManifest;
import org.tmatesoft.hg.repo.HgManifest.Flags;
import org.tmatesoft.hg.repo.HgRepository;
import org.tmatesoft.hg.repo.HgRuntimeException;
import org.tmatesoft.hg.repo.HgTags;
import org.tmatesoft.hg.repo.HgTags.TagInfo;
import org.tmatesoft.hg.repo.HgRevisionMap;
import org.tmatesoft.hg.util.CancelledException;
import org.tmatesoft.hg.util.Path;

/**
 * @author Marc Strapetz
 */
public class MapTagsToFileRevisions {

	// Static =================================================================

	/**
	 * Entry point: runs one of the measurement scenarios against a local cpython clone
	 * and reports free heap before and after to give a rough idea of memory impact.
	 */
	public static void main(String[] args) throws Exception {
		MapTagsToFileRevisions scenarios = new MapTagsToFileRevisions();
		System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory());
		// alternative scenarios, enable as needed:
//		scenarios.measurePatchAffectsArbitraryRevisionRead();
//		scenarios.collectTagsPerFile();
//		scenarios.manifestWalk();
//		scenarios.changelogWalk();
//		scenarios.revisionMap();
		scenarios.buildFile2ChangelogRevisionMap(".hgtags", "README", "configure.in", "Misc/NEWS");
		// drop the reference and GC so the second free-memory report reflects released structures
		scenarios = null;
		System.gc();
		System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory());
	}
	

	// revision == 2406  -   5 ms per run (baseRevision == 2406)
	// revision == 2405  -  69 ms per run (baseRevision == 1403)
	/**
	 * Measures how much reading a single manifest revision costs depending on its
	 * distance from the revlog base revision (i.e. how many patches get applied).
	 * Prints average wall-clock time per read in milliseconds.
	 */
	public void measurePatchAffectsArbitraryRevisionRead() throws Exception {
		final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython"));
		final DoNothingManifestInspector sink = new DoNothingManifestInspector();
		final int manifestRev = 2405;
		// prime caches first so the timed loop measures steady-state cost only
		repository.getManifest().walk(manifestRev, manifestRev, sink);
		final int iterations = 10;
		final long startNanos = System.nanoTime();
		int left = iterations;
		while (left-- > 0) {
			repository.getManifest().walk(manifestRev, manifestRev, sink);
		}
		final long elapsedNanos = System.nanoTime() - startNanos;
		System.out.printf("%d ms per run\n", elapsedNanos / (iterations * 1000000));
	}

	/*
	 * .hgtags, 261 revisions
	 * Approach 1: total 83, init: 0, iteration: 82
	 * Approach 2: total 225, init: 206, iteration: 19
	 * README, 465 revisions
	 * Approach 1: total 162, init: 0, iteration: 161
	 * Approach 2: total 231, init: 198, iteration: 32
	 * configure.in, 1109 revisions
	 * Approach 1: total 409, init: 1, iteration: 407
	 * Approach 2: total 277, init: 203, iteration: 74
	 */
	/* New data, 0.9.0v (another CPU!)
	 *.hgtags, 306 revisions
	 * Approach 0: total 136
	 * Approach 1: total 53, init: 1, iteration: 52
	 * Approach 2: total 95, init: 78, iteration: 17
	 * Approach 3: total 17
	 * 
	 * README, 499 revisions
	 * Approach 0: total 149
	 * Approach 1: total 43, init: 0, iteration: 43
	 * Approach 2: total 102, init: 86, iteration: 16
	 * Approach 3: total 18
	 * 
	 * configure.in, 1170 revisions
	 * Approach 0: total 321
	 * Approach 1: total 116, init: 0, iteration: 115
	 * Approach 2: total 140, init: 79, iteration: 60
	 * Approach 3: total 30
	 * 
	 * Misc/NEWS, 10863 revisions
	 * Approach 0: total 4946
	 * Approach 1: total 309, init: 6, iteration: 302
	 * Approach 2: total 213, init: 63, iteration: 150
	 * Approach 3: total 140
 	 */
	/**
	 * Benchmarks four different ways to build a file-revision -&gt; changeset-revision map
	 * for each of the named files, asserting all approaches agree, and prints timings
	 * (see the measurement tables in the comment above).
	 * Approach 0: per-revision Nodeid lookups via HgDataFile directly.
	 * Approach 1: HgRevisionMap for the file + direct changelog getRevision.
	 * Approach 2: HgRevisionMap for both file and changelog.
	 * Approach 3: single indexWalk pass with a RevisionInspector.
	 *
	 * @param fileNames repository-relative paths of files to benchmark
	 */
	private void buildFile2ChangelogRevisionMap(String... fileNames) throws Exception {
		final HgRepository repository = new HgLookup().detect(new File("/home/artem/hg/cpython"));
		final HgChangelog clog = repository.getChangelog();
		// warm-up
		HgRevisionMap<HgChangelog> clogMap = new HgRevisionMap<HgChangelog>(clog).init();

		for (String fname : fileNames) {
			HgDataFile fileNode = repository.getFileNode(fname);
			// warm-up
			HgRevisionMap<HgDataFile> fileMap = new HgRevisionMap<HgDataFile>(fileNode).init();
			//
			final int latestRevision = fileNode.getLastRevision();
			//
			// Approach 0: straightforward per-revision lookups, no auxiliary maps
			final long start_0 = System.nanoTime();
			final Map<Nodeid, Nodeid> changesetToNodeid_0 = new HashMap<Nodeid, Nodeid>();
			for (int fileRevisionIndex = 0; fileRevisionIndex <= latestRevision; fileRevisionIndex++) {
				Nodeid fileRevision = fileNode.getRevision(fileRevisionIndex);
				Nodeid changesetRevision = fileNode.getChangesetRevision(fileRevision);
				changesetToNodeid_0.put(changesetRevision, fileRevision);
			}
			final long end_0 = System.nanoTime();
			//
			// Approach 1: revision map for the file only; changelog queried directly.
			// init cost is measured separately (start_1..start_1a)
			final long start_1 = System.nanoTime();
			fileMap = new HgRevisionMap<HgDataFile>(fileNode).init();
			final long start_1a = System.nanoTime();
			final Map<Nodeid, Nodeid> changesetToNodeid_1 = new HashMap<Nodeid, Nodeid>();
			for (int revision = 0; revision <= latestRevision; revision++) {
				final Nodeid nodeId = fileMap.revision(revision);
				int localCset = fileNode.getChangesetRevisionIndex(revision);
				final Nodeid changesetId = clog.getRevision(localCset);
//				final Nodeid changesetId = fileNode.getChangesetRevision(nodeId);
				changesetToNodeid_1.put(changesetId, nodeId);
			}
			final long end_1 = System.nanoTime();
			//
			// Approach 2: revision maps for both changelog and file; init measured separately
			final long start_2 = System.nanoTime();
			clogMap = new HgRevisionMap<HgChangelog>(clog).init();
			fileMap = new HgRevisionMap<HgDataFile>(fileNode).init();
			final Map<Nodeid, Nodeid> changesetToNodeid_2 = new HashMap<Nodeid, Nodeid>();
			final long start_2a = System.nanoTime();
			for (int revision = 0; revision <= latestRevision; revision++) {
				Nodeid nidFile = fileMap.revision(revision);
				int localCset = fileNode.getChangesetRevisionIndex(revision);
				Nodeid nidCset = clogMap.revision(localCset);
				changesetToNodeid_2.put(nidCset, nidFile);
			}
			final long end_2 = System.nanoTime();
			// sanity: all approaches shall produce identical maps
			Assert.assertEquals(changesetToNodeid_1, changesetToNodeid_2);
			//
			// Approach 3: single pass over the file's revlog index
			final long start_3 = System.nanoTime();
			final Map<Nodeid, Nodeid> changesetToNodeid_3 = new HashMap<Nodeid, Nodeid>();
			fileNode.indexWalk(0, TIP, new HgDataFile.RevisionInspector() {
	
				public void next(int fileRevisionIndex, Nodeid revision, int linkedRevisionIndex) throws HgRuntimeException {
					changesetToNodeid_3.put(clog.getRevision(linkedRevisionIndex), revision);
				}
			});
			final long end_3 = System.nanoTime();
			Assert.assertEquals(changesetToNodeid_1, changesetToNodeid_3);
			System.out.printf("%s, %d revisions\n", fname, 1+latestRevision);
			System.out.printf("Approach 0: total %d\n", (end_0 - start_0)/1000000);
			System.out.printf("Approach 1: total %d, init: %d, iteration: %d\n", (end_1 - start_1)/1000000, (start_1a - start_1)/1000000, (end_1 - start_1a)/1000000);
			System.out.printf("Approach 2: total %d, init: %d, iteration: %d\n", (end_2 - start_2)/1000000, (start_2a - start_2)/1000000, (end_2 - start_2a)/1000000);
			System.out.printf("Approach 3: total %d\n", (end_3 - start_3)/1000000);
		} 
	}

	/*
	 * Each 5000 revisions from cpython, total 15 revisions
	 * Direct clog.getRevisionIndex: ~260 ms
	 * RevisionMap.revisionIndex: ~265 ms (almost 100% in #init())
	 * each 1000'th revision, total 71 revision: 1 230 vs 270
	 * each 2000'th revision, total 36 revision: 620 vs 270
	 * each 3000'th revision, total 24 revision: 410 vs 275
	 */
	/**
	 * Compares the cost of direct changelog revision-index lookups against lookups
	 * through a pre-built {@link HgRevisionMap}, over a sample of revisions taken
	 * at a fixed step. Each lookup is verified to land on a sampled index.
	 */
	public void revisionMap() throws Exception {
		final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython"));
		final HgChangelog clog = repository.getChangelog();
		final int step = 5000;
		// sample every step'th revision of the changelog
		final ArrayList<Nodeid> sample = new ArrayList<Nodeid>();
		final int top = clog.getLastRevision();
		for (int i = 0; i < top; i += step) {
			sample.add(clog.getRevision(i));
		}
		// baseline: direct lookups against the changelog
		final long directStart = System.nanoTime();
		for (Nodeid n : sample) {
			final int revIndex = clog.getRevisionIndex(n);
			if (revIndex % step != 0) {
				throw new IllegalStateException(Integer.toString(revIndex));
			}
		}
		System.out.printf("Direct lookup of %d revisions took %,d ns\n", sample.size(), System.nanoTime() - directStart);
		// contender: HgRevisionMap, with init() cost tracked separately
		final HgRevisionMap<HgChangelog> rmap = new HgRevisionMap<HgChangelog>(clog);
		final long mapStart = System.nanoTime();
		rmap.init();
		final long mapInitDone = System.nanoTime();
		for (Nodeid n : sample) {
			final int revIndex = rmap.revisionIndex(n);
			if (revIndex % step != 0) {
				throw new IllegalStateException(Integer.toString(revIndex));
			}
		}
		System.out.printf("RevisionMap time: %d ms, of that init() %,d ns\n", (System.nanoTime() - mapStart) / 1000000, mapInitDone - mapStart);
	}

	/**
	 * Iterates every changeset of the changelog, doing token work per entry, to time
	 * a full changelog traversal. Prints total time and free heap afterwards.
	 */
	public void changelogWalk() throws Exception {
		final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython"));
		final long startMillis = System.currentTimeMillis();
		repository.getChangelog().all(new HgChangelog.Inspector() {
			// accumulator exists solely so the per-entry work can't be optimized away
			public int checksum = 0;

			public void next(int revisionNumber, Nodeid nodeid, RawChangeset cset) {
				final boolean overflowed = checksum + revisionNumber < 0;
				if (overflowed) {
					System.out.println(checksum);
					System.out.println(revisionNumber);
				}
				checksum += revisionNumber;
			}
		});
		// historical timings:
		// cpython: 17 seconds, mem  132,9 -> 129,0 -> 131,7
		// cpython: 13 seconds. Of that, cumulative Patch.apply takes 8.8 seconds, RevlogStream.Inspector.next - 1.8
		System.out.printf("Total time: %d ms\n", System.currentTimeMillis() - startMillis);
		System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory());
	}

	/**
	 * Walks manifest revisions 0..10000 with a no-op inspector to time raw manifest
	 * parsing; prints JVM version, total time and free heap.
	 */
	public void manifestWalk() throws Exception {
		System.out.println(System.getProperty("java.version"));
		final long startMillis = System.currentTimeMillis();
		final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython"));
		repository.getManifest().walk(0, 10000, new DoNothingManifestInspector());
		// historical measurements:
		// cpython: 1,1 sec for 0..1000, 43 sec for 0..10000, 115 sec for 0..20000 (Pool with HashMap)
		// 2,4 sec for 1000..2000
		// cpython -r 1000: 484 files, -r 2000: 1015 files. Iteration 1000..2000; fnamePool.size:1019 nodeidPool.size:2989
		// nodeidPool for two subsequent revisions only: 840. 37 sec for 0..10000. 99 sec for 0..20k
		// 0..10000 fnamePool: hits:15989152, misses:3020
		//
		// With Pool<StringProxy> for fname and flags, Nodeid's ascii2bin through local array, overall byte[] iteration,
		// 0..10k is 34 seconds now
		// Another run, 23 seconds now, seems nothing has been changed. Switched to Pool2 with DirectHashSet: 22,5 seconds
		System.out.printf("Total time: %d ms\n", System.currentTimeMillis() - startMillis);
		System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory());
	}
	
	/**
	 * Resolves each tag's target revision to its changelog revision index and groups
	 * tags by that index.
	 *
	 * @param clogrmap initialized revision map of the changelog, used for Nodeid -> index translation
	 * @param allTags tags to resolve
	 * @param tagLocalRev2TagInfo out parameter, filled with changeset index -> tags recorded at that changeset
	 * @return changelog revision indexes of the tagged changesets, in {@code allTags} order;
	 *         may be shorter than {@code allTags} when some tags point to revisions unknown
	 *         to the changelog (e.g. removed tags recorded with a NULL revision)
	 */
	private int[] collectLocalTagRevisions(HgRevisionMap<HgChangelog> clogrmap, TagInfo[] allTags, IntMap<List<TagInfo>> tagLocalRev2TagInfo) {
		int[] tagLocalRevs = new int[allTags.length];
		int x = 0;
		for (int i = 0; i < allTags.length; i++) {
			final Nodeid tagRevision = allTags[i].revision();
			final int tagRevisionIndex = clogrmap.revisionIndex(tagRevision);
			if (tagRevisionIndex != HgRepository.BAD_REVISION) {
				tagLocalRevs[x++] = tagRevisionIndex;
				List<TagInfo> tagsAssociatedWithRevision = tagLocalRev2TagInfo.get(tagRevisionIndex);
				if (tagsAssociatedWithRevision == null) {
					tagLocalRev2TagInfo.put(tagRevisionIndex, tagsAssociatedWithRevision = new LinkedList<TagInfo>());
				}
				tagsAssociatedWithRevision.add(allTags[i]);
			}
		}
		if (x != allTags.length) {
			// some tags were removed (recorded Nodeid.NULL tagname) - trim the unused tail
			tagLocalRevs = Arrays.copyOf(tagLocalRevs, x);
		}
		return tagLocalRevs;
	}

	/**
	 * Driver scenario: resolves all repository tags to changelog indexes, then runs and
	 * times both per-file tag collection approaches for a sample file (README).
	 */
	public void collectTagsPerFile() throws HgException, CancelledException, HgRuntimeException {
		final long startMillis = System.currentTimeMillis();
		final HgRepository repository = new HgLookup().detect(new File("/home/artem/hg/cpython"));
		final HgTags tags = repository.getTags();
		//
		// build cache
		//
		final TagInfo[] allTags = tags.getAllTags().values().toArray(new TagInfo[0]);
		// effective translation of changeset revisions to their local indexes
		final HgRevisionMap<HgChangelog> clogrmap = new HgRevisionMap<HgChangelog>(repository.getChangelog()).init();
		// map to look up tag by changeset local number
		final IntMap<List<TagInfo>> tagLocalRev2TagInfo = new IntMap<List<TagInfo>>(allTags.length);
		System.out.printf("Collecting manifests for %d tags\n", allTags.length);
		final int[] tagLocalRevs = collectLocalTagRevisions(clogrmap, allTags, tagLocalRev2TagInfo);
		System.out.printf("Prepared %d tag revisions to analyze: %d ms\n", tagLocalRevs.length, System.currentTimeMillis() - startMillis);

		final Path targetPath = Path.create("README");
		//
		collectTagsPerFile_Approach_1(clogrmap, tagLocalRevs, allTags, targetPath);
		System.out.printf("Total time: %d ms\n", System.currentTimeMillis() - startMillis);

		System.out.println("\nApproach 2");
		collectTagsPerFile_Approach_2(repository, tagLocalRevs, tagLocalRev2TagInfo, targetPath);
	}
		
	// Approach 1. Build map with all files, their revisions and corresponding tags
	//
	// Approach 1. Build map with all files, their revisions and corresponding tags
	//
	/**
	 * Walks the manifest of every tagged changeset, recording per file which file
	 * revision each tag points at, then prints the tags associated with every
	 * revision of {@code targetPath}.
	 *
	 * @param clogrmap initialized changelog revision map (also supplies the repository)
	 * @param tagLocalRevs changelog revision indexes of tagged changesets to walk
	 * @param allTags all known tags; array indexes are used as tag identifiers throughout
	 * @param targetPath file whose revision/tag association is reported
	 */
	private void collectTagsPerFile_Approach_1(final HgRevisionMap<HgChangelog> clogrmap, final int[] tagLocalRevs, final TagInfo[] allTags, Path targetPath) throws HgException, IllegalArgumentException, HgRuntimeException {
		HgRepository repository = clogrmap.getRepo();
		final long start = System.currentTimeMillis();
		// file2rev2tag value is array of revisions, always of allTags.length. Revision index in the array
		// is index of corresponding TagInfo in allTags;
		final Map<Path, Nodeid[]> file2rev2tag = new HashMap<Path, Nodeid[]>();
		repository.getManifest().walk(new HgManifest.Inspector() {
			// indexes (into allTags) of the tags that point at the changeset being walked, -1-terminated
			private int[] tagIndexAtRev = new int[4]; // it's unlikely there would be a lot of tags associated with a given cset

			public boolean begin(int manifestRevision, Nodeid nid, int changelogRevision) {
				// may do better here using tagLocalRev2TagInfo, but need to change a lot, too lazy now
				Nodeid cset = clogrmap.revision(changelogRevision);
				Arrays.fill(tagIndexAtRev, -1);
				for (int i = 0, x = 0; i < allTags.length; i++) {
					if (cset.equals(allTags[i].revision())) {
						tagIndexAtRev[x++] = i;
						if (x == tagIndexAtRev.length) {
							// expand twice as much
							int[] expanded = new int[x << 1];
							System.arraycopy(tagIndexAtRev, 0, expanded, 0, x);
							// FIX: the whole tail must carry the -1 sentinel, not just expanded[x].
							// New int[] slots default to 0, which next() would misread as tag index 0
							// when more tags are appended after expansion without filling the array.
							Arrays.fill(expanded, x, expanded.length, -1);
							tagIndexAtRev = expanded;
						}
					}
				}
				if (tagIndexAtRev[0] == -1) {
					System.out.println("Can't happen, provided we iterate over revisions with tags only");
				}
				return true;
			}

			public boolean next(Nodeid nid, Path fname, HgManifest.Flags flags) {
				// lazily allocate the per-file slot array, one slot per tag
				Nodeid[] m = file2rev2tag.get(fname);
				if (m == null) {
					file2rev2tag.put(fname, m = new Nodeid[allTags.length]);
				}
				for (int tagIndex : tagIndexAtRev) {
					if (tagIndex == -1) {
						// sentinel: no more tags at this changeset
						break;
					}
					if (m[tagIndex] != null) {
						System.out.printf("There's another revision (%s) associated with tag %s already while we try to associate %s\n", m[tagIndex].shortNotation(), allTags[tagIndex].name(), nid.shortNotation());
					}
					m[tagIndex] = nid;
				}
				return true;
			}
			
			public boolean end(int manifestRevision) {
				return true;
			}
			
		}, tagLocalRevs);
		System.out.printf("Cache built: %d ms\n", System.currentTimeMillis() - start);
		//
		// look up specific file. This part is fast.
		HgDataFile fileNode = repository.getFileNode(targetPath);
		final Nodeid[] allTagsOfTheFile = file2rev2tag.get(targetPath);
		// TODO if fileNode.isCopy, repeat for each getCopySourceName()
		for (int fileRevIndex = 0; fileRevIndex < fileNode.getRevisionCount(); fileRevIndex++) {
			Nodeid fileRev = fileNode.getRevision(fileRevIndex);
			int changesetRevIndex = fileNode.getChangesetRevisionIndex(fileRevIndex);
			List<String> associatedTags = new LinkedList<String>();
			for (int i = 0; i < allTagsOfTheFile.length; i++) {
				if (fileRev.equals(allTagsOfTheFile[i])) {
					associatedTags.add(allTags[i].name());
				}
			}
			System.out.printf("%3d%7d%s\n", fileRevIndex, changesetRevIndex, associatedTags);
		}
	}
	
	/**
	 * Alternative tag-per-file-revision collection: walks only {@code targetPath}'s entry
	 * in the manifests of tagged changesets, then matches each file revision against the
	 * file revisions recorded at tagging time. Prints the association twice, computed
	 * through two equivalent lookups, plus timing and free-heap figures.
	 *
	 * @param repository repository to read from
	 * @param tagLocalRevs changelog revision indexes of tagged changesets
	 * @param tagRevIndex2TagInfo changeset index -> tags recorded at that changeset
	 * @param targetPath file whose revision/tag association is reported
	 */
	private void collectTagsPerFile_Approach_2(HgRepository repository, final int[] tagLocalRevs, final IntMap<List<TagInfo>> tagRevIndex2TagInfo, Path targetPath) throws HgException, HgRuntimeException {
		//
		// Approach 2. No all-file map. Collect file revisions recorded at the time of tagging,
		// then for each file revision check if it is among those above, and if yes, take corresponding tags
		HgDataFile fileNode = repository.getFileNode(targetPath);
		final long start2 = System.nanoTime();
		// changeset index (of a tagged changeset) -> targetPath's file revision in its manifest
		final Map<Integer, Nodeid> fileRevisionAtTagRevision = new HashMap<Integer, Nodeid>();
		// file revision -> names of tags whose changeset recorded that file revision
		final Map<Nodeid, List<String>> fileRev2TagNames = new HashMap<Nodeid, List<String>>();
		HgManifest.Inspector collectFileRevAtCset = new HgManifest.Inspector() {
			
			// changeset index of the manifest revision currently being walked, set in begin()
			private int csetRevIndex;

			public boolean next(Nodeid nid, Path fname, Flags flags) {
				// walkFileRevisions reports only targetPath entries, so no fname filtering needed
				fileRevisionAtTagRevision.put(csetRevIndex, nid);
				if (tagRevIndex2TagInfo.containsKey(csetRevIndex)) {
					List<String> tags = fileRev2TagNames.get(nid);
					if (tags == null) {
						fileRev2TagNames.put(nid, tags = new ArrayList<String>(3));
					}
					for (TagInfo ti : tagRevIndex2TagInfo.get(csetRevIndex)) {
						tags.add(ti.name());
					}
				}
				return true;
			}
			
			public boolean end(int manifestRevision) {
				return true;
			}
			
			public boolean begin(int mainfestRevision, Nodeid nid, int changelogRevision) {
				csetRevIndex = changelogRevision;
				return true;
			}
		};
		repository.getManifest().walkFileRevisions(targetPath, collectFileRevAtCset,tagLocalRevs);
		
		final long start2a = System.nanoTime();
		// Lookup 1: per file revision, scan tagged changesets and compare recorded file revisions
		fileNode.indexWalk(0, TIP, new HgDataFile.RevisionInspector() {

			public void next(int fileRevisionIndex, Nodeid fileRevision, int changesetRevisionIndex) {
				List<String> associatedTags = new LinkedList<String>();
				
				for (int taggedRevision : tagLocalRevs) {
					// current file revision can't appear in tags that point to earlier changelog revisions (they got own file revision)
					if (taggedRevision >= changesetRevisionIndex) {
						// z points to some changeset with tag
						Nodeid wasKnownAs = fileRevisionAtTagRevision.get(taggedRevision);
						if (wasKnownAs.equals(fileRevision)) {
							// has tag associated with changeset at index z
							List<TagInfo> tagsAtRev = tagRevIndex2TagInfo.get(taggedRevision);
							assert tagsAtRev != null;
							for (TagInfo ti : tagsAtRev) {
								associatedTags.add(ti.name());
							}
						}
					}
				}
				// 
				System.out.printf("%3d%7d%s\n", fileRevisionIndex, changesetRevisionIndex, associatedTags);
			}
		});
		// Lookup 2: same association via the fileRev2TagNames map built during the walk;
		// output should match the listing printed above
		for (int i = 0, lastRev = fileNode.getLastRevision(); i <= lastRev; i++) {
			Nodeid fileRevision = fileNode.getRevision(i);
			List<String> associatedTags2 = fileRev2TagNames.get(fileRevision);
			int changesetRevIndex = fileNode.getChangesetRevisionIndex(i);
			System.out.printf("%3d%7d%s\n", i, changesetRevIndex, associatedTags2 == null ? Collections.emptyList() : associatedTags2);
		}
		System.out.printf("Alternative total time: %d ms, of that init: %d ms\n", (System.nanoTime() - start2)/1000000, (start2a-start2)/1000000);
		System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory());
	}

	/**
	 * Manifest inspector that accepts every revision and entry without doing any work;
	 * used as a sink when only the cost of reading/parsing manifests is measured.
	 */
	static class DoNothingManifestInspector implements HgManifest.Inspector {
		public boolean begin(int mainfestRevision, Nodeid nid, int changelogRevision) {
			return true;
		}
		public boolean next(Nodeid nid, Path fname, Flags flags) {
			return true;
		}
		public boolean end(int manifestRevision) {
			return true;
		}
	}
	
	/**
	 * Alternative driver: for each tag, looks up the README file revision recorded in the
	 * tagged changeset's manifest, maps it back to the changeset that introduced it, and
	 * then logs README's history annotated with the tags gathered that way.
	 * Note the inline TODO: the lookup ignores renames/copies of the target file.
	 */
	public static void main2(String[] args) throws HgCallbackTargetException, HgException, CancelledException, HgRuntimeException {
		final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython"));
		final Path targetPath = Path.create("README");
		final HgTags tags = repository.getTags();
		final Map<String, HgTags.TagInfo> tagToInfo = tags.getAllTags();
		final HgManifest manifest = repository.getManifest();
		// changeset that introduced a file revision -> names of tags pointing at that file revision
		final Map<Nodeid, List<String>> changeSetRevisionToTags = new HashMap<Nodeid, List<String>>();
		final HgDataFile fileNode = repository.getFileNode(targetPath);
		for (String tagName : tagToInfo.keySet()) {
			final HgTags.TagInfo info = tagToInfo.get(tagName);
			final Nodeid nodeId = info.revision();
			// TODO: This is not correct as we can't be sure that file at the corresponding revision is actually our target file (which may have been renamed, etc.)
			final Nodeid fileRevision = manifest.getFileRevision(repository.getChangelog().getRevisionIndex(nodeId), targetPath);
			if (fileRevision == null) {
				// tagged changeset doesn't contain the target file
				continue;
			}

			final Nodeid changeSetRevision = fileNode.getChangesetRevision(fileRevision);
			List<String> revisionTags = changeSetRevisionToTags.get(changeSetRevision);
			if (revisionTags == null) {
				revisionTags = new ArrayList<String>();
				changeSetRevisionToTags.put(changeSetRevision, revisionTags);
			}
			revisionTags.add(tagName);
		}

		// walk the file history, printing collected tags per changeset
		final HgLogCommand logCommand = new HgLogCommand(repository);
		logCommand.file(targetPath, true);
		logCommand.execute(new HgChangesetHandler() {
			public void cset(HgChangeset changeset) {
				if (changeset.getAffectedFiles().contains(targetPath)) {
					System.out.println(changeset.getRevisionIndex() + " " + changeSetRevisionToTags.get(changeset.getNodeid()));
				}
			}
		});
	}
}