Mercurial > hg4j
view test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java @ 489:9c0138cda59a
HgRepositoryLock: some javadoc
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Mon, 13 Aug 2012 19:24:29 +0200 |
parents | 0d128e09d70f |
children | 6526d8adbc0f |
line wrap: on
line source
package org.tmatesoft.hg.test;

import static org.tmatesoft.hg.repo.HgRepository.TIP;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.junit.Assert;
import org.tmatesoft.hg.core.HgCallbackTargetException;
import org.tmatesoft.hg.core.HgChangeset;
import org.tmatesoft.hg.core.HgChangesetHandler;
import org.tmatesoft.hg.core.HgException;
import org.tmatesoft.hg.core.HgLogCommand;
import org.tmatesoft.hg.core.Nodeid;
import org.tmatesoft.hg.internal.IntMap;
import org.tmatesoft.hg.repo.HgChangelog;
import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
import org.tmatesoft.hg.repo.HgDataFile;
import org.tmatesoft.hg.repo.HgLookup;
import org.tmatesoft.hg.repo.HgManifest;
import org.tmatesoft.hg.repo.HgManifest.Flags;
import org.tmatesoft.hg.repo.HgRepository;
import org.tmatesoft.hg.repo.HgTags;
import org.tmatesoft.hg.repo.HgTags.TagInfo;
import org.tmatesoft.hg.repo.HgRevisionMap;
import org.tmatesoft.hg.util.CancelledException;
import org.tmatesoft.hg.util.Path;

/**
 * Ad-hoc benchmark/experiment harness that explores different strategies for mapping
 * tags to file revisions and changelog revisions, and records their timings in the
 * inline comments below.
 * <p>
 * NOTE(review): not a real unit test despite living under {@code test/} — entry is
 * {@code main()}, repository paths are hard-coded to local CPython clones
 * ({@code /temp/hg/cpython}, {@code /home/artem/hg/cpython}), and results are printed
 * rather than asserted (except two map-equality sanity checks).
 *
 * @author Marc Strapetz
 */
public class MapTagsToFileRevisions {

    // Static =================================================================

    /**
     * Entry point: runs one selected experiment (others are kept commented out for
     * convenient switching) and prints free-memory figures before and after.
     */
    public static void main(String[] args) throws Exception {
        MapTagsToFileRevisions m = new MapTagsToFileRevisions();
        System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory());
        // m.measurePatchAffectsArbitraryRevisionRead();
        // m.collectTagsPerFile();
        // m.manifestWalk();
        // m.changelogWalk();
        // m.revisionMap();
        m.buildFile2ChangelogRevisionMap(".hgtags", "README", "configure.in", "Misc/NEWS");
        m = null;
        System.gc();
        System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory());
    }

    // Measured results (kept for reference):
    // revision == 2406 - 5 ms per run (baseRevision == 2406)
    // revision == 2405 - 69 ms per run (baseRevision == 1403)
    /**
     * Times repeated manifest reads of a single revision to show how far the revision
     * sits from its delta-chain base affects read cost (see figures above).
     * Uses a hard-coded local CPython clone.
     */
    public void measurePatchAffectsArbitraryRevisionRead() throws Exception {
        final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython"));
        final DoNothingManifestInspector insp = new DoNothingManifestInspector();
        final int revision = 2405;
        // warm-up.
        repository.getManifest().walk(revision, revision, insp);
        final int runs = 10;
        final long start = System.nanoTime();
        for (int i = 0; i < runs; i++) {
            repository.getManifest().walk(revision, revision, insp);
        }
        final long end = System.nanoTime();
        System.out.printf("%d ms per run\n", (end - start)/ (runs*1000000));
    }

    /*
     * .hgtags, 261 revisions
     * Approach 1: total 83, init: 0, iteration: 82
     * Approach 2: total 225, init: 206, iteration: 19
     * README, 465 revisions
     * Approach 1: total 162, init: 0, iteration: 161
     * Approach 2: total 231, init: 198, iteration: 32
     * configure.in, 1109 revisions
     * Approach 1: total 409, init: 1, iteration: 407
     * Approach 2: total 277, init: 203, iteration: 74
     */
    /* New data, 0.9.0v (another CPU!)
     * .hgtags, 306 revisions
     * Approach 0: total 136
     * Approach 1: total 53, init: 1, iteration: 52
     * Approach 2: total 95, init: 78, iteration: 17
     * Approach 3: total 17
     *
     * README, 499 revisions
     * Approach 0: total 149
     * Approach 1: total 43, init: 0, iteration: 43
     * Approach 2: total 102, init: 86, iteration: 16
     * Approach 3: total 18
     *
     * configure.in, 1170 revisions
     * Approach 0: total 321
     * Approach 1: total 116, init: 0, iteration: 115
     * Approach 2: total 140, init: 79, iteration: 60
     * Approach 3: total 30
     *
     * Misc/NEWS, 10863 revisions
     * Approach 0: total 4946
     * Approach 1: total 309, init: 6, iteration: 302
     * Approach 2: total 213, init: 63, iteration: 150
     * Approach 3: total 140
     */
    /**
     * Benchmarks four ways of building a file-changeset revision map
     * (changeset {@link Nodeid} -&gt; file {@link Nodeid}) for each named file;
     * timings recorded in the comment blocks above. Approaches 1-3 are asserted to
     * produce the same map as each other (0 is not cross-checked).
     *
     * @param fileNames repository-relative paths within the hard-coded CPython clone
     */
    private void buildFile2ChangelogRevisionMap(String... fileNames) throws Exception {
        final HgRepository repository = new HgLookup().detect(new File("/home/artem/hg/cpython"));
        final HgChangelog clog = repository.getChangelog();
        // warm-up
        HgRevisionMap<HgChangelog> clogMap = new HgRevisionMap<HgChangelog>(clog).init();
        for (String fname : fileNames) {
            HgDataFile fileNode = repository.getFileNode(fname);
            // warm-up
            HgRevisionMap<HgDataFile> fileMap = new HgRevisionMap<HgDataFile>(fileNode).init();
            //
            final int latestRevision = fileNode.getLastRevision();
            //
            // Approach 0: straight per-revision lookups, no maps at all.
            final long start_0 = System.nanoTime();
            final Map<Nodeid, Nodeid> changesetToNodeid_0 = new HashMap<Nodeid, Nodeid>();
            for (int fileRevisionIndex = 0; fileRevisionIndex <= latestRevision; fileRevisionIndex++) {
                Nodeid fileRevision = fileNode.getRevision(fileRevisionIndex);
                Nodeid changesetRevision = fileNode.getChangesetRevision(fileRevision);
                changesetToNodeid_0.put(changesetRevision, fileRevision);
            }
            final long end_0 = System.nanoTime();
            //
            // Approach 1: file-side HgRevisionMap, changelog queried directly.
            final long start_1 = System.nanoTime();
            fileMap = new HgRevisionMap<HgDataFile>(fileNode).init();
            final long start_1a = System.nanoTime();
            final Map<Nodeid, Nodeid> changesetToNodeid_1 = new HashMap<Nodeid, Nodeid>();
            for (int revision = 0; revision <= latestRevision; revision++) {
                final Nodeid nodeId = fileMap.revision(revision);
                int localCset = fileNode.getChangesetRevisionIndex(revision);
                final Nodeid changesetId = clog.getRevision(localCset);
                // final Nodeid changesetId = fileNode.getChangesetRevision(nodeId);
                changesetToNodeid_1.put(changesetId, nodeId);
            }
            final long end_1 = System.nanoTime();
            //
            // Approach 2: HgRevisionMap on both the file and the changelog
            // (init time measured separately from iteration time).
            final long start_2 = System.nanoTime();
            clogMap = new HgRevisionMap<HgChangelog>(clog).init();
            fileMap = new HgRevisionMap<HgDataFile>(fileNode).init();
            final Map<Nodeid, Nodeid> changesetToNodeid_2 = new HashMap<Nodeid, Nodeid>();
            final long start_2a = System.nanoTime();
            for (int revision = 0; revision <= latestRevision; revision++) {
                Nodeid nidFile = fileMap.revision(revision);
                int localCset = fileNode.getChangesetRevisionIndex(revision);
                Nodeid nidCset = clogMap.revision(localCset);
                changesetToNodeid_2.put(nidCset, nidFile);
            }
            final long end_2 = System.nanoTime();
            Assert.assertEquals(changesetToNodeid_1, changesetToNodeid_2);
            //
            // Approach 3: single indexWalk pass with a callback; fastest per the figures above.
            final long start_3 = System.nanoTime();
            final Map<Nodeid, Nodeid> changesetToNodeid_3 = new HashMap<Nodeid, Nodeid>();
            fileNode.indexWalk(0, TIP, new HgDataFile.RevisionInspector() {

                public void next(int fileRevisionIndex, Nodeid revision, int linkedRevisionIndex) {
                    changesetToNodeid_3.put(clog.getRevision(linkedRevisionIndex), revision);
                }
            });
            final long end_3 = System.nanoTime();
            Assert.assertEquals(changesetToNodeid_1, changesetToNodeid_3);
            System.out.printf("%s, %d revisions\n", fname, 1+latestRevision);
            System.out.printf("Approach 0: total %d\n", (end_0 - start_0)/1000000);
            System.out.printf("Approach 1: total %d, init: %d, iteration: %d\n", (end_1 - start_1)/1000000, (start_1a - start_1)/1000000, (end_1 - start_1a)/1000000);
            System.out.printf("Approach 2: total %d, init: %d, iteration: %d\n", (end_2 - start_2)/1000000, (start_2a - start_2)/1000000, (end_2 - start_2a)/1000000);
            System.out.printf("Approach 3: total %d\n", (end_3 - start_3)/1000000);
        }
    }

    /*
     * Each 5000 revisions from cpython, total 15 revisions
     * Direct clog.getRevisionIndex: ~260 ms
     * RevisionMap.revisionIndex: ~265 ms (almost 100% in #init())
     * each 1000'th revision, total 71 revision: 1 230 vs 270
     * each 2000'th revision, total 36 revision: 620 vs 270
     * each 3000'th revision, total 24 revision: 410 vs 275
     */
    /**
     * Compares direct {@code HgChangelog#getRevisionIndex} lookups against
     * {@link HgRevisionMap}-based lookups for every {@code step}-th changelog revision;
     * throws {@link IllegalStateException} if a lookup returns an unexpected index.
     * Timings recorded above.
     */
    public void revisionMap() throws Exception {
        final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython"));
        final HgChangelog clog = repository.getChangelog();
        ArrayList<Nodeid> revisions = new ArrayList<Nodeid>();
        final int step = 5000;
        for (int i = 0, top = clog.getLastRevision(); i < top; i += step) {
            revisions.add(clog.getRevision(i));
        }
        final long s1 = System.nanoTime();
        for (Nodeid n : revisions) {
            int r = clog.getRevisionIndex(n);
            // sampled every step-th revision, so each index must be a multiple of step
            if (r % step != 0) {
                throw new IllegalStateException(Integer.toString(r));
            }
        }
        System.out.printf("Direct lookup of %d revisions took %,d ns\n", revisions.size(), System.nanoTime() - s1);
        HgRevisionMap<HgChangelog> rmap = new HgRevisionMap<HgChangelog>(clog);
        final long s2 = System.nanoTime();
        rmap.init();
        final long s3 = System.nanoTime();
        for (Nodeid n : revisions) {
            int r = rmap.revisionIndex(n);
            if (r % step != 0) {
                throw new IllegalStateException(Integer.toString(r));
            }
        }
        System.out.printf("RevisionMap time: %d ms, of that init() %,d ns\n", (System.nanoTime() - s2) / 1000000, s3 - s2);
    }

    /**
     * Times a full changelog iteration. The accumulating {@code xx} field and the
     * impossible-looking branch exist only to keep the JIT from optimizing the
     * callback body away — presumably; TODO confirm intent.
     */
    public void changelogWalk() throws Exception {
        final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython"));
        final long start = System.currentTimeMillis();
        repository.getChangelog().all(new HgChangelog.Inspector() {
            public int xx = 0;

            public void next(int revisionNumber, Nodeid nodeid, RawChangeset cset) {
                if (xx+revisionNumber < 0) {
                    System.out.println(xx);
                    System.out.println(revisionNumber);
                }
                xx += revisionNumber;
            }
        });
        // cpython: 17 seconds, mem 132,9 -> 129,0 -> 131,7
        // cpython: 13 seconds. Of that, cumulative Patch.apply takes 8.8 seconds, RevlogStream.Inspector.next - 1.8
        System.out.printf("Total time: %d ms\n", System.currentTimeMillis() - start);
        System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory());
    }

    /**
     * Times walking manifest revisions 0..10000 with a no-op inspector; historical
     * timings for various internal pooling strategies are recorded below.
     */
    public void manifestWalk() throws Exception {
        System.out.println(System.getProperty("java.version"));
        final long start = System.currentTimeMillis();
        final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython"));
        repository.getManifest().walk(0, 10000, new DoNothingManifestInspector());
        // cpython: 1,1 sec for 0..1000, 43 sec for 0..10000, 115 sec for 0..20000 (Pool with HashMap)
        // 2,4 sec for 1000..2000
        // cpython -r 1000: 484 files, -r 2000: 1015 files. Iteration 1000..2000; fnamePool.size:1019 nodeidPool.size:2989
        // nodeidPool for two subsequent revisions only: 840. 37 sec for 0..10000. 99 sec for 0..20k
        // 0..10000 fnamePool: hits:15989152, misses:3020
        //
        // With Pool<StringProxy> for fname and flags, Nodeid's ascii2bin through local array, overall byte[] iteration,
        // 0..10k is 34 seconds now
        // Another run, 23 seconds now, seems nothing has been changed. Switched to Pool2 with DirectHashSet: 22,5 seconds
        System.out.printf("Total time: %d ms\n", System.currentTimeMillis() - start);
        System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory());
    }

    /**
     * Resolves each tag's changeset {@link Nodeid} to a changelog revision index,
     * and populates {@code tagLocalRev2TagInfo} (revision index -&gt; tags at that revision).
     *
     * @param clogrmap changelog revision map, must be initialized
     * @param allTags all known tags, including ones whose revision may be unresolvable
     * @param tagLocalRev2TagInfo out-parameter, filled by this method
     * @return resolvable tag revision indexes; shorter than {@code allTags} when some
     *         tags could not be resolved (removed tags recorded with a NULL nodeid)
     */
    private int[] collectLocalTagRevisions(HgRevisionMap<HgChangelog> clogrmap, TagInfo[] allTags, IntMap<List<TagInfo>> tagLocalRev2TagInfo) {
        int[] tagLocalRevs = new int[allTags.length];
        int x = 0;
        for (int i = 0; i < allTags.length; i++) {
            final Nodeid tagRevision = allTags[i].revision();
            final int tagRevisionIndex = clogrmap.revisionIndex(tagRevision);
            if (tagRevisionIndex != HgRepository.BAD_REVISION) {
                tagLocalRevs[x++] = tagRevisionIndex;
                List<TagInfo> tagsAssociatedWithRevision = tagLocalRev2TagInfo.get(tagRevisionIndex);
                if (tagsAssociatedWithRevision == null) {
                    tagLocalRev2TagInfo.put(tagRevisionIndex, tagsAssociatedWithRevision = new LinkedList<TagInfo>());
                }
                tagsAssociatedWithRevision.add(allTags[i]);
            }
        }
        if (x != allTags.length) {
            // some tags were removed (recorded Nodeid.NULL tagname) — trim the unused tail
            int[] copy = new int[x];
            System.arraycopy(tagLocalRevs, 0, copy, 0, x);
            tagLocalRevs = copy;
        }
        return tagLocalRevs;
    }

    /**
     * Driver comparing Approach 1 and Approach 2 of associating tags with each
     * revision of a file (README of the hard-coded CPython clone); prints both
     * results and their timings.
     */
    public void collectTagsPerFile() throws HgException, CancelledException {
        final long start = System.currentTimeMillis();
        final HgRepository repository = new HgLookup().detect(new File("/home/artem/hg/cpython"));
        final HgTags tags = repository.getTags();
        //
        // build cache
        //
        final TagInfo[] allTags = new TagInfo[tags.getAllTags().size()];
        tags.getAllTags().values().toArray(allTags);
        // effective translation of changeset revisions to their local indexes
        final HgRevisionMap<HgChangelog> clogrmap = new HgRevisionMap<HgChangelog>(repository.getChangelog()).init();
        // map to look up tag by changeset local number
        final IntMap<List<TagInfo>> tagLocalRev2TagInfo = new IntMap<List<TagInfo>>(allTags.length);
        System.out.printf("Collecting manifests for %d tags\n", allTags.length);
        final int[] tagLocalRevs = collectLocalTagRevisions(clogrmap, allTags, tagLocalRev2TagInfo);
        System.out.printf("Prepared %d tag revisions to analyze: %d ms\n", tagLocalRevs.length, System.currentTimeMillis() - start);

        final Path targetPath = Path.create("README");
        //
        collectTagsPerFile_Approach_1(clogrmap, tagLocalRevs, allTags, targetPath);
        System.out.printf("Total time: %d ms\n", System.currentTimeMillis() - start);

        System.out.println("\nApproach 2");
        collectTagsPerFile_Approach_2(repository, tagLocalRevs, tagLocalRev2TagInfo, targetPath);
    }

    // Approach 1. Build map with all files, their revisions and corresponding tags
    //
    /**
     * Walks the manifests of all tagged revisions once, building a map of every file
     * to its revision-per-tag array, then prints the tags associated with each
     * revision of {@code targetPath}.
     */
    private void collectTagsPerFile_Approach_1(final HgRevisionMap<HgChangelog> clogrmap, final int[] tagLocalRevs, final TagInfo[] allTags, Path targetPath) throws HgException {
        HgRepository repository = clogrmap.getRepo();
        final long start = System.currentTimeMillis();
        // file2rev2tag value is array of revisions, always of allTags.length. Revision index in the array
        // is index of corresponding TagInfo in allTags;
        final Map<Path, Nodeid[]> file2rev2tag = new HashMap<Path, Nodeid[]>();
        repository.getManifest().walk(new HgManifest.Inspector() {
            // indexes (into allTags) of tags at the manifest revision being visited, -1-terminated
            private int[] tagIndexAtRev = new int[4]; // it's unlikely there would be a lot of tags associated with a given cset

            // NOTE(review): "mainfestRevision" is a typo for "manifestRevision" — parameter
            // name only, kept as-is to preserve the source byte-for-byte.
            public boolean begin(int mainfestRevision, Nodeid nid, int changelogRevision) {
                // may do better here using tagLocalRev2TagInfo, but need to change a lot, too lazy now
                Nodeid cset = clogrmap.revision(changelogRevision);
                Arrays.fill(tagIndexAtRev, -1);
                for (int i = 0, x = 0; i < allTags.length; i++) {
                    if (cset.equals(allTags[i].revision())) {
                        tagIndexAtRev[x++] = i;
                        if (x == tagIndexAtRev.length) {
                            // expand twice as much
                            int[] expanded = new int[x << 1];
                            System.arraycopy(tagIndexAtRev, 0, expanded, 0, x);
                            expanded[x] = -1; // just in case there'd be no more tags associated with this cset
                            tagIndexAtRev = expanded;
                        }
                    }
                }
                if (tagIndexAtRev[0] == -1) {
                    System.out.println("Can't happen, provided we iterate over revisions with tags only");
                }
                return true;
            }

            public boolean next(Nodeid nid, Path fname, HgManifest.Flags flags) {
                Nodeid[] m = file2rev2tag.get(fname);
                if (m == null) {
                    file2rev2tag.put(fname, m = new Nodeid[allTags.length]);
                }
                for (int tagIndex : tagIndexAtRev) {
                    if (tagIndex == -1) {
                        break;
                    }
                    if (m[tagIndex] != null) {
                        System.out.printf("There's another revision (%s) associated with tag %s already while we try to associate %s\n", m[tagIndex].shortNotation(), allTags[tagIndex].name(), nid.shortNotation());
                    }
                    m[tagIndex] = nid;
                }
                return true;
            }

            public boolean end(int manifestRevision) {
                return true;
            }
        }, tagLocalRevs);
        System.out.printf("Cache built: %d ms\n", System.currentTimeMillis() - start);
        //
        // look up specific file. This part is fast.
        HgDataFile fileNode = repository.getFileNode(targetPath);
        final Nodeid[] allTagsOfTheFile = file2rev2tag.get(targetPath);
        // TODO if fileNode.isCopy, repeat for each getCopySourceName()
        for (int fileRevIndex = 0; fileRevIndex < fileNode.getRevisionCount(); fileRevIndex++) {
            Nodeid fileRev = fileNode.getRevision(fileRevIndex);
            int changesetRevIndex = fileNode.getChangesetRevisionIndex(fileRevIndex);
            List<String> associatedTags = new LinkedList<String>();
            for (int i = 0; i < allTagsOfTheFile.length; i++) {
                if (fileRev.equals(allTagsOfTheFile[i])) {
                    associatedTags.add(allTags[i].name());
                }
            }
            System.out.printf("%3d%7d%s\n", fileRevIndex, changesetRevIndex, associatedTags);
        }
    }

    /**
     * Alternative to Approach 1 that avoids the all-files map: records only the target
     * file's revision at each tagged changeset, then matches file revisions against
     * those records (two variants — an indexWalk pass and a fileRev2TagNames lookup —
     * the latter is what gets printed).
     */
    private void collectTagsPerFile_Approach_2(HgRepository repository, final int[] tagLocalRevs, final IntMap<List<TagInfo>> tagRevIndex2TagInfo, Path targetPath) throws HgException {
        //
        // Approach 2. No all-file map. Collect file revisions recorded at the time of tagging,
        // then for each file revision check if it is among those above, and if yes, take corresponding tags
        HgDataFile fileNode = repository.getFileNode(targetPath);
        final long start2 = System.nanoTime();
        final Map<Integer, Nodeid> fileRevisionAtTagRevision = new HashMap<Integer, Nodeid>();
        final Map<Nodeid, List<String>> fileRev2TagNames = new HashMap<Nodeid, List<String>>();
        HgManifest.Inspector collectFileRevAtCset = new HgManifest.Inspector() {
            // changelog index of the manifest revision currently being visited (set in begin())
            private int csetRevIndex;

            public boolean next(Nodeid nid, Path fname, Flags flags) {
                fileRevisionAtTagRevision.put(csetRevIndex, nid);
                if (tagRevIndex2TagInfo.containsKey(csetRevIndex)) {
                    List<String> tags = fileRev2TagNames.get(nid);
                    if (tags == null) {
                        fileRev2TagNames.put(nid, tags = new ArrayList<String>(3));
                    }
                    for (TagInfo ti : tagRevIndex2TagInfo.get(csetRevIndex)) {
                        tags.add(ti.name());
                    }
                }
                return true;
            }

            public boolean end(int manifestRevision) {
                return true;
            }

            public boolean begin(int mainfestRevision, Nodeid nid, int changelogRevision) {
                csetRevIndex = changelogRevision;
                return true;
            }
        };
        repository.getManifest().walkFileRevisions(targetPath, collectFileRevAtCset,tagLocalRevs);
        final long start2a = System.nanoTime();
        // First variant: derive tags per file revision on the fly (output suppressed below).
        fileNode.indexWalk(0, TIP, new HgDataFile.RevisionInspector() {

            public void next(int fileRevisionIndex, Nodeid fileRevision, int changesetRevisionIndex) {
                List<String> associatedTags = new LinkedList<String>();

                for (int taggedRevision : tagLocalRevs) {
                    // current file revision can't appear in tags that point to earlier changelog revisions (they got own file revision)
                    if (taggedRevision >= changesetRevisionIndex) {
                        // z points to some changeset with tag
                        Nodeid wasKnownAs = fileRevisionAtTagRevision.get(taggedRevision);
                        if (wasKnownAs.equals(fileRevision)) {
                            // has tag associated with changeset at index z
                            List<TagInfo> tagsAtRev = tagRevIndex2TagInfo.get(taggedRevision);
                            assert tagsAtRev != null;
                            for (TagInfo ti : tagsAtRev) {
                                associatedTags.add(ti.name());
                            }
                        }
                    }
                }
                // System.out.printf("%3d%7d%s\n", fileRevisionIndex, changesetRevisionIndex, associatedTags);
            }
        });
        // Second variant: look up the precollected fileRev2TagNames map — this one is printed.
        for (int i = 0, lastRev = fileNode.getLastRevision(); i <= lastRev; i++) {
            Nodeid fileRevision = fileNode.getRevision(i);
            List<String> associatedTags2 = fileRev2TagNames.get(fileRevision);
            int changesetRevIndex = fileNode.getChangesetRevisionIndex(i);
            System.out.printf("%3d%7d%s\n", i, changesetRevIndex, associatedTags2 == null ? Collections.emptyList() : associatedTags2);
        }
        System.out.printf("Alternative total time: %d ms, of that init: %d ms\n", (System.nanoTime() - start2)/1000000, (start2a-start2)/1000000);
        System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory());
    }

    /**
     * Manifest inspector that accepts every entry and does nothing — used to measure
     * pure walk overhead.
     */
    static class DoNothingManifestInspector implements HgManifest.Inspector {
        public boolean begin(int mainfestRevision, Nodeid nid, int changelogRevision) {
            return true;
        }
        public boolean next(Nodeid nid, Path fname, Flags flags) {
            return true;
        }
        public boolean end(int manifestRevision) {
            return true;
        }
    }

    /**
     * Earlier experiment (not invoked from {@code main}): maps each tag to the
     * changeset that introduced the tagged revision of README, then prints the tags
     * alongside a file-filtered log.
     */
    public static void main2(String[] args) throws HgCallbackTargetException, HgException, CancelledException {
        final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython"));
        final Path targetPath = Path.create("README");
        final HgTags tags = repository.getTags();
        final Map<String, HgTags.TagInfo> tagToInfo = tags.getAllTags();
        final HgManifest manifest = repository.getManifest();
        final Map<Nodeid, List<String>> changeSetRevisionToTags = new HashMap<Nodeid, List<String>>();
        final HgDataFile fileNode = repository.getFileNode(targetPath);
        for (String tagName : tagToInfo.keySet()) {
            final HgTags.TagInfo info = tagToInfo.get(tagName);
            final Nodeid nodeId = info.revision();
            // TODO: This is not correct as we can't be sure that file at the corresponding revision is actually our target file (which may have been renamed, etc.)
            final Nodeid fileRevision = manifest.getFileRevision(repository.getChangelog().getRevisionIndex(nodeId), targetPath);
            if (fileRevision == null) {
                continue;
            }
            final Nodeid changeSetRevision = fileNode.getChangesetRevision(fileRevision);
            List<String> revisionTags = changeSetRevisionToTags.get(changeSetRevision);
            if (revisionTags == null) {
                revisionTags = new ArrayList<String>();
                changeSetRevisionToTags.put(changeSetRevision, revisionTags);
            }
            revisionTags.add(tagName);
        }
        final HgLogCommand logCommand = new HgLogCommand(repository);
        logCommand.file(targetPath, true);
        logCommand.execute(new HgChangesetHandler() {
            public void cset(HgChangeset changeset) {
                if (changeset.getAffectedFiles().contains(targetPath)) {
                    System.out.println(changeset.getRevisionIndex() + " " + changeSetRevisionToTags.get(changeset.getNodeid()));
                }
            }
        });
    }
}