# HG changeset patch
# User Artem Tikhomirov
# Date 1313460209 -7200
# Node ID a620f0663a37cb69a7244b637bd82fb0be4384be
# Parent  1874d11054e55f93b7af3321a4890d74969e3f7f
Collect tags for a file - improve performance of 'sparse' manifest reads

diff -r 1874d11054e5 -r a620f0663a37 cmdline/org/tmatesoft/hg/console/Main.java
--- a/cmdline/org/tmatesoft/hg/console/Main.java	Mon Aug 15 18:59:05 2011 +0200
+++ b/cmdline/org/tmatesoft/hg/console/Main.java	Tue Aug 16 04:03:29 2011 +0200
@@ -18,6 +18,7 @@
 
 import static org.tmatesoft.hg.repo.HgRepository.TIP;
 
+import java.io.File;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
@@ -33,6 +34,7 @@
 import org.tmatesoft.hg.internal.ByteArrayChannel;
 import org.tmatesoft.hg.internal.DigestHelper;
 import org.tmatesoft.hg.internal.PathGlobMatcher;
+import org.tmatesoft.hg.internal.RelativePathRewrite;
 import org.tmatesoft.hg.repo.HgBranches;
 import org.tmatesoft.hg.repo.HgChangelog;
 import org.tmatesoft.hg.repo.HgDataFile;
@@ -46,8 +48,11 @@
 import org.tmatesoft.hg.repo.HgSubrepoLocation.Kind;
 import org.tmatesoft.hg.repo.HgWorkingCopyStatusCollector;
 import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
+import org.tmatesoft.hg.util.FileIterator;
+import org.tmatesoft.hg.util.FileWalker;
 import org.tmatesoft.hg.util.Pair;
 import org.tmatesoft.hg.util.Path;
+import org.tmatesoft.hg.util.PathRewrite;
 
 /**
  * Various debug dumps.
@@ -72,10 +77,11 @@
 
 	public static void main(String[] args) throws Exception {
 		Main m = new Main(args);
+		m.testTreeTraversal();
 //		m.testRevisionMap();
 //		m.testSubrepos();
 //		m.testReadWorkingCopy();
-		m.testParents();
+//		m.testParents();
 //		m.testEffectiveFileLog();
 //		m.testCatAtCsetRevision();
 //		m.testMergeState();
@@ -91,6 +97,22 @@
 //		m.bunchOfTests();
 	}
 
+	/**
+	 * Walks every file under the repository working directory and prints how many were visited and how long it took.
+	 */
+	private void testTreeTraversal() throws Exception {
+		File repoRoot = hgRepo.getWorkingDir();
+		Path.Source pathSrc = new Path.SimpleSource(new PathRewrite.Composite(new RelativePathRewrite(repoRoot), hgRepo.getToRepoPathHelper()));
+		FileWalker w = new FileWalker(repoRoot, pathSrc);
+		int count = 0;
+		final long start = System.currentTimeMillis();
+		while (w.hasNext()) {
+			count++;
+			w.next();
+		}
+		System.out.printf("Traversal of %d files took %d ms", count, System.currentTimeMillis() - start);
+	}
+
 	/*
 	 * cpython repo with 70715 revisions.
 	 	3 revisions - 80 ms vs 250 ms (250ms init)
diff -r 1874d11054e5 -r a620f0663a37 src/org/tmatesoft/hg/repo/HgManifest.java
--- a/src/org/tmatesoft/hg/repo/HgManifest.java	Mon Aug 15 18:59:05 2011 +0200
+++ b/src/org/tmatesoft/hg/repo/HgManifest.java	Tue Aug 16 04:03:29 2011 +0200
@@ -60,6 +60,33 @@
 		content.iterate(start0, end0, true, new ManifestParser(inspector));
 	}
 
+	/**
+	 * "Sparse" iteration of the manifest: only revisions that correspond to the given changesets are visited.
+	 * 
+	 * @param inspector callback to receive manifest entries, shall not be <code>null</code>
+	 * @param localRevisions local changeset revisions to visit, in any order; the array itself is not modified
+	 * @throws IllegalArgumentException if any argument is <code>null</code>
+	 */
+	public void walk(final Inspector inspector, int... localRevisions) {
+		if (inspector == null || localRevisions == null) {
+			throw new IllegalArgumentException();
+		}
+		int[] manifestLocalRevs = new int[localRevisions.length];
+		boolean needsSort = false;
+		for (int i = 0; i < localRevisions.length; i++) {
+			final int manifestLocalRev = fromChangelog(localRevisions[i]);
+			manifestLocalRevs[i] = manifestLocalRev;
+			if (i > 0 && manifestLocalRevs[i-1] > manifestLocalRev) {
+				needsSort = true;
+			}
+		}
+		// revisions are handed over in ascending order; the sort is skipped when they already are
+		if (needsSort) {
+			Arrays.sort(manifestLocalRevs);
+		}
+		content.iterate(manifestLocalRevs, true, new ManifestParser(inspector));
+	}
+
 	// manifest revision number that corresponds to the given changeset
 	/*package-local*/ int fromChangelog(int revisionNumber) {
 		if (HgInternals.wrongLocalRevision(revisionNumber)) {
diff -r 1874d11054e5 -r a620f0663a37 test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java	Tue Aug 16 04:03:29 2011 +0200
@@ -0,0 +1,141 @@
+package org.tmatesoft.hg.test;
+
+import java.io.*;
+import java.util.*;
+import java.util.Map.Entry;
+
+import org.tmatesoft.hg.core.*;
+import org.tmatesoft.hg.internal.Pool;
+import org.tmatesoft.hg.repo.*;
+import org.tmatesoft.hg.repo.HgTags.TagInfo;
+import org.tmatesoft.hg.util.*;
+
+/**
+ * Sample: for every revision of a file, report the tags whose manifest holds exactly that file revision.
+ * 
+ * @author Marc Strapetz
+ */
+public class MapTagsToFileRevisions {
+
+	// Static =================================================================
+
+	/**
+	 * Reads manifests of all tagged changesets in a single sparse walk, then answers the query from the in-memory cache.
+	 */
+	public static void main(String[] args) throws HgException, CancelledException {
+		final long start = System.currentTimeMillis();
+		final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython"));
+		final HgTags tags = repository.getTags();
+		//
+		// build cache
+		final Map<String, Map<TagInfo, Nodeid>> file2tag2rev = new HashMap<String, Map<TagInfo, Nodeid>>();
+		System.out.printf("Collecting manifests for %d tags\n", tags.getTags().size());
+		// effective translation of changeset revisions to their local indexes
+		final HgChangelog.RevisionMap clogrmap = repository.getChangelog().new RevisionMap().init();
+		int[] tagLocalRevs = new int[tags.getTags().size()];
+		int i = 0;
+		for (TagInfo tag : tags.getTags().values()) {
+			final Nodeid tagRevision = tag.revision();
+			int tagLocalRev = clogrmap.localRevision(tagRevision);
+			tagLocalRevs[i++] = tagLocalRev;
+		}
+		System.out.printf("Found tag revisions to analyze: %d\n", System.currentTimeMillis() - start);
+		//
+		repository.getManifest().walk(new HgManifest.Inspector() {
+			private List<String> tagsAtRev;
+			final Pool<String> filenamePool = new Pool<String>();
+			final Pool<Nodeid> nodeidPool = new Pool<Nodeid>();
+
+			public boolean begin(int manifestRevision, Nodeid nid, int changelogRevision) {
+				Nodeid cset = clogrmap.revision(changelogRevision);
+				tagsAtRev = tags.tags(cset);
+				if (tagsAtRev.isEmpty()) {
+					System.out.println("Can't happen, provided we iterate over revisions with tags only");
+				}
+				return true;
+			}
+
+			public boolean next(Nodeid nid, String fname, String flags) {
+				fname = filenamePool.unify(fname);
+				nid = nodeidPool.unify(nid);
+				Map<TagInfo, Nodeid> m = file2tag2rev.get(fname);
+				if (m == null) {
+					file2tag2rev.put(fname, m = new HashMap<TagInfo, Nodeid>());
+				}
+				for (String tag : tagsAtRev) {
+					m.put(tags.getTags().get(tag), nid);
+				}
+				return true;
+			}
+
+			public boolean end(int manifestRevision) {
+				return true;
+			}
+
+		}, tagLocalRevs);
+		System.out.printf("Cache built: %d\n", System.currentTimeMillis() - start);
+		//
+		// look up specific file. This part is fast.
+		final Path targetPath = Path.create("README");
+		HgDataFile fileNode = repository.getFileNode(targetPath);
+		// the file may be missing from every tagged manifest, in which case the cache holds no entry for it
+		Map<TagInfo, Nodeid> allTagsOfTheFile = file2tag2rev.get(targetPath.toString());
+		if (allTagsOfTheFile == null) {
+			allTagsOfTheFile = Collections.emptyMap();
+		}
+		// TODO if fileNode.isCopy, repeat for each getCopySourceName()
+		for (int localFileRev = 0; localFileRev < fileNode.getRevisionCount(); localFileRev++) {
+			Nodeid fileRev = fileNode.getRevision(localFileRev);
+			int changesetLocalRev = fileNode.getChangesetLocalRevision(localFileRev);
+			List<String> associatedTags = new LinkedList<String>();
+			for (Entry<TagInfo, Nodeid> e : allTagsOfTheFile.entrySet()) {
+				Nodeid fileRevAtTag = e.getValue();
+				if (fileRev.equals(fileRevAtTag)) {
+					associatedTags.add(e.getKey().name());
+				}
+			}
+			System.out.printf("%3d%7d%s\n", localFileRev, changesetLocalRev, associatedTags);
+		}
+		System.out.printf("Total time: %d", System.currentTimeMillis() - start);
+	}
+
+	/**
+	 * Per-tag variant: looks the file up in the manifest of each tagged changeset separately, then prints tags along the file's log.
+	 */
+	public static void main2(String[] args) throws HgException, CancelledException {
+		final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython"));
+		final Path targetPath = Path.create("README");
+		final HgTags tags = repository.getTags();
+		final Map<String, TagInfo> tagToInfo = tags.getTags();
+		final HgManifest manifest = repository.getManifest();
+		final Map<Nodeid, List<String>> changeSetRevisionToTags = new HashMap<Nodeid, List<String>>();
+		final HgDataFile fileNode = repository.getFileNode(targetPath);
+		for (String tagName : tagToInfo.keySet()) {
+			final HgTags.TagInfo info = tagToInfo.get(tagName);
+			final Nodeid nodeId = info.revision();
+			// TODO: This is not correct as we can't be sure that file at the corresponding revision is actually our target file (which may have been renamed, etc.)
+			final Nodeid fileRevision = manifest.getFileRevision(repository.getChangelog().getLocalRevision(nodeId), targetPath);
+			if (fileRevision == null) {
+				continue;
+			}
+
+			final Nodeid changeSetRevision = fileNode.getChangesetRevision(fileRevision);
+			List<String> revisionTags = changeSetRevisionToTags.get(changeSetRevision);
+			if (revisionTags == null) {
+				revisionTags = new ArrayList<String>();
+				changeSetRevisionToTags.put(changeSetRevision, revisionTags);
+			}
+			revisionTags.add(tagName);
+		}
+
+		final HgLogCommand logCommand = new HgLogCommand(repository);
+		logCommand.file(targetPath, true);
+		logCommand.execute(new HgChangesetHandler() {
+			public void next(HgChangeset changeset) {
+				if (changeset.getAffectedFiles().contains(targetPath)) {
+					System.out.println(changeset.getRevision() + " " + changeSetRevisionToTags.get(changeset.getNodeid()));
+				}
+			}
+		});
+	}
+}
\ No newline at end of file