Mercurial > jhg
changeset 324:283b294d1079
Explore alternatives to access file-changelog combined history
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> | 
|---|---|
| date | Mon, 03 Oct 2011 06:47:20 +0200 | 
| parents | 4c7e3ba67213 | 
| children | f05c8b1f08c4 | 
| files | src/org/tmatesoft/hg/repo/HgManifest.java src/org/tmatesoft/hg/repo/Revlog.java test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java | 
| diffstat | 3 files changed, 155 insertions(+), 42 deletions(-) [+] | 
line wrap: on
 line diff
--- a/src/org/tmatesoft/hg/repo/HgManifest.java Fri Sep 30 08:44:48 2011 +0200 +++ b/src/org/tmatesoft/hg/repo/HgManifest.java Mon Oct 03 06:47:20 2011 +0200 @@ -411,6 +411,7 @@ return revisionNumber; } + // XXX likely can be replaced with Revlog.RevisionInspector public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) { if (changelog2manifest != null) { // next assertion is not an error, rather assumption check, which is too development-related to be explicit exception -
--- a/src/org/tmatesoft/hg/repo/Revlog.java Fri Sep 30 08:44:48 2011 +0200 +++ b/src/org/tmatesoft/hg/repo/Revlog.java Mon Oct 03 06:47:20 2011 +0200 @@ -33,7 +33,9 @@ import org.tmatesoft.hg.core.Nodeid; import org.tmatesoft.hg.internal.ArrayHelper; import org.tmatesoft.hg.internal.DataAccess; +import org.tmatesoft.hg.internal.Experimental; import org.tmatesoft.hg.internal.RevlogStream; +import org.tmatesoft.hg.util.Adaptable; import org.tmatesoft.hg.util.ByteChannel; import org.tmatesoft.hg.util.CancelSupport; import org.tmatesoft.hg.util.CancelledException; @@ -215,6 +217,62 @@ } } + @Experimental + public void walk(int start, int end, final Revlog.Inspector inspector) { + int lastRev = getLastRevision(); + if (start == TIP) { + start = lastRev; + } + if (end == TIP) { + end = lastRev; + } + final RevisionInspector revisionInsp = getAdapter(inspector, RevisionInspector.class); + final ParentInspector parentInsp = getAdapter(inspector, ParentInspector.class); + final Nodeid[] allRevisions = parentInsp == null ? null : new Nodeid[end - start + 1]; + + content.iterate(start, end, false, new RevlogStream.Inspector() { + + public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) { + Nodeid nid = Nodeid.fromBinary(nodeid, 0); + if (revisionInsp != null) { + revisionInsp.next(revisionNumber, nid, linkRevision); + } + if (parentInsp != null) { + Nodeid p1 = parent1Revision == -1 ? Nodeid.NULL : allRevisions[parent1Revision]; + Nodeid p2 = parent2Revision == -1 ? Nodeid.NULL : allRevisions[parent2Revision]; + allRevisions[revisionNumber] = nid; + parentInsp.next(revisionNumber, nid, parent1Revision, parent2Revision, p1, p2); + } + } + }); + } + private static <T> T getAdapter(Object o, Class<T> adapterClass) { + if (adapterClass.isInstance(o)) { + return adapterClass.cast(o); + } + if (o instanceof Adaptable) { + return ((Adaptable) o).getAdapter(adapterClass); + } + return null; + } + + /** + * MARKER + */ + @Experimental + public interface Inspector { + } + + @Experimental + public interface RevisionInspector extends Inspector { + void next(int localRevision, Nodeid revision, int linkedRevision); + } + + @Experimental + public interface ParentInspector extends Inspector { + void next(int localRevision, Nodeid revision, int parent1, int parent2, Nodeid nidParent1, Nodeid nidParent2); + } + /* * XXX think over if it's better to do either: * pw = getChangelog().new ParentWalker(); pw.init() and pass pw instance around as needed @@ -223,7 +281,7 @@ * * and yes, walker is not a proper name */ - public final class ParentWalker { + public final class ParentWalker implements ParentInspector { private Nodeid[] sequential; // natural repository order, childrenOf rely on ordering @@ -241,43 +299,31 @@ return Revlog.this.getRepo(); } + public void next(int revisionNumber, Nodeid revision, int parent1Revision, int parent2Revision, Nodeid nidParent1, Nodeid nidParent2) { + if (parent1Revision >= revisionNumber || parent2Revision >= revisionNumber) { + throw new IllegalStateException(); // sanity, revisions are sequential + } + int ix = revisionNumber; + sequential[ix] = sorted[ix] = revision; + if (parent1Revision != -1) { + firstParent[ix] = sequential[parent1Revision]; + } + if (parent2Revision != -1) { // revlog of DataAccess.java has p2 set when p1 is -1 + secondParent[ix] = sequential[parent2Revision]; + } + } + public void init() { - final RevlogStream stream = Revlog.this.content; - final int revisionCount = stream.revisionCount(); + final int revisionCount = Revlog.this.getRevisionCount(); firstParent = new Nodeid[revisionCount]; // although branches/merges are less frequent, and most of secondParent would be -1/null, some sort of // SparseOrderedList might be handy, provided its inner structures do not overweight simplicity of an array + // FIXME IntMap is right candidate? secondParent = new Nodeid[revisionCount]; // sequential = new Nodeid[revisionCount]; sorted = new Nodeid[revisionCount]; - - RevlogStream.Inspector insp = new RevlogStream.Inspector() { - - int ix = 0; - public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { - if (ix != revisionNumber) { - // XXX temp code, just to make sure I understand what's going on here - // FIXME check against cpython or another tool-mangled repository - throw new IllegalStateException(); - } - if (parent1Revision >= revisionNumber || parent2Revision >= revisionNumber) { - throw new IllegalStateException(); // sanity, revisions are sequential - } - final Nodeid nid = new Nodeid(nodeid, true); - sequential[ix] = sorted[ix] = nid; - if (parent1Revision != -1) { - assert parent1Revision < ix; - firstParent[ix] = sequential[parent1Revision]; - } - if (parent2Revision != -1) { // revlog of DataAccess.java has p2 set when p1 is -1 - assert parent2Revision < ix; - secondParent[ix] = sequential[parent2Revision]; - } - ix++; - } - }; - stream.iterate(0, TIP, false, insp); + Revlog.this.walk(0, TIP, this); Arrays.sort(sorted); sorted2natural = new int[revisionCount]; for (int i = 0; i < revisionCount; i++) { @@ -424,7 +470,7 @@ * getLocalRevision(Nodeid) with straightforward lookup approach performs O(n/2) * #localRevision() is log(n), plus initialization is O(n) */ - public final class RevisionMap { + public final class RevisionMap implements RevisionInspector { /* * in fact, initialization is much slower as it instantiates Nodeids, while #getLocalRevision * compares directly against byte buffer. Measuring cpython with 70k+ gives 3 times difference (47 vs 171) @@ -432,7 +478,9 @@ */ /* - * XXX 3 * (x * 4) bytes. Can I do better? + * XXX 3 * (x * 4) bytes. Can I do better? + * It seems, yes. Don't need to keep sorted, always can emulate it with indirect access to sequential through sorted2natural. + * i.e. instead sorted[mid].compareTo(toFind), do sequential[sorted2natural[mid]].compareTo(toFind) */ private Nodeid[] sequential; // natural repository order, childrenOf rely on ordering private Nodeid[] sorted; // for binary search @@ -444,23 +492,20 @@ public HgRepository getRepo() { return Revlog.this.getRepo(); } + + public void next(int localRevision, Nodeid revision, int linkedRevision) { + sequential[localRevision] = sorted[localRevision] = revision; + } /** * @return <code>this</code> for convenience. */ public RevisionMap init(/*XXX Pool<Nodeid> to reuse nodeids, if possible. */) { // XXX HgRepository.register((RepoChangeListener) this); // listen to changes in repo, re-init if needed? - final int revisionCount = Revlog.this.content.revisionCount(); + final int revisionCount = Revlog.this.getRevisionCount(); sequential = new Nodeid[revisionCount]; sorted = new Nodeid[revisionCount]; - RevlogStream.Inspector insp = new RevlogStream.Inspector() { - - public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) { - final Nodeid nid = new Nodeid(nodeid, true); - sequential[revisionNumber] = sorted[revisionNumber] = nid; - } - }; - Revlog.this.content.iterate(0, TIP, false, insp); + Revlog.this.walk(0, TIP, this); // next is alternative to Arrays.sort(sorted), and build sorted2natural looking up each element of sequential in sorted. // the way sorted2natural was build is O(n*log n). final ArrayHelper ah = new ArrayHelper();
--- a/test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java Fri Sep 30 08:44:48 2011 +0200 +++ b/test/org/tmatesoft/hg/test/MapTagsToFileRevisions.java Mon Oct 03 06:47:20 2011 +0200 @@ -1,5 +1,7 @@ package org.tmatesoft.hg.test; +import static org.tmatesoft.hg.repo.HgRepository.TIP; + import java.io.File; import java.util.ArrayList; import java.util.Arrays; @@ -8,6 +10,7 @@ import java.util.List; import java.util.Map; +import org.junit.Assert; import org.tmatesoft.hg.core.HgBadStateException; import org.tmatesoft.hg.core.HgChangeset; import org.tmatesoft.hg.core.HgChangesetHandler; @@ -40,11 +43,75 @@ // m.collectTagsPerFile(); // m.manifestWalk(); // m.changelogWalk(); - m.revisionMap(); +// m.revisionMap(); + m.buildFile2ChangelogRevisionMap(); m = null; System.gc(); System.out.printf("Free mem: %,d\n", Runtime.getRuntime().freeMemory()); } + + /* + * .hgtags, 261 revisions + * Approach 1: total 83, init: 0, iteration: 82 + * Approach 2: total 225, init: 206, iteration: 19 + * README, 465 revisions + * Approach 1: total 162, init: 0, iteration: 161 + * Approach 2: total 231, init: 198, iteration: 32 + * configure.in, 1109 revisions + * Approach 1: total 409, init: 1, iteration: 407 + * Approach 2: total 277, init: 203, iteration: 74 + */ + private void buildFile2ChangelogRevisionMap() throws Exception { + final HgRepository repository = new HgLookup().detect(new File("/temp/hg/cpython")); + final HgChangelog clog = repository.getChangelog(); + final HgDataFile fileNode = repository.getFileNode("configure.in"); + // warm-up + HgChangelog.RevisionMap clogMap = clog.new RevisionMap().init(); + HgDataFile.RevisionMap fileMap = fileNode.new RevisionMap().init(); + // + final int latestRevision = fileNode.getLastRevision(); + // + final long start_1 = System.nanoTime(); + fileMap = fileNode.new RevisionMap().init(); + final long start_1a = System.nanoTime(); + final Map<Nodeid, Nodeid> changesetToNodeid_1 = new HashMap<Nodeid, Nodeid>(); + for (int revision = 0; revision <= latestRevision; revision++) { + final Nodeid nodeId = fileMap.revision(revision); +// final Nodeid changesetId = fileNode.getChangesetRevision(nodeId); + int localCset = fileNode.getChangesetLocalRevision(revision); + final Nodeid changesetId = clog.getRevision(localCset); + changesetToNodeid_1.put(changesetId, nodeId); + } + final long end_1 = System.nanoTime(); + // + final long start_2 = System.nanoTime(); + clogMap = clog.new RevisionMap().init(); + fileMap = fileNode.new RevisionMap().init(); + final Map<Nodeid, Nodeid> changesetToNodeid_2 = new HashMap<Nodeid, Nodeid>(); + final long start_2a = System.nanoTime(); + for (int revision = 0; revision <= latestRevision; revision++) { + Nodeid nidFile = fileMap.revision(revision); + int localCset = fileNode.getChangesetLocalRevision(revision); + Nodeid nidCset = clogMap.revision(localCset); + changesetToNodeid_2.put(nidCset, nidFile); + } + final long end_2 = System.nanoTime(); + Assert.assertEquals(changesetToNodeid_1, changesetToNodeid_2); + // + final long start_3 = System.nanoTime(); + final Map<Nodeid, Nodeid> changesetToNodeid_3 = new HashMap<Nodeid, Nodeid>(); + fileNode.walk(0, TIP, new HgDataFile.RevisionInspector() { + + public void next(int localRevision, Nodeid revision, int linkedRevision) { + changesetToNodeid_3.put(clog.getRevision(linkedRevision), revision); + } + }); + final long end_3 = System.nanoTime(); + Assert.assertEquals(changesetToNodeid_1, changesetToNodeid_3); + System.out.printf("Approach 1: total %d, init: %d, iteration: %d\n", (end_1 - start_1)/1000000, (start_1a - start_1)/1000000, (end_1 - start_1a)/1000000); + System.out.printf("Approach 2: total %d, init: %d, iteration: %d\n", (end_2 - start_2)/1000000, (start_2a - start_2)/1000000, (end_2 - start_2a)/1000000); + System.out.printf("Approach 3: total %d\n", (end_3 - start_3)/1000000); + } /* * Each 5000 revisions from cpython, total 15 revisions
