# HG changeset patch # User Artem Tikhomirov # Date 1308841118 -7200 # Node ID 0e01f9182e16f938a06fb25e9a6e11b5aae97ecb # Parent ad6a046943beaf490e01651cd4452bf095fe0670 External cache Nodeid<->int added, Revlog.RevisionMap diff -r ad6a046943be -r 0e01f9182e16 cmdline/org/tmatesoft/hg/console/Main.java --- a/cmdline/org/tmatesoft/hg/console/Main.java Thu Jun 23 15:19:07 2011 +0200 +++ b/cmdline/org/tmatesoft/hg/console/Main.java Thu Jun 23 16:58:38 2011 +0200 @@ -72,10 +72,11 @@ public static void main(String[] args) throws Exception { Main m = new Main(args); + m.testRevisionMap(); // m.testSubrepos(); // m.testReadWorkingCopy(); // m.testParents(); - m.testEffectiveFileLog(); +// m.testEffectiveFileLog(); // m.testCatAtCsetRevision(); // m.testMergeState(); // m.testFileStatus(); @@ -89,6 +90,48 @@ // m.dumpCompleteManifestHigh(); // m.bunchOfTests(); } + + /* + * cpython repo with 70715 revisions. + 3 revisions - 80 ms vs 250 ms (250ms init) + 4 revisions - 110 ms vs 265 ms (265 ms init) + 5 revisions - 94 vs 266. + complete iteration in changelog.getLocalRevision(tipNodeid) takes 47 ms + compared to complete iteration inside RevisionMap.init() of 171 ms. + The only difference is latter instantiates Nodeids, while former compares binary content as is. + Hence, with 20-30 ms per regular getLocalRevision, it pays off to use RevisionMap with at least 15-20 + queries + */ + private void testRevisionMap() throws Exception { + HgChangelog changelog = hgRepo.getChangelog(); + HgChangelog.RevisionMap rmap = changelog.new RevisionMap().init(); // warm-up, ensure complete file read + int tip = changelog.getLastRevision(); + // take 5 arbitrary revisions at 0, 1/4, 2/4, 3/4 and 4/4 + final Nodeid[] revs = new Nodeid[5]; + revs[4] = changelog.getRevision(0); + revs[3] = changelog.getRevision(tip / 4); + revs[2] = changelog.getRevision(tip / 2); + revs[1] = changelog.getRevision(tip / 4 + tip / 2); + revs[0] = changelog.getRevision(tip); + long start = System.currentTimeMillis(); + for (int i = 0; i < revs.length; i++) { + final int localRev = changelog.getLocalRevision(revs[i]); + System.out.printf("%d:%s\n", localRev, revs[i]); + } + System.out.println(System.currentTimeMillis() - start); + System.out.println(); + // + start = System.currentTimeMillis(); + rmap = changelog.new RevisionMap().init(); + long s2 = System.currentTimeMillis(); + for (int i = 0; i < revs.length; i++) { + final int localRev = rmap.localRevision(revs[i]); + System.out.printf("%d:%s\n", localRev, revs[i]); + } + System.out.println(System.currentTimeMillis() - start); + System.out.printf("\t from that, init took %d ms\n", s2 - start); + + } private void testSubrepos() throws Exception { for (HgSubrepoLocation l : hgRepo.getSubrepositories()) { diff -r ad6a046943be -r 0e01f9182e16 src/org/tmatesoft/hg/internal/RevlogStream.java --- a/src/org/tmatesoft/hg/internal/RevlogStream.java Thu Jun 23 15:19:07 2011 +0200 +++ b/src/org/tmatesoft/hg/internal/RevlogStream.java Thu Jun 23 16:58:38 2011 +0200 @@ -167,7 +167,7 @@ daIndex.skip(inline ? 12 + compressedLen : 12); } } catch (IOException ex) { - ex.printStackTrace(); // log error. FIXME better handling + ex.printStackTrace(); // log error. FIXME better handling. Perhaps, shall return BAD_REVISION here as well? throw new IllegalStateException(ex); } finally { daIndex.done(); @@ -371,7 +371,6 @@ } public boolean range(int start, int end) throws IOException { -// System.out.printf("RevlogStream.ReaderN1.range(): [%d, %d]\n", start, end); byte[] nodeidBuf = new byte[20]; int i; boolean extraReadsToBaseRev = false; // to indicate we read revision prior to start. XXX not sure can't do without diff -r ad6a046943be -r 0e01f9182e16 src/org/tmatesoft/hg/repo/Revlog.java --- a/src/org/tmatesoft/hg/repo/Revlog.java Thu Jun 23 15:19:07 2011 +0200 +++ b/src/org/tmatesoft/hg/repo/Revlog.java Thu Jun 23 16:58:38 2011 +0200 @@ -16,6 +16,7 @@ */ package org.tmatesoft.hg.repo; +import static org.tmatesoft.hg.core.Nodeid.NULL; import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION; import static org.tmatesoft.hg.repo.HgRepository.TIP; @@ -87,6 +88,15 @@ return Nodeid.fromBinary(content.nodeid(revision), 0); } + /** + * Get local revision number (index) of the specified revision. + * + * For occasional queries, this method works with decent performance, despite its O(n/2) approach. + * Alternatively, if you need to perform multiple queries (e.g. at least 15-20), {@link RevisionMap} may come handy. + * + * @param nid revision to look up + * @return revision index, or {@link HgRepository#BAD_REVISION} if specified revision not found. + */ public final int getLocalRevision(Nodeid nid) { int revision = content.findLocalRevisionNumber(nid); if (revision == BAD_REVISION) { @@ -229,6 +239,7 @@ public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { if (ix != revisionNumber) { // XXX temp code, just to make sure I understand what's going on here + // FIXME check against cpython or another tool-mangled repository throw new IllegalStateException(); } if (parent1Revision >= revisionNumber || parent2Revision >= revisionNumber) { @@ -370,6 +381,76 @@ } } + /** + * Effective int to Nodeid and vice versa translation. It's advised to use this class instead of + * multiple {@link Revlog#getLocalRevision(Nodeid)} calls. + * + * getLocalRevision(Nodeid) with straightforward lookup approach performs O(n/2) + * #localRevision() is log(n), plus initialization is O(n) + */ + public final class RevisionMap { + /* + * in fact, initialization is much slower as it instantiates Nodeids, while #getLocalRevision + * compares directly against byte buffer. Measuring cpython with 70k+ gives 3 times difference (47 vs 171) + * for complete changelog iteration. + */ + + /* + * XXX 3 * (x * 4) bytes. Can I do better? + */ + private Nodeid[] sequential; // natural repository order, childrenOf rely on ordering + private Nodeid[] sorted; // for binary search + private int[] sorted2natural; + + public RevisionMap() { + } + + public HgRepository getRepo() { + return Revlog.this.getRepo(); + } + + /** + * @return this for convenience. + */ + public RevisionMap init(/*XXX Pool to reuse nodeids, if possible. */) { + // XXX HgRepository.register((RepoChangeListener) this); // listen to changes in repo, re-init if needed? + final int revisionCount = Revlog.this.content.revisionCount(); + sequential = new Nodeid[revisionCount]; + sorted = new Nodeid[revisionCount]; + sorted2natural = new int[revisionCount]; + RevlogStream.Inspector insp = new RevlogStream.Inspector() { + + public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) { + final Nodeid nid = new Nodeid(nodeid, true); + sequential[revisionNumber] = sorted[revisionNumber] = nid; + } + }; + Revlog.this.content.iterate(0, TIP, false, insp); + Arrays.sort(sorted); + for (int i = 0; i < revisionCount; i++) { + Nodeid n = sequential[i]; + int x = Arrays.binarySearch(sorted, n); + sorted2natural[x] = i; + } + return this; + } + + public Nodeid revision(int localRevision) { + return sequential[localRevision]; + } + public int localRevision(Nodeid revision) { + if (revision == null || NULL.equals(revision)) { + return BAD_REVISION; + } + int x = Arrays.binarySearch(sorted, revision); + if (x < 0) { + return BAD_REVISION; + } + return sorted2natural[x]; + } + } + + protected static class ContentPipe implements RevlogStream.Inspector, CancelSupport { private final ByteChannel sink; private final CancelSupport cancelSupport;