# HG changeset patch # User Artem Tikhomirov # Date 1367593411 -7200 # Node ID 46f29b73e51e385061eaabfb7dc8187b8bcd9ba3 # Parent 55b7987c1796ec89dbd9aba1638275df4172a8a1 Utilize RevisionLookup to speed-up getRevisionIndex of both manifest and changelog diff -r 55b7987c1796 -r 46f29b73e51e src/org/tmatesoft/hg/internal/RevisionLookup.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/org/tmatesoft/hg/internal/RevisionLookup.java Fri May 03 17:03:31 2013 +0200 @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2013 TMate Software Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * For information on how to redistribute this software under + * the terms of a license other than GNU General Public License + * contact TMate Software at support@hg4j.com + */ +package org.tmatesoft.hg.internal; + +import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION; + +import java.util.Arrays; + +import org.tmatesoft.hg.core.Nodeid; +import org.tmatesoft.hg.repo.HgRevisionMap; + +/** + * Lite alternative to {@link HgRevisionMap}, to speed up nodeid to index conversion without consuming too much memory. + * E.g. for a 100k revisions, {@link HgRevisionMap} consumes 3 * (N * sizeof(int)) for indexes plus 48 bytes per + * Nodeid instance, total (12+48)*N = 6 mb of memory. {RevisionLookup} instead keeps only Nodeid hashes, (N * sizeof(int) = 400 kb), + * but is slower in lookup, O(N/2) to find potential match plus disk read operatin (or few, in an unlikely case of hash collisions). + * + * @author Artem Tikhomirov + * @author TMate Software Ltd. + */ +public class RevisionLookup implements RevlogStream.Inspector { + + private final RevlogStream content; + private int[] nodeidHashes; + + public RevisionLookup(RevlogStream stream) { + assert stream != null; + content = stream; + } + + public static RevisionLookup createFor(RevlogStream stream) { + RevisionLookup rv = new RevisionLookup(stream); + int revCount = stream.revisionCount(); + rv.prepare(revCount); + if (revCount > 0) { + stream.iterate(0, revCount - 1, false, rv); + } + return rv; + } + + public void prepare(int count) { + nodeidHashes = new int[count]; + Arrays.fill(nodeidHashes, BAD_REVISION); + } + public void next(int index, byte[] nodeid) { + nodeidHashes[index] = Nodeid.hashCode(nodeid); + } + public void next(int index, Nodeid nodeid) { + nodeidHashes[index] = nodeid.hashCode(); + } + public int findIndex(Nodeid nodeid) { + final int hash = nodeid.hashCode(); + for (int i = 0; i < nodeidHashes.length; i++) { + if (nodeidHashes[i] == hash) { + byte[] nodeidAtI = content.nodeid(i); + if (nodeid.equalsTo(nodeidAtI)) { + return i; + } + } + // else: false match (only 4 head bytes matched, continue loop + } + return BAD_REVISION; + } + + public void next(int revisionIndex, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) { + next(revisionIndex, nodeid); + } +} diff -r 55b7987c1796 -r 46f29b73e51e src/org/tmatesoft/hg/repo/HgChangelog.java --- a/src/org/tmatesoft/hg/repo/HgChangelog.java Fri May 03 15:29:26 2013 +0200 +++ b/src/org/tmatesoft/hg/repo/HgChangelog.java Fri May 03 17:03:31 2013 +0200 @@ -50,7 +50,7 @@ public final class HgChangelog extends Revlog { /* package-local */HgChangelog(HgRepository hgRepo, RevlogStream content) { - super(hgRepo, content); + super(hgRepo, content, true); } public void all(final HgChangelog.Inspector inspector) throws HgInvalidRevisionException, HgInvalidControlFileException { diff -r 55b7987c1796 -r 46f29b73e51e src/org/tmatesoft/hg/repo/HgDataFile.java --- a/src/org/tmatesoft/hg/repo/HgDataFile.java Fri May 03 15:29:26 2013 +0200 +++ b/src/org/tmatesoft/hg/repo/HgDataFile.java Fri May 03 17:03:31 2013 +0200 @@ -67,7 +67,7 @@ private Metadata metadata; // get initialized on first access to file content. /*package-local*/HgDataFile(HgRepository hgRepo, Path filePath, RevlogStream content) { - super(hgRepo, content); + super(hgRepo, content, false); path = filePath; } diff -r 55b7987c1796 -r 46f29b73e51e src/org/tmatesoft/hg/repo/HgManifest.java --- a/src/org/tmatesoft/hg/repo/HgManifest.java Fri May 03 15:29:26 2013 +0200 +++ b/src/org/tmatesoft/hg/repo/HgManifest.java Fri May 03 17:03:31 2013 +0200 @@ -35,6 +35,7 @@ import org.tmatesoft.hg.internal.IntVector; import org.tmatesoft.hg.internal.IterateControlMediator; import org.tmatesoft.hg.internal.Lifecycle; +import org.tmatesoft.hg.internal.RevisionLookup; import org.tmatesoft.hg.internal.RevlogStream; import org.tmatesoft.hg.repo.HgChangelog.RawChangeset; import org.tmatesoft.hg.util.CancelSupport; @@ -123,7 +124,7 @@ } /*package-local*/ HgManifest(HgRepository hgRepo, RevlogStream content, EncodingHelper eh) { - super(hgRepo, content); + super(hgRepo, content, true); encodingHelper = eh; pathFactory = hgRepo.getSessionContext().getPathFactory(); } @@ -244,8 +245,14 @@ } // revisionNumber == TIP is processed by RevisionMapper if (revisionMap == null) { - revisionMap = new RevisionMapper(getRepo()); + revisionMap = new RevisionMapper(super.revisionLookup == null); content.iterate(0, TIP, false, revisionMap); + revisionMap.fixReusedManifests(); + if (super.useRevisionLookup && super.revisionLookup == null) { + // reuse RevisionLookup if there's none yet + super.revisionLookup = revisionMap.manifestNodeids; + } + revisionMap.manifestNodeids = null; } return revisionMap.at(changesetRevisionIndex); } @@ -572,18 +579,19 @@ } } - private static class RevisionMapper implements RevlogStream.Inspector, Lifecycle { + private class RevisionMapper implements RevlogStream.Inspector, Lifecycle { private final int changelogRevisionCount; private int[] changelog2manifest; - private final HgRepository repo; - private int[] manifestNodeidHashes; // first 4 bytes of manifest nodeid at corresponding index + RevisionLookup manifestNodeids; - public RevisionMapper(HgRepository hgRepo) { - repo = hgRepo; - changelogRevisionCount = repo.getChangelog().getRevisionCount(); + private RevisionMapper(boolean useOwnRevisionLookup) { + changelogRevisionCount = HgManifest.this.getRepo().getChangelog().getRevisionCount(); + if (useOwnRevisionLookup) { + manifestNodeids = new RevisionLookup(HgManifest.this.content); + } } - + /** * Get index of manifest revision that corresponds to specified changeset * @param changesetRevisionIndex non-negative index of changelog revision, or {@link HgRepository#TIP} @@ -603,7 +611,7 @@ return changesetRevisionIndex; } - // XXX likely can be replaced with Revlog.RevisionInspector + // XXX can be replaced with Revlog.RevisionInspector, but I don't want Nodeid instances public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) { if (changelog2manifest != null) { // next assertion is not an error, rather assumption check, which is too development-related to be explicit exception - @@ -620,7 +628,9 @@ changelog2manifest[linkRevision] = revisionNumber; } } - manifestNodeidHashes[revisionNumber] = Nodeid.hashCode(nodeid); + if (manifestNodeids != null) { + manifestNodeids.next(revisionNumber, nodeid); + } } public void start(int count, Callback callback, Object token) { @@ -633,11 +643,19 @@ changelog2manifest = new int[changelogRevisionCount]; Arrays.fill(changelog2manifest, BAD_REVISION); } - manifestNodeidHashes = new int[count]; + if (manifestNodeids != null) { + manifestNodeids.prepare(count); + } } public void finish(Object token) { + // it's not a nice idea to fix changesets that reuse existing manifest entries from inside + // #finish, as the manifest read operation is not complete at the moment. + } + + public void fixReusedManifests() { if (changelog2manifest == null) { + // direct, 1-1 mapping of changeset indexes to manifest return; } // I assume there'd be not too many revisions we don't know manifest of @@ -652,7 +670,7 @@ final IntMap missingCsetToManifest = new IntMap(undefinedChangelogRevision.size()); int[] undefinedClogRevs = undefinedChangelogRevision.toArray(); // undefinedChangelogRevision is sorted by the nature it's created - repo.getChangelog().rangeInternal(new HgChangelog.Inspector() { + HgManifest.this.getRepo().getChangelog().rangeInternal(new HgChangelog.Inspector() { public void next(int revisionIndex, Nodeid nodeid, RawChangeset cset) { missingCsetToManifest.put(revisionIndex, cset.manifest()); @@ -663,25 +681,24 @@ for (int u : undefinedClogRevs) { Nodeid manifest = missingCsetToManifest.get(u); if (manifest == null || manifest.isNull()) { - repo.getSessionContext().getLog().dump(getClass(), Severity.Warn, "Changeset %d has no associated manifest entry", u); - // keep -1 in the changelog2manifest map. + HgManifest.this.getRepo().getSessionContext().getLog().dump(getClass(), Severity.Warn, "Changeset %d has no associated manifest entry", u); + // keep BAD_REVISION in the changelog2manifest map. continue; } - int hash = manifest.hashCode(); - for (int i = 0; i < manifestNodeidHashes.length; i++) { - if (manifestNodeidHashes[i] == hash) { - if (manifest.equals(repo.getManifest().getRevision(i))) { - changelog2manifest[u] = i; - break; - } - // else false match (only 4 head bytes matched, continue loop + if (manifestNodeids != null) { + int manifestRevIndex = manifestNodeids.findIndex(manifest); + // mimic HgManifest#getRevisionIndex() to keep behavior the same + if (manifestRevIndex == BAD_REVISION) { + throw new HgInvalidRevisionException(String.format("Can't find index of revision %s", manifest.shortNotation()), manifest, null); } + changelog2manifest[u] = manifestRevIndex; + } else { + changelog2manifest[u] = HgManifest.this.getRevisionIndex(manifest); } } final long t3 = System.nanoTime(); System.out.printf("\tRevisionMapper#finish(), %d missing revs: %d + %d us\n", undefinedChangelogRevision.size(), (t2-t1) / 1000, (t3-t2) / 1000); } - manifestNodeidHashes = null; } } diff -r 55b7987c1796 -r 46f29b73e51e src/org/tmatesoft/hg/repo/Revlog.java --- a/src/org/tmatesoft/hg/repo/Revlog.java Fri May 03 15:29:26 2013 +0200 +++ b/src/org/tmatesoft/hg/repo/Revlog.java Fri May 03 17:03:31 2013 +0200 @@ -30,6 +30,7 @@ import org.tmatesoft.hg.internal.Experimental; import org.tmatesoft.hg.internal.IntMap; import org.tmatesoft.hg.internal.Preview; +import org.tmatesoft.hg.internal.RevisionLookup; import org.tmatesoft.hg.internal.RevlogStream; import org.tmatesoft.hg.util.Adaptable; import org.tmatesoft.hg.util.ByteChannel; @@ -53,8 +54,10 @@ private final HgRepository repo; protected final RevlogStream content; + protected final boolean useRevisionLookup; + protected RevisionLookup revisionLookup; - protected Revlog(HgRepository hgRepo, RevlogStream contentStream) { + protected Revlog(HgRepository hgRepo, RevlogStream contentStream, boolean needRevisionLookup) { if (hgRepo == null) { throw new IllegalArgumentException(); } @@ -63,12 +66,14 @@ } repo = hgRepo; content = contentStream; + useRevisionLookup = needRevisionLookup; } // invalid Revlog protected Revlog(HgRepository hgRepo) { repo = hgRepo; content = null; + useRevisionLookup = false; } public final HgRepository getRepo() { @@ -145,13 +150,24 @@ * @throws HgRuntimeException subclass thereof to indicate issues with the library. Runtime exception */ public final int getRevisionIndex(Nodeid nid) throws HgRuntimeException { - int revision = content.findRevisionIndex(nid); +// final long t1 = System.nanoTime(); + int revision; + if (useRevisionLookup) { + if (revisionLookup == null) { + revisionLookup = RevisionLookup.createFor(content); + } + revision = revisionLookup.findIndex(nid); + } else { + revision = content.findRevisionIndex(nid); + } if (revision == BAD_REVISION) { // using toString() to identify revlog. HgDataFile.toString includes path, HgManifest and HgChangelog instances // are fine with default (class name) // Perhaps, more tailored description method would be suitable here throw new HgInvalidRevisionException(String.format("Can't find revision %s in %s", nid.shortNotation(), this), nid, null); } +// final long t2 = System.nanoTime(); +// System.out.printf("\tgetRevisionIndex(%s): %d us\n", nid.shortNotation(), (t2-t1)/1000); return revision; }