Mercurial > jhg
changeset 694:7efabe0cddcf
Speed up (a) file rename history to minimize file reads; (b) file.isCopy(int) to read metadata for few revisions at once (use pattern assumes earlier revisions are likely to be queried, too); (c) HgIgnore.isIgnored by caching matched initial fragments (to substitute more expensive Matcher.matches with cheaper HashMap.contains)
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Mon, 05 Aug 2013 17:42:10 +0200 (2013-08-05) |
parents | 32b0d19e8aba |
children | 053bb4397bf9 |
files | src/org/tmatesoft/hg/internal/FileRenameHistory.java src/org/tmatesoft/hg/internal/Metadata.java src/org/tmatesoft/hg/repo/HgDataFile.java src/org/tmatesoft/hg/repo/HgIgnore.java src/org/tmatesoft/hg/repo/HgStatusCollector.java src/org/tmatesoft/hg/repo/HgWorkingCopyStatusCollector.java src/org/tmatesoft/hg/repo/Revlog.java |
diffstat | 7 files changed, 120 insertions(+), 35 deletions(-) [+] |
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/internal/FileRenameHistory.java Mon Aug 05 12:45:36 2013 +0200 +++ b/src/org/tmatesoft/hg/internal/FileRenameHistory.java Mon Aug 05 17:42:10 2013 +0200 @@ -16,7 +16,10 @@ */ package org.tmatesoft.hg.internal; +import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION; + import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.LinkedList; import java.util.List; @@ -24,6 +27,8 @@ import org.tmatesoft.hg.core.HgFileRevision; import org.tmatesoft.hg.core.HgIterateDirection; import org.tmatesoft.hg.repo.HgDataFile; +import org.tmatesoft.hg.repo.HgRepository; +import org.tmatesoft.hg.repo.HgRuntimeException; /** * Traces file renames. Quite similar to HgChangesetFileSneaker, although the latter tries different paths @@ -61,8 +66,9 @@ LinkedList<Chunk> chunks = new LinkedList<Chunk>(); int chunkStart = 0, chunkEnd = fileRev; int csetChunkEnd = -1, csetChunkStart = -1; + BasicRevMap csetMap = new BasicRevMap(0, fileRev).collect(df); while (fileRev >= 0) { - int cset = df.getChangesetRevisionIndex(fileRev); + int cset = csetMap.changesetAt(fileRev); if (csetChunkEnd == -1) { csetChunkEnd = cset; } @@ -82,6 +88,7 @@ HgFileRevision origin = df.getCopySource(fileRev); df = df.getRepo().getFileNode(origin.getPath()); fileRev = chunkEnd = df.getRevisionIndex(origin.getRevision()); + csetMap = new BasicRevMap(0, fileRev).collect(df); chunkStart = 0; csetChunkEnd = cset - 1; // if df is copy, cset can't be 0 csetChunkStart = -1; @@ -130,7 +137,7 @@ /** * file has changes [firstFileRev..lastFileRev] that have occurred somewhere in [firstCset..lastCset] */ - public static class Chunk { + public static final class Chunk { private final HgDataFile df; private final int fileRevFrom; private final int fileRevTo; @@ -159,4 +166,32 @@ return csetTo; } } + + private static final class BasicRevMap implements HgDataFile.LinkRevisionInspector { + private final int[] revs; + private final int fromRev; + private final int toRev; + public BasicRevMap(int startRev, int endRev) { + revs = new int[endRev+1]; // for simplicity, just ignore startRev now (it's 0 in local use anyway) + fromRev = startRev; + toRev = endRev; + Arrays.fill(revs, BAD_REVISION); + } + + public BasicRevMap collect(HgDataFile df) { + df.indexWalk(fromRev, toRev, this); + return this; + } + + public void next(int revisionIndex, int linkedRevisionIndex) throws HgRuntimeException { + revs[revisionIndex] = linkedRevisionIndex; + } + + /** + * @return {@link HgRepository#BAD_REVISION} if there's no mapping + */ + public int changesetAt(int rev) { + return revs[rev]; + } + } }
--- a/src/org/tmatesoft/hg/internal/Metadata.java Mon Aug 05 12:45:36 2013 +0200 +++ b/src/org/tmatesoft/hg/internal/Metadata.java Mon Aug 05 17:42:10 2013 +0200 @@ -16,6 +16,7 @@ */ package org.tmatesoft.hg.internal; +import static org.tmatesoft.hg.repo.HgRepository.NO_REVISION; import static org.tmatesoft.hg.util.LogFacility.Severity.Error; import java.io.ByteArrayOutputStream; @@ -50,18 +51,24 @@ private final Metadata.Record NONE = new Record(-1, null); // don't want statics private final LogFacility log; + + private int lastRevRead = NO_REVISION; public Metadata(SessionContext.Source sessionCtx) { log = sessionCtx.getSessionContext().getLog(); } - // true when there's metadata for given revision + /** + * @return <code>true</code> when there's metadata for given revision + */ public boolean known(int revision) { Metadata.Record i = entries.get(revision); return i != null && NONE != i; } - // true when revision has been checked for metadata presence. + /** + * @return <code>true</code> when revision has been checked for metadata presence. + */ public boolean checked(int revision) { return entries.containsKey(revision); } @@ -71,6 +78,14 @@ Metadata.Record i = entries.get(revision); return i == NONE; } + + /** + * Get the greatest revision index visited so far. + * Note, doesn't imply all revisions up to this has been visited. + */ + public int lastRevisionRead() { + return lastRevRead; + } // mark revision as having no metadata. void recordNone(int revision) { @@ -98,6 +113,9 @@ */ public boolean tryRead(int revisionNumber, DataAccess data) throws IOException, HgInvalidControlFileException { final int daLength = data.length(); + if (lastRevRead == NO_REVISION || revisionNumber > lastRevRead) { + lastRevRead = revisionNumber; + } if (daLength < 4 || data.readByte() != 1 || data.readByte() != 10) { recordNone(revisionNumber); return false;
--- a/src/org/tmatesoft/hg/repo/HgDataFile.java Mon Aug 05 12:45:36 2013 +0200 +++ b/src/org/tmatesoft/hg/repo/HgDataFile.java Mon Aug 05 17:42:10 2013 +0200 @@ -63,7 +63,12 @@ // slashes, unix-style? // repo location agnostic, just to give info to user, not to access real storage private final Path path; - private Metadata metadata; // get initialized on first access to file content. + /* + * Get initialized on first access to file content. + * We read metadata starting from rev 0 always, so that Metadata#lastRevisionRead() + * shows the region of file history [0..lastRevisionRead] we know metadata for + */ + private Metadata metadata; /*package-local*/HgDataFile(HgRepository hgRepo, Path filePath, RevlogStream content) { super(hgRepo, content, false); @@ -477,12 +482,17 @@ } private void checkAndRecordMetadata(int localRev) throws HgRuntimeException { + int startRev; if (metadata == null) { metadata = new Metadata(getRepo()); + startRev = 0; // read from the very beginning with one shot - likely isCopy(localRev-i) will be of interest, too + } else { + startRev = metadata.lastRevisionRead() + 1; } + assert localRev >= startRev; // callers of this method ensure that metadata has been checked beforehand // use MetadataInspector without delegate to process metadata only RevlogStream.Inspector insp = new MetadataInspector(metadata, null); - super.content.iterate(localRev, localRev, true, insp); + super.content.iterate(startRev, localRev, true, insp); } private static class MetadataInspector extends ErrorHandlingInspector implements RevlogStream.Inspector { @@ -501,17 +511,16 @@ public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) throws HgRuntimeException { try { - if (metadata.tryRead(revisionNumber, data)) { - // da is in prepared state (i.e. we consumed all bytes up to metadata end). - // However, it's not safe to assume delegate won't call da.reset() for some reason, - // and we need to ensure predictable result. + final boolean gotMetadata = metadata.tryRead(revisionNumber, data); + if (delegate != null) { data.reset(); - int offset = metadata.dataOffset(revisionNumber); - data = new FilterDataAccess(data, offset, data.length() - offset); - } else { - data.reset(); - } - if (delegate != null) { + if (gotMetadata) { + // da is in prepared state (i.e. we consumed all bytes up to metadata end). + // However, it's not safe to assume delegate won't call da.reset() for some reason, + // and we need to ensure predictable result. + int offset = metadata.dataOffset(revisionNumber); + data = new FilterDataAccess(data, offset, data.length() - offset); + } delegate.next(revisionNumber, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeid, data); } } catch (IOException ex) {
--- a/src/org/tmatesoft/hg/repo/HgIgnore.java Mon Aug 05 12:45:36 2013 +0200 +++ b/src/org/tmatesoft/hg/repo/HgIgnore.java Mon Aug 05 17:42:10 2013 +0200 @@ -25,7 +25,9 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; @@ -44,11 +46,16 @@ private List<Pattern> entries; private final PathRewrite globPathHelper; - private FileChangeMonitor ignoreFileTracker; + private FileChangeMonitor ignoreFileTracker; + // if pattern matches first fragment of a path, it will + // match any other path with this fragment, so we can avoid pattern matching + // if path starts with one of such fragments (often for e.g. ignored 'bin/' folders) + private final Set<String> ignoredFirstFragments; HgIgnore(PathRewrite globPathRewrite) { entries = Collections.emptyList(); globPathHelper = globPathRewrite; + ignoredFirstFragments = new HashSet<String>(); } /* package-local */ void read(Internals repo) throws HgInvalidControlFileException { @@ -230,18 +237,28 @@ */ public boolean isIgnored(Path path) { String ps = path.toString(); + int x = ps.indexOf('/'); + if (x != -1 && ignoredFirstFragments.contains(ps.substring(0, x))) { + return true; + } for (Pattern p : entries) { - int x = ps.indexOf('/'); // reset for each pattern if (p.matcher(ps).find()) { return true; } - while (x != -1 && x+1 != ps.length() /*skip very last segment not to check complete string twice*/) { - String fragment = ps.substring(0, x); + } + boolean firstFragment = true; + while (x != -1 && x+1 != ps.length() /*skip very last segment not to check complete string twice*/) { + String fragment = ps.substring(0, x); + for (Pattern p : entries) { if (p.matcher(fragment).matches()) { + if (firstFragment) { + ignoredFirstFragments.add(new String(fragment)); + } return true; } - x = ps.indexOf('/', x+1); } + x = ps.indexOf('/', x+1); + firstFragment = false; } return false; }
--- a/src/org/tmatesoft/hg/repo/HgStatusCollector.java Mon Aug 05 12:45:36 2013 +0200 +++ b/src/org/tmatesoft/hg/repo/HgStatusCollector.java Mon Aug 05 17:42:10 2013 +0200 @@ -299,7 +299,6 @@ final CancelSupport cs = CancelSupport.Factory.get(inspector); - Collection<Path> allBaseFiles = r1.files(); TreeSet<Path> r1Files = new TreeSet<Path>(allBaseFiles); for (Path r2fname : r2.files()) {
--- a/src/org/tmatesoft/hg/repo/HgWorkingCopyStatusCollector.java Mon Aug 05 12:45:36 2013 +0200 +++ b/src/org/tmatesoft/hg/repo/HgWorkingCopyStatusCollector.java Mon Aug 05 17:42:10 2013 +0200 @@ -420,20 +420,11 @@ // removed: nothing to report, if (ds.checkNormal(fname) != null || ds.checkMerged(fname) != null) { try { - // FIXME refactor, done numerous time e.g. in TestStatus#testStatusCommand with base = 3 - ArrayList<Nodeid> parents = new ArrayList<Nodeid>(2); - parents.add(ds.parents().first()); - parents.add(ds.parents().second()); - parents.remove(Nodeid.NULL); - // try either parent if file came through one of them, or both - for (Nodeid parent : parents) { - int csetIndex = repo.getChangelog().getRevisionIndex(parent); - Nodeid fileRev = repo.getManifest().getFileRevision(csetIndex, fname); - if (fileRev == null) { - continue; - } + // XXX perhaps, shall take second parent into account if not null, too? + Nodeid nidFromDirstate = getDirstateParentManifest().nodeid(fname); + if (nidFromDirstate != null) { // see if file revision known in this parent got copied from one of baseRevNames - Path origin = HgStatusCollector.getOriginIfCopy(repo, fname, fileRev, collect.files(), baseRevision); + Path origin = HgStatusCollector.getOriginIfCopy(repo, fname, nidFromDirstate, collect.files(), baseRevision); if (origin != null) { inspector.copied(getPathPool().mangle(origin), fname); return;
--- a/src/org/tmatesoft/hg/repo/Revlog.java Mon Aug 05 12:45:36 2013 +0200 +++ b/src/org/tmatesoft/hg/repo/Revlog.java Mon Aug 05 17:42:10 2013 +0200 @@ -334,6 +334,8 @@ * EXPERIMENTAL CODE, DO NOT USE * * Alternative revlog iteration + * There's an implicit ordering of inspectors. Less inspector needs, earlier its invocation occurs. + * E.g. ParentInspector goes after RevisionInspector. LinkRevisionInspector goes before RevisionInspector * * @param start * @param end @@ -349,6 +351,7 @@ } final RevisionInspector revisionInsp = Adaptable.Factory.getAdapter(inspector, RevisionInspector.class, null); final ParentInspector parentInsp = Adaptable.Factory.getAdapter(inspector, ParentInspector.class, null); + final LinkRevisionInspector linkRevInspector = Adaptable.Factory.getAdapter(inspector, LinkRevisionInspector.class, null); final Nodeid[] allRevisions = parentInsp == null ? null : new Nodeid[end - _start + 1]; // next are to build set of parent indexes that are not part of the range iteration // i.e. those parents we need to read separately. See Issue 31 for details. @@ -360,6 +363,14 @@ private int i = 0; public void next(int revisionIndex, int actualLen, int baseRevIndex, int linkRevIndex, int parent1RevIndex, int parent2RevIndex, byte[] nodeid, DataAccess data) throws HgRuntimeException { + // FIXME refactor either as chain of RevlogStream.Inspector or an external AnyOf() runner not to keep everything + // in a single method + if (linkRevInspector != null) { + linkRevInspector.next(revisionIndex, linkRevIndex); + if (parentInsp == null && revisionInsp == null) { + return; + } + } Nodeid nid = Nodeid.fromBinary(nodeid, 0); if (revisionInsp != null) { revisionInsp.next(revisionIndex, nid, linkRevIndex); @@ -440,6 +451,11 @@ } @Experimental + public interface LinkRevisionInspector extends Inspector { + void next(int revisionIndex, int linkedRevisionIndex) throws HgRuntimeException; + } + + @Experimental public interface ParentInspector extends Inspector { // XXX document whether parentX is -1 or a constant (BAD_REVISION? or dedicated?) void next(int revisionIndex, Nodeid revision, int parent1, int parent2, Nodeid nidParent1, Nodeid nidParent2) throws HgRuntimeException;