Mercurial > hg4j
view src/org/tmatesoft/hg/repo/HgWorkingCopyStatusCollector.java @ 197:3a7696fb457c
Investigate optimization options to allow fast processing of huge repositories. Fix defect in StatusCollector that lead to wrong result comparing first revision to empty repo (-1 to 0), due to same TIP constant value
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Tue, 19 Apr 2011 03:49:29 +0200 |
parents | c9b305df0b89 |
children | 047b1dec7a04 |
line wrap: on
line source
/* * Copyright (c) 2011 TMate Software Ltd * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * For information on how to redistribute this software under * the terms of a license other than GNU General Public License * contact TMate Software at support@hg4j.com */ package org.tmatesoft.hg.repo; import static java.lang.Math.max; import static java.lang.Math.min; import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION; import static org.tmatesoft.hg.repo.HgRepository.TIP; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.util.Collections; import java.util.Set; import java.util.TreeSet; import org.tmatesoft.hg.core.HgDataStreamException; import org.tmatesoft.hg.core.HgException; import org.tmatesoft.hg.core.Nodeid; import org.tmatesoft.hg.internal.ByteArrayChannel; import org.tmatesoft.hg.internal.FilterByteChannel; import org.tmatesoft.hg.repo.HgStatusCollector.ManifestRevisionInspector; import org.tmatesoft.hg.util.ByteChannel; import org.tmatesoft.hg.util.CancelledException; import org.tmatesoft.hg.util.FileIterator; import org.tmatesoft.hg.util.Path; import org.tmatesoft.hg.util.PathPool; import org.tmatesoft.hg.util.PathRewrite; /** * * @author Artem Tikhomirov * @author TMate Software Ltd. */ public class HgWorkingCopyStatusCollector { private final HgRepository repo; private final FileIterator repoWalker; private HgDirstate dirstate; private HgStatusCollector baseRevisionCollector; private PathPool pathPool; public HgWorkingCopyStatusCollector(HgRepository hgRepo) { this(hgRepo, hgRepo.createWorkingDirWalker()); } HgWorkingCopyStatusCollector(HgRepository hgRepo, FileIterator hgRepoWalker) { this.repo = hgRepo; this.repoWalker = hgRepoWalker; } /** * Optionally, supply a collector instance that may cache (or have already cached) base revision * @param sc may be null */ public void setBaseRevisionCollector(HgStatusCollector sc) { baseRevisionCollector = sc; } /*package-local*/ PathPool getPathPool() { if (pathPool == null) { if (baseRevisionCollector == null) { pathPool = new PathPool(new PathRewrite.Empty()); } else { return baseRevisionCollector.getPathPool(); } } return pathPool; } public void setPathPool(PathPool pathPool) { this.pathPool = pathPool; } private HgDirstate getDirstate() { if (dirstate == null) { dirstate = repo.loadDirstate(); } return dirstate; } // may be invoked few times public void walk(int baseRevision, HgStatusInspector inspector) { final HgIgnore hgIgnore = repo.getIgnore(); TreeSet<String> knownEntries = getDirstate().all(); final boolean isTipBase; if (baseRevision == TIP) { baseRevision = repo.getManifest().getRevisionCount() - 1; isTipBase = true; } else { isTipBase = baseRevision == repo.getManifest().getRevisionCount() - 1; } HgStatusCollector.ManifestRevisionInspector collect = null; Set<String> baseRevFiles = Collections.emptySet(); if (!isTipBase) { if (baseRevisionCollector != null) { collect = baseRevisionCollector.raw(baseRevision); } else { collect = new HgStatusCollector.ManifestRevisionInspector(null, null); repo.getManifest().walk(baseRevision, baseRevision, collect); } baseRevFiles = new TreeSet<String>(collect.files()); } if (inspector instanceof HgStatusCollector.Record) { HgStatusCollector sc = baseRevisionCollector == null ? new HgStatusCollector(repo) : baseRevisionCollector; ((HgStatusCollector.Record) inspector).init(baseRevision, BAD_REVISION, sc); } repoWalker.reset(); final PathPool pp = getPathPool(); while (repoWalker.hasNext()) { repoWalker.next(); Path fname = repoWalker.name(); File f = repoWalker.file(); if (hgIgnore.isIgnored(fname)) { inspector.ignored(pp.path(fname)); } else if (knownEntries.remove(fname.toString())) { // modified, added, removed, clean if (collect != null) { // need to check against base revision, not FS file checkLocalStatusAgainstBaseRevision(baseRevFiles, collect, baseRevision, fname, f, inspector); baseRevFiles.remove(fname.toString()); } else { checkLocalStatusAgainstFile(fname, f, inspector); } } else { inspector.unknown(pp.path(fname)); } } if (collect != null) { for (String r : baseRevFiles) { inspector.removed(pp.path(r)); } } for (String m : knownEntries) { // missing known file from a working dir if (getDirstate().checkRemoved(m) == null) { // not removed from the repository = 'deleted' inspector.missing(pp.path(m)); } else { // removed from the repo // if we check against non-tip revision, do not report files that were added past that revision and now removed. if (collect == null || baseRevFiles.contains(m)) { inspector.removed(pp.path(m)); } } } } public HgStatusCollector.Record status(int baseRevision) { HgStatusCollector.Record rv = new HgStatusCollector.Record(); walk(baseRevision, rv); return rv; } //******************************************** private void checkLocalStatusAgainstFile(Path fname, File f, HgStatusInspector inspector) { HgDirstate.Record r; if ((r = getDirstate().checkNormal(fname)) != null) { // either clean or modified if (f.lastModified() / 1000 == r.time && r.size == f.length()) { inspector.clean(getPathPool().path(fname)); } else { // check actual content to avoid false modified files HgDataFile df = repo.getFileNode(fname); if (!areTheSame(f, df, HgRepository.TIP)) { inspector.modified(df.getPath()); } else { inspector.clean(df.getPath()); } } } else if ((r = getDirstate().checkAdded(fname)) != null) { if (r.name2 == null) { inspector.added(getPathPool().path(fname)); } else { inspector.copied(getPathPool().path(r.name2), getPathPool().path(fname)); } } else if ((r = getDirstate().checkRemoved(fname)) != null) { inspector.removed(getPathPool().path(fname)); } else if ((r = getDirstate().checkMerged(fname)) != null) { inspector.modified(getPathPool().path(fname)); } } // XXX refactor checkLocalStatus methods in more OO way private void checkLocalStatusAgainstBaseRevision(Set<String> baseRevNames, ManifestRevisionInspector collect, int baseRevision, Path fname, File f, HgStatusInspector inspector) { // fname is in the dirstate, either Normal, Added, Removed or Merged Nodeid nid1 = collect.nodeid(fname.toString()); String flags = collect.flags(fname.toString()); HgDirstate.Record r; if (nid1 == null) { // normal: added? // added: not known at the time of baseRevision, shall report // merged: was not known, report as added? if ((r = getDirstate().checkNormal(fname)) != null) { try { Path origin = HgStatusCollector.getOriginIfCopy(repo, fname, baseRevNames, baseRevision); if (origin != null) { inspector.copied(getPathPool().path(origin), getPathPool().path(fname)); return; } } catch (HgDataStreamException ex) { ex.printStackTrace(); // FIXME report to a mediator, continue status collection } } else if ((r = getDirstate().checkAdded(fname)) != null) { if (r.name2 != null && baseRevNames.contains(r.name2)) { baseRevNames.remove(r.name2); // XXX surely I shall not report rename source as Removed? inspector.copied(getPathPool().path(r.name2), getPathPool().path(fname)); return; } // fall-through, report as added } else if (getDirstate().checkRemoved(fname) != null) { // removed: removed file was not known at the time of baseRevision, and we should not report it as removed return; } inspector.added(getPathPool().path(fname)); } else { // was known; check whether clean or modified // when added - seems to be the case of a file added once again, hence need to check if content is different if ((r = getDirstate().checkNormal(fname)) != null || (r = getDirstate().checkMerged(fname)) != null || (r = getDirstate().checkAdded(fname)) != null) { // either clean or modified HgDataFile fileNode = repo.getFileNode(fname); final int lengthAtRevision = fileNode.length(nid1); if (r.size /* XXX File.length() ?! */ != lengthAtRevision || flags != todoGenerateFlags(fname /*java.io.File*/)) { inspector.modified(getPathPool().path(fname)); } else { // check actual content to see actual changes if (areTheSame(f, fileNode, fileNode.getLocalRevision(nid1))) { inspector.clean(getPathPool().path(fname)); } else { inspector.modified(getPathPool().path(fname)); } } } // only those left in idsMap after processing are reported as removed } // TODO think over if content comparison may be done more effectively by e.g. calculating nodeid for a local file and comparing it with nodeid from manifest // we don't need to tell exact difference, hash should be enough to detect difference, and it doesn't involve reading historical file content, and it's relatively // cheap to calc hash on a file (no need to keep it completely in memory). OTOH, if I'm right that the next approach is used for nodeids: // changeset nodeid + hash(actual content) => entry (Nodeid) in the next Manifest // then it's sufficient to check parents from dirstate, and if they do not match parents from file's baseRevision (non matching parents means different nodeids). // The question is whether original Hg treats this case (same content, different parents and hence nodeids) as 'modified' or 'clean' } private boolean areTheSame(File f, HgDataFile dataFile, int localRevision) { // XXX consider adding HgDataDile.compare(File/byte[]/whatever) operation to optimize comparison ByteArrayChannel bac = new ByteArrayChannel(); boolean ioFailed = false; try { // need content with metadata striped off - although theoretically chances are metadata may be different, // WC doesn't have it anyway dataFile.content(localRevision, bac); } catch (CancelledException ex) { // silently ignore - can't happen, ByteArrayChannel is not cancellable } catch (IOException ex) { ioFailed = true; } catch (HgException ex) { ioFailed = true; } return !ioFailed && areTheSame(f, bac.toArray(), dataFile.getPath()); } private boolean areTheSame(File f, final byte[] data, Path p) { FileInputStream fis = null; try { try { fis = new FileInputStream(f); FileChannel fc = fis.getChannel(); ByteBuffer fb = ByteBuffer.allocate(min(data.length, 8192)); final boolean[] checkValue = new boolean[] { true }; ByteChannel check = new ByteChannel() { int x = 0; final boolean debug = false; // XXX may want to add global variable to allow clients to turn public int write(ByteBuffer buffer) { for (int i = buffer.remaining(); i > 0; i--, x++) { if (data[x] != buffer.get()) { if (debug) { byte[] xx = new byte[15]; if (buffer.position() > 5) { buffer.position(buffer.position() - 5); } buffer.get(xx); System.out.print("expected >>" + new String(data, max(0, x - 4), 20) + "<< but got >>"); System.out.println(new String(xx) + "<<"); } checkValue[0] = false; break; } } buffer.position(buffer.limit()); // mark as read return buffer.limit(); } }; FilterByteChannel filters = new FilterByteChannel(check, repo.getFiltersFromWorkingDirToRepo(p)); while (fc.read(fb) != -1 && checkValue[0]) { fb.flip(); filters.write(fb); fb.compact(); } return checkValue[0]; } catch (IOException ex) { if (fis != null) { fis.close(); } ex.printStackTrace(); // log warn } } catch (/*TODO typed*/Exception ex) { ex.printStackTrace(); } return false; } private static String todoGenerateFlags(Path fname) { // FIXME implement return null; } }