# HG changeset patch # User Artem Tikhomirov # Date 1294281020 -3600 # Node ID 603806cd2dc6a3e833397dc34f70925a814dd9be # Parent e929cecae4e102806c1668867d3cbfeec5ed5573 Status of local working dir against non-tip base revision diff -r e929cecae4e1 -r 603806cd2dc6 design.txt --- a/design.txt Wed Jan 05 04:10:28 2011 +0100 +++ b/design.txt Thu Jan 06 03:30:20 2011 +0100 @@ -18,6 +18,8 @@ Revlog What happens when big entry is added to a file - when it detects it can't longer fit into .i and needs .d? Inline flag and .i format changes? +What's hg natural way to see nodeids of specific files (i.e. when I do 'hg --debug manifest -r 11' and see nodeid of some file, and +then would like to see what changeset this file came from)? ---------- + support patch from baseRev + few deltas (although done in a way patches are applied one by one instead of accumulated) diff -r e929cecae4e1 -r 603806cd2dc6 src/com/tmate/hgkit/console/Main.java --- a/src/com/tmate/hgkit/console/Main.java Wed Jan 05 04:10:28 2011 +0100 +++ b/src/com/tmate/hgkit/console/Main.java Thu Jan 06 03:30:20 2011 +0100 @@ -1,3 +1,6 @@ +/* + * Copyright (c) 2010, 2011 Artem Tikhomirov + */ package com.tmate.hgkit.console; import java.io.BufferedInputStream; @@ -18,12 +21,17 @@ public class Main { public static void main(String[] args) throws Exception { +// String repo = "/temp/hg/hello/.hg/"; // String filename = "store/00changelog.i"; - String filename = "store/data/hello.c.i"; +// String filename = "store/data/hello.c.i"; // String filename = "store/data/docs/readme.i"; + String repo = "/eclipse-3.7/ws.hg/com.tmate.hgkit/.hg/"; +// String filename = "store/data/design.txt.i"; + String filename = "store/data/src/com/tmate/hgkit/ll/_revlog_stream.java.i"; + // LinkedList changelog = new LinkedList(); // - DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(new File("/temp/hg/hello/.hg/" + filename)))); + DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(new File(repo + filename)))); DataInput di = dis; dis.mark(10); int versionField = di.readInt(); diff -r e929cecae4e1 -r 603806cd2dc6 src/com/tmate/hgkit/console/Manifest.java --- a/src/com/tmate/hgkit/console/Manifest.java Wed Jan 05 04:10:28 2011 +0100 +++ b/src/com/tmate/hgkit/console/Manifest.java Thu Jan 06 03:30:20 2011 +0100 @@ -25,22 +25,24 @@ return; } System.out.println(hgRepo.getLocation()); - HgManifest.Inspector insp = new HgManifest.Inspector() { - public boolean begin(int revision, Nodeid nid) { - System.out.printf("%d : %s\n", revision, nid); - return true; - } - - public boolean next(Nodeid nid, String fname, String flags) { - System.out.println(nid + "\t" + fname + "\t\t" + flags); - return true; - } - - public boolean end(int revision) { - System.out.println(); - return true; - } - }; + HgManifest.Inspector insp = new Dump(); hgRepo.getManifest().walk(0, TIP, insp); } + + public static final class Dump implements HgManifest.Inspector { + public boolean begin(int revision, Nodeid nid) { + System.out.printf("%d : %s\n", revision, nid); + return true; + } + + public boolean next(Nodeid nid, String fname, String flags) { + System.out.println(nid + "\t" + fname + "\t\t" + flags); + return true; + } + + public boolean end(int revision) { + System.out.println(); + return true; + } + } } diff -r e929cecae4e1 -r 603806cd2dc6 src/com/tmate/hgkit/console/Status.java --- a/src/com/tmate/hgkit/console/Status.java Wed Jan 05 04:10:28 2011 +0100 +++ b/src/com/tmate/hgkit/console/Status.java Thu Jan 06 03:30:20 2011 +0100 @@ -6,8 +6,10 @@ import static com.tmate.hgkit.ll.HgRepository.TIP; import com.tmate.hgkit.fs.RepositoryLookup; +import com.tmate.hgkit.ll.HgDataFile; import com.tmate.hgkit.ll.HgRepository; import com.tmate.hgkit.ll.LocalHgRepo; +import com.tmate.hgkit.ll.Nodeid; /** * @@ -33,6 +35,22 @@ hgRepo.status(r1, r2, dump); System.out.println("\nStatus against working dir:"); ((LocalHgRepo) hgRepo).statusLocal(TIP, dump); + System.out.println(); + System.out.printf("Manifest of the revision %d:\n", r2); + hgRepo.getManifest().walk(r2, r2, new Manifest.Dump()); + System.out.println(); + System.out.printf("\nStatus of working dir against %d:\n", r2); + ((LocalHgRepo) hgRepo).statusLocal(r2, dump); + } + + protected static void testStatusInternals(HgRepository hgRepo) { + HgDataFile n = hgRepo.getFileNode("design.txt"); + for (String s : new String[] {"011dfd44417c72bd9e54cf89b82828f661b700ed", "e5529faa06d53e06a816e56d218115b42782f1ba", "c18e7111f1fc89a80a00f6a39d51288289a382fc"}) { + // expected: 359, 2123, 3079 + byte[] b = s.getBytes(); + final Nodeid nid = Nodeid.fromAscii(b, 0, b.length); + System.out.println(s + " : " + n.length(nid)); + } } private static class StatusDump implements HgRepository.StatusInspector { diff -r e929cecae4e1 -r 603806cd2dc6 src/com/tmate/hgkit/ll/HgDataFile.java --- a/src/com/tmate/hgkit/ll/HgDataFile.java Wed Jan 05 04:10:28 2011 +0100 +++ b/src/com/tmate/hgkit/ll/HgDataFile.java Thu Jan 06 03:30:20 2011 +0100 @@ -32,10 +32,23 @@ return path; // hgRepo.backresolve(this) -> name? } + public int length(Nodeid nodeid) { + int revision = content.findLocalRevisionNumber(nodeid); + return content.dataLength(revision); + } + public byte[] content() { return content(TIP); } + + public byte[] content(Nodeid nodeid) { + int revision = content.findLocalRevisionNumber(nodeid); + return content(revision); + } + /** + * @param revision - repo-local index of this file change (not a changelog revision number!) + */ public byte[] content(int revision) { final byte[][] dataPtr = new byte[1][]; Revlog.Inspector insp = new Revlog.Inspector() { diff -r e929cecae4e1 -r 603806cd2dc6 src/com/tmate/hgkit/ll/LocalHgRepo.java --- a/src/com/tmate/hgkit/ll/LocalHgRepo.java Wed Jan 05 04:10:28 2011 +0100 +++ b/src/com/tmate/hgkit/ll/LocalHgRepo.java Thu Jan 06 03:30:20 2011 +0100 @@ -3,6 +3,7 @@ */ package com.tmate.hgkit.ll; +import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; @@ -91,7 +92,7 @@ final HgDirstate dirstate = loadDirstate(); final HgIgnore hgignore = loadIgnore(); TreeSet knownEntries = dirstate.all(); - final boolean isTipBase = baseRevision == TIP || baseRevision == getManifest().revisionCount(); + final boolean isTipBase = baseRevision == TIP || baseRevision == getManifest().getRevisionCount(); final ManifestRevisionCollector collect = isTipBase ? null : new ManifestRevisionCollector(); if (!isTipBase) { getManifest().walk(baseRevision, baseRevision, collect); @@ -104,32 +105,17 @@ folders.addLast(f); } } else { - // FIXME path relative to rootDir + // FIXME path relative to rootDir - need more robust approach String fname = normalize(f.getPath().substring(rootDir.getPath().length() + 1)); if (hgignore.isIgnored(fname)) { inspector.ignored(fname); } else { if (knownEntries.remove(fname)) { // modified, added, removed, clean - HgDirstate.Record r; - if ((r = dirstate.checkNormal(fname)) != null) { - // either clean or modified - if (f.lastModified() / 1000 == r.time && r.size == f.length()) { - inspector.clean(fname); - } else { - // FIXME check actual content to avoid false modified files - inspector.modified(fname); - } - } else if ((r = dirstate.checkAdded(fname)) != null) { - if (r.name2 == null) { - inspector.added(fname); - } else { - inspector.copied(fname, r.name2); - } - } else if ((r = dirstate.checkRemoved(fname)) != null) { - inspector.removed(fname); - } else if ((r = dirstate.checkMerged(fname)) != null) { - inspector.modified(fname); + if (collect != null) { // need to check against base revision, not FS file + checkLocalStatusAgainstBaseRevision(collect, fname, f, dirstate, inspector); + } else { + checkLocalStatusAgainstFile(fname, f, dirstate, inspector); } } else { inspector.unknown(fname); @@ -138,11 +124,113 @@ } } } while (!folders.isEmpty()); + if (collect != null) { + for (String r : collect.idsMap.keySet()) { + inspector.removed(r); + } + } for (String m : knownEntries) { - inspector.missing(m); + // removed from the repository and missing from working dir shall not be reported as 'deleted' + if (dirstate.checkRemoved(m) == null) { + inspector.missing(m); + } } } + private static void checkLocalStatusAgainstFile(String fname, File f, HgDirstate dirstate, StatusInspector inspector) { + HgDirstate.Record r; + if ((r = dirstate.checkNormal(fname)) != null) { + // either clean or modified + if (f.lastModified() / 1000 == r.time && r.size == f.length()) { + inspector.clean(fname); + } else { + // FIXME check actual content to avoid false modified files + inspector.modified(fname); + } + } else if ((r = dirstate.checkAdded(fname)) != null) { + if (r.name2 == null) { + inspector.added(fname); + } else { + inspector.copied(fname, r.name2); + } + } else if ((r = dirstate.checkRemoved(fname)) != null) { + inspector.removed(fname); + } else if ((r = dirstate.checkMerged(fname)) != null) { + inspector.modified(fname); + } + } + + // XXX refactor checkLocalStatus methods in more OO way + private void checkLocalStatusAgainstBaseRevision(ManifestRevisionCollector collect, String fname, File f, HgDirstate dirstate, StatusInspector inspector) { + // fname is in the dirstate, either Normal, Added, Removed or Merged + Nodeid nid1 = collect.idsMap.remove(fname); + String flags = collect.flagsMap.remove(fname); + HgDirstate.Record r; + if (nid1 == null) { + // normal: added? + // added: not known at the time of baseRevision, shall report + // merged: was not known, report as added? + if ((r = dirstate.checkAdded(fname)) != null) { + if (r.name2 != null && collect.idsMap.containsKey(r.name2)) { + collect.idsMap.remove(r.name2); + collect.idsMap.remove(r.name2); + inspector.copied(r.name2, fname); + return; + } + // fall-through, report as added + } else if (dirstate.checkRemoved(fname) != null) { + // removed: removed file was not known at the time of baseRevision, and we should not report it as removed + return; + } + inspector.added(fname); + } else { + // was known; check whether clean or modified + // when added - seems to be the case of a file added once again, hence need to check if content is different + if ((r = dirstate.checkNormal(fname)) != null || (r = dirstate.checkMerged(fname)) != null || (r = dirstate.checkAdded(fname)) != null) { + // either clean or modified + HgDataFile fileNode = getFileNode(fname); + final int lengthAtRevision = fileNode.length(nid1); + if (r.size /* XXX File.length() ?! */ != lengthAtRevision || flags != todoGenerateFlags(fname /*java.io.File*/)) { + inspector.modified(fname); + } else { + // check actual content to see actual changes + // XXX consider adding HgDataDile.compare(File/byte[]/whatever) operation to optimize comparison + if (areTheSame(f, fileNode.content(nid1))) { + inspector.clean(fname); + } else { + inspector.modified(fname); + } + } + } + // only those left in idsMap after processing are reported as removed + } + + // TODO think over if content comparison may be done more effectively by e.g. calculating nodeid for a local file and comparing it with nodeid from manifest + // we don't need to tell exact difference, hash should be enough to detect difference, and it doesn't involve reading historical file content, and it's relatively + // cheap to calc hash on a file (no need to keep it completely in memory). OTOH, if I'm right that the next approach is used for nodeids: + // changeset nodeid + hash(actual content) => entry (Nodeid) in the next Manifest + // then it's sufficient to check parents from dirstate, and if they do not match parents from file's baseRevision (non matching parents means different nodeids). + // The question is whether original Hg treats this case (same content, different parents and hence nodeids) as 'modified' or 'clean' + } + + private static String todoGenerateFlags(String fname) { + // FIXME implement + return null; + } + private static boolean areTheSame(File f, byte[] data) { + try { + BufferedInputStream is = new BufferedInputStream(new FileInputStream(f)); + int i = 0; + while (i < data.length && data[i] == is.read()) { + i++; // increment only for successful match, otherwise won't tell last byte in data was the same as read from the stream + } + return i == data.length && is.read() == -1; // although data length is expected to be the same (see caller), check that we reached EOF, no more data left. + } catch (IOException ex) { + ex.printStackTrace(); // log warn + } + return false; + } + // XXX package-local, unless there are cases when required from outside (guess, working dir/revision walkers may hide dirstate access and no public visibility needed) public final HgDirstate loadDirstate() { // XXX may cache in SoftReference if creation is expensive @@ -324,7 +412,8 @@ return path; } - private final class ManifestRevisionCollector implements HgManifest.Inspector { + // XXX idsMap is being modified from outside. It's better to let outer (modifying) code to create these maps instead + private static final class ManifestRevisionCollector implements HgManifest.Inspector { final HashMap idsMap = new HashMap(); final HashMap flagsMap = new HashMap(); diff -r e929cecae4e1 -r 603806cd2dc6 src/com/tmate/hgkit/ll/Nodeid.java --- a/src/com/tmate/hgkit/ll/Nodeid.java Wed Jan 05 04:10:28 2011 +0100 +++ b/src/com/tmate/hgkit/ll/Nodeid.java Thu Jan 06 03:30:20 2011 +0100 @@ -3,7 +3,6 @@ */ package com.tmate.hgkit.ll; -import java.util.Arrays; /** @@ -27,21 +26,25 @@ // instead of hashCode/equals public int compareTo(Nodeid o) { - byte[] a1, a2; - if (this.binaryData.length != 20) { - a1 = new byte[20]; - System.arraycopy(binaryData, 0, a1, 20 - binaryData.length, binaryData.length); - } else { - a1 = this.binaryData; + return equals(this.binaryData, o.binaryData) ? 0 : -1; + } + + public boolean equalsTo(byte[] buf) { + return equals(this.binaryData, buf); + } + + private static boolean equals(byte[] a1, byte[] a2) { + if (a1 == null || a1.length < 20 || a2 == null || a2.length < 20) { + throw new IllegalArgumentException(); } - - if (o.binaryData.length != 20) { - a2 = new byte[20]; - System.arraycopy(o.binaryData, 0, a2, 20 - o.binaryData.length, o.binaryData.length); - } else { - a2 = o.binaryData; + // assume significant bits are at the end of the array + final int s1 = a1.length - 20, s2 = a2.length - 20; + for (int i = 0; i < 20; i++) { + if (a1[s1+i] != a2[s2+i]) { + return false; + } } - return Arrays.equals(a1, a2) ? 0 : -1; + return true; } @Override diff -r e929cecae4e1 -r 603806cd2dc6 src/com/tmate/hgkit/ll/Revlog.java --- a/src/com/tmate/hgkit/ll/Revlog.java Wed Jan 05 04:10:28 2011 +0100 +++ b/src/com/tmate/hgkit/ll/Revlog.java Thu Jan 06 03:30:20 2011 +0100 @@ -1,5 +1,5 @@ -/** - * Copyright (c) 2010 Artem Tikhomirov +/* + * Copyright (c) 2010, 2011 Artem Tikhomirov */ package com.tmate.hgkit.ll; @@ -28,6 +28,8 @@ return content.revisionCount(); } + // FIXME byte[] data might be too expensive, for few usecases it may be better to have intermediate Access object (when we don't need full data + // instantly - e.g. calculate hash, or comparing two revisions public interface Inspector { // XXX boolean retVal to indicate whether to continue? void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*32*/] nodeid, byte[] data); diff -r e929cecae4e1 -r 603806cd2dc6 src/com/tmate/hgkit/ll/RevlogStream.java --- a/src/com/tmate/hgkit/ll/RevlogStream.java Wed Jan 05 04:10:28 2011 +0100 +++ b/src/com/tmate/hgkit/ll/RevlogStream.java Thu Jan 06 03:30:20 2011 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 Artem Tikhomirov + * Copyright (c) 2010, 2011 Artem Tikhomirov */ package com.tmate.hgkit.ll; @@ -51,6 +51,53 @@ initOutline(); return index.size(); } + + public int dataLength(int revision) { + // XXX in fact, use of iterate() instead of this implementation may be quite reasonable. + // + final int indexSize = revisionCount(); + DataAccess daIndex = getIndexStream(); // XXX may supply a hint that I'll need really few bytes of data (although at some offset) + if (revision == TIP) { + revision = indexSize - 1; + } + try { + int recordOffset = inline ? (int) index.get(revision).offset : revision * REVLOGV1_RECORD_SIZE; + daIndex.seek(recordOffset + 12); // 6+2+4 + int actualLen = daIndex.readInt(); + return actualLen; + } catch (IOException ex) { + ex.printStackTrace(); // log error. FIXME better handling + throw new IllegalStateException(ex); + } finally { + daIndex.done(); + } + } + + public int findLocalRevisionNumber(Nodeid nodeid) { + // XXX this one may be implemented with iterate() once there's mechanism to stop iterations + final int indexSize = revisionCount(); + DataAccess daIndex = getIndexStream(); + try { + for (int i = 0; i < indexSize; i++) { + daIndex.skip(8); + int compressedLen = daIndex.readInt(); + daIndex.skip(20); + byte[] buf = new byte[20]; + daIndex.readBytes(buf, 0, 20); + if (nodeid.equalsTo(buf)) { + return i; + } + daIndex.skip(inline ? 12 + compressedLen : 12); + } + } catch (IOException ex) { + ex.printStackTrace(); // log error. FIXME better handling + throw new IllegalStateException(ex); + } finally { + daIndex.done(); + } + throw new IllegalArgumentException(String.format("%s doesn't represent a revision of %s", nodeid.toString(), indexFile.getName() /*XXX HgDataFile.getPath might be more suitable here*/)); + } + private final int REVLOGV1_RECORD_SIZE = 64; @@ -92,7 +139,7 @@ i = start; } - daIndex.seek(inline ? (int) index.get(i).offset : start * REVLOGV1_RECORD_SIZE); + daIndex.seek(inline ? (int) index.get(i).offset : i * REVLOGV1_RECORD_SIZE); for (; i <= end; i++ ) { long l = daIndex.readLong(); long offset = l >>> 16; @@ -205,6 +252,7 @@ } if (da.isEmpty()) { // fine, done then + res.trimToSize(); index = res; break; } else {