changeset 22:603806cd2dc6

Status of local working dir against non-tip base revision
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Thu, 06 Jan 2011 03:30:20 +0100
parents e929cecae4e1
children 6f9aca1a97be
files design.txt src/com/tmate/hgkit/console/Main.java src/com/tmate/hgkit/console/Manifest.java src/com/tmate/hgkit/console/Status.java src/com/tmate/hgkit/ll/HgDataFile.java src/com/tmate/hgkit/ll/LocalHgRepo.java src/com/tmate/hgkit/ll/Nodeid.java src/com/tmate/hgkit/ll/Revlog.java src/com/tmate/hgkit/ll/RevlogStream.java
diffstat 9 files changed, 244 insertions(+), 59 deletions(-) [+]
line wrap: on
line diff
--- a/design.txt	Wed Jan 05 04:10:28 2011 +0100
+++ b/design.txt	Thu Jan 06 03:30:20 2011 +0100
@@ -18,6 +18,8 @@
 Revlog
 What happens when big entry is added to a file - when it detects it can't longer fit into .i and needs .d? Inline flag and .i format changes?
 
+What's hg's natural way to see nodeids of specific files (i.e. when I do 'hg --debug manifest -r 11' and see the nodeid of some file, and
+then would like to see what changeset this file came from)?
 
 ----------
 + support patch from baseRev + few deltas (although done in a way patches are applied one by one instead of accumulated)
--- a/src/com/tmate/hgkit/console/Main.java	Wed Jan 05 04:10:28 2011 +0100
+++ b/src/com/tmate/hgkit/console/Main.java	Thu Jan 06 03:30:20 2011 +0100
@@ -1,3 +1,6 @@
+/*
+ * Copyright (c) 2010, 2011 Artem Tikhomirov 
+ */
 package com.tmate.hgkit.console;
 
 import java.io.BufferedInputStream;
@@ -18,12 +21,17 @@
 public class Main {
 
 	public static void main(String[] args) throws Exception {
+//		String repo = "/temp/hg/hello/.hg/";
 //		String filename = "store/00changelog.i";
-		String filename = "store/data/hello.c.i";
+//		String filename = "store/data/hello.c.i";
 //		String filename = "store/data/docs/readme.i";
+		String repo = "/eclipse-3.7/ws.hg/com.tmate.hgkit/.hg/";
+//		String filename = "store/data/design.txt.i";
+		String filename = "store/data/src/com/tmate/hgkit/ll/_revlog_stream.java.i";
+		//
 		LinkedList<Changeset> changelog = new LinkedList<Changeset>();
 		//
-		DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(new File("/temp/hg/hello/.hg/" + filename))));
+		DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(new File(repo + filename))));
 		DataInput di = dis;
 		dis.mark(10);
 		int versionField = di.readInt();
--- a/src/com/tmate/hgkit/console/Manifest.java	Wed Jan 05 04:10:28 2011 +0100
+++ b/src/com/tmate/hgkit/console/Manifest.java	Thu Jan 06 03:30:20 2011 +0100
@@ -25,22 +25,24 @@
 			return;
 		}
 		System.out.println(hgRepo.getLocation());
-		HgManifest.Inspector insp = new HgManifest.Inspector() {
-			public boolean begin(int revision, Nodeid nid) {
-				System.out.printf("%d : %s\n", revision, nid);
-				return true;
-			}
-
-			public boolean next(Nodeid nid, String fname, String flags) {
-				System.out.println(nid + "\t" + fname + "\t\t" + flags);
-				return true;
-			}
-
-			public boolean end(int revision) {
-				System.out.println();
-				return true;
-			}
-		};
+		HgManifest.Inspector insp = new Dump();
 		hgRepo.getManifest().walk(0, TIP, insp);
 	}
+
+	public static final class Dump implements HgManifest.Inspector {
+		public boolean begin(int revision, Nodeid nid) {
+			System.out.printf("%d : %s\n", revision, nid);
+			return true;
+		}
+
+		public boolean next(Nodeid nid, String fname, String flags) {
+			System.out.println(nid + "\t" + fname + "\t\t" + flags);
+			return true;
+		}
+
+		public boolean end(int revision) {
+			System.out.println();
+			return true;
+		}
+	}
 }
--- a/src/com/tmate/hgkit/console/Status.java	Wed Jan 05 04:10:28 2011 +0100
+++ b/src/com/tmate/hgkit/console/Status.java	Thu Jan 06 03:30:20 2011 +0100
@@ -6,8 +6,10 @@
 import static com.tmate.hgkit.ll.HgRepository.TIP;
 
 import com.tmate.hgkit.fs.RepositoryLookup;
+import com.tmate.hgkit.ll.HgDataFile;
 import com.tmate.hgkit.ll.HgRepository;
 import com.tmate.hgkit.ll.LocalHgRepo;
+import com.tmate.hgkit.ll.Nodeid;
 
 /**
  *
@@ -33,6 +35,22 @@
 		hgRepo.status(r1, r2, dump);
 		System.out.println("\nStatus against working dir:");
 		((LocalHgRepo) hgRepo).statusLocal(TIP, dump);
+		System.out.println();
+		System.out.printf("Manifest of the revision %d:\n", r2);
+		hgRepo.getManifest().walk(r2, r2, new Manifest.Dump());
+		System.out.println();
+		System.out.printf("\nStatus of working dir against %d:\n", r2);
+		((LocalHgRepo) hgRepo).statusLocal(r2, dump);
+	}
+	
+	protected static void testStatusInternals(HgRepository hgRepo) {
+		HgDataFile n = hgRepo.getFileNode("design.txt");
+		for (String s : new String[] {"011dfd44417c72bd9e54cf89b82828f661b700ed", "e5529faa06d53e06a816e56d218115b42782f1ba", "c18e7111f1fc89a80a00f6a39d51288289a382fc"}) {
+			// expected: 359, 2123, 3079
+			byte[] b = s.getBytes();
+			final Nodeid nid = Nodeid.fromAscii(b, 0, b.length);
+			System.out.println(s + " : " + n.length(nid));
+		}
 	}
 
 	private static class StatusDump implements HgRepository.StatusInspector {
--- a/src/com/tmate/hgkit/ll/HgDataFile.java	Wed Jan 05 04:10:28 2011 +0100
+++ b/src/com/tmate/hgkit/ll/HgDataFile.java	Thu Jan 06 03:30:20 2011 +0100
@@ -32,10 +32,23 @@
 		return path; // hgRepo.backresolve(this) -> name?
 	}
 
+	public int length(Nodeid nodeid) {
+		int revision = content.findLocalRevisionNumber(nodeid);
+		return content.dataLength(revision);
+	}
+
 	public byte[] content() {
 		return content(TIP);
 	}
+
+	public byte[] content(Nodeid nodeid) {
+		int revision = content.findLocalRevisionNumber(nodeid);
+		return content(revision);
+	}
 	
+	/**
+	 * @param revision - repo-local index of this file change (not a changelog revision number!)
+	 */
 	public byte[] content(int revision) {
 		final byte[][] dataPtr = new byte[1][];
 		Revlog.Inspector insp = new Revlog.Inspector() {
--- a/src/com/tmate/hgkit/ll/LocalHgRepo.java	Wed Jan 05 04:10:28 2011 +0100
+++ b/src/com/tmate/hgkit/ll/LocalHgRepo.java	Thu Jan 06 03:30:20 2011 +0100
@@ -3,6 +3,7 @@
  */
 package com.tmate.hgkit.ll;
 
+import java.io.BufferedInputStream;
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
@@ -91,7 +92,7 @@
 		final HgDirstate dirstate = loadDirstate();
 		final HgIgnore hgignore = loadIgnore();
 		TreeSet<String> knownEntries = dirstate.all();
-		final boolean isTipBase = baseRevision == TIP || baseRevision == getManifest().revisionCount();
+		final boolean isTipBase = baseRevision == TIP || baseRevision == getManifest().getRevisionCount();
 		final ManifestRevisionCollector collect = isTipBase ? null : new ManifestRevisionCollector();
 		if (!isTipBase) {
 			getManifest().walk(baseRevision, baseRevision, collect);
@@ -104,32 +105,17 @@
 						folders.addLast(f);
 					}
 				} else {
-					// FIXME path relative to rootDir
+					// FIXME path relative to rootDir - need more robust approach
 					String fname = normalize(f.getPath().substring(rootDir.getPath().length() + 1));
 					if (hgignore.isIgnored(fname)) {
 						inspector.ignored(fname);
 					} else {
 						if (knownEntries.remove(fname)) {
 							// modified, added, removed, clean
-							HgDirstate.Record r;
-							if ((r = dirstate.checkNormal(fname)) != null) {
-								// either clean or modified
-								if (f.lastModified() / 1000 == r.time && r.size == f.length()) {
-									inspector.clean(fname);
-								} else {
-									// FIXME check actual content to avoid false modified files
-									inspector.modified(fname);
-								}
-							} else if ((r = dirstate.checkAdded(fname)) != null) {
-								if (r.name2 == null) {
-									inspector.added(fname);
-								} else {
-									inspector.copied(fname, r.name2);
-								}
-							} else if ((r = dirstate.checkRemoved(fname)) != null) {
-								inspector.removed(fname);
-							} else if ((r = dirstate.checkMerged(fname)) != null) {
-								inspector.modified(fname);
+							if (collect != null) { // need to check against base revision, not FS file
+								checkLocalStatusAgainstBaseRevision(collect, fname, f, dirstate, inspector);
+							} else {
+								checkLocalStatusAgainstFile(fname, f, dirstate, inspector);
 							}
 						} else {
 							inspector.unknown(fname);
@@ -138,11 +124,113 @@
 				}
 			}
 		} while (!folders.isEmpty());
+		if (collect != null) {
+			for (String r : collect.idsMap.keySet()) {
+				inspector.removed(r);
+			}
+		}
 		for (String m : knownEntries) {
-			inspector.missing(m);
+			// removed from the repository and missing from working dir shall not be reported as 'deleted' 
+			if (dirstate.checkRemoved(m) == null) {
+				inspector.missing(m);
+			}
 		}
 	}
 	
+	private static void checkLocalStatusAgainstFile(String fname, File f, HgDirstate dirstate, StatusInspector inspector) {
+		HgDirstate.Record r;
+		if ((r = dirstate.checkNormal(fname)) != null) {
+			// either clean or modified
+			if (f.lastModified() / 1000 == r.time && r.size == f.length()) {
+				inspector.clean(fname);
+			} else {
+				// FIXME check actual content to avoid false modified files
+				inspector.modified(fname);
+			}
+		} else if ((r = dirstate.checkAdded(fname)) != null) {
+			if (r.name2 == null) {
+				inspector.added(fname);
+			} else {
+				inspector.copied(fname, r.name2);
+			}
+		} else if ((r = dirstate.checkRemoved(fname)) != null) {
+			inspector.removed(fname);
+		} else if ((r = dirstate.checkMerged(fname)) != null) {
+			inspector.modified(fname);
+		}
+	}
+	
+	// XXX refactor checkLocalStatus methods in more OO way
+	private void checkLocalStatusAgainstBaseRevision(ManifestRevisionCollector collect, String fname, File f, HgDirstate dirstate, StatusInspector inspector) {
+		// fname is in the dirstate, either Normal, Added, Removed or Merged
+		Nodeid nid1 = collect.idsMap.remove(fname);
+		String flags = collect.flagsMap.remove(fname);
+		HgDirstate.Record r;
+		if (nid1 == null) {
+			// normal: added?
+			// added: not known at the time of baseRevision, shall report
+			// merged: was not known, report as added?
+			if ((r = dirstate.checkAdded(fname)) != null) {
+				if (r.name2 != null && collect.idsMap.containsKey(r.name2)) {
+					collect.idsMap.remove(r.name2);
+				collect.flagsMap.remove(r.name2); // was a duplicated idsMap.remove; the flags entry for the copy source must be evicted too
+					inspector.copied(r.name2, fname);
+					return;
+				}
+				// fall-through, report as added
+			} else if (dirstate.checkRemoved(fname) != null) {
+				// removed: removed file was not known at the time of baseRevision, and we should not report it as removed
+				return;
+			}
+			inspector.added(fname);
+		} else {
+			// was known; check whether clean or modified
+			// when added - seems to be the case of a file added once again, hence need to check if content is different
+			if ((r = dirstate.checkNormal(fname)) != null || (r = dirstate.checkMerged(fname)) != null || (r = dirstate.checkAdded(fname)) != null) {
+				// either clean or modified
+				HgDataFile fileNode = getFileNode(fname);
+				final int lengthAtRevision = fileNode.length(nid1);
+				if (r.size /* XXX File.length() ?! */ != lengthAtRevision || flags != todoGenerateFlags(fname /*java.io.File*/)) {
+					inspector.modified(fname);
+				} else {
+					// check actual content to see actual changes
+				// XXX consider adding HgDataFile.compare(File/byte[]/whatever) operation to optimize comparison
+					if (areTheSame(f, fileNode.content(nid1))) {
+						inspector.clean(fname);
+					} else {
+						inspector.modified(fname);
+					}
+				}
+			}
+			// only those left in idsMap after processing are reported as removed 
+		}
+
+		// TODO think over if content comparison may be done more effectively by e.g. calculating nodeid for a local file and comparing it with nodeid from manifest
+		// we don't need to tell exact difference, hash should be enough to detect difference, and it doesn't involve reading historical file content, and it's relatively 
+		// cheap to calc hash on a file (no need to keep it completely in memory). OTOH, if I'm right that the next approach is used for nodeids: 
+		// changeset nodeid + hash(actual content) => entry (Nodeid) in the next Manifest
+		// then it's sufficient to check parents from dirstate, and if they do not match parents from file's baseRevision (non matching parents means different nodeids).
+		// The question is whether original Hg treats this case (same content, different parents and hence nodeids) as 'modified' or 'clean'
+	}
+
+	private static String todoGenerateFlags(String fname) {
+		// FIXME implement
+		return null;
+	}
+	private static boolean areTheSame(File f, byte[] data) {
+		try {
+			BufferedInputStream is = new BufferedInputStream(new FileInputStream(f));
+			int i = 0;
+			while (i < data.length && data[i] == is.read()) {
+				i++; // increment only for successful match, otherwise won't tell last byte in data was the same as read from the stream
+			}
+			return i == data.length && is.read() == -1; // although data length is expected to be the same (see caller), check that we reached EOF, no more data left.
+		} catch (IOException ex) {
+			ex.printStackTrace(); // log warn
+		}
+		return false;
+	}
+
 	// XXX package-local, unless there are cases when required from outside (guess, working dir/revision walkers may hide dirstate access and no public visibility needed)
 	public final HgDirstate loadDirstate() {
 		// XXX may cache in SoftReference if creation is expensive
@@ -324,7 +412,8 @@
 		return path;
 	}
 
-	private final class ManifestRevisionCollector implements HgManifest.Inspector {
+	// XXX idsMap is being modified from outside. It's better to let the outer (modifying) code create these maps instead
+	private static final class ManifestRevisionCollector implements HgManifest.Inspector {
 		final HashMap<String, Nodeid> idsMap = new HashMap<String, Nodeid>();
 		final HashMap<String, String> flagsMap = new HashMap<String, String>();
 
--- a/src/com/tmate/hgkit/ll/Nodeid.java	Wed Jan 05 04:10:28 2011 +0100
+++ b/src/com/tmate/hgkit/ll/Nodeid.java	Thu Jan 06 03:30:20 2011 +0100
@@ -3,7 +3,6 @@
  */
 package com.tmate.hgkit.ll;
 
-import java.util.Arrays;
 
 
 /**
@@ -27,21 +26,25 @@
 
 	// instead of hashCode/equals
 	public int compareTo(Nodeid o) {
-		byte[] a1, a2;
-		if (this.binaryData.length != 20) {
-			a1 = new byte[20];
-			System.arraycopy(binaryData, 0, a1, 20 - binaryData.length, binaryData.length);
-		} else {
-			a1 = this.binaryData;
+		return equals(this.binaryData, o.binaryData) ? 0 : -1;
+	}
+
+	public boolean equalsTo(byte[] buf) {
+		return equals(this.binaryData, buf);
+	}
+	
+	private static boolean equals(byte[] a1, byte[] a2) {
+		if (a1 == null || a1.length < 20 || a2 == null || a2.length < 20) {
+			throw new IllegalArgumentException();
 		}
-		
-		if (o.binaryData.length != 20) {
-			a2 = new byte[20];
-			System.arraycopy(o.binaryData, 0, a2, 20 - o.binaryData.length, o.binaryData.length);
-		} else {
-			a2 = o.binaryData;
+		// assume significant bits are at the end of the array
+		final int s1 = a1.length - 20, s2 = a2.length - 20;
+		for (int i = 0; i < 20; i++) {
+			if (a1[s1+i] != a2[s2+i]) {
+				return false;
+			}
 		}
-		return Arrays.equals(a1, a2) ? 0 : -1;
+		return true;
 	}
 
 	@Override
--- a/src/com/tmate/hgkit/ll/Revlog.java	Wed Jan 05 04:10:28 2011 +0100
+++ b/src/com/tmate/hgkit/ll/Revlog.java	Thu Jan 06 03:30:20 2011 +0100
@@ -1,5 +1,5 @@
-/**
- * Copyright (c) 2010 Artem Tikhomirov 
+/*
+ * Copyright (c) 2010, 2011 Artem Tikhomirov 
  */
 package com.tmate.hgkit.ll;
 
@@ -28,6 +28,8 @@
 		return content.revisionCount();
 	}
 
+	// FIXME byte[] data might be too expensive; for a few use cases it may be better to have an intermediate Access object (when we don't
+	// need full data instantly - e.g. to calculate a hash, or to compare two revisions)
 	public interface Inspector {
 		// XXX boolean retVal to indicate whether to continue?
 		void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*32*/] nodeid, byte[] data);
--- a/src/com/tmate/hgkit/ll/RevlogStream.java	Wed Jan 05 04:10:28 2011 +0100
+++ b/src/com/tmate/hgkit/ll/RevlogStream.java	Thu Jan 06 03:30:20 2011 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010 Artem Tikhomirov 
+ * Copyright (c) 2010, 2011 Artem Tikhomirov 
  */
 package com.tmate.hgkit.ll;
 
@@ -51,6 +51,53 @@
 		initOutline();
 		return index.size();
 	}
+	
+	public int dataLength(int revision) {
+		// XXX in fact, use of iterate() instead of this implementation may be quite reasonable.
+		//
+		final int indexSize = revisionCount();
+		DataAccess daIndex = getIndexStream(); // XXX may supply a hint that I'll need really few bytes of data (although at some offset)
+		if (revision == TIP) {
+			revision = indexSize - 1;
+		}
+		try {
+			int recordOffset = inline ? (int) index.get(revision).offset : revision * REVLOGV1_RECORD_SIZE;
+			daIndex.seek(recordOffset + 12); // 6+2+4
+			int actualLen = daIndex.readInt();
+			return actualLen; 
+		} catch (IOException ex) {
+			ex.printStackTrace(); // log error. FIXME better handling
+			throw new IllegalStateException(ex);
+		} finally {
+			daIndex.done();
+		}
+	}
+	
+	public int findLocalRevisionNumber(Nodeid nodeid) {
+		// XXX this one may be implemented with iterate() once there's mechanism to stop iterations
+		final int indexSize = revisionCount();
+		DataAccess daIndex = getIndexStream();
+		try {
+			for (int i = 0; i < indexSize; i++) {
+				daIndex.skip(8);
+				int compressedLen = daIndex.readInt();
+				daIndex.skip(20);
+				byte[] buf = new byte[20];
+				daIndex.readBytes(buf, 0, 20);
+				if (nodeid.equalsTo(buf)) {
+					return i;
+				}
+				daIndex.skip(inline ? 12 + compressedLen : 12);
+			}
+		} catch (IOException ex) {
+			ex.printStackTrace(); // log error. FIXME better handling
+			throw new IllegalStateException(ex);
+		} finally {
+			daIndex.done();
+		}
+		throw new IllegalArgumentException(String.format("%s doesn't represent a revision of %s", nodeid.toString(), indexFile.getName() /*XXX HgDataFile.getPath might be more suitable here*/));
+	}
+
 
 	private final int REVLOGV1_RECORD_SIZE = 64;
 
@@ -92,7 +139,7 @@
 				i = start;
 			}
 			
-			daIndex.seek(inline ? (int) index.get(i).offset : start * REVLOGV1_RECORD_SIZE);
+			daIndex.seek(inline ? (int) index.get(i).offset : i * REVLOGV1_RECORD_SIZE);
 			for (; i <= end; i++ ) {
 				long l = daIndex.readLong();
 				long offset = l >>> 16;
@@ -205,6 +252,7 @@
 				}
 				if (da.isEmpty()) {
 					// fine, done then
+					res.trimToSize();
 					index = res;
 					break;
 				} else {