diff src/org/tmatesoft/hg/repo/HgBundle.java @ 169:8c8e3f372fa1

Towards initial clone: refactor HgBundle to provide slightly higher-level structure of the bundle
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 23 Mar 2011 14:13:11 +0100
parents d5268ca7715b
children 71ddbf8603e8
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/repo/HgBundle.java	Tue Mar 22 21:18:40 2011 +0100
+++ b/src/org/tmatesoft/hg/repo/HgBundle.java	Wed Mar 23 14:13:11 2011 +0100
@@ -16,11 +16,14 @@
  */
 package org.tmatesoft.hg.repo;
 
+import static org.tmatesoft.hg.core.Nodeid.NULL;
+
 import java.io.File;
 import java.io.IOException;
 import java.util.LinkedList;
 import java.util.List;
 
+import org.tmatesoft.hg.core.HgBadStateException;
 import org.tmatesoft.hg.core.HgException;
 import org.tmatesoft.hg.core.Nodeid;
 import org.tmatesoft.hg.internal.ByteArrayChannel;
@@ -28,14 +31,14 @@
 import org.tmatesoft.hg.internal.DataAccess;
 import org.tmatesoft.hg.internal.DataAccessProvider;
 import org.tmatesoft.hg.internal.DigestHelper;
+import org.tmatesoft.hg.internal.InflaterDataAccess;
 import org.tmatesoft.hg.internal.RevlogStream;
 import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
 import org.tmatesoft.hg.util.CancelledException;
 
-
 /**
  * @see http://mercurial.selenic.com/wiki/BundleFormat
- *
+ * 
  * @author Artem Tikhomirov
  * @author TMate Software Ltd.
  */
@@ -44,129 +47,372 @@
 	private final File bundleFile;
 	private final DataAccessProvider accessProvider;
 
-	public HgBundle(DataAccessProvider dap, File bundle) {
+	HgBundle(DataAccessProvider dap, File bundle) {
 		accessProvider = dap;
 		bundleFile = bundle;
 	}
 
-	public void changes(HgRepository hgRepo) throws HgException, IOException {
+	private DataAccess getDataStream() throws IOException {
 		DataAccess da = accessProvider.create(bundleFile);
-		DigestHelper dh = new DigestHelper();
-		try {
-			List<GroupElement> changelogGroup = readGroup(da);
-			if (changelogGroup.isEmpty()) {
-				throw new IllegalStateException("No changelog group in the bundle"); // XXX perhaps, just be silent and/or log?
-			}
-			// XXX in fact, bundle not necessarily starts with the first revision missing in hgRepo
-			// need to 'scroll' till the last one common.
-			final Nodeid base = changelogGroup.get(0).firstParent();
-			if (!hgRepo.getChangelog().isKnown(base)) {
-				throw new IllegalArgumentException("unknown parent");
+		byte[] signature = new byte[6];
+		if (da.length() > 6) {
+			da.readBytes(signature, 0, 6);
+			if (signature[0] == 'H' && signature[1] == 'G' && signature[2] == '1' && signature[3] == '0') {
+				if (signature[4] == 'G' && signature[5] == 'Z') {
+					return new InflaterDataAccess(da, 6, da.length() - 6);
+				}
+				if (signature[4] == 'B' && signature[5] == 'Z') {
+					throw HgRepository.notImplemented();
+				}
+				if (signature[4] != 'U' || signature[5] != 'N') {
+					throw new HgBadStateException("Bad bundle signature:" + new String(signature));
+				}
+				// "...UN", fall-through
+			} else {
+				da.reset();
 			}
-			// BundleFormat wiki says:
-			// Each Changelog entry patches the result of all previous patches 
-			// (the previous, or parent patch of a given patch p is the patch that has a node equal to p's p1 field)
-			ByteArrayChannel bac = new ByteArrayChannel();
-			hgRepo.getChangelog().rawContent(base, bac); // FIXME get DataAccess directly, to avoid
-			// extra byte[] (inside ByteArrayChannel) duplication just for the sake of subsequent ByteArrayDataChannel wrap.
-			ByteArrayDataAccess baseRevContent = new ByteArrayDataAccess(bac.toArray());
-			for (GroupElement ge : changelogGroup) {
-				byte[] csetContent = RevlogStream.apply(baseRevContent, -1, ge.patches);
-				dh = dh.sha1(ge.firstParent(), ge.secondParent(), csetContent); // XXX ge may give me access to byte[] content of nodeid directly, perhaps, I don't need DH to be friend of Nodeid?
-				if (!ge.node().equalsTo(dh.asBinary())) {
-					throw new IllegalStateException("Integrity check failed on " + bundleFile + ", node:" + ge.node());
+		}
+		return da;
+	}
+
+	// prints the changesets recorded in the bundle, marking each with '+' if the supplied repository already knows it and with '-' if it is missing there
+	public void changes(final HgRepository hgRepo) throws HgException, IOException {
+		Inspector insp = new Inspector() {
+			DigestHelper dh = new DigestHelper();
+			boolean emptyChangelog = true;
+			private DataAccess prevRevContent;
+
+			public void changelogStart() {
+				emptyChangelog = true;
+				
+			}
+
+			public void changelogEnd() {
+				if (emptyChangelog) {
+					throw new IllegalStateException("No changelog group in the bundle"); // XXX perhaps, just be silent and/or log?
 				}
-				ByteArrayDataAccess csetDataAccess = new ByteArrayDataAccess(csetContent);
-				RawChangeset cs = RawChangeset.parse(csetDataAccess);
-				System.out.println(cs.toString());
-				baseRevContent = csetDataAccess.reset();
 			}
-		} catch (CancelledException ex) {
-			System.out.println("Operation cancelled");
+
+/*
+ * Although the BundleFormat wiki says: "Each Changelog entry patches the result of all previous patches
+ * (the previous, or parent patch of a given patch p is the patch that has a node equal to p's p1 field)",
+ *  this does not seem to hold true. Instead, each entry patches the one immediately preceding it in the
+ *  bundle, regardless of whether that preceding entry is its parent (i.e. ge.firstParent()) or not.
+ *  
+Actual state in the changelog.i
+Index    Offset      Flags     Packed     Actual   Base Rev   Link Rev  Parent1  Parent2     nodeid
+  50:          9212      0        209        329         48         50       49       -1     f1db8610da62a3e0beb8d360556ee1fd6eb9885e
+  51:          9421      0        278        688         48         51       50       -1     9429c7bd1920fab164a9d2b621d38d57bcb49ae0
+  52:          9699      0        154        179         52         52       50       -1     30bd389788464287cee22ccff54c330a4b715de5
+  53:          9853      0        133        204         52         53       51       52     a6f39e595b2b54f56304470269a936ead77f5725
+  54:          9986      0        156        182         54         54       52       -1     fd4f2c98995beb051070630c272a9be87bef617d
+
+Excerpt from bundle (nodeid, p1, p2, cs):
+   f1db8610da62a3e0beb8d360556ee1fd6eb9885e 26e3eeaa39623de552b45ee1f55c14f36460f220 0000000000000000000000000000000000000000 f1db8610da62a3e0beb8d360556ee1fd6eb9885e; patches:4
+   9429c7bd1920fab164a9d2b621d38d57bcb49ae0 f1db8610da62a3e0beb8d360556ee1fd6eb9885e 0000000000000000000000000000000000000000 9429c7bd1920fab164a9d2b621d38d57bcb49ae0; patches:3
+>  30bd389788464287cee22ccff54c330a4b715de5 f1db8610da62a3e0beb8d360556ee1fd6eb9885e 0000000000000000000000000000000000000000 30bd389788464287cee22ccff54c330a4b715de5; patches:3
+   a6f39e595b2b54f56304470269a936ead77f5725 9429c7bd1920fab164a9d2b621d38d57bcb49ae0 30bd389788464287cee22ccff54c330a4b715de5 a6f39e595b2b54f56304470269a936ead77f5725; patches:3
+   fd4f2c98995beb051070630c272a9be87bef617d 30bd389788464287cee22ccff54c330a4b715de5 0000000000000000000000000000000000000000 fd4f2c98995beb051070630c272a9be87bef617d; patches:3
+
+To recreate 30bd..e5, one has to take the content of 9429..e0 (the preceding entry), not that of its p1 f1db..5e
+ */
+			public boolean element(GroupElement ge) {
+				emptyChangelog = false;
+				HgChangelog changelog = hgRepo.getChangelog();
+				try {
+					if (prevRevContent == null) { 
+						if (NULL.equals(ge.firstParent()) && NULL.equals(ge.secondParent())) {
+							prevRevContent = new ByteArrayDataAccess(new byte[0]);
+						} else {
+							final Nodeid base = ge.firstParent();
+							if (!changelog.isKnown(base) /*only first parent, that's Bundle contract*/) {
+								throw new IllegalStateException(String.format("Revision %s needs a parent %s, which is missing in the supplied repo %s", ge.node().shortNotation(), base.shortNotation(), hgRepo.toString()));
+							}
+							ByteArrayChannel bac = new ByteArrayChannel();
+							changelog.rawContent(base, bac); // FIXME get DataAccess directly, to avoid
+							// extra byte[] (inside ByteArrayChannel) duplication just for the sake of the subsequent ByteArrayDataAccess wrap.
+							prevRevContent = new ByteArrayDataAccess(bac.toArray());
+						}
+					}
+					//
+					byte[] csetContent = ge.apply(prevRevContent);
+					dh = dh.sha1(ge.firstParent(), ge.secondParent(), csetContent); // XXX ge may give me access to byte[] content of nodeid directly, perhaps, I don't need DH to be friend of Nodeid?
+					if (!ge.node().equalsTo(dh.asBinary())) {
+						throw new IllegalStateException("Integrity check failed on " + bundleFile + ", node:" + ge.node());
+					}
+					ByteArrayDataAccess csetDataAccess = new ByteArrayDataAccess(csetContent);
+					if (changelog.isKnown(ge.node())) {
+						System.out.print("+");
+					} else {
+						System.out.print("-");
+					}
+					RawChangeset cs = RawChangeset.parse(csetDataAccess);
+					System.out.println(cs.toString());
+					prevRevContent = csetDataAccess.reset();
+				} catch (CancelledException ex) {
+					return false;
+				} catch (Exception ex) {
+					throw new HgBadStateException(ex); // FIXME
+				}
+				return true;
+			}
+
+			public void manifestStart() {}
+			public void manifestEnd() {}
+			public void fileStart(String name) {}
+			public void fileEnd(String name) {}
+
+		};
+		inspectChangelog(insp);
+	}
+
+	public void dump() throws IOException {
+		Dump dump = new Dump();
+		inspectAll(dump);
+		System.out.println("Total files:" + dump.names.size());
+		for (String s : dump.names) {
+			System.out.println(s);
+		}
+	}
+
+	// callback interface, used to minimize the number of Strings and Nodeids instantiated
+	public interface Inspector {
+		void changelogStart();
+
+		void changelogEnd();
+
+		void manifestStart();
+
+		void manifestEnd();
+
+		void fileStart(String name);
+
+		void fileEnd(String name);
+
+		/**
+		 * @param element
+		 *            data element, instance might be reused
+		 * @return <code>true</code> to continue
+		 */
+		boolean element(GroupElement element);
+	}
+
+	public static class Dump implements Inspector {
+		public final LinkedList<String> names = new LinkedList<String>();
+
+		public void changelogStart() {
+			System.out.println("Changelog group");
+		}
+
+		public void changelogEnd() {
+		}
+
+		public void manifestStart() {
+			System.out.println("Manifest group");
+		}
+
+		public void manifestEnd() {
+		}
+
+		public void fileStart(String name) {
+			names.add(name);
+			System.out.println(name);
+		}
+
+		public void fileEnd(String name) {
+		}
+
+		public boolean element(GroupElement ge) {
+			try {
+				System.out.printf("  %s %s %s %s; patches:%d\n", ge.node(), ge.firstParent(), ge.secondParent(), ge.cset(), ge.patches().size());
+			} catch (Exception ex) {
+				ex.printStackTrace(); // FIXME
+			}
+			return true;
+		}
+	}
+
+	public void inspectChangelog(Inspector inspector) throws IOException {
+		if (inspector == null) {
+			throw new IllegalArgumentException();
+		}
+		DataAccess da = getDataStream();
+		try {
+			if (da.isEmpty()) {
+				return;
+			}
+			inspector.changelogStart();
+			readGroup(da, inspector);
+			inspector.changelogEnd();
 		} finally {
 			da.done();
 		}
 	}
 
-	public void dump() throws IOException {
-		DataAccess da = accessProvider.create(bundleFile);
+	public void inspectManifest(Inspector inspector) throws IOException {
+		if (inspector == null) {
+			throw new IllegalArgumentException();
+		}
+		DataAccess da = getDataStream();
 		try {
-			LinkedList<String> names = new LinkedList<String>();
+			if (da.isEmpty()) {
+				return;
+			}
+			skipGroup(da); // changelog
 			if (!da.isEmpty()) {
-				System.out.println("Changelog group");
-				List<GroupElement> changelogGroup = readGroup(da);
-				for (GroupElement ge : changelogGroup) {
-					System.out.printf("  %s %s %s %s; patches:%d\n", ge.node(), ge.firstParent(), ge.secondParent(), ge.cset(), ge.patches.size());
-				}
-				System.out.println("Manifest group");
-				List<GroupElement> manifestGroup = readGroup(da);
-				for (GroupElement ge : manifestGroup) {
-					System.out.printf("  %s %s %s %s; patches:%d\n", ge.node(), ge.firstParent(), ge.secondParent(), ge.cset(), ge.patches.size());
-				}
-				while (!da.isEmpty()) {
-					int fnameLen = da.readInt();
-					if (fnameLen <= 4) {
-						break; // null chunk, the last one.
-					}
-					byte[] fname = new byte[fnameLen - 4];
-					da.readBytes(fname, 0, fname.length);
-					names.add(new String(fname));
-					List<GroupElement> fileGroup = readGroup(da);
-					System.out.println(names.getLast());
-					for (GroupElement ge : fileGroup) {
-						System.out.printf("  %s %s %s %s; patches:%d\n", ge.node(), ge.firstParent(), ge.secondParent(), ge.cset(), ge.patches.size());
-					}
-				}
-			}
-			System.out.println(names.size());
-			for (String s : names) {
-				System.out.println(s);
+				inspector.manifestStart();
+				readGroup(da, inspector);
+				inspector.manifestEnd();
 			}
 		} finally {
 			da.done();
 		}
 	}
 
-	private static List<GroupElement> readGroup(DataAccess da) throws IOException {
+	public void inspectFiles(Inspector inspector) throws IOException {
+		if (inspector == null) {
+			throw new IllegalArgumentException();
+		}
+		DataAccess da = getDataStream();
+		try {
+			if (!da.isEmpty()) {
+				skipGroup(da); // changelog
+			}
+			if (!da.isEmpty()) {
+				skipGroup(da); // manifest
+			}
+			while (!da.isEmpty()) {
+				int fnameLen = da.readInt();
+				if (fnameLen <= 4) {
+					break; // null chunk, the last one.
+				}
+				byte[] nameBuf = new byte[fnameLen - 4];
+				da.readBytes(nameBuf, 0, nameBuf.length);
+				String fname = new String(nameBuf);
+				inspector.fileStart(fname);
+				readGroup(da, inspector);
+				inspector.fileEnd(fname);
+			}
+		} finally {
+			da.done();
+		}
+	}
+
+	public void inspectAll(Inspector inspector) throws IOException {
+		if (inspector == null) {
+			throw new IllegalArgumentException();
+		}
+		DataAccess da = getDataStream();
+		try {
+			if (da.isEmpty()) {
+				return;
+			}
+			inspector.changelogStart();
+			readGroup(da, inspector);
+			inspector.changelogEnd();
+			//
+			if (da.isEmpty()) {
+				return;
+			}
+			inspector.manifestStart();
+			readGroup(da, inspector);
+			inspector.manifestEnd();
+			//
+			while (!da.isEmpty()) {
+				int fnameLen = da.readInt();
+				if (fnameLen <= 4) {
+					break; // null chunk, the last one.
+				}
+				byte[] fnameBuf = new byte[fnameLen - 4];
+				da.readBytes(fnameBuf, 0, fnameBuf.length);
+				String name = new String(fnameBuf);
+				inspector.fileStart(name);
+				readGroup(da, inspector);
+				inspector.fileEnd(name);
+			}
+		} finally {
+			da.done();
+		}
+	}
+
+	private static void readGroup(DataAccess da, Inspector inspector) throws IOException {
 		int len = da.readInt();
-		LinkedList<GroupElement> rv = new LinkedList<HgBundle.GroupElement>();
-		while (len > 4 && !da.isEmpty()) {
+		boolean good2go = true;
+		while (len > 4 && !da.isEmpty() && good2go) {
 			byte[] nb = new byte[80];
 			da.readBytes(nb, 0, 80);
-			int dataLength = len-84;
-			LinkedList<RevlogStream.PatchRecord> patches = new LinkedList<RevlogStream.PatchRecord>();
-			while (dataLength > 0) {
-				RevlogStream.PatchRecord pr = RevlogStream.PatchRecord.read(da);
-				patches.add(pr);
-				dataLength -= pr.len + 12;
-			}
-			rv.add(new GroupElement(nb, patches));
+			int dataLength = len - 84 /* length field + 4 nodeids */;
+			byte[] data = new byte[dataLength];
+			da.readBytes(data, 0, dataLength);
+			DataAccess slice = new ByteArrayDataAccess(data); // XXX in fact, may pass a slicing DataAccess.
+			// We just need to make sure that we seek to the proper location afterwards (where the next GroupElement starts),
+			// regardless of whether that slice has been read to its end or not.
+			GroupElement ge = new GroupElement(nb, slice);
+			good2go = inspector.element(ge);
 			len = da.isEmpty() ? 0 : da.readInt();
 		}
-		return rv;
+		// Need to skip up to the end of the group if the inspector indicated that it doesn't want to continue with the group,
+		// because the outer code may try to read the next group immediately after we return.
+		while (len > 4 && !da.isEmpty()) {
+			da.skip(len - 4 /* length field */);
+			len = da.isEmpty() ? 0 : da.readInt();
+		}
 	}
 
-	static class GroupElement {
-		private byte[] header; // byte[80] takes 120 bytes, 4 Nodeids - 192
+	private static void skipGroup(DataAccess da) throws IOException {
+		int len = da.readInt();
+		while (len > 4 && !da.isEmpty()) {
+			da.skip(len - 4); // sizeof(int)
+			len = da.isEmpty() ? 0 : da.readInt();
+		}
+	}
+
+	public static class GroupElement {
+		private final byte[] header; // byte[80] takes 120 bytes, 4 Nodeids - 192
+		private final DataAccess dataAccess;
 		private List<RevlogStream.PatchRecord> patches;
-		
-		GroupElement(byte[] fourNodeids, List<RevlogStream.PatchRecord> patchList) {
+
+		GroupElement(byte[] fourNodeids, DataAccess rawDataAccess) {
 			assert fourNodeids != null && fourNodeids.length == 80;
-			// patchList.size() > 0
 			header = fourNodeids;
-			patches = patchList;
+			dataAccess = rawDataAccess;
 		}
+
 		public Nodeid node() {
 			return Nodeid.fromBinary(header, 0);
 		}
+
 		public Nodeid firstParent() {
 			return Nodeid.fromBinary(header, 20);
 		}
+
 		public Nodeid secondParent() {
 			return Nodeid.fromBinary(header, 40);
 		}
+
 		public Nodeid cset() { // cs seems to be changeset
 			return Nodeid.fromBinary(header, 60);
 		}
+
+		public DataAccess rawData() {
+			return dataAccess;
+		}
+		
+		public List<RevlogStream.PatchRecord> patches() throws IOException {
+			if (patches == null) {
+				dataAccess.reset();
+				LinkedList<RevlogStream.PatchRecord> p = new LinkedList<RevlogStream.PatchRecord>();
+				while (!dataAccess.isEmpty()) {
+					RevlogStream.PatchRecord pr = RevlogStream.PatchRecord.read(dataAccess);
+					p.add(pr);
+				}
+				patches = p;
+			}
+			return patches;
+		}
+
+		public byte[] apply(DataAccess baseContent) throws IOException {
+			return RevlogStream.apply(baseContent, -1, patches());
+		}
 	}
 }