changeset 169:8c8e3f372fa1

Towards initial clone: refactor HgBundle to provide slightly higher-level structure of the bundle
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 23 Mar 2011 14:13:11 +0100
parents dd525ca65de8
children 71ddbf8603e8
files cmdline/org/tmatesoft/hg/console/Bundle.java design.txt src/org/tmatesoft/hg/repo/HgBundle.java src/org/tmatesoft/hg/repo/HgLookup.java
diffstat 4 files changed, 378 insertions(+), 89 deletions(-) [+]
line wrap: on
line diff
--- a/cmdline/org/tmatesoft/hg/console/Bundle.java	Tue Mar 22 21:18:40 2011 +0100
+++ b/cmdline/org/tmatesoft/hg/console/Bundle.java	Wed Mar 23 14:13:11 2011 +0100
@@ -18,8 +18,8 @@
 
 import java.io.File;
 
-import org.tmatesoft.hg.internal.DataAccessProvider;
 import org.tmatesoft.hg.repo.HgBundle;
+import org.tmatesoft.hg.repo.HgLookup;
 import org.tmatesoft.hg.repo.HgRepository;
 
 
@@ -30,7 +30,6 @@
  * @author TMate Software Ltd.
  */
 public class Bundle {
-
 	public static void main(String[] args) throws Exception {
 		Options cmdLineOpts = Options.parse(args);
 		HgRepository hgRepo = cmdLineOpts.findRepository();
@@ -38,10 +37,43 @@
 			System.err.printf("Can't find repository in: %s\n", hgRepo.getLocation());
 			return;
 		}
-		File bundleFile = new File("/temp/hg/hg-bundle-a78c980749e3.tmp");
-		DataAccessProvider dap = new DataAccessProvider();
-		HgBundle hgBundle = new HgBundle(dap, bundleFile);
+		File bundleFile = new File("/temp/hg/hg-bundle-000000000000-gz.tmp");
+		HgBundle hgBundle = new HgLookup().loadBundle(bundleFile);
 //		hgBundle.dump();
+		/* pass -R <path-to-repo-with-less-revisions-than-bundle>, e.g. for bundle with tip=168 and -R \temp\hg4j-50 with tip:159
+		+Changeset {User: ..., Comment: Integer ....}
+		+Changeset {User: ..., Comment: Approach with ...}
+		-Changeset {User: ..., Comment: Correct project name...}
+		-Changeset {User: ..., Comment: Record possible...}
+		*/
 		hgBundle.changes(hgRepo);
 	}
+
+/*
+ *  TODO EXPLAIN why DataAccess.java on merge from branch has P2 set, and P1 is NULL
+ *  
+ *  excerpt from dump('hg-bundle-00') output (node, p1, p2, cs):
+ src/org/tmatesoft/hg/internal/DataAccess.java
+  186af94a2a7ddb34190e63ce556d0fa4dd24add2 0000000000000000000000000000000000000000 0000000000000000000000000000000000000000 6f1b88693d48422e98c3eaaa8428ffd4d4d98ca7; patches:1
+  be8d0fdc4ff268bf5eb0a9120282ce6e63de1606 186af94a2a7ddb34190e63ce556d0fa4dd24add2 0000000000000000000000000000000000000000 a3a2e5deb320d7412ccbb59bdc44668d445bc4c4; patches:2
+  333d7bbd4a80a5d6fb4b44e54e39e290f50dc7f8 be8d0fdc4ff268bf5eb0a9120282ce6e63de1606 0000000000000000000000000000000000000000 e93101b97e4ab0a3f3402ec0e80b6e559237c7c8; patches:1
+  56e4523cb8b42630daf70511d73d29e0b375dfa5 0000000000000000000000000000000000000000 333d7bbd4a80a5d6fb4b44e54e39e290f50dc7f8 d5268ca7715b8d96204fc62abc632e8f55761547; patches:6
+  f85b6d7ed3cc4b7c6f99444eb0a41b58793cc900 56e4523cb8b42630daf70511d73d29e0b375dfa5 0000000000000000000000000000000000000000 b413b16d10a50cc027f4c38e4df5a9fedd618a79; patches:4
+	  
+  RevlogDump for the file says:
+  Index    Offset      Flags     Packed     Actual   Base Rev   Link Rev  Parent1  Parent2     nodeid
+   0:    4295032832      0       1109       2465          0         74       -1       -1     186af94a2a7ddb34190e63ce556d0fa4dd24add2
+   1:          1109      0         70       2364          0        102        0       -1     be8d0fdc4ff268bf5eb0a9120282ce6e63de1606
+   2:          1179      0         63       2365          0        122        1       -1     333d7bbd4a80a5d6fb4b44e54e39e290f50dc7f8
+   3:          1242      0        801       3765          0        157       -1        2     56e4523cb8b42630daf70511d73d29e0b375dfa5
+   4:          2043      0        130       3658          0        158        3       -1     f85b6d7ed3cc4b7c6f99444eb0a41b58793cc900
+
+  Excerpt from changelog dump:
+  155:         30541      0        155        195        155        155      154       -1     a4ec5e08701771b96057522188b16ed289e9e8fe
+  156:         30696      0        154        186        155        156      155       -1     643ddec3be36246fc052cf22ece503fa60cafe22
+  157:         30850      0        478       1422        155        157      156       53     d5268ca7715b8d96204fc62abc632e8f55761547
+  158:         31328      0        247        665        155        158      157       -1     b413b16d10a50cc027f4c38e4df5a9fedd618a79
+			   
+
+ */
 }
--- a/design.txt	Tue Mar 22 21:18:40 2011 +0100
+++ b/design.txt	Wed Mar 23 14:13:11 2011 +0100
@@ -37,11 +37,15 @@
 +repo.status - use same collector class twice, difference as external code. add external walker that keeps collected maps and use it in Log operation to give files+,files-  
 + strip \1\n metadata out from RevlogStream
 + hash/digest long names for fncache
++Strip off metadata from beg of the stream - DataAccess (with rebase/moveBaseOffset(int)) would be handy
++ hg status, compare revision and local file with kw expansion and eol extension
+
+write code to convert inlined revlog to .i and .d
 
 delta merge
 DataAccess - collect debug info (buffer misses, file size/total read operations) to find out better strategy to buffer size detection. Compare performance.
 
-Strip off metadata from beg of the stream - DataAccess (with rebase/moveBaseOffset(int)) would be handy
+
 Parameterize StatusCollector to produce copy only when needed. And HgDataFile.metadata perhaps should be moved to cacheable place?
 
 Status operation from GUI - guess, usually on a file/subfolder, hence API should allow for starting path (unlike cmdline, seems useless to implement include/exclide patterns - GUI users hardly enter them, ever)
@@ -54,7 +58,6 @@
 ??? http://mercurial.selenic.com/wiki/Manifest says "Multiple changesets may refer to the same manifest revision". To me, each changeset 
 changes repository, hence manifest should update nodeids of the files it lists, effectively creating new manifest revision.
 
-? hg status, compare revision and local file with kw expansion and eol extension
 ? subrepos in log, status (-S) and manifest commands
 
 
--- a/src/org/tmatesoft/hg/repo/HgBundle.java	Tue Mar 22 21:18:40 2011 +0100
+++ b/src/org/tmatesoft/hg/repo/HgBundle.java	Wed Mar 23 14:13:11 2011 +0100
@@ -16,11 +16,14 @@
  */
 package org.tmatesoft.hg.repo;
 
+import static org.tmatesoft.hg.core.Nodeid.NULL;
+
 import java.io.File;
 import java.io.IOException;
 import java.util.LinkedList;
 import java.util.List;
 
+import org.tmatesoft.hg.core.HgBadStateException;
 import org.tmatesoft.hg.core.HgException;
 import org.tmatesoft.hg.core.Nodeid;
 import org.tmatesoft.hg.internal.ByteArrayChannel;
@@ -28,14 +31,14 @@
 import org.tmatesoft.hg.internal.DataAccess;
 import org.tmatesoft.hg.internal.DataAccessProvider;
 import org.tmatesoft.hg.internal.DigestHelper;
+import org.tmatesoft.hg.internal.InflaterDataAccess;
 import org.tmatesoft.hg.internal.RevlogStream;
 import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
 import org.tmatesoft.hg.util.CancelledException;
 
-
 /**
  * @see http://mercurial.selenic.com/wiki/BundleFormat
- *
+ * 
  * @author Artem Tikhomirov
  * @author TMate Software Ltd.
  */
@@ -44,129 +47,372 @@
 	private final File bundleFile;
 	private final DataAccessProvider accessProvider;
 
-	public HgBundle(DataAccessProvider dap, File bundle) {
+	HgBundle(DataAccessProvider dap, File bundle) {
 		accessProvider = dap;
 		bundleFile = bundle;
 	}
 
-	public void changes(HgRepository hgRepo) throws HgException, IOException {
+	private DataAccess getDataStream() throws IOException {
 		DataAccess da = accessProvider.create(bundleFile);
-		DigestHelper dh = new DigestHelper();
-		try {
-			List<GroupElement> changelogGroup = readGroup(da);
-			if (changelogGroup.isEmpty()) {
-				throw new IllegalStateException("No changelog group in the bundle"); // XXX perhaps, just be silent and/or log?
-			}
-			// XXX in fact, bundle not necessarily starts with the first revision missing in hgRepo
-			// need to 'scroll' till the last one common.
-			final Nodeid base = changelogGroup.get(0).firstParent();
-			if (!hgRepo.getChangelog().isKnown(base)) {
-				throw new IllegalArgumentException("unknown parent");
+		byte[] signature = new byte[6];
+		if (da.length() > 6) {
+			da.readBytes(signature, 0, 6);
+			if (signature[0] == 'H' && signature[1] == 'G' && signature[2] == '1' && signature[3] == '0') {
+				if (signature[4] == 'G' && signature[5] == 'Z') {
+					return new InflaterDataAccess(da, 6, da.length() - 6);
+				}
+				if (signature[4] == 'B' && signature[5] == 'Z') {
+					throw HgRepository.notImplemented();
+				}
+				if (signature[4] != 'U' || signature[5] != 'N') {
+					throw new HgBadStateException("Bad bundle signature:" + new String(signature));
+				}
+				// "...UN", fall-through
+			} else {
+				da.reset();
 			}
-			// BundleFormat wiki says:
-			// Each Changelog entry patches the result of all previous patches 
-			// (the previous, or parent patch of a given patch p is the patch that has a node equal to p's p1 field)
-			ByteArrayChannel bac = new ByteArrayChannel();
-			hgRepo.getChangelog().rawContent(base, bac); // FIXME get DataAccess directly, to avoid
-			// extra byte[] (inside ByteArrayChannel) duplication just for the sake of subsequent ByteArrayDataChannel wrap.
-			ByteArrayDataAccess baseRevContent = new ByteArrayDataAccess(bac.toArray());
-			for (GroupElement ge : changelogGroup) {
-				byte[] csetContent = RevlogStream.apply(baseRevContent, -1, ge.patches);
-				dh = dh.sha1(ge.firstParent(), ge.secondParent(), csetContent); // XXX ge may give me access to byte[] content of nodeid directly, perhaps, I don't need DH to be friend of Nodeid?
-				if (!ge.node().equalsTo(dh.asBinary())) {
-					throw new IllegalStateException("Integrity check failed on " + bundleFile + ", node:" + ge.node());
+		}
+		return da;
+	}
+
+	// shows changes recorded in the bundle that are missing from the supplied repository 
+	public void changes(final HgRepository hgRepo) throws HgException, IOException {
+		Inspector insp = new Inspector() {
+			DigestHelper dh = new DigestHelper();
+			boolean emptyChangelog = true;
+			private DataAccess prevRevContent;
+
+			public void changelogStart() {
+				emptyChangelog = true;
+				
+			}
+
+			public void changelogEnd() {
+				if (emptyChangelog) {
+					throw new IllegalStateException("No changelog group in the bundle"); // XXX perhaps, just be silent and/or log?
 				}
-				ByteArrayDataAccess csetDataAccess = new ByteArrayDataAccess(csetContent);
-				RawChangeset cs = RawChangeset.parse(csetDataAccess);
-				System.out.println(cs.toString());
-				baseRevContent = csetDataAccess.reset();
 			}
-		} catch (CancelledException ex) {
-			System.out.println("Operation cancelled");
+
+/*
+ * Although the BundleFormat wiki says: "Each Changelog entry patches the result of all previous patches 
+ * (the previous, or parent patch of a given patch p is the patch that has a node equal to p's p1 field)",
+ *  this does not seem to hold true. Instead, each entry patches the previous one, regardless of whether
+ *  the one before it is its parent (i.e. ge.firstParent()) or not.
+ *  
+Actual state in the changelog.i
+Index    Offset      Flags     Packed     Actual   Base Rev   Link Rev  Parent1  Parent2     nodeid
+  50:          9212      0        209        329         48         50       49       -1     f1db8610da62a3e0beb8d360556ee1fd6eb9885e
+  51:          9421      0        278        688         48         51       50       -1     9429c7bd1920fab164a9d2b621d38d57bcb49ae0
+  52:          9699      0        154        179         52         52       50       -1     30bd389788464287cee22ccff54c330a4b715de5
+  53:          9853      0        133        204         52         53       51       52     a6f39e595b2b54f56304470269a936ead77f5725
+  54:          9986      0        156        182         54         54       52       -1     fd4f2c98995beb051070630c272a9be87bef617d
+
+Excerpt from bundle (nodeid, p1, p2, cs):
+   f1db8610da62a3e0beb8d360556ee1fd6eb9885e 26e3eeaa39623de552b45ee1f55c14f36460f220 0000000000000000000000000000000000000000 f1db8610da62a3e0beb8d360556ee1fd6eb9885e; patches:4
+   9429c7bd1920fab164a9d2b621d38d57bcb49ae0 f1db8610da62a3e0beb8d360556ee1fd6eb9885e 0000000000000000000000000000000000000000 9429c7bd1920fab164a9d2b621d38d57bcb49ae0; patches:3
+>  30bd389788464287cee22ccff54c330a4b715de5 f1db8610da62a3e0beb8d360556ee1fd6eb9885e 0000000000000000000000000000000000000000 30bd389788464287cee22ccff54c330a4b715de5; patches:3
+   a6f39e595b2b54f56304470269a936ead77f5725 9429c7bd1920fab164a9d2b621d38d57bcb49ae0 30bd389788464287cee22ccff54c330a4b715de5 a6f39e595b2b54f56304470269a936ead77f5725; patches:3
+   fd4f2c98995beb051070630c272a9be87bef617d 30bd389788464287cee22ccff54c330a4b715de5 0000000000000000000000000000000000000000 fd4f2c98995beb051070630c272a9be87bef617d; patches:3
+
+To recreate 30bd..e5, one has to take the content of 9429..e0, not that of its p1 f1db..5e
+ */
+			public boolean element(GroupElement ge) {
+				emptyChangelog = false;
+				HgChangelog changelog = hgRepo.getChangelog();
+				try {
+					if (prevRevContent == null) { 
+						if (NULL.equals(ge.firstParent()) && NULL.equals(ge.secondParent())) {
+							prevRevContent = new ByteArrayDataAccess(new byte[0]);
+						} else {
+							final Nodeid base = ge.firstParent();
+							if (!changelog.isKnown(base) /*only first parent, that's Bundle contract*/) {
+								throw new IllegalStateException(String.format("Revision %s needs a parent %s, which is missing in the supplied repo %s", ge.node().shortNotation(), base.shortNotation(), hgRepo.toString()));
+							}
+							ByteArrayChannel bac = new ByteArrayChannel();
+							changelog.rawContent(base, bac); // FIXME get DataAccess directly, to avoid
+							// extra byte[] (inside ByteArrayChannel) duplication just for the sake of subsequent ByteArrayDataAccess wrap.
+							prevRevContent = new ByteArrayDataAccess(bac.toArray());
+						}
+					}
+					//
+					byte[] csetContent = ge.apply(prevRevContent);
+					dh = dh.sha1(ge.firstParent(), ge.secondParent(), csetContent); // XXX ge may give me access to byte[] content of nodeid directly, perhaps, I don't need DH to be friend of Nodeid?
+					if (!ge.node().equalsTo(dh.asBinary())) {
+						throw new IllegalStateException("Integrity check failed on " + bundleFile + ", node:" + ge.node());
+					}
+					ByteArrayDataAccess csetDataAccess = new ByteArrayDataAccess(csetContent);
+					if (changelog.isKnown(ge.node())) {
+						System.out.print("+");
+					} else {
+						System.out.print("-");
+					}
+					RawChangeset cs = RawChangeset.parse(csetDataAccess);
+					System.out.println(cs.toString());
+					prevRevContent = csetDataAccess.reset();
+				} catch (CancelledException ex) {
+					return false;
+				} catch (Exception ex) {
+					throw new HgBadStateException(ex); // FIXME
+				}
+				return true;
+			}
+
+			public void manifestStart() {}
+			public void manifestEnd() {}
+			public void fileStart(String name) {}
+			public void fileEnd(String name) {}
+
+		};
+		inspectChangelog(insp);
+	}
+
+	public void dump() throws IOException {
+		Dump dump = new Dump();
+		inspectAll(dump);
+		System.out.println("Total files:" + dump.names.size());
+		for (String s : dump.names) {
+			System.out.println(s);
+		}
+	}
+
+	// callback to minimize amount of Strings and Nodeids instantiated
+	public interface Inspector {
+		void changelogStart();
+
+		void changelogEnd();
+
+		void manifestStart();
+
+		void manifestEnd();
+
+		void fileStart(String name);
+
+		void fileEnd(String name);
+
+		/**
+		 * @param element
+		 *            data element, instance might be reused
+		 * @return <code>true</code> to continue
+		 */
+		boolean element(GroupElement element);
+	}
+
+	public static class Dump implements Inspector {
+		public final LinkedList<String> names = new LinkedList<String>();
+
+		public void changelogStart() {
+			System.out.println("Changelog group");
+		}
+
+		public void changelogEnd() {
+		}
+
+		public void manifestStart() {
+			System.out.println("Manifest group");
+		}
+
+		public void manifestEnd() {
+		}
+
+		public void fileStart(String name) {
+			names.add(name);
+			System.out.println(name);
+		}
+
+		public void fileEnd(String name) {
+		}
+
+		public boolean element(GroupElement ge) {
+			try {
+				System.out.printf("  %s %s %s %s; patches:%d\n", ge.node(), ge.firstParent(), ge.secondParent(), ge.cset(), ge.patches().size());
+			} catch (Exception ex) {
+				ex.printStackTrace(); // FIXME
+			}
+			return true;
+		}
+	}
+
+	public void inspectChangelog(Inspector inspector) throws IOException {
+		if (inspector == null) {
+			throw new IllegalArgumentException();
+		}
+		DataAccess da = getDataStream();
+		try {
+			if (da.isEmpty()) {
+				return;
+			}
+			inspector.changelogStart();
+			readGroup(da, inspector);
+			inspector.changelogEnd();
 		} finally {
 			da.done();
 		}
 	}
 
-	public void dump() throws IOException {
-		DataAccess da = accessProvider.create(bundleFile);
+	public void inspectManifest(Inspector inspector) throws IOException {
+		if (inspector == null) {
+			throw new IllegalArgumentException();
+		}
+		DataAccess da = getDataStream();
 		try {
-			LinkedList<String> names = new LinkedList<String>();
+			if (da.isEmpty()) {
+				return;
+			}
+			skipGroup(da); // changelog
 			if (!da.isEmpty()) {
-				System.out.println("Changelog group");
-				List<GroupElement> changelogGroup = readGroup(da);
-				for (GroupElement ge : changelogGroup) {
-					System.out.printf("  %s %s %s %s; patches:%d\n", ge.node(), ge.firstParent(), ge.secondParent(), ge.cset(), ge.patches.size());
-				}
-				System.out.println("Manifest group");
-				List<GroupElement> manifestGroup = readGroup(da);
-				for (GroupElement ge : manifestGroup) {
-					System.out.printf("  %s %s %s %s; patches:%d\n", ge.node(), ge.firstParent(), ge.secondParent(), ge.cset(), ge.patches.size());
-				}
-				while (!da.isEmpty()) {
-					int fnameLen = da.readInt();
-					if (fnameLen <= 4) {
-						break; // null chunk, the last one.
-					}
-					byte[] fname = new byte[fnameLen - 4];
-					da.readBytes(fname, 0, fname.length);
-					names.add(new String(fname));
-					List<GroupElement> fileGroup = readGroup(da);
-					System.out.println(names.getLast());
-					for (GroupElement ge : fileGroup) {
-						System.out.printf("  %s %s %s %s; patches:%d\n", ge.node(), ge.firstParent(), ge.secondParent(), ge.cset(), ge.patches.size());
-					}
-				}
-			}
-			System.out.println(names.size());
-			for (String s : names) {
-				System.out.println(s);
+				inspector.manifestStart();
+				readGroup(da, inspector);
+				inspector.manifestEnd();
 			}
 		} finally {
 			da.done();
 		}
 	}
 
-	private static List<GroupElement> readGroup(DataAccess da) throws IOException {
+	public void inspectFiles(Inspector inspector) throws IOException {
+		if (inspector == null) {
+			throw new IllegalArgumentException();
+		}
+		DataAccess da = getDataStream();
+		try {
+			if (!da.isEmpty()) {
+				skipGroup(da); // changelog
+			}
+			if (!da.isEmpty()) {
+				skipGroup(da); // manifest
+			}
+			while (!da.isEmpty()) {
+				int fnameLen = da.readInt();
+				if (fnameLen <= 4) {
+					break; // null chunk, the last one.
+				}
+				byte[] nameBuf = new byte[fnameLen - 4];
+				da.readBytes(nameBuf, 0, nameBuf.length);
+				String fname = new String(nameBuf);
+				inspector.fileStart(fname);
+				readGroup(da, inspector);
+				inspector.fileEnd(fname);
+			}
+		} finally {
+			da.done();
+		}
+	}
+
+	public void inspectAll(Inspector inspector) throws IOException {
+		if (inspector == null) {
+			throw new IllegalArgumentException();
+		}
+		DataAccess da = getDataStream();
+		try {
+			if (da.isEmpty()) {
+				return;
+			}
+			inspector.changelogStart();
+			readGroup(da, inspector);
+			inspector.changelogEnd();
+			//
+			if (da.isEmpty()) {
+				return;
+			}
+			inspector.manifestStart();
+			readGroup(da, inspector);
+			inspector.manifestEnd();
+			//
+			while (!da.isEmpty()) {
+				int fnameLen = da.readInt();
+				if (fnameLen <= 4) {
+					break; // null chunk, the last one.
+				}
+				byte[] fnameBuf = new byte[fnameLen - 4];
+				da.readBytes(fnameBuf, 0, fnameBuf.length);
+				String name = new String(fnameBuf);
+				inspector.fileStart(name);
+				readGroup(da, inspector);
+				inspector.fileEnd(name);
+			}
+		} finally {
+			da.done();
+		}
+	}
+
+	private static void readGroup(DataAccess da, Inspector inspector) throws IOException {
 		int len = da.readInt();
-		LinkedList<GroupElement> rv = new LinkedList<HgBundle.GroupElement>();
-		while (len > 4 && !da.isEmpty()) {
+		boolean good2go = true;
+		while (len > 4 && !da.isEmpty() && good2go) {
 			byte[] nb = new byte[80];
 			da.readBytes(nb, 0, 80);
-			int dataLength = len-84;
-			LinkedList<RevlogStream.PatchRecord> patches = new LinkedList<RevlogStream.PatchRecord>();
-			while (dataLength > 0) {
-				RevlogStream.PatchRecord pr = RevlogStream.PatchRecord.read(da);
-				patches.add(pr);
-				dataLength -= pr.len + 12;
-			}
-			rv.add(new GroupElement(nb, patches));
+			int dataLength = len - 84 /* length field + 4 nodeids */;
+			byte[] data = new byte[dataLength];
+			da.readBytes(data, 0, dataLength);
+			DataAccess slice = new ByteArrayDataAccess(data); // XXX in fact, may pass a slicing DataAccess.
+			// Just need to make sure that we seek to proper location afterwards (where next GroupElement starts),
+			// regardless whether that slice has read it or not.
+			GroupElement ge = new GroupElement(nb, slice);
+			good2go = inspector.element(ge);
 			len = da.isEmpty() ? 0 : da.readInt();
 		}
-		return rv;
+		// need to skip to the end of the group if the inspector said it doesn't want to continue with the group,
+		// because the outer code may try to read the next group immediately after we return.
+		while (len > 4 && !da.isEmpty()) {
+			da.skip(len - 4 /* length field */);
+			len = da.isEmpty() ? 0 : da.readInt();
+		}
 	}
 
-	static class GroupElement {
-		private byte[] header; // byte[80] takes 120 bytes, 4 Nodeids - 192
+	private static void skipGroup(DataAccess da) throws IOException {
+		int len = da.readInt();
+		while (len > 4 && !da.isEmpty()) {
+			da.skip(len - 4); // sizeof(int)
+			len = da.isEmpty() ? 0 : da.readInt();
+		}
+	}
+
+	public static class GroupElement {
+		private final byte[] header; // byte[80] takes 120 bytes, 4 Nodeids - 192
+		private final DataAccess dataAccess;
 		private List<RevlogStream.PatchRecord> patches;
-		
-		GroupElement(byte[] fourNodeids, List<RevlogStream.PatchRecord> patchList) {
+
+		GroupElement(byte[] fourNodeids, DataAccess rawDataAccess) {
 			assert fourNodeids != null && fourNodeids.length == 80;
-			// patchList.size() > 0
 			header = fourNodeids;
-			patches = patchList;
+			dataAccess = rawDataAccess;
 		}
+
 		public Nodeid node() {
 			return Nodeid.fromBinary(header, 0);
 		}
+
 		public Nodeid firstParent() {
 			return Nodeid.fromBinary(header, 20);
 		}
+
 		public Nodeid secondParent() {
 			return Nodeid.fromBinary(header, 40);
 		}
+
 		public Nodeid cset() { // cs seems to be changeset
 			return Nodeid.fromBinary(header, 60);
 		}
+
+		public DataAccess rawData() {
+			return dataAccess;
+		}
+		
+		public List<RevlogStream.PatchRecord> patches() throws IOException {
+			if (patches == null) {
+				dataAccess.reset();
+				LinkedList<RevlogStream.PatchRecord> p = new LinkedList<RevlogStream.PatchRecord>();
+				while (!dataAccess.isEmpty()) {
+					RevlogStream.PatchRecord pr = RevlogStream.PatchRecord.read(dataAccess);
+					p.add(pr);
+				}
+				patches = p;
+			}
+			return patches;
+		}
+
+		public byte[] apply(DataAccess baseContent) throws IOException {
+			return RevlogStream.apply(baseContent, -1, patches());
+		}
 	}
 }
--- a/src/org/tmatesoft/hg/repo/HgLookup.java	Tue Mar 22 21:18:40 2011 +0100
+++ b/src/org/tmatesoft/hg/repo/HgLookup.java	Wed Mar 23 14:13:11 2011 +0100
@@ -20,6 +20,7 @@
 import java.io.IOException;
 
 import org.tmatesoft.hg.core.HgException;
+import org.tmatesoft.hg.internal.DataAccessProvider;
 
 /**
  * Utility methods to find Mercurial repository at a given location
@@ -61,4 +62,11 @@
 			throw new HgException(location.toString(), ex);
 		}
 	}
+	
+	public HgBundle loadBundle(File location) throws HgException {
+		if (location == null || !location.canRead()) {
+			throw new IllegalArgumentException();
+		}
+		return new HgBundle(new DataAccessProvider(), location);
+	}
 }