changeset 673:545b1d4cc11d

Refactor HgBundle.GroupElement (clear experimental mark), resolve few technical debt issues
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Fri, 12 Jul 2013 20:14:24 +0200 (2013-07-12)
parents d2552e6a5af6
children cce0387c6041
files src/org/tmatesoft/hg/core/HgCloneCommand.java src/org/tmatesoft/hg/internal/BundleSerializer.java src/org/tmatesoft/hg/internal/ByteArrayDataAccess.java src/org/tmatesoft/hg/internal/ChangesetParser.java src/org/tmatesoft/hg/internal/DataAccessInputStream.java src/org/tmatesoft/hg/internal/DataSerializer.java src/org/tmatesoft/hg/internal/RevlogStreamWriter.java src/org/tmatesoft/hg/repo/HgBundle.java src/org/tmatesoft/hg/repo/HgChangelog.java src/org/tmatesoft/hg/repo/HgInternals.java src/org/tmatesoft/hg/repo/HgRemoteRepository.java
diffstat 11 files changed, 441 insertions(+), 258 deletions(-) [+]
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/core/HgCloneCommand.java	Fri Jul 12 16:29:06 2013 +0200
+++ b/src/org/tmatesoft/hg/core/HgCloneCommand.java	Fri Jul 12 20:14:24 2013 +0200
@@ -31,16 +31,19 @@
 import org.tmatesoft.hg.internal.ByteArrayDataAccess;
 import org.tmatesoft.hg.internal.DataAccess;
 import org.tmatesoft.hg.internal.DataSerializer;
+import org.tmatesoft.hg.internal.DataSerializer.ByteArrayDataSource;
 import org.tmatesoft.hg.internal.DigestHelper;
 import org.tmatesoft.hg.internal.FNCacheFile;
 import org.tmatesoft.hg.internal.Internals;
 import org.tmatesoft.hg.internal.Lifecycle;
+import org.tmatesoft.hg.internal.Patch;
 import org.tmatesoft.hg.internal.RepoInitializer;
 import org.tmatesoft.hg.internal.RevlogCompressor;
 import org.tmatesoft.hg.internal.RevlogStreamWriter;
 import org.tmatesoft.hg.internal.Transaction;
 import org.tmatesoft.hg.repo.HgBundle;
 import org.tmatesoft.hg.repo.HgBundle.GroupElement;
+import org.tmatesoft.hg.repo.HgInternals;
 import org.tmatesoft.hg.repo.HgInvalidControlFileException;
 import org.tmatesoft.hg.repo.HgInvalidStateException;
 import org.tmatesoft.hg.repo.HgLookup;
@@ -316,7 +319,8 @@
 					}
 				}
 				//
-				byte[] content = ge.apply(prevRevContent.byteArray());
+				Patch patch = HgInternals.patchFromData(ge);
+				byte[] content = patch.apply(prevRevContent, -1);
 				Nodeid p1 = ge.firstParent();
 				Nodeid p2 = ge.secondParent();
 				byte[] calculated = dh.sha1(p1, p2, content).asBinary();
@@ -340,23 +344,23 @@
 				//
 				revlogHeader.parents(knownRevision(p1), knownRevision(p2));
 				//
-				byte[] patchContent = ge.rawDataByteArray();
+				int patchSerializedLength = patch.serializedLength();
 				// no reason to keep patch if it's close (here, >75%) in size to the complete contents,
 				// save patching effort in this case
-				writeComplete = writeComplete || preferCompleteOverPatch(patchContent.length, content.length);
+				writeComplete = writeComplete || preferCompleteOverPatch(patchSerializedLength, content.length);
 
 				if (writeComplete) {
 					revlogHeader.baseRevision(revisionSequence.size());
 				}
 				assert revlogHeader.baseRevision() >= 0;
 
-				final byte[] sourceData = writeComplete ? content : patchContent;
-				revlogDataZip.reset(new DataSerializer.ByteArrayDataSource(sourceData));
+				DataSerializer.DataSource dataSource = writeComplete ? new ByteArrayDataSource(content) : patch.new PatchDataSource();
+				revlogDataZip.reset(dataSource);
 				final int compressedLen;
-				final boolean useUncompressedData = preferCompressedOverComplete(revlogDataZip.getCompressedLength(), sourceData.length);
+				final boolean useUncompressedData = preferCompressedOverComplete(revlogDataZip.getCompressedLength(), dataSource.serializeLength());
 				if (useUncompressedData) {
 					// compression wasn't too effective,
-					compressedLen = sourceData.length + 1 /*1 byte for 'u' - uncompressed prefix byte*/;
+					compressedLen = dataSource.serializeLength() + 1 /*1 byte for 'u' - uncompressed prefix byte*/;
 				} else {
 					compressedLen= revlogDataZip.getCompressedLength();
 				}
@@ -377,8 +381,8 @@
 				revlogHeader.serialize(sds);
 
 				if (useUncompressedData) {
-					indexFile.write((byte) 'u');
-					indexFile.write(sourceData);
+					sds.writeByte((byte) 'u');
+					dataSource.serialize(sds);
 				} else {
 					int actualCompressedLenWritten = revlogDataZip.writeCompressedData(sds);
 					if (actualCompressedLenWritten != compressedLen) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/tmatesoft/hg/internal/BundleSerializer.java	Fri Jul 12 20:14:24 2013 +0200
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2013 TMate Software Ltd
+ *  
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.internal;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+import org.tmatesoft.hg.core.HgIOException;
+import org.tmatesoft.hg.core.SessionContext;
+import org.tmatesoft.hg.repo.HgBundle;
+import org.tmatesoft.hg.repo.HgInternals;
+import org.tmatesoft.hg.repo.HgRuntimeException;
+import org.tmatesoft.hg.util.LogFacility;
+
+/**
+ * 
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public class BundleSerializer implements DataSerializer.DataSource {
+	private final LogFacility log;
+	private final File bundleFile;
+	
+	public static BundleSerializer newInstance(SessionContext ctx, HgBundle bundle) {
+		return new BundleSerializer(ctx.getLog(), HgInternals.getBundleFile(bundle));
+	}
+
+	public BundleSerializer(LogFacility logFacility, File bundleFile) {
+		log = logFacility;
+		this.bundleFile = bundleFile;
+	}
+
+	public void serialize(DataSerializer out) throws HgIOException, HgRuntimeException {
+		FileInputStream fis = null;
+		try {
+			fis = new FileInputStream(bundleFile);
+			byte[] buffer = new byte[8*1024];
+			int r;
+			while ((r = fis.read(buffer, 0, buffer.length)) > 0) {
+				out.write(buffer, 0, r);
+			}
+			
+		} catch (IOException ex) {
+			throw new HgIOException("Failed to serialize bundle", bundleFile);
+		} finally {
+			new FileUtils(log, this).closeQuietly(fis, bundleFile);
+		}
+	}
+
+	public int serializeLength() throws HgRuntimeException {
+		return Internals.ltoi(bundleFile.length());
+	}
+}
\ No newline at end of file
--- a/src/org/tmatesoft/hg/internal/ByteArrayDataAccess.java	Fri Jul 12 16:29:06 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/ByteArrayDataAccess.java	Fri Jul 12 20:14:24 2013 +0200
@@ -95,7 +95,7 @@
 		pos = offset;
 	}
 	@Override
-	public void skip(int bytes) throws IOException {
+	public void skip(int bytes) {
 		seek(pos + bytes);
 	}
 	@Override
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/tmatesoft/hg/internal/ChangesetParser.java	Fri Jul 12 20:14:24 2013 +0200
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2010-2013 TMate Software Ltd
+ *  
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.internal;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.tmatesoft.hg.core.Nodeid;
+import org.tmatesoft.hg.core.SessionContext;
+import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
+import org.tmatesoft.hg.repo.HgInvalidDataFormatException;
+import org.tmatesoft.hg.repo.HgRepository;
+
+/**
+ * @see mercurial/changelog.py:read()
+ * 
+ *      <pre>
+ *         format used:
+ *         nodeid\n        : manifest node in ascii
+ *         user\n          : user, no \n or \r allowed
+ *         time tz extra\n : date (time is int or float, timezone is int)
+ *                         : extra is metadatas, encoded and separated by '\0'
+ *                         : older versions ignore it
+ *         files\n\n       : files modified by the cset, no \n or \r allowed
+ *         (.*)            : comment (free text, ideally utf-8)
+ * 
+ *         changelog v0 doesn't use extra
+ * </pre>
+ * 
+ * Extracted from internals of HgChangelog (the code initially from inside RawChangeset)
+ * 
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public final class ChangesetParser {
+	private final EncodingHelper encHelper;
+	// it's likely user names get repeated again and again throughout repository. 
+	private final Pool<String> usersPool;
+	private final Pool<String> filesPool;
+	private final CsetFactory factory;
+	
+	public ChangesetParser(SessionContext.Source sessionContex, CsetFactory csetFactory) {
+		assert csetFactory != null;
+		encHelper = Internals.buildFileNameEncodingHelper(sessionContex);
+		usersPool = new Pool<String>();
+		filesPool = new Pool<String>();
+		factory = csetFactory;
+	}
+	
+	public void dispose() {
+		usersPool.clear();
+		filesPool.clear();
+	}
+
+	public RawChangeset parse(DataAccess da) throws IOException, HgInvalidDataFormatException {
+		byte[] data = da.byteArray();
+		return parse(data);
+	}
+	
+	public RawChangeset parse(byte[] data) throws HgInvalidDataFormatException {
+		return init(data, 0, data.length);
+	}
+
+	private RawChangeset init(byte[] data, int offset, int length) throws HgInvalidDataFormatException {
+		final int bufferEndIndex = offset + length;
+		final byte lineBreak = (byte) '\n';
+		int breakIndex1 = indexOf(data, lineBreak, offset, bufferEndIndex);
+		if (breakIndex1 == -1) {
+			throw new HgInvalidDataFormatException("Bad Changeset data");
+		}
+		Nodeid _nodeid = Nodeid.fromAscii(data, 0, breakIndex1);
+		int breakIndex2 = indexOf(data, lineBreak, breakIndex1 + 1, bufferEndIndex);
+		if (breakIndex2 == -1) {
+			throw new HgInvalidDataFormatException("Bad Changeset data");
+		}
+		String _user;
+		_user = encHelper.userFromChangeset(data, breakIndex1 + 1, breakIndex2 - breakIndex1 - 1);
+		_user = usersPool.unify(_user);
+
+		int breakIndex3 = indexOf(data, lineBreak, breakIndex2 + 1, bufferEndIndex);
+		if (breakIndex3 == -1) {
+			throw new HgInvalidDataFormatException("Bad Changeset data");
+		}
+		String _timeString = new String(data, breakIndex2 + 1, breakIndex3 - breakIndex2 - 1);
+		int space1 = _timeString.indexOf(' ');
+		if (space1 == -1) {
+			throw new HgInvalidDataFormatException(String.format("Bad Changeset data: %s in [%d..%d]", "time string", breakIndex2+1, breakIndex3));
+		}
+		int space2 = _timeString.indexOf(' ', space1 + 1);
+		if (space2 == -1) {
+			space2 = _timeString.length();
+		}
+		long unixTime = Long.parseLong(_timeString.substring(0, space1));
+		int _timezone = Integer.parseInt(_timeString.substring(space1 + 1, space2));
+		// unixTime is local time, and timezone records difference of the local time to UTC.
+		Date _time = new Date(unixTime * 1000);
+		String _extras = space2 < _timeString.length() ? _timeString.substring(space2 + 1) : null;
+		Map<String, String> _extrasMap = parseExtras(_extras);
+		//
+		int lastStart = breakIndex3 + 1;
+		int breakIndex4 = indexOf(data, lineBreak, lastStart, bufferEndIndex);
+		ArrayList<String> _files = null;
+		if (breakIndex4 > lastStart) {
+			// if breakIndex4 == lastStart, we already found \n\n and hence there are no files (e.g. merge revision)
+			_files = new ArrayList<String>(5);
+			while (breakIndex4 != -1 && breakIndex4 + 1 < bufferEndIndex) {
+				String fname = encHelper.fileFromChangeset(data, lastStart, breakIndex4 - lastStart);
+				_files.add(filesPool.unify(fname));
+				lastStart = breakIndex4 + 1;
+				if (data[breakIndex4 + 1] == lineBreak) {
+					// found \n\n
+					break;
+				} else {
+					breakIndex4 = indexOf(data, lineBreak, lastStart, bufferEndIndex);
+				}
+			}
+			if (breakIndex4 == -1 || breakIndex4 >= bufferEndIndex) {
+				throw new HgInvalidDataFormatException("Bad Changeset data");
+			}
+		} else {
+			breakIndex4--;
+		}
+		String _comment = encHelper.commentFromChangeset(data, breakIndex4 + 2, bufferEndIndex - breakIndex4 - 2);
+		RawChangeset target = factory.create(_nodeid, _user, _time, _timezone, _files, _comment, _extrasMap);
+		return target; 
+	}
+
+	private Map<String, String> parseExtras(String _extras) {
+		final String extras_branch_key = "branch";
+		_extras = _extras == null ? null : _extras.trim();
+		if (_extras == null || _extras.length() == 0) {
+			return Collections.singletonMap(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME);
+		}
+		Map<String, String> _extrasMap = new HashMap<String, String>();
+		int lastIndex = 0;
+		do {
+			String pair;
+			int sp = _extras.indexOf('\0', lastIndex);
+			if (sp == -1) {
+				sp = _extras.length();
+			}
+			if (sp > lastIndex) {
+				pair = _extras.substring(lastIndex, sp);
+				pair = decode(pair);
+				int eq = pair.indexOf(':');
+				_extrasMap.put(pair.substring(0, eq), pair.substring(eq + 1));
+				lastIndex = sp + 1;
+			}
+		} while (lastIndex < _extras.length());
+		if (!_extrasMap.containsKey(extras_branch_key)) {
+			_extrasMap.put(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME);
+		}
+		return Collections.unmodifiableMap(_extrasMap);
+	}
+
+	private static int indexOf(byte[] src, byte what, int startOffset, int endIndex) {
+		for (int i = startOffset; i < endIndex; i++) {
+			if (src[i] == what) {
+				return i;
+			}
+		}
+		return -1;
+	}
+	
+	private static String decode(String s) {
+		if (s != null && s.indexOf('\\') != -1) {
+			// TestAuxUtilities#testChangelogExtrasDecode
+			return s.replace("\\\\", "\\").replace("\\n", "\n").replace("\\r", "\r").replace("\\0", "\00");
+		}
+		return s;
+	}
+
+	public interface CsetFactory {
+		public RawChangeset create(Nodeid nodeid, String user, Date time, int timezone, List<String> files, String comment, Map<String, String> extrasMap);
+	}
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/tmatesoft/hg/internal/DataAccessInputStream.java	Fri Jul 12 20:14:24 2013 +0200
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2013 TMate Software Ltd
+ *  
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.internal;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Wrap our internal API into a public one
+ * 
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public class DataAccessInputStream extends InputStream {
+	
+	private final DataAccess da;
+	private int bytesLeft = -1;
+
+	public DataAccessInputStream(DataAccess dataAccess) {
+		da = dataAccess;
+	}
+	
+	@Override
+	public int available() throws IOException {
+		initAvailable();
+		return bytesLeft;
+	}
+	
+	@Override
+	public int read() throws IOException {
+		initAvailable();
+		if (bytesLeft == 0) {
+			return -1;
+		}
+		int rv = da.readByte();
+		bytesLeft--;
+		return rv;
+	}
+	
+	@Override
+	public int read(byte[] b, int off, int len) throws IOException {
+		initAvailable();
+		if (bytesLeft == 0) {
+			return -1;
+		}
+		if (len == 0) {
+			return 0;
+		}
+		int x = Math.min(len, bytesLeft);
+		da.readBytes(b, off, x);
+		bytesLeft -= x;
+		return x;
+	}
+
+
+	private void initAvailable() throws IOException {
+		da.reset();
+		if (bytesLeft == -1) {
+			bytesLeft = da.length();
+		}
+	}
+}
--- a/src/org/tmatesoft/hg/internal/DataSerializer.java	Fri Jul 12 16:29:06 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/DataSerializer.java	Fri Jul 12 20:14:24 2013 +0200
@@ -24,7 +24,7 @@
 import org.tmatesoft.hg.repo.HgRuntimeException;
 
 /**
- * Serialization friend of {@link DataAccess}
+ * Serialization friend of {@link DataAccess}, similar to OutputStream with few handy methods
  * 
  * @author Artem Tikhomirov
  * @author TMate Software Ltd.
--- a/src/org/tmatesoft/hg/internal/RevlogStreamWriter.java	Fri Jul 12 16:29:06 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/RevlogStreamWriter.java	Fri Jul 12 20:14:24 2013 +0200
@@ -30,6 +30,7 @@
 import org.tmatesoft.hg.internal.DataSerializer.ByteArraySerializer;
 import org.tmatesoft.hg.internal.DataSerializer.DataSource;
 import org.tmatesoft.hg.repo.HgBundle.GroupElement;
+import org.tmatesoft.hg.repo.HgInternals;
 import org.tmatesoft.hg.repo.HgInvalidControlFileException;
 import org.tmatesoft.hg.repo.HgInvalidDataFormatException;
 import org.tmatesoft.hg.repo.HgInvalidRevisionException;
@@ -89,12 +90,9 @@
 		int p1 = p1Rev.isNull() ? NO_REVISION : revlogRevs.revisionIndex(p1Rev);
 		final Nodeid p2Rev = ge.secondParent();
 		int p2 = p2Rev.isNull() ? NO_REVISION : revlogRevs.revisionIndex(p2Rev);
-		Patch p = new Patch();
-		final byte[] patchBytes;
+		Patch p = null;
 		try {
-			// XXX there's ge.rawData(), to avoid extra array wrap
-			patchBytes = ge.rawDataByteArray();
-			p.read(new ByteArrayDataAccess(patchBytes));
+			p = HgInternals.patchFromData(ge);
 		} catch (IOException ex) {
 			throw new HgIOException("Failed to read patch information", ex, null);
 		}
@@ -109,7 +107,7 @@
 			// we may write patch from GroupElement as is
 			int patchBaseLen = dataLength(patchBaseRev);
 			revLen = patchBaseLen + p.patchSizeDelta();
-			ds = new ByteArrayDataSource(patchBytes);
+			ds = p.new PatchDataSource();
 		} else {
 			// read baseRev, unless it's the pull to empty repository
 			try {
@@ -180,7 +178,7 @@
 		DataSerializer indexFile, dataFile;
 		indexFile = dataFile = null;
 		try {
-			//
+			// FIXME perhaps, not a good idea to open stream for each revision added (e.g, when we pull a lot of them)
 			indexFile = revlogStream.getIndexStreamWriter(transaction);
 			final boolean isInlineData = revlogStream.isInlineData();
 			HeaderWriter revlogHeader = new HeaderWriter(isInlineData);
--- a/src/org/tmatesoft/hg/repo/HgBundle.java	Fri Jul 12 16:29:06 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgBundle.java	Fri Jul 12 20:14:24 2013 +0200
@@ -17,8 +17,8 @@
 package org.tmatesoft.hg.repo;
 
 import java.io.File;
-import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.util.ConcurrentModificationException;
 
 import org.tmatesoft.hg.core.HgIOException;
@@ -27,18 +27,17 @@
 import org.tmatesoft.hg.internal.ByteArrayChannel;
 import org.tmatesoft.hg.internal.ByteArrayDataAccess;
 import org.tmatesoft.hg.internal.Callback;
+import org.tmatesoft.hg.internal.ChangesetParser;
 import org.tmatesoft.hg.internal.DataAccess;
+import org.tmatesoft.hg.internal.DataAccessInputStream;
 import org.tmatesoft.hg.internal.DataAccessProvider;
-import org.tmatesoft.hg.internal.DataSerializer;
 import org.tmatesoft.hg.internal.DigestHelper;
 import org.tmatesoft.hg.internal.EncodingHelper;
 import org.tmatesoft.hg.internal.Experimental;
-import org.tmatesoft.hg.internal.FileUtils;
 import org.tmatesoft.hg.internal.InflaterDataAccess;
 import org.tmatesoft.hg.internal.Internals;
 import org.tmatesoft.hg.internal.Lifecycle;
 import org.tmatesoft.hg.internal.Patch;
-import org.tmatesoft.hg.repo.HgChangelog.ChangesetParser;
 import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
 import org.tmatesoft.hg.util.Adaptable;
 import org.tmatesoft.hg.util.CancelledException;
@@ -54,9 +53,9 @@
 @Experimental(reason="API is not stable")
 public class HgBundle {
 
-	private final File bundleFile;
+	final File bundleFile;
 	private final DataAccessProvider accessProvider;
-	private final SessionContext ctx;
+	final SessionContext ctx;
 	private final EncodingHelper fnDecorer;
 	private Lifecycle.BasicCallback flowControl;
 
@@ -110,7 +109,7 @@
 	 * @param hgRepo repository that shall possess base revision for this bundle
 	 * @param inspector callback to get each changeset found 
 	 */
-	public void changes(final HgRepository hgRepo, final HgChangelog.Inspector inspector) throws HgRuntimeException {
+	public void changes(final HgRepository hgRepo, final HgChangelog.Inspector inspector) throws HgIOException, HgRuntimeException {
 		Inspector bundleInsp = new Inspector() {
 			DigestHelper dh = new DigestHelper();
 			boolean emptyChangelog = true;
@@ -121,7 +120,7 @@
 			public void changelogStart() {
 				emptyChangelog = true;
 				revisionIndex = 0;
-				csetBuilder = new ChangesetParser(hgRepo, true);
+				csetBuilder = new ChangesetParser(hgRepo, new HgChangelog.RawCsetFactory(true));
 			}
 
 			public void changelogEnd() {
@@ -153,7 +152,7 @@
 
 To recreate 30bd..e5, one have to take content of 9429..e0, not its p1 f1db..5e
  */
-			public boolean element(GroupElement ge) throws HgRuntimeException {
+			public boolean element(GroupElement ge) throws IOException, HgRuntimeException {
 				emptyChangelog = false;
 				HgChangelog changelog = hgRepo.getChangelog();
 				try {
@@ -172,20 +171,17 @@
 						}
 					}
 					//
-					byte[] csetContent = ge.apply(prevRevContent);
+					byte[] csetContent = ge.patch().apply(prevRevContent, -1);
 					dh = dh.sha1(ge.firstParent(), ge.secondParent(), csetContent); // XXX ge may give me access to byte[] content of nodeid directly, perhaps, I don't need DH to be friend of Nodeid?
 					if (!ge.node().equalsTo(dh.asBinary())) {
 						throw new HgInvalidStateException(String.format("Integrity check failed on %s, node: %s", bundleFile, ge.node().shortNotation()));
 					}
-					ByteArrayDataAccess csetDataAccess = new ByteArrayDataAccess(csetContent);
-					RawChangeset cs = csetBuilder.parse(csetDataAccess);
+					RawChangeset cs = csetBuilder.parse(csetContent);
 					inspector.next(revisionIndex++, ge.node(), cs);
 					prevRevContent.done();
-					prevRevContent = csetDataAccess.reset();
+					prevRevContent = new ByteArrayDataAccess(csetContent);
 				} catch (CancelledException ex) {
 					return false;
-				} catch (IOException ex) {
-					throw new HgInvalidFileException("Invalid bundle file", ex, bundleFile); // TODO post-1.0 revisit exception handling
 				} catch (HgInvalidDataFormatException ex) {
 					throw new HgInvalidControlFileException("Invalid bundle file", ex, bundleFile);
 				}
@@ -217,11 +213,12 @@
 		void fileEnd(String name) throws HgRuntimeException;
 
 		/**
-		 * XXX desperately need exceptions here
 		 * @param element data element, instance might be reused, don't keep a reference to it or its raw data
 		 * @return <code>true</code> to continue
+		 * @throws IOException propagated exception from {@link GroupElement#data()}
+		 * @throws HgRuntimeException propagated exception (subclass thereof) to indicate issues with the library. <em>Runtime exception</em>
 		 */
-		boolean element(GroupElement element) throws HgRuntimeException;
+		boolean element(GroupElement element) throws IOException, HgRuntimeException;
 	}
 
 	/**
@@ -229,7 +226,7 @@
 	 * @throws HgRuntimeException subclass thereof to indicate issues with the library. <em>Runtime exception</em>
 	 * @throws IllegalArgumentException if inspector argument is null
 	 */
-	public void inspectChangelog(Inspector inspector) throws HgRuntimeException {
+	public void inspectChangelog(Inspector inspector) throws HgIOException, HgRuntimeException {
 		if (inspector == null) {
 			throw new IllegalArgumentException();
 		}
@@ -239,7 +236,7 @@
 			da = getDataStream();
 			internalInspectChangelog(da, inspector);
 		} catch (IOException ex) {
-			throw new HgInvalidFileException("Bundle.inspectChangelog failed", ex, bundleFile);
+			throw new HgIOException("Failed to inspect changelog in the bundle", ex, bundleFile);
 		} finally {
 			if (da != null) {
 				da.done();
@@ -253,7 +250,7 @@
 	 * @throws HgRuntimeException subclass thereof to indicate issues with the library. <em>Runtime exception</em>
 	 * @throws IllegalArgumentException if inspector argument is null
 	 */
-	public void inspectManifest(Inspector inspector) throws HgRuntimeException {
+	public void inspectManifest(Inspector inspector) throws HgIOException, HgRuntimeException {
 		if (inspector == null) {
 			throw new IllegalArgumentException();
 		}
@@ -267,7 +264,7 @@
 			skipGroup(da); // changelog
 			internalInspectManifest(da, inspector);
 		} catch (IOException ex) {
-			throw new HgInvalidFileException("Bundle.inspectManifest failed", ex, bundleFile);
+			throw new HgIOException("Failed to inspect manifest in the bundle", ex, bundleFile);
 		} finally {
 			if (da != null) {
 				da.done();
@@ -281,7 +278,7 @@
 	 * @throws HgRuntimeException subclass thereof to indicate issues with the library. <em>Runtime exception</em>
 	 * @throws IllegalArgumentException if inspector argument is null
 	 */
-	public void inspectFiles(Inspector inspector) throws HgRuntimeException {
+	public void inspectFiles(Inspector inspector) throws HgIOException, HgRuntimeException {
 		if (inspector == null) {
 			throw new IllegalArgumentException();
 		}
@@ -299,7 +296,7 @@
 			skipGroup(da); // manifest
 			internalInspectFiles(da, inspector);
 		} catch (IOException ex) {
-			throw new HgInvalidFileException("Bundle.inspectFiles failed", ex, bundleFile);
+			throw new HgIOException("Failed to inspect files in the bundle", ex, bundleFile);
 		} finally {
 			if (da != null) {
 				da.done();
@@ -313,7 +310,7 @@
 	 * @throws HgRuntimeException subclass thereof to indicate issues with the library. <em>Runtime exception</em>
 	 * @throws IllegalArgumentException if inspector argument is null
 	 */
-	public void inspectAll(Inspector inspector) throws HgRuntimeException {
+	public void inspectAll(Inspector inspector) throws HgIOException, HgRuntimeException {
 		if (inspector == null) {
 			throw new IllegalArgumentException();
 		}
@@ -331,7 +328,7 @@
 			}
 			internalInspectFiles(da, inspector);
 		} catch (IOException ex) {
-			throw new HgInvalidFileException("Bundle.inspectAll failed", ex, bundleFile);
+			throw new HgIOException("Failed to inspect bundle", ex, bundleFile);
 		} finally {
 			if (da != null) {
 				da.done();
@@ -453,13 +450,15 @@
 		}
 	}
 
-	@Experimental(reason="Cumbersome API, rawData and apply with byte[] perhaps need replacement with ByteChannel/ByteBuffer, and better Exceptions. Perhaps, shall split into interface and impl")
-	public static class GroupElement {
+	/**
+	 * Describes single element (a.k.a. chunk) of the group, either changelog, manifest or a file. 
+	 */
+	public static final class GroupElement {
 		private final byte[] header; // byte[80] takes 120 bytes, 4 Nodeids - 192
 		private final DataAccess dataAccess;
+		private final Nodeid deltaBase;
 		private Patch patches;
-		private final Nodeid deltaBase;
-
+		
 		GroupElement(byte[] fourNodeids, Nodeid deltaBaseRev, DataAccess rawDataAccess) {
 			assert fourNodeids != null && fourNodeids.length == 80;
 			header = fourNodeids;
@@ -507,16 +506,15 @@
 			return deltaBase == null ? firstParent() : deltaBase;
 		}
 		
-		public byte[] rawDataByteArray() throws IOException { // XXX IOException or HgInvalidFileException?
-			return rawData().byteArray();
-		}
-		
-		public byte[] apply(byte[] baseContent) throws IOException {
-			return apply(new ByteArrayDataAccess(baseContent));
-		}
-
-		/*package-local*/ DataAccess rawData() {
-			return dataAccess;
+		/**
+		 * Read data of the group element. 
+		 * Note, {@link InputStream streams} obtained from several calls to this method
+		 * can't be read simultaneously.
+		 *  
+		 * @return stream to access content of this group element, never <code>null</code>
+		 */
+		public InputStream data() {
+			return new DataAccessInputStream(dataAccess);
 		}
 		
 		/*package-local*/ Patch patch() throws IOException {
@@ -528,10 +526,6 @@
 			return patches;
 		}
 
-		/*package-local*/ byte[] apply(DataAccess baseContent) throws IOException {
-			return patch().apply(baseContent, -1);
-		}
-		
 		public String toString() {
 			int patchCount;
 			try {
@@ -543,29 +537,4 @@
 			return String.format("%s %s %s %s; patches:%d\n", node().shortNotation(), firstParent().shortNotation(), secondParent().shortNotation(), cset().shortNotation(), patchCount);
 		}
 	}
-
-	@Experimental(reason="Work in progress, not an API")
-	public class BundleSerializer implements DataSerializer.DataSource {
-
-		public void serialize(DataSerializer out) throws HgIOException, HgRuntimeException {
-			FileInputStream fis = null;
-			try {
-				fis = new FileInputStream(HgBundle.this.bundleFile);
-				byte[] buffer = new byte[8*1024];
-				int r;
-				while ((r = fis.read(buffer, 0, buffer.length)) > 0) {
-					out.write(buffer, 0, r);
-				}
-				
-			} catch (IOException ex) {
-				throw new HgIOException("Failed to serialize bundle", HgBundle.this.bundleFile);
-			} finally {
-				new FileUtils(HgBundle.this.ctx.getLog(), this).closeQuietly(fis, HgBundle.this.bundleFile);
-			}
-		}
-
-		public int serializeLength() throws HgRuntimeException {
-			return Internals.ltoi(HgBundle.this.bundleFile.length());
-		}
-	}
 }
--- a/src/org/tmatesoft/hg/repo/HgChangelog.java	Fri Jul 12 16:29:06 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgChangelog.java	Fri Jul 12 20:14:24 2013 +0200
@@ -20,10 +20,8 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Calendar;
-import java.util.Collections;
 import java.util.Date;
 import java.util.Formatter;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
@@ -32,12 +30,10 @@
 import org.tmatesoft.hg.core.Nodeid;
 import org.tmatesoft.hg.core.SessionContext;
 import org.tmatesoft.hg.internal.Callback;
+import org.tmatesoft.hg.internal.ChangesetParser;
 import org.tmatesoft.hg.internal.DataAccess;
-import org.tmatesoft.hg.internal.EncodingHelper;
-import org.tmatesoft.hg.internal.Internals;
 import org.tmatesoft.hg.internal.Lifecycle;
 import org.tmatesoft.hg.internal.LifecycleBridge;
-import org.tmatesoft.hg.internal.Pool;
 import org.tmatesoft.hg.internal.RevlogStream;
 import org.tmatesoft.hg.util.Adaptable;
 import org.tmatesoft.hg.util.CancelSupport;
@@ -150,14 +146,14 @@
 	 */
 	public static final class RawChangeset implements Cloneable /* for those that would like to keep a copy */{
 		// would be nice to get it immutable, but then we can't reuse instances
-		private/* final */Nodeid manifest;
-		private String user;
-		private String comment;
-		private String[] files; // shall not be modified (#clone() does shallow copy)
-		private Date time;
-		private int timezone;
+		/* final */Nodeid manifest;
+		String user;
+		String comment;
+		String[] files; // shall not be modified (#clone() does shallow copy)
+		Date time;
+		int timezone;
 		// http://mercurial.selenic.com/wiki/PruningDeadBranches - Closing changesets can be identified by close=1 in the changeset's extra field.
-		private Map<String, String> extras;
+		Map<String, String> extras;
 
 		private RawChangeset() {
 		}
@@ -241,170 +237,33 @@
 		}
 	}
 	
-	/**
-	 * @see mercurial/changelog.py:read()
-	 * 
-	 *      <pre>
-	 *         format used:
-	 *         nodeid\n        : manifest node in ascii
-	 *         user\n          : user, no \n or \r allowed
-	 *         time tz extra\n : date (time is int or float, timezone is int)
-	 *                         : extra is metadatas, encoded and separated by '\0'
-	 *                         : older versions ignore it
-	 *         files\n\n       : files modified by the cset, no \n or \r allowed
-	 *         (.*)            : comment (free text, ideally utf-8)
-	 * 
-	 *         changelog v0 doesn't use extra
-	 * </pre>
-	 */
-	/*package-local*/static final class ChangesetParser {
-		private final EncodingHelper encHelper;
-		// it's likely user names get repeated again and again throughout repository. 
-		private final Pool<String> usersPool;
-		private final Pool<String> filesPool;
-		private final boolean reuseChangesetInstance;
-		private RawChangeset target;
-		
-		public ChangesetParser(SessionContext.Source sessionContex, boolean shallReuseCsetInstance) {
-			encHelper = Internals.buildFileNameEncodingHelper(sessionContex);
-			usersPool = new Pool<String>();
-			filesPool = new Pool<String>();
-			reuseChangesetInstance = shallReuseCsetInstance;
+	/*package-local*/static final class RawCsetFactory implements ChangesetParser.CsetFactory {
+		private RawChangeset cset;
+
+		public RawCsetFactory(boolean shallReuseCsetInstance) {
 			if (shallReuseCsetInstance) {
-				target = new RawChangeset();
+				cset = new RawChangeset();
 			}
 		}
-		
-		public void dispose() {
-			usersPool.clear();
-			filesPool.clear();
-		}
-
-		public RawChangeset parse(DataAccess da) throws IOException, HgInvalidDataFormatException {
-			byte[] data = da.byteArray();
-			if (!reuseChangesetInstance) {
-				target = new RawChangeset();
-			}
-			init(data, 0, data.length);
-			return target;
-		}
 
-		private void init(byte[] data, int offset, int length) throws HgInvalidDataFormatException {
-			final int bufferEndIndex = offset + length;
-			final byte lineBreak = (byte) '\n';
-			int breakIndex1 = indexOf(data, lineBreak, offset, bufferEndIndex);
-			if (breakIndex1 == -1) {
-				throw new HgInvalidDataFormatException("Bad Changeset data");
-			}
-			Nodeid _nodeid = Nodeid.fromAscii(data, 0, breakIndex1);
-			int breakIndex2 = indexOf(data, lineBreak, breakIndex1 + 1, bufferEndIndex);
-			if (breakIndex2 == -1) {
-				throw new HgInvalidDataFormatException("Bad Changeset data");
-			}
-			String _user;
-			_user = encHelper.userFromChangeset(data, breakIndex1 + 1, breakIndex2 - breakIndex1 - 1);
-			_user = usersPool.unify(_user);
-
-			int breakIndex3 = indexOf(data, lineBreak, breakIndex2 + 1, bufferEndIndex);
-			if (breakIndex3 == -1) {
-				throw new HgInvalidDataFormatException("Bad Changeset data");
-			}
-			String _timeString = new String(data, breakIndex2 + 1, breakIndex3 - breakIndex2 - 1);
-			int space1 = _timeString.indexOf(' ');
-			if (space1 == -1) {
-				throw new HgInvalidDataFormatException(String.format("Bad Changeset data: %s in [%d..%d]", "time string", breakIndex2+1, breakIndex3));
-			}
-			int space2 = _timeString.indexOf(' ', space1 + 1);
-			if (space2 == -1) {
-				space2 = _timeString.length();
+		public RawChangeset create(Nodeid nodeidManifest, String user, Date time, int timezone, List<String> files, String comment, Map<String, String> extrasMap) {
+			RawChangeset target;
+			if (cset != null) {
+				target = cset;
+			} else {
+				target = new RawChangeset();
 			}
-			long unixTime = Long.parseLong(_timeString.substring(0, space1));
-			int _timezone = Integer.parseInt(_timeString.substring(space1 + 1, space2));
-			// unixTime is local time, and timezone records difference of the local time to UTC.
-			Date _time = new Date(unixTime * 1000);
-			String _extras = space2 < _timeString.length() ? _timeString.substring(space2 + 1) : null;
-			Map<String, String> _extrasMap = parseExtras(_extras);
-			//
-			int lastStart = breakIndex3 + 1;
-			int breakIndex4 = indexOf(data, lineBreak, lastStart, bufferEndIndex);
-			ArrayList<String> _files = null;
-			if (breakIndex4 > lastStart) {
-				// if breakIndex4 == lastStart, we already found \n\n and hence there are no files (e.g. merge revision)
-				_files = new ArrayList<String>(5);
-				while (breakIndex4 != -1 && breakIndex4 + 1 < bufferEndIndex) {
-					String fname = encHelper.fileFromChangeset(data, lastStart, breakIndex4 - lastStart);
-					_files.add(filesPool.unify(fname));
-					lastStart = breakIndex4 + 1;
-					if (data[breakIndex4 + 1] == lineBreak) {
-						// found \n\n
-						break;
-					} else {
-						breakIndex4 = indexOf(data, lineBreak, lastStart, bufferEndIndex);
-					}
-				}
-				if (breakIndex4 == -1 || breakIndex4 >= bufferEndIndex) {
-					throw new HgInvalidDataFormatException("Bad Changeset data");
-				}
-			} else {
-				breakIndex4--;
-			}
-			String _comment = encHelper.commentFromChangeset(data, breakIndex4 + 2, bufferEndIndex - breakIndex4 - 2);
-			// change this instance at once, don't leave it partially changes in case of error
-			target.manifest = _nodeid;
-			target.user = _user;
-			target.time = _time;
-			target.timezone = _timezone;
-			target.files = _files == null ? new String[0] : _files.toArray(new String[_files.size()]);
-			target.comment = _comment;
-			target.extras = _extrasMap;
-		}
-
-		private Map<String, String> parseExtras(String _extras) {
-			final String extras_branch_key = "branch";
-			_extras = _extras == null ? null : _extras.trim();
-			if (_extras == null || _extras.length() == 0) {
-				return Collections.singletonMap(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME);
-			}
-			Map<String, String> _extrasMap = new HashMap<String, String>();
-			int lastIndex = 0;
-			do {
-				String pair;
-				int sp = _extras.indexOf('\0', lastIndex);
-				if (sp == -1) {
-					sp = _extras.length();
-				}
-				if (sp > lastIndex) {
-					pair = _extras.substring(lastIndex, sp);
-					pair = decode(pair);
-					int eq = pair.indexOf(':');
-					_extrasMap.put(pair.substring(0, eq), pair.substring(eq + 1));
-					lastIndex = sp + 1;
-				}
-			} while (lastIndex < _extras.length());
-			if (!_extrasMap.containsKey(extras_branch_key)) {
-				_extrasMap.put(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME);
-			}
-			return Collections.unmodifiableMap(_extrasMap);
-		}
-
-		private static int indexOf(byte[] src, byte what, int startOffset, int endIndex) {
-			for (int i = startOffset; i < endIndex; i++) {
-				if (src[i] == what) {
-					return i;
-				}
-			}
-			return -1;
-		}
-		
-		private static String decode(String s) {
-			if (s != null && s.indexOf('\\') != -1) {
-				// TestAuxUtilities#testChangelogExtrasDecode
-				return s.replace("\\\\", "\\").replace("\\n", "\n").replace("\\r", "\r").replace("\\0", "\00");
-			}
-			return s;
+			target.manifest = nodeidManifest;
+			target.user = user;
+			target.time = time;
+			target.timezone = timezone;
+			target.files = files == null ? new String[0] : files.toArray(new String[files.size()]);
+			target.comment = comment;
+			target.extras = extrasMap;
+			return target;
 		}
 	}
-
+	
 	private static class RawCsetCollector implements Inspector {
 		final ArrayList<RawChangeset> result;
 		
@@ -430,7 +289,7 @@
 		public RawCsetParser(SessionContext.Source sessionContext, HgChangelog.Inspector delegate) {
 			assert delegate != null;
 			inspector = delegate;
-			csetBuilder = new ChangesetParser(sessionContext, true);
+			csetBuilder = new ChangesetParser(sessionContext, new RawCsetFactory(true));
 			inspectorLifecycle = Adaptable.Factory.getAdapter(delegate, Lifecycle.class, null);
 			if (inspectorLifecycle == null) {
 				ProgressSupport ph = Adaptable.Factory.getAdapter(delegate, ProgressSupport.class, null);
--- a/src/org/tmatesoft/hg/repo/HgInternals.java	Fri Jul 12 16:29:06 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgInternals.java	Fri Jul 12 20:14:24 2013 +0200
@@ -28,8 +28,10 @@
 import org.tmatesoft.hg.core.Nodeid;
 import org.tmatesoft.hg.internal.Experimental;
 import org.tmatesoft.hg.internal.Internals;
+import org.tmatesoft.hg.internal.Patch;
 import org.tmatesoft.hg.internal.RelativePathRewrite;
 import org.tmatesoft.hg.internal.WinToNixPathRewrite;
+import org.tmatesoft.hg.repo.HgBundle.GroupElement;
 import org.tmatesoft.hg.repo.HgSubrepoLocation.Kind;
 import org.tmatesoft.hg.util.FileIterator;
 import org.tmatesoft.hg.util.FileWalker;
@@ -114,8 +116,19 @@
 		br.close();
 		return hgIgnore;
 	}
+	
+	// XXX just to access package local method. Perhaps, GroupElement shall be redesigned
+	// to allow classes from .internal to access its details?
+	// or Patch may become public?
+	public static Patch patchFromData(GroupElement ge) throws IOException {
+		return ge.patch();
+	}
 
-	// in fact, need a setter for this anyway, shall move to internal.Internals perhaps?
+	public static File getBundleFile(HgBundle bundle) {
+		return bundle.bundleFile;
+	}
+
+	// TODO in fact, need a setter for this anyway, shall move to internal.Internals perhaps?
 	public String getNextCommitUsername() {
 		String hgUser = System.getenv("HGUSER");
 		if (hgUser != null && hgUser.trim().length() > 0) {
--- a/src/org/tmatesoft/hg/repo/HgRemoteRepository.java	Fri Jul 12 16:29:06 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgRemoteRepository.java	Fri Jul 12 20:14:24 2013 +0200
@@ -65,6 +65,7 @@
 import org.tmatesoft.hg.core.SessionContext;
 import org.tmatesoft.hg.internal.DataSerializer;
 import org.tmatesoft.hg.internal.DataSerializer.OutputStreamSerializer;
+import org.tmatesoft.hg.internal.BundleSerializer;
 import org.tmatesoft.hg.internal.EncodingHelper;
 import org.tmatesoft.hg.internal.Internals;
 import org.tmatesoft.hg.internal.PropertyMarshal;
@@ -422,7 +423,7 @@
 		StringBuilder sb = appendNodeidListArgument("heads", remoteHeads, null);
 		
 		HttpURLConnection c = null;
-		DataSerializer.DataSource bundleData = bundle.new BundleSerializer();
+		DataSerializer.DataSource bundleData = BundleSerializer.newInstance(sessionContext, bundle);
 		try {
 			URL u = new URL(url, url.getPath() + "?cmd=unbundle&" + sb.toString());
 			c = setupConnection(u.openConnection());