changeset 667:fba85bc1dfb8

Refactoring: move all encoding/decoding operations into single place, EncodingHelper
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Thu, 11 Jul 2013 17:54:08 +0200
parents 27a3ddedd6cc
children d25f0324a27a
files src/org/tmatesoft/hg/core/SessionContext.java src/org/tmatesoft/hg/internal/BundleGenerator.java src/org/tmatesoft/hg/internal/ChangelogEntryBuilder.java src/org/tmatesoft/hg/internal/CommitFacility.java src/org/tmatesoft/hg/internal/EncodingHelper.java src/org/tmatesoft/hg/internal/FNCacheFile.java src/org/tmatesoft/hg/internal/Internals.java src/org/tmatesoft/hg/repo/HgBundle.java src/org/tmatesoft/hg/repo/HgChangelog.java
diffstat 9 files changed, 261 insertions(+), 133 deletions(-) [+]
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/core/SessionContext.java	Wed Jul 10 20:22:07 2013 +0200
+++ b/src/org/tmatesoft/hg/core/SessionContext.java	Thu Jul 11 17:54:08 2013 +0200
@@ -72,4 +72,16 @@
 	public interface Source {
 		SessionContext getSessionContext();
 	}
+	
+	public static final class SourcePrim implements Source { // minimal Source: hands back the SessionContext given at construction
+		private final SessionContext ctx;
+
+		public SourcePrim(SessionContext sessionContext) {
+			assert sessionContext != null; // enforced only when assertions are enabled (-ea); otherwise a null is stored as is
+			ctx = sessionContext;
+		}
+		public SessionContext getSessionContext() {
+			return ctx;
+		}
+	}
 }
--- a/src/org/tmatesoft/hg/internal/BundleGenerator.java	Wed Jul 10 20:22:07 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/BundleGenerator.java	Thu Jul 11 17:54:08 2013 +0200
@@ -101,6 +101,7 @@
 		new ChunkGenerator(outRaw, clogMap).iterate(manifestStream, manifestRevs.toArray(true));
 		outRaw.writeInt(0); // null chunk for manifest group
 		//
+		EncodingHelper fnEncoder = repo.buildFileNameEncodingHelper();
 		for (HgDataFile df : sortedByName(files)) {
 			RevlogStream s = repo.getImplAccess().getStream(df);
 			final IntVector fileRevs = new IntVector();
@@ -117,7 +118,7 @@
 				// although BundleFormat page says "filename length, filename" for a file,
 				// in fact there's a sort of 'filename chunk', i.e. filename length field includes
 				// not only length of filename, but also length of the field itself, i.e. filename.length+sizeof(int)
-				byte[] fnameBytes = df.getPath().toString().getBytes(); // FIXME check encoding in native hg (and fix accordingly in HgBundle)
+				byte[] fnameBytes = fnEncoder.toBundle(df.getPath());
 				outRaw.writeInt(fnameBytes.length + 4);
 				outRaw.writeByte(fnameBytes);
 				new ChunkGenerator(outRaw, clogMap).iterate(s, fileRevs.toArray(true));
--- a/src/org/tmatesoft/hg/internal/ChangelogEntryBuilder.java	Wed Jul 10 20:22:07 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/ChangelogEntryBuilder.java	Thu Jul 11 17:54:08 2013 +0200
@@ -17,7 +17,6 @@
 package org.tmatesoft.hg.internal;
 
 import java.io.ByteArrayOutputStream;
-import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
@@ -31,7 +30,6 @@
 import org.tmatesoft.hg.core.HgIOException;
 import org.tmatesoft.hg.core.Nodeid;
 import org.tmatesoft.hg.internal.DataSerializer.DataSource;
-import org.tmatesoft.hg.repo.HgInvalidStateException;
 import org.tmatesoft.hg.util.Path;
 
 /**
@@ -41,6 +39,7 @@
  */
 public class ChangelogEntryBuilder implements DataSource {
 
+	private final EncodingHelper encHelper;
 	private String user;
 	private List<Path> modifiedFiles;
 	private final Map<String, String> extrasMap = new LinkedHashMap<String, String>();
@@ -49,6 +48,10 @@
 	private Nodeid manifestRev;
 	private CharSequence comment;
 	
+	ChangelogEntryBuilder(EncodingHelper encodingHelper) { // package-local; build() uses the helper for user, file list and comment bytes
+		encHelper = encodingHelper;
+	}
+	
 	public ChangelogEntryBuilder user(String username) {
 		user = username;
 		return this;
@@ -116,36 +119,32 @@
 	}
 
 	public byte[] build() { // layout written: manifest id, LF, user, LF, date [SP extras], LF, files, LF, LF, comment
-		try {
-			ByteArrayOutputStream out = new ByteArrayOutputStream();
-			final int LF = '\n';
-			CharSequence extras = buildExtras();
-			CharSequence files = buildFiles();
-			byte[] manifestRevision = manifestRev.toString().getBytes();
-			byte[] username = user().getBytes(EncodingHelper.getUTF8().name()); // XXX Java 1.5
-			out.write(manifestRevision, 0, manifestRevision.length);
-			out.write(LF);
-			out.write(username, 0, username.length);
-			out.write(LF);
-			final long csetDate = csetTime();
-			byte[] date = String.format("%d %d", csetDate, csetTimezone(csetDate)).getBytes();
-			out.write(date, 0, date.length);
-			if (extras.length() > 0) {
-				out.write(' ');
-				byte[] b = extras.toString().getBytes();
-				out.write(b, 0, b.length);
-			}
-			out.write(LF);
-			byte[] b = files.toString().getBytes();
+		ByteArrayOutputStream out = new ByteArrayOutputStream();
+		final int LF = '\n';
+		CharSequence extras = buildExtras();
+		CharSequence files = buildFiles();
+		byte[] manifestRevision = manifestRev.toString().getBytes();
+		byte[] username = encHelper.userToChangeset(user()); // user name goes through the helper's Unicode encoder
+		out.write(manifestRevision, 0, manifestRevision.length);
+		out.write(LF);
+		out.write(username, 0, username.length);
+		out.write(LF);
+		final long csetDate = csetTime();
+		byte[] date = String.format("%d %d", csetDate, csetTimezone(csetDate)).getBytes();
+		out.write(date, 0, date.length);
+		if (extras.length() > 0) {
+			out.write(' ');
+			byte[] b = extras.toString().getBytes(); // NOTE(review): platform-default charset; extras bypass encHelper - confirm this is intended
 			out.write(b, 0, b.length);
-			out.write(LF);
-			out.write(LF);
-			byte[] cmt = comment.toString().getBytes(EncodingHelper.getUTF8().name()); // XXX Java 1.5
-			out.write(cmt, 0, cmt.length);
-			return out.toByteArray();
-		} catch (UnsupportedEncodingException ex) {
-			throw new HgInvalidStateException(ex.getMessage()); // Can't happen, UTF8 is always there
 		}
+		out.write(LF);
+		byte[] b = encHelper.fileToChangeset(files); // file list is encoded with the filename charset, unlike user and comment
+		out.write(b, 0, b.length);
+		out.write(LF);
+		out.write(LF);
+		byte[] cmt = encHelper.commentToChangeset(comment); // comment goes through the helper's Unicode encoder
+		out.write(cmt, 0, cmt.length);
+		return out.toByteArray();
 	}
 
 	private CharSequence buildExtras() {
--- a/src/org/tmatesoft/hg/internal/CommitFacility.java	Wed Jul 10 20:22:07 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/CommitFacility.java	Thu Jul 11 17:54:08 2013 +0200
@@ -164,8 +164,10 @@
 			}
 		}
 		//
+		final EncodingHelper encHelper = repo.buildFileNameEncodingHelper();
+		//
 		// Manifest
-		final ManifestEntryBuilder manifestBuilder = new ManifestEntryBuilder(repo.buildFileNameEncodingHelper());
+		final ManifestEntryBuilder manifestBuilder = new ManifestEntryBuilder(encHelper);
 		for (Map.Entry<Path, Nodeid> me : newManifestRevision.entrySet()) {
 			manifestBuilder.add(me.getKey().toString(), me.getValue());
 		}
@@ -173,7 +175,7 @@
 		Nodeid manifestRev = manifestWriter.addRevision(manifestBuilder, clogRevisionIndex, manifestParents.first(), manifestParents.second()).second();
 		//
 		// Changelog
-		final ChangelogEntryBuilder changelogBuilder = new ChangelogEntryBuilder();
+		final ChangelogEntryBuilder changelogBuilder = new ChangelogEntryBuilder(encHelper);
 		changelogBuilder.setModified(files.keySet());
 		changelogBuilder.branch(branch == null ? DEFAULT_BRANCH_NAME : branch);
 		changelogBuilder.user(String.valueOf(user));
--- a/src/org/tmatesoft/hg/internal/EncodingHelper.java	Wed Jul 10 20:22:07 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/EncodingHelper.java	Thu Jul 11 17:54:08 2013 +0200
@@ -18,6 +18,7 @@
 
 import static org.tmatesoft.hg.util.LogFacility.Severity.Error;
 
+import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.CharacterCodingException;
@@ -26,15 +27,19 @@
 import java.nio.charset.CharsetEncoder;
 
 import org.tmatesoft.hg.core.SessionContext;
+import org.tmatesoft.hg.repo.HgInvalidStateException;
 
 /**
  * Keep all encoding-related issues in the single place
  * NOT thread-safe (encoder and decoder requires synchronized access)
+ * 
+ * @see http://mercurial.selenic.com/wiki/EncodingStrategy
+ * @see http://mercurial.selenic.com/wiki/WindowsUTF8Plan
+ * @see http://mercurial.selenic.com/wiki/CharacterEncodingOnWindows
  * @author Artem Tikhomirov
  * @author TMate Software Ltd.
  */
 public class EncodingHelper {
-	// XXX perhaps, shall not be full of statics, but rather an instance coming from e.g. HgRepository?
 	/*
 	 * To understand what Mercurial thinks of UTF-8 and Unix byte approach to names, see
 	 * http://mercurial.808500.n3.nabble.com/Unicode-support-request-td3430704.html
@@ -43,11 +48,21 @@
 	private final SessionContext sessionContext;
 	private final CharsetEncoder encoder;
 	private final CharsetDecoder decoder;
+	private final CharsetEncoder utfEncoder;
+	private final CharsetDecoder utfDecoder;
 	
-	EncodingHelper(Charset fsEncoding, SessionContext ctx) {
-		sessionContext = ctx;
+	EncodingHelper(Charset fsEncoding, SessionContext.Source ctx) { // fsEncoding: charset for file names; ctx: supplies the session context this helper logs to
+		sessionContext = ctx.getSessionContext();
 		decoder = fsEncoding.newDecoder();
 		encoder = fsEncoding.newEncoder();
+		Charset utf8 = getUTF8();
+		if (fsEncoding.equals(utf8)) { // filename charset is UTF-8 already: share one encoder/decoder pair
+			utfDecoder = decoder;
+			utfEncoder = encoder;
+		} else {
+			utfDecoder = utf8.newDecoder();
+			utfEncoder = utf8.newEncoder();
+		}
 	}
 
 	/**
@@ -65,7 +80,7 @@
 			// perhaps, can return byte[0] in this case?
 			throw new IllegalArgumentException();
 		}
-		return encodeWithSystemDefaultFallback(s);
+		return toArray(encodeWithSystemDefaultFallback(s));
 	}
 
 	/**
@@ -79,9 +94,51 @@
 		if (fname == null) {
 			throw new IllegalArgumentException();
 		}
+		return toArray(encodeWithSystemDefaultFallback(fname));
+	}
+	
+	/**
+	 * Encode a file name for the fncache file; the encoder's buffer is returned as is (no copy into byte[])
+	 */
+	public ByteBuffer toFNCache(CharSequence fname) {
 		return encodeWithSystemDefaultFallback(fname);
 	}
+	
+	public byte[] toBundle(CharSequence fname) {
+		// Mercurial transfers file names in the local encoding (author's observation),
+		// so when the local encoding differs from the server's and names contain
+		// non-ASCII characters, they are likely to be misinterpreted
+		return toArray(encodeWithSystemDefaultFallback(fname));
+	}
+	public String fromBundle(byte[] data, int start, int length) { // inverse of toBundle: decodes data[start, start+length) with the filename charset
+		return decodeWithSystemDefaultFallback(data, start, length);
+	}
+	
+	
+	public String userFromChangeset(byte[] data, int start, int length) { // user name: Unicode decoder first, ISO-8859-1 when the bytes do not decode
+		return decodeUnicodeWithFallback(data, start, length);
+	}
+	
+	public String commentFromChangeset(byte[] data, int start, int length) { // commit comment: same decoding rules as the user name
+		return decodeUnicodeWithFallback(data, start, length);
+	}
+	
+	public String fileFromChangeset(byte[] data, int start, int length) { // file names use the filename charset, not the Unicode decoder
+		return decodeWithSystemDefaultFallback(data, start, length);
+	}
 
+	public byte[] userToChangeset(CharSequence user) { // counterpart of userFromChangeset; uses the Unicode encoder regardless of the filename charset
+		return toArray(encodeUnicode(user));
+	}
+	
+	public byte[] commentToChangeset(CharSequence comment) { // counterpart of commentFromChangeset; uses the Unicode encoder
+		return toArray(encodeUnicode(comment));
+	}
+	
+	public byte[] fileToChangeset(CharSequence file) { // counterpart of fileFromChangeset; uses the filename charset
+		return toArray(encodeWithSystemDefaultFallback(file));
+	}
+	
 	private String decodeWithSystemDefaultFallback(byte[] data, int start, int length) {
 		try {
 			return decoder.decode(ByteBuffer.wrap(data, start, length)).toString();
@@ -92,18 +149,53 @@
 		}
 	}
 	
-	private byte[] encodeWithSystemDefaultFallback(CharSequence s) {
+	private ByteBuffer encodeWithSystemDefaultFallback(CharSequence s) { // encodes with the filename charset; on failure logs and falls back to platform-default bytes
 		try {
 			// synchronized(encoder) {
-			ByteBuffer bb = encoder.encode(CharBuffer.wrap(s));
+			return encoder.encode(CharBuffer.wrap(s));
 			// }
-			byte[] rv = new byte[bb.remaining()];
-			bb.get(rv, 0, rv.length);
-			return rv;
 		} catch (CharacterCodingException ex) {
 			sessionContext.getLog().dump(getClass(), Error, ex, String.format("Use of charset %s failed, resort to system default", charset().name()));
 			// resort to the system-default charset (String.getBytes() without a charset argument)
-			return s.toString().getBytes();
+			return ByteBuffer.wrap(s.toString().getBytes());
+		}
+	}
+
+	private byte[] toArray(ByteBuffer bb) { // returns the remaining bytes of bb, avoiding a copy when the backing array holds exactly those bytes
+		byte[] rv;
+		if (bb.hasArray() && bb.arrayOffset() == 0) {
+			rv = bb.array();
+			if (rv.length == bb.remaining()) { // with zero offset, equal length implies position 0 and limit == capacity
+				return rv;
+			}
+			// fall through to the copying path
+		}
+		rv = new byte[bb.remaining()];
+		bb.get(rv, 0, rv.length);
+		return rv;
+	}
+
+	private String decodeUnicodeWithFallback(byte[] data, int start, int length) { // tries utfDecoder; input it rejects is decoded as ISO-8859-1 instead
+		try {
+			return utfDecoder.decode(ByteBuffer.wrap(data, start, length)).toString();
+		} catch (CharacterCodingException ex) {
+			// TODO post-1.2 respect ui.fallbackencoding actual setting
+			return new String(data, start, length, Charset.forName("ISO-8859-1"));
+		}
+	}
+	
+	private ByteBuffer encodeUnicode(CharSequence s) {
+		// the encoder throws on input it cannot encode; the catch block then retries through String.getBytes
+		try {
+			return utfEncoder.encode(CharBuffer.wrap(s));
+		} catch (CharacterCodingException ex) {
+			byte[] rv;
+			try {
+				rv = s.toString().getBytes(getUTF8().name()); // XXX Java 1.5
+			} catch (UnsupportedEncodingException e) {
+				throw new HgInvalidStateException("Unexpected error trying to get UTF-8 encoding"); // NOTE(review): e is not passed along as cause
+			}
+			return ByteBuffer.wrap(rv);
 		}
 	}
 
--- a/src/org/tmatesoft/hg/internal/FNCacheFile.java	Wed Jul 10 20:22:07 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/FNCacheFile.java	Thu Jul 11 17:54:08 2013 +0200
@@ -24,7 +24,6 @@
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.channels.FileChannel;
-import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -83,7 +82,7 @@
 		}
 		File f = repo.getRepositoryFile(FNCache);
 		f.getParentFile().mkdirs();
-		final Charset filenameEncoding = repo.getFilenameEncoding();
+		final EncodingHelper fnEncoder = repo.buildFileNameEncodingHelper();
 		ArrayList<CharBuffer> added = new ArrayList<CharBuffer>();
 		for (Path p : addedDotI) {
 			added.add(CharBuffer.wrap(pathHelper.rewrite(p)));
@@ -105,7 +104,7 @@
 			FileChannel fncacheFile = fos.getChannel();
 			ByteBuffer lf = ByteBuffer.wrap(new byte[] { 0x0A });
 			for (CharBuffer b : added) {
-				fncacheFile.write(filenameEncoding.encode(b));
+				fncacheFile.write(fnEncoder.toFNCache(b));
 				fncacheFile.write(lf);
 				lf.rewind();
 			}
--- a/src/org/tmatesoft/hg/internal/Internals.java	Wed Jul 10 20:22:07 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/Internals.java	Thu Jul 11 17:54:08 2013 +0200
@@ -247,19 +247,18 @@
 	public boolean isCaseSensitiveFileSystem() {
 		return isCaseSensitiveFileSystem;
 	}
-	
-	public EncodingHelper buildFileNameEncodingHelper() {
-		return new EncodingHelper(getFilenameEncoding(), repo.getSessionContext());
-	}
-	
+
 	public boolean fncacheInUse() {
 		return (getRequiresFlags() & RequiresFile.FNCACHE) != 0;
 	}
-	
-	/*package-local*/ Charset getFilenameEncoding() {
-		return getFileEncoding(getSessionContext());
+
+	public EncodingHelper buildFileNameEncodingHelper() { // new helper per call; repo is handed over as the SessionContext.Source
+		return new EncodingHelper(getFileEncoding(getSessionContext()), repo);
 	}
 	
+	public static EncodingHelper buildFileNameEncodingHelper(SessionContext.Source ctx) { // static variant: needs only a session context source, no Internals instance
+		return new EncodingHelper(getFileEncoding(ctx.getSessionContext()), ctx);
+	}
 	/*package-local*/ static Charset getFileEncoding(SessionContext ctx) {
 		Object altEncoding = ctx.getConfigurationProperty(CFG_PROPERTY_FS_FILENAME_ENCODING, null);
 		Charset cs;
--- a/src/org/tmatesoft/hg/repo/HgBundle.java	Wed Jul 10 20:22:07 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgBundle.java	Thu Jul 11 17:54:08 2013 +0200
@@ -31,12 +31,14 @@
 import org.tmatesoft.hg.internal.DataAccessProvider;
 import org.tmatesoft.hg.internal.DataSerializer;
 import org.tmatesoft.hg.internal.DigestHelper;
+import org.tmatesoft.hg.internal.EncodingHelper;
 import org.tmatesoft.hg.internal.Experimental;
 import org.tmatesoft.hg.internal.FileUtils;
 import org.tmatesoft.hg.internal.InflaterDataAccess;
 import org.tmatesoft.hg.internal.Internals;
 import org.tmatesoft.hg.internal.Lifecycle;
 import org.tmatesoft.hg.internal.Patch;
+import org.tmatesoft.hg.repo.HgChangelog.ChangesetParser;
 import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
 import org.tmatesoft.hg.util.Adaptable;
 import org.tmatesoft.hg.util.CancelledException;
@@ -55,12 +57,14 @@
 	private final File bundleFile;
 	private final DataAccessProvider accessProvider;
 	private final SessionContext ctx;
+	private final EncodingHelper fnDecorer;
 	private Lifecycle.BasicCallback flowControl;
 
 	HgBundle(SessionContext sessionContext, DataAccessProvider dap, File bundle) {
 		ctx = sessionContext;
 		accessProvider = dap;
 		bundleFile = bundle;
+		fnDecorer = Internals.buildFileNameEncodingHelper(new SessionContext.SourcePrim(ctx)); // the factory takes a Source, so wrap the plain context
 	}
 
 	private DataAccess getDataStream() throws IOException {
@@ -112,10 +116,12 @@
 			boolean emptyChangelog = true;
 			private DataAccess prevRevContent;
 			private int revisionIndex;
+			private ChangesetParser csetBuilder;
 
 			public void changelogStart() {
 				emptyChangelog = true;
 				revisionIndex = 0;
+				csetBuilder = new ChangesetParser(hgRepo, true); // true: one RawChangeset is reused by every parse(); NOTE(review): the replaced RawChangeset.parse created a new instance per call - confirm inspectors that keep cs clone it
 			}
 
 			public void changelogEnd() {
@@ -172,7 +178,7 @@
 						throw new HgInvalidStateException(String.format("Integrity check failed on %s, node: %s", bundleFile, ge.node().shortNotation()));
 					}
 					ByteArrayDataAccess csetDataAccess = new ByteArrayDataAccess(csetContent);
-					RawChangeset cs = RawChangeset.parse(csetDataAccess);
+					RawChangeset cs = csetBuilder.parse(csetDataAccess);
 					inspector.next(revisionIndex++, ge.node(), cs);
 					prevRevContent.done();
 					prevRevContent = csetDataAccess.reset();
@@ -397,7 +403,7 @@
 			}
 			byte[] fnameBuf = new byte[fnameLen - 4];
 			da.readBytes(fnameBuf, 0, fnameBuf.length);
-			String name = new String(fnameBuf);
+			String name = fnDecorer.fromBundle(fnameBuf, 0, fnameBuf.length);
 			inspector.fileStart(name);
 			if (flowControl.isStopped()) {
 				return;
--- a/src/org/tmatesoft/hg/repo/HgChangelog.java	Wed Jul 10 20:22:07 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgChangelog.java	Thu Jul 11 17:54:08 2013 +0200
@@ -17,7 +17,6 @@
 package org.tmatesoft.hg.repo;
 
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Calendar;
@@ -31,8 +30,11 @@
 import java.util.TimeZone;
 
 import org.tmatesoft.hg.core.Nodeid;
+import org.tmatesoft.hg.core.SessionContext;
 import org.tmatesoft.hg.internal.Callback;
 import org.tmatesoft.hg.internal.DataAccess;
+import org.tmatesoft.hg.internal.EncodingHelper;
+import org.tmatesoft.hg.internal.Internals;
 import org.tmatesoft.hg.internal.Lifecycle;
 import org.tmatesoft.hg.internal.LifecycleBridge;
 import org.tmatesoft.hg.internal.Pool;
@@ -76,7 +78,7 @@
 		if (inspector == null) {
 			throw new IllegalArgumentException();
 		}
-		content.iterate(start, end, true, new RawCsetParser(inspector));
+		content.iterate(start, end, true, new RawCsetParser(getRepo(), inspector));
 	}
 
 	/**
@@ -116,7 +118,7 @@
 		if (inspector == null) {
 			throw new IllegalArgumentException();
 		}
-		content.iterate(sortedRevisions, true, new RawCsetParser(inspector));
+		content.iterate(sortedRevisions, true, new RawCsetParser(getRepo(), inspector));
 	}
 
 	/**
@@ -146,33 +148,17 @@
 	/**
 	 * Entry in the Changelog
 	 */
-	public static class RawChangeset implements Cloneable /* for those that would like to keep a copy */{
-		// TODO immutable
+	public static final class RawChangeset implements Cloneable /* for those that would like to keep a copy */{
+		// would be nice to get it immutable, but then we can't reuse instances
 		private/* final */Nodeid manifest;
 		private String user;
 		private String comment;
-		private List<String> files; // unmodifiable collection (otherwise #files() and implicit #clone() shall be revised)
+		private String[] files; // shall not be modified (#clone() does shallow copy)
 		private Date time;
 		private int timezone;
 		// http://mercurial.selenic.com/wiki/PruningDeadBranches - Closing changesets can be identified by close=1 in the changeset's extra field.
 		private Map<String, String> extras;
 
-		/**
-		 * @see mercurial/changelog.py:read()
-		 * 
-		 *      <pre>
-		 *         format used:
-		 *         nodeid\n        : manifest node in ascii
-		 *         user\n          : user, no \n or \r allowed
-		 *         time tz extra\n : date (time is int or float, timezone is int)
-		 *                         : extra is metadatas, encoded and separated by '\0'
-		 *                         : older versions ignore it
-		 *         files\n\n       : files modified by the cset, no \n or \r allowed
-		 *         (.*)            : comment (free text, ideally utf-8)
-		 * 
-		 *         changelog v0 doesn't use extra
-		 * </pre>
-		 */
 		private RawChangeset() {
 		}
 
@@ -189,7 +175,7 @@
 		}
 
 		public List<String> files() {
-			return files;
+			return java.util.Collections.unmodifiableList(Arrays.asList(files)); // read-only view: Arrays.asList alone would let set() write into the array shared with clones
 		}
 
 		public Date date() {
@@ -234,7 +220,7 @@
 			sb.append("Comment: ").append(comment).append(", ");
 			sb.append("Manifest: ").append(manifest).append(", ");
 			sb.append("Date: ").append(time).append(", ");
-			sb.append("Files: ").append(files.size());
+			sb.append("Files: ").append(files.length);
 			for (String s : files) {
 				sb.append(", ").append(s);
 			}
@@ -253,16 +239,57 @@
 				throw new InternalError(ex.toString());
 			}
 		}
-
-		/*package*/ static RawChangeset parse(DataAccess da) throws IOException, HgInvalidDataFormatException {
-			byte[] data = da.byteArray();
-			RawChangeset rv = new RawChangeset();
-			rv.init(data, 0, data.length, null);
-			return rv;
+	}
+	
+	/**
+	 * @see mercurial/changelog.py:read()
+	 * 
+	 *      <pre>
+	 *         format used:
+	 *         nodeid\n        : manifest node in ascii
+	 *         user\n          : user, no \n or \r allowed
+	 *         time tz extra\n : date (time is int or float, timezone is int)
+	 *                         : extra is metadatas, encoded and separated by '\0'
+	 *                         : older versions ignore it
+	 *         files\n\n       : files modified by the cset, no \n or \r allowed
+	 *         (.*)            : comment (free text, ideally utf-8)
+	 * 
+	 *         changelog v0 doesn't use extra
+	 * </pre>
+	 */
+	/*package-local*/static final class ChangesetParser {
+		private final EncodingHelper encHelper;
+		// it's likely user names get repeated again and again throughout repository. 
+		private final Pool<String> usersPool;
+		private final Pool<String> filesPool;
+		private final boolean reuseChangesetInstance;
+		private RawChangeset target;
+		
+		public ChangesetParser(SessionContext.Source sessionContex, boolean shallReuseCsetInstance) { // with reuse on, every parse() fills and returns the same RawChangeset
+			encHelper = Internals.buildFileNameEncodingHelper(sessionContex);
+			usersPool = new Pool<String>();
+			filesPool = new Pool<String>();
+			reuseChangesetInstance = shallReuseCsetInstance;
+			if (shallReuseCsetInstance) {
+				target = new RawChangeset();
+			}
+		}
+		
+		public void dispose() { // empties the pools of unified user and file name strings
+			usersPool.clear();
+			filesPool.clear();
 		}
 
-		// @param usersPool - it's likely user names get repeated again and again throughout repository. can be null
-		/* package-local */void init(byte[] data, int offset, int length, Pool<String> usersPool) throws HgInvalidDataFormatException {
+		public RawChangeset parse(DataAccess da) throws IOException, HgInvalidDataFormatException { // parses the whole byte content of da as one changeset entry
+			byte[] data = da.byteArray();
+			if (!reuseChangesetInstance) { // no reuse requested: each call gets its own RawChangeset
+				target = new RawChangeset();
+			}
+			init(data, 0, data.length);
+			return target;
+		}
+
+		private void init(byte[] data, int offset, int length) throws HgInvalidDataFormatException {
 			final int bufferEndIndex = offset + length;
 			final byte lineBreak = (byte) '\n';
 			int breakIndex1 = indexOf(data, lineBreak, offset, bufferEndIndex);
@@ -275,17 +302,8 @@
 				throw new HgInvalidDataFormatException("Bad Changeset data");
 			}
 			String _user;
-			try {
-				// TODO use encoding helper? Although where encoding is fixed (like here), seems to be just too much
-				_user = new String(data, breakIndex1 + 1, breakIndex2 - breakIndex1 - 1, "UTF-8");
-				if (usersPool != null) {
-					_user = usersPool.unify(_user);
-				}
-			} catch (UnsupportedEncodingException ex) {
-				_user = "";
-				// Could hardly happen
-				throw new HgInvalidDataFormatException("Bad Changeset data", ex);
-			}
+			_user = encHelper.userFromChangeset(data, breakIndex1 + 1, breakIndex2 - breakIndex1 - 1);
+			_user = usersPool.unify(_user);
 
 			int breakIndex3 = indexOf(data, lineBreak, breakIndex2 + 1, bufferEndIndex);
 			if (breakIndex3 == -1) {
@@ -313,10 +331,9 @@
 			if (breakIndex4 > lastStart) {
 				// if breakIndex4 == lastStart, we already found \n\n and hence there are no files (e.g. merge revision)
 				_files = new ArrayList<String>(5);
-				// TODO pool file names
-				// TODO encoding of filenames?
 				while (breakIndex4 != -1 && breakIndex4 + 1 < bufferEndIndex) {
-					_files.add(new String(data, lastStart, breakIndex4 - lastStart));
+					String fname = encHelper.fileFromChangeset(data, lastStart, breakIndex4 - lastStart);
+					_files.add(filesPool.unify(fname));
 					lastStart = breakIndex4 + 1;
 					if (data[breakIndex4 + 1] == lineBreak) {
 						// found \n\n
@@ -331,23 +348,15 @@
 			} else {
 				breakIndex4--;
 			}
-			String _comment;
-			try {
-				_comment = new String(data, breakIndex4 + 2, bufferEndIndex - breakIndex4 - 2, "UTF-8");
-				// TODO post-1.0 respect ui.fallbackencoding and try to decode if set; use EncodingHelper
-			} catch (UnsupportedEncodingException ex) {
-				_comment = "";
-				// Could hardly happen
-				throw new HgInvalidDataFormatException("Bad Changeset data", ex);
-			}
+			String _comment = encHelper.commentFromChangeset(data, breakIndex4 + 2, bufferEndIndex - breakIndex4 - 2);
 			// change this instance at once, don't leave it partially changed in case of error
-			this.manifest = _nodeid;
-			this.user = _user;
-			this.time = _time;
-			this.timezone = _timezone;
-			this.files = _files == null ? Collections.<String> emptyList() : Collections.unmodifiableList(_files);
-			this.comment = _comment;
-			this.extras = _extrasMap;
+			target.manifest = _nodeid;
+			target.user = _user;
+			target.time = _time;
+			target.timezone = _timezone;
+			target.files = _files == null ? new String[0] : _files.toArray(new String[_files.size()]);
+			target.comment = _comment;
+			target.extras = _extrasMap;
 		}
 
 		private Map<String, String> parseExtras(String _extras) {
@@ -408,21 +417,20 @@
 		}
 	}
 
-	private static class RawCsetParser implements RevlogStream.Inspector, Adaptable {
+	private static final class RawCsetParser implements RevlogStream.Inspector, Adaptable, Lifecycle {
 		
 		private final Inspector inspector;
-		private final Pool<String> usersPool;
-		private final RawChangeset cset = new RawChangeset();
+		private final ChangesetParser csetBuilder;
 		// non-null when inspector uses high-level lifecycle entities (progress and/or cancel supports)
 		private final LifecycleBridge lifecycleStub;
 		// non-null when inspector relies on low-level lifecycle and is responsible
 		// to proceed any possible high-level entities himself.
 		private final Lifecycle inspectorLifecycle;
 
-		public RawCsetParser(HgChangelog.Inspector delegate) {
+		public RawCsetParser(SessionContext.Source sessionContext, HgChangelog.Inspector delegate) {
 			assert delegate != null;
 			inspector = delegate;
-			usersPool = new Pool<String>();
+			csetBuilder = new ChangesetParser(sessionContext, true);
 			inspectorLifecycle = Adaptable.Factory.getAdapter(delegate, Lifecycle.class, null);
 			if (inspectorLifecycle == null) {
 				ProgressSupport ph = Adaptable.Factory.getAdapter(delegate, ProgressSupport.class, null);
@@ -439,8 +447,7 @@
 
 		public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) throws HgRuntimeException {
 			try {
-				byte[] data = da.byteArray();
-				cset.init(data, 0, data.length, usersPool);
+				RawChangeset cset = csetBuilder.parse(da);
 				// XXX there's no guarantee for Changeset.Callback that distinct instance comes each time, consider instance reuse
 				inspector.next(revisionNumber, Nodeid.fromBinary(nodeid, 0), cset);
 				if (lifecycleStub != null) {
@@ -456,17 +463,28 @@
 		
 		public <T> T getAdapter(Class<T> adapterClass) {
 			if (adapterClass == Lifecycle.class) {
-				if (inspectorLifecycle != null) {
-					return adapterClass.cast(inspectorLifecycle);
-				}
-				// reveal interest in lifecycle only when either progress or cancel support is there
-				// and inspector itself doesn't respond to lifecycle request
-				// lifecycleStub may still be null here (no progress and cancel), it's ok to cast(null) 
-				return adapterClass.cast(lifecycleStub);
-				
+				return adapterClass.cast(this); // the parser itself is the Lifecycle now; start/finish forward to the inspector's lifecycle or the stub
 			}
+			// XXX what if caller takes Progress/Cancel (which we update through lifecycleStub, too)
 			return Adaptable.Factory.getAdapter(inspector, adapterClass, null);
 		}
 
+		public void start(int count, Callback callback, Object token) { // forwards to the inspector's own Lifecycle if any, else to lifecycleStub if present, else does nothing
+			if (inspectorLifecycle != null) {
+				inspectorLifecycle.start(count, callback, token);
+			} else if (lifecycleStub != null) {
+				lifecycleStub.start(count, callback, token);
+			}
+		}
+
+		public void finish(Object token) { // same forwarding as start(), then releases the parser's pooled strings
+			if (inspectorLifecycle != null) {
+				inspectorLifecycle.finish(token);
+			} else if (lifecycleStub != null) {
+				lifecycleStub.finish(token);
+			}
+			csetBuilder.dispose(); // NOTE(review): not reached if the delegated finish() throws
+		}
+
 	}
 }