# HG changeset patch # User Artem Tikhomirov # Date 1373558048 -7200 # Node ID fba85bc1dfb81125b12c8b45e8add65eda221927 # Parent 27a3ddedd6cc6eb0af61b80f2985f443d9ccce0d Refactoring: move all encoding/decoding operations into single place, EncodingHelper diff -r 27a3ddedd6cc -r fba85bc1dfb8 src/org/tmatesoft/hg/core/SessionContext.java --- a/src/org/tmatesoft/hg/core/SessionContext.java Wed Jul 10 20:22:07 2013 +0200 +++ b/src/org/tmatesoft/hg/core/SessionContext.java Thu Jul 11 17:54:08 2013 +0200 @@ -72,4 +72,16 @@ public interface Source { SessionContext getSessionContext(); } + + public static final class SourcePrim implements Source { + private final SessionContext ctx; + + public SourcePrim(SessionContext sessionContext) { + assert sessionContext != null; + ctx = sessionContext; + } + public SessionContext getSessionContext() { + return ctx; + } + } } diff -r 27a3ddedd6cc -r fba85bc1dfb8 src/org/tmatesoft/hg/internal/BundleGenerator.java --- a/src/org/tmatesoft/hg/internal/BundleGenerator.java Wed Jul 10 20:22:07 2013 +0200 +++ b/src/org/tmatesoft/hg/internal/BundleGenerator.java Thu Jul 11 17:54:08 2013 +0200 @@ -101,6 +101,7 @@ new ChunkGenerator(outRaw, clogMap).iterate(manifestStream, manifestRevs.toArray(true)); outRaw.writeInt(0); // null chunk for manifest group // + EncodingHelper fnEncoder = repo.buildFileNameEncodingHelper(); for (HgDataFile df : sortedByName(files)) { RevlogStream s = repo.getImplAccess().getStream(df); final IntVector fileRevs = new IntVector(); @@ -117,7 +118,7 @@ // although BundleFormat page says "filename length, filename" for a file, // in fact there's a sort of 'filename chunk', i.e. filename length field includes // not only length of filename, but also length of the field itseld, i.e. filename.length+sizeof(int) - byte[] fnameBytes = df.getPath().toString().getBytes(); // FIXME check encoding in native hg (and fix accordingly in HgBundle) + byte[] fnameBytes = fnEncoder.toBundle(df.getPath()); outRaw.writeInt(fnameBytes.length + 4); outRaw.writeByte(fnameBytes); new ChunkGenerator(outRaw, clogMap).iterate(s, fileRevs.toArray(true)); diff -r 27a3ddedd6cc -r fba85bc1dfb8 src/org/tmatesoft/hg/internal/ChangelogEntryBuilder.java --- a/src/org/tmatesoft/hg/internal/ChangelogEntryBuilder.java Wed Jul 10 20:22:07 2013 +0200 +++ b/src/org/tmatesoft/hg/internal/ChangelogEntryBuilder.java Thu Jul 11 17:54:08 2013 +0200 @@ -17,7 +17,6 @@ package org.tmatesoft.hg.internal; import java.io.ByteArrayOutputStream; -import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -31,7 +30,6 @@ import org.tmatesoft.hg.core.HgIOException; import org.tmatesoft.hg.core.Nodeid; import org.tmatesoft.hg.internal.DataSerializer.DataSource; -import org.tmatesoft.hg.repo.HgInvalidStateException; import org.tmatesoft.hg.util.Path; /** @@ -41,6 +39,7 @@ */ public class ChangelogEntryBuilder implements DataSource { + private final EncodingHelper encHelper; private String user; private List modifiedFiles; private final Map extrasMap = new LinkedHashMap(); @@ -49,6 +48,10 @@ private Nodeid manifestRev; private CharSequence comment; + ChangelogEntryBuilder(EncodingHelper encodingHelper) { + encHelper = encodingHelper; + } + public ChangelogEntryBuilder user(String username) { user = username; return this; @@ -116,36 +119,32 @@ } public byte[] build() { - try { - ByteArrayOutputStream out = new ByteArrayOutputStream(); - final int LF = '\n'; - CharSequence extras = buildExtras(); - CharSequence files = buildFiles(); - byte[] manifestRevision = manifestRev.toString().getBytes(); - byte[] username = user().getBytes(EncodingHelper.getUTF8().name()); // XXX Java 1.5 - out.write(manifestRevision, 0, manifestRevision.length); - out.write(LF); - out.write(username, 0, username.length); - out.write(LF); - final long csetDate = csetTime(); - byte[] date = String.format("%d %d", csetDate, csetTimezone(csetDate)).getBytes(); - out.write(date, 0, date.length); - if (extras.length() > 0) { - out.write(' '); - byte[] b = extras.toString().getBytes(); - out.write(b, 0, b.length); - } - out.write(LF); - byte[] b = files.toString().getBytes(); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + final int LF = '\n'; + CharSequence extras = buildExtras(); + CharSequence files = buildFiles(); + byte[] manifestRevision = manifestRev.toString().getBytes(); + byte[] username = encHelper.userToChangeset(user()); + out.write(manifestRevision, 0, manifestRevision.length); + out.write(LF); + out.write(username, 0, username.length); + out.write(LF); + final long csetDate = csetTime(); + byte[] date = String.format("%d %d", csetDate, csetTimezone(csetDate)).getBytes(); + out.write(date, 0, date.length); + if (extras.length() > 0) { + out.write(' '); + byte[] b = extras.toString().getBytes(); out.write(b, 0, b.length); - out.write(LF); - out.write(LF); - byte[] cmt = comment.toString().getBytes(EncodingHelper.getUTF8().name()); // XXX Java 1.5 - out.write(cmt, 0, cmt.length); - return out.toByteArray(); - } catch (UnsupportedEncodingException ex) { - throw new HgInvalidStateException(ex.getMessage()); // Can't happen, UTF8 is always there } + out.write(LF); + byte[] b = encHelper.fileToChangeset(files); + out.write(b, 0, b.length); + out.write(LF); + out.write(LF); + byte[] cmt = encHelper.commentToChangeset(comment); + out.write(cmt, 0, cmt.length); + return out.toByteArray(); } private CharSequence buildExtras() { diff -r 27a3ddedd6cc -r fba85bc1dfb8 src/org/tmatesoft/hg/internal/CommitFacility.java --- a/src/org/tmatesoft/hg/internal/CommitFacility.java Wed Jul 10 20:22:07 2013 +0200 +++ b/src/org/tmatesoft/hg/internal/CommitFacility.java Thu Jul 11 17:54:08 2013 +0200 @@ -164,8 +164,10 @@ } } // + final EncodingHelper encHelper = repo.buildFileNameEncodingHelper(); + // // Manifest - final ManifestEntryBuilder manifestBuilder = new ManifestEntryBuilder(repo.buildFileNameEncodingHelper()); + final ManifestEntryBuilder manifestBuilder = new ManifestEntryBuilder(encHelper); for (Map.Entry me : newManifestRevision.entrySet()) { manifestBuilder.add(me.getKey().toString(), me.getValue()); } @@ -173,7 +175,7 @@ Nodeid manifestRev = manifestWriter.addRevision(manifestBuilder, clogRevisionIndex, manifestParents.first(), manifestParents.second()).second(); // // Changelog - final ChangelogEntryBuilder changelogBuilder = new ChangelogEntryBuilder(); + final ChangelogEntryBuilder changelogBuilder = new ChangelogEntryBuilder(encHelper); changelogBuilder.setModified(files.keySet()); changelogBuilder.branch(branch == null ? DEFAULT_BRANCH_NAME : branch); changelogBuilder.user(String.valueOf(user)); diff -r 27a3ddedd6cc -r fba85bc1dfb8 src/org/tmatesoft/hg/internal/EncodingHelper.java --- a/src/org/tmatesoft/hg/internal/EncodingHelper.java Wed Jul 10 20:22:07 2013 +0200 +++ b/src/org/tmatesoft/hg/internal/EncodingHelper.java Thu Jul 11 17:54:08 2013 +0200 @@ -18,6 +18,7 @@ import static org.tmatesoft.hg.util.LogFacility.Severity.Error; +import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.CharacterCodingException; @@ -26,15 +27,19 @@ import java.nio.charset.CharsetEncoder; import org.tmatesoft.hg.core.SessionContext; +import org.tmatesoft.hg.repo.HgInvalidStateException; /** * Keep all encoding-related issues in the single place * NOT thread-safe (encoder and decoder requires synchronized access) + * + * @see http://mercurial.selenic.com/wiki/EncodingStrategy + * @see http://mercurial.selenic.com/wiki/WindowsUTF8Plan + * @see http://mercurial.selenic.com/wiki/CharacterEncodingOnWindows * @author Artem Tikhomirov * @author TMate Software Ltd. */ public class EncodingHelper { - // XXX perhaps, shall not be full of statics, but rather an instance coming from e.g. HgRepository? /* * To understand what Mercurial thinks of UTF-8 and Unix byte approach to names, see * http://mercurial.808500.n3.nabble.com/Unicode-support-request-td3430704.html @@ -43,11 +48,21 @@ private final SessionContext sessionContext; private final CharsetEncoder encoder; private final CharsetDecoder decoder; + private final CharsetEncoder utfEncoder; + private final CharsetDecoder utfDecoder; - EncodingHelper(Charset fsEncoding, SessionContext ctx) { - sessionContext = ctx; + EncodingHelper(Charset fsEncoding, SessionContext.Source ctx) { + sessionContext = ctx.getSessionContext(); decoder = fsEncoding.newDecoder(); encoder = fsEncoding.newEncoder(); + Charset utf8 = getUTF8(); + if (fsEncoding.equals(utf8)) { + utfDecoder = decoder; + utfEncoder = encoder; + } else { + utfDecoder = utf8.newDecoder(); + utfEncoder = utf8.newEncoder(); + } } /** @@ -65,7 +80,7 @@ // perhaps, can return byte[0] in this case? throw new IllegalArgumentException(); } - return encodeWithSystemDefaultFallback(s); + return toArray(encodeWithSystemDefaultFallback(s)); } /** @@ -79,9 +94,51 @@ if (fname == null) { throw new IllegalArgumentException(); } + return toArray(encodeWithSystemDefaultFallback(fname)); + } + + /** + * prepare filename to be serialized into fncache file + */ + public ByteBuffer toFNCache(CharSequence fname) { return encodeWithSystemDefaultFallback(fname); } + + public byte[] toBundle(CharSequence fname) { + // yes, mercurial transfers filenames in local encoding + // so that if your local encoding doesn't match that on server, + // and you use native characters, you'd likely fail + return toArray(encodeWithSystemDefaultFallback(fname)); + } + public String fromBundle(byte[] data, int start, int length) { + return decodeWithSystemDefaultFallback(data, start, length); + } + + + public String userFromChangeset(byte[] data, int start, int length) { + return decodeUnicodeWithFallback(data, start, length); + } + + public String commentFromChangeset(byte[] data, int start, int length) { + return decodeUnicodeWithFallback(data, start, length); + } + + public String fileFromChangeset(byte[] data, int start, int length) { + return decodeWithSystemDefaultFallback(data, start, length); + } + public byte[] userToChangeset(CharSequence user) { + return toArray(encodeUnicode(user)); + } + + public byte[] commentToChangeset(CharSequence comment) { + return toArray(encodeUnicode(comment)); + } + + public byte[] fileToChangeset(CharSequence file) { + return toArray(encodeWithSystemDefaultFallback(file)); + } + private String decodeWithSystemDefaultFallback(byte[] data, int start, int length) { try { return decoder.decode(ByteBuffer.wrap(data, start, length)).toString(); @@ -92,18 +149,53 @@ } } - private byte[] encodeWithSystemDefaultFallback(CharSequence s) { + private ByteBuffer encodeWithSystemDefaultFallback(CharSequence s) { try { // synchronized(encoder) { - ByteBuffer bb = encoder.encode(CharBuffer.wrap(s)); + return encoder.encode(CharBuffer.wrap(s)); // } - byte[] rv = new byte[bb.remaining()]; - bb.get(rv, 0, rv.length); - return rv; } catch (CharacterCodingException ex) { sessionContext.getLog().dump(getClass(), Error, ex, String.format("Use of charset %s failed, resort to system default", charset().name())); // resort to system-default - return s.toString().getBytes(); + return ByteBuffer.wrap(s.toString().getBytes()); + } + } + + private byte[] toArray(ByteBuffer bb) { + byte[] rv; + if (bb.hasArray() && bb.arrayOffset() == 0) { + rv = bb.array(); + if (rv.length == bb.remaining()) { + return rv; + } + // fall through + } + rv = new byte[bb.remaining()]; + bb.get(rv, 0, rv.length); + return rv; + } + + private String decodeUnicodeWithFallback(byte[] data, int start, int length) { + try { + return utfDecoder.decode(ByteBuffer.wrap(data, start, length)).toString(); + } catch (CharacterCodingException ex) { + // TODO post-1.2 respect ui.fallbackencoding actual setting + return new String(data, start, length, Charset.forName("ISO-8859-1")); + } + } + + private ByteBuffer encodeUnicode(CharSequence s) { + // + try { + return utfEncoder.encode(CharBuffer.wrap(s)); + } catch (CharacterCodingException ex) { + byte[] rv; + try { + rv = s.toString().getBytes(getUTF8().name()); // XXX Java 1.5 + } catch (UnsupportedEncodingException e) { + throw new HgInvalidStateException("Unexpected error trying to get UTF-8 encoding"); + } + return ByteBuffer.wrap(rv); } } diff -r 27a3ddedd6cc -r fba85bc1dfb8 src/org/tmatesoft/hg/internal/FNCacheFile.java --- a/src/org/tmatesoft/hg/internal/FNCacheFile.java Wed Jul 10 20:22:07 2013 +0200 +++ b/src/org/tmatesoft/hg/internal/FNCacheFile.java Thu Jul 11 17:54:08 2013 +0200 @@ -24,7 +24,6 @@ import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.channels.FileChannel; -import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; @@ -83,7 +82,7 @@ } File f = repo.getRepositoryFile(FNCache); f.getParentFile().mkdirs(); - final Charset filenameEncoding = repo.getFilenameEncoding(); + final EncodingHelper fnEncoder = repo.buildFileNameEncodingHelper(); ArrayList added = new ArrayList(); for (Path p : addedDotI) { added.add(CharBuffer.wrap(pathHelper.rewrite(p))); @@ -105,7 +104,7 @@ FileChannel fncacheFile = fos.getChannel(); ByteBuffer lf = ByteBuffer.wrap(new byte[] { 0x0A }); for (CharBuffer b : added) { - fncacheFile.write(filenameEncoding.encode(b)); + fncacheFile.write(fnEncoder.toFNCache(b)); fncacheFile.write(lf); lf.rewind(); } diff -r 27a3ddedd6cc -r fba85bc1dfb8 src/org/tmatesoft/hg/internal/Internals.java --- a/src/org/tmatesoft/hg/internal/Internals.java Wed Jul 10 20:22:07 2013 +0200 +++ b/src/org/tmatesoft/hg/internal/Internals.java Thu Jul 11 17:54:08 2013 +0200 @@ -247,19 +247,18 @@ public boolean isCaseSensitiveFileSystem() { return isCaseSensitiveFileSystem; } - - public EncodingHelper buildFileNameEncodingHelper() { - return new EncodingHelper(getFilenameEncoding(), repo.getSessionContext()); - } - + public boolean fncacheInUse() { return (getRequiresFlags() & RequiresFile.FNCACHE) != 0; } - - /*package-local*/ Charset getFilenameEncoding() { - return getFileEncoding(getSessionContext()); + + public EncodingHelper buildFileNameEncodingHelper() { + return new EncodingHelper(getFileEncoding(getSessionContext()), repo); } + public static EncodingHelper buildFileNameEncodingHelper(SessionContext.Source ctx) { + return new EncodingHelper(getFileEncoding(ctx.getSessionContext()), ctx); + } /*package-local*/ static Charset getFileEncoding(SessionContext ctx) { Object altEncoding = ctx.getConfigurationProperty(CFG_PROPERTY_FS_FILENAME_ENCODING, null); Charset cs; diff -r 27a3ddedd6cc -r fba85bc1dfb8 src/org/tmatesoft/hg/repo/HgBundle.java --- a/src/org/tmatesoft/hg/repo/HgBundle.java Wed Jul 10 20:22:07 2013 +0200 +++ b/src/org/tmatesoft/hg/repo/HgBundle.java Thu Jul 11 17:54:08 2013 +0200 @@ -31,12 +31,14 @@ import org.tmatesoft.hg.internal.DataAccessProvider; import org.tmatesoft.hg.internal.DataSerializer; import org.tmatesoft.hg.internal.DigestHelper; +import org.tmatesoft.hg.internal.EncodingHelper; import org.tmatesoft.hg.internal.Experimental; import org.tmatesoft.hg.internal.FileUtils; import org.tmatesoft.hg.internal.InflaterDataAccess; import org.tmatesoft.hg.internal.Internals; import org.tmatesoft.hg.internal.Lifecycle; import org.tmatesoft.hg.internal.Patch; +import org.tmatesoft.hg.repo.HgChangelog.ChangesetParser; import org.tmatesoft.hg.repo.HgChangelog.RawChangeset; import org.tmatesoft.hg.util.Adaptable; import org.tmatesoft.hg.util.CancelledException; @@ -55,12 +57,14 @@ private final File bundleFile; private final DataAccessProvider accessProvider; private final SessionContext ctx; + private final EncodingHelper fnDecorer; private Lifecycle.BasicCallback flowControl; HgBundle(SessionContext sessionContext, DataAccessProvider dap, File bundle) { ctx = sessionContext; accessProvider = dap; bundleFile = bundle; + fnDecorer = Internals.buildFileNameEncodingHelper(new SessionContext.SourcePrim(ctx)); } private DataAccess getDataStream() throws IOException { @@ -112,10 +116,12 @@ boolean emptyChangelog = true; private DataAccess prevRevContent; private int revisionIndex; + private ChangesetParser csetBuilder; public void changelogStart() { emptyChangelog = true; revisionIndex = 0; + csetBuilder = new ChangesetParser(hgRepo, true); } public void changelogEnd() { @@ -172,7 +178,7 @@ throw new HgInvalidStateException(String.format("Integrity check failed on %s, node: %s", bundleFile, ge.node().shortNotation())); } ByteArrayDataAccess csetDataAccess = new ByteArrayDataAccess(csetContent); - RawChangeset cs = RawChangeset.parse(csetDataAccess); + RawChangeset cs = csetBuilder.parse(csetDataAccess); inspector.next(revisionIndex++, ge.node(), cs); prevRevContent.done(); prevRevContent = csetDataAccess.reset(); @@ -397,7 +403,7 @@ } byte[] fnameBuf = new byte[fnameLen - 4]; da.readBytes(fnameBuf, 0, fnameBuf.length); - String name = new String(fnameBuf); + String name = fnDecorer.fromBundle(fnameBuf, 0, fnameBuf.length); inspector.fileStart(name); if (flowControl.isStopped()) { return; diff -r 27a3ddedd6cc -r fba85bc1dfb8 src/org/tmatesoft/hg/repo/HgChangelog.java --- a/src/org/tmatesoft/hg/repo/HgChangelog.java Wed Jul 10 20:22:07 2013 +0200 +++ b/src/org/tmatesoft/hg/repo/HgChangelog.java Thu Jul 11 17:54:08 2013 +0200 @@ -17,7 +17,6 @@ package org.tmatesoft.hg.repo; import java.io.IOException; -import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Arrays; import java.util.Calendar; @@ -31,8 +30,11 @@ import java.util.TimeZone; import org.tmatesoft.hg.core.Nodeid; +import org.tmatesoft.hg.core.SessionContext; import org.tmatesoft.hg.internal.Callback; import org.tmatesoft.hg.internal.DataAccess; +import org.tmatesoft.hg.internal.EncodingHelper; +import org.tmatesoft.hg.internal.Internals; import org.tmatesoft.hg.internal.Lifecycle; import org.tmatesoft.hg.internal.LifecycleBridge; import org.tmatesoft.hg.internal.Pool; @@ -76,7 +78,7 @@ if (inspector == null) { throw new IllegalArgumentException(); } - content.iterate(start, end, true, new RawCsetParser(inspector)); + content.iterate(start, end, true, new RawCsetParser(getRepo(), inspector)); } /** @@ -116,7 +118,7 @@ if (inspector == null) { throw new IllegalArgumentException(); } - content.iterate(sortedRevisions, true, new RawCsetParser(inspector)); + content.iterate(sortedRevisions, true, new RawCsetParser(getRepo(), inspector)); } /** @@ -146,33 +148,17 @@ /** * Entry in the Changelog */ - public static class RawChangeset implements Cloneable /* for those that would like to keep a copy */{ - // TODO immutable + public static final class RawChangeset implements Cloneable /* for those that would like to keep a copy */{ + // would be nice to get it immutable, but then we can't reuse instances private/* final */Nodeid manifest; private String user; private String comment; - private List files; // unmodifiable collection (otherwise #files() and implicit #clone() shall be revised) + private String[] files; // shall not be modified (#clone() does shallow copy) private Date time; private int timezone; // http://mercurial.selenic.com/wiki/PruningDeadBranches - Closing changesets can be identified by close=1 in the changeset's extra field. private Map extras; - /** - * @see mercurial/changelog.py:read() - * - *
-		 *         format used:
-		 *         nodeid\n        : manifest node in ascii
-		 *         user\n          : user, no \n or \r allowed
-		 *         time tz extra\n : date (time is int or float, timezone is int)
-		 *                         : extra is metadatas, encoded and separated by '\0'
-		 *                         : older versions ignore it
-		 *         files\n\n       : files modified by the cset, no \n or \r allowed
-		 *         (.*)            : comment (free text, ideally utf-8)
-		 * 
-		 *         changelog v0 doesn't use extra
-		 * 
- */ private RawChangeset() { } @@ -189,7 +175,7 @@ } public List files() { - return files; + return Arrays.asList(files); } public Date date() { @@ -234,7 +220,7 @@ sb.append("Comment: ").append(comment).append(", "); sb.append("Manifest: ").append(manifest).append(", "); sb.append("Date: ").append(time).append(", "); - sb.append("Files: ").append(files.size()); + sb.append("Files: ").append(files.length); for (String s : files) { sb.append(", ").append(s); } @@ -253,16 +239,57 @@ throw new InternalError(ex.toString()); } } - - /*package*/ static RawChangeset parse(DataAccess da) throws IOException, HgInvalidDataFormatException { - byte[] data = da.byteArray(); - RawChangeset rv = new RawChangeset(); - rv.init(data, 0, data.length, null); - return rv; + } + + /** + * @see mercurial/changelog.py:read() + * + *
+	 *         format used:
+	 *         nodeid\n        : manifest node in ascii
+	 *         user\n          : user, no \n or \r allowed
+	 *         time tz extra\n : date (time is int or float, timezone is int)
+	 *                         : extra is metadatas, encoded and separated by '\0'
+	 *                         : older versions ignore it
+	 *         files\n\n       : files modified by the cset, no \n or \r allowed
+	 *         (.*)            : comment (free text, ideally utf-8)
+	 * 
+	 *         changelog v0 doesn't use extra
+	 * 
+ */ + /*package-local*/static final class ChangesetParser { + private final EncodingHelper encHelper; + // it's likely user names get repeated again and again throughout repository. + private final Pool usersPool; + private final Pool filesPool; + private final boolean reuseChangesetInstance; + private RawChangeset target; + + public ChangesetParser(SessionContext.Source sessionContex, boolean shallReuseCsetInstance) { + encHelper = Internals.buildFileNameEncodingHelper(sessionContex); + usersPool = new Pool(); + filesPool = new Pool(); + reuseChangesetInstance = shallReuseCsetInstance; + if (shallReuseCsetInstance) { + target = new RawChangeset(); + } + } + + public void dispose() { + usersPool.clear(); + filesPool.clear(); } - // @param usersPool - it's likely user names get repeated again and again throughout repository. can be null - /* package-local */void init(byte[] data, int offset, int length, Pool usersPool) throws HgInvalidDataFormatException { + public RawChangeset parse(DataAccess da) throws IOException, HgInvalidDataFormatException { + byte[] data = da.byteArray(); + if (!reuseChangesetInstance) { + target = new RawChangeset(); + } + init(data, 0, data.length); + return target; + } + + private void init(byte[] data, int offset, int length) throws HgInvalidDataFormatException { final int bufferEndIndex = offset + length; final byte lineBreak = (byte) '\n'; int breakIndex1 = indexOf(data, lineBreak, offset, bufferEndIndex); @@ -275,17 +302,8 @@ throw new HgInvalidDataFormatException("Bad Changeset data"); } String _user; - try { - // TODO use encoding helper? Although where encoding is fixed (like here), seems to be just too much - _user = new String(data, breakIndex1 + 1, breakIndex2 - breakIndex1 - 1, "UTF-8"); - if (usersPool != null) { - _user = usersPool.unify(_user); - } - } catch (UnsupportedEncodingException ex) { - _user = ""; - // Could hardly happen - throw new HgInvalidDataFormatException("Bad Changeset data", ex); - } + _user = encHelper.userFromChangeset(data, breakIndex1 + 1, breakIndex2 - breakIndex1 - 1); + _user = usersPool.unify(_user); int breakIndex3 = indexOf(data, lineBreak, breakIndex2 + 1, bufferEndIndex); if (breakIndex3 == -1) { @@ -313,10 +331,9 @@ if (breakIndex4 > lastStart) { // if breakIndex4 == lastStart, we already found \n\n and hence there are no files (e.g. merge revision) _files = new ArrayList(5); - // TODO pool file names - // TODO encoding of filenames? while (breakIndex4 != -1 && breakIndex4 + 1 < bufferEndIndex) { - _files.add(new String(data, lastStart, breakIndex4 - lastStart)); + String fname = encHelper.fileFromChangeset(data, lastStart, breakIndex4 - lastStart); + _files.add(filesPool.unify(fname)); lastStart = breakIndex4 + 1; if (data[breakIndex4 + 1] == lineBreak) { // found \n\n @@ -331,23 +348,15 @@ } else { breakIndex4--; } - String _comment; - try { - _comment = new String(data, breakIndex4 + 2, bufferEndIndex - breakIndex4 - 2, "UTF-8"); - // TODO post-1.0 respect ui.fallbackencoding and try to decode if set; use EncodingHelper - } catch (UnsupportedEncodingException ex) { - _comment = ""; - // Could hardly happen - throw new HgInvalidDataFormatException("Bad Changeset data", ex); - } + String _comment = encHelper.commentFromChangeset(data, breakIndex4 + 2, bufferEndIndex - breakIndex4 - 2); // change this instance at once, don't leave it partially changes in case of error - this.manifest = _nodeid; - this.user = _user; - this.time = _time; - this.timezone = _timezone; - this.files = _files == null ? Collections. emptyList() : Collections.unmodifiableList(_files); - this.comment = _comment; - this.extras = _extrasMap; + target.manifest = _nodeid; + target.user = _user; + target.time = _time; + target.timezone = _timezone; + target.files = _files == null ? new String[0] : _files.toArray(new String[_files.size()]); + target.comment = _comment; + target.extras = _extrasMap; } private Map parseExtras(String _extras) { @@ -408,21 +417,20 @@ } } - private static class RawCsetParser implements RevlogStream.Inspector, Adaptable { + private static final class RawCsetParser implements RevlogStream.Inspector, Adaptable, Lifecycle { private final Inspector inspector; - private final Pool usersPool; - private final RawChangeset cset = new RawChangeset(); + private final ChangesetParser csetBuilder; // non-null when inspector uses high-level lifecycle entities (progress and/or cancel supports) private final LifecycleBridge lifecycleStub; // non-null when inspector relies on low-level lifecycle and is responsible // to proceed any possible high-level entities himself. private final Lifecycle inspectorLifecycle; - public RawCsetParser(HgChangelog.Inspector delegate) { + public RawCsetParser(SessionContext.Source sessionContext, HgChangelog.Inspector delegate) { assert delegate != null; inspector = delegate; - usersPool = new Pool(); + csetBuilder = new ChangesetParser(sessionContext, true); inspectorLifecycle = Adaptable.Factory.getAdapter(delegate, Lifecycle.class, null); if (inspectorLifecycle == null) { ProgressSupport ph = Adaptable.Factory.getAdapter(delegate, ProgressSupport.class, null); @@ -439,8 +447,7 @@ public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) throws HgRuntimeException { try { - byte[] data = da.byteArray(); - cset.init(data, 0, data.length, usersPool); + RawChangeset cset = csetBuilder.parse(da); // XXX there's no guarantee for Changeset.Callback that distinct instance comes each time, consider instance reuse inspector.next(revisionNumber, Nodeid.fromBinary(nodeid, 0), cset); if (lifecycleStub != null) { @@ -456,17 +463,28 @@ public T getAdapter(Class adapterClass) { if (adapterClass == Lifecycle.class) { - if (inspectorLifecycle != null) { - return adapterClass.cast(inspectorLifecycle); - } - // reveal interest in lifecycle only when either progress or cancel support is there - // and inspector itself doesn't respond to lifecycle request - // lifecycleStub may still be null here (no progress and cancel), it's ok to cast(null) - return adapterClass.cast(lifecycleStub); - + return adapterClass.cast(this); } + // XXX what if caller takes Progress/Cancel (which we update through lifecycleStub, too) return Adaptable.Factory.getAdapter(inspector, adapterClass, null); } + public void start(int count, Callback callback, Object token) { + if (inspectorLifecycle != null) { + inspectorLifecycle.start(count, callback, token); + } else if (lifecycleStub != null) { + lifecycleStub.start(count, callback, token); + } + } + + public void finish(Object token) { + if (inspectorLifecycle != null) { + inspectorLifecycle.finish(token); + } else if (lifecycleStub != null) { + lifecycleStub.finish(token); + } + csetBuilder.dispose(); + } + } }