changeset 673:545b1d4cc11d
Refactor HgBundle.GroupElement (clear experimental mark), resolve few technical debt issues
author   | Artem Tikhomirov <tikhomirov.artem@gmail.com>
date     | Fri, 12 Jul 2013 20:14:24 +0200
parents  | d2552e6a5af6
children | cce0387c6041
files    | src/org/tmatesoft/hg/core/HgCloneCommand.java
         | src/org/tmatesoft/hg/internal/BundleSerializer.java
         | src/org/tmatesoft/hg/internal/ByteArrayDataAccess.java
         | src/org/tmatesoft/hg/internal/ChangesetParser.java
         | src/org/tmatesoft/hg/internal/DataAccessInputStream.java
         | src/org/tmatesoft/hg/internal/DataSerializer.java
         | src/org/tmatesoft/hg/internal/RevlogStreamWriter.java
         | src/org/tmatesoft/hg/repo/HgBundle.java
         | src/org/tmatesoft/hg/repo/HgChangelog.java
         | src/org/tmatesoft/hg/repo/HgInternals.java
         | src/org/tmatesoft/hg/repo/HgRemoteRepository.java
diffstat | 11 files changed, 441 insertions(+), 258 deletions(-)
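The central piece, visible in the HgBundle.java diff below, is that HgBundle.GroupElement drops its @Experimental marker together with the byte[]-oriented accessors (rawDataByteArray() and both apply() flavours); callers now read raw content through an InputStream returned by data(), while patch access stays package-local and is reached from other packages via the new HgInternals.patchFromData() helper. A minimal sketch of the new call pattern, assembled from the diffs below (only the wrapper class and method name are made up for illustration):

import java.io.IOException;

import org.tmatesoft.hg.internal.DataAccess;
import org.tmatesoft.hg.internal.Patch;
import org.tmatesoft.hg.repo.HgBundle.GroupElement;
import org.tmatesoft.hg.repo.HgInternals;

// Sketch only: mirrors the sequence HgCloneCommand and RevlogStreamWriter use after this changeset.
class GroupElementUsageSketch {
    static byte[] revisionContent(GroupElement ge, DataAccess prevRevContent) throws IOException {
        Patch patch = HgInternals.patchFromData(ge);  // was: p.read(new ByteArrayDataAccess(ge.rawDataByteArray()))
        return patch.apply(prevRevContent, -1);       // was: ge.apply(prevRevContent.byteArray())
    }
}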
--- a/src/org/tmatesoft/hg/core/HgCloneCommand.java Fri Jul 12 16:29:06 2013 +0200
+++ b/src/org/tmatesoft/hg/core/HgCloneCommand.java Fri Jul 12 20:14:24 2013 +0200
@@ -31,16 +31,19 @@
 import org.tmatesoft.hg.internal.ByteArrayDataAccess;
 import org.tmatesoft.hg.internal.DataAccess;
 import org.tmatesoft.hg.internal.DataSerializer;
+import org.tmatesoft.hg.internal.DataSerializer.ByteArrayDataSource;
 import org.tmatesoft.hg.internal.DigestHelper;
 import org.tmatesoft.hg.internal.FNCacheFile;
 import org.tmatesoft.hg.internal.Internals;
 import org.tmatesoft.hg.internal.Lifecycle;
+import org.tmatesoft.hg.internal.Patch;
 import org.tmatesoft.hg.internal.RepoInitializer;
 import org.tmatesoft.hg.internal.RevlogCompressor;
 import org.tmatesoft.hg.internal.RevlogStreamWriter;
 import org.tmatesoft.hg.internal.Transaction;
 import org.tmatesoft.hg.repo.HgBundle;
 import org.tmatesoft.hg.repo.HgBundle.GroupElement;
+import org.tmatesoft.hg.repo.HgInternals;
 import org.tmatesoft.hg.repo.HgInvalidControlFileException;
 import org.tmatesoft.hg.repo.HgInvalidStateException;
 import org.tmatesoft.hg.repo.HgLookup;
@@ -316,7 +319,8 @@
                 }
             }
             //
-            byte[] content = ge.apply(prevRevContent.byteArray());
+            Patch patch = HgInternals.patchFromData(ge);
+            byte[] content = patch.apply(prevRevContent, -1);
             Nodeid p1 = ge.firstParent();
             Nodeid p2 = ge.secondParent();
             byte[] calculated = dh.sha1(p1, p2, content).asBinary();
@@ -340,23 +344,23 @@
             //
             revlogHeader.parents(knownRevision(p1), knownRevision(p2));
             //
-            byte[] patchContent = ge.rawDataByteArray();
+            int patchSerializedLength = patch.serializedLength();
             // no reason to keep patch if it's close (here, >75%) in size to the complete contents,
             // save patching effort in this case
-            writeComplete = writeComplete || preferCompleteOverPatch(patchContent.length, content.length);
+            writeComplete = writeComplete || preferCompleteOverPatch(patchSerializedLength, content.length);
 
             if (writeComplete) {
                 revlogHeader.baseRevision(revisionSequence.size());
             }
             assert revlogHeader.baseRevision() >= 0;
 
-            final byte[] sourceData = writeComplete ? content : patchContent;
-            revlogDataZip.reset(new DataSerializer.ByteArrayDataSource(sourceData));
+            DataSerializer.DataSource dataSource = writeComplete ? new ByteArrayDataSource(content) : patch.new PatchDataSource();
+            revlogDataZip.reset(dataSource);
             final int compressedLen;
-            final boolean useUncompressedData = preferCompressedOverComplete(revlogDataZip.getCompressedLength(), sourceData.length);
+            final boolean useUncompressedData = preferCompressedOverComplete(revlogDataZip.getCompressedLength(), dataSource.serializeLength());
             if (useUncompressedData) {
                 // compression wasn't too effective,
-                compressedLen = sourceData.length + 1 /*1 byte for 'u' - uncompressed prefix byte*/;
+                compressedLen = dataSource.serializeLength() + 1 /*1 byte for 'u' - uncompressed prefix byte*/;
             } else {
                 compressedLen= revlogDataZip.getCompressedLength();
             }
@@ -377,8 +381,8 @@
             revlogHeader.serialize(sds);
 
             if (useUncompressedData) {
-                indexFile.write((byte) 'u');
-                indexFile.write(sourceData);
+                sds.writeByte((byte) 'u');
+                dataSource.serialize(sds);
             } else {
                 int actualCompressedLenWritten = revlogDataZip.writeCompressedData(sds);
                 if (actualCompressedLenWritten != compressedLen) {
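The preferCompleteOverPatch/preferCompressedOverComplete helpers themselves are untouched by this changeset, so their bodies do not appear above; going only by the in-line ">75%" remark, the first one presumably boils down to something like the following (hypothetical reconstruction, not code from the repository):

// Hypothetical: derived from the ">75%" comment in the hunk above, not taken from hg4j sources.
class SizeHeuristicSketch {
    static boolean preferCompleteOverPatch(int patchLength, int fullContentLength) {
        // once the serialized patch costs three quarters or more of the full revision, store the revision whole
        return patchLength >= (fullContentLength * 3) / 4;
    }
}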
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/tmatesoft/hg/internal/BundleSerializer.java Fri Jul 12 20:14:24 2013 +0200
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2013 TMate Software Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.internal;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+import org.tmatesoft.hg.core.HgIOException;
+import org.tmatesoft.hg.core.SessionContext;
+import org.tmatesoft.hg.repo.HgBundle;
+import org.tmatesoft.hg.repo.HgInternals;
+import org.tmatesoft.hg.repo.HgRuntimeException;
+import org.tmatesoft.hg.util.LogFacility;
+
+/**
+ *
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public class BundleSerializer implements DataSerializer.DataSource {
+    private final LogFacility log;
+    private final File bundleFile;
+
+    public static BundleSerializer newInstance(SessionContext ctx, HgBundle bundle) {
+        return new BundleSerializer(ctx.getLog(), HgInternals.getBundleFile(bundle));
+    }
+
+    public BundleSerializer(LogFacility logFacility, File bundleFile) {
+        log = logFacility;
+        this.bundleFile = bundleFile;
+    }
+
+    public void serialize(DataSerializer out) throws HgIOException, HgRuntimeException {
+        FileInputStream fis = null;
+        try {
+            fis = new FileInputStream(bundleFile);
+            byte[] buffer = new byte[8*1024];
+            int r;
+            while ((r = fis.read(buffer, 0, buffer.length)) > 0) {
+                out.write(buffer, 0, r);
+            }
+
+        } catch (IOException ex) {
+            throw new HgIOException("Failed to serialize bundle", bundleFile);
+        } finally {
+            new FileUtils(log, this).closeQuietly(fis, bundleFile);
+        }
+    }
+
+    public int serializeLength() throws HgRuntimeException {
+        return Internals.ltoi(bundleFile.length());
+    }
+}
\ No newline at end of file
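This class replaces the non-static HgBundle.BundleSerializer inner class that is removed from HgBundle.java further down; HgRemoteRepository now obtains the data source through the static factory. A rough usage sketch, assuming DataSerializer.OutputStreamSerializer wraps a plain java.io.OutputStream the way its use in HgRemoteRepository suggests (the wrapper class here is illustrative):

import java.io.ByteArrayOutputStream;

import org.tmatesoft.hg.core.HgIOException;
import org.tmatesoft.hg.core.SessionContext;
import org.tmatesoft.hg.internal.BundleSerializer;
import org.tmatesoft.hg.internal.DataSerializer;
import org.tmatesoft.hg.internal.DataSerializer.OutputStreamSerializer;
import org.tmatesoft.hg.repo.HgBundle;
import org.tmatesoft.hg.repo.HgRuntimeException;

// Sketch only: copy a bundle file into memory through the new data source.
class BundleCopySketch {
    static byte[] copy(SessionContext ctx, HgBundle bundle) throws HgIOException, HgRuntimeException {
        DataSerializer.DataSource src = BundleSerializer.newInstance(ctx, bundle);
        ByteArrayOutputStream sink = new ByteArrayOutputStream(src.serializeLength());
        src.serialize(new OutputStreamSerializer(sink)); // streams the file in 8K chunks
        return sink.toByteArray();
    }
}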
--- a/src/org/tmatesoft/hg/internal/ByteArrayDataAccess.java Fri Jul 12 16:29:06 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/ByteArrayDataAccess.java Fri Jul 12 20:14:24 2013 +0200
@@ -95,7 +95,7 @@
         pos = offset;
     }
     @Override
-    public void skip(int bytes) throws IOException {
+    public void skip(int bytes) {
         seek(pos + bytes);
     }
     @Override
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/tmatesoft/hg/internal/ChangesetParser.java Fri Jul 12 20:14:24 2013 +0200
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2010-2013 TMate Software Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.internal;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.tmatesoft.hg.core.Nodeid;
+import org.tmatesoft.hg.core.SessionContext;
+import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
+import org.tmatesoft.hg.repo.HgInvalidDataFormatException;
+import org.tmatesoft.hg.repo.HgRepository;
+
+/**
+ * @see mercurial/changelog.py:read()
+ *
+ * <pre>
+ * format used:
+ * nodeid\n        : manifest node in ascii
+ * user\n          : user, no \n or \r allowed
+ * time tz extra\n : date (time is int or float, timezone is int)
+ *                 : extra is metadatas, encoded and separated by '\0'
+ *                 : older versions ignore it
+ * files\n\n       : files modified by the cset, no \n or \r allowed
+ * (.*)            : comment (free text, ideally utf-8)
+ *
+ * changelog v0 doesn't use extra
+ * </pre>
+ *
+ * Extracted from internals of HgChangelog (the code initially from inside RawChangeset)
+ *
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public final class ChangesetParser {
+    private final EncodingHelper encHelper;
+    // it's likely user names get repeated again and again throughout repository.
+    private final Pool<String> usersPool;
+    private final Pool<String> filesPool;
+    private final CsetFactory factory;
+
+    public ChangesetParser(SessionContext.Source sessionContex, CsetFactory csetFactory) {
+        assert csetFactory != null;
+        encHelper = Internals.buildFileNameEncodingHelper(sessionContex);
+        usersPool = new Pool<String>();
+        filesPool = new Pool<String>();
+        factory = csetFactory;
+    }
+
+    public void dispose() {
+        usersPool.clear();
+        filesPool.clear();
+    }
+
+    public RawChangeset parse(DataAccess da) throws IOException, HgInvalidDataFormatException {
+        byte[] data = da.byteArray();
+        return parse(data);
+    }
+
+    public RawChangeset parse(byte[] data) throws HgInvalidDataFormatException {
+        return init(data, 0, data.length);
+    }
+
+    private RawChangeset init(byte[] data, int offset, int length) throws HgInvalidDataFormatException {
+        final int bufferEndIndex = offset + length;
+        final byte lineBreak = (byte) '\n';
+        int breakIndex1 = indexOf(data, lineBreak, offset, bufferEndIndex);
+        if (breakIndex1 == -1) {
+            throw new HgInvalidDataFormatException("Bad Changeset data");
+        }
+        Nodeid _nodeid = Nodeid.fromAscii(data, 0, breakIndex1);
+        int breakIndex2 = indexOf(data, lineBreak, breakIndex1 + 1, bufferEndIndex);
+        if (breakIndex2 == -1) {
+            throw new HgInvalidDataFormatException("Bad Changeset data");
+        }
+        String _user;
+        _user = encHelper.userFromChangeset(data, breakIndex1 + 1, breakIndex2 - breakIndex1 - 1);
+        _user = usersPool.unify(_user);
+
+        int breakIndex3 = indexOf(data, lineBreak, breakIndex2 + 1, bufferEndIndex);
+        if (breakIndex3 == -1) {
+            throw new HgInvalidDataFormatException("Bad Changeset data");
+        }
+        String _timeString = new String(data, breakIndex2 + 1, breakIndex3 - breakIndex2 - 1);
+        int space1 = _timeString.indexOf(' ');
+        if (space1 == -1) {
+            throw new HgInvalidDataFormatException(String.format("Bad Changeset data: %s in [%d..%d]", "time string", breakIndex2+1, breakIndex3));
+        }
+        int space2 = _timeString.indexOf(' ', space1 + 1);
+        if (space2 == -1) {
+            space2 = _timeString.length();
+        }
+        long unixTime = Long.parseLong(_timeString.substring(0, space1));
+        int _timezone = Integer.parseInt(_timeString.substring(space1 + 1, space2));
+        // unixTime is local time, and timezone records difference of the local time to UTC.
+        Date _time = new Date(unixTime * 1000);
+        String _extras = space2 < _timeString.length() ? _timeString.substring(space2 + 1) : null;
+        Map<String, String> _extrasMap = parseExtras(_extras);
+        //
+        int lastStart = breakIndex3 + 1;
+        int breakIndex4 = indexOf(data, lineBreak, lastStart, bufferEndIndex);
+        ArrayList<String> _files = null;
+        if (breakIndex4 > lastStart) {
+            // if breakIndex4 == lastStart, we already found \n\n and hence there are no files (e.g. merge revision)
+            _files = new ArrayList<String>(5);
+            while (breakIndex4 != -1 && breakIndex4 + 1 < bufferEndIndex) {
+                String fname = encHelper.fileFromChangeset(data, lastStart, breakIndex4 - lastStart);
+                _files.add(filesPool.unify(fname));
+                lastStart = breakIndex4 + 1;
+                if (data[breakIndex4 + 1] == lineBreak) {
+                    // found \n\n
+                    break;
+                } else {
+                    breakIndex4 = indexOf(data, lineBreak, lastStart, bufferEndIndex);
+                }
+            }
+            if (breakIndex4 == -1 || breakIndex4 >= bufferEndIndex) {
+                throw new HgInvalidDataFormatException("Bad Changeset data");
+            }
+        } else {
+            breakIndex4--;
+        }
+        String _comment = encHelper.commentFromChangeset(data, breakIndex4 + 2, bufferEndIndex - breakIndex4 - 2);
+        RawChangeset target = factory.create(_nodeid, _user, _time, _timezone, _files, _comment, _extrasMap);
+        return target;
+    }
+
+    private Map<String, String> parseExtras(String _extras) {
+        final String extras_branch_key = "branch";
+        _extras = _extras == null ? null : _extras.trim();
+        if (_extras == null || _extras.length() == 0) {
+            return Collections.singletonMap(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME);
+        }
+        Map<String, String> _extrasMap = new HashMap<String, String>();
+        int lastIndex = 0;
+        do {
+            String pair;
+            int sp = _extras.indexOf('\0', lastIndex);
+            if (sp == -1) {
+                sp = _extras.length();
+            }
+            if (sp > lastIndex) {
+                pair = _extras.substring(lastIndex, sp);
+                pair = decode(pair);
+                int eq = pair.indexOf(':');
+                _extrasMap.put(pair.substring(0, eq), pair.substring(eq + 1));
+                lastIndex = sp + 1;
+            }
+        } while (lastIndex < _extras.length());
+        if (!_extrasMap.containsKey(extras_branch_key)) {
+            _extrasMap.put(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME);
+        }
+        return Collections.unmodifiableMap(_extrasMap);
+    }
+
+    private static int indexOf(byte[] src, byte what, int startOffset, int endIndex) {
+        for (int i = startOffset; i < endIndex; i++) {
+            if (src[i] == what) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    private static String decode(String s) {
+        if (s != null && s.indexOf('\\') != -1) {
+            // TestAuxUtilities#testChangelogExtrasDecode
+            return s.replace("\\\\", "\\").replace("\\n", "\n").replace("\\r", "\r").replace("\\0", "\00");
+        }
+        return s;
+    }
+
+    public interface CsetFactory {
+        public RawChangeset create(Nodeid nodeid, String user, Date time, int timezone, List<String> files, String comment, Map<String, String> extrasMap);
+    }
+}
\ No newline at end of file
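The parser logic is lifted out of HgChangelog essentially verbatim; what changes is that construction of the result now goes through the CsetFactory callback (HgChangelog supplies the package-local RawCsetFactory shown in its diff below). An illustrative, same-package snippet mirroring how HgBundle.changes() drives it after this changeset (hgRepo and csetContent stand in for values the caller already has):

// Illustrative only — mirrors HgBundle.changes() below; RawCsetFactory and RawChangeset are package-local to org.tmatesoft.hg.repo
ChangesetParser csetBuilder = new ChangesetParser(hgRepo, new HgChangelog.RawCsetFactory(true));
RawChangeset cs = csetBuilder.parse(csetContent); // csetContent: byte[] of a single changelog entry
// ... hand cs to an HgChangelog.Inspector ...
csetBuilder.dispose(); // clears the pooled user/file name strings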
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/tmatesoft/hg/internal/DataAccessInputStream.java Fri Jul 12 20:14:24 2013 +0200
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2013 TMate Software Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.internal;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Wrap our internal API into a public one
+ *
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public class DataAccessInputStream extends InputStream {
+
+    private final DataAccess da;
+    private int bytesLeft = -1;
+
+    public DataAccessInputStream(DataAccess dataAccess) {
+        da = dataAccess;
+    }
+
+    @Override
+    public int available() throws IOException {
+        initAvailable();
+        return bytesLeft;
+    }
+
+    @Override
+    public int read() throws IOException {
+        initAvailable();
+        if (bytesLeft == 0) {
+            return -1;
+        }
+        int rv = da.readByte();
+        bytesLeft--;
+        return rv;
+    }
+
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException {
+        initAvailable();
+        if (bytesLeft == 0) {
+            return -1;
+        }
+        if (len == 0) {
+            return 0;
+        }
+        int x = Math.min(len, bytesLeft);
+        da.readBytes(b, off, x);
+        bytesLeft -= x;
+        return x;
+    }
+
+
+    private void initAvailable() throws IOException {
+        da.reset();
+        if (bytesLeft == -1) {
+            bytesLeft = da.length();
+        }
+    }
+}
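This adapter is what backs the new GroupElement#data() call in HgBundle.java below: the internal DataAccess stays hidden and callers see a plain java.io.InputStream. A short sketch of draining a group element's raw bytes through it (ge stands for a GroupElement handed to an HgBundle.Inspector; per the javadoc added below, streams from separate data() calls must not be read concurrently):

// Sketch: consume the raw (delta) bytes of a group element via the new stream API
InputStream is = ge.data(); // backed by DataAccessInputStream
byte[] buf = new byte[8 * 1024];
int total = 0;
for (int r; (r = is.read(buf, 0, buf.length)) != -1; ) {
    total += r; // or feed the r bytes just read to whatever needs the raw chunk
}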
--- a/src/org/tmatesoft/hg/internal/DataSerializer.java Fri Jul 12 16:29:06 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/DataSerializer.java Fri Jul 12 20:14:24 2013 +0200
@@ -24,7 +24,7 @@
 import org.tmatesoft.hg.repo.HgRuntimeException;
 
 /**
- * Serialization friend of {@link DataAccess}
+ * Serialization friend of {@link DataAccess}, similar to OutputStream with few handy methods
  *
  * @author Artem Tikhomirov
  * @author TMate Software Ltd.
--- a/src/org/tmatesoft/hg/internal/RevlogStreamWriter.java Fri Jul 12 16:29:06 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/RevlogStreamWriter.java Fri Jul 12 20:14:24 2013 +0200
@@ -30,6 +30,7 @@
 import org.tmatesoft.hg.internal.DataSerializer.ByteArraySerializer;
 import org.tmatesoft.hg.internal.DataSerializer.DataSource;
 import org.tmatesoft.hg.repo.HgBundle.GroupElement;
+import org.tmatesoft.hg.repo.HgInternals;
 import org.tmatesoft.hg.repo.HgInvalidControlFileException;
 import org.tmatesoft.hg.repo.HgInvalidDataFormatException;
 import org.tmatesoft.hg.repo.HgInvalidRevisionException;
@@ -89,12 +90,9 @@
         int p1 = p1Rev.isNull() ? NO_REVISION : revlogRevs.revisionIndex(p1Rev);
         final Nodeid p2Rev = ge.secondParent();
         int p2 = p2Rev.isNull() ? NO_REVISION : revlogRevs.revisionIndex(p2Rev);
-        Patch p = new Patch();
-        final byte[] patchBytes;
+        Patch p = null;
         try {
-            // XXX there's ge.rawData(), to avoid extra array wrap
-            patchBytes = ge.rawDataByteArray();
-            p.read(new ByteArrayDataAccess(patchBytes));
+            p = HgInternals.patchFromData(ge);
         } catch (IOException ex) {
             throw new HgIOException("Failed to read patch information", ex, null);
         }
@@ -109,7 +107,7 @@
             // we may write patch from GroupElement as is
             int patchBaseLen = dataLength(patchBaseRev);
             revLen = patchBaseLen + p.patchSizeDelta();
-            ds = new ByteArrayDataSource(patchBytes);
+            ds = p.new PatchDataSource();
         } else {
             // read baseRev, unless it's the pull to empty repository
             try {
@@ -180,7 +178,7 @@
         DataSerializer indexFile, dataFile;
         indexFile = dataFile = null;
         try {
-            //
+            // FIXME perhaps, not a good idea to open stream for each revision added (e.g, when we pull a lot of them)
             indexFile = revlogStream.getIndexStreamWriter(transaction);
             final boolean isInlineData = revlogStream.isInlineData();
             HeaderWriter revlogHeader = new HeaderWriter(isInlineData);
--- a/src/org/tmatesoft/hg/repo/HgBundle.java Fri Jul 12 16:29:06 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgBundle.java Fri Jul 12 20:14:24 2013 +0200
@@ -17,8 +17,8 @@
 package org.tmatesoft.hg.repo;
 
 import java.io.File;
-import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.util.ConcurrentModificationException;
 
 import org.tmatesoft.hg.core.HgIOException;
@@ -27,18 +27,17 @@
 import org.tmatesoft.hg.internal.ByteArrayChannel;
 import org.tmatesoft.hg.internal.ByteArrayDataAccess;
 import org.tmatesoft.hg.internal.Callback;
+import org.tmatesoft.hg.internal.ChangesetParser;
 import org.tmatesoft.hg.internal.DataAccess;
+import org.tmatesoft.hg.internal.DataAccessInputStream;
 import org.tmatesoft.hg.internal.DataAccessProvider;
-import org.tmatesoft.hg.internal.DataSerializer;
 import org.tmatesoft.hg.internal.DigestHelper;
 import org.tmatesoft.hg.internal.EncodingHelper;
 import org.tmatesoft.hg.internal.Experimental;
-import org.tmatesoft.hg.internal.FileUtils;
 import org.tmatesoft.hg.internal.InflaterDataAccess;
 import org.tmatesoft.hg.internal.Internals;
 import org.tmatesoft.hg.internal.Lifecycle;
 import org.tmatesoft.hg.internal.Patch;
-import org.tmatesoft.hg.repo.HgChangelog.ChangesetParser;
 import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
 import org.tmatesoft.hg.util.Adaptable;
 import org.tmatesoft.hg.util.CancelledException;
@@ -54,9 +53,9 @@
 @Experimental(reason="API is not stable")
 public class HgBundle {
 
-    private final File bundleFile;
+    final File bundleFile;
     private final DataAccessProvider accessProvider;
-    private final SessionContext ctx;
+    final SessionContext ctx;
     private final EncodingHelper fnDecorer;
 
     private Lifecycle.BasicCallback flowControl;
@@ -110,7 +109,7 @@
      * @param hgRepo repository that shall possess base revision for this bundle
      * @param inspector callback to get each changeset found
      */
-    public void changes(final HgRepository hgRepo, final HgChangelog.Inspector inspector) throws HgRuntimeException {
+    public void changes(final HgRepository hgRepo, final HgChangelog.Inspector inspector) throws HgIOException, HgRuntimeException {
         Inspector bundleInsp = new Inspector() {
             DigestHelper dh = new DigestHelper();
             boolean emptyChangelog = true;
@@ -121,7 +120,7 @@
             public void changelogStart() {
                 emptyChangelog = true;
                 revisionIndex = 0;
-                csetBuilder = new ChangesetParser(hgRepo, true);
+                csetBuilder = new ChangesetParser(hgRepo, new HgChangelog.RawCsetFactory(true));
             }
 
             public void changelogEnd() {
@@ -153,7 +152,7 @@
 
                To recreate 30bd..e5, one have to take content of 9429..e0, not its p1 f1db..5e
              */
-            public boolean element(GroupElement ge) throws HgRuntimeException {
+            public boolean element(GroupElement ge) throws IOException, HgRuntimeException {
                 emptyChangelog = false;
                 HgChangelog changelog = hgRepo.getChangelog();
                 try {
@@ -172,20 +171,17 @@
                     }
                 }
                 //
-                byte[] csetContent = ge.apply(prevRevContent);
+                byte[] csetContent = ge.patch().apply(prevRevContent, -1);
                 dh = dh.sha1(ge.firstParent(), ge.secondParent(), csetContent); // XXX ge may give me access to byte[] content of nodeid directly, perhaps, I don't need DH to be friend of Nodeid?
                 if (!ge.node().equalsTo(dh.asBinary())) {
                     throw new HgInvalidStateException(String.format("Integrity check failed on %s, node: %s", bundleFile, ge.node().shortNotation()));
                 }
-                ByteArrayDataAccess csetDataAccess = new ByteArrayDataAccess(csetContent);
-                RawChangeset cs = csetBuilder.parse(csetDataAccess);
+                RawChangeset cs = csetBuilder.parse(csetContent);
                 inspector.next(revisionIndex++, ge.node(), cs);
                 prevRevContent.done();
-                prevRevContent = csetDataAccess.reset();
+                prevRevContent = new ByteArrayDataAccess(csetContent);
             } catch (CancelledException ex) {
                 return false;
-            } catch (IOException ex) {
-                throw new HgInvalidFileException("Invalid bundle file", ex, bundleFile); // TODO post-1.0 revisit exception handling
             } catch (HgInvalidDataFormatException ex) {
                 throw new HgInvalidControlFileException("Invalid bundle file", ex, bundleFile);
             }
@@ -217,11 +213,12 @@
         void fileEnd(String name) throws HgRuntimeException;
 
         /**
-         * XXX desperately need exceptions here
          * @param element data element, instance might be reused, don't keep a reference to it or its raw data
          * @return <code>true</code> to continue
+         * @throws IOException propagated exception from {@link GroupElement#data()}
+         * @throws HgRuntimeException propagated exception (subclass thereof) to indicate issues with the library. <em>Runtime exception</em>
         */
-        boolean element(GroupElement element) throws HgRuntimeException;
+        boolean element(GroupElement element) throws IOException, HgRuntimeException;
     }
 
     /**
@@ -229,7 +226,7 @@
     * @throws HgRuntimeException subclass thereof to indicate issues with the library. <em>Runtime exception</em>
     * @throws IllegalArgumentException if inspector argument is null
     */
-    public void inspectChangelog(Inspector inspector) throws HgRuntimeException {
+    public void inspectChangelog(Inspector inspector) throws HgIOException, HgRuntimeException {
         if (inspector == null) {
             throw new IllegalArgumentException();
         }
@@ -239,7 +236,7 @@
             da = getDataStream();
             internalInspectChangelog(da, inspector);
         } catch (IOException ex) {
-            throw new HgInvalidFileException("Bundle.inspectChangelog failed", ex, bundleFile);
+            throw new HgIOException("Failed to inspect changelog in the bundle", ex, bundleFile);
         } finally {
             if (da != null) {
                 da.done();
@@ -253,7 +250,7 @@
     * @throws HgRuntimeException subclass thereof to indicate issues with the library. <em>Runtime exception</em>
     * @throws IllegalArgumentException if inspector argument is null
     */
-    public void inspectManifest(Inspector inspector) throws HgRuntimeException {
+    public void inspectManifest(Inspector inspector) throws HgIOException, HgRuntimeException {
         if (inspector == null) {
             throw new IllegalArgumentException();
         }
@@ -267,7 +264,7 @@
             skipGroup(da); // changelog
             internalInspectManifest(da, inspector);
         } catch (IOException ex) {
-            throw new HgInvalidFileException("Bundle.inspectManifest failed", ex, bundleFile);
+            throw new HgIOException("Failed to inspect manifest in the bundle", ex, bundleFile);
         } finally {
             if (da != null) {
                 da.done();
@@ -281,7 +278,7 @@
     * @throws HgRuntimeException subclass thereof to indicate issues with the library. <em>Runtime exception</em>
     * @throws IllegalArgumentException if inspector argument is null
     */
-    public void inspectFiles(Inspector inspector) throws HgRuntimeException {
+    public void inspectFiles(Inspector inspector) throws HgIOException, HgRuntimeException {
         if (inspector == null) {
             throw new IllegalArgumentException();
         }
@@ -299,7 +296,7 @@
             skipGroup(da); // manifest
             internalInspectFiles(da, inspector);
         } catch (IOException ex) {
-            throw new HgInvalidFileException("Bundle.inspectFiles failed", ex, bundleFile);
+            throw new HgIOException("Failed to inspect files in the bundle", ex, bundleFile);
         } finally {
             if (da != null) {
                 da.done();
@@ -313,7 +310,7 @@
     * @throws HgRuntimeException subclass thereof to indicate issues with the library. <em>Runtime exception</em>
     * @throws IllegalArgumentException if inspector argument is null
    */
-    public void inspectAll(Inspector inspector) throws HgRuntimeException {
+    public void inspectAll(Inspector inspector) throws HgIOException, HgRuntimeException {
         if (inspector == null) {
             throw new IllegalArgumentException();
         }
@@ -331,7 +328,7 @@
             }
             internalInspectFiles(da, inspector);
         } catch (IOException ex) {
-            throw new HgInvalidFileException("Bundle.inspectAll failed", ex, bundleFile);
+            throw new HgIOException("Failed to inspect bundle", ex, bundleFile);
         } finally {
             if (da != null) {
                 da.done();
@@ -453,13 +450,15 @@
         }
     }
 
-    @Experimental(reason="Cumbersome API, rawData and apply with byte[] perhaps need replacement with ByteChannel/ByteBuffer, and better Exceptions. Perhaps, shall split into interface and impl")
-    public static class GroupElement {
+    /**
+     * Describes single element (a.k.a. chunk) of the group, either changelog, manifest or a file.
+     */
+    public static final class GroupElement {
         private final byte[] header; // byte[80] takes 120 bytes, 4 Nodeids - 192
         private final DataAccess dataAccess;
+        private final Nodeid deltaBase;
         private Patch patches;
-        private final Nodeid deltaBase;
-
+
         GroupElement(byte[] fourNodeids, Nodeid deltaBaseRev, DataAccess rawDataAccess) {
             assert fourNodeids != null && fourNodeids.length == 80;
             header = fourNodeids;
@@ -507,16 +506,15 @@
             return deltaBase == null ? firstParent() : deltaBase;
         }
 
-        public byte[] rawDataByteArray() throws IOException { // XXX IOException or HgInvalidFileException?
-            return rawData().byteArray();
-        }
-
-        public byte[] apply(byte[] baseContent) throws IOException {
-            return apply(new ByteArrayDataAccess(baseContent));
-        }
-
-        /*package-local*/ DataAccess rawData() {
-            return dataAccess;
+        /**
+         * Read data of the group element.
+         * Note, {@link InputStream streams} obtained from several calls to this method
+         * can't be read simultaneously.
+         *
+         * @return stream to access content of this group element, never <code>null</code>
+         */
+        public InputStream data() {
+            return new DataAccessInputStream(dataAccess);
         }
 
         /*package-local*/ Patch patch() throws IOException {
@@ -528,10 +526,6 @@
             return patches;
         }
 
-        /*package-local*/ byte[] apply(DataAccess baseContent) throws IOException {
-            return patch().apply(baseContent, -1);
-        }
-
         public String toString() {
             int patchCount;
             try {
@@ -543,29 +537,4 @@
             return String.format("%s %s %s %s; patches:%d\n", node().shortNotation(), firstParent().shortNotation(), secondParent().shortNotation(), cset().shortNotation(), patchCount);
         }
     }
-
-    @Experimental(reason="Work in progress, not an API")
-    public class BundleSerializer implements DataSerializer.DataSource {
-
-        public void serialize(DataSerializer out) throws HgIOException, HgRuntimeException {
-            FileInputStream fis = null;
-            try {
-                fis = new FileInputStream(HgBundle.this.bundleFile);
-                byte[] buffer = new byte[8*1024];
-                int r;
-                while ((r = fis.read(buffer, 0, buffer.length)) > 0) {
-                    out.write(buffer, 0, r);
-                }
-
-            } catch (IOException ex) {
-                throw new HgIOException("Failed to serialize bundle", HgBundle.this.bundleFile);
-            } finally {
-                new FileUtils(HgBundle.this.ctx.getLog(), this).closeQuietly(fis, HgBundle.this.bundleFile);
-            }
-        }
-
-        public int serializeLength() throws HgRuntimeException {
-            return Internals.ltoi(HgBundle.this.bundleFile.length());
-        }
-    }
 }
--- a/src/org/tmatesoft/hg/repo/HgChangelog.java Fri Jul 12 16:29:06 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgChangelog.java Fri Jul 12 20:14:24 2013 +0200
@@ -20,10 +20,8 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Calendar;
-import java.util.Collections;
 import java.util.Date;
 import java.util.Formatter;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
@@ -32,12 +30,10 @@
 import org.tmatesoft.hg.core.Nodeid;
 import org.tmatesoft.hg.core.SessionContext;
 import org.tmatesoft.hg.internal.Callback;
+import org.tmatesoft.hg.internal.ChangesetParser;
 import org.tmatesoft.hg.internal.DataAccess;
-import org.tmatesoft.hg.internal.EncodingHelper;
-import org.tmatesoft.hg.internal.Internals;
 import org.tmatesoft.hg.internal.Lifecycle;
 import org.tmatesoft.hg.internal.LifecycleBridge;
-import org.tmatesoft.hg.internal.Pool;
 import org.tmatesoft.hg.internal.RevlogStream;
 import org.tmatesoft.hg.util.Adaptable;
 import org.tmatesoft.hg.util.CancelSupport;
@@ -150,14 +146,14 @@
     */
    public static final class RawChangeset implements Cloneable /* for those that would like to keep a copy */{
        // would be nice to get it immutable, but then we can't reuse instances
-       private/* final */Nodeid manifest;
-       private String user;
-       private String comment;
-       private String[] files; // shall not be modified (#clone() does shallow copy)
-       private Date time;
-       private int timezone;
+       /* final */Nodeid manifest;
+       String user;
+       String comment;
+       String[] files; // shall not be modified (#clone() does shallow copy)
+       Date time;
+       int timezone;
        // http://mercurial.selenic.com/wiki/PruningDeadBranches - Closing changesets can be identified by close=1 in the changeset's extra field.
-       private Map<String, String> extras;
+       Map<String, String> extras;
 
        private RawChangeset() {
        }
@@ -241,170 +237,33 @@
        }
    }
 
-   /**
-    * @see mercurial/changelog.py:read()
-    *
-    * <pre>
-    * format used:
-    * nodeid\n        : manifest node in ascii
-    * user\n          : user, no \n or \r allowed
-    * time tz extra\n : date (time is int or float, timezone is int)
-    *                 : extra is metadatas, encoded and separated by '\0'
-    *                 : older versions ignore it
-    * files\n\n       : files modified by the cset, no \n or \r allowed
-    * (.*)            : comment (free text, ideally utf-8)
-    *
-    * changelog v0 doesn't use extra
-    * </pre>
-    */
-   /*package-local*/static final class ChangesetParser {
-       private final EncodingHelper encHelper;
-       // it's likely user names get repeated again and again throughout repository.
-       private final Pool<String> usersPool;
-       private final Pool<String> filesPool;
-       private final boolean reuseChangesetInstance;
-       private RawChangeset target;
-
-       public ChangesetParser(SessionContext.Source sessionContex, boolean shallReuseCsetInstance) {
-           encHelper = Internals.buildFileNameEncodingHelper(sessionContex);
-           usersPool = new Pool<String>();
-           filesPool = new Pool<String>();
-           reuseChangesetInstance = shallReuseCsetInstance;
+   /*package-local*/static final class RawCsetFactory implements ChangesetParser.CsetFactory {
+       private RawChangeset cset;
+
+       public RawCsetFactory(boolean shallReuseCsetInstance) {
           if (shallReuseCsetInstance) {
-               target = new RawChangeset();
+               cset = new RawChangeset();
           }
       }
-
-       public void dispose() {
-           usersPool.clear();
-           filesPool.clear();
-       }
-
-       public RawChangeset parse(DataAccess da) throws IOException, HgInvalidDataFormatException {
-           byte[] data = da.byteArray();
-           if (!reuseChangesetInstance) {
-               target = new RawChangeset();
-           }
-           init(data, 0, data.length);
-           return target;
-       }
-       private void init(byte[] data, int offset, int length) throws HgInvalidDataFormatException {
-           final int bufferEndIndex = offset + length;
-           final byte lineBreak = (byte) '\n';
-           int breakIndex1 = indexOf(data, lineBreak, offset, bufferEndIndex);
-           if (breakIndex1 == -1) {
-               throw new HgInvalidDataFormatException("Bad Changeset data");
-           }
-           Nodeid _nodeid = Nodeid.fromAscii(data, 0, breakIndex1);
-           int breakIndex2 = indexOf(data, lineBreak, breakIndex1 + 1, bufferEndIndex);
-           if (breakIndex2 == -1) {
-               throw new HgInvalidDataFormatException("Bad Changeset data");
-           }
-           String _user;
-           _user = encHelper.userFromChangeset(data, breakIndex1 + 1, breakIndex2 - breakIndex1 - 1);
-           _user = usersPool.unify(_user);
-
-           int breakIndex3 = indexOf(data, lineBreak, breakIndex2 + 1, bufferEndIndex);
-           if (breakIndex3 == -1) {
-               throw new HgInvalidDataFormatException("Bad Changeset data");
-           }
-           String _timeString = new String(data, breakIndex2 + 1, breakIndex3 - breakIndex2 - 1);
-           int space1 = _timeString.indexOf(' ');
-           if (space1 == -1) {
-               throw new HgInvalidDataFormatException(String.format("Bad Changeset data: %s in [%d..%d]", "time string", breakIndex2+1, breakIndex3));
-           }
-           int space2 = _timeString.indexOf(' ', space1 + 1);
-           if (space2 == -1) {
-               space2 = _timeString.length();
+       public RawChangeset create(Nodeid nodeidManifest, String user, Date time, int timezone, List<String> files, String comment, Map<String, String> extrasMap) {
+           RawChangeset target;
+           if (cset != null) {
+               target = cset;
+           } else {
+               target = new RawChangeset();
           }
-           long unixTime = Long.parseLong(_timeString.substring(0, space1));
-           int _timezone = Integer.parseInt(_timeString.substring(space1 + 1, space2));
-           // unixTime is local time, and timezone records difference of the local time to UTC.
-           Date _time = new Date(unixTime * 1000);
-           String _extras = space2 < _timeString.length() ? _timeString.substring(space2 + 1) : null;
-           Map<String, String> _extrasMap = parseExtras(_extras);
-           //
-           int lastStart = breakIndex3 + 1;
-           int breakIndex4 = indexOf(data, lineBreak, lastStart, bufferEndIndex);
-           ArrayList<String> _files = null;
-           if (breakIndex4 > lastStart) {
-               // if breakIndex4 == lastStart, we already found \n\n and hence there are no files (e.g. merge revision)
-               _files = new ArrayList<String>(5);
-               while (breakIndex4 != -1 && breakIndex4 + 1 < bufferEndIndex) {
-                   String fname = encHelper.fileFromChangeset(data, lastStart, breakIndex4 - lastStart);
-                   _files.add(filesPool.unify(fname));
-                   lastStart = breakIndex4 + 1;
-                   if (data[breakIndex4 + 1] == lineBreak) {
-                       // found \n\n
-                       break;
-                   } else {
-                       breakIndex4 = indexOf(data, lineBreak, lastStart, bufferEndIndex);
-                   }
-               }
-               if (breakIndex4 == -1 || breakIndex4 >= bufferEndIndex) {
-                   throw new HgInvalidDataFormatException("Bad Changeset data");
-               }
-           } else {
-               breakIndex4--;
-           }
-           String _comment = encHelper.commentFromChangeset(data, breakIndex4 + 2, bufferEndIndex - breakIndex4 - 2);
-           // change this instance at once, don't leave it partially changes in case of error
-           target.manifest = _nodeid;
-           target.user = _user;
-           target.time = _time;
-           target.timezone = _timezone;
-           target.files = _files == null ? new String[0] : _files.toArray(new String[_files.size()]);
-           target.comment = _comment;
-           target.extras = _extrasMap;
-       }
-
-       private Map<String, String> parseExtras(String _extras) {
-           final String extras_branch_key = "branch";
-           _extras = _extras == null ? null : _extras.trim();
-           if (_extras == null || _extras.length() == 0) {
-               return Collections.singletonMap(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME);
-           }
-           Map<String, String> _extrasMap = new HashMap<String, String>();
-           int lastIndex = 0;
-           do {
-               String pair;
-               int sp = _extras.indexOf('\0', lastIndex);
-               if (sp == -1) {
-                   sp = _extras.length();
-               }
-               if (sp > lastIndex) {
-                   pair = _extras.substring(lastIndex, sp);
-                   pair = decode(pair);
-                   int eq = pair.indexOf(':');
-                   _extrasMap.put(pair.substring(0, eq), pair.substring(eq + 1));
-                   lastIndex = sp + 1;
-               }
-           } while (lastIndex < _extras.length());
-           if (!_extrasMap.containsKey(extras_branch_key)) {
-               _extrasMap.put(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME);
-           }
-           return Collections.unmodifiableMap(_extrasMap);
-       }
-
-       private static int indexOf(byte[] src, byte what, int startOffset, int endIndex) {
-           for (int i = startOffset; i < endIndex; i++) {
-               if (src[i] == what) {
-                   return i;
-               }
-           }
-           return -1;
-       }
-
-       private static String decode(String s) {
-           if (s != null && s.indexOf('\\') != -1) {
-               // TestAuxUtilities#testChangelogExtrasDecode
-               return s.replace("\\\\", "\\").replace("\\n", "\n").replace("\\r", "\r").replace("\\0", "\00");
-           }
-           return s;
+           target.manifest = nodeidManifest;
+           target.user = user;
+           target.time = time;
+           target.timezone = timezone;
+           target.files = files == null ? new String[0] : files.toArray(new String[files.size()]);
+           target.comment = comment;
+           target.extras = extrasMap;
+           return target;
       }
   }
-
+
   private static class RawCsetCollector implements Inspector {
       final ArrayList<RawChangeset> result;
@@ -430,7 +289,7 @@
       public RawCsetParser(SessionContext.Source sessionContext, HgChangelog.Inspector delegate) {
           assert delegate != null;
           inspector = delegate;
-           csetBuilder = new ChangesetParser(sessionContext, true);
+           csetBuilder = new ChangesetParser(sessionContext, new RawCsetFactory(true));
           inspectorLifecycle = Adaptable.Factory.getAdapter(delegate, Lifecycle.class, null);
           if (inspectorLifecycle == null) {
               ProgressSupport ph = Adaptable.Factory.getAdapter(delegate, ProgressSupport.class, null);
--- a/src/org/tmatesoft/hg/repo/HgInternals.java Fri Jul 12 16:29:06 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgInternals.java Fri Jul 12 20:14:24 2013 +0200
@@ -28,8 +28,10 @@
 import org.tmatesoft.hg.core.Nodeid;
 import org.tmatesoft.hg.internal.Experimental;
 import org.tmatesoft.hg.internal.Internals;
+import org.tmatesoft.hg.internal.Patch;
 import org.tmatesoft.hg.internal.RelativePathRewrite;
 import org.tmatesoft.hg.internal.WinToNixPathRewrite;
+import org.tmatesoft.hg.repo.HgBundle.GroupElement;
 import org.tmatesoft.hg.repo.HgSubrepoLocation.Kind;
 import org.tmatesoft.hg.util.FileIterator;
 import org.tmatesoft.hg.util.FileWalker;
@@ -114,8 +116,19 @@
         br.close();
         return hgIgnore;
     }
+
+    // XXX just to access package local method. Perhaps, GroupElement shall be redesigned
+    // to allow classes from .internal to access its details?
+    // or Patch may become public?
+    public static Patch patchFromData(GroupElement ge) throws IOException {
+        return ge.patch();
+    }
 
-    // in fact, need a setter for this anyway, shall move to internal.Internals perhaps?
+    public static File getBundleFile(HgBundle bundle) {
+        return bundle.bundleFile;
+    }
+
+    // TODO in fact, need a setter for this anyway, shall move to internal.Internals perhaps?
     public String getNextCommitUsername() {
         String hgUser = System.getenv("HGUSER");
         if (hgUser != null && hgUser.trim().length() > 0) {
--- a/src/org/tmatesoft/hg/repo/HgRemoteRepository.java Fri Jul 12 16:29:06 2013 +0200
+++ b/src/org/tmatesoft/hg/repo/HgRemoteRepository.java Fri Jul 12 20:14:24 2013 +0200
@@ -65,6 +65,7 @@
 import org.tmatesoft.hg.core.SessionContext;
 import org.tmatesoft.hg.internal.DataSerializer;
 import org.tmatesoft.hg.internal.DataSerializer.OutputStreamSerializer;
+import org.tmatesoft.hg.internal.BundleSerializer;
 import org.tmatesoft.hg.internal.EncodingHelper;
 import org.tmatesoft.hg.internal.Internals;
 import org.tmatesoft.hg.internal.PropertyMarshal;
@@ -422,7 +423,7 @@
         StringBuilder sb = appendNodeidListArgument("heads", remoteHeads, null);
 
         HttpURLConnection c = null;
-        DataSerializer.DataSource bundleData = bundle.new BundleSerializer();
+        DataSerializer.DataSource bundleData = BundleSerializer.newInstance(sessionContext, bundle);
         try {
             URL u = new URL(url, url.getPath() + "?cmd=unbundle&" + sb.toString());
             c = setupConnection(u.openConnection());