view src/org/tmatesoft/hg/repo/HgBundle.java @ 709:497e697636fc

Report merged lines as changed block if possible, not as a sequence of added/deleted blocks. To facilitate access to merge parent lines AddBlock got mergeLineAt() method that reports index of the line in the second parent (if any), while insertedAt() has been changed to report index in the first parent always
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 21 Aug 2013 16:23:27 +0200
parents 545b1d4cc11d
children
line wrap: on
line source
/*
 * Copyright (c) 2011-2013 TMate Software Ltd
 *  
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * For information on how to redistribute this software under
 * the terms of a license other than GNU General Public License
 * contact TMate Software at support@hg4j.com
 */
package org.tmatesoft.hg.repo;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ConcurrentModificationException;

import org.tmatesoft.hg.core.HgIOException;
import org.tmatesoft.hg.core.Nodeid;
import org.tmatesoft.hg.core.SessionContext;
import org.tmatesoft.hg.internal.ByteArrayChannel;
import org.tmatesoft.hg.internal.ByteArrayDataAccess;
import org.tmatesoft.hg.internal.Callback;
import org.tmatesoft.hg.internal.ChangesetParser;
import org.tmatesoft.hg.internal.DataAccess;
import org.tmatesoft.hg.internal.DataAccessInputStream;
import org.tmatesoft.hg.internal.DataAccessProvider;
import org.tmatesoft.hg.internal.DigestHelper;
import org.tmatesoft.hg.internal.EncodingHelper;
import org.tmatesoft.hg.internal.Experimental;
import org.tmatesoft.hg.internal.InflaterDataAccess;
import org.tmatesoft.hg.internal.Internals;
import org.tmatesoft.hg.internal.Lifecycle;
import org.tmatesoft.hg.internal.Patch;
import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
import org.tmatesoft.hg.util.Adaptable;
import org.tmatesoft.hg.util.CancelledException;

/**
 * WORK IN PROGRESS
 * 
 * @see http://mercurial.selenic.com/wiki/BundleFormat
 * 
 * @author Artem Tikhomirov
 * @author TMate Software Ltd.
 */
@Experimental(reason="API is not stable")
public class HgBundle {

	final File bundleFile;
	private final DataAccessProvider accessProvider;
	final SessionContext ctx;
	private final EncodingHelper fnDecorer;
	private Lifecycle.BasicCallback flowControl;

	HgBundle(SessionContext sessionContext, DataAccessProvider dap, File bundle) {
		ctx = sessionContext;
		accessProvider = dap;
		bundleFile = bundle;
		fnDecorer = Internals.buildFileNameEncodingHelper(new SessionContext.SourcePrim(ctx));
	}

	private DataAccess getDataStream() throws IOException {
		DataAccess da = accessProvider.createReader(bundleFile, false);
		byte[] signature = new byte[6];
		if (da.length() > 6) {
			da.readBytes(signature, 0, 6);
			if (signature[0] == 'H' && signature[1] == 'G' && signature[2] == '1' && signature[3] == '0') {
				if (signature[4] == 'G' && signature[5] == 'Z') {
					return new InflaterDataAccess(da, 6, da.length() - 6);
				}
				if (signature[4] == 'B' && signature[5] == 'Z') {
					throw Internals.notImplemented();
				}
				if (signature[4] != 'U' || signature[5] != 'N') {
					throw new HgInvalidStateException(String.format("Bad bundle signature: %s",  String.valueOf(signature)));
				}
				// "...UN", fall-through
			} else {
				da.reset();
			}
		}
		return da;
	}

	private int uses = 0;
	public HgBundle link() {
		uses++;
		return this;
	}
	public void unlink() {
		uses--;
		if (uses == 0 && bundleFile != null) {
			bundleFile.deleteOnExit();
		}
	}
	public boolean inUse() {
		return uses > 0;
	}

	/**
	 * Get changes recorded in the bundle that are missing from the supplied repository.
	 * @param hgRepo repository that shall possess base revision for this bundle
	 * @param inspector callback to get each changeset found 
	 */
	public void changes(final HgRepository hgRepo, final HgChangelog.Inspector inspector) throws HgIOException, HgRuntimeException {
		Inspector bundleInsp = new Inspector() {
			DigestHelper dh = new DigestHelper();
			boolean emptyChangelog = true;
			private DataAccess prevRevContent;
			private int revisionIndex;
			private ChangesetParser csetBuilder;

			public void changelogStart() {
				emptyChangelog = true;
				revisionIndex = 0;
				csetBuilder = new ChangesetParser(hgRepo, new HgChangelog.RawCsetFactory(true));
			}

			public void changelogEnd() {
				if (emptyChangelog) {
					throw new IllegalStateException("No changelog group in the bundle"); // XXX perhaps, just be silent and/or log?
				}
			}

/*
 * Despite that BundleFormat wiki says: "Each Changelog entry patches the result of all previous patches 
 * (the previous, or parent patch of a given patch p is the patch that has a node equal to p's p1 field)",
 *  it seems not to hold true. Instead, each entry patches previous one, regardless of whether the one
 *  before is its parent (i.e. ge.firstParent()) or not.
 *  
Actual state in the changelog.i
Index    Offset      Flags     Packed     Actual   Base Rev   Link Rev  Parent1  Parent2     nodeid
  50:          9212      0        209        329         48         50       49       -1     f1db8610da62a3e0beb8d360556ee1fd6eb9885e
  51:          9421      0        278        688         48         51       50       -1     9429c7bd1920fab164a9d2b621d38d57bcb49ae0
  52:          9699      0        154        179         52         52       50       -1     30bd389788464287cee22ccff54c330a4b715de5
  53:          9853      0        133        204         52         53       51       52     a6f39e595b2b54f56304470269a936ead77f5725
  54:          9986      0        156        182         54         54       52       -1     fd4f2c98995beb051070630c272a9be87bef617d

Excerpt from bundle (nodeid, p1, p2, cs):
   f1db8610da62a3e0beb8d360556ee1fd6eb9885e 26e3eeaa39623de552b45ee1f55c14f36460f220 0000000000000000000000000000000000000000 f1db8610da62a3e0beb8d360556ee1fd6eb9885e; patches:4
   9429c7bd1920fab164a9d2b621d38d57bcb49ae0 f1db8610da62a3e0beb8d360556ee1fd6eb9885e 0000000000000000000000000000000000000000 9429c7bd1920fab164a9d2b621d38d57bcb49ae0; patches:3
>  30bd389788464287cee22ccff54c330a4b715de5 f1db8610da62a3e0beb8d360556ee1fd6eb9885e 0000000000000000000000000000000000000000 30bd389788464287cee22ccff54c330a4b715de5; patches:3
   a6f39e595b2b54f56304470269a936ead77f5725 9429c7bd1920fab164a9d2b621d38d57bcb49ae0 30bd389788464287cee22ccff54c330a4b715de5 a6f39e595b2b54f56304470269a936ead77f5725; patches:3
   fd4f2c98995beb051070630c272a9be87bef617d 30bd389788464287cee22ccff54c330a4b715de5 0000000000000000000000000000000000000000 fd4f2c98995beb051070630c272a9be87bef617d; patches:3

To recreate 30bd..e5, one have to take content of 9429..e0, not its p1 f1db..5e
 */
			public boolean element(GroupElement ge) throws IOException, HgRuntimeException {
				emptyChangelog = false;
				HgChangelog changelog = hgRepo.getChangelog();
				try {
					if (prevRevContent == null) { 
						if (ge.firstParent().isNull() && ge.secondParent().isNull()) {
							prevRevContent = new ByteArrayDataAccess(new byte[0]);
						} else {
							final Nodeid base = ge.firstParent();
							if (!changelog.isKnown(base) /*only first parent, that's Bundle contract*/) {
								throw new IllegalStateException(String.format("Revision %s needs a parent %s, which is missing in the supplied repo %s", ge.node().shortNotation(), base.shortNotation(), hgRepo.toString()));
							}
							ByteArrayChannel bac = new ByteArrayChannel();
							changelog.rawContent(base, bac); // TODO post-1.0 get DataAccess directly, to avoid
							// extra byte[] (inside ByteArrayChannel) duplication just for the sake of subsequent ByteArrayDataChannel wrap.
							prevRevContent = new ByteArrayDataAccess(bac.toArray());
						}
					}
					//
					byte[] csetContent = ge.patch().apply(prevRevContent, -1);
					dh = dh.sha1(ge.firstParent(), ge.secondParent(), csetContent); // XXX ge may give me access to byte[] content of nodeid directly, perhaps, I don't need DH to be friend of Nodeid?
					if (!ge.node().equalsTo(dh.asBinary())) {
						throw new HgInvalidStateException(String.format("Integrity check failed on %s, node: %s", bundleFile, ge.node().shortNotation()));
					}
					RawChangeset cs = csetBuilder.parse(csetContent);
					inspector.next(revisionIndex++, ge.node(), cs);
					prevRevContent.done();
					prevRevContent = new ByteArrayDataAccess(csetContent);
				} catch (CancelledException ex) {
					return false;
				} catch (HgInvalidDataFormatException ex) {
					throw new HgInvalidControlFileException("Invalid bundle file", ex, bundleFile);
				}
				return true;
			}

			public void manifestStart() {}
			public void manifestEnd() {}
			public void fileStart(String name) {}
			public void fileEnd(String name) {}

		};
		inspectChangelog(bundleInsp);
	}

	// callback to minimize amount of Strings and Nodeids instantiated
	@Callback
	public interface Inspector {
		void changelogStart() throws HgRuntimeException;

		void changelogEnd() throws HgRuntimeException;

		void manifestStart() throws HgRuntimeException;

		void manifestEnd() throws HgRuntimeException;

		void fileStart(String name) throws HgRuntimeException;

		void fileEnd(String name) throws HgRuntimeException;

		/**
		 * @param element data element, instance might be reused, don't keep a reference to it or its raw data
		 * @return <code>true</code> to continue
		 * @throws IOException propagated exception from {@link GroupElement#data()}
		 * @throws HgRuntimeException propagated exception (subclass thereof) to indicate issues with the library. <em>Runtime exception</em>
		 */
		boolean element(GroupElement element) throws IOException, HgRuntimeException;
	}

	/**
	 * @param inspector callback to visit changelog entries
	 * @throws HgRuntimeException subclass thereof to indicate issues with the library. <em>Runtime exception</em>
	 * @throws IllegalArgumentException if inspector argument is null
	 */
	public void inspectChangelog(Inspector inspector) throws HgIOException, HgRuntimeException {
		if (inspector == null) {
			throw new IllegalArgumentException();
		}
		final Lifecycle lifecycle = lifecycleSetUp(inspector);
		DataAccess da = null;
		try {
			da = getDataStream();
			internalInspectChangelog(da, inspector);
		} catch (IOException ex) {
			throw new HgIOException("Failed to inspect changelog in the bundle", ex, bundleFile);
		} finally {
			if (da != null) {
				da.done();
			}
			lifecycleTearDown(lifecycle);
		}
	}

	/**
	 * @param inspector callback to visit manifest entries
	 * @throws HgRuntimeException subclass thereof to indicate issues with the library. <em>Runtime exception</em>
	 * @throws IllegalArgumentException if inspector argument is null
	 */
	public void inspectManifest(Inspector inspector) throws HgIOException, HgRuntimeException {
		if (inspector == null) {
			throw new IllegalArgumentException();
		}
		final Lifecycle lifecycle = lifecycleSetUp(inspector);
		DataAccess da = null;
		try {
			da = getDataStream();
			if (da.isEmpty()) {
				return;
			}
			skipGroup(da); // changelog
			internalInspectManifest(da, inspector);
		} catch (IOException ex) {
			throw new HgIOException("Failed to inspect manifest in the bundle", ex, bundleFile);
		} finally {
			if (da != null) {
				da.done();
			}
			lifecycleTearDown(lifecycle);
		}
	}

	/**
	 * @param inspector callback to visit file entries
	 * @throws HgRuntimeException subclass thereof to indicate issues with the library. <em>Runtime exception</em>
	 * @throws IllegalArgumentException if inspector argument is null
	 */
	public void inspectFiles(Inspector inspector) throws HgIOException, HgRuntimeException {
		if (inspector == null) {
			throw new IllegalArgumentException();
		}
		final Lifecycle lifecycle = lifecycleSetUp(inspector);
		DataAccess da = null;
		try {
			da = getDataStream();
			if (da.isEmpty()) {
				return;
			}
			skipGroup(da); // changelog
			if (da.isEmpty()) {
				return;
			}
			skipGroup(da); // manifest
			internalInspectFiles(da, inspector);
		} catch (IOException ex) {
			throw new HgIOException("Failed to inspect files in the bundle", ex, bundleFile);
		} finally {
			if (da != null) {
				da.done();
			}
			lifecycleTearDown(lifecycle);
		}
	}

	/**
	 * @param inspector visit complete bundle (changelog, manifest and file entries)
	 * @throws HgRuntimeException subclass thereof to indicate issues with the library. <em>Runtime exception</em>
	 * @throws IllegalArgumentException if inspector argument is null
	 */
	public void inspectAll(Inspector inspector) throws HgIOException, HgRuntimeException {
		if (inspector == null) {
			throw new IllegalArgumentException();
		}
		final Lifecycle lifecycle = lifecycleSetUp(inspector);
		DataAccess da = null;
		try {
			da = getDataStream();
			internalInspectChangelog(da, inspector);
			if (flowControl.isStopped()) {
				return;
			}
			internalInspectManifest(da, inspector);
			if (flowControl.isStopped()) {
				return;
			}
			internalInspectFiles(da, inspector);
		} catch (IOException ex) {
			throw new HgIOException("Failed to inspect bundle", ex, bundleFile);
		} finally {
			if (da != null) {
				da.done();
			}
			lifecycleTearDown(lifecycle);
		}
	}
	
	// initialize flowControl, check for concurrent usage, starts lifecyle, if any
	// return non-null only if inspector is interested in lifecycle events
	private Lifecycle lifecycleSetUp(Inspector inspector) throws ConcurrentModificationException {
		// Don't need flowControl in case Inspector doesn't implement Lifecycle,
		// however is handy not to expect it == null inside internalInspect* 
		// XXX Once there's need to make this class thread-safe,
		// shall move flowControl to thread-local state.
		if (flowControl != null) {
			throw new ConcurrentModificationException("HgBundle is in use and not thread-safe yet");
		}
		flowControl = new Lifecycle.BasicCallback();
		final Lifecycle lifecycle = Adaptable.Factory.getAdapter(inspector, Lifecycle.class, null);
		if (lifecycle != null) {
			lifecycle.start(-1, flowControl, flowControl);
		}
		return lifecycle;
	}
	
	private void lifecycleTearDown(Lifecycle lifecycle) {
		if (lifecycle != null) {
			lifecycle.finish(flowControl);
		}
		flowControl = null;
	}

	private void internalInspectChangelog(DataAccess da, Inspector inspector) throws IOException, HgRuntimeException {
		if (da.isEmpty()) {
			return;
		}
		inspector.changelogStart();
		if (flowControl.isStopped()) {
			return;
		}
		readGroup(da, inspector);
		if (flowControl.isStopped()) {
			return;
		}
		inspector.changelogEnd();
	}

	private void internalInspectManifest(DataAccess da, Inspector inspector) throws IOException, HgRuntimeException {
		if (da.isEmpty()) {
			return;
		}
		inspector.manifestStart();
		if (flowControl.isStopped()) {
			return;
		}
		readGroup(da, inspector);
		if (flowControl.isStopped()) {
			return;
		}
		inspector.manifestEnd();
	}

	private void internalInspectFiles(DataAccess da, Inspector inspector) throws IOException, HgRuntimeException {
		while (!da.isEmpty()) {
			int fnameLen = da.readInt();
			if (fnameLen <= 4) {
				break; // null chunk, the last one.
			}
			byte[] fnameBuf = new byte[fnameLen - 4];
			da.readBytes(fnameBuf, 0, fnameBuf.length);
			String name = fnDecorer.fromBundle(fnameBuf, 0, fnameBuf.length);
			inspector.fileStart(name);
			if (flowControl.isStopped()) {
				return;
			}
			readGroup(da, inspector);
			if (flowControl.isStopped()) {
				return;
			}
			inspector.fileEnd(name);
		}
	}

	private static void readGroup(DataAccess da, Inspector inspector) throws IOException, HgRuntimeException {
		int len = da.readInt();
		boolean good2go = true;
		Nodeid prevNodeid = null;
		while (len > 4 && !da.isEmpty() && good2go) {
			byte[] nb = new byte[80];
			da.readBytes(nb, 0, 80);
			int dataLength = len - 84 /* length field + 4 nodeids */;
			byte[] data = new byte[dataLength];
			da.readBytes(data, 0, dataLength);
			DataAccess slice = new ByteArrayDataAccess(data); // XXX in fact, may pass a slicing DataAccess.
			// Just need to make sure that we seek to proper location afterwards (where next GroupElement starts),
			// regardless whether that slice has read it or not.
			GroupElement ge = new GroupElement(nb, prevNodeid, slice);
			good2go = inspector.element(ge);
			slice.done(); // BADA doesn't implement done(), but it could (e.g. free array) 
			/// and we'd better tell it we are not going to use it any more. However, it's important to ensure Inspector
			// implementations out there do not retain GroupElement.rawData()
			prevNodeid = ge.node();
			len = da.isEmpty() ? 0 : da.readInt();
		}
		// need to skip up to group end if inspector told he don't want to continue with the group, 
		// because outer code may try to read next group immediately as we return back.
		while (len > 4 && !da.isEmpty()) {
			da.skip(len - 4 /* length field */);
			len = da.isEmpty() ? 0 : da.readInt();
		}
	}

	private static void skipGroup(DataAccess da) throws IOException {
		int len = da.readInt();
		while (len > 4 && !da.isEmpty()) {
			da.skip(len - 4); // sizeof(int)
			len = da.isEmpty() ? 0 : da.readInt();
		}
	}

	/**
	 * Describes single element (a.k.a. chunk) of the group, either changelog, manifest or a file. 
	 */
	public static final class GroupElement {
		private final byte[] header; // byte[80] takes 120 bytes, 4 Nodeids - 192
		private final DataAccess dataAccess;
		private final Nodeid deltaBase;
		private Patch patches;
		
		GroupElement(byte[] fourNodeids, Nodeid deltaBaseRev, DataAccess rawDataAccess) {
			assert fourNodeids != null && fourNodeids.length == 80;
			header = fourNodeids;
			deltaBase = deltaBaseRev;
			dataAccess = rawDataAccess;
		}

		/**
		 * <b>node</b> field of the group element
		 * @return node revision, never <code>null</code>
		 */
		public Nodeid node() {
			return Nodeid.fromBinary(header, 0);
		}

		/**
		 * <b>p1</b> <i>(parent 1)</i> field of the group element
		 * @return revision of parent 1, never <code>null</code>
		 */
		public Nodeid firstParent() {
			return Nodeid.fromBinary(header, 20);
		}

		/**
		 * <b>p2</b> <i>(parent 2)</i> field of the group element
		 * @return revision of parent 2, never <code>null</code>
		 */
		public Nodeid secondParent() {
			return Nodeid.fromBinary(header, 40);
		}

		/**
		 * <b>cs</b> <i>(changeset link)</i> field of the group element
		 * @return changeset revision, never <code>null</code>
		 */
		public Nodeid cset() {
			return Nodeid.fromBinary(header, 60);
		}
		
		/**
		 * Revision this element keeps patches against. For the patches of the very first revision returns {@link Nodeid#NULL}.
		 * @return revision of delta base, never <code>null</code>
		 */
		public Nodeid patchBase() {
			return deltaBase == null ? firstParent() : deltaBase;
		}
		
		/**
		 * Read data of the group element. 
		 * Note, {@link InputStream streams} obtained from several calls to this method
		 * can't be read simultaneously.
		 *  
		 * @return stream to access content of this group element, never <code>null</code>
		 */
		public InputStream data() {
			return new DataAccessInputStream(dataAccess);
		}
		
		/*package-local*/ Patch patch() throws IOException {
			if (patches == null) {
				dataAccess.reset();
				patches = new Patch();
				patches.read(dataAccess);
			}
			return patches;
		}

		public String toString() {
			int patchCount;
			try {
				patchCount = patch().count();
			} catch (IOException ex) {
				ex.printStackTrace();
				patchCount = -1;
			}
			return String.format("%s %s %s %s; patches:%d\n", node().shortNotation(), firstParent().shortNotation(), secondParent().shortNotation(), cset().shortNotation(), patchCount);
		}
	}
}