view src/org/tmatesoft/hg/internal/BundleGenerator.java @ 709:497e697636fc

Report merged lines as a changed block where possible, not as a sequence of added/deleted blocks. To facilitate access to merge parent lines, AddBlock got a mergeLineAt() method that reports the index of the line in the second parent (if any), while insertedAt() has been changed to always report the index in the first parent
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 21 Aug 2013 16:23:27 +0200
parents 52af7f62e731
children
/*
 * Copyright (c) 2013 TMate Software Ltd
 *  
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * For information on how to redistribute this software under
 * the terms of a license other than GNU General Public License
 * contact TMate Software at support@hg4j.com
 */
package org.tmatesoft.hg.internal;

import static org.tmatesoft.hg.repo.HgRepository.NO_REVISION;
import static org.tmatesoft.hg.repo.HgRepository.TIP;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.tmatesoft.hg.core.HgIOException;
import org.tmatesoft.hg.core.Nodeid;
import org.tmatesoft.hg.internal.DataSerializer.OutputStreamSerializer;
import org.tmatesoft.hg.internal.Patch.PatchDataSource;
import org.tmatesoft.hg.repo.HgChangelog;
import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
import org.tmatesoft.hg.repo.HgDataFile;
import org.tmatesoft.hg.repo.HgInvalidControlFileException;
import org.tmatesoft.hg.repo.HgManifest;
import org.tmatesoft.hg.repo.HgRuntimeException;

/**
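 * Produces an uncompressed HG10 bundle file with the supplied changesets.
 * <p>
 * A minimal usage sketch, assuming an {@link Internals} instance and the changeset
 * {@link Nodeid}s to bundle are already at hand (names below are illustrative only):
 * <pre>
 * Internals implRepo = ...; // repository implementation internals
 * List&lt;Nodeid&gt; changesets = ...; // changelog revisions to pack
 * File bundleFile = new BundleGenerator(implRepo).create(changesets);
 * </pre>
 * 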
 * @see <a href="http://mercurial.selenic.com/wiki/BundleFormat">Mercurial BundleFormat</a>
 * @author Artem Tikhomirov
 * @author TMate Software Ltd.
 */
public class BundleGenerator {

	private final Internals repo;

	public BundleGenerator(Internals hgRepo) {
		repo = hgRepo;
	}
	
	/**
	 * @param changesets changelog revisions to include into the bundle
	 * @return never <code>null</code>; an empty file if no changesets were written
	 */
	public File create(List<Nodeid> changesets) throws HgIOException, IOException {
		final HgChangelog clog = repo.getRepo().getChangelog();
		final HgManifest manifest = repo.getRepo().getManifest();
		IntVector clogRevsVector = new IntVector(changesets.size(), 0);
		for (Nodeid n : changesets) {
			clogRevsVector.add(clog.getRevisionIndex(n));
		}
		clogRevsVector.sort(true);
		final int[] clogRevs = clogRevsVector.toArray();
		final IntMap<Nodeid> clogMap = new IntMap<Nodeid>(changesets.size());
		final IntVector manifestRevs = new IntVector(changesets.size(), 0);
		final List<HgDataFile> files = new ArrayList<HgDataFile>();
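		// walk the selected changesets to record their changelog nodeids, collect
		// the corresponding manifest revisions and every file they touch (once)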
		clog.range(new HgChangelog.Inspector() {
			private Set<String> seenFiles = new HashSet<String>();
			public void next(int revisionIndex, Nodeid nodeid, RawChangeset cset) throws HgRuntimeException {
				clogMap.put(revisionIndex, nodeid);
				manifestRevs.add(manifest.getRevisionIndex(cset.manifest()));
				for (String f : cset.files()) {
					if (seenFiles.contains(f)) {
						continue;
					}
					seenFiles.add(f);
					HgDataFile df = repo.getRepo().getFileNode(f);
					files.add(df);
				}
			}
		}, clogRevs);
		manifestRevs.sort(true);
		//
		final File bundleFile = File.createTempFile("hg4j-", ".bundle");
		if (clogRevs.length == 0) {
			// nothing to write
			return bundleFile;
		}
		final FileOutputStream osBundle = new FileOutputStream(bundleFile);
		final OutputStreamSerializer outRaw = new OutputStreamSerializer(osBundle);
		outRaw.write("HG10UN".getBytes(), 0, 6); // bundle header: HG10 format, uncompressed payload
		//
		RevlogStream clogStream = repo.getImplAccess().getChangelogStream();
		new ChunkGenerator(outRaw, clogMap).iterate(clogStream, clogRevs);
		outRaw.writeInt(0); // null chunk for changelog group
		//
		RevlogStream manifestStream = repo.getImplAccess().getManifestStream();
		new ChunkGenerator(outRaw, clogMap).iterate(manifestStream, manifestRevs.toArray(true));
		outRaw.writeInt(0); // null chunk for manifest group
		//
		EncodingHelper fnEncoder = repo.buildFileNameEncodingHelper();
		for (HgDataFile df : sortedByName(files)) {
			RevlogStream s = repo.getImplAccess().getStream(df);
			final IntVector fileRevs = new IntVector();
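			// pick only those file revisions whose linked changeset is among the bundled ones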
			s.iterate(0, TIP, false, new RevlogStream.Inspector() {
				
				public void next(int revisionIndex, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) throws HgRuntimeException {
					if (Arrays.binarySearch(clogRevs, linkRevision) >= 0) {
						fileRevs.add(revisionIndex);
					}
				}
			});
			fileRevs.sort(true);
			if (!fileRevs.isEmpty()) {
				// although the BundleFormat page says "filename length, filename" for a file,
				// in fact there's a sort of 'filename chunk': the filename length field includes
				// not only the length of the filename, but also the length of the field itself, i.e. filename.length + sizeof(int)
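				// e.g. (illustrative, assuming no special name encoding) a 5-byte name "a.txt"
				// would be written as writeInt(5 + 4) followed by the five name bytes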
				byte[] fnameBytes = fnEncoder.toBundle(df.getPath());
				outRaw.writeInt(fnameBytes.length + 4);
				outRaw.writeByte(fnameBytes);
				new ChunkGenerator(outRaw, clogMap).iterate(s, fileRevs.toArray(true));
				outRaw.writeInt(0); // null chunk for file group
			}
		}
		outRaw.writeInt(0); // null chunk to indicate no more files (although BundleFormat page doesn't mention this)
		outRaw.done();
		osBundle.flush();
		osBundle.close();
		//return new HgBundle(repo.getSessionContext(), repo.getDataAccess(), bundleFile);
		return bundleFile;
	}
	
	private static Collection<HgDataFile> sortedByName(List<HgDataFile> files) {
		Collections.sort(files, new Comparator<HgDataFile>() {

			public int compare(HgDataFile o1, HgDataFile o2) {
				return o1.getPath().compareTo(o2.getPath());
			}
		});
		return files;
	}
	
	private static class ChunkGenerator implements RevlogStream.Inspector {
		
		private final DataSerializer ds;
		private final IntMap<Nodeid> parentMap;
		private final IntMap<Nodeid> clogMap;
		private byte[] prevContent;
		private int startParent;

		public ChunkGenerator(DataSerializer dataSerializer, IntMap<Nodeid> clogNodeidMap) {
			ds = dataSerializer;
			parentMap = new IntMap<Nodeid>(clogNodeidMap.size());
			clogMap = clogNodeidMap;
		}
		
		public void iterate(RevlogStream s, int[] revisions) throws HgRuntimeException {
			int[] p = s.parents(revisions[0], new int[2]);
			startParent = p[0];
			int[] revs2read;
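			// when the first requested revision has a parent, read that parent first so
			// there's base content to delta against; next() skips emitting a chunk for it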
			if (startParent == NO_REVISION) {
				revs2read = revisions;
				prevContent = new byte[0];
			} else {
				revs2read = new int[revisions.length + 1];
				revs2read[0] = startParent;
				System.arraycopy(revisions, 0, revs2read, 1, revisions.length);
			}
			// FIXME this is a hack to fill parentMap with
			// parents of revisions that we are not going to meet during regular
			// iteration, e.g. changes from a different branch (with some older parent).
			// Scenario: two revisions added to two different branches,
			// revisions == [10, 11], parents(10) == 9, parents(11) == 7,
			// revs2read == [9, 10, 11], and parentMap lacks an entry for parent rev 7.
			fillMissingParentsMap(s, revisions);
			s.iterate(revs2read, true, this);
		}
		
		private void fillMissingParentsMap(RevlogStream s, int[] revisions) throws HgRuntimeException {
			int[] p = new int[2];
			for (int i = 1; i < revisions.length; i++) {
				s.parents(revisions[i], p);
				if (p[0] != NO_REVISION && Arrays.binarySearch(revisions, p[0]) < 0) {
					parentMap.put(p[0], Nodeid.fromBinary(s.nodeid(p[0]), 0));
				}
				if (p[1] != NO_REVISION && Arrays.binarySearch(revisions, p[1]) < 0) {
					parentMap.put(p[1], Nodeid.fromBinary(s.nodeid(p[1]), 0));
				}
			}
		}
		
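		/*
		 * Writes a single changegroup chunk for the revision: a 4-byte length (which
		 * counts the length field itself), four 20-byte nodeids (this revision, first
		 * parent, second parent, linked changeset), then the delta against the
		 * previously seen revision content.
		 */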
		public void next(int revisionIndex, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) throws HgRuntimeException {
			try {
				parentMap.put(revisionIndex, Nodeid.fromBinary(nodeid, 0));
				byte[] nextContent = data.byteArray();
				data.done();
				if (revisionIndex == startParent) {
					prevContent = nextContent;
					return;
				}
				Patch p = GeneratePatchInspector.delta(prevContent, nextContent);
				prevContent = nextContent;
				nextContent = null;
				PatchDataSource pds = p.new PatchDataSource();
				int len = pds.serializeLength() + 84; // length field (4 bytes) + four 20-byte nodeids
				ds.writeInt(len);
				ds.write(nodeid, 0, Nodeid.SIZE);
				if (parent1Revision != NO_REVISION) {
					ds.writeByte(parentMap.get(parent1Revision).toByteArray());
				} else {
					ds.writeByte(Nodeid.NULL.toByteArray());
				}
				if (parent2Revision != NO_REVISION) {
					ds.writeByte(parentMap.get(parent2Revision).toByteArray());
				} else {
					ds.writeByte(Nodeid.NULL.toByteArray());
				}
				ds.writeByte(clogMap.get(linkRevision).toByteArray());
				pds.serialize(ds);
			} catch (IOException ex) {
				// XXX odd to have an object that throws IOException used where no checked exception is allowed
				throw new HgInvalidControlFileException(ex.getMessage(), ex, null); 
			} catch (HgIOException ex) {
				throw new HgInvalidControlFileException(ex, true); // XXX any way to refactor ChunkGenerator not to get checked exception here?
			}
		}
	}
}