kitaev@213: /* kitaev@213: * Copyright (c) 2011 TMate Software Ltd kitaev@213: * kitaev@213: * This program is free software; you can redistribute it and/or modify kitaev@213: * it under the terms of the GNU General Public License as published by kitaev@213: * the Free Software Foundation; version 2 of the License. kitaev@213: * kitaev@213: * This program is distributed in the hope that it will be useful, kitaev@213: * but WITHOUT ANY WARRANTY; without even the implied warranty of kitaev@213: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the kitaev@213: * GNU General Public License for more details. kitaev@213: * kitaev@213: * For information on how to redistribute this software under kitaev@213: * the terms of a license other than GNU General Public License kitaev@213: * contact TMate Software at support@hg4j.com kitaev@213: */ kitaev@213: package org.tmatesoft.hg.repo; kitaev@213: kitaev@213: import static org.tmatesoft.hg.core.Nodeid.NULL; kitaev@213: kitaev@213: import java.io.File; kitaev@213: import java.io.IOException; kitaev@213: import java.util.LinkedList; kitaev@213: import java.util.List; kitaev@213: kitaev@213: import org.tmatesoft.hg.core.HgBadStateException; kitaev@213: import org.tmatesoft.hg.core.HgException; kitaev@213: import org.tmatesoft.hg.core.Nodeid; kitaev@213: import org.tmatesoft.hg.internal.ByteArrayChannel; kitaev@213: import org.tmatesoft.hg.internal.ByteArrayDataAccess; kitaev@213: import org.tmatesoft.hg.internal.DataAccess; kitaev@213: import org.tmatesoft.hg.internal.DataAccessProvider; kitaev@213: import org.tmatesoft.hg.internal.DigestHelper; kitaev@213: import org.tmatesoft.hg.internal.InflaterDataAccess; kitaev@213: import org.tmatesoft.hg.internal.RevlogStream; kitaev@213: import org.tmatesoft.hg.repo.HgChangelog.RawChangeset; kitaev@213: import org.tmatesoft.hg.util.CancelledException; kitaev@213: kitaev@213: /** kitaev@213: * @see http://mercurial.selenic.com/wiki/BundleFormat kitaev@213: * kitaev@213: * @author Artem Tikhomirov kitaev@213: * @author TMate Software Ltd. kitaev@213: */ kitaev@213: public class HgBundle { kitaev@213: kitaev@213: private final File bundleFile; kitaev@213: private final DataAccessProvider accessProvider; kitaev@213: kitaev@213: HgBundle(DataAccessProvider dap, File bundle) { kitaev@213: accessProvider = dap; kitaev@213: bundleFile = bundle; kitaev@213: } kitaev@213: kitaev@213: private DataAccess getDataStream() throws IOException { kitaev@213: DataAccess da = accessProvider.create(bundleFile); kitaev@213: byte[] signature = new byte[6]; kitaev@213: if (da.length() > 6) { kitaev@213: da.readBytes(signature, 0, 6); kitaev@213: if (signature[0] == 'H' && signature[1] == 'G' && signature[2] == '1' && signature[3] == '0') { kitaev@213: if (signature[4] == 'G' && signature[5] == 'Z') { kitaev@213: return new InflaterDataAccess(da, 6, da.length() - 6); kitaev@213: } kitaev@213: if (signature[4] == 'B' && signature[5] == 'Z') { kitaev@213: throw HgRepository.notImplemented(); kitaev@213: } kitaev@213: if (signature[4] != 'U' || signature[5] != 'N') { kitaev@213: throw new HgBadStateException("Bad bundle signature:" + new String(signature)); kitaev@213: } kitaev@213: // "...UN", fall-through kitaev@213: } else { kitaev@213: da.reset(); kitaev@213: } kitaev@213: } kitaev@213: return da; kitaev@213: } kitaev@213: kitaev@213: private int uses = 0; kitaev@213: public HgBundle link() { kitaev@213: uses++; kitaev@213: return this; kitaev@213: } kitaev@213: public void unlink() { kitaev@213: uses--; kitaev@213: if (uses == 0 && bundleFile != null) { kitaev@213: bundleFile.deleteOnExit(); kitaev@213: } kitaev@213: } kitaev@213: public boolean inUse() { kitaev@213: return uses > 0; kitaev@213: } kitaev@213: kitaev@213: /** kitaev@213: * Get changes recorded in the bundle that are missing from the supplied repository. kitaev@213: * @param hgRepo repository that shall possess base revision for this bundle kitaev@213: * @param inspector callback to get each changeset found kitaev@213: */ kitaev@213: public void changes(final HgRepository hgRepo, final HgChangelog.Inspector inspector) throws HgException, IOException { kitaev@213: Inspector bundleInsp = new Inspector() { kitaev@213: DigestHelper dh = new DigestHelper(); kitaev@213: boolean emptyChangelog = true; kitaev@213: private DataAccess prevRevContent; kitaev@213: private int revisionIndex; kitaev@213: kitaev@213: public void changelogStart() { kitaev@213: emptyChangelog = true; kitaev@213: revisionIndex = 0; kitaev@213: } kitaev@213: kitaev@213: public void changelogEnd() { kitaev@213: if (emptyChangelog) { kitaev@213: throw new IllegalStateException("No changelog group in the bundle"); // XXX perhaps, just be silent and/or log? kitaev@213: } kitaev@213: } kitaev@213: kitaev@213: /* kitaev@213: * Despite that BundleFormat wiki says: "Each Changelog entry patches the result of all previous patches kitaev@213: * (the previous, or parent patch of a given patch p is the patch that has a node equal to p's p1 field)", kitaev@213: * it seems not to hold true. Instead, each entry patches previous one, regardless of whether the one kitaev@213: * before is its parent (i.e. ge.firstParent()) or not. kitaev@213: * kitaev@213: Actual state in the changelog.i kitaev@213: Index Offset Flags Packed Actual Base Rev Link Rev Parent1 Parent2 nodeid kitaev@213: 50: 9212 0 209 329 48 50 49 -1 f1db8610da62a3e0beb8d360556ee1fd6eb9885e kitaev@213: 51: 9421 0 278 688 48 51 50 -1 9429c7bd1920fab164a9d2b621d38d57bcb49ae0 kitaev@213: 52: 9699 0 154 179 52 52 50 -1 30bd389788464287cee22ccff54c330a4b715de5 kitaev@213: 53: 9853 0 133 204 52 53 51 52 a6f39e595b2b54f56304470269a936ead77f5725 kitaev@213: 54: 9986 0 156 182 54 54 52 -1 fd4f2c98995beb051070630c272a9be87bef617d kitaev@213: kitaev@213: Excerpt from bundle (nodeid, p1, p2, cs): kitaev@213: f1db8610da62a3e0beb8d360556ee1fd6eb9885e 26e3eeaa39623de552b45ee1f55c14f36460f220 0000000000000000000000000000000000000000 f1db8610da62a3e0beb8d360556ee1fd6eb9885e; patches:4 kitaev@213: 9429c7bd1920fab164a9d2b621d38d57bcb49ae0 f1db8610da62a3e0beb8d360556ee1fd6eb9885e 0000000000000000000000000000000000000000 9429c7bd1920fab164a9d2b621d38d57bcb49ae0; patches:3 kitaev@213: > 30bd389788464287cee22ccff54c330a4b715de5 f1db8610da62a3e0beb8d360556ee1fd6eb9885e 0000000000000000000000000000000000000000 30bd389788464287cee22ccff54c330a4b715de5; patches:3 kitaev@213: a6f39e595b2b54f56304470269a936ead77f5725 9429c7bd1920fab164a9d2b621d38d57bcb49ae0 30bd389788464287cee22ccff54c330a4b715de5 a6f39e595b2b54f56304470269a936ead77f5725; patches:3 kitaev@213: fd4f2c98995beb051070630c272a9be87bef617d 30bd389788464287cee22ccff54c330a4b715de5 0000000000000000000000000000000000000000 fd4f2c98995beb051070630c272a9be87bef617d; patches:3 kitaev@213: kitaev@213: To recreate 30bd..e5, one have to take content of 9429..e0, not its p1 f1db..5e kitaev@213: */ kitaev@213: public boolean element(GroupElement ge) { kitaev@213: emptyChangelog = false; kitaev@213: HgChangelog changelog = hgRepo.getChangelog(); kitaev@213: try { kitaev@213: if (prevRevContent == null) { kitaev@213: if (NULL.equals(ge.firstParent()) && NULL.equals(ge.secondParent())) { kitaev@213: prevRevContent = new ByteArrayDataAccess(new byte[0]); kitaev@213: } else { kitaev@213: final Nodeid base = ge.firstParent(); kitaev@213: if (!changelog.isKnown(base) /*only first parent, that's Bundle contract*/) { kitaev@213: throw new IllegalStateException(String.format("Revision %s needs a parent %s, which is missing in the supplied repo %s", ge.node().shortNotation(), base.shortNotation(), hgRepo.toString())); kitaev@213: } kitaev@213: ByteArrayChannel bac = new ByteArrayChannel(); kitaev@213: changelog.rawContent(base, bac); // FIXME get DataAccess directly, to avoid kitaev@213: // extra byte[] (inside ByteArrayChannel) duplication just for the sake of subsequent ByteArrayDataChannel wrap. kitaev@213: prevRevContent = new ByteArrayDataAccess(bac.toArray()); kitaev@213: } kitaev@213: } kitaev@213: // kitaev@213: byte[] csetContent = ge.apply(prevRevContent); kitaev@213: dh = dh.sha1(ge.firstParent(), ge.secondParent(), csetContent); // XXX ge may give me access to byte[] content of nodeid directly, perhaps, I don't need DH to be friend of Nodeid? kitaev@213: if (!ge.node().equalsTo(dh.asBinary())) { kitaev@213: throw new IllegalStateException("Integrity check failed on " + bundleFile + ", node:" + ge.node()); kitaev@213: } kitaev@213: ByteArrayDataAccess csetDataAccess = new ByteArrayDataAccess(csetContent); kitaev@213: RawChangeset cs = RawChangeset.parse(csetDataAccess); kitaev@213: inspector.next(revisionIndex++, ge.node(), cs); kitaev@213: prevRevContent.done(); kitaev@213: prevRevContent = csetDataAccess.reset(); kitaev@213: } catch (CancelledException ex) { kitaev@213: return false; kitaev@213: } catch (Exception ex) { kitaev@213: throw new HgBadStateException(ex); // FIXME kitaev@213: } kitaev@213: return true; kitaev@213: } kitaev@213: kitaev@213: public void manifestStart() {} kitaev@213: public void manifestEnd() {} kitaev@213: public void fileStart(String name) {} kitaev@213: public void fileEnd(String name) {} kitaev@213: kitaev@213: }; kitaev@213: inspectChangelog(bundleInsp); kitaev@213: } kitaev@213: kitaev@213: public void dump() throws IOException { kitaev@213: Dump dump = new Dump(); kitaev@213: inspectAll(dump); kitaev@213: System.out.println("Total files:" + dump.names.size()); kitaev@213: for (String s : dump.names) { kitaev@213: System.out.println(s); kitaev@213: } kitaev@213: } kitaev@213: kitaev@213: // callback to minimize amount of Strings and Nodeids instantiated kitaev@213: public interface Inspector { kitaev@213: void changelogStart(); kitaev@213: kitaev@213: void changelogEnd(); kitaev@213: kitaev@213: void manifestStart(); kitaev@213: kitaev@213: void manifestEnd(); kitaev@213: kitaev@213: void fileStart(String name); kitaev@213: kitaev@213: void fileEnd(String name); kitaev@213: kitaev@213: /** kitaev@213: * XXX desperately need exceptions here kitaev@213: * @param element data element, instance might be reused, don't keep a reference to it or its raw data kitaev@213: * @return true to continue kitaev@213: */ kitaev@213: boolean element(GroupElement element); kitaev@213: } kitaev@213: kitaev@213: public static class Dump implements Inspector { kitaev@213: public final LinkedList names = new LinkedList(); kitaev@213: kitaev@213: public void changelogStart() { kitaev@213: System.out.println("Changelog group"); kitaev@213: } kitaev@213: kitaev@213: public void changelogEnd() { kitaev@213: } kitaev@213: kitaev@213: public void manifestStart() { kitaev@213: System.out.println("Manifest group"); kitaev@213: } kitaev@213: kitaev@213: public void manifestEnd() { kitaev@213: } kitaev@213: kitaev@213: public void fileStart(String name) { kitaev@213: names.add(name); kitaev@213: System.out.println(name); kitaev@213: } kitaev@213: kitaev@213: public void fileEnd(String name) { kitaev@213: } kitaev@213: kitaev@213: public boolean element(GroupElement ge) { kitaev@213: try { kitaev@213: System.out.printf(" %s %s %s %s; patches:%d\n", ge.node(), ge.firstParent(), ge.secondParent(), ge.cset(), ge.patches().size()); kitaev@213: } catch (Exception ex) { kitaev@213: ex.printStackTrace(); // FIXME kitaev@213: } kitaev@213: return true; kitaev@213: } kitaev@213: } kitaev@213: kitaev@213: public void inspectChangelog(Inspector inspector) throws IOException { kitaev@213: if (inspector == null) { kitaev@213: throw new IllegalArgumentException(); kitaev@213: } kitaev@213: DataAccess da = getDataStream(); kitaev@213: try { kitaev@213: internalInspectChangelog(da, inspector); kitaev@213: } finally { kitaev@213: da.done(); kitaev@213: } kitaev@213: } kitaev@213: kitaev@213: public void inspectManifest(Inspector inspector) throws IOException { kitaev@213: if (inspector == null) { kitaev@213: throw new IllegalArgumentException(); kitaev@213: } kitaev@213: DataAccess da = getDataStream(); kitaev@213: try { kitaev@213: if (da.isEmpty()) { kitaev@213: return; kitaev@213: } kitaev@213: skipGroup(da); // changelog kitaev@213: internalInspectManifest(da, inspector); kitaev@213: } finally { kitaev@213: da.done(); kitaev@213: } kitaev@213: } kitaev@213: kitaev@213: public void inspectFiles(Inspector inspector) throws IOException { kitaev@213: if (inspector == null) { kitaev@213: throw new IllegalArgumentException(); kitaev@213: } kitaev@213: DataAccess da = getDataStream(); kitaev@213: try { kitaev@213: if (da.isEmpty()) { kitaev@213: return; kitaev@213: } kitaev@213: skipGroup(da); // changelog kitaev@213: if (da.isEmpty()) { kitaev@213: return; kitaev@213: } kitaev@213: skipGroup(da); // manifest kitaev@213: internalInspectFiles(da, inspector); kitaev@213: } finally { kitaev@213: da.done(); kitaev@213: } kitaev@213: } kitaev@213: kitaev@213: public void inspectAll(Inspector inspector) throws IOException { kitaev@213: if (inspector == null) { kitaev@213: throw new IllegalArgumentException(); kitaev@213: } kitaev@213: DataAccess da = getDataStream(); kitaev@213: try { kitaev@213: internalInspectChangelog(da, inspector); kitaev@213: internalInspectManifest(da, inspector); kitaev@213: internalInspectFiles(da, inspector); kitaev@213: } finally { kitaev@213: da.done(); kitaev@213: } kitaev@213: } kitaev@213: kitaev@213: private void internalInspectChangelog(DataAccess da, Inspector inspector) throws IOException { kitaev@213: if (da.isEmpty()) { kitaev@213: return; kitaev@213: } kitaev@213: inspector.changelogStart(); kitaev@213: readGroup(da, inspector); kitaev@213: inspector.changelogEnd(); kitaev@213: } kitaev@213: kitaev@213: private void internalInspectManifest(DataAccess da, Inspector inspector) throws IOException { kitaev@213: if (da.isEmpty()) { kitaev@213: return; kitaev@213: } kitaev@213: inspector.manifestStart(); kitaev@213: readGroup(da, inspector); kitaev@213: inspector.manifestEnd(); kitaev@213: } kitaev@213: kitaev@213: private void internalInspectFiles(DataAccess da, Inspector inspector) throws IOException { kitaev@213: while (!da.isEmpty()) { kitaev@213: int fnameLen = da.readInt(); kitaev@213: if (fnameLen <= 4) { kitaev@213: break; // null chunk, the last one. kitaev@213: } kitaev@213: byte[] fnameBuf = new byte[fnameLen - 4]; kitaev@213: da.readBytes(fnameBuf, 0, fnameBuf.length); kitaev@213: String name = new String(fnameBuf); kitaev@213: inspector.fileStart(name); kitaev@213: readGroup(da, inspector); kitaev@213: inspector.fileEnd(name); kitaev@213: } kitaev@213: } kitaev@213: kitaev@213: private static void readGroup(DataAccess da, Inspector inspector) throws IOException { kitaev@213: int len = da.readInt(); kitaev@213: boolean good2go = true; kitaev@213: while (len > 4 && !da.isEmpty() && good2go) { kitaev@213: byte[] nb = new byte[80]; kitaev@213: da.readBytes(nb, 0, 80); kitaev@213: int dataLength = len - 84 /* length field + 4 nodeids */; kitaev@213: byte[] data = new byte[dataLength]; kitaev@213: da.readBytes(data, 0, dataLength); kitaev@213: DataAccess slice = new ByteArrayDataAccess(data); // XXX in fact, may pass a slicing DataAccess. kitaev@213: // Just need to make sure that we seek to proper location afterwards (where next GroupElement starts), kitaev@213: // regardless whether that slice has read it or not. kitaev@213: GroupElement ge = new GroupElement(nb, slice); kitaev@213: good2go = inspector.element(ge); kitaev@213: slice.done(); // BADA doesn't implement done(), but it could (e.g. free array) kitaev@213: /// and we'd better tell it we are not going to use it any more. However, it's important to ensure Inspector kitaev@213: // implementations out there do not retain GroupElement.rawData() kitaev@213: len = da.isEmpty() ? 0 : da.readInt(); kitaev@213: } kitaev@213: // need to skip up to group end if inspector told he don't want to continue with the group, kitaev@213: // because outer code may try to read next group immediately as we return back. kitaev@213: while (len > 4 && !da.isEmpty()) { kitaev@213: da.skip(len - 4 /* length field */); kitaev@213: len = da.isEmpty() ? 0 : da.readInt(); kitaev@213: } kitaev@213: } kitaev@213: kitaev@213: private static void skipGroup(DataAccess da) throws IOException { kitaev@213: int len = da.readInt(); kitaev@213: while (len > 4 && !da.isEmpty()) { kitaev@213: da.skip(len - 4); // sizeof(int) kitaev@213: len = da.isEmpty() ? 0 : da.readInt(); kitaev@213: } kitaev@213: } kitaev@213: kitaev@213: public static class GroupElement { kitaev@213: private final byte[] header; // byte[80] takes 120 bytes, 4 Nodeids - 192 kitaev@213: private final DataAccess dataAccess; kitaev@213: private List patches; kitaev@213: kitaev@213: GroupElement(byte[] fourNodeids, DataAccess rawDataAccess) { kitaev@213: assert fourNodeids != null && fourNodeids.length == 80; kitaev@213: header = fourNodeids; kitaev@213: dataAccess = rawDataAccess; kitaev@213: } kitaev@213: kitaev@213: public Nodeid node() { kitaev@213: return Nodeid.fromBinary(header, 0); kitaev@213: } kitaev@213: kitaev@213: public Nodeid firstParent() { kitaev@213: return Nodeid.fromBinary(header, 20); kitaev@213: } kitaev@213: kitaev@213: public Nodeid secondParent() { kitaev@213: return Nodeid.fromBinary(header, 40); kitaev@213: } kitaev@213: kitaev@213: public Nodeid cset() { // cs seems to be changeset kitaev@213: return Nodeid.fromBinary(header, 60); kitaev@213: } kitaev@213: kitaev@213: public DataAccess rawData() { kitaev@213: return dataAccess; kitaev@213: } kitaev@213: kitaev@213: public List patches() throws IOException { kitaev@213: if (patches == null) { kitaev@213: dataAccess.reset(); kitaev@213: LinkedList p = new LinkedList(); kitaev@213: while (!dataAccess.isEmpty()) { kitaev@213: RevlogStream.PatchRecord pr = RevlogStream.PatchRecord.read(dataAccess); kitaev@213: p.add(pr); kitaev@213: } kitaev@213: patches = p; kitaev@213: } kitaev@213: return patches; kitaev@213: } kitaev@213: kitaev@213: public byte[] apply(DataAccess baseContent) throws IOException { kitaev@213: return RevlogStream.apply(baseContent, -1, patches()); kitaev@213: } kitaev@213: } kitaev@213: }