changeset 530:0f6fa88e2162

Towards commit command: refactor clone, extract pieces for reuse. Describe a defect discovered when a bundle has a few patches with 0,0 (null) parents
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 23 Jan 2013 17:46:12 +0100 (2013-01-23)
parents 95bdcf75e71e
children 95c2f43008bd
files src/org/tmatesoft/hg/core/HgCloneCommand.java src/org/tmatesoft/hg/internal/Internals.java src/org/tmatesoft/hg/internal/Patch.java src/org/tmatesoft/hg/internal/RevlogCompressor.java src/org/tmatesoft/hg/internal/RevlogStream.java src/org/tmatesoft/hg/internal/RevlogStreamWriter.java
diffstat 6 files changed, 277 insertions(+), 58 deletions(-) [+]
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/core/HgCloneCommand.java	Mon Jan 21 19:41:51 2013 +0100
+++ b/src/org/tmatesoft/hg/core/HgCloneCommand.java	Wed Jan 23 17:46:12 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012 TMate Software Ltd
+ * Copyright (c) 2011-2013 TMate Software Ltd
  *  
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -19,22 +19,21 @@
 import static org.tmatesoft.hg.core.Nodeid.NULL;
 import static org.tmatesoft.hg.internal.RequiresFile.*;
 
-import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
-import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.LinkedList;
 import java.util.TreeMap;
-import java.util.zip.DeflaterOutputStream;
 
 import org.tmatesoft.hg.internal.ByteArrayDataAccess;
 import org.tmatesoft.hg.internal.DataAccess;
 import org.tmatesoft.hg.internal.DigestHelper;
 import org.tmatesoft.hg.internal.Lifecycle;
 import org.tmatesoft.hg.internal.RepoInitializer;
+import org.tmatesoft.hg.internal.RevlogCompressor;
+import org.tmatesoft.hg.internal.RevlogStreamWriter;
 import org.tmatesoft.hg.repo.HgBundle;
 import org.tmatesoft.hg.repo.HgBundle.GroupElement;
 import org.tmatesoft.hg.repo.HgInvalidControlFileException;
@@ -99,7 +98,7 @@
 			if (!destination.isDirectory()) {
 				throw new HgBadArgumentException(String.format("%s is not a directory", destination), null);
 			} else if (destination.list().length > 0) {
-				throw new HgBadArgumentException(String.format("% shall be empty", destination), null);
+				throw new HgBadArgumentException(String.format("%s shall be empty", destination), null);
 			}
 		} else {
 			destination.mkdirs();
@@ -146,8 +145,6 @@
 		private final TreeMap<Nodeid, Integer> changelogIndexes = new TreeMap<Nodeid, Integer>();
 		private boolean collectChangelogIndexes = false;
 
-		private int base = -1;
-		private long offset = 0;
 		private DataAccess prevRevContent;
 		private final DigestHelper dh = new DigestHelper();
 		private final ArrayList<Nodeid> revisionSequence = new ArrayList<Nodeid>(); // last visited nodes first
@@ -181,8 +178,7 @@
 
 		public void changelogStart() {
 			try {
-				base = -1;
-				offset = 0;
+				revlogHeader.offset(0).baseRevision(-1);
 				revisionSequence.clear();
 				indexFile = new FileOutputStream(new File(hgDir, filename = "store/00changelog.i"));
 				collectChangelogIndexes = true;
@@ -211,8 +207,7 @@
 
 		public void manifestStart() {
 			try {
-				base = -1;
-				offset = 0;
+				revlogHeader.offset(0).baseRevision(-1);
 				revisionSequence.clear();
 				indexFile = new FileOutputStream(new File(hgDir, filename = "store/00manifest.i"));
 			} catch (IOException ex) {
@@ -239,8 +234,7 @@
 		
 		public void fileStart(String name) {
 			try {
-				base = -1;
-				offset = 0;
+				revlogHeader.offset(0).baseRevision(-1);
 				revisionSequence.clear();
 				fncacheFiles.add("data/" + name + ".i"); // TODO post-1.0 this is pure guess, 
 				// need to investigate more how filenames are kept in fncache
@@ -284,6 +278,9 @@
 			String m = String.format("Can't find index of %s for file %s", p.shortNotation(), filename);
 			throw new HgInvalidControlFileException(m, null, null).setRevision(p);
 		}
+		
+		private RevlogStreamWriter.HeaderWriter revlogHeader = new RevlogStreamWriter.HeaderWriter(true);
+		private RevlogCompressor revlogDataZip = new RevlogCompressor();
 
 		public boolean element(GroupElement ge) {
 			try {
@@ -292,6 +289,8 @@
 				Nodeid p1 = ge.firstParent();
 				Nodeid p2 = ge.secondParent();
 				if (p1.isNull() && p2.isNull() /* or forced flag, does REVIDX_PUNCHED_FLAG indicate that? */) {
+					// FIXME NOTE: both parents being null doesn't necessarily mean
+					// an empty prevContent; see the build.gradle sample below
 					prevRevContent = new ByteArrayDataAccess(new byte[0]);
 					writeComplete = true;
 				}
@@ -302,66 +301,48 @@
 					// TODO post-1.0 custom exception ChecksumCalculationFailed?
 					throw new HgInvalidStateException(String.format("Checksum failed: expected %s, calculated %s. File %s", node, calculated, filename));
 				}
-				final int link;
+				revlogHeader.nodeid(node);
 				if (collectChangelogIndexes) {
 					changelogIndexes.put(node, revisionSequence.size());
-					link = revisionSequence.size();
+					revlogHeader.linkRevision(revisionSequence.size());
 				} else {
 					Integer csRev = changelogIndexes.get(ge.cset());
 					if (csRev == null) {
 						throw new HgInvalidStateException(String.format("Changelog doesn't contain revision %s of %s", ge.cset().shortNotation(), filename));
 					}
-					link = csRev.intValue();
+					revlogHeader.linkRevision(csRev.intValue());
 				}
-				final int p1Rev = knownRevision(p1), p2Rev = knownRevision(p2);
+				revlogHeader.parents(knownRevision(p1), knownRevision(p2));
 				byte[] patchContent = ge.rawDataByteArray();
 				writeComplete = writeComplete || patchContent.length >= (/* 3/4 of actual */content.length - (content.length >>> 2));
 				if (writeComplete) {
-					base = revisionSequence.size();
+					revlogHeader.baseRevision(revisionSequence.size());
 				}
 				final byte[] sourceData = writeComplete ? content : patchContent;
-				final byte[] data;
-				ByteArrayOutputStream bos = new ByteArrayOutputStream(content.length);
-				DeflaterOutputStream dos = new DeflaterOutputStream(bos);
-				dos.write(sourceData);
-				dos.close();
-				final byte[] compressedData = bos.toByteArray();
-				dos = null;
-				bos = null;
-				final Byte dataPrefix;
-				if (compressedData.length >= (sourceData.length - (sourceData.length >>> 2))) {
+				revlogDataZip.reset(sourceData);
+				final int compressedLen;
+				final boolean useUncompressedData = revlogDataZip.getCompressedLengthEstimate() >= (sourceData.length - (sourceData.length >>> 2));
+				if (useUncompressedData) {
 					// compression wasn't too effective,
-					data = sourceData;
-					dataPrefix = 'u';
+					compressedLen = sourceData.length + 1 /*1 byte for 'u' - uncompressed prefix byte*/;
 				} else {
-					data = compressedData;
-					dataPrefix = null;
+					compressedLen= revlogDataZip.getCompressedLengthEstimate();
 				}
+		
+				revlogHeader.length(content.length, compressedLen);
+				
+				revlogHeader.write(indexFile);
 
-				ByteBuffer header = ByteBuffer.allocate(64 /* REVLOGV1_RECORD_SIZE */);
-				if (offset == 0) {
-					final int INLINEDATA = 1 << 16;
-					header.putInt(1 /* RevlogNG */ | INLINEDATA);
-					header.putInt(0);
+				if (useUncompressedData) {
+					indexFile.write((byte) 'u');
+					indexFile.write(sourceData);
 				} else {
-					header.putLong(offset << 16);
+					int actualCompressedLenWritten = revlogDataZip.writeCompressedData(indexFile);
+					if (actualCompressedLenWritten != compressedLen) {
+						throw new HgInvalidStateException(String.format("Expected %d bytes of compressed data, but actually wrote %d in %s", compressedLen, actualCompressedLenWritten, filename));
+					}
 				}
-				final int compressedLen = data.length + (dataPrefix == null ? 0 : 1);
-				header.putInt(compressedLen);
-				header.putInt(content.length);
-				header.putInt(base);
-				header.putInt(link);
-				header.putInt(p1Rev);
-				header.putInt(p2Rev);
-				header.put(node.toByteArray());
-				// assume 12 bytes left are zeros
-				indexFile.write(header.array());
-				if (dataPrefix != null) {
-					indexFile.write(dataPrefix.byteValue());
-				}
-				indexFile.write(data);
 				//
-				offset += compressedLen;
 				revisionSequence.add(node);
 				prevRevContent.done();
 				prevRevContent = new ByteArrayDataAccess(content);
@@ -371,6 +352,56 @@
 			}
 			return cancelException == null;
 		}
+/*
+ $ hg debugindex build.gradle
+   rev    offset  length   base linkrev nodeid       p1           p2
+     0         0     857      0     454 b2a1b20d1933 000000000000 000000000000
+     1       857     319      0     455 5324c8f2b550 b2a1b20d1933 000000000000
+     2      1176     533      0     460 4011d52141cd 5324c8f2b550 000000000000
+     3      1709      85      0     463 d0be58845306 4011d52141cd 000000000000
+     4      1794     105      0     464 3ddd456244a0 d0be58845306 000000000000
+     5      1899     160      0     466 a3f374fbf33a 3ddd456244a0 000000000000
+     6      2059     133      0     468 0227d28e0db6 a3f374fbf33a 000000000000
+
+once we get a bundle for this repository and look into it for the same file:
+
+ $hg debugbundle -a /tmp/hg-bundle-4418325145435980614.tmp
+format: id, p1, p2, cset, delta base, len(delta)
+
+build.gradle
+62a101b7994c6c5b0423ba6c802f8c64d24ef784 0000000000000000000000000000000000000000 0000000000000000000000000000000000000000 6ec4af642ba8024edd636af15e672c97cc3294e4 0000000000000000000000000000000000000000 1368
+b2a1b20d1933d0605aab6780ee52fe5ab3073832 0000000000000000000000000000000000000000 0000000000000000000000000000000000000000 7dcc920e2d57d5850ee9f08ac863251460565bd3 62a101b7994c6c5b0423ba6c802f8c64d24ef784 2373
+5324c8f2b5503a4d1ead3ac40a9851c27572166b b2a1b20d1933d0605aab6780ee52fe5ab3073832 0000000000000000000000000000000000000000 7b883bf03b14ccea8ee74db0a34f9f66ca644a3c b2a1b20d1933d0605aab6780ee52fe5ab3073832 579
+4011d52141cd717c92cbf350a93522d2f3ee415e 5324c8f2b5503a4d1ead3ac40a9851c27572166b 0000000000000000000000000000000000000000 55e9588b84b83aa96fe76a06ee8bf067c5d3c90e 5324c8f2b5503a4d1ead3ac40a9851c27572166b 1147
+d0be588453068787dcb3ee05f8edfe47fdd5ae78 4011d52141cd717c92cbf350a93522d2f3ee415e 0000000000000000000000000000000000000000 ad0322a4af204547c400e1846b2b83d446ab8da5 4011d52141cd717c92cbf350a93522d2f3ee415e 85
+3ddd456244a08f81779163d9faf922a6dcd9e53e d0be588453068787dcb3ee05f8edfe47fdd5ae78 0000000000000000000000000000000000000000 3ace1fc95d0a1a941b6427c60b6e624f96dd71ad d0be588453068787dcb3ee05f8edfe47fdd5ae78 151
+a3f374fbf33aba1cc3b4f472db022b5185880f5d 3ddd456244a08f81779163d9faf922a6dcd9e53e 0000000000000000000000000000000000000000 3ca4ae7bdd3890b8ed89bfea1b42af593e04b373 3ddd456244a08f81779163d9faf922a6dcd9e53e 195
+0227d28e0db69afebee34cd5a4151889fb6271da a3f374fbf33aba1cc3b4f472db022b5185880f5d 0000000000000000000000000000000000000000 31bd09da0dcfe48e1fc662143f91ff402238aa84 a3f374fbf33aba1cc3b4f472db022b5185880f5d 145
+
+However, there's no delta base information in the bundle file itself; it's merely a hard-coded convention (each chunk always patches the previous version, see 
+(a) changegroup.py#builddeltaheader(): # do nothing with basenode, it is implicitly the previous one in HG10
+(b) revlog.py#group(): prev, curr = revs[r], revs[r + 1]
+                           for c in bundler.revchunk(self, curr, prev):
+)
+
+
+It's unclear where the first chunk (identified as 62a101b7...) comes from (by the way, there's no such changeset as 6ec4af..., which the chunk specifies, while 7dcc920e... IS changeset 454)
+
+EXPLANATION:
+If the cloned repository comes from the svnkit repo (which is where the gradle branch lives):
+$hg debugindex build.gradle
+   rev    offset  length   base linkrev nodeid       p1           p2
+     0         0     590      0     213 62a101b7994c 000000000000 000000000000
+     1       590     872      0     452 b2a1b20d1933 000000000000 000000000000
+     2      1462     319      0     453 5324c8f2b550 b2a1b20d1933 000000000000
+     3      1781     533      0     459 4011d52141cd 5324c8f2b550 000000000000
+     4      2314      85      0     462 d0be58845306 4011d52141cd 000000000000
+     5      2399     105      0     466 3ddd456244a0 d0be58845306 000000000000
+     6      2504     160      0     468 a3f374fbf33a 3ddd456244a0 000000000000
+     7      2664     133      0     470 0227d28e0db6 a3f374fbf33a 000000000000
+
+and the aforementioned bundle was the result of 'hg incoming svnkit'!!!
+ */
 
 		public void start(int count, Callback callback, Object token) {
 			progressSupport.start(count);
--- a/src/org/tmatesoft/hg/internal/Internals.java	Mon Jan 21 19:41:51 2013 +0100
+++ b/src/org/tmatesoft/hg/internal/Internals.java	Wed Jan 23 17:46:12 2013 +0100
@@ -88,7 +88,9 @@
 	 * Integer value, use negative for attempts to acquire lock until success, and zero to try once and fail immediately. 
 	 */
 	public static final String CFG_PROPERTY_FS_LOCK_TIMEOUT = "hg4j.fs.lock.timeout";
-	
+
+	public static final int REVLOGV1_RECORD_SIZE = 64;
+
 	private List<Filter.Factory> filterFactories;
 	private final HgRepository repo;
 	private final File repoDir;
--- a/src/org/tmatesoft/hg/internal/Patch.java	Mon Jan 21 19:41:51 2013 +0100
+++ b/src/org/tmatesoft/hg/internal/Patch.java	Wed Jan 23 17:46:12 2013 +0100
@@ -114,7 +114,11 @@
 			destIndex += start - prevEnd;
 			// insert new data from the patch, if any
 			byte[] d = data.get(i);
-			System.arraycopy(d, 0, rv, destIndex, d.length);
+			try {
+				System.arraycopy(d, 0, rv, destIndex, d.length);
+			} catch (ArrayIndexOutOfBoundsException ex) {
+				ex.printStackTrace();
+			}
 			destIndex += d.length;
 			prevEnd = ends.get(i);
 		}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/tmatesoft/hg/internal/RevlogCompressor.java	Wed Jan 23 17:46:12 2013 +0100
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2013 TMate Software Ltd
+ *  
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.internal;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.zip.Deflater;
+import java.util.zip.DeflaterOutputStream;
+
+/**
+ * 
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public class RevlogCompressor {
+	private final Deflater zip;
+	private byte[] sourceData;
+	private int compressedLenEstimate;
+	
+	public RevlogCompressor() {
+		zip = new Deflater();
+	}
+
+	public void reset(byte[] source) {
+		sourceData = source;
+		compressedLenEstimate = -1;
+	}
+	
+	public int writeCompressedData(OutputStream out) throws IOException {
+		zip.reset();
+		DeflaterOutputStream dos = new DeflaterOutputStream(out, zip, Math.min(2048, sourceData.length));
+		dos.write(sourceData);
+		dos.finish();
+		return zip.getTotalOut();
+	}
+
+	public int getCompressedLengthEstimate() {
+		if (compressedLenEstimate != -1) {
+			return compressedLenEstimate;
+		}
+		zip.reset();
+		int rv = 0;
+		// from DeflaterOutputStream:
+		byte[] buffer = new byte[Math.min(2048, sourceData.length)];
+        for (int i = 0, stride = buffer.length; i < sourceData.length; i+= stride) {
+            zip.setInput(sourceData, i, Math.min(stride, sourceData.length - i));
+            while (!zip.needsInput()) {
+            	rv += zip.deflate(buffer, 0, buffer.length);
+            }
+        }
+        zip.finish();
+        while (!zip.finished()) {
+        	rv += zip.deflate(buffer, 0, buffer.length);
+        }
+        return compressedLenEstimate = rv;
+	}
+}
--- a/src/org/tmatesoft/hg/internal/RevlogStream.java	Mon Jan 21 19:41:51 2013 +0100
+++ b/src/org/tmatesoft/hg/internal/RevlogStream.java	Wed Jan 23 17:46:12 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2012 TMate Software Ltd
+ * Copyright (c) 2010-2013 TMate Software Ltd
  *  
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -18,6 +18,7 @@
 
 import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION;
 import static org.tmatesoft.hg.repo.HgRepository.TIP;
+import static org.tmatesoft.hg.internal.Internals.REVLOGV1_RECORD_SIZE;
 
 import java.io.File;
 import java.io.IOException;
@@ -202,8 +203,6 @@
 	}
 
 
-	private final int REVLOGV1_RECORD_SIZE = 64;
-
 	// should be possible to use TIP, ALL, or -1, -2, -n notation of Hg
 	// ? boolean needsNodeid
 	public void iterate(int start, int end, boolean needData, Inspector inspector) throws HgInvalidRevisionException, HgInvalidControlFileException {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/tmatesoft/hg/internal/RevlogStreamWriter.java	Wed Jan 23 17:46:12 2013 +0100
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2013 TMate Software Ltd
+ *  
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.internal;
+
+import static org.tmatesoft.hg.internal.Internals.REVLOGV1_RECORD_SIZE;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+
+import org.tmatesoft.hg.core.Nodeid;
+
+/**
+ * 
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public class RevlogStreamWriter {
+
+	
+	public static class HeaderWriter {
+		private final ByteBuffer header;
+		private final boolean isInline;
+		private long offset;
+		private int length, compressedLength;
+		private int baseRev, linkRev, p1, p2;
+		private Nodeid nodeid;
+		
+		public HeaderWriter(boolean inline) {
+			isInline = inline;
+			header = ByteBuffer.allocate(REVLOGV1_RECORD_SIZE);
+		}
+		
+		public HeaderWriter offset(long offset) {
+			this.offset = offset;
+			return this;
+		}
+		
+		public HeaderWriter baseRevision(int baseRevision) {
+			this.baseRev = baseRevision;
+			return this;
+		}
+		
+		public HeaderWriter length(int len, int compressedLen) {
+			this.length = len;
+			this.compressedLength = compressedLen;
+			return this;
+		}
+		
+		public HeaderWriter parents(int parent1, int parent2) {
+			p1 = parent1;
+			p2 = parent2;
+			return this;
+		}
+		
+		public HeaderWriter linkRevision(int linkRevision) {
+			this.linkRev = linkRevision;
+			return this;
+		}
+		
+		public HeaderWriter nodeid(Nodeid n) {
+			this.nodeid = n;
+			return this;
+		}
+
+		public void write(OutputStream out) throws IOException {
+			header.clear();
+			if (offset == 0) {
+				int version = 1 /* RevlogNG */;
+				if (isInline) {
+					final int INLINEDATA = 1 << 16; // FIXME extract constant
+					version |= INLINEDATA;
+				}
+				header.putInt(version);
+				header.putInt(0);
+			} else {
+				header.putLong(offset << 16);
+			}
+			header.putInt(compressedLength);
+			header.putInt(length);
+			header.putInt(baseRev);
+			header.putInt(linkRev);
+			header.putInt(p1);
+			header.putInt(p2);
+			header.put(nodeid.toByteArray());
+			// assume 12 bytes left are zeros
+			out.write(header.array());
+
+			// regardless of whether the data is inline or kept separately,
+			// the offset field always represents the cumulative compressedLength
+			// (while the actual position in an index file with inline==true differs by n*sizeof(header), where n is the entry's position in the file)
+			offset += compressedLength;
+		}
+	}
+	
+	public void addRevision(String text, int baseRevision, int linkRevision, int p1, int p2) {
+	}
+}