changeset 0:dbd663faec1f

Basic changelog parsing
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Fri, 17 Dec 2010 19:05:59 +0100
parents
children a3576694a4d1
files .classpath .hgignore .project .settings/org.eclipse.jdt.core.prefs src/com/tmate/hgkit/console/Main.java src/com/tmate/hgkit/ll/Changelog.java src/com/tmate/hgkit/ll/Changeset.java src/com/tmate/hgkit/ll/HgRepository.java src/com/tmate/hgkit/ll/Nodeid.java src/com/tmate/hgkit/ll/RevlogIndexStreamAccess.java src/com/tmate/hgkit/ll/RevlogIterator.java src/com/tmate/hgkit/ll/RevlogStream.java
diffstat 12 files changed, 490 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.classpath	Fri Dec 17 19:05:59 2010 +0100
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+	<classpathentry kind="src" path="src"/>
+	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5"/>
+	<classpathentry kind="output" path="bin"/>
+</classpath>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgignore	Fri Dec 17 19:05:59 2010 +0100
@@ -0,0 +1,3 @@
+syntax:glob
+bin
+src/Extras.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.project	Fri Dec 17 19:05:59 2010 +0100
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>com.tmate.hgkit</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+	</natures>
+</projectDescription>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.settings/org.eclipse.jdt.core.prefs	Fri Dec 17 19:05:59 2010 +0100
@@ -0,0 +1,12 @@
+#Wed Dec 15 01:43:42 CET 2010
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.5
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.source=1.5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/com/tmate/hgkit/console/Main.java	Fri Dec 17 19:05:59 2010 +0100
@@ -0,0 +1,158 @@
+package com.tmate.hgkit.console;
+
+import java.io.BufferedInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.math.BigInteger;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.zip.Deflater;
+import java.util.zip.Inflater;
+
+import com.tmate.hgkit.ll.Changeset;
+
+/**
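+ * Console scratchpad: prints sample Deflater output, then dumps the index records and changesets of an inline RevlogNG changelog.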
+ * @author artem
+ */
+public class Main {
+
+	public static void main(String[] args) throws Exception {
+		Deflater zip1 = new Deflater(6, true); // nowrap == true: raw deflate stream
+		final byte[] input = "Abstractions are valueless".getBytes();
+		zip1.setInput(input);
+		zip1.finish();
+		byte[] result1 = new byte[100];
+		int resLen1 = zip1.deflate(result1);
+		System.out.printf("%3d:", resLen1);
+		for (int i = 0; i < resLen1; i++) {
+			System.out.printf("%02X", result1[i]);
+		}
+		System.out.println();
+		// same input again, now with the ZLIB header and checksum (nowrap == false), to compare the two outputs
+		Deflater zip2 = new Deflater(6, false);
+		zip2.setInput(input);
+		zip2.finish();
+		byte[] result2 = new byte[100];
+		int resLen2 = zip2.deflate(result2);
+		System.out.printf("%3d:", resLen2);
+		for (int i = 0; i < resLen2; i++) {
+			System.out.printf("%02X", result2[i]);
+		}
+		System.out.println();
+		// changesets parsed so far; looked up by index for patch bases, hence a random-access list
+		List<Changeset> changelog = new ArrayList<Changeset>();
+		// hard-coded test repository; with inline data each index record is directly followed by its chunk
+		DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(new File("/temp/hg/hello/" + ".hg/store/00changelog.i"))));
+		DataInput di = dis;
+		dis.mark(10); // peek at the leading int, then rewind so the first record is read in full
+		int versionField = di.readInt();
+		dis.reset();
+		final int INLINEDATA = 1 << 16;
+		
+		boolean inlineData = (versionField & INLINEDATA) != 0;
+		System.out.printf("%#8x, inline: %b\n", versionField, inlineData);
+		System.out.println("\tOffset\tFlags\tPacked\t  Actual\tBase Rev    Link Rev\tParent1\tParent2\tnodeid");
+		int entryCount = 0;
+		while (dis.available() > 0) {
+			long l = di.readLong();
+			long offset = l >>> 16; // upper 48 bits
+			int flags = (int) (l & 0X0FFFF); // lower 16 bits
+			int compressedLen = di.readInt();
+			int actualLen = di.readInt();
+			int baseRevision = di.readInt();
+			int linkRevision = di.readInt();
+			int parent1Revision = di.readInt();
+			int parent2Revision = di.readInt();
+			byte[] buf = new byte[32]; // 12 leading zero bytes keep the BigInteger below non-negative
+			di.readFully(buf, 12, 20);
+			dis.skipBytes(12); // skipBytes retries; a single skip() may stop short at a buffer boundary and misalign the next record
+			System.out.printf("%14d %6X %10d %10d %10d %10d %8d %8d     %040x\n", offset, flags, compressedLen, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, new BigInteger(buf));
+			if (inlineData) {
+				byte[] data = new byte[compressedLen];
+				di.readFully(data);
+				if (data[0] == 0x78 /* 'x' */) {
+					Inflater zlib = new Inflater();
+					zlib.setInput(data, 0, compressedLen);
+					byte[] result = new byte[actualLen*2];
+					int resultLen = zlib.inflate(result);
+					zlib.end();
+					if (resultLen != actualLen) {
+						System.err.printf("Expected:%d, decomressed to:%d bytes\n", actualLen, resultLen);
+					}
+					String resultString;
+					if (baseRevision != entryCount) {
+						// this is a patch: (start, end, length) big-endian int triples, each followed by length bytes of data
+						byte[] baseRevContent = changelog.get(baseRevision).rawData;
+						LinkedList<PatchRecord> bins = new LinkedList<PatchRecord>();
+						int p1, p2, len, patchElementIndex = 0;
+						do { // Java bytes are signed: mask with 0xFF, otherwise a byte >= 0x80 sign-extends and corrupts the int
+							final int x = patchElementIndex;
+							p1 = ((result[x] & 0xFF) << 24) | ((result[x+1] & 0xFF) << 16) | ((result[x+2] & 0xFF) << 8) | (result[x+3] & 0xFF);
+							p2 = ((result[x+4] & 0xFF) << 24) | ((result[x+5] & 0xFF) << 16) | ((result[x+6] & 0xFF) << 8) | (result[x+7] & 0xFF);
+							len = ((result[x+8] & 0xFF) << 24) | ((result[x+9] & 0xFF) << 16) | ((result[x+10] & 0xFF) << 8) | (result[x+11] & 0xFF);
+							System.out.printf("%4d %4d %4d\n", p1, p2, len);
+							patchElementIndex += 12 + len;
+							bins.add(new PatchRecord(p1, p2, len, result, x+12));
+						} while (patchElementIndex < resultLen);
+						// rebuild the text from the base revision content and the collected fragments
+						result = apply(baseRevContent, bins);
+						resultLen = result.length;
+					}
+					resultString = new String(result, 0, resultLen, "UTF-8");
+					System.out.println(resultString);
+					entryCount++;
+					Changeset changeset = new Changeset();
+					changeset.read(result, 0, resultLen);
+					changelog.add(changeset);
+				} // TODO else if uncompressed
+			}
+		}
+		dis.close();
+		// dump every changeset parsed above
+		System.out.println("\n\n");
+		System.out.println("====================>");
+		for (Changeset cset : changelog) {
+			System.out.println(">");
+			cset.dump();
+			System.out.println("<");
+		}
+	}
+
+
+	// mpatch.c : apply() - result is the base with [start, end) of every fragment replaced by the fragment's data
+	private static byte[] apply(byte[] baseRevisionContent, List<PatchRecord> patch) {
+		int total = baseRevisionContent.length; // exact size up front; a fixed 512-byte scratch buffer overflowed on larger revisions
+		for (PatchRecord pr : patch) {
+			total += pr.data.length - (pr.end - pr.start);
+		}
+		byte[] rv = new byte[total];
+		int last = 0, destIndex = 0; // read position in the base, write position in rv; assumes ordered, non-overlapping fragments
+		for (PatchRecord pr : patch) {
+			System.arraycopy(baseRevisionContent, last, rv, destIndex, pr.start - last); // untouched bytes before the fragment
+			System.arraycopy(pr.data, 0, rv, destIndex + pr.start - last, pr.data.length);
+			destIndex += pr.start - last + pr.data.length;
+			last = pr.end;
+		}
+		System.arraycopy(baseRevisionContent, last, rv, destIndex, baseRevisionContent.length - last); // tail after the last fragment
+		return rv;
+	}
+
+	static class PatchRecord { // mirrors struct frag from mpatch.c
+		int start, end, len; // base range [start, end) to replace; len is the size of the replacement
+		byte[] data; // own copy of the replacement bytes
+		/** Copies len bytes of src, beginning at srcOffset, into a freshly allocated data array. */
+		public PatchRecord(int p1, int p2, int len, byte[] src, int srcOffset) {
+			this.data = new byte[len];
+			System.arraycopy(src, srcOffset, this.data, 0, len);
+			this.start = p1;
+			this.end = p2;
+			this.len = len;
+		}
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/com/tmate/hgkit/ll/Changelog.java	Fri Dec 17 19:05:59 2010 +0100
@@ -0,0 +1,12 @@
+/**
+ * Copyright (c) 2010 Artem Tikhomirov
+ */
+package com.tmate.hgkit.ll;
+
+/**
+ * Representation of the Mercurial changelog file (the list of changesets). Placeholder for now: declares no members.
+ * @author artem
+ */
+public class Changelog {
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/com/tmate/hgkit/ll/Changeset.java	Fri Dec 17 19:05:59 2010 +0100
@@ -0,0 +1,99 @@
+/**
+ * Copyright (c) 2010 Artem Tikhomirov 
+ */
+package com.tmate.hgkit.ll;
+
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+
+/**
+ * @see mercurial/changelog.py:read()
+ * <pre>
+        format used:
+        nodeid\n        : manifest node in ascii
+        user\n          : user, no \n or \r allowed
+        time tz extra\n : date (time is int or float, timezone is int)
+                        : extra is metadatas, encoded and separated by '\0'
+                        : older versions ignore it
+        files\n\n       : files modified by the cset, no \n or \r allowed
+        (.*)            : comment (free text, ideally utf-8)
+
+        changelog v0 doesn't use extra
+ * </pre>
+ * @author artem
+ */
+public class Changeset {
+	private /*final*/ Nodeid nodeid; // manifest node, parsed from the first line
+	private String user;
+	private String comment;
+	private ArrayList<String> files;
+	private String timezone; // FIXME holds the whole "time tz extra" line, unparsed
+	public byte[] rawData; // FIXME public and mutable; copy of the bytes handed to read()
+	/** Prints the parsed fields to stdout. Call only after read(): files is null before that. */
+	public void dump() {
+		System.out.println("User:" + user);
+		System.out.println("Comment:" + comment);
+		System.out.println("Nodeid:" + nodeid);
+		System.out.println("Date:" + timezone);
+		System.out.println("Files: " + files.size());
+		for (String s : files) {
+			System.out.print('\t');
+			System.out.println(s);
+		}
+	}
+	/** Parses changeset text found in buf[offset, offset+length); throws IllegalArgumentException if an expected line break is missing. */
+	public void read(byte[] buf, int offset, int length) {
+		rawData = new byte[length];
+		System.arraycopy(buf, offset, rawData, 0, length);
+		final int bufferEndIndex = offset + length;
+		final byte lineBreak = (byte) '\n';
+		int breakIndex1 = indexOf(buf, lineBreak, offset, bufferEndIndex);
+		if (breakIndex1 == -1) {
+			throw new IllegalArgumentException("Bad Changeset data");
+		}
+		nodeid = Nodeid.fromAscii(buf, offset, breakIndex1 - offset); // first line starts at offset, not at index 0
+		int breakIndex2 = indexOf(buf, lineBreak, breakIndex1+1, bufferEndIndex);
+		if (breakIndex2 == -1) {
+			throw new IllegalArgumentException("Bad Changeset data");
+		}
+		user = new String(buf, breakIndex1+1, breakIndex2 - breakIndex1 - 1);
+		int breakIndex3 = indexOf(buf, lineBreak, breakIndex2+1, bufferEndIndex);
+		if (breakIndex3 == -1) {
+			throw new IllegalArgumentException("Bad Changeset data");
+		}
+		timezone = new String(buf, breakIndex2+1, breakIndex3 - breakIndex2 - 1);
+		// NOTE(review): user, date and file names are decoded with the platform default charset, only the comment as UTF-8
+		// file names follow, one per line
+		int lastStart = breakIndex3 + 1;
+		int breakIndex4 = indexOf(buf, lineBreak, lastStart, bufferEndIndex);
+		files = new ArrayList<String>(5);
+		// an empty line (breakIndex4 == lastStart) closes the list; it comes first when the changeset
+		// touches no files, in which case the list stays empty instead of swallowing comment lines
+		while (breakIndex4 != -1 && breakIndex4 != lastStart) {
+			files.add(new String(buf, lastStart, breakIndex4 - lastStart));
+			lastStart = breakIndex4 + 1;
+			breakIndex4 = indexOf(buf, lineBreak, lastStart, bufferEndIndex);
+		}
+		if (breakIndex4 == -1) {
+			// data ended before the empty line that has to precede the comment
+			throw new IllegalArgumentException("Bad Changeset data");
+		}
+		// breakIndex4 now marks the line break of the empty line itself,
+		// so the comment is everything after it (possibly nothing)
+		try {
+			comment = new String(buf, breakIndex4+1, bufferEndIndex - breakIndex4 - 1, "UTF-8");
+		} catch (UnsupportedEncodingException ex) {
+			comment = "";
+			throw new IllegalStateException("Could hardly happen", ex);
+		}
+	}
+	/** Returns the index of the first occurrence of what in src[startOffset, endIndex), or -1 if there is none. */
+	private static int indexOf(byte[] src, byte what, int startOffset, int endIndex) {
+		for (int i = startOffset; i < endIndex; i++) {
+			if (src[i] == what) {
+				return i;
+			}
+		}
+		return -1;
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/com/tmate/hgkit/ll/HgRepository.java	Fri Dec 17 19:05:59 2010 +0100
@@ -0,0 +1,31 @@
+/**
+ * Copyright (c) 2010 Artem Tikhomirov 
+ */
+package com.tmate.hgkit.ll;
+
+/**
+ * @author artem
+ *
+ */
+public class HgRepository {
+
+	// created on first use by getChangelog()
+	private Changelog changelog;
+
+	/** Placeholder for the log command: so far it only makes sure the changelog is available. */
+	public void log() {
+		final Changelog log = getChangelog();
+		assert log != null;
+		// TODO get data to the client
+	}
+
+	/** @return the changelog, instantiated on the first call */
+	private Changelog getChangelog() {
+		if (changelog != null) {
+			return changelog;
+		}
+		// TODO init
+		changelog = new Changelog();
+		return changelog;
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/com/tmate/hgkit/ll/Nodeid.java	Fri Dec 17 19:05:59 2010 +0100
@@ -0,0 +1,45 @@
+/**
+ * Copyright (c) 2010 Artem Tikhomirov 
+ */
+package com.tmate.hgkit.ll;
+
+import java.math.BigInteger;
+import java.util.Formatter;
+
+/**
+ * @see mercurial/node.py
+ * @author artem
+ *
+ */
+public class Nodeid {
+	// presumably mirrors nullrev from mercurial/node.py - verify; final, since a constant must not be reassignable
+	public static final int NULLREV = -1;
+	private final byte[] binaryData; // kept by reference, the array is not copied
+	public Nodeid(byte[] binaryRepresentation) {
+		// 5 int fields => 32 bytes
+		// byte[20] => 48 bytes
+		this.binaryData = binaryRepresentation;
+	}
+
+	@Override
+	public String toString() {
+		// FIXME temp impl. (decimal digits)
+		// signum 1 makes BigInteger read the bytes as an unsigned magnitude, so a first byte >= 0x80 no longer yields a negative number
+		return new BigInteger(1, binaryData).toString();
+	}
+
+	// binascii.unhexlify(); throws IllegalArgumentException when a character is not a hex digit
+	public static Nodeid fromAscii(byte[] asciiRepresentation, int offset, int length) {
+		assert length % 2 == 0; // Python's binascii.hexlify convert each byte into 2 digits
+		byte[] data = new byte[length / 2]; // XXX use known size instead? nodeid is always 20 bytes
+		for (int i = 0, j = offset; i < data.length; i++) {
+			int hiNibble = Character.digit(asciiRepresentation[j++], 16);
+			int lowNibble = Character.digit(asciiRepresentation[j++], 16);
+			if (hiNibble < 0 || lowNibble < 0) { // Character.digit signals a non-digit with -1
+				throw new IllegalArgumentException("Not a hex digit pair at index " + (j - 2));
+			}
+			data[i] = (byte) (((hiNibble << 4) | lowNibble) & 0xFF);
+		}
+		return new Nodeid(data);
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/com/tmate/hgkit/ll/RevlogIndexStreamAccess.java	Fri Dec 17 19:05:59 2010 +0100
@@ -0,0 +1,60 @@
+/**
+ * Copyright (c) 2010 Artem Tikhomirov 
+ */
+package com.tmate.hgkit.ll;
+
+import java.io.DataInput;
+import java.io.IOException;
+
+/**
+ * @author artem
+ *
+ */
+public class RevlogIndexStreamAccess {
+
+	private final RevlogStream stream;
+
+	// takes RevlogStream. RevlogStream delegates calls for data to this accessor, which in turn refers back to RevlogStream to get
+	// correct [Input|Data]Stream according to revlog version (Revlogv0 or RevlogNG)
+
+	public RevlogIndexStreamAccess(RevlogStream stream) {
+		this.stream = stream;
+		// TODO Auto-generated constructor stub
+	}
+
+	// Reads one Revlogv0 index record (four ints, then three 20-byte fields = 76 bytes); the values are read and then discarded.
+	void readRevlogV0Record() throws IOException {
+		DataInput di = stream.getIndexStream();
+		int offset = di.readInt();
+		int compressedLen = di.readInt();
+		int baseRevision = di.readInt();
+		int linkRevision = di.readInt();
+//		int r = (((buf[0] & 0xff) << 24) | ((buf[1] & 0xff) << 16) | ((buf[2] & 0xff) << 8) | (buf[3] & 0xff));
+		byte[] buf = new byte[20]; // reused for all three reads, hence the clone() after each one
+		di.readFully(buf, 0, 20);
+		Object nodeidOwn = buf.clone(); // NOTE(review): Mercurial's v0 index format seems to store parent1, parent2, then the own nodeid - verify the order
+		// XXX nodeid as an Object with hash/equals?
+		di.readFully(buf, 0, 20);
+		Object nodeidParent1 = buf.clone();
+		di.readFully(buf, 0, 20);
+		Object nodeidParent2 = buf.clone();
+	}
+	// Reads one RevlogNG index record (a long, six ints and a 32-byte nodeid field = 64 bytes); the values are read and then discarded.
+	// another subclass?
+	void readRevlogNGRecord() throws IOException {
+		DataInput di = stream.getIndexStream();
+		long l = di.readLong();
+		long offset = l >>> 16; // upper 48 bits
+		int flags = (int) (l & 0X0FFFF); // lower 16 bits
+		int compressedLen = di.readInt();
+		int actualLen = di.readInt();
+		int baseRevision = di.readInt();
+		int linkRevision = di.readInt();
+		int parent1Revision = di.readInt();
+		int parent2Revision = di.readInt();
+		byte[] buf = new byte[32];
+		di.readFully(buf, 0, 20+12); // 20 bytes of nodeid plus the 12 bytes that follow it
+		Object nodeid = buf/*[0..20]*/;
+		
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/com/tmate/hgkit/ll/RevlogIterator.java	Fri Dec 17 19:05:59 2010 +0100
@@ -0,0 +1,16 @@
+/**
+ * Copyright (c) 2010 Artem Tikhomirov 
+ */
+package com.tmate.hgkit.ll;
+
+/**
+ * Meant to walk over a revlog; nothing is implemented yet.
+ * XXX consider external iterator approach 
+ * @author artem
+ */
+public class RevlogIterator {
+
+	public RevlogIterator(RevlogStream stream) {
+		// NOTE: the stream argument is ignored for now - it is neither stored nor read
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/com/tmate/hgkit/ll/RevlogStream.java	Fri Dec 17 19:05:59 2010 +0100
@@ -0,0 +1,31 @@
+/**
+ * Copyright (c) 2010 Artem Tikhomirov 
+ */
+package com.tmate.hgkit.ll;
+
+import java.io.DataInput;
+
+/**
+ * Open design question: a single RevlogStream per file per repository, with an accessor for a record access session (e.g. with back/forward operations),
+ * or numerous RevlogStreams with a separate representation of the underlying data (cached, lazy ChunkStream)?
+ * @author artem
+ * @see http://mercurial.selenic.com/wiki/Revlog
+ * @see http://mercurial.selenic.com/wiki/RevlogNG
+ */
+public class RevlogStream {
+	
+	private void detectVersion() {
+		// TODO not implemented: empty body
+	}
+
+	/*package*/ DataInput getIndexStream() {
+		// TODO Auto-generated method stub
+		return null; // stub: callers get null until this is implemented
+	}
+
+	/*package*/ DataInput getDataStream() {
+		// TODO Auto-generated method stub
+		return null; // stub: callers get null until this is implemented
+	}
+	
+}
+}