Mercurial > jhg
changeset 0:dbd663faec1f
Basic changelog parsing
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Fri, 17 Dec 2010 19:05:59 +0100 |
parents | |
children | a3576694a4d1 |
files | .classpath .hgignore .project .settings/org.eclipse.jdt.core.prefs src/com/tmate/hgkit/console/Main.java src/com/tmate/hgkit/ll/Changelog.java src/com/tmate/hgkit/ll/Changeset.java src/com/tmate/hgkit/ll/HgRepository.java src/com/tmate/hgkit/ll/Nodeid.java src/com/tmate/hgkit/ll/RevlogIndexStreamAccess.java src/com/tmate/hgkit/ll/RevlogIterator.java src/com/tmate/hgkit/ll/RevlogStream.java |
diffstat | 12 files changed, 490 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.classpath Fri Dec 17 19:05:59 2010 +0100 @@ -0,0 +1,6 @@ +<?xml version="1.0" encoding="UTF-8"?> +<classpath> + <classpathentry kind="src" path="src"/> + <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5"/> + <classpathentry kind="output" path="bin"/> +</classpath>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.hgignore Fri Dec 17 19:05:59 2010 +0100 @@ -0,0 +1,3 @@ +syntax:glob +bin +src/Extras.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.project Fri Dec 17 19:05:59 2010 +0100 @@ -0,0 +1,17 @@ +<?xml version="1.0" encoding="UTF-8"?> +<projectDescription> + <name>com.tmate.hgkit</name> + <comment></comment> + <projects> + </projects> + <buildSpec> + <buildCommand> + <name>org.eclipse.jdt.core.javabuilder</name> + <arguments> + </arguments> + </buildCommand> + </buildSpec> + <natures> + <nature>org.eclipse.jdt.core.javanature</nature> + </natures> +</projectDescription>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.settings/org.eclipse.jdt.core.prefs Fri Dec 17 19:05:59 2010 +0100 @@ -0,0 +1,12 @@ +#Wed Dec 15 01:43:42 CET 2010 +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5 +org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve +org.eclipse.jdt.core.compiler.compliance=1.5 +org.eclipse.jdt.core.compiler.debug.lineNumber=generate +org.eclipse.jdt.core.compiler.debug.localVariable=generate +org.eclipse.jdt.core.compiler.debug.sourceFile=generate +org.eclipse.jdt.core.compiler.problem.assertIdentifier=error +org.eclipse.jdt.core.compiler.problem.enumIdentifier=error +org.eclipse.jdt.core.compiler.source=1.5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/com/tmate/hgkit/console/Main.java Fri Dec 17 19:05:59 2010 +0100 @@ -0,0 +1,158 @@ +package com.tmate.hgkit.console; + +import java.io.BufferedInputStream; +import java.io.DataInput; +import java.io.DataInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.zip.Deflater; +import java.util.zip.Inflater; + +import com.tmate.hgkit.ll.Changeset; + +/** + * + * @author artem + */ +public class Main { + + public static void main(String[] args) throws Exception { + Deflater zip1 = new Deflater(6, true); + final byte[] input = "Abstractions are valueless".getBytes(); + zip1.setInput(input); + zip1.finish(); + byte[] result1 = new byte[100]; + int resLen1 = zip1.deflate(result1); + System.out.printf("%3d:", resLen1); + for (int i = 0; i < resLen1; i++) { + System.out.printf("%02X", result1[i]); + } + System.out.println(); + // + Deflater zip2 = new Deflater(6, false); + zip2.setInput(input); + zip2.finish(); + byte[] result2 = new byte[100]; + int resLen2 = zip2.deflate(result2); + System.out.printf("%3d:", resLen2); + for (int i = 0; i < resLen2; i++) { + System.out.printf("%02X", result2[i]); + } + System.out.println(); + // + LinkedList<Changeset> changelog = new LinkedList<Changeset>(); + // + DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(new File("/temp/hg/hello/" + ".hg/store/00changelog.i")))); + DataInput di = dis; + dis.mark(10); + int versionField = di.readInt(); + dis.reset(); + final int INLINEDATA = 1 << 16; + + boolean inlineData = (versionField & INLINEDATA) != 0; + System.out.printf("%#8x, inline: %b\n", versionField, inlineData); + System.out.println("\tOffset\tFlags\tPacked\t Actual\tBase Rev Link Rev\tParent1\tParent2\tnodeid"); + int entryCount = 0; + while 
(dis.available() > 0) { + long l = di.readLong(); + long offset = l >>> 16; + int flags = (int) (l & 0X0FFFF); + int compressedLen = di.readInt(); + int actualLen = di.readInt(); + int baseRevision = di.readInt(); + int linkRevision = di.readInt(); + int parent1Revision = di.readInt(); + int parent2Revision = di.readInt(); + byte[] buf = new byte[32]; + di.readFully(buf, 12, 20); + dis.skip(12); + System.out.printf("%14d %6X %10d %10d %10d %10d %8d %8d %040x\n", offset, flags, compressedLen, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, new BigInteger(buf)); + if (inlineData) { + byte[] data = new byte[compressedLen]; + di.readFully(data); + if (data[0] == 0x78 /* 'x' */) { + Inflater zlib = new Inflater(); + zlib.setInput(data, 0, compressedLen); + byte[] result = new byte[actualLen*2]; + int resultLen = zlib.inflate(result); + zlib.end(); + if (resultLen != actualLen) { + System.err.printf("Expected:%d, decomressed to:%d bytes\n", actualLen, resultLen); + } + String resultString; + if (baseRevision != entryCount) { + // this is a patch + byte[] baseRevContent = changelog.get(baseRevision).rawData; + LinkedList<PatchRecord> bins = new LinkedList<PatchRecord>(); + int p1, p2, len, patchElementIndex = 0; + do { + final int x = patchElementIndex; + p1 = (result[x] << 24) | (result[x+1] << 16) | (result[x+2] << 8) | result[x+3]; + p2 = (result[x+4] << 24) | (result[x+5] << 16) | (result[x+6] << 8) | result[x+7]; + len = (result[x+8] << 24) | (result[x+9] << 16) | (result[x+10] << 8) | result[x+11]; + System.out.printf("%4d %4d %4d\n", p1, p2, len); + patchElementIndex += 12 + len; + bins.add(new PatchRecord(p1, p2, len, result, x+12)); + } while (patchElementIndex < resultLen); + // + result = apply(baseRevContent, bins); + resultLen = result.length; + } + resultString = new String(result, 0, resultLen, "UTF-8"); + System.out.println(resultString); + entryCount++; + Changeset changeset = new Changeset(); + changeset.read(result, 0, 
resultLen); + changelog.add(changeset); + } // TODO else if uncompressed + } + } + dis.close(); + // + System.out.println("\n\n"); + System.out.println("====================>"); + for (Changeset cset : changelog) { + System.out.println(">"); + cset.dump(); + System.out.println("<"); + } + } + + + // mpatch.c : apply() + private static byte[] apply(byte[] baseRevisionContent, List<PatchRecord> patch) { + byte[] tempBuf = new byte[512]; // XXX + int last = 0, destIndex = 0; + for (PatchRecord pr : patch) { + System.arraycopy(baseRevisionContent, last, tempBuf, destIndex, pr.start-last); + destIndex += pr.start - last; + System.arraycopy(pr.data, 0, tempBuf, destIndex, pr.data.length); + destIndex += pr.data.length; + last = pr.end; + } + System.arraycopy(baseRevisionContent, last, tempBuf, destIndex, baseRevisionContent.length - last); + destIndex += baseRevisionContent.length - last; // total length + byte[] rv = new byte[destIndex]; + System.arraycopy(tempBuf, 0, rv, 0, destIndex); + return rv; + } + + static class PatchRecord { // copy of struct frag from mpatch.c + int start, end, len; + byte[] data; + + public PatchRecord(int p1, int p2, int len, byte[] src, int srcOffset) { + start = p1; + end = p2; + this.len = len; + data = new byte[len]; + System.arraycopy(src, srcOffset, data, 0, len); + } + } +}
/**
 * Copyright (c) 2010 Artem Tikhomirov
 */

/**
 * Models the changelog of a Mercurial repository, i.e. the ordered list of changesets.
 * No state or behavior yet.
 *
 * @author artem
 */
public class Changelog {
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/com/tmate/hgkit/ll/Changeset.java Fri Dec 17 19:05:59 2010 +0100 @@ -0,0 +1,99 @@ +/** + * Copyright (c) 2010 Artem Tikhomirov + */ +package com.tmate.hgkit.ll; + +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; + +/** + * @see mercurial/changelog.py:read() + * <pre> + format used: + nodeid\n : manifest node in ascii + user\n : user, no \n or \r allowed + time tz extra\n : date (time is int or float, timezone is int) + : extra is metadatas, encoded and separated by '\0' + : older versions ignore it + files\n\n : files modified by the cset, no \n or \r allowed + (.*) : comment (free text, ideally utf-8) + + changelog v0 doesn't use extra + * </pre> + * @author artem + */ +public class Changeset { + private /*final*/ Nodeid nodeid; + private String user; + private String comment; + private ArrayList<String> files; + private String timezone; // FIXME + public byte[] rawData; // FIXME + + public void dump() { + System.out.println("User:" + user); + System.out.println("Comment:" + comment); + System.out.println("Nodeid:" + nodeid); + System.out.println("Date:" + timezone); + System.out.println("Files: " + files.size()); + for (String s : files) { + System.out.print('\t'); + System.out.println(s); + } + } + + public void read(byte[] buf, int offset, int length) { + rawData = new byte[length]; + System.arraycopy(buf, offset, rawData, 0, length); + final int bufferEndIndex = offset + length; + final byte lineBreak = (byte) '\n'; + int breakIndex1 = indexOf(buf, lineBreak, offset, bufferEndIndex); + if (breakIndex1 == -1) { + throw new IllegalArgumentException("Bad Changeset data"); + } + nodeid = Nodeid.fromAscii(buf, 0, breakIndex1); + int breakIndex2 = indexOf(buf, lineBreak, breakIndex1+1, bufferEndIndex); + if (breakIndex2 == -1) { + throw new IllegalArgumentException("Bad Changeset data"); + } + user = new String(buf, breakIndex1+1, breakIndex2 - breakIndex1 - 1); + int breakIndex3 = 
indexOf(buf, lineBreak, breakIndex2+1, bufferEndIndex); + if (breakIndex3 == -1) { + throw new IllegalArgumentException("Bad Changeset data"); + } + timezone = new String(buf, breakIndex2+1, breakIndex3 - breakIndex2 - 1); + + // + int lastStart = breakIndex3 + 1; + int breakIndex4 = indexOf(buf, lineBreak, lastStart, bufferEndIndex); + files = new ArrayList<String>(5); + while (breakIndex4 != -1 && breakIndex4 + 1 < bufferEndIndex) { + files.add(new String(buf, lastStart, breakIndex4 - lastStart)); + lastStart = breakIndex4 + 1; + if (buf[breakIndex4 + 1] == lineBreak) { + // found \n\n + break; + } else { + breakIndex4 = indexOf(buf, lineBreak, lastStart, bufferEndIndex); + } + } + if (breakIndex4 == -1 || breakIndex4 >= bufferEndIndex) { + throw new IllegalArgumentException("Bad Changeset data"); + } + try { + comment = new String(buf, breakIndex4+2, bufferEndIndex - breakIndex4 - 2, "UTF-8"); + } catch (UnsupportedEncodingException ex) { + comment = ""; + throw new IllegalStateException("Could hardly happen"); + } + } + + private static int indexOf(byte[] src, byte what, int startOffset, int endIndex) { + for (int i = startOffset; i < endIndex; i++) { + if (src[i] == what) { + return i; + } + } + return -1; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/com/tmate/hgkit/ll/HgRepository.java Fri Dec 17 19:05:59 2010 +0100 @@ -0,0 +1,31 @@ +/** + * Copyright (c) 2010 Artem Tikhomirov + */ +package com.tmate.hgkit.ll; + +/** + * @author artem + * + */ +public class HgRepository { + + + private Changelog changelog; + + public void log() { + Changelog clog = getChangelog(); + assert clog != null; + // TODO get data to the client + } + + /** + * @return + */ + private Changelog getChangelog() { + if (this.changelog == null) { + this.changelog = new Changelog(); + // TODO init + } + return this.changelog; + } +}
/**
 * Copyright (c) 2010 Artem Tikhomirov
 */

import java.math.BigInteger;
import java.util.Arrays;
import java.util.Formatter;

/**
 * Binary identifier of a revision, shown as a string of lowercase hex digits.
 * Instances are immutable and may serve as map keys.
 * Counterpart of mercurial/node.py.
 *
 * @author artem
 */
public class Nodeid {

    public static final int NULLREV = -1;

    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    private final byte[] binaryData;

    /**
     * @param binaryRepresentation bytes of the identifier; copied, later changes to the array
     *        do not affect this object
     */
    public Nodeid(byte[] binaryRepresentation) {
        // 5 int fields => 32 bytes
        // byte[20] => 48 bytes
        this(binaryRepresentation.clone(), true);
    }

    // for arrays known to be private to this class, saves a copy
    private Nodeid(byte[] ownedData, boolean marker) {
        this.binaryData = ownedData;
    }

    /**
     * @return two lowercase hex digits per byte, leading zeros preserved
     */
    @Override
    public String toString() {
        // digits are produced by hand: BigInteger treats the value as signed and drops leading zeros
        char[] rv = new char[binaryData.length * 2];
        for (int i = 0, j = 0; i < binaryData.length; i++) {
            rv[j++] = HEX_DIGITS[(binaryData[i] >>> 4) & 0x0F];
            rv[j++] = HEX_DIGITS[binaryData[i] & 0x0F];
        }
        return new String(rv);
    }

    @Override
    public boolean equals(Object o) {
        return o instanceof Nodeid && Arrays.equals(binaryData, ((Nodeid) o).binaryData);
    }

    @Override
    public int hashCode() {
        return Arrays.hashCode(binaryData);
    }

    // binascii.unhexlify()
    /**
     * Builds identifier from its hex form.
     *
     * @param asciiRepresentation buffer with hex digits, both cases are fine
     * @param offset index of the first digit
     * @param length number of digits, shall be even as each byte takes two digits
     * @return identifier of <code>length / 2</code> bytes
     * @throws IllegalArgumentException if length is odd or negative, range doesn't fit the buffer,
     *         or there's a character that is not a hex digit
     */
    public static Nodeid fromAscii(byte[] asciiRepresentation, int offset, int length) {
        if (length < 0 || (length & 1) != 0) {
            throw new IllegalArgumentException("Expected even number of hex digits, got " + length);
        }
        if (offset < 0 || offset > asciiRepresentation.length - length) {
            throw new IllegalArgumentException("Range [" + offset + ", +" + length + ") is out of buffer of " + asciiRepresentation.length + " bytes");
        }
        byte[] data = new byte[length / 2]; // XXX use known size instead? nodeid is always 20 bytes
        for (int i = 0, j = offset; i < data.length; i++) {
            int hiNibble = Character.digit(asciiRepresentation[j++], 16);
            int lowNibble = Character.digit(asciiRepresentation[j++], 16);
            if (hiNibble == -1 || lowNibble == -1) {
                // otherwise -1 silently ends up as 0xFF in the data
                throw new IllegalArgumentException("Not a hex digit at index " + (hiNibble == -1 ? j - 2 : j - 1));
            }
            data[i] = (byte) ((hiNibble << 4) | lowNibble);
        }
        return new Nodeid(data, true);
    }
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/com/tmate/hgkit/ll/RevlogIndexStreamAccess.java Fri Dec 17 19:05:59 2010 +0100 @@ -0,0 +1,60 @@ +/** + * Copyright (c) 2010 Artem Tikhomirov + */ +package com.tmate.hgkit.ll; + +import java.io.DataInput; +import java.io.IOException; + +/** + * @author artem + * + */ +public class RevlogIndexStreamAccess { + + private final RevlogStream stream; + + // takes RevlogStream. RevlogStream delegates calls for data to this accessor, which in turn refers back to RevlogStream to get + // correct [Input|Data]Stream according to revlog version (Revlogv0 or RevlogNG) + + public RevlogIndexStreamAccess(RevlogStream stream) { + this.stream = stream; + // TODO Auto-generated constructor stub + } + + + void readRevlogV0Record() throws IOException { + DataInput di = stream.getIndexStream(); + int offset = di.readInt(); + int compressedLen = di.readInt(); + int baseRevision = di.readInt(); + int linkRevision = di.readInt(); +// int r = (((buf[0] & 0xff) << 24) | ((buf[1] & 0xff) << 16) | ((buf[2] & 0xff) << 8) | (buf[3] & 0xff)); + byte[] buf = new byte[20]; + di.readFully(buf, 0, 20); + Object nodeidOwn = buf.clone(); + // XXX nodeid as an Object with hash/equals? + di.readFully(buf, 0, 20); + Object nodeidParent1 = buf.clone(); + di.readFully(buf, 0, 20); + Object nodeidParent2 = buf.clone(); + } + + // another subclass? + void readRevlogNGRecord() throws IOException { + DataInput di = stream.getIndexStream(); + long l = di.readLong(); + long offset = l >>> 16; + int flags = (int) (l & 0X0FFFF); + int compressedLen = di.readInt(); + int actualLen = di.readInt(); + int baseRevision = di.readInt(); + int linkRevision = di.readInt(); + int parent1Revision = di.readInt(); + int parent2Revision = di.readInt(); + byte[] buf = new byte[32]; + di.readFully(buf, 0, 20+12); + Object nodeid = buf/*[0..20]*/; + + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/com/tmate/hgkit/ll/RevlogIterator.java Fri Dec 17 19:05:59 2010 +0100 @@ -0,0 +1,16 @@ +/** + * Copyright (c) 2010 Artem Tikhomirov + */ +package com.tmate.hgkit.ll; + +/** + * To walk against revlog + * XXX consider external iterator approach + * @author artem + */ +public class RevlogIterator { + + public RevlogIterator(RevlogStream stream) { + + } +}
/**
 * Copyright (c) 2010 Artem Tikhomirov
 */

import java.io.DataInput;

/**
 * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations),
 * or numerous RevlogStream with separate representation of the underlying data (cached, lazy ChunkStream)?
 *
 * @author artem
 * @see http://mercurial.selenic.com/wiki/Revlog
 * @see http://mercurial.selenic.com/wiki/RevlogNG
 */
public class RevlogStream {

    // placeholder, does nothing
    private void detectVersion() {
    }

    /**
     * @return always <code>null</code> at the moment
     */
    /*package*/ DataInput getIndexStream() {
        // TODO Auto-generated method stub
        final DataInput indexStream = null;
        return indexStream;
    }

    /**
     * @return always <code>null</code> at the moment
     */
    /*package*/ DataInput getDataStream() {
        // TODO Auto-generated method stub
        final DataInput dataStream = null;
        return dataStream;
    }
}