diff src/com/tmate/hgkit/ll/RevlogStream.java @ 2:08db726a0fb7

Shaping out low-level Hg structures
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Sun, 19 Dec 2010 05:41:31 +0100
parents dbd663faec1f
children 24bb4f365164
line wrap: on
line diff
--- a/src/com/tmate/hgkit/ll/RevlogStream.java	Sat Dec 18 05:47:35 2010 +0100
+++ b/src/com/tmate/hgkit/ll/RevlogStream.java	Sun Dec 19 05:41:31 2010 +0100
@@ -4,6 +4,12 @@
 package com.tmate.hgkit.ll;
 
 import java.io.DataInput;
+import java.io.EOFException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.zip.Inflater;
 
 /**
  * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations), 
@@ -13,7 +19,10 @@
  * @see http://mercurial.selenic.com/wiki/RevlogNG
  */
 public class RevlogStream {
-	
+
+	private List<IndexEntry> index; // indexed access highly needed
+	private boolean inline = false;
+
 	private void detectVersion() {
 		
 	}
@@ -27,5 +36,131 @@
 		// TODO Auto-generated method stub
 		return null;
 	}
+	public int revisionCount() {
+		initOutline();
+		return index.size();
+	}
+
+	// should be possible to use TIP, ALL
+	public void iterate(int start, int end, boolean needData, Revlog.Inspector inspector) {
+		initOutline();
+		final int indexSize = index.size();
+		if (start < 0 || start >= indexSize) {
+			throw new IllegalArgumentException("Bad left range boundary " + start);
+		}
+		if (end < start || end >= indexSize) {
+			throw new IllegalArgumentException("Bad right range boundary " + end);
+		}
+		// XXX may cache [start .. end] from index with a single read (pre-read)
+		
+		DataInput diIndex = null, diData = null;
+		diIndex = getIndexStream();
+		if (needData) {
+			diData = getDataStream();
+		}
+		try {
+			diIndex.skipBytes(inline ? (int) index.get(start).offset : start * 64);
+			for (int i = start; i <= end && i < indexSize; i++ ) {
+				IndexEntry ie = index.get(i);
+				long l = diIndex.readLong();
+				long offset = l >>> 16;
+				int flags = (int) (l & 0X0FFFF);
+				int compressedLen = diIndex.readInt();
+				int actualLen = diIndex.readInt();
+				int baseRevision = diIndex.readInt();
+				int linkRevision = diIndex.readInt();
+				int parent1Revision = diIndex.readInt();
+				int parent2Revision = diIndex.readInt();
+				byte[] buf = new byte[32];
+				// XXX Hg keeps 12 last bytes empty, we move them into front here
+				diIndex.readFully(buf, 12, 20);
+				diIndex.skipBytes(12);
+				byte[] data = null;
+				if (needData) {
+					byte[] dataBuf = new byte[compressedLen];
+					if (inline) {
+						diIndex.readFully(dataBuf);
+					} else {
+						diData.skipBytes((int) ie.offset); // FIXME not skip but seek!!!
+						diData.readFully(dataBuf);
+					}
+					if (dataBuf[0] == 0x78 /* 'x' */) {
+						Inflater zlib = new Inflater();
+						zlib.setInput(dataBuf, 0, compressedLen);
+						byte[] result = new byte[actualLen*2]; // FIXME need to use zlib.finished() instead 
+						int resultLen = zlib.inflate(result);
+						zlib.end();
+						data = new byte[resultLen];
+						System.arraycopy(result, 0, data, 0, resultLen);
+					} else if (dataBuf[0] == 0x75 /* 'u' */) {
+						data = new byte[dataBuf.length - 1];
+						System.arraycopy(dataBuf, 1, data, 0, data.length);
+					} else {
+						// XXX Python impl in fact throws exception when there's not 'x', 'u' or '0'
+						// but I don't see reason not to just return data as is 
+						data = dataBuf;
+					}
+					// FIXME if patch data (based on baseRevision), then apply patches to get true content
+				}
+				inspector.next(compressedLen, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, buf, data);
+			}
+		} catch (EOFException ex) {
+			// should not happen as long as we read inside known boundaries
+			throw new IllegalStateException(ex);
+		} catch (IOException ex) {
+			FIXME 
+		}
+	}
 	
+	private void initOutline() {
+		if (index != null && !index.isEmpty()) {
+			return;
+		}
+		ArrayList<IndexEntry> res = new ArrayList<IndexEntry>();
+		DataInput di = getIndexStream();
+		try {
+			int versionField = di.readInt();
+			// di.undreadInt();
+			final int INLINEDATA = 1 << 16;
+			inline = (versionField & INLINEDATA) != 0;
+			long offset = 0; // first offset is always 0, thus Hg uses it for other purposes
+			while(true) { // EOFExcepiton should get us outta here. FIXME Out inputstream should has explicit no-more-data indicator
+				int compressedLen = di.readInt();
+				// 8+4 = 12 bytes total read
+//				int actualLen = di.readInt();
+//				int baseRevision = di.readInt();
+//				int linkRevision = di.readInt();
+//				int parent1Revision = di.readInt();
+//				int parent2Revision = di.readInt();
+//				byte[] nodeid = new byte[32];
+				res.add(new IndexEntry(offset, compressedLen));
+				if (inline) {
+					di.skipBytes(6*4 + 32 + compressedLen); // Check: 56 (skip) + 12 (read) = 64 (total RevlogNG record size)
+				} else {
+					di.skipBytes(6*4 + 32);
+				}
+				long l = di.readLong();
+				offset = l >>> 16;
+			}
+		} catch (EOFException ex) {
+			// fine, done then
+			index = res;
+		} catch (IOException ex) {
+			ex.printStackTrace();
+			// too bad, no outline then
+			index = Collections.emptyList();
+		}
+	}
+
+
+	// perhaps, package-local or protected, if anyone else from low-level needs them
+	private static class IndexEntry {
+		public final long offset;
+		public final int length; // data past fixed record (need to decide whether including header size or not), and whether length is of compressed data or not
+
+		public IndexEntry(long o, int l) {
+			offset = o;
+			length = l;
+		}
+	}
 }