diff src/com/tmate/hgkit/ll/RevlogStream.java @ 3:24bb4f365164

Rudimentary log functionality with basic infrastructure is in place
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Mon, 20 Dec 2010 02:50:36 +0100
parents 08db726a0fb7
children aa1912c70b36
line wrap: on
line diff
--- a/src/com/tmate/hgkit/ll/RevlogStream.java	Sun Dec 19 05:41:31 2010 +0100
+++ b/src/com/tmate/hgkit/ll/RevlogStream.java	Mon Dec 20 02:50:36 2010 +0100
@@ -3,12 +3,19 @@
  */
 package com.tmate.hgkit.ll;
 
+import java.io.BufferedInputStream;
 import java.io.DataInput;
+import java.io.DataInputStream;
 import java.io.EOFException;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.LinkedList;
 import java.util.List;
+import java.util.zip.DataFormatException;
 import java.util.zip.Inflater;
 
 /**
@@ -22,29 +29,54 @@
 
 	private List<IndexEntry> index; // indexed access highly needed
 	private boolean inline = false;
+	private final File indexFile;
+
+	RevlogStream(File indexFile) {
+		this.indexFile = indexFile;
+	}
 
 	private void detectVersion() {
 		
 	}
 
 	/*package*/ DataInput getIndexStream() {
-		// TODO Auto-generated method stub
-		return null;
+		DataInputStream dis = null;
+		try {
+			dis = new DataInputStream(new BufferedInputStream(new FileInputStream(indexFile)));
+		} catch (FileNotFoundException ex) {
+			ex.printStackTrace();
+			// should not happen, we checked for existence
+		}
+		return dis;
 	}
 
 	/*package*/ DataInput getDataStream() {
-		// TODO Auto-generated method stub
-		return null;
+		final String indexName = indexFile.getName();
+		File dataFile = new File(indexFile.getParentFile(), indexName.substring(0, indexName.length() - 1) + "d");
+		try {
+			return new DataInputStream(new BufferedInputStream(new FileInputStream(dataFile)));
+		} catch (FileNotFoundException ex) {
+			ex.printStackTrace();
+			return null;
+		}
 	}
+
 	public int revisionCount() {
 		initOutline();
 		return index.size();
 	}
 
-	// should be possible to use TIP, ALL
+	// should be possible to use TIP, ALL, or -1, -2, -n notation of Hg
+	// ? boolean needsNodeid
 	public void iterate(int start, int end, boolean needData, Revlog.Inspector inspector) {
 		initOutline();
 		final int indexSize = index.size();
+		if (indexSize == 0) {
+			return;
+		}
+		if (end == -1 /*FIXME TIP*/) {
+			end = indexSize - 1;
+		}
 		if (start < 0 || start >= indexSize) {
 			throw new IllegalArgumentException("Bad left range boundary " + start);
 		}
@@ -55,12 +87,13 @@
 		
 		DataInput diIndex = null, diData = null;
 		diIndex = getIndexStream();
-		if (needData) {
+		if (needData && !inline) {
 			diData = getDataStream();
 		}
 		try {
 			diIndex.skipBytes(inline ? (int) index.get(start).offset : start * 64);
-			for (int i = start; i <= end && i < indexSize; i++ ) {
+			byte[] lastData = null;
+			for (int i = start; i <= end; i++ ) {
 				IndexEntry ie = index.get(i);
 				long l = diIndex.readLong();
 				long offset = l >>> 16;
@@ -81,34 +114,71 @@
 					if (inline) {
 						diIndex.readFully(dataBuf);
 					} else {
-						diData.skipBytes((int) ie.offset); // FIXME not skip but seek!!!
+						diData.skipBytes((int) ie.offset); // FIXME not skip but seek!!! (skip would work only for the first time)
 						diData.readFully(dataBuf);
 					}
 					if (dataBuf[0] == 0x78 /* 'x' */) {
-						Inflater zlib = new Inflater();
-						zlib.setInput(dataBuf, 0, compressedLen);
-						byte[] result = new byte[actualLen*2]; // FIXME need to use zlib.finished() instead 
-						int resultLen = zlib.inflate(result);
-						zlib.end();
-						data = new byte[resultLen];
-						System.arraycopy(result, 0, data, 0, resultLen);
+						try {
+							Inflater zlib = new Inflater();
+							zlib.setInput(dataBuf, 0, compressedLen);
+							byte[] result = new byte[actualLen*2]; // FIXME need to use zlib.finished() instead 
+							int resultLen = zlib.inflate(result);
+							zlib.end();
+							data = new byte[resultLen];
+							System.arraycopy(result, 0, data, 0, resultLen);
+						} catch (DataFormatException ex) {
+							ex.printStackTrace();
+							data = new byte[0]; // FIXME need better failure strategy
+						}
 					} else if (dataBuf[0] == 0x75 /* 'u' */) {
 						data = new byte[dataBuf.length - 1];
 						System.arraycopy(dataBuf, 1, data, 0, data.length);
 					} else {
 						// XXX Python impl in fact throws exception when there's not 'x', 'u' or '0'
-						// but I don't see reason not to just return data as is 
+						// but I don't see reason not to return data as is 
 						data = dataBuf;
 					}
-					// FIXME if patch data (based on baseRevision), then apply patches to get true content
+					// XXX 
+					if (baseRevision != i) { // XXX not sure if this is the right way to detect a patch
+						// this is a patch
+						LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>();
+						int patchElementIndex = 0;
+						do {
+							final int x = patchElementIndex; // shorthand
+							int p1 = (data[x] << 24) | (data[x+1] << 16) | (data[x+2] << 8) | data[x+3];
+							int p2 = (data[x+4] << 24) | (data[x+5] << 16) | (data[x+6] << 8) | data[x+7];
+							int len = (data[x+8] << 24) | (data[x+9] << 16) | (data[x+10] << 8) | data[x+11];
+							patchElementIndex += 12 + len;
+							patches.add(new PatchRecord(p1, p2, len, data, x+12));
+						} while (patchElementIndex < data.length);
+						//
+						byte[] baseRevContent;
+						if (baseRevision == i - 1) {
+							baseRevContent = lastData;
+						} else {
+							// FIXME implement delta collection from few revisions
+							// read baseRevision plus all deltas between this revision and base. Need to do this effectively.
+							throw HgRepository.notImplemented();
+						}
+						
+						// FIXME need to collect all patches between baseRevision and current version 
+						data = apply(baseRevContent, patches);
+					}
+				} else {
+					if (inline) {
+						diIndex.skipBytes(compressedLen);
+					}
 				}
-				inspector.next(compressedLen, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, buf, data);
+				inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, buf, data);
+				lastData = data;
 			}
 		} catch (EOFException ex) {
 			// should not happen as long as we read inside known boundaries
 			throw new IllegalStateException(ex);
 		} catch (IOException ex) {
-			FIXME 
+			throw new IllegalStateException(ex); // FIXME need better handling
+		} finally {
+			hackCloseFileStreams(diIndex, diData); // FIXME HACK!!!
 		}
 	}
 	
@@ -120,11 +190,11 @@
 		DataInput di = getIndexStream();
 		try {
 			int versionField = di.readInt();
-			// di.undreadInt();
+			di.readInt(); // just to skip next 2 bytes of offset + flags
 			final int INLINEDATA = 1 << 16;
 			inline = (versionField & INLINEDATA) != 0;
 			long offset = 0; // first offset is always 0, thus Hg uses it for other purposes
-			while(true) { // EOFExcepiton should get us outta here. FIXME Out inputstream should has explicit no-more-data indicator
+			while(true) { // EOFExcepiton should get us outta here. FIXME Our inputstream should has explicit no-more-data indicator
 				int compressedLen = di.readInt();
 				// 8+4 = 12 bytes total read
 //				int actualLen = di.readInt();
@@ -135,9 +205,9 @@
 //				byte[] nodeid = new byte[32];
 				res.add(new IndexEntry(offset, compressedLen));
 				if (inline) {
-					di.skipBytes(6*4 + 32 + compressedLen); // Check: 56 (skip) + 12 (read) = 64 (total RevlogNG record size)
+					di.skipBytes(5*4 + 32 + compressedLen); // Check: 52 (skip) + 12 (read) = 64 (total RevlogNG record size)
 				} else {
-					di.skipBytes(6*4 + 32);
+					di.skipBytes(5*4 + 32);
 				}
 				long l = di.readLong();
 				offset = l >>> 16;
@@ -150,6 +220,22 @@
 			// too bad, no outline then
 			index = Collections.emptyList();
 		}
+		hackCloseFileStreams(di, null); // FIXME HACK!!!
+	}
+	
+	// FIXME HACK to deal with File/FileStream nature of out data source. Won't need this once implement
+	// own DataInput based on bytearray chunks or RandomAccessFile
+	private void hackCloseFileStreams(DataInput index, DataInput data) {
+		try {
+			if (index != null) {
+				((DataInputStream) index).close();
+			}
+			if (data != null) {
+				((DataInputStream) data).close();
+			}
+		} catch (IOException ex) {
+			ex.printStackTrace();
+		}
 	}
 
 
@@ -163,4 +249,37 @@
 			length = l;
 		}
 	}
+
+	// mpatch.c : apply()
+	// FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF
+	private static byte[] apply(byte[] baseRevisionContent, List<PatchRecord> patch) {
+		byte[] tempBuf = new byte[512]; // XXX
+		int last = 0, destIndex = 0;
+		for (PatchRecord pr : patch) {
+			System.arraycopy(baseRevisionContent, last, tempBuf, destIndex, pr.start-last);
+			destIndex += pr.start - last;
+			System.arraycopy(pr.data, 0, tempBuf, destIndex, pr.data.length);
+			destIndex += pr.data.length;
+			last = pr.end;
+		}
+		System.arraycopy(baseRevisionContent, last, tempBuf, destIndex, baseRevisionContent.length - last);
+		destIndex += baseRevisionContent.length - last; // total length
+		byte[] rv = new byte[destIndex];
+		System.arraycopy(tempBuf, 0, rv, 0, destIndex);
+		return rv;
+	}
+
+	static class PatchRecord { // copy of struct frag from mpatch.c
+		int start, end, len;
+		byte[] data;
+
+		public PatchRecord(int p1, int p2, int len, byte[] src, int srcOffset) {
+		start = p1;
+				end = p2;
+				this.len = len;
+				data = new byte[len];
+				System.arraycopy(src, srcOffset, data, 0, len);
+		}
+	}
+
 }