changeset 51:9429c7bd1920 wrap-data-access

Try DataAccess to reach revision data instead of plain byte arrays
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Sun, 16 Jan 2011 01:20:26 +0100
parents f1db8610da62
children a6f39e595b2b
files src/com/tmate/hgkit/console/Main.java src/com/tmate/hgkit/fs/ByteArrayDataAccess.java src/com/tmate/hgkit/fs/DataAccess.java src/com/tmate/hgkit/fs/DataAccessProvider.java src/com/tmate/hgkit/fs/FilterDataAccess.java src/com/tmate/hgkit/fs/InflaterDataAccess.java src/com/tmate/hgkit/ll/Changelog.java src/com/tmate/hgkit/ll/HgBundle.java src/com/tmate/hgkit/ll/HgDataFile.java src/com/tmate/hgkit/ll/HgManifest.java src/com/tmate/hgkit/ll/Revlog.java src/com/tmate/hgkit/ll/RevlogStream.java
diffstat 12 files changed, 430 insertions(+), 55 deletions(-) [+]
line wrap: on
line diff
--- a/src/com/tmate/hgkit/console/Main.java	Sat Jan 15 01:41:49 2011 +0100
+++ b/src/com/tmate/hgkit/console/Main.java	Sun Jan 16 01:20:26 2011 +0100
@@ -22,13 +22,13 @@
 public class Main {
 
 	public static void main(String[] args) throws Exception {
-//		String repo = "/temp/hg/hello/.hg/";
-//		String filename = "store/00changelog.i";
+		String repo = "/temp/hg/hello/.hg/";
+		String filename = "store/00changelog.i";
 //		String filename = "store/data/hello.c.i";
 //		String filename = "store/data/docs/readme.i";
-		String repo = "/eclipse-3.7/ws.hg/com.tmate.hgkit/.hg/";
+//		String repo = "/eclipse-3.7/ws.hg/com.tmate.hgkit/.hg/";
 //		String filename = "store/data/design.txt.i";
-		String filename = "store/data/src/com/tmate/hgkit/ll/_revlog_stream.java.i";
+//		String filename = "store/data/src/com/tmate/hgkit/ll/_revlog_stream.java.i";
 		//
 		LinkedList<Changeset> changelog = new LinkedList<Changeset>();
 		//
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/com/tmate/hgkit/fs/ByteArrayDataAccess.java	Sun Jan 16 01:20:26 2011 +0100
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2011 Artem Tikhomirov 
+ */
+package com.tmate.hgkit.fs;
+
+import java.io.IOException;
+
+/**
+ * DataAccess over a slice of an in-memory byte array; read position is relative to the start of the slice.
+ * @author artem
+ */
+public class ByteArrayDataAccess extends DataAccess {
+	private final byte[] data;
+	private final int offset; // index in data of the first byte of the slice
+	private final int length; // number of bytes in the slice
+	private int pos; // relative to offset
+
+	public ByteArrayDataAccess(byte[] data) {
+		this(data, 0, data.length);
+	}
+
+	public ByteArrayDataAccess(byte[] data, int offset, int length) {
+		this.data = data;
+		this.offset = offset;
+		this.length = length;
+		pos = 0;
+	}
+	@Override
+	public byte readByte() throws IOException {
+		if (pos >= length) {
+			throw new IOException();
+		}
+		return data[offset + pos++];
+	}
+	@Override
+	public void readBytes(byte[] buf, int off, int len) throws IOException {
+		if (len > (this.length - pos)) {
+			throw new IOException();
+		}
+		System.arraycopy(data, offset + pos, buf, off, len); // pos is slice-relative, same as in readByte()
+		pos += len;
+	}
+	@Override
+	public void reset() {
+		pos = 0;
+	}
+	@Override
+	public long length() {
+		return length;
+	}
+	@Override
+	public void seek(long offset) {
+		pos = (int) offset; // the argument (not the field): position relative to the slice start
+	}
+	@Override
+	public void skip(int bytes) throws IOException {
+		seek(pos + bytes);
+	}
+	@Override
+	public boolean isEmpty() {
+		return pos >= length;
+	}
+
+	// when byte[] needed from DA, we may save few cycles and some memory giving this (otherwise unsafe) access to underlying data
+	@Override
+	public byte[] byteArray() {
+		if (offset == 0 && length == data.length) {
+			return data; // the slice is the whole array, no copy needed
+		}
+		byte[] rv = new byte[length]; // narrower slice: hand out its bytes only
+		System.arraycopy(data, offset, rv, 0, length);
+		return rv;
+	}
+}
--- a/src/com/tmate/hgkit/fs/DataAccess.java	Sat Jan 15 01:41:49 2011 +0100
+++ b/src/com/tmate/hgkit/fs/DataAccess.java	Sun Jan 16 01:20:26 2011 +0100
@@ -9,12 +9,20 @@
  * relevant parts of DataInput, non-stream nature (seek operation), explicit check for end of data.
  * convenient skip (+/- bytes)
  * Primary goal - effective file read, so that clients don't need to care whether to call few 
- * distinct getInt() or readBytes(totalForFewInts) and parse themselves instead in an attempt to optimize.  
+ * distinct getInt() or readBytes(totalForFewInts) and parse themselves instead in an attempt to optimize.
+ * Name: ByteSource? DataSource, DataInput, ByteInput 
  */
 public class DataAccess {
 	public boolean isEmpty() {
 		return true;
 	}
+	public long length() { // number of bytes available through this instance
+		return 0; // nothing to read from the base implementation, its isEmpty() is always true
+	}
+	// get this instance into initial state (byteArray() relies on it to read content from the very start)
+	public void reset() throws IOException {
+		// nop, empty instance is always in the initial state
+	}
 	// absolute positioning
 	public void seek(long offset) throws IOException {
 		throw new UnsupportedOperationException();
@@ -45,4 +53,17 @@
 	public byte readByte() throws IOException {
 		throw new UnsupportedOperationException();
 	}
+
+	// XXX decide whether may or may not change position in the DataAccess (as of now it does, due to reset())
+	// FIXME error handling: read failure is reported unchecked as the signature has no checked exception to use
+	public byte[] byteArray() {
+		byte[] rv = new byte[(int) length()];
+		try {
+			reset();
+			readBytes(rv, 0, rv.length);
+		} catch (IOException ex) {
+			throw new IllegalStateException(ex); // never hand out partially filled array as if it were complete
+		}
+		return rv;
+	}
 }
\ No newline at end of file
--- a/src/com/tmate/hgkit/fs/DataAccessProvider.java	Sat Jan 15 01:41:49 2011 +0100
+++ b/src/com/tmate/hgkit/fs/DataAccessProvider.java	Sun Jan 16 01:20:26 2011 +0100
@@ -72,6 +72,16 @@
 		}
 		
 		@Override
+		public long length() { // NOTE(review): size is declared outside this hunk, presumably total size of the data - confirm
+			return size;
+		}
+		
+		@Override
+		public void reset() throws IOException {
+			seek(0); // initial state is position 0, i.e. the very beginning of the data
+		}
+		
+		@Override
 		public void seek(long offset) {
 			assert offset >= 0;
 			// offset may not necessarily be further than current position in the file (e.g. rewind) 
@@ -174,6 +184,16 @@
 		}
 		
 		@Override
+		public long length() { // NOTE(review): size is declared outside this hunk; seek() below rejects offsets greater than it
+			return size;
+		}
+		
+		@Override
+		public void reset() throws IOException {
+			seek(0); // initial state is position 0, i.e. the very beginning of the data
+		}
+		
+		@Override
 		public void seek(long offset) throws IOException {
 			if (offset > size) {
 				throw new IllegalArgumentException();
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/com/tmate/hgkit/fs/FilterDataAccess.java	Sun Jan 16 01:20:26 2011 +0100
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2011 Artem Tikhomirov 
+ */
+package com.tmate.hgkit.fs;
+
+import java.io.IOException;
+
+/**
+ * Fixed-length window over another DataAccess. XXX Perhaps, DataAccessSlice?
+ * Unlike FilterInputStream, we limit amount of data read from DataAccess being filtered.
+ * @author artem
+ */
+public class FilterDataAccess extends DataAccess {
+	private final DataAccess source; // the instance being filtered
+	private final long windowStart; // position in source of the first byte of the window
+	private final int windowLength;
+	private int remaining; // bytes of the window not consumed yet
+
+	public FilterDataAccess(DataAccess dataAccess, long offset, int length) {
+		source = dataAccess;
+		windowStart = offset;
+		windowLength = length;
+		remaining = windowLength;
+	}
+
+	// number of bytes yet to read from the window
+	protected int available() {
+		return remaining;
+	}
+
+	// changes state of this instance only; source gets positioned later, by read/seek/skip
+	@Override
+	public void reset() throws IOException {
+		remaining = windowLength;
+	}
+
+	@Override
+	public boolean isEmpty() {
+		return !(remaining > 0);
+	}
+
+	@Override
+	public long length() {
+		return windowLength;
+	}
+
+	// localOffset is relative to the window start, values in [0..length()] are accepted
+	@Override
+	public void seek(long localOffset) throws IOException {
+		final boolean insideWindow = localOffset >= 0 && localOffset <= windowLength;
+		if (!insideWindow) {
+			throw new IllegalArgumentException();
+		}
+		source.seek(windowStart + localOffset);
+		remaining = (int) (windowLength - localOffset);
+	}
+
+	// goes through seek() rather than source.skip(bytes): reset() doesn't rewind source, only actual skip/seek/read do
+	@Override
+	public void skip(int bytes) throws IOException {
+		final int remainingAfter = remaining - bytes;
+		if (remainingAfter >= 0 && remainingAfter <= windowLength) {
+			seek(windowLength - remainingAfter);
+			return;
+		}
+		throw new IllegalArgumentException();
+	}
+
+	@Override
+	public byte readByte() throws IOException {
+		if (remaining <= 0) {
+			throw new IllegalArgumentException("Underflow"); // XXX be descriptive
+		}
+		positionSourceIfUntouched();
+		remaining -= 1;
+		return source.readByte();
+	}
+
+	@Override
+	public void readBytes(byte[] b, int off, int len) throws IOException {
+		if (remaining <= 0 || remaining < len) {
+			throw new IllegalArgumentException("Underflow"); // XXX be descriptive
+		}
+		positionSourceIfUntouched();
+		source.readBytes(b, off, len);
+		remaining -= len;
+	}
+
+	// nothing consumed yet (new instance or after reset()): source may be anywhere, bring it to the window start
+	private void positionSourceIfUntouched() throws IOException {
+		if (remaining == windowLength) {
+			source.seek(windowStart);
+		}
+	}
+	// done shall be no-op, as we have no idea what's going on with DataAccess we filter
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/com/tmate/hgkit/fs/InflaterDataAccess.java	Sun Jan 16 01:20:26 2011 +0100
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2011 Artem Tikhomirov 
+ */
+package com.tmate.hgkit.fs;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.util.zip.DataFormatException;
+import java.util.zip.Inflater;
+import java.util.zip.ZipException;
+
+/**
+ * DataAccess counterpart for InflaterInputStream: serves inflated content of a compressed slice of another DataAccess.
+ * XXX is it really needed to be subclass of FilterDataAccess?
+ * @author artem
+ */
+public class InflaterDataAccess extends FilterDataAccess {
+
+	private final Inflater inflater;
+	private final byte[] buffer; // portion of compressed data fed to the inflater
+	private final byte[] singleByte = new byte[1];
+	private int decompressedPos = 0; // inflated bytes handed out since creation or the last reset()
+	private int decompressedLength = -1; // -1 until length() figures it out
+
+	public InflaterDataAccess(DataAccess dataAccess, long offset, int length) {
+		this(dataAccess, offset, length, new Inflater(), 512);
+	}
+
+	public InflaterDataAccess(DataAccess dataAccess, long offset, int length, Inflater inflater, int bufSize) {
+		super(dataAccess, offset, length);
+		this.inflater = inflater;
+		buffer = new byte[bufSize];
+	}
+
+	@Override
+	public void reset() throws IOException {
+		super.reset();
+		inflater.reset();
+		decompressedPos = 0;
+	}
+
+	@Override
+	protected int available() {
+		throw new IllegalStateException("Can't tell how much uncompressed data left");
+	}
+
+	@Override
+	public boolean isEmpty() {
+		return super.available() <= 0 && inflater.finished(); // and/or inflater.getRemaining() <= 0 ?
+	}
+
+	@Override
+	public long length() {
+		if (decompressedLength != -1) {
+			return decompressedLength;
+		}
+		int c = 0;
+		try {
+			int oldPos = decompressedPos;
+			while (!isEmpty()) { // inflate the rest just to count it
+				readByte();
+				c++;
+			}
+			decompressedLength = c + oldPos;
+			reset();
+			seek(oldPos); // inflate again up to the position we started from
+			return decompressedLength;
+		} catch (IOException ex) {
+			ex.printStackTrace(); // FIXME log error
+			decompressedLength = -1; // better luck next time?
+			return 0;
+		}
+	}
+
+	@Override
+	public void seek(long localOffset) throws IOException {
+		if (localOffset < 0 /* || localOffset >= length() */) {
+			throw new IllegalArgumentException();
+		}
+		// position is reached by inflating: go forward by reading, go back by starting over
+		if (localOffset >= decompressedPos) {
+			skip((int) (localOffset - decompressedPos));
+		} else {
+			reset();
+			skip((int) localOffset);
+		}
+	}
+
+	@Override
+	public void skip(int bytes) throws IOException {
+		if (bytes < 0) {
+			bytes += decompressedPos;
+			if (bytes < 0) {
+				throw new IOException("Underflow. Rewind past start of the slice.");
+			}
+			reset();
+			// fall-through: bytes is now the absolute position to read up to from the very start
+		}
+		while (!isEmpty() && bytes > 0) {
+			readByte();
+			bytes--;
+		}
+		if (bytes != 0) {
+			throw new IOException("Underflow. Rewind past end of the slice");
+		}
+	}
+
+	@Override
+	public byte readByte() throws IOException {
+		readBytes(singleByte, 0, 1);
+		return singleByte[0];
+	}
+
+	@Override
+	public void readBytes(byte[] b, int off, int len) throws IOException {
+		try {
+			int n;
+			while (len > 0) {
+				while ((n = inflater.inflate(b, off, len)) == 0) {
+					if (inflater.finished() || inflater.needsDictionary()) {
+						throw new EOFException();
+					}
+					if (inflater.needsInput()) {
+						// fill:
+						int toRead = super.available();
+						if (toRead > buffer.length) {
+							toRead = buffer.length;
+						}
+						if (toRead <= 0) { // compressed slice is over, yet inflater is not done: data is truncated
+							throw new EOFException("Unexpected end of ZLIB input stream");
+						}
+						super.readBytes(buffer, 0, toRead);
+						inflater.setInput(buffer, 0, toRead);
+					}
+				}
+				off += n;
+				len -= n;
+				decompressedPos += n;
+			}
+		} catch (DataFormatException e) {
+			String s = e.getMessage();
+			throw (ZipException) new ZipException(s != null ? s : "Invalid ZLIB data format").initCause(e);
+		}
+	}
+}
--- a/src/com/tmate/hgkit/ll/Changelog.java	Sat Jan 15 01:41:49 2011 +0100
+++ b/src/com/tmate/hgkit/ll/Changelog.java	Sun Jan 16 01:20:26 2011 +0100
@@ -7,6 +7,8 @@
 import java.util.Arrays;
 import java.util.List;
 
+import com.tmate.hgkit.fs.DataAccess;
+
 /**
  * Representation of the Mercurial changelog file (list of ChangeSets)
  * @author artem
@@ -24,7 +26,8 @@
 	public void range(int start, int end, final Changeset.Inspector inspector) {
 		Revlog.Inspector i = new Revlog.Inspector() {
 			
-			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, byte[] data) {
+			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) {
+				byte[] data = da.byteArray();
 				Changeset cset = Changeset.parse(data, 0, data.length);
 				// XXX there's no guarantee for Changeset.Callback that distinct instance comes each time, consider instance reuse
 				inspector.next(revisionNumber, Nodeid.fromBinary(nodeid, 0), cset);
@@ -37,7 +40,8 @@
 		final ArrayList<Changeset> rv = new ArrayList<Changeset>(end - start + 1);
 		Revlog.Inspector i = new Revlog.Inspector() {
 			
-			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, byte[] data) {
+			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) {
+				byte[] data = da.byteArray();
 				Changeset cset = Changeset.parse(data, 0, data.length);
 				rv.add(cset);
 			}
@@ -52,8 +56,9 @@
 		}
 		Revlog.Inspector i = new Revlog.Inspector() {
 			
-			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, byte[] data) {
+			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) {
 				if (Arrays.binarySearch(revisions, revisionNumber) >= 0) {
+					byte[] data = da.byteArray();
 					Changeset cset = Changeset.parse(data, 0, data.length);
 					inspector.next(revisionNumber, Nodeid.fromBinary(nodeid, 0), cset);
 				}
--- a/src/com/tmate/hgkit/ll/HgBundle.java	Sat Jan 15 01:41:49 2011 +0100
+++ b/src/com/tmate/hgkit/ll/HgBundle.java	Sun Jan 16 01:20:26 2011 +0100
@@ -8,6 +8,7 @@
 import java.util.LinkedList;
 import java.util.List;
 
+import com.tmate.hgkit.fs.ByteArrayDataAccess;
 import com.tmate.hgkit.fs.DataAccess;
 import com.tmate.hgkit.fs.DataAccessProvider;
 
@@ -45,7 +46,7 @@
 			// (the previous, or parent patch of a given patch p is the patch that has a node equal to p's p1 field)
 			byte[] baseRevContent = hgRepo.getChangelog().content(base);
 			for (GroupElement ge : changelogGroup) {
-				byte[] csetContent = RevlogStream.apply(baseRevContent, -1, ge.patches);
+				byte[] csetContent = RevlogStream.apply(new ByteArrayDataAccess(baseRevContent), -1, ge.patches);
 				dh = dh.sha1(ge.firstParent(), ge.secondParent(), csetContent); // XXX ge may give me access to byte[] content of nodeid directly, perhaps, I don't need DH to be friend of Nodeid?
 				if (!ge.node().equalsTo(dh.asBinary())) {
 					throw new IllegalStateException("Integrity check failed on " + bundleFile + ", node:" + ge.node());
--- a/src/com/tmate/hgkit/ll/HgDataFile.java	Sat Jan 15 01:41:49 2011 +0100
+++ b/src/com/tmate/hgkit/ll/HgDataFile.java	Sun Jan 16 01:20:26 2011 +0100
@@ -7,6 +7,8 @@
 
 import java.util.Arrays;
 
+import com.tmate.hgkit.fs.DataAccess;
+
 /**
  * Extends Revlog/uses RevlogStream?
  * ? name:HgFileNode?
@@ -52,7 +54,7 @@
 		Revlog.Inspector insp = new Revlog.Inspector() {
 			int count = 0;
 			
-			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, byte[] data) {
+			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) {
 				commitRevisions[count++] = linkRevision;
 			}
 		};
@@ -87,7 +89,7 @@
 			public int p2 = -1;
 			public byte[] nodeid;
 			
-			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, byte[] data) {
+			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) {
 				p1 = parent1Revision;
 				p2 = parent2Revision;
 				this.nodeid = new byte[20];
--- a/src/com/tmate/hgkit/ll/HgManifest.java	Sat Jan 15 01:41:49 2011 +0100
+++ b/src/com/tmate/hgkit/ll/HgManifest.java	Sun Jan 16 01:20:26 2011 +0100
@@ -3,6 +3,8 @@
  */
 package com.tmate.hgkit.ll;
 
+import com.tmate.hgkit.fs.DataAccess;
+
 /**
  *
  * @author artem
@@ -18,7 +20,7 @@
 
 			private boolean gtg = true; // good to go
 
-			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, byte[] data) {
+			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) {
 				if (!gtg) {
 					return;
 				}
@@ -27,6 +29,7 @@
 				String fname = null;
 				String flags = null;
 				Nodeid nid = null;
+				byte[] data = da.byteArray();
 				for (i = 0; gtg && i < actualLen; i++) {
 					int x = i;
 					for( ; data[i] != '\n' && i < actualLen; i++) {
--- a/src/com/tmate/hgkit/ll/Revlog.java	Sat Jan 15 01:41:49 2011 +0100
+++ b/src/com/tmate/hgkit/ll/Revlog.java	Sun Jan 16 01:20:26 2011 +0100
@@ -10,6 +10,8 @@
 import java.util.Map;
 import java.util.Set;
 
+import com.tmate.hgkit.fs.DataAccess;
+
 /**
  *
  * @author artem
@@ -70,8 +72,8 @@
 	public byte[] content(int revision) {
 		final byte[][] dataPtr = new byte[1][];
 		Revlog.Inspector insp = new Revlog.Inspector() {
-			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, byte[] data) {
-				dataPtr[0] = data;
+			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) {
+				dataPtr[0] = data.byteArray();
 			}
 		};
 		content.iterate(revision, revision, true, insp);
@@ -84,7 +86,7 @@
 	public interface Inspector {
 		// XXX boolean retVal to indicate whether to continue?
 		// TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call) 
-		void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, byte[] data);
+		void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, DataAccess data);
 	}
 
 	/*
@@ -115,7 +117,7 @@
 			Inspector insp = new Inspector() {
 				final Nodeid[] sequentialRevisionNodeids = new Nodeid[revisionCount];
 				int ix = 0;
-				public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, byte[] data) {
+				public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) {
 					if (ix != revisionNumber) {
 						// XXX temp code, just to make sure I understand what's going on here
 						throw new IllegalStateException();
--- a/src/com/tmate/hgkit/ll/RevlogStream.java	Sat Jan 15 01:41:49 2011 +0100
+++ b/src/com/tmate/hgkit/ll/RevlogStream.java	Sun Jan 16 01:20:26 2011 +0100
@@ -11,11 +11,12 @@
 import java.util.Collections;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.zip.DataFormatException;
-import java.util.zip.Inflater;
 
+import com.tmate.hgkit.fs.ByteArrayDataAccess;
 import com.tmate.hgkit.fs.DataAccess;
 import com.tmate.hgkit.fs.DataAccessProvider;
+import com.tmate.hgkit.fs.FilterDataAccess;
+import com.tmate.hgkit.fs.InflaterDataAccess;
 
 /**
  * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations), 
@@ -135,7 +136,7 @@
 		}
 		try {
 			byte[] nodeidBuf = new byte[20];
-			byte[] lastData = null;
+			DataAccess lastUserData = null;
 			int i;
 			boolean extraReadsToBaseRev = false;
 			if (needData && index.get(start).baseRevision < start) {
@@ -145,8 +146,12 @@
 				i = start;
 			}
 			
-			daIndex.seek(inline ? (int) index.get(i).offset : i * REVLOGV1_RECORD_SIZE);
+			daIndex.seek(inline ? index.get(i).offset : i * REVLOGV1_RECORD_SIZE);
 			for (; i <= end; i++ ) {
+				if (inline && needData) {
+					// inspector reading data (through FilterDataAccess) may have affected index position
+					daIndex.seek(index.get(i).offset);
+				}
 				long l = daIndex.readLong();
 				@SuppressWarnings("unused")
 				long offset = l >>> 16;
@@ -161,49 +166,41 @@
 				// Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty
 				daIndex.readBytes(nodeidBuf, 0, 20);
 				daIndex.skip(12);
-				byte[] data = null;
+				DataAccess userDataAccess = null;
 				if (needData) {
-					byte[] dataBuf = new byte[compressedLen];
+					final byte firstByte;
+					long streamOffset = index.get(i).offset;
+					DataAccess streamDataAccess;
 					if (inline) {
-						daIndex.readBytes(dataBuf, 0, compressedLen);
+						streamDataAccess = daIndex;
+						streamOffset += REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream
 					} else {
-						daData.seek(index.get(i).offset);
-						daData.readBytes(dataBuf, 0, compressedLen);
+						streamDataAccess = daData;
+						daData.seek(streamOffset);
 					}
-					if (dataBuf[0] == 0x78 /* 'x' */) {
-						try {
-							Inflater zlib = new Inflater(); // XXX Consider reuse of Inflater, and/or stream alternative
-							zlib.setInput(dataBuf, 0, compressedLen);
-							byte[] result = new byte[actualLen*2]; // FIXME need to use zlib.finished() instead 
-							int resultLen = zlib.inflate(result);
-							zlib.end();
-							data = new byte[resultLen];
-							System.arraycopy(result, 0, data, 0, resultLen);
-						} catch (DataFormatException ex) {
-							ex.printStackTrace();
-							data = new byte[0]; // FIXME need better failure strategy
-						}
-					} else if (dataBuf[0] == 0x75 /* 'u' */) {
-						data = new byte[dataBuf.length - 1];
-						System.arraycopy(dataBuf, 1, data, 0, data.length);
+					firstByte = streamDataAccess.readByte();
+					if (firstByte == 0x78 /* 'x' */) {
+						userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen);
+					} else if (firstByte == 0x75 /* 'u' */) {
+						userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1);
 					} else {
 						// XXX Python impl in fact throws exception when there's not 'x', 'u' or '0'
 						// but I don't see reason not to return data as is 
-						data = dataBuf;
+						userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen);
 					}
 					// XXX 
 					if (baseRevision != i) { // XXX not sure if this is the right way to detect a patch
 						// this is a patch
 						LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>();
-						int patchElementIndex = 0;
-						do {
-							PatchRecord pr = PatchRecord.read(data, patchElementIndex);
+						while (!userDataAccess.isEmpty()) {
+							PatchRecord pr = PatchRecord.read(userDataAccess);
+							System.out.printf("PatchRecord:%d %d %d\n", pr.start, pr.end, pr.len);
 							patches.add(pr);
-							patchElementIndex += 12 + pr.len;
-						} while (patchElementIndex < data.length);
+						}
+						userDataAccess.done();
 						//
-						byte[] baseRevContent = lastData;
-						data = apply(baseRevContent, actualLen, patches);
+						byte[] userData = apply(lastUserData, actualLen, patches);
+						userDataAccess = new ByteArrayDataAccess(userData);
 					}
 				} else {
 					if (inline) {
@@ -211,9 +208,15 @@
 					}
 				}
 				if (!extraReadsToBaseRev || i >= start) {
-					inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, data);
+					inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess);
 				}
-				lastData = data;
+				if (userDataAccess != null) {
+					userDataAccess.reset();
+					if (lastUserData != null) {
+						lastUserData.done();
+					}
+					lastUserData = userDataAccess;
+				}
 			}
 		} catch (IOException ex) {
 			throw new IllegalStateException(ex); // FIXME need better handling
@@ -292,10 +295,10 @@
 
 	// mpatch.c : apply()
 	// FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF
-	/*package-local for HgBundle; until moved to better place*/static byte[] apply(byte[] baseRevisionContent, int outcomeLen, List<PatchRecord> patch) {
+	/*package-local for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List<PatchRecord> patch) throws IOException {
 		int last = 0, destIndex = 0;
 		if (outcomeLen == -1) {
-			outcomeLen = baseRevisionContent.length;
+			outcomeLen = (int) baseRevisionContent.length();
 			for (PatchRecord pr : patch) {
 				outcomeLen += pr.start - last + pr.len;
 				last = pr.end;
@@ -303,15 +306,18 @@
 			outcomeLen -= last;
 			last = 0;
 		}
+		System.out.println(baseRevisionContent.length());
 		byte[] rv = new byte[outcomeLen];
 		for (PatchRecord pr : patch) {
-			System.arraycopy(baseRevisionContent, last, rv, destIndex, pr.start-last);
+			baseRevisionContent.seek(last);
+			baseRevisionContent.readBytes(rv, destIndex, pr.start-last);
 			destIndex += pr.start - last;
 			System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length);
 			destIndex += pr.data.length;
 			last = pr.end;
 		}
-		System.arraycopy(baseRevisionContent, last, rv, destIndex, baseRevisionContent.length - last);
+		baseRevisionContent.seek(last);
+		baseRevisionContent.readBytes(rv, destIndex, (int) (baseRevisionContent.length() - last));
 		return rv;
 	}