changeset 77:c677e1593919

Moved RevlogStream implementation into .internal
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Mon, 24 Jan 2011 05:33:47 +0100 (2011-01-24)
parents 658fa6b3a371
children c25c5c348d1b
files TODO cmdline/org/tmatesoft/hg/console/Log.java src/org/tmatesoft/hg/core/LogCommand.java src/org/tmatesoft/hg/core/Path.java src/org/tmatesoft/hg/internal/RevlogStream.java src/org/tmatesoft/hg/repo/Changelog.java src/org/tmatesoft/hg/repo/HgBundle.java src/org/tmatesoft/hg/repo/HgDataFile.java src/org/tmatesoft/hg/repo/HgManifest.java src/org/tmatesoft/hg/repo/HgRepository.java src/org/tmatesoft/hg/repo/Revlog.java src/org/tmatesoft/hg/repo/RevlogStream.java
diffstat 12 files changed, 432 insertions(+), 402 deletions(-) [+]
line wrap: on
line diff
--- a/TODO	Mon Jan 24 04:38:09 2011 +0100
+++ b/TODO	Mon Jan 24 05:33:47 2011 +0100
@@ -2,12 +2,16 @@
 ==============================
 Committed:
 * hg log
-  user, date, branch, limit
-  filename(multiple?)
+  + user, branch, limit
+  - date, 
+  + filename
+  - filename and follow history
+  - 
   
 * hg manifest (aka ls)
   
 * hg status
+  - copies for revisions
 
 * hg cat
 
--- a/cmdline/org/tmatesoft/hg/console/Log.java	Mon Jan 24 04:38:09 2011 +0100
+++ b/cmdline/org/tmatesoft/hg/console/Log.java	Mon Jan 24 05:33:47 2011 +0100
@@ -82,15 +82,16 @@
 			for (String fname : cmdLineOpts.files) {
 				HgDataFile f1 = hgRepo.getFileNode(fname);
 				System.out.println("History of the file: " + f1.getPath());
+				String normalizesName = hgRepo.getPathHelper().rewrite(fname);
 				if (cmdLineOpts.limit == -1) {
-					cmd.file(Path.create(fname)).execute(dump);
+					cmd.file(Path.create(normalizesName)).execute(dump);
 				} else {
 					int[] r = new int[] { 0, f1.getRevisionCount() };
 					if (fixRange(r, dump.reverseOrder, cmdLineOpts.limit) == 0) {
 						System.out.println("No changes");
 						continue;
 					}
-					cmd.range(r[0], r[1]).file(Path.create(fname)).execute(dump);
+					cmd.range(r[0], r[1]).file(Path.create(normalizesName)).execute(dump);
 				}
 				dump.complete();
 			}
--- a/src/org/tmatesoft/hg/core/LogCommand.java	Mon Jan 24 04:38:09 2011 +0100
+++ b/src/org/tmatesoft/hg/core/LogCommand.java	Mon Jan 24 05:33:47 2011 +0100
@@ -50,6 +50,7 @@
 	private int startRev = 0, endRev = TIP;
 	private Handler delegate;
 	private Calendar date;
+	private Path file;
 	private Cset changeset;
 
 	public LogCommand(HgRepository hgRepo) {
@@ -129,10 +130,15 @@
 		return this;
 	}
 	
-	// multiple? Bad idea, would need to include extra method into Handler to tell start of next file
+	/**
+	 * Visit history of a given file only.
+	 * @param file path relative to repository root. Pass <code>null</code> to reset.
+	 */
 	public LogCommand file(Path file) {
+		// multiple? Bad idea, would need to include extra method into Handler to tell start of next file
 		// implicit --follow in this case
-		throw HgRepository.notImplemented();
+		this.file = file;
+		return this;
 	}
 
 	/**
@@ -161,7 +167,11 @@
 			delegate = handler;
 			count = 0;
 			changeset = new Cset(new StatusCollector(repo), new PathPool(repo.getPathHelper()));
-			repo.getChangelog().range(startRev, endRev, this);
+			if (file == null) {
+				repo.getChangelog().range(startRev, endRev, this);
+			} else {
+				repo.getFileNode(file).history(startRev, endRev, this);
+			}
 		} finally {
 			delegate = null;
 			changeset = null;
--- a/src/org/tmatesoft/hg/core/Path.java	Mon Jan 24 04:38:09 2011 +0100
+++ b/src/org/tmatesoft/hg/core/Path.java	Mon Jan 24 05:33:47 2011 +0100
@@ -69,6 +69,9 @@
 		if (path == null) {
 			throw new IllegalArgumentException();
 		}
+		if (path.indexOf('\\') != -1) {
+			throw new IllegalArgumentException();
+		}
 		Path rv = new Path(path);
 		return rv;
 	}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/tmatesoft/hg/internal/RevlogStream.java	Mon Jan 24 05:33:47 2011 +0100
@@ -0,0 +1,379 @@
+/*
+ * Copyright (c) 2010-2011 TMate Software Ltd
+ *  
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@svnkit.com
+ */
+package org.tmatesoft.hg.internal;
+
+import static org.tmatesoft.hg.repo.HgRepository.TIP;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.zip.DataFormatException;
+import java.util.zip.Inflater;
+
+import org.tmatesoft.hg.core.Nodeid;
+
+
+/**
+ * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations), 
+ * or numerous RevlogStream with separate representation of the underlying data (cached, lazy ChunkStream)?
+ * 
+ * @see http://mercurial.selenic.com/wiki/Revlog
+ * @see http://mercurial.selenic.com/wiki/RevlogNG
+ *
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public class RevlogStream {
+
+	private List<IndexEntry> index; // indexed access highly needed
+	private boolean inline = false;
+	private final File indexFile;
+	private final DataAccessProvider dataAccess;
+
+	// if we need anything else from HgRepo, might replace DAP parameter with HgRepo and query it for DAP.
+	public RevlogStream(DataAccessProvider dap, File indexFile) {
+		this.dataAccess = dap;
+		this.indexFile = indexFile;
+	}
+
+	/*package*/ DataAccess getIndexStream() {
+		return dataAccess.create(indexFile);
+	}
+
+	/*package*/ DataAccess getDataStream() {
+		final String indexName = indexFile.getName();
+		File dataFile = new File(indexFile.getParentFile(), indexName.substring(0, indexName.length() - 1) + "d");
+		return dataAccess.create(dataFile);
+	}
+	
+	public int revisionCount() {
+		initOutline();
+		return index.size();
+	}
+	
+	public int dataLength(int revision) {
+		// XXX in fact, use of iterate() instead of this implementation may be quite reasonable.
+		//
+		final int indexSize = revisionCount();
+		DataAccess daIndex = getIndexStream(); // XXX may supply a hint that I'll need really few bytes of data (although at some offset)
+		if (revision == TIP) {
+			revision = indexSize - 1;
+		}
+		try {
+			int recordOffset = inline ? (int) index.get(revision).offset : revision * REVLOGV1_RECORD_SIZE;
+			daIndex.seek(recordOffset + 12); // 6+2+4
+			int actualLen = daIndex.readInt();
+			return actualLen; 
+		} catch (IOException ex) {
+			ex.printStackTrace(); // log error. FIXME better handling
+			throw new IllegalStateException(ex);
+		} finally {
+			daIndex.done();
+		}
+	}
+	
+	// Perhaps RevlogStream should be limited to plain int revisions for access,
+	// while Nodeids are kept one level up, in Revlog. Revlog would better keep
+	// a map of nodeids, and once that is the case, we may get rid of this method.
+	// Unlike its counterpart, Revlog#getLocalRevisionNumber, this method doesn't throw if the node is not found;
+	// it returns a predefined constant (Integer.MIN_VALUE) instead.
+	public int findLocalRevisionNumber(Nodeid nodeid) {
+		// XXX this one may be implemented with iterate() once there's mechanism to stop iterations
+		final int indexSize = revisionCount();
+		DataAccess daIndex = getIndexStream();
+		try {
+			byte[] nodeidBuf = new byte[20];
+			for (int i = 0; i < indexSize; i++) {
+				daIndex.skip(8);
+				int compressedLen = daIndex.readInt();
+				daIndex.skip(20);
+				daIndex.readBytes(nodeidBuf, 0, 20);
+				if (nodeid.equalsTo(nodeidBuf)) {
+					return i;
+				}
+				daIndex.skip(inline ? 12 + compressedLen : 12);
+			}
+		} catch (IOException ex) {
+			ex.printStackTrace(); // log error. FIXME better handling
+			throw new IllegalStateException(ex);
+		} finally {
+			daIndex.done();
+		}
+		return Integer.MIN_VALUE;
+	}
+
+
+	private final int REVLOGV1_RECORD_SIZE = 64;
+
+	// should be possible to use TIP, ALL, or -1, -2, -n notation of Hg
+	// ? boolean needsNodeid
+	public void iterate(int start, int end, boolean needData, Inspector inspector) {
+		initOutline();
+		final int indexSize = index.size();
+		if (indexSize == 0) {
+			return;
+		}
+		if (end == TIP) {
+			end = indexSize - 1;
+		}
+		if (start == TIP) {
+			start = indexSize - 1;
+		}
+		if (start < 0 || start >= indexSize) {
+			throw new IllegalArgumentException("Bad left range boundary " + start);
+		}
+		if (end < start || end >= indexSize) {
+			throw new IllegalArgumentException("Bad right range boundary " + end);
+		}
+		// XXX may cache [start .. end] from index with a single read (pre-read)
+		
+		DataAccess daIndex = null, daData = null;
+		daIndex = getIndexStream();
+		if (needData && !inline) {
+			daData = getDataStream();
+		}
+		try {
+			byte[] nodeidBuf = new byte[20];
+			byte[] lastData = null;
+			int i;
+			boolean extraReadsToBaseRev = false;
+			if (needData && index.get(start).baseRevision < start) {
+				i = index.get(start).baseRevision;
+				extraReadsToBaseRev = true;
+			} else {
+				i = start;
+			}
+			
+			daIndex.seek(inline ? (int) index.get(i).offset : i * REVLOGV1_RECORD_SIZE);
+			for (; i <= end; i++ ) {
+				long l = daIndex.readLong();
+				@SuppressWarnings("unused")
+				long offset = l >>> 16;
+				@SuppressWarnings("unused")
+				int flags = (int) (l & 0X0FFFF);
+				int compressedLen = daIndex.readInt();
+				int actualLen = daIndex.readInt();
+				int baseRevision = daIndex.readInt();
+				int linkRevision = daIndex.readInt();
+				int parent1Revision = daIndex.readInt();
+				int parent2Revision = daIndex.readInt();
+				// Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty
+				daIndex.readBytes(nodeidBuf, 0, 20);
+				daIndex.skip(12);
+				byte[] data = null;
+				if (needData) {
+					byte[] dataBuf = new byte[compressedLen];
+					if (inline) {
+						daIndex.readBytes(dataBuf, 0, compressedLen);
+					} else {
+						daData.seek(index.get(i).offset);
+						daData.readBytes(dataBuf, 0, compressedLen);
+					}
+					if (dataBuf[0] == 0x78 /* 'x' */) {
+						try {
+							Inflater zlib = new Inflater(); // XXX Consider reuse of Inflater, and/or stream alternative
+							zlib.setInput(dataBuf, 0, compressedLen);
+							byte[] result = new byte[actualLen*2]; // FIXME need to use zlib.finished() instead 
+							int resultLen = zlib.inflate(result);
+							zlib.end();
+							data = new byte[resultLen];
+							System.arraycopy(result, 0, data, 0, resultLen);
+						} catch (DataFormatException ex) {
+							ex.printStackTrace();
+							data = new byte[0]; // FIXME need better failure strategy
+						}
+					} else if (dataBuf[0] == 0x75 /* 'u' */) {
+						data = new byte[dataBuf.length - 1];
+						System.arraycopy(dataBuf, 1, data, 0, data.length);
+					} else {
+						// XXX Python impl in fact throws exception when there's not 'x', 'u' or '0'
+						// but I don't see reason not to return data as is 
+						data = dataBuf;
+					}
+					// XXX 
+					if (baseRevision != i) { // XXX not sure if this is the right way to detect a patch
+						// this is a patch
+						LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>();
+						int patchElementIndex = 0;
+						do {
+							PatchRecord pr = PatchRecord.read(data, patchElementIndex);
+							patches.add(pr);
+							patchElementIndex += 12 + pr.len;
+						} while (patchElementIndex < data.length);
+						//
+						byte[] baseRevContent = lastData;
+						data = apply(baseRevContent, actualLen, patches);
+					}
+				} else {
+					if (inline) {
+						daIndex.skip(compressedLen);
+					}
+				}
+				if (!extraReadsToBaseRev || i >= start) {
+					inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, data);
+				}
+				lastData = data;
+			}
+		} catch (IOException ex) {
+			throw new IllegalStateException(ex); // FIXME need better handling
+		} finally {
+			daIndex.done();
+			if (daData != null) {
+				daData.done();
+			}
+		}
+	}
+	
+	private void initOutline() {
+		if (index != null && !index.isEmpty()) {
+			return;
+		}
+		ArrayList<IndexEntry> res = new ArrayList<IndexEntry>();
+		DataAccess da = getIndexStream();
+		try {
+			int versionField = da.readInt();
+			da.readInt(); // just to skip next 2 bytes of offset + flags
+			final int INLINEDATA = 1 << 16;
+			inline = (versionField & INLINEDATA) != 0;
+			long offset = 0; // first offset is always 0, thus Hg uses it for other purposes
+			while(true) {
+				int compressedLen = da.readInt();
+				// 8+4 = 12 bytes total read here
+				@SuppressWarnings("unused")
+				int actualLen = da.readInt();
+				int baseRevision = da.readInt();
+				// 12 + 8 = 20 bytes read here
+//				int linkRevision = di.readInt();
+//				int parent1Revision = di.readInt();
+//				int parent2Revision = di.readInt();
+//				byte[] nodeid = new byte[32];
+				if (inline) {
+					res.add(new IndexEntry(offset + REVLOGV1_RECORD_SIZE * res.size(), baseRevision));
+					da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size)
+				} else {
+					res.add(new IndexEntry(offset, baseRevision));
+					da.skip(3*4 + 32);
+				}
+				if (da.isEmpty()) {
+					// fine, done then
+					res.trimToSize();
+					index = res;
+					break;
+				} else {
+					// start reading next record
+					long l = da.readLong();
+					offset = l >>> 16;
+				}
+			}
+		} catch (IOException ex) {
+			ex.printStackTrace(); // log error
+			// too bad, no outline then.
+			index = Collections.emptyList();
+		} finally {
+			da.done();
+		}
+		
+	}
+	
+
+	// perhaps, package-local or protected, if anyone else from low-level needs them
+	// XXX think over if we should keep offset in case of separate data file - we read the field anyway. Perhaps, distinct entry classes for Inline and non-inline indexes?
+	private static class IndexEntry {
+		public final long offset; // for separate .i and .d - copy of index record entry, for inline index - actual offset of the record in the .i file (record entry + revision * record size)
+		//public final int length; // data past fixed record (need to decide whether including header size or not), and whether length is of compressed data or not
+		public final int baseRevision;
+
+		public IndexEntry(long o, int baseRev) {
+			offset = o;
+			baseRevision = baseRev;
+		}
+	}
+
+	// mpatch.c : apply()
+	// FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF
+	public/*for HgBundle; until moved to better place*/static byte[] apply(byte[] baseRevisionContent, int outcomeLen, List<PatchRecord> patch) {
+		int last = 0, destIndex = 0;
+		if (outcomeLen == -1) {
+			outcomeLen = baseRevisionContent.length;
+			for (PatchRecord pr : patch) {
+				outcomeLen += pr.start - last + pr.len;
+				last = pr.end;
+			}
+			outcomeLen -= last;
+			last = 0;
+		}
+		byte[] rv = new byte[outcomeLen];
+		for (PatchRecord pr : patch) {
+			System.arraycopy(baseRevisionContent, last, rv, destIndex, pr.start-last);
+			destIndex += pr.start - last;
+			System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length);
+			destIndex += pr.data.length;
+			last = pr.end;
+		}
+		System.arraycopy(baseRevisionContent, last, rv, destIndex, baseRevisionContent.length - last);
+		return rv;
+	}
+
+	// @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description
+	public static class PatchRecord {
+		/*
+		   Given there are pr1 and pr2:
+		     pr1.start to pr1.end will be replaced with pr1's data (of pr1.len)
+		     pr1.end to pr2.start gets copied from base
+		 */
+		public int start, end, len;
+		public byte[] data;
+
+		// TODO consider PatchRecord that only records data position (absolute in data source), and acquires data as needed 
+		private PatchRecord(int p1, int p2, int length, byte[] src) {
+			start = p1;
+			end = p2;
+			len = length;
+			data = src;
+		}
+
+		/*package-local*/ static PatchRecord read(byte[] data, int offset) {
+			final int x = offset; // shorthand
+			int p1 =  ((data[x] & 0xFF)<< 24)    | ((data[x+1] & 0xFF) << 16) | ((data[x+2] & 0xFF) << 8)  | (data[x+3] & 0xFF);
+			int p2 =  ((data[x+4] & 0xFF) << 24) | ((data[x+5] & 0xFF) << 16) | ((data[x+6] & 0xFF) << 8)  | (data[x+7] & 0xFF);
+			int len = ((data[x+8] & 0xFF) << 24) | ((data[x+9] & 0xFF) << 16) | ((data[x+10] & 0xFF) << 8) | (data[x+11] & 0xFF);
+			byte[] dataCopy = new byte[len];
+			System.arraycopy(data, x+12, dataCopy, 0, len);
+			return new PatchRecord(p1, p2, len, dataCopy);
+		}
+
+		public /*for HgBundle*/ static PatchRecord read(DataAccess da) throws IOException {
+			int p1 = da.readInt();
+			int p2 = da.readInt();
+			int len = da.readInt();
+			byte[] src = new byte[len];
+			da.readBytes(src, 0, len);
+			return new PatchRecord(p1, p2, len, src);
+		}
+	}
+
+	// FIXME byte[] data might be too expensive; for a few use cases it may be better to have an intermediate Access object (when we don't need full data
+	// instantly - e.g. calculating a hash, or comparing two revisions)
+	public interface Inspector {
+		// XXX boolean retVal to indicate whether to continue?
+		// TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call) 
+		void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, byte[] data);
+	}
+}
--- a/src/org/tmatesoft/hg/repo/Changelog.java	Mon Jan 24 04:38:09 2011 +0100
+++ b/src/org/tmatesoft/hg/repo/Changelog.java	Mon Jan 24 05:33:47 2011 +0100
@@ -21,6 +21,7 @@
 import java.util.List;
 
 import org.tmatesoft.hg.core.Nodeid;
+import org.tmatesoft.hg.internal.RevlogStream;
 
 
 /**
@@ -40,7 +41,7 @@
 	}
 
 	public void range(int start, int end, final Changeset.Inspector inspector) {
-		Revlog.Inspector i = new Revlog.Inspector() {
+		RevlogStream.Inspector i = new RevlogStream.Inspector() {
 			
 			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, byte[] data) {
 				Changeset cset = Changeset.parse(data, 0, data.length);
@@ -53,7 +54,7 @@
 
 	public List<Changeset> range(int start, int end) {
 		final ArrayList<Changeset> rv = new ArrayList<Changeset>(end - start + 1);
-		Revlog.Inspector i = new Revlog.Inspector() {
+		RevlogStream.Inspector i = new RevlogStream.Inspector() {
 			
 			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, byte[] data) {
 				Changeset cset = Changeset.parse(data, 0, data.length);
@@ -68,7 +69,7 @@
 		if (revisions == null || revisions.length == 0) {
 			return;
 		}
-		Revlog.Inspector i = new Revlog.Inspector() {
+		RevlogStream.Inspector i = new RevlogStream.Inspector() {
 			
 			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, byte[] data) {
 				if (Arrays.binarySearch(revisions, revisionNumber) >= 0) {
--- a/src/org/tmatesoft/hg/repo/HgBundle.java	Mon Jan 24 04:38:09 2011 +0100
+++ b/src/org/tmatesoft/hg/repo/HgBundle.java	Mon Jan 24 05:33:47 2011 +0100
@@ -25,6 +25,7 @@
 import org.tmatesoft.hg.internal.DataAccess;
 import org.tmatesoft.hg.internal.DataAccessProvider;
 import org.tmatesoft.hg.internal.DigestHelper;
+import org.tmatesoft.hg.internal.RevlogStream;
 
 
 /**
--- a/src/org/tmatesoft/hg/repo/HgDataFile.java	Mon Jan 24 04:38:09 2011 +0100
+++ b/src/org/tmatesoft/hg/repo/HgDataFile.java	Mon Jan 24 05:33:47 2011 +0100
@@ -20,6 +20,7 @@
 
 import org.tmatesoft.hg.core.Nodeid;
 import org.tmatesoft.hg.core.Path;
+import org.tmatesoft.hg.internal.RevlogStream;
 
 
 
@@ -45,6 +46,7 @@
 		return content != null; // XXX need better impl
 	}
 
+	// human-readable (i.e. "COPYING", not "store/data/_c_o_p_y_i_n_g.i")
 	public Path getPath() {
 		return path; // hgRepo.backresolve(this) -> name?
 	}
@@ -65,8 +67,17 @@
 		if (!exists()) {
 			throw new IllegalStateException("Can't get history of invalid repository file node"); 
 		}
+		final int last = content.revisionCount() - 1;
+		if (start < 0 || start > last) {
+			throw new IllegalArgumentException();
+		}
+		if (end == TIP) {
+			end = last;
+		} else if (end < start || end > last) {
+			throw new IllegalArgumentException();
+		}
 		final int[] commitRevisions = new int[end - start + 1];
-		Revlog.Inspector insp = new Revlog.Inspector() {
+		RevlogStream.Inspector insp = new RevlogStream.Inspector() {
 			int count = 0;
 			
 			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, byte[] data) {
--- a/src/org/tmatesoft/hg/repo/HgManifest.java	Mon Jan 24 04:38:09 2011 +0100
+++ b/src/org/tmatesoft/hg/repo/HgManifest.java	Mon Jan 24 05:33:47 2011 +0100
@@ -17,6 +17,7 @@
 package org.tmatesoft.hg.repo;
 
 import org.tmatesoft.hg.core.Nodeid;
+import org.tmatesoft.hg.internal.RevlogStream;
 
 
 /**
@@ -31,7 +32,7 @@
 	}
 
 	public void walk(int start, int end, final Inspector inspector) {
-		Revlog.Inspector insp = new Revlog.Inspector() {
+		RevlogStream.Inspector insp = new RevlogStream.Inspector() {
 
 			private boolean gtg = true; // good to go
 
--- a/src/org/tmatesoft/hg/repo/HgRepository.java	Mon Jan 24 04:38:09 2011 +0100
+++ b/src/org/tmatesoft/hg/repo/HgRepository.java	Mon Jan 24 05:33:47 2011 +0100
@@ -24,6 +24,7 @@
 import org.tmatesoft.hg.core.Path;
 import org.tmatesoft.hg.internal.DataAccessProvider;
 import org.tmatesoft.hg.internal.RequiresFile;
+import org.tmatesoft.hg.internal.RevlogStream;
 import org.tmatesoft.hg.util.FileWalker;
 import org.tmatesoft.hg.util.PathRewrite;
 
@@ -124,11 +125,13 @@
 	public HgDataFile getFileNode(String path) {
 		String nPath = normalizePath.rewrite(path);
 		String storagePath = dataPathHelper.rewrite(nPath);
-		return getFileNode(Path.create(storagePath));
+		RevlogStream content = resolve(Path.create(storagePath));
+		return new HgDataFile(this, Path.create(nPath), content);
 	}
 
 	public HgDataFile getFileNode(Path path) {
-		RevlogStream content = resolve(path);
+		String storagePath = dataPathHelper.rewrite(path.toString());
+		RevlogStream content = resolve(Path.create(storagePath));
 		// XXX no content when no file? or HgDataFile.exists() to detect that? How about files that were removed in previous releases?
 		return new HgDataFile(this, path, content);
 	}
--- a/src/org/tmatesoft/hg/repo/Revlog.java	Mon Jan 24 04:38:09 2011 +0100
+++ b/src/org/tmatesoft/hg/repo/Revlog.java	Mon Jan 24 05:33:47 2011 +0100
@@ -27,6 +27,7 @@
 import java.util.Set;
 
 import org.tmatesoft.hg.core.Nodeid;
+import org.tmatesoft.hg.internal.RevlogStream;
 
 
 /**
@@ -92,7 +93,7 @@
 	 */
 	public byte[] content(int revision) {
 		final byte[][] dataPtr = new byte[1][];
-		Revlog.Inspector insp = new Revlog.Inspector() {
+		RevlogStream.Inspector insp = new RevlogStream.Inspector() {
 			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, byte[] data) {
 				dataPtr[0] = data;
 			}
@@ -123,7 +124,7 @@
 		if (parent2 != null && parent2.length < 20) {
 			throw new IllegalArgumentException(parent2.toString());
 		}
-		class ParentCollector implements Revlog.Inspector {
+		class ParentCollector implements RevlogStream.Inspector {
 			public int p1 = -1;
 			public int p2 = -1;
 			public byte[] nodeid;
@@ -158,15 +159,6 @@
 		}
 	}
 
-	// FIXME byte[] data might be too expensive, for few usecases it may be better to have intermediate Access object (when we don't need full data 
-	// instantly - e.g. calculate hash, or comparing two revisions
-	// XXX seems that RevlogStream is better place for this class. 
-	public interface Inspector {
-		// XXX boolean retVal to indicate whether to continue?
-		// TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call) 
-		void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, byte[] data);
-	}
-
 	/*
 	 * XXX think over if it's better to do either:
 	 * pw = getChangelog().new ParentWalker(); pw.init() and pass pw instance around as needed
@@ -192,7 +184,7 @@
 			secondParent = new HashMap<Nodeid, Nodeid>(firstParent.size() >> 1); // assume branches/merges are less frequent
 			allNodes = new LinkedHashSet<Nodeid>();
 			
-			Inspector insp = new Inspector() {
+			RevlogStream.Inspector insp = new RevlogStream.Inspector() {
 				final Nodeid[] sequentialRevisionNodeids = new Nodeid[revisionCount];
 				int ix = 0;
 				public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, byte[] data) {
--- a/src/org/tmatesoft/hg/repo/RevlogStream.java	Mon Jan 24 04:38:09 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,376 +0,0 @@
-/*
- * Copyright (c) 2010-2011 TMate Software Ltd
- *  
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * For information on how to redistribute this software under
- * the terms of a license other than GNU General Public License
- * contact TMate Software at support@svnkit.com
- */
-package org.tmatesoft.hg.repo;
-
-import static org.tmatesoft.hg.repo.HgRepository.TIP;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.zip.DataFormatException;
-import java.util.zip.Inflater;
-
-import org.tmatesoft.hg.core.Nodeid;
-import org.tmatesoft.hg.internal.DataAccess;
-import org.tmatesoft.hg.internal.DataAccessProvider;
-
-
-/**
- * XXX move to .internal?
- * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations), 
- * or numerous RevlogStream with separate representation of the underlaying data (cached, lazy ChunkStream)?
- * 
- * @see http://mercurial.selenic.com/wiki/Revlog
- * @see http://mercurial.selenic.com/wiki/RevlogNG
- *
- * @author Artem Tikhomirov
- * @author TMate Software Ltd.
- */
-public class RevlogStream {
-
-	private List<IndexEntry> index; // indexed access highly needed
-	private boolean inline = false;
-	private final File indexFile;
-	private final DataAccessProvider dataAccess;
-
-	// if we need anything else from HgRepo, might replace DAP parameter with HgRepo and query it for DAP.
-	RevlogStream(DataAccessProvider dap, File indexFile) {
-		this.dataAccess = dap;
-		this.indexFile = indexFile;
-	}
-
-	/*package*/ DataAccess getIndexStream() {
-		return dataAccess.create(indexFile);
-	}
-
-	/*package*/ DataAccess getDataStream() {
-		final String indexName = indexFile.getName();
-		File dataFile = new File(indexFile.getParentFile(), indexName.substring(0, indexName.length() - 1) + "d");
-		return dataAccess.create(dataFile);
-	}
-	
-	public int revisionCount() {
-		initOutline();
-		return index.size();
-	}
-	
-	public int dataLength(int revision) {
-		// XXX in fact, use of iterate() instead of this implementation may be quite reasonable.
-		//
-		final int indexSize = revisionCount();
-		DataAccess daIndex = getIndexStream(); // XXX may supply a hint that I'll need really few bytes of data (although at some offset)
-		if (revision == TIP) {
-			revision = indexSize - 1;
-		}
-		try {
-			int recordOffset = inline ? (int) index.get(revision).offset : revision * REVLOGV1_RECORD_SIZE;
-			daIndex.seek(recordOffset + 12); // 6+2+4
-			int actualLen = daIndex.readInt();
-			return actualLen; 
-		} catch (IOException ex) {
-			ex.printStackTrace(); // log error. FIXME better handling
-			throw new IllegalStateException(ex);
-		} finally {
-			daIndex.done();
-		}
-	}
-	
-	// Perhaps, RevlogStream should be limited to use of plain int revisions for access,
-	// while Nodeids should be kept on the level up, in Revlog. Guess, Revlog better keep
-	// map of nodeids, and once this comes true, we may get rid of this method.
-	// Unlike its counterpart, Revlog#getLocalRevisionNumber, doesn't fail with exception if node not found,
-	// returns a predefined constant instead
-	/*package-local*/ int findLocalRevisionNumber(Nodeid nodeid) {
-		// XXX this one may be implemented with iterate() once there's mechanism to stop iterations
-		final int indexSize = revisionCount();
-		DataAccess daIndex = getIndexStream();
-		try {
-			byte[] nodeidBuf = new byte[20];
-			for (int i = 0; i < indexSize; i++) {
-				daIndex.skip(8);
-				int compressedLen = daIndex.readInt();
-				daIndex.skip(20);
-				daIndex.readBytes(nodeidBuf, 0, 20);
-				if (nodeid.equalsTo(nodeidBuf)) {
-					return i;
-				}
-				daIndex.skip(inline ? 12 + compressedLen : 12);
-			}
-		} catch (IOException ex) {
-			ex.printStackTrace(); // log error. FIXME better handling
-			throw new IllegalStateException(ex);
-		} finally {
-			daIndex.done();
-		}
-		return Integer.MIN_VALUE;
-	}
-
-
-	private final int REVLOGV1_RECORD_SIZE = 64;
-
-	// should be possible to use TIP, ALL, or -1, -2, -n notation of Hg
-	// ? boolean needsNodeid
-	/**
-	 * Reads index records of revisions [start..end], both boundaries inclusive, and reports
-	 * each of them to the inspector. Does nothing when the index is empty.
-	 * 
-	 * When needData is true and the start revision is recorded as a patch, reading begins at its
-	 * base revision, so that patches can be applied one after another; revisions prior to start
-	 * are read, but never reported. The nodeid array handed over to the inspector is the same
-	 * instance for every record.
-	 * 
-	 * @param start first revision to report, or TIP to denote the last revision in the index
-	 * @param end last revision to report, or TIP to denote the last revision in the index
-	 * @param needData whether revision content shall be read, unpacked and patched; inspector gets null data otherwise
-	 * @param inspector gets revision index, actual length, base, link and parent revisions, nodeid and data of each revision
-	 * @throws IllegalArgumentException if either boundary is outside of the index, or end precedes start
-	 * @throws IllegalStateException if access to index or data fails with IOException
-	 */
-	public void iterate(int start, int end, boolean needData, Revlog.Inspector inspector) {
-		initOutline();
-		final int indexSize = index.size();
-		if (indexSize == 0) {
-			return;
-		}
-		if (end == TIP) {
-			end = indexSize - 1;
-		}
-		if (start == TIP) {
-			start = indexSize - 1;
-		}
-		if (start < 0 || start >= indexSize) {
-			throw new IllegalArgumentException("Bad left range boundary " + start);
-		}
-		if (end < start || end >= indexSize) {
-			throw new IllegalArgumentException("Bad right range boundary " + end);
-		}
-		// XXX may cache [start .. end] from index with a single read (pre-read)
-		
-		DataAccess daIndex = getIndexStream();
-		DataAccess daData = null;
-		try {
-			// acquired inside try, so that index stream gets released even if data stream can't be obtained
-			if (needData && !inline) {
-				daData = getDataStream();
-			}
-			byte[] nodeidBuf = new byte[20];
-			byte[] lastData = null;
-			int i;
-			boolean extraReadsToBaseRev = false;
-			if (needData && index.get(start).baseRevision < start) {
-				// content of start revision is a patch, need everything from its base revision on
-				i = index.get(start).baseRevision;
-				extraReadsToBaseRev = true;
-			} else {
-				i = start;
-			}
-			
-			// long arithmetic, not to truncate position in a huge index file
-			daIndex.seek(inline ? index.get(i).offset : (long) i * REVLOGV1_RECORD_SIZE);
-			for (; i <= end; i++ ) {
-				daIndex.readLong(); // 6 bytes of offset and 2 bytes of flags, neither is needed here
-				int compressedLen = daIndex.readInt();
-				int actualLen = daIndex.readInt();
-				int baseRevision = daIndex.readInt();
-				int linkRevision = daIndex.readInt();
-				int parent1Revision = daIndex.readInt();
-				int parent2Revision = daIndex.readInt();
-				// Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty
-				daIndex.readBytes(nodeidBuf, 0, 20);
-				daIndex.skip(12);
-				byte[] data = null;
-				if (needData) {
-					byte[] dataBuf = new byte[compressedLen];
-					if (inline) {
-						daIndex.readBytes(dataBuf, 0, compressedLen);
-					} else {
-						daData.seek(index.get(i).offset);
-						daData.readBytes(dataBuf, 0, compressedLen);
-					}
-					data = decompress(dataBuf, actualLen);
-					// XXX 
-					if (baseRevision != i) { // XXX not sure if this is the right way to detect a patch
-						// this is a patch
-						LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>();
-						int patchElementIndex = 0;
-						// plain while, as patch with no records at all (empty data) is legitimate
-						while (patchElementIndex < data.length) {
-							PatchRecord pr = PatchRecord.read(data, patchElementIndex);
-							patches.add(pr);
-							patchElementIndex += 12 + pr.len;
-						}
-						//
-						byte[] baseRevContent = lastData;
-						data = apply(baseRevContent, actualLen, patches);
-					}
-				} else {
-					if (inline) {
-						// with inline index, revision data follows the record and shall be skipped to get to the next one
-						daIndex.skip(compressedLen);
-					}
-				}
-				if (!extraReadsToBaseRev || i >= start) {
-					inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, data);
-				}
-				lastData = data;
-			}
-		} catch (IOException ex) {
-			throw new IllegalStateException(ex); // FIXME need better handling
-		} finally {
-			daIndex.done();
-			if (daData != null) {
-				daData.done();
-			}
-		}
-	}
-
-	/**
-	 * Unpacks revision chunk according to its first byte: 'x' denotes zlib stream, 'u' - uncompressed
-	 * data that follows the marker, anything else (including empty chunk) is returned as is.
-	 * 
-	 * @param chunk bytes as recorded in the revlog
-	 * @param sizeHint expected size of the unpacked data, used for initial buffer allocation only
-	 * @return unpacked data; empty array if zlib stream is malformed
-	 */
-	private static byte[] decompress(byte[] chunk, int sizeHint) {
-		if (chunk.length == 0) {
-			// empty revision or empty patch, there's no marker byte to look at
-			return chunk;
-		}
-		if (chunk[0] == 0x78 /* 'x' */) {
-			Inflater zlib = new Inflater(); // XXX Consider reuse of Inflater, and/or stream alternative
-			try {
-				zlib.setInput(chunk, 0, chunk.length);
-				byte[] result = new byte[Math.max(sizeHint, 64)];
-				int resultLen = 0;
-				while (!zlib.finished()) {
-					if (resultLen == result.length) {
-						byte[] bigger = new byte[result.length * 2];
-						System.arraycopy(result, 0, bigger, 0, resultLen);
-						result = bigger;
-					}
-					int n = zlib.inflate(result, resultLen, result.length - resultLen);
-					if (n == 0) {
-						// no progress despite free space in the buffer: truncated input or preset dictionary needed,
-						// keep what has been unpacked so far
-						break;
-					}
-					resultLen += n;
-				}
-				byte[] data = new byte[resultLen];
-				System.arraycopy(result, 0, data, 0, resultLen);
-				return data;
-			} catch (DataFormatException ex) {
-				ex.printStackTrace();
-				return new byte[0]; // FIXME need better failure strategy
-			} finally {
-				zlib.end(); // release native resources regardless of the outcome
-			}
-		}
-		if (chunk[0] == 0x75 /* 'u' */) {
-			byte[] data = new byte[chunk.length - 1];
-			System.arraycopy(chunk, 1, data, 0, data.length);
-			return data;
-		}
-		// XXX Python impl in fact throws exception when there's not 'x', 'u' or '0'
-		// but I don't see reason not to return data as is 
-		return chunk;
-	}
-	
-	/**
-	 * Reads the index once to record, for each revision, where its record is and what its base revision is.
-	 * Also finds out (from the version field) whether revision data is kept inline with the index.
-	 * No-op if a non-empty outline is already there. On I/O failure the error is printed and
-	 * the outline is left empty.
-	 */
-	private void initOutline() {
-		final boolean outlineReady = index != null && !index.isEmpty();
-		if (outlineReady) {
-			return;
-		}
-		final int inlineDataFlag = 1 << 16;
-		// link revision, two parents (3 ints) and 32 bytes reserved for nodeid - the part of the record not read here.
-		// Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size)
-		final int recordTail = 3*4 + 32;
-		ArrayList<IndexEntry> entries = new ArrayList<IndexEntry>();
-		DataAccess indexStream = getIndexStream();
-		try {
-			int versionField = indexStream.readInt();
-			indexStream.readInt(); // just to skip next 2 bytes of offset + flags
-			inline = (versionField & inlineDataFlag) != 0;
-			// first offset is always 0, thus Hg uses it for other purposes
-			long recordOffset = 0;
-			for (boolean moreRecords = true; moreRecords; ) {
-				int compressedLen = indexStream.readInt();
-				// 8+4 = 12 bytes total read here
-				indexStream.readInt(); // actual length, of no use for the outline
-				int baseRevision = indexStream.readInt();
-				// 12 + 8 = 20 bytes read here
-				if (inline) {
-					// data is interleaved with records, hence position in the file accounts for all the preceding records
-					entries.add(new IndexEntry(recordOffset + REVLOGV1_RECORD_SIZE * entries.size(), baseRevision));
-					indexStream.skip(recordTail + compressedLen);
-				} else {
-					entries.add(new IndexEntry(recordOffset, baseRevision));
-					indexStream.skip(recordTail);
-				}
-				if (indexStream.isEmpty()) {
-					// fine, done then
-					entries.trimToSize();
-					index = entries;
-					moreRecords = false;
-				} else {
-					// start reading next record
-					recordOffset = indexStream.readLong() >>> 16;
-				}
-			}
-		} catch (IOException ex) {
-			ex.printStackTrace(); // log error
-			// too bad, no outline then.
-			index = Collections.emptyList();
-		} finally {
-			indexStream.done();
-		}
-	}
-	
-
-	// perhaps, package-local or protected, if anyone else from low-level needs them
-	// XXX think over if we should keep offset in case of separate data file - we read the field anyway. Perhaps, distinct entry classes for Inline and non-inline indexes?
-	/**
-	 * Immutable pair of values kept per revision in the outline: where to look for the revision and its base revision.
-	 */
-	private static class IndexEntry {
-		/**
-		 * For separate .i and .d - copy of index record entry,
-		 * for inline index - actual offset of the record in the .i file (record entry + revision * record size)
-		 */
-		public final long offset;
-		//public final int length; // data past fixed record (need to decide whether including header size or not), and whether length is of compressed data or not
-		/** Base revision field, as recorded in the index */
-		public final int baseRevision;
-
-		public IndexEntry(long recordOffset, int baseRev) {
-			this.offset = recordOffset;
-			this.baseRevision = baseRev;
-		}
-	}
-
-	// mpatch.c : apply()
-	// FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF
-	/**
-	 * Produces new content out of the base revision and a sequence of patch records: fragments of the base
-	 * in between patch records are copied verbatim, [start..end) range of each record is substituted with record's data.
-	 * 
-	 * @param baseRevisionContent content patches are relative to
-	 * @param outcomeLen length of the result, or -1 to calculate it from the base content and the patch records
-	 * @param patch records to apply, in the order they appear in the patch
-	 * @return newly allocated array with patched content
-	 */
-	/*package-local for HgBundle; until moved to better place*/static byte[] apply(byte[] baseRevisionContent, int outcomeLen, List<PatchRecord> patch) {
-		int resultLen = outcomeLen;
-		if (resultLen == -1) {
-			// base length, plus everything records bring in, minus everything they replace
-			int replacedUpTo = 0;
-			resultLen = baseRevisionContent.length;
-			for (PatchRecord rec : patch) {
-				resultLen += rec.start - replacedUpTo + rec.len;
-				replacedUpTo = rec.end;
-			}
-			resultLen -= replacedUpTo;
-		}
-		final byte[] result = new byte[resultLen];
-		int srcPos = 0;
-		int dstPos = 0;
-		for (PatchRecord rec : patch) {
-			// part of the base that precedes the record and is left intact
-			final int intactLen = rec.start - srcPos;
-			System.arraycopy(baseRevisionContent, srcPos, result, dstPos, intactLen);
-			dstPos += intactLen;
-			// replacement for [start..end) of the base
-			final int replacementLen = rec.data.length;
-			System.arraycopy(rec.data, 0, result, dstPos, replacementLen);
-			dstPos += replacementLen;
-			srcPos = rec.end;
-		}
-		// tail of the base, past the last record
-		System.arraycopy(baseRevisionContent, srcPos, result, dstPos, baseRevisionContent.length - srcPos);
-		return result;
-	}
-
-	// @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description
-	/*package-local*/ static class PatchRecord { // copy of struct frag from mpatch.c
-		/*
-		   Given there are pr1 and pr2:
-		     pr1.start to pr1.end will be replaced with pr1's data (of pr1.len)
-		     pr1.end to pr2.start gets copied from base
-		 */
-		int start, end, len;
-		byte[] data;
-
-		// TODO consider PatchRecord that only records data position (absolute in data source), and acquires data as needed 
-		private PatchRecord(int p1, int p2, int length, byte[] src) {
-			this.start = p1;
-			this.end = p2;
-			this.len = length;
-			this.data = src;
-		}
-
-		/**
-		 * Reads a record from the array: three big-endian ints (start, end, length), followed by
-		 * length bytes of data, which get copied into the new record.
-		 * 
-		 * @param data array with patch records
-		 * @param offset index of the first byte of the record
-		 */
-		/*package-local*/ static PatchRecord read(byte[] data, int offset) {
-			int from = intAt(data, offset);
-			int to = intAt(data, offset + 4);
-			int length = intAt(data, offset + 8);
-			byte[] payload = new byte[length];
-			System.arraycopy(data, offset + 12, payload, 0, length);
-			return new PatchRecord(from, to, length, payload);
-		}
-
-		/**
-		 * Reads a record from the stream: three ints (start, end, length), followed by length bytes of data.
-		 */
-		/*package-local*/ static PatchRecord read(DataAccess da) throws IOException {
-			final int from = da.readInt();
-			final int to = da.readInt();
-			final int length = da.readInt();
-			final byte[] payload = new byte[length];
-			da.readBytes(payload, 0, length);
-			return new PatchRecord(from, to, length, payload);
-		}
-
-		// big-endian int out of 4 bytes, most significant byte at pos
-		private static int intAt(byte[] buf, int pos) {
-			int b0 = buf[pos] & 0xFF;
-			int b1 = buf[pos + 1] & 0xFF;
-			int b2 = buf[pos + 2] & 0xFF;
-			int b3 = buf[pos + 3] & 0xFF;
-			return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
-		}
-	}
-}