diff src/org/tmatesoft/hg/internal/RevlogStream.java @ 157:d5268ca7715b

Merged branch wrap-data-access into default for resource-friendly data access. Updated API to promote that friendliness to clients (channels, not byte[]). More exceptions
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 09 Mar 2011 05:22:17 +0100
parents src/com/tmate/hgkit/ll/RevlogStream.java@a6f39e595b2b src/com/tmate/hgkit/ll/RevlogStream.java@a3a2e5deb320
children b413b16d10a5
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/internal/RevlogStream.java	Wed Mar 02 01:06:09 2011 +0100
+++ b/src/org/tmatesoft/hg/internal/RevlogStream.java	Wed Mar 09 05:22:17 2011 +0100
@@ -25,8 +25,6 @@
 import java.util.Collections;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.zip.DataFormatException;
-import java.util.zip.Inflater;
 
 import org.tmatesoft.hg.core.Nodeid;
 import org.tmatesoft.hg.repo.HgRepository;
@@ -186,10 +184,10 @@
 			start = indexSize - 1;
 		}
 		if (start < 0 || start >= indexSize) {
-			throw new IllegalArgumentException("Bad left range boundary " + start);
+			throw new IllegalArgumentException(String.format("Bad left range boundary %d in [0..%d]", start, indexSize-1));
 		}
 		if (end < start || end >= indexSize) {
-			throw new IllegalArgumentException("Bad right range boundary " + end);
+			throw new IllegalArgumentException(String.format("Bad right range boundary %d in [0..%d]", end, indexSize-1));
 		}
 		// XXX may cache [start .. end] from index with a single read (pre-read)
 		
@@ -200,7 +198,7 @@
 		}
 		try {
 			byte[] nodeidBuf = new byte[20];
-			byte[] lastData = null;
+			DataAccess lastUserData = null;
 			int i;
 			boolean extraReadsToBaseRev = false;
 			if (needData && index.get(start).baseRevision < start) {
@@ -210,9 +208,13 @@
 				i = start;
 			}
 			
-			daIndex.seek(inline ? (int) index.get(i).offset : i * REVLOGV1_RECORD_SIZE);
+			daIndex.seek(inline ? index.get(i).offset : i * REVLOGV1_RECORD_SIZE);
 			for (; i <= end; i++ ) {
-				long l = daIndex.readLong();  // 0
+				if (inline && needData) {
+					// inspector reading data (through FilterDataAccess) may have affected index position
+					daIndex.seek(index.get(i).offset);
+				}
+				long l = daIndex.readLong(); // 0
 				@SuppressWarnings("unused")
 				long offset = l >>> 16;
 				@SuppressWarnings("unused")
@@ -226,49 +228,41 @@
 				// Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty
 				daIndex.readBytes(nodeidBuf, 0, 20); // +32
 				daIndex.skip(12);
-				byte[] data = null;
+				DataAccess userDataAccess = null;
 				if (needData) {
-					byte[] dataBuf = new byte[compressedLen];
+					final byte firstByte;
+					long streamOffset = index.get(i).offset;
+					DataAccess streamDataAccess;
 					if (inline) {
-						daIndex.readBytes(dataBuf, 0, compressedLen);
+						streamDataAccess = daIndex;
+						streamOffset += REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream
 					} else {
-						daData.seek(index.get(i).offset);
-						daData.readBytes(dataBuf, 0, compressedLen);
+						streamDataAccess = daData;
+						daData.seek(streamOffset);
 					}
-					if (dataBuf[0] == 0x78 /* 'x' */) {
-						try {
-							Inflater zlib = new Inflater(); // XXX Consider reuse of Inflater, and/or stream alternative
-							zlib.setInput(dataBuf, 0, compressedLen);
-							byte[] result = new byte[actualLen*2]; // FIXME need to use zlib.finished() instead 
-							int resultLen = zlib.inflate(result);
-							zlib.end();
-							data = new byte[resultLen];
-							System.arraycopy(result, 0, data, 0, resultLen);
-						} catch (DataFormatException ex) {
-							ex.printStackTrace();
-							data = new byte[0]; // FIXME need better failure strategy
-						}
-					} else if (dataBuf[0] == 0x75 /* 'u' */) {
-						data = new byte[dataBuf.length - 1];
-						System.arraycopy(dataBuf, 1, data, 0, data.length);
+					firstByte = streamDataAccess.readByte();
+					if (firstByte == 0x78 /* 'x' */) {
+						userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen);
+					} else if (firstByte == 0x75 /* 'u' */) {
+						userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1);
 					} else {
 						// XXX Python impl in fact throws exception when there's not 'x', 'u' or '0'
 						// but I don't see reason not to return data as is 
-						data = dataBuf;
+						userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen);
 					}
 					// XXX 
 					if (baseRevision != i) { // XXX not sure if this is the right way to detect a patch
 						// this is a patch
 						LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>();
-						int patchElementIndex = 0;
-						do {
-							PatchRecord pr = PatchRecord.read(data, patchElementIndex);
+						while (!userDataAccess.isEmpty()) {
+							PatchRecord pr = PatchRecord.read(userDataAccess);
+//							System.out.printf("PatchRecord:%d %d %d\n", pr.start, pr.end, pr.len);
 							patches.add(pr);
-							patchElementIndex += 12 + pr.len;
-						} while (patchElementIndex < data.length);
+						}
+						userDataAccess.done();
 						//
-						byte[] baseRevContent = lastData;
-						data = apply(baseRevContent, actualLen, patches);
+						byte[] userData = apply(lastUserData, actualLen, patches);
+						userDataAccess = new ByteArrayDataAccess(userData);
 					}
 				} else {
 					if (inline) {
@@ -276,9 +270,15 @@
 					}
 				}
 				if (!extraReadsToBaseRev || i >= start) {
-					inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, data);
+					inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess);
 				}
-				lastData = data;
+				if (userDataAccess != null) {
+					userDataAccess.reset();
+					if (lastUserData != null) {
+						lastUserData.done();
+					}
+					lastUserData = userDataAccess;
+				}
 			}
 		} catch (IOException ex) {
 			throw new IllegalStateException(ex); // FIXME need better handling
@@ -357,10 +357,10 @@
 
 	// mpatch.c : apply()
 	// FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF
-	public/*for HgBundle; until moved to better place*/static byte[] apply(byte[] baseRevisionContent, int outcomeLen, List<PatchRecord> patch) {
+	public/*for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List<PatchRecord> patch) throws IOException {
 		int last = 0, destIndex = 0;
 		if (outcomeLen == -1) {
-			outcomeLen = baseRevisionContent.length;
+			outcomeLen = (int) baseRevisionContent.length();
 			for (PatchRecord pr : patch) {
 				outcomeLen += pr.start - last + pr.len;
 				last = pr.end;
@@ -370,13 +370,15 @@
 		}
 		byte[] rv = new byte[outcomeLen];
 		for (PatchRecord pr : patch) {
-			System.arraycopy(baseRevisionContent, last, rv, destIndex, pr.start-last);
+			baseRevisionContent.seek(last);
+			baseRevisionContent.readBytes(rv, destIndex, pr.start-last);
 			destIndex += pr.start - last;
 			System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length);
 			destIndex += pr.data.length;
 			last = pr.end;
 		}
-		System.arraycopy(baseRevisionContent, last, rv, destIndex, baseRevisionContent.length - last);
+		baseRevisionContent.seek(last);
+		baseRevisionContent.readBytes(rv, destIndex, (int) (baseRevisionContent.length() - last));
 		return rv;
 	}
 
@@ -422,7 +424,8 @@
 	// instantly - e.g. calculate hash, or comparing two revisions
 	public interface Inspector {
 		// XXX boolean retVal to indicate whether to continue?
-		// TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call) 
-		void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, byte[] data);
+		// TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call)
+		// implementers shall not invoke DataAccess.done(); it's accomplished by #iterate at the appropriate moment
+		void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, DataAccess data);
 	}
 }