changeset 26:71a9ba42cee8

Memory-mapped files for bigger files. Fixed a defect when reading a number of bytes greater than the size of the buffer
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Sun, 09 Jan 2011 15:59:54 +0100
parents da8ccbfae64d
children b0a15cefdfd6
files design.txt src/com/tmate/hgkit/fs/DataAccessProvider.java
diffstat 2 files changed, 91 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/design.txt	Thu Jan 06 04:45:40 2011 +0100
+++ b/design.txt	Sun Jan 09 15:59:54 2011 +0100
@@ -30,15 +30,12 @@
 +calculate sha1 digest for file to see I can deal with nodeid
 *.hgignored processing
 +Nodeid to keep 20 bytes always, Revlog.Inspector to get nodeid array of meaningful data exact size (neither leading 00 bytes nor the 12 extra bytes from the spec)
++DataAccess - implement memory mapped files, 
 
-DataAccess - collect debug info (buffer misses, file size/total read operations) to find out better strategy to buffer size detection.
-DataAccess - implement memory mapped files, compare performance.
+DataAccess - collect debug info (buffer misses, file size/total read operations) to find out better strategy to buffer size detection. Compare performance.
 delta merge
 Changeset to get index (local revision number)
 RevisionWalker (on manifest) and WorkingCopyWalker (io.File) talking to ? and/or dirstate 
-
-
-
  
 Status operation from GUI - guess, usually on a file/subfolder, hence API should allow for starting path (unlike cmdline, seems useless to implement include/exclude patterns - GUI users hardly enter them, ever)
 
@@ -58,4 +55,7 @@
 * NIO and mapped files - should be fast. Although seems to give less control on mem usage. 
 * Regular InputStreams and chunked stream on top - allocate List<byte[]>, each (but last) chunk of fixed size (depending on initial file size) 
 
-<<<<<
\ No newline at end of file
+<<<<<
+
+Tests:
+DataAccess - readBytes(length > memBufferSize, length*2 > memBufferSize) - to check the impl is capable of reading huge chunks of data, regardless of its own buffer size
\ No newline at end of file
--- a/src/com/tmate/hgkit/fs/DataAccessProvider.java	Thu Jan 06 04:45:40 2011 +0100
+++ b/src/com/tmate/hgkit/fs/DataAccessProvider.java	Sun Jan 09 15:59:54 2011 +0100
@@ -31,12 +31,14 @@
 		try {
 			FileChannel fc = new FileInputStream(f).getChannel();
 			if (fc.size() > mapioMagicBoundary) {
-				return new MemoryMapFileAccess(fc, fc.size());
+				// TESTS: bufLen of 1024 was used to test MemMapFileAccess
+				return new MemoryMapFileAccess(fc, fc.size(), mapioMagicBoundary);
 			} else {
 				// XXX once implementation is more or less stable,
 				// may want to try ByteBuffer.allocateDirect() to see
 				// if there's any performance gain. 
 				boolean useDirectBuffer = false;
+				// TESTS: bufferSize of 100 was used to check buffer underflow states when readBytes reads chunks bigger than bufSize 
 				return new FileAccess(fc, fc.size(), bufferSize, useDirectBuffer);
 			}
 		} catch (IOException ex) {
@@ -50,33 +52,92 @@
 	private static class MemoryMapFileAccess extends DataAccess {
 		private FileChannel fileChannel;
 		private final long size;
-		private long position = 0;
+		private long position = 0; // always points to buffer's absolute position in the file
+		private final int memBufferSize;
+		private MappedByteBuffer buffer;
 
-		public MemoryMapFileAccess(FileChannel fc, long channelSize) {
+		public MemoryMapFileAccess(FileChannel fc, long channelSize, int /*long?*/ bufferSize) {
 			fileChannel = fc;
 			size = channelSize;
+			memBufferSize = bufferSize;
 		}
 
 		@Override
+		public boolean isEmpty() {
+			return position + (buffer == null ? 0 : buffer.position()) >= size;
+		}
+		
+		@Override
 		public void seek(long offset) {
-			position = offset;
+			assert offset >= 0;
+			// offset may not necessarily be further than current position in the file (e.g. rewind) 
+			if (buffer != null && /*offset is within buffer*/ offset >= position && (offset - position) < buffer.limit()) {
+				buffer.position((int) (offset - position));
+			} else {
+				position = offset;
+				buffer = null;
+			}
 		}
 
 		@Override
 		public void skip(int bytes) throws IOException {
-			position += bytes;
+			assert bytes >= 0;
+			if (buffer == null) {
+				position += bytes;
+				return;
+			}
+			if (buffer.remaining() > bytes) {
+				buffer.position(buffer.position() + bytes);
+			} else {
+				position += buffer.position() + bytes;
+				buffer = null;
+			}
 		}
 
-		private boolean fill() throws IOException {
-			final int BUFFER_SIZE = 8 * 1024;
+		private void fill() throws IOException {
+			if (buffer != null) {
+				position += buffer.position(); 
+			}
 			long left = size - position;
-			MappedByteBuffer rv = fileChannel.map(FileChannel.MapMode.READ_ONLY, position, left < BUFFER_SIZE ? left : BUFFER_SIZE);
-			position += rv.capacity();
-			return rv.hasRemaining();
+			buffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, position, left < memBufferSize ? left : memBufferSize);
+		}
+
+		@Override
+		public void readBytes(byte[] buf, int offset, int length) throws IOException {
+			if (buffer == null || !buffer.hasRemaining()) {
+				fill();
+			}
+			// XXX in fact, we may try to create a MappedByteBuffer of exactly length size here, and read right away
+			while (length > 0) {
+				int tail = buffer.remaining();
+				if (tail == 0) {
+					throw new IOException();
+				}
+				if (tail >= length) {
+					buffer.get(buf, offset, length);
+				} else {
+					buffer.get(buf, offset, tail);
+					fill();
+				}
+				offset += tail;
+				length -= tail;
+			}
+		}
+
+		@Override
+		public byte readByte() throws IOException {
+			if (buffer == null || !buffer.hasRemaining()) {
+				fill();
+			}
+			if (buffer.hasRemaining()) {
+				return buffer.get();
+			}
+			throw new IOException();
 		}
 
 		@Override
 		public void done() {
+			buffer = null;
 			if (fileChannel != null) {
 				try {
 					fileChannel.close();
@@ -152,16 +213,22 @@
 
 		@Override
 		public void readBytes(byte[] buf, int offset, int length) throws IOException {
-			final int tail = buffer.remaining();
-			if (tail >= length) {
-				buffer.get(buf, offset, length);
-			} else {
-				buffer.get(buf, offset, tail);
-				if (fill()) {
-					buffer.get(buf, offset + tail, length - tail);
+			if (!buffer.hasRemaining()) {
+				fill();
+			}
+			while (length > 0) {
+				int tail = buffer.remaining();
+				if (tail == 0) {
+					throw new IOException(); // shall not happen provided stream contains expected data and no attempts to read past isEmpty() == true are made.
+				}
+				if (tail >= length) {
+					buffer.get(buf, offset, length);
 				} else {
-					throw new IOException(); // shall not happen provided stream contains expected data and no attempts to read past nonEmpty() == false are made. 
+					buffer.get(buf, offset, tail);
+					fill();
 				}
+				offset += tail;
+				length -= tail;
 			}
 		}