changeset 420:6c22bdc0bdfd

Respect long offsets in revlogs
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Thu, 22 Mar 2012 22:56:01 +0100
parents 7f136a3fa671
children fdd7d756dea0
files src/org/tmatesoft/hg/internal/ByteArrayDataAccess.java src/org/tmatesoft/hg/internal/DataAccess.java src/org/tmatesoft/hg/internal/DataAccessProvider.java src/org/tmatesoft/hg/internal/FilterDataAccess.java src/org/tmatesoft/hg/internal/InflaterDataAccess.java src/org/tmatesoft/hg/internal/Internals.java src/org/tmatesoft/hg/internal/Patch.java src/org/tmatesoft/hg/internal/RevlogDump.java src/org/tmatesoft/hg/internal/RevlogStream.java src/org/tmatesoft/hg/repo/HgDataFile.java
diffstat 10 files changed, 95 insertions(+), 52 deletions(-) [+]
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/internal/ByteArrayDataAccess.java	Thu Mar 22 21:36:41 2012 +0100
+++ b/src/org/tmatesoft/hg/internal/ByteArrayDataAccess.java	Thu Mar 22 22:56:01 2012 +0100
@@ -69,7 +69,7 @@
 	}
 	@Override
 	public void seek(int offset) {
-		pos = (int) offset;
+		pos = offset;
 	}
 	@Override
 	public void skip(int bytes) throws IOException {
--- a/src/org/tmatesoft/hg/internal/DataAccess.java	Thu Mar 22 21:36:41 2012 +0100
+++ b/src/org/tmatesoft/hg/internal/DataAccess.java	Thu Mar 22 22:56:01 2012 +0100
@@ -33,9 +33,22 @@
 		return true;
 	}
 	// TODO throws IOException (few subclasses have non-trivial length() operation)
+	// long length and offset are needed only in RevlogStream; they make no sense elsewhere
+	// because the chunks Mercurial operates with fit into an int (4-byte actualLength field).
+	// Subclasses that may face large pieces of data (actual data streams) shall override
+	// #longLength() and #longSeek().
 	public int length() {
 		return 0;
 	}
+	
+	public long longLength() {
+		return length();
+	}
+	
+	public void longSeek(long offset) throws IOException {
+		seek(Internals.ltoi(offset));
+	}
+	
 	/**
 	 * get this instance into initial state
 	 * @throws IOException
--- a/src/org/tmatesoft/hg/internal/DataAccessProvider.java	Thu Mar 22 21:36:41 2012 +0100
+++ b/src/org/tmatesoft/hg/internal/DataAccessProvider.java	Thu Mar 22 22:56:01 2012 +0100
@@ -23,7 +23,6 @@
 import java.nio.MappedByteBuffer;
 import java.nio.channels.FileChannel;
 
-import org.tmatesoft.hg.core.HgBadStateException;
 import org.tmatesoft.hg.core.SessionContext;
 
 /**
@@ -69,10 +68,7 @@
 		}
 		try {
 			FileChannel fc = new FileInputStream(f).getChannel();
-			int flen = (int) fc.size();
-			if (fc.size() - flen != 0) {
-				throw new HgBadStateException("Files greater than 2Gb are not yet supported");
-			}
+			long flen = fc.size();
 			if (flen > mapioMagicBoundary) {
 				// TESTS: bufLen of 1024 was used to test MemMapFileAccess
 				return new MemoryMapFileAccess(fc, flen, getConfigOption(context, CFG_PROPERTY_MAPIO_BUFFER_SIZE, 100*1024 /*same as default boundary*/));
@@ -91,18 +87,17 @@
 		return new DataAccess(); // non-null, empty.
 	}
 
-	// DOESN'T WORK YET 
 	private static class MemoryMapFileAccess extends DataAccess {
 		private FileChannel fileChannel;
-		private final int size;
+		private final long size;
 		private long position = 0; // always points to buffer's absolute position in the file
 		private final int memBufferSize;
 		private MappedByteBuffer buffer;
 
-		public MemoryMapFileAccess(FileChannel fc, int channelSize, int bufferSize) {
+		public MemoryMapFileAccess(FileChannel fc, long channelSize, int bufferSize) {
 			fileChannel = fc;
 			size = channelSize;
-			memBufferSize = bufferSize > channelSize ? channelSize : bufferSize; // no reason to waste memory more than there's data 
+			memBufferSize = bufferSize > channelSize ? (int) channelSize : bufferSize; // no reason to waste memory more than there's data 
 		}
 
 		@Override
@@ -111,22 +106,28 @@
 		}
 		
 		@Override
+		public DataAccess reset() throws IOException {
+			longSeek(0);
+			return this;
+		}
+
+		@Override
 		public int length() {
+			return Internals.ltoi(longLength());
+		}
+		
+		@Override
+		public long longLength() {
 			return size;
 		}
 		
 		@Override
-		public DataAccess reset() throws IOException {
-			seek(0);
-			return this;
-		}
-		
-		@Override
-		public void seek(int offset) {
+		public void longSeek(long offset) {
 			assert offset >= 0;
 			// offset may not necessarily be further than current position in the file (e.g. rewind) 
 			if (buffer != null && /*offset is within buffer*/ offset >= position && (offset - position) < buffer.limit()) {
-				buffer.position((int) (offset - position));
+				// cast is ok according to check above
+				buffer.position(Internals.ltoi(offset - position));
 			} else {
 				position = offset;
 				buffer = null;
@@ -134,6 +135,11 @@
 		}
 
 		@Override
+		public void seek(int offset) {
+			longSeek(offset);
+		}
+
+		@Override
 		public void skip(int bytes) throws IOException {
 			assert bytes >= 0;
 			if (buffer == null) {
@@ -206,14 +212,14 @@
 	// (almost) regular file access - FileChannel and buffers.
 	private static class FileAccess extends DataAccess {
 		private FileChannel fileChannel;
-		private final int size;
+		private final long size;
 		private ByteBuffer buffer;
-		private int bufferStartInFile = 0; // offset of this.buffer in the file.
+		private long bufferStartInFile = 0; // offset of this.buffer in the file.
 
-		public FileAccess(FileChannel fc, int channelSize, int bufferSizeHint, boolean useDirect) {
+		public FileAccess(FileChannel fc, long channelSize, int bufferSizeHint, boolean useDirect) {
 			fileChannel = fc;
 			size = channelSize;
-			final int capacity = size < bufferSizeHint ? size : bufferSizeHint;
+			final int capacity = size < bufferSizeHint ? (int) size : bufferSizeHint;
 			buffer = useDirect ? ByteBuffer.allocateDirect(capacity) : ByteBuffer.allocate(capacity);
 			buffer.flip(); // or .limit(0) to indicate it's empty
 		}
@@ -224,23 +230,29 @@
 		}
 		
 		@Override
+		public DataAccess reset() throws IOException {
+			longSeek(0);
+			return this;
+		}
+
+		@Override
 		public int length() {
-			return size;
+			return Internals.ltoi(longLength());
 		}
 		
 		@Override
-		public DataAccess reset() throws IOException {
-			seek(0);
-			return this;
+		public long longLength() {
+			return size;
 		}
-		
+
 		@Override
-		public void seek(int offset) throws IOException {
+		public void longSeek(long offset) throws IOException {
 			if (offset > size) {
 				throw new IllegalArgumentException(String.format("Can't seek to %d for the file of size %d (buffer start:%d)", offset, size, bufferStartInFile));
 			}
 			if (offset < bufferStartInFile + buffer.limit() && offset >= bufferStartInFile) {
-				buffer.position((int) (offset - bufferStartInFile));
+				// cast to int is safe, we've checked we fit into buffer
+				buffer.position(Internals.ltoi(offset - bufferStartInFile));
 			} else {
 				// out of current buffer, invalidate it (force re-read) 
 				// XXX or ever re-read it right away?
@@ -252,6 +264,11 @@
 		}
 
 		@Override
+		public void seek(int offset) throws IOException {
+			longSeek(offset);
+		}
+
+		@Override
 		public void skip(int bytes) throws IOException {
 			final int newPos = buffer.position() + bytes;
 			if (newPos >= 0 && newPos < buffer.limit()) {
@@ -259,7 +276,7 @@
 				buffer.position(newPos);
 			} else {
 				//
-				seek(bufferStartInFile + newPos);
+				longSeek(bufferStartInFile + newPos);
 			}
 		}
 
--- a/src/org/tmatesoft/hg/internal/FilterDataAccess.java	Thu Mar 22 21:36:41 2012 +0100
+++ b/src/org/tmatesoft/hg/internal/FilterDataAccess.java	Thu Mar 22 22:56:01 2012 +0100
@@ -28,11 +28,11 @@
  */
 public class FilterDataAccess extends DataAccess {
 	private final DataAccess dataAccess;
-	private final int offset;
+	private final long offset;
 	private final int length;
 	private int count;
 
-	public FilterDataAccess(DataAccess dataAccess, int offset, int length) {
+	public FilterDataAccess(DataAccess dataAccess, long offset, int length) {
 		this.dataAccess = dataAccess;
 		this.offset = offset;
 		this.length = length;
@@ -64,8 +64,8 @@
 		if (localOffset < 0 || localOffset > length) {
 			throw new IllegalArgumentException();
 		}
-		dataAccess.seek(offset + localOffset);
-		count = (int) (length - localOffset);
+		dataAccess.longSeek(offset + localOffset);
+		count = length - localOffset;
 	}
 
 	@Override
@@ -91,7 +91,7 @@
 			throw new IOException(String.format("Underflow. Bytes left: %d. FilterDA[offset: %d, length: %d]", count, offset, length));
 		}
 		if (count == length) {
-			dataAccess.seek(offset);
+			dataAccess.longSeek(offset);
 		}
 		count--;
 		return dataAccess.readByte();
@@ -106,7 +106,7 @@
 			throw new IOException(String.format("Underflow. Bytes left: %d, asked to read %d. FilterDA[offset: %d, length: %d]", count, len, offset, length));
 		}
 		if (count == length) {
-			dataAccess.seek(offset);
+			dataAccess.longSeek(offset);
 		}
 		dataAccess.readBytes(b, off, len);
 		count -= len;
--- a/src/org/tmatesoft/hg/internal/InflaterDataAccess.java	Thu Mar 22 21:36:41 2012 +0100
+++ b/src/org/tmatesoft/hg/internal/InflaterDataAccess.java	Thu Mar 22 22:56:01 2012 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 TMate Software Ltd
+ * Copyright (c) 2011-2012 TMate Software Ltd
  *  
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -40,15 +40,15 @@
 	private int decompressedPos = 0;
 	private int decompressedLength;
 
-	public InflaterDataAccess(DataAccess dataAccess, int offset, int compressedLength) {
+	public InflaterDataAccess(DataAccess dataAccess, long offset, int compressedLength) {
 		this(dataAccess, offset, compressedLength, -1, new Inflater(), new byte[512]);
 	}
 
-	public InflaterDataAccess(DataAccess dataAccess, int offset, int compressedLength, int actualLength) {
+	public InflaterDataAccess(DataAccess dataAccess, long offset, int compressedLength, int actualLength) {
 		this(dataAccess, offset, compressedLength, actualLength, new Inflater(), new byte[512]);
 	}
 
-	public InflaterDataAccess(DataAccess dataAccess, int offset, int compressedLength, int actualLength, Inflater inflater, byte[] buf) {
+	public InflaterDataAccess(DataAccess dataAccess, long offset, int compressedLength, int actualLength, Inflater inflater, byte[] buf) {
 		super(dataAccess, offset, compressedLength);
 		if (inflater == null || buf == null) {
 			throw new IllegalArgumentException();
@@ -119,10 +119,10 @@
 			throw new IllegalArgumentException();
 		}
 		if (localOffset >= decompressedPos) {
-			skip((int) (localOffset - decompressedPos));
+			skip(localOffset - decompressedPos);
 		} else {
 			reset();
-			skip((int) localOffset);
+			skip(localOffset);
 		}
 	}
 	
--- a/src/org/tmatesoft/hg/internal/Internals.java	Thu Mar 22 21:36:41 2012 +0100
+++ b/src/org/tmatesoft/hg/internal/Internals.java	Thu Mar 22 22:56:01 2012 +0100
@@ -394,4 +394,15 @@
 		}
 		return sb;
 	}
+	
+	/**
+	 * Keeps an eye on long-to-int downcasts to get a chance to notice a loss of data
+	 * (the check is an assert, hence effective only when assertions are enabled).
+	 * Use if there's even a subtle chance of loss; ok to skip if l can't exceed the int range.
+	 */
+	public static int ltoi(long l) {
+		int i = (int) l;
+		assert ((long) i) == l : "Loss of data!";
+		return i;
+	}
 }
--- a/src/org/tmatesoft/hg/internal/Patch.java	Thu Mar 22 21:36:41 2012 +0100
+++ b/src/org/tmatesoft/hg/internal/Patch.java	Thu Mar 22 22:56:01 2012 +0100
@@ -120,7 +120,7 @@
 		}
 		baseRevisionContent.seek(prevEnd);
 		// copy everything in the source past last record's end
-		baseRevisionContent.readBytes(rv, destIndex, (int) (baseRevisionContent.length() - prevEnd));
+		baseRevisionContent.readBytes(rv, destIndex, (baseRevisionContent.length() - prevEnd));
 		return rv;
 	}
 	
--- a/src/org/tmatesoft/hg/internal/RevlogDump.java	Thu Mar 22 21:36:41 2012 +0100
+++ b/src/org/tmatesoft/hg/internal/RevlogDump.java	Thu Mar 22 22:56:01 2012 +0100
@@ -76,7 +76,7 @@
 		while (dis.available() > 0) {
 			long l = di.readLong();
 			long offset = entryIndex == 0 ? 0 : (l >>> 16);
-			int flags = (int) (l & 0X0FFFF);
+			int flags = (int) (l & 0x0FFFF);
 			int compressedLen = di.readInt();
 			int actualLen = di.readInt();
 			int baseRevision = di.readInt();
--- a/src/org/tmatesoft/hg/internal/RevlogStream.java	Thu Mar 22 21:36:41 2012 +0100
+++ b/src/org/tmatesoft/hg/internal/RevlogStream.java	Thu Mar 22 22:56:01 2012 +0100
@@ -66,7 +66,7 @@
 	}
 
 	/*package*/ DataAccess getIndexStream() {
-		// XXX may supply a hint that I'll need really few bytes of data (perhaps, at some offset) 
+		// TODO post 1.0 may supply a hint that I'll need really few bytes of data (perhaps, at some offset) 
 		// to avoid mmap files when only few bytes are to be read (i.e. #dataLength())
 		return dataAccess.create(indexFile);
 	}
@@ -326,7 +326,7 @@
 			final int INLINEDATA = 1 << 16;
 			inline = (versionField & INLINEDATA) != 0;
 			IntVector resBases, resOffsets = null;
-			int entryCountGuess = da.length() / REVLOGV1_RECORD_SIZE;
+			int entryCountGuess = Internals.ltoi(da.longLength() / REVLOGV1_RECORD_SIZE);
 			if (inline) {
 				entryCountGuess >>>= 2; // pure guess, assume useful data takes 3/4 of total space
 				resOffsets = new IntVector(entryCountGuess, 5000);
@@ -347,7 +347,7 @@
 //				byte[] nodeid = new byte[32];
 				resBases.add(baseRevision);
 				if (inline) {
-					int o = (int) offset;
+					int o = Internals.ltoi(offset);
 					if (o != offset) {
 						// just in case, can't happen, ever, unless HG (or some other bad tool) produces index file 
 						// with inlined data of size greater than 2 Gb.
@@ -465,7 +465,7 @@
 				long l = daIndex.readLong(); // 0
 				long offset = i == 0 ? 0 : (l >>> 16);
 				@SuppressWarnings("unused")
-				int flags = (int) (l & 0X0FFFF);
+				int flags = (int) (l & 0x0FFFF);
 				int compressedLen = daIndex.readInt(); // +8
 				int actualLen = daIndex.readInt(); // +12
 				int baseRevision = daIndex.readInt(); // +16
@@ -477,15 +477,15 @@
 				daIndex.skip(12);
 				DataAccess userDataAccess = null;
 				if (needData) {
-					int streamOffset;
+					long streamOffset;
 					DataAccess streamDataAccess;
 					if (inline) {
 						streamDataAccess = daIndex;
 						streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream
 					} else {
-						streamOffset = (int) offset;
+						streamOffset = offset;
 						streamDataAccess = daData;
-						daData.seek(streamOffset);
+						daData.longSeek(streamOffset);
 					}
 					final boolean patchToPrevious = baseRevision != i; // the only way I found to tell if it's a patch
 					if (streamDataAccess.isEmpty() || compressedLen == 0) {
--- a/src/org/tmatesoft/hg/repo/HgDataFile.java	Thu Mar 22 21:36:41 2012 +0100
+++ b/src/org/tmatesoft/hg/repo/HgDataFile.java	Thu Mar 22 22:56:01 2012 +0100
@@ -41,6 +41,7 @@
 import org.tmatesoft.hg.internal.FilterByteChannel;
 import org.tmatesoft.hg.internal.FilterDataAccess;
 import org.tmatesoft.hg.internal.IntMap;
+import org.tmatesoft.hg.internal.Internals;
 import org.tmatesoft.hg.internal.RevlogStream;
 import org.tmatesoft.hg.util.ByteChannel;
 import org.tmatesoft.hg.util.CancelSupport;
@@ -129,7 +130,8 @@
 		} else if (fileRevisionIndex == WORKING_COPY) {
 			File f = getRepo().getFile(this);
 			if (f.exists()) {
-				return (int) /*FIXME long!*/ f.length();
+				// a single revision is expected to fit into the int range, so the cast to int shall be safe
+				return Internals.ltoi(f.length());
 			}
 			Nodeid fileRev = getWorkingCopyRevision();
 			if (fileRev == null) {