changeset 576:3c4db86e8c1f

Issue 43: poor performance with InflaterDataAccess. Phase 2: inflate into buffer, effective skip and readByte/readBytes()
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Tue, 16 Apr 2013 19:31:57 +0200
parents 8bf184c9d733
children 5d7399ade999
files cmdline/org/tmatesoft/hg/console/Main.java src/org/tmatesoft/hg/internal/InflaterDataAccess.java test/org/tmatesoft/hg/test/TestInflaterDataAccess.java
diffstat 3 files changed, 133 insertions(+), 64 deletions(-) [+]
line wrap: on
line diff
--- a/cmdline/org/tmatesoft/hg/console/Main.java	Tue Apr 16 16:59:59 2013 +0200
+++ b/cmdline/org/tmatesoft/hg/console/Main.java	Tue Apr 16 19:31:57 2013 +0200
@@ -102,7 +102,7 @@
 
 	public static void main(String[] args) throws Exception {
 		Main m = new Main(args);
-		m.checkFileSneakerPerformance2();
+		m.checkFileSneakerPerformance();
 //		m.testRevert();
 //		m.testCheckout();
 //		m.tryExtensions();
@@ -135,8 +135,8 @@
 		HgChangesetFileSneaker fs2 = new HgChangesetFileSneaker(hgRepo);
 		fs1.followRenames(true);
 		fs2.followRenames(true);
-		Nodeid cset = hgRepo.getChangelog().getRevision(TIP);
-		Path fname = Path.create("dir3/file8");
+		Nodeid cset = hgRepo.getChangelog().getRevision(2);
+		Path fname = Path.create("dir9/file9"); // close to the manifest end 
 		fs1.changeset(cset);
 		fs2.changeset(cset);
 //		hgRepo.getManifest().getFileRevision(TIP, fname);
@@ -153,17 +153,17 @@
 		}
 		ManifestRevision mr = new ManifestRevision(null, null);
 		final long _s1 = System.nanoTime();
-		hgRepo.getManifest().walk(2, 2, mr);
+		hgRepo.getManifest().walk(0, 0, mr);
 		final long _e1 = System.nanoTime();
-		hgRepo.getManifest().getFileRevision(2, fname);
+		hgRepo.getManifest().getFileRevision(0, fname);
 		final long _e2 = System.nanoTime();
 		System.out.printf("\n\tManifestRevision:%d ms, getFileRevision:%d ms\n", (_e1-_s1)/1000000, (_e2-_e1)/1000000);
 	}
 	
 	//  -agentlib:hprof=cpu=times,heap=sites,depth=10
 	private void checkFileSneakerPerformance2() throws Exception {
-		Path fname = Path.create("dir3/file8");
-		hgRepo.getManifest().getFileRevision(2, fname);
+		Path fname = Path.create("dir9/file9"); // close to the manifest end
+		hgRepo.getManifest().getFileRevision(0, fname);
 //		ManifestRevision mr = new ManifestRevision(null, null);
 //		hgRepo.getManifest().walk(2, 2, mr);
 	}
--- a/src/org/tmatesoft/hg/internal/InflaterDataAccess.java	Tue Apr 16 16:59:59 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/InflaterDataAccess.java	Tue Apr 16 19:31:57 2013 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012 TMate Software Ltd
+ * Copyright (c) 2011-2013 TMate Software Ltd
  *  
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -16,8 +16,8 @@
  */
 package org.tmatesoft.hg.internal;
 
-import java.io.EOFException;
 import java.io.IOException;
+import java.nio.ByteBuffer;
 import java.util.zip.DataFormatException;
 import java.util.zip.Inflater;
 import java.util.zip.ZipException;
@@ -32,9 +32,9 @@
 public class InflaterDataAccess extends FilterDataAccess {
 
 	private final Inflater inflater;
-	private final byte[] buffer;
-	private final byte[] singleByte = new byte[1];
-	private int decompressedPos = 0;
+	private final byte[] inBuffer;
+	private final ByteBuffer outBuffer;
+	private int inflaterPos = 0;
 	private int decompressedLength;
 
 	public InflaterDataAccess(DataAccess dataAccess, long offset, int compressedLength) {
@@ -52,20 +52,23 @@
 		}
 		this.inflater = inflater;
 		this.decompressedLength = actualLength;
-		buffer = buf;
+		inBuffer = buf;
+		outBuffer = ByteBuffer.allocate(inBuffer.length * 2);
+		outBuffer.limit(0); // there's nothing to read in the buffer 
 	}
 	
 	@Override
 	public InflaterDataAccess reset() throws IOException {
 		super.reset();
 		inflater.reset();
-		decompressedPos = 0;
+		inflaterPos = 0;
+		outBuffer.clear().limit(0); // or flip(), to indicate nothing to read
 		return this;
 	}
 	
 	@Override
 	protected int available() throws IOException {
-		return length() - decompressedPos;
+		return length() - decompressedPosition();
 	}
 	
 	@Override
@@ -80,29 +83,16 @@
 		if (decompressedLength != -1) {
 			return decompressedLength;
 		}
-		decompressedLength = 0; // guard to avoid endless loop in case length() would get invoked from below. 
-		int c = 0;
-		int oldPos = decompressedPos;
-		byte[] dummy = new byte[buffer.length];
-		try {
-			int toRead = -1;
-			do {
-				while (!inflater.needsInput()) {
-					c += inflater.inflate(dummy, 0, dummy.length);
-				}
-				if (inflater.needsInput() && (toRead = super.available()) > 0) {
-					// fill:
-					if (toRead > buffer.length) {
-						toRead = buffer.length;
-					}
-					super.readBytes(buffer, 0, toRead);
-					inflater.setInput(buffer, 0, toRead);
-				}
-			} while(toRead > 0);
-		} catch (DataFormatException ex) {
-			throw new IOException(ex.toString());
-		}
-		decompressedLength = c + oldPos;
+		decompressedLength = 0; // guard to avoid endless loop in case length() would get invoked from below.
+		final int oldPos = decompressedPosition();
+		final int inflatedUpTo = inflaterPos;
+		int inflatedMore = 0, c;
+		do {
+			outBuffer.limit(outBuffer.position()); // pretend the buffer is consumed
+			c = fillOutBuffer();
+			inflatedMore += c;
+		} while (c == outBuffer.capacity()); // once we unpacked less than capacity, input is over
+		decompressedLength = inflatedUpTo + inflatedMore;
 		reset();
 		seek(oldPos);
 		return decompressedLength;
@@ -113,8 +103,9 @@
 		if (localOffset < 0 /* || localOffset >= length() */) {
 			throw new IllegalArgumentException();
 		}
-		if (localOffset >= decompressedPos) {
-			skip(localOffset - decompressedPos);
+		int currentPos = decompressedPosition();
+		if (localOffset >= currentPos) {
+			skip(localOffset - currentPos);
 		} else {
 			reset();
 			skip(localOffset);
@@ -125,57 +116,122 @@
 	public void skip(final int bytesToSkip) throws IOException {
 		int bytes = bytesToSkip;
 		if (bytes < 0) {
-			bytes += decompressedPos;
+			bytes += decompressedPosition();
 			if (bytes < 0) {
-				throw new IOException(String.format("Underflow. Rewind past start of the slice. To skip:%d, decPos:%d, decLen:%d. Left:%d", bytesToSkip, decompressedPos, decompressedLength, bytes));
+				throw new IOException(String.format("Underflow. Rewind past start of the slice. To skip:%d, decPos:%d, decLen:%d. Left:%d", bytesToSkip, inflaterPos, decompressedLength, bytes));
 			}
 			reset();
 			// fall-through
 		}
 		while (!isEmpty() && bytes > 0) {
-			readByte();
-			bytes--;
+			int fromBuffer = outBuffer.remaining();
+			if (fromBuffer > 0) {
+				if (fromBuffer >= bytes) {
+					outBuffer.position(outBuffer.position() + bytes);
+					bytes = 0;
+					break;
+				} else {
+					bytes -= fromBuffer;
+					outBuffer.limit(outBuffer.position()); // mark consumed
+					// fall through to fill the buffer
+				}
+			}
+			fillOutBuffer();
 		}
 		if (bytes != 0) {
-			throw new IOException(String.format("Underflow. Rewind past end of the slice. To skip:%d, decPos:%d, decLen:%d. Left:%d", bytesToSkip, decompressedPos, decompressedLength, bytes));
+			throw new IOException(String.format("Underflow. Rewind past end of the slice. To skip:%d, decPos:%d, decLen:%d. Left:%d", bytesToSkip, inflaterPos, decompressedLength, bytes));
 		}
 	}
 
 	@Override
 	public byte readByte() throws IOException {
-		readBytes(singleByte, 0, 1);
-		return singleByte[0];
+		if (!outBuffer.hasRemaining()) {
+			fillOutBuffer();
+		}
+		return outBuffer.get();
 	}
 
 	@Override
 	public void readBytes(byte[] b, int off, int len) throws IOException {
+		do {
+			int fromBuffer = outBuffer.remaining();
+			if (fromBuffer > 0) {
+				if (fromBuffer >= len) {
+					outBuffer.get(b, off, len);
+					return;
+				} else {
+					outBuffer.get(b, off, fromBuffer);
+					off += fromBuffer;
+					len -= fromBuffer;
+					// fall-through
+				}
+			}
+			fillOutBuffer();
+		} while (len > 0);
+	}
+	
+	@Override
+	public void readBytes(ByteBuffer buf) throws IOException {
+		int len = Math.min(available(), buf.remaining());
+		while (len > 0) {
+			if (outBuffer.remaining() >= len) {
+				ByteBuffer slice = outBuffer.slice();
+				slice.limit(len);
+				buf.put(slice);
+				outBuffer.position(outBuffer.position() + len);
+				return;
+			} else { 
+				len -= outBuffer.remaining();
+				buf.put(outBuffer);
+			}
+			fillOutBuffer();
+		}
+	}
+	
+	private int decompressedPosition() {
+		assert outBuffer.remaining() <= inflaterPos; 
+		return inflaterPos - outBuffer.remaining();
+	}
+	
+	// after #fillOutBuffer(), outBuffer is ready for read
+	private int fillOutBuffer() throws IOException {
+		assert !outBuffer.hasRemaining();
 		try {
-		    int n;
-		    while (len > 0) {
-			    while ((n = inflater.inflate(b, off, len)) == 0) {
+			int inflatedBytes = 0;
+		    outBuffer.clear();
+		    int len = outBuffer.capacity();
+		    int off = 0;
+		    do {
+			    int n;
+			    while ((n = inflater.inflate(outBuffer.array(), off, len)) == 0) {
 			    	// XXX few last bytes (checksum?) may be ignored by inflater, thus inflate may return 0 in
 			    	// perfectly legal conditions (when all data already expanded, but there are still some bytes
 			    	// in the input stream)
 					int toRead = -1;
 					if (inflater.needsInput() && (toRead = super.available()) > 0) {
-						// fill:
-						if (toRead > buffer.length) {
-							toRead = buffer.length;
+						// fill
+						if (toRead > inBuffer.length) {
+							toRead = inBuffer.length;
 						}
-						super.readBytes(buffer, 0, toRead);
-						inflater.setInput(buffer, 0, toRead);
+						super.readBytes(inBuffer, 0, toRead);
+						inflater.setInput(inBuffer, 0, toRead);
 					} else {
+						// inflated nothing and doesn't want any more data (or no date available) - assume we're done 
+						assert inflater.finished();
+						assert toRead <= 0;
+						break;
 						// prevent hang up in this cycle if no more data is available, see Issue 25
-						throw new EOFException(String.format("No more compressed data is available to satisfy request for %d bytes. [finished:%b, needDict:%b, needInp:%b, available:%d", len, inflater.finished(), inflater.needsDictionary(), inflater.needsInput(), toRead));
+//						throw new EOFException(String.format("No more compressed data is available to satisfy request for %d bytes. [finished:%b, needDict:%b, needInp:%b, available:%d", len, inflater.finished(), inflater.needsDictionary(), inflater.needsInput(), toRead));
 					}
 			    }
 				off += n;
 				len -= n;
-				decompressedPos += n;
-				if (len == 0) {
-					return; // filled
-				}
-		    }
+				inflatedBytes += n;
+		    } while (len > 0 && !inflater.finished()); // either the buffer is filled or nothing more to unpack
+		    inflaterPos += inflatedBytes;
+		    outBuffer.limit(inflatedBytes);
+		    assert outBuffer.position() == 0; // didn't change since #clear() above
+		    return inflatedBytes;
 		} catch (DataFormatException e) {
 		    String s = e.getMessage();
 		    throw new ZipException(s != null ? s : "Invalid ZLIB data format");
--- a/test/org/tmatesoft/hg/test/TestInflaterDataAccess.java	Tue Apr 16 16:59:59 2013 +0200
+++ b/test/org/tmatesoft/hg/test/TestInflaterDataAccess.java	Tue Apr 16 19:31:57 2013 +0200
@@ -22,6 +22,7 @@
 import java.util.zip.DeflaterOutputStream;
 import java.util.zip.Inflater;
 
+import org.junit.Rule;
 import org.junit.Test;
 import org.tmatesoft.hg.internal.ByteArrayDataAccess;
 import org.tmatesoft.hg.internal.DataAccess;
@@ -33,7 +34,8 @@
  * @author TMate Software Ltd.
  */
 public class TestInflaterDataAccess {
-	private final ErrorCollectorExt errorCollector = new ErrorCollectorExt();
+	@Rule
+	public final ErrorCollectorExt errorCollector = new ErrorCollectorExt();
 	
 	private final byte[] testContent1 = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.".getBytes();
 	
@@ -70,13 +72,15 @@
 		ida = new InflaterDataAccess(zip, 0, zip.length(), -1, new Inflater(), new byte[25]);
 		byte[] dummy = new byte[30];
 		ida.readBytes(dummy, 0, dummy.length);
-		int lll = ida.length();
 		errorCollector.assertEquals("#length() after readBytes()", testContent1.length, ida.length());
 		//
 		ida = new InflaterDataAccess(zip, 0, zip.length(), -1, new Inflater(), new byte[25]);
 		// consume most of the stream, so that all original compressed data is already read
-		ida.readBytes(ByteBuffer.allocate(testContent1.length - 1));
+		dummy = new byte[testContent1.length - 1];
+		ida.readBytes(dummy, 0, dummy.length);
 		errorCollector.assertEquals("#length() after origin was completely read", testContent1.length, ida.length());
+		//
+		errorCollector.assertFalse(ida.isEmpty()); // check InflaterDataAccess#available() positive
 	}
 
 	@Test
@@ -95,6 +99,15 @@
 		ida.readBytes(chunk2, 2, 10);
 		errorCollector.assertTrue(new ByteArraySlice(testContent1, 10, 22).equalsTo(chunk1));
 		errorCollector.assertTrue(new ByteArraySlice(testContent1, 10+22+5, 12).equalsTo(chunk2));
+		int consumed = 10+22+5+12;
+		// 
+		// check that even when original content is completely unpacked, leftovers in the outBuffer are recognized   
+		ida.readBytes(ByteBuffer.allocate(testContent1.length - consumed - 2)); // unpack up to an end (almost)
+		errorCollector.assertFalse(ida.isEmpty()); // check InflaterDataAccess#available() positive
+		//
+		ByteBuffer chunk3 = ByteBuffer.allocate(10);
+		ida.readBytes(chunk3);
+		errorCollector.assertEquals(2, chunk3.flip().remaining());
 	}
 
 	private static class ByteArraySlice {