changeset 355:f2c11fe7f3e9

Newline filter shall respect whole stream when deciding whether to process line terminators, hence added stream preview functionality
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Tue, 06 Dec 2011 12:57:21 +0100 (2011-12-06)
parents 5f9073eabf06
children 91d75e1bac9f
files build.xml src/org/tmatesoft/hg/internal/Filter.java src/org/tmatesoft/hg/internal/FilterByteChannel.java src/org/tmatesoft/hg/internal/NewlineFilter.java src/org/tmatesoft/hg/internal/Preview.java src/org/tmatesoft/hg/repo/HgDataFile.java src/org/tmatesoft/hg/repo/HgWorkingCopyStatusCollector.java src/org/tmatesoft/hg/repo/Revlog.java test/org/tmatesoft/hg/test/TestNewlineFilter.java
diffstat 9 files changed, 259 insertions(+), 47 deletions(-) [+]
line wrap: on
line diff
--- a/build.xml	Thu Dec 01 05:21:40 2011 +0100
+++ b/build.xml	Tue Dec 06 12:57:21 2011 +0100
@@ -87,6 +87,7 @@
 			<test name="org.tmatesoft.hg.test.TestManifest" />
 			<test name="org.tmatesoft.hg.test.TestStatus" />
 			<test name="org.tmatesoft.hg.test.TestStorePath" />
+			<test name="org.tmatesoft.hg.test.TestNewlineFilter" />
 			<test name="org.tmatesoft.hg.test.TestIgnore" />
 			<test name="org.tmatesoft.hg.test.TestDirstate" />
 			<test name="org.tmatesoft.hg.test.TestBranches" />
--- a/src/org/tmatesoft/hg/internal/Filter.java	Thu Dec 01 05:21:40 2011 +0100
+++ b/src/org/tmatesoft/hg/internal/Filter.java	Tue Dec 06 12:57:21 2011 +0100
@@ -28,10 +28,18 @@
  */
 public interface Filter {
 
-	// returns a buffer ready to be read. may return original buffer.
-	// original buffer may not be fully consumed, #compact() might be operation to perform 
+	/**
+	 * Filters data and returns a new buffer with data or an original buffer.
+	 * Original buffer may not be fully consumed, #compact() might be operation to perform
+	 * @param src
+	 * @return a buffer ready to be read
+	 */
+	//  
 	ByteBuffer filter(ByteBuffer src);
 
+	/*
+	 * Factory doesn't look into data (i.e. could't do a preview), works solely with path
+	 */
 	interface Factory {
 		void initialize(HgRepository hgRepo);
 		// may return null if for a given path and/or options this filter doesn't make any sense
--- a/src/org/tmatesoft/hg/internal/FilterByteChannel.java	Thu Dec 01 05:21:40 2011 +0100
+++ b/src/org/tmatesoft/hg/internal/FilterByteChannel.java	Tue Dec 06 12:57:21 2011 +0100
@@ -18,7 +18,9 @@
 
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
 import java.util.Collection;
+import java.util.List;
 
 import org.tmatesoft.hg.util.Adaptable;
 import org.tmatesoft.hg.util.ByteChannel;
@@ -55,6 +57,20 @@
 
 	// adapters or implemented interfaces of the original class shall not be obfuscated by filter
 	public <T> T getAdapter(Class<T> adapterClass) {
+		if (adapterClass == Preview.class) {
+			ArrayList<Preview> previewers = new ArrayList<Preview>(filters.length);
+			for (Filter f : filters) {
+				if (f instanceof Preview /*FIXME or getAdapter != null*/) {
+					previewers.add((Preview) f);
+				}
+			}
+			if (!previewers.isEmpty()) {
+				@SuppressWarnings("unchecked")
+				T rv = (T) new PreviewSupport(previewers);
+				return rv;
+			}
+			// fall through to let delegate answer
+		}
 		if (delegate instanceof Adaptable) {
 			return ((Adaptable) delegate).getAdapter(adapterClass);
 		}
@@ -63,4 +79,22 @@
 		}
 		return null;
 	}
+
+	private static class PreviewSupport implements Preview {
+		private final Preview[] participants;
+
+		public PreviewSupport(List<Preview> previewers) {
+			participants = new Preview[previewers.size()];
+			previewers.toArray(participants);
+		}
+		
+		public void preview(ByteBuffer src) {
+			final int originalPos = src.position();
+			for (Preview p : participants) {
+				p.preview(src);
+				// reset to initial state
+				src.position(originalPos);
+			}
+		}
+	}
 }
--- a/src/org/tmatesoft/hg/internal/NewlineFilter.java	Thu Dec 01 05:21:40 2011 +0100
+++ b/src/org/tmatesoft/hg/internal/NewlineFilter.java	Tue Dec 06 12:57:21 2011 +0100
@@ -16,6 +16,8 @@
  */
 package org.tmatesoft.hg.internal;
 
+import static java.lang.Math.max;
+import static java.lang.Math.min;
 import static org.tmatesoft.hg.internal.Filter.Direction.FromRepo;
 import static org.tmatesoft.hg.internal.Filter.Direction.ToRepo;
 import static org.tmatesoft.hg.internal.KeywordFilter.copySlice;
@@ -26,6 +28,7 @@
 import java.util.ArrayList;
 import java.util.Map;
 
+import org.tmatesoft.hg.core.HgBadStateException;
 import org.tmatesoft.hg.repo.HgInternals;
 import org.tmatesoft.hg.repo.HgRepository;
 import org.tmatesoft.hg.util.Path;
@@ -35,20 +38,25 @@
  * @author Artem Tikhomirov
  * @author TMate Software Ltd.
  */
-public class NewlineFilter implements Filter {
+public class NewlineFilter implements Filter, Preview {
 
-	// if allowInconsistent is true, filter simply pass incorrect newline characters (single \r or \r\n on *nix and single \n on Windows) as is,
-	// i.e. doesn't try to convert them into appropriate newline characters. XXX revisit if Keyword extension behaves differently
+	// if processInconsistent is false, filter simply pass incorrect newline characters (single \r or \r\n on *nix and single \n on Windows) as is,
+	// i.e. doesn't try to convert them into appropriate newline characters. 
+	// XXX revisit if Keyword extension behaves differently - WTF???
 	private final boolean processInconsistent;
 	private final boolean winToNix;
+	
+	// NOTE, if processInconsistent == true, foundCRLF and foundLoneLF are not initialized
+	private boolean foundLoneLF = false;
+	private boolean foundCRLF = false;
 
-	// next two factory methods for testsing purposes
-	public static NewlineFilter createWin2Nix(boolean allowMixed) {
-		return new NewlineFilter(!allowMixed, 0);
+	// next two factory methods for test purposes
+	public static NewlineFilter createWin2Nix(boolean processMixed) {
+		return new NewlineFilter(!processMixed, 0);
 	}
 	
-	public static NewlineFilter createNix2Win(boolean allowMixed) {
-		return new NewlineFilter(!allowMixed, 1);
+	public static NewlineFilter createNix2Win(boolean processMixed) {
+		return new NewlineFilter(!processMixed, 1);
 	}
 
 	private NewlineFilter(boolean onlyConsistent, int transform) {
@@ -57,15 +65,70 @@
 	}
 
 	public ByteBuffer filter(ByteBuffer src) {
+		if (!previewDone) {
+			throw new HgBadStateException("This filter requires preview operation prior to actual filtering");
+		}
+		if (!processInconsistent && foundLoneLF && foundCRLF) {
+			// do not process inconsistent newlines
+			return src;
+		}
 		if (winToNix) {
+			if (!processInconsistent && !foundCRLF) {
+				// no reason to process if no CRLF in the data stream
+				return src;
+			}
 			return win2nix(src);
 		} else {
+			if (!processInconsistent && !foundLoneLF) {
+				return src;
+			}
 			return nix2win(src);
 		}
 	}
 	
-	private boolean foundLoneLF = false;
-	private boolean foundCRLF = false;
+	private boolean prevBufLastByteWasCR = false;
+	private boolean previewDone = false;
+
+	public void preview(ByteBuffer src) {
+		previewDone = true; // guard
+		if (processInconsistent) {
+			// gonna handle them anyway, no need to check. TODO Do not implement Preview directly, but rather 
+			// conditionally through getAdapter when processInconsistent is false (sic!)
+			return;
+		}
+		if (foundLoneLF && foundCRLF) {
+			// already know it's inconsistent
+			return;
+		}
+		final byte CR = (byte) '\r';
+		final byte LF = (byte) '\n';
+		int x = src.position();
+		while (x < src.limit()) {
+			int in = indexOf(LF, src, x);
+			if (in == -1) {
+				// no line feed, but what if it's CRLF broken in the middle?
+				prevBufLastByteWasCR = CR == src.get(src.limit() - 1);
+				return;
+			}
+			if (in == 0) {
+				if (prevBufLastByteWasCR) {
+					foundCRLF = true;
+				} else {
+					foundLoneLF = true;
+				}
+			} else { // in > 0 && in >= x
+				if (src.get(in - 1) == CR) {
+					foundCRLF = true;
+				} else {
+					foundLoneLF = true;
+				}
+			}
+			if (foundCRLF && foundLoneLF) {
+				return;
+			}
+			x = in + 1;
+		}
+	}
 
 	private ByteBuffer win2nix(ByteBuffer src) {
 		int lookupStart = src.position(); // source index
@@ -78,17 +141,21 @@
 			int in = indexOf(LF, src, lookupStart);
 			if (in != -1) {
 				if (ir == -1 || ir > in) {
-					// lone LF. CR, if present, goes after LF, process up to closest LF, let next iteration decide what to do with CR@ir
-					foundLoneLF = true;
-					// XXX respect onlyConsistent. if foundCRLF then shall not process further
+					// lone LF. CR, if present, goes after LF, process up to that lone, closest LF; let next iteration decide what to do with CR@ir
+					if (!processInconsistent && foundCRLF) {
+						assert foundLoneLF == true : "preview() shall initialize this";
+						fail(src, in);
+					}
 					dst = consume(src, lookupStart, in+1, dst);
 					lookupStart = in + 1;
 				} else {
 					// ir < in
 					if (onlyCRup2limit(src, ir, in)) {
 						// CR...CRLF;
-						foundCRLF = true;
-						// XXX respect onlyConsistent. if foundLoneLF then shall not process further
+						if (!processInconsistent && foundLoneLF) {
+							assert foundCRLF == true : "preview() shall initialize this";
+							fail(src, ir);
+						}
 						dst = consume(src, lookupStart, ir, dst);
 						dst.put(LF);
 						lookupStart = in+1;
@@ -147,8 +214,11 @@
 			int in = indexOf(LF, src, x);
 			if (in != -1) {
 				if (in > x && src.get(in - 1) == CR) {
-					foundCRLF = true;
-					// XXX respect onlyConsistent. if foundLoneLF then shall not process further
+					// found CRLF
+					if (!processInconsistent && foundLoneLF) {
+						assert foundCRLF == true : "preview() shall initialize this";
+						fail(src, in-1);
+					}
 					if (dst == null) {
 						dst = ByteBuffer.allocate(src.remaining() * 2);
 					}
@@ -156,8 +226,10 @@
 					x = in + 1;
 				} else {
 					// found stand-alone LF, need to output CRLF
-					foundLoneLF = true;
-					// XXX respect onlyConsistent. if foundCRLF then shall not process further
+					if (!processInconsistent && foundCRLF) {
+						assert foundLoneLF == true : "preview() shall initialize this";
+						fail(src, in);
+					}
 					if (dst == null) {
 						dst = ByteBuffer.allocate(src.remaining() * 2);
 					}
@@ -180,9 +252,13 @@
 	}
 
 
+	// Test: nlFilter.fail(ByteBuffer.wrap(new "test string".getBytes()), 5);
 	private void fail(ByteBuffer b, int pos) {
-		// FIXME checked(?) HgFilterException instead
-		throw new RuntimeException(String.format("Inconsistent newline characters in the stream (char 0x%x, local index:%d)", b.get(pos), pos));
+		StringBuilder sb = new StringBuilder();
+		for (int i = max(pos-10, 0), x = min(pos + 10, b.limit()); i < x; i++) {
+			sb.append(String.format("%02x ", b.get(i)));
+		}
+		throw new HgBadStateException(String.format("Inconsistent newline characters in the stream %s (char 0x%x, local index:%d)", sb.toString(), b.get(pos), pos));
 	}
 
 	private static int indexOf(byte ch, ByteBuffer b, int from) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/tmatesoft/hg/internal/Preview.java	Tue Dec 06 12:57:21 2011 +0100
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2011 TMate Software Ltd
+ *  
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.internal;
+
+import java.nio.ByteBuffer;
+import java.nio.channels.ByteChannel;
+
+/**
+ * Filters may need to look into data stream before actual processing takes place,
+ * for example, to find out whether filter shall be applied at all.
+ * 
+ * {@link ByteChannel ByteChannels} that may use filters shall be checked for {@link Preview} adaptable before writing to them. 
+ * 
+ * E.g. newline filter handles streams with mixed newline endings according to configuration option 
+ * (either process or ignore), hence, need to check complete stream first.
+ * 
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public interface Preview {
+	/**
+	 * Supplied buffer may be shared between few Preview filter instances, hence implementers shall NOT consume (move position)
+	 * of the buffer. Caller enforces this reseting buffer position between calls to previewers.
+	 * <p>
+	 * XXX if this approach turns out to impose a great burden on filter implementations, FilterByteChannel may be modified to keep
+	 * record of how far each filter had read it's buffer. 
+	 */
+	void preview(ByteBuffer src);
+}
\ No newline at end of file
--- a/src/org/tmatesoft/hg/repo/HgDataFile.java	Thu Dec 01 05:21:40 2011 +0100
+++ b/src/org/tmatesoft/hg/repo/HgDataFile.java	Tue Dec 06 12:57:21 2011 +0100
@@ -159,6 +159,8 @@
 				}
 			}
 		} else {
+			// FIXME not TIP, but revision according to dirstate!!!
+			// add tests for this case
 			contentWithFilters(TIP, sink);
 		}
 	}
@@ -219,12 +221,12 @@
 		}
 		ErrorHandlingInspector insp;
 		if (metadata.none(revision)) {
-			insp = new ContentPipe(sink, 0);
+			insp = new ContentPipe(sink, 0, getRepo().getContext().getLog());
 		} else if (metadata.known(revision)) {
-			insp = new ContentPipe(sink, metadata.dataOffset(revision));
+			insp = new ContentPipe(sink, metadata.dataOffset(revision), getRepo().getContext().getLog());
 		} else {
 			// do not know if there's metadata
-			insp = new MetadataInspector(metadata, getPath(), new ContentPipe(sink, 0));
+			insp = new MetadataInspector(metadata, getPath(), new ContentPipe(sink, 0, getRepo().getContext().getLog()));
 		}
 		insp.checkCancelled();
 		super.content.iterate(revision, revision, true, insp);
--- a/src/org/tmatesoft/hg/repo/HgWorkingCopyStatusCollector.java	Thu Dec 01 05:21:40 2011 +0100
+++ b/src/org/tmatesoft/hg/repo/HgWorkingCopyStatusCollector.java	Tue Dec 06 12:57:21 2011 +0100
@@ -39,6 +39,7 @@
 import org.tmatesoft.hg.internal.FilterByteChannel;
 import org.tmatesoft.hg.internal.ManifestRevision;
 import org.tmatesoft.hg.internal.PathScope;
+import org.tmatesoft.hg.internal.Preview;
 import org.tmatesoft.hg.util.ByteChannel;
 import org.tmatesoft.hg.util.CancelledException;
 import org.tmatesoft.hg.util.FileInfo;
@@ -469,6 +470,22 @@
 			is = f.newInputChannel();
 			ByteBuffer fb = ByteBuffer.allocate(min(1 + data.length * 2 /*to fit couple of lines appended; never zero*/, 8192));
 			FilterByteChannel filters = new FilterByteChannel(check, repo.getFiltersFromWorkingDirToRepo(p));
+			Preview preview = filters.getAdapter(Preview.class);
+			if (preview != null) {
+				while (is.read(fb) != -1) {
+					fb.flip();
+					preview.preview(fb);
+					fb.clear();
+				}
+				// reset channel to read once again
+				try {
+					is.close();
+				} catch (IOException ex) {
+					repo.getContext().getLog().info(getClass(), ex, null);
+				}
+				is = f.newInputChannel();
+				fb.clear();
+			}
 			while (is.read(fb) != -1 && check.sameSoFar()) {
 				fb.flip();
 				filters.write(fb);
--- a/src/org/tmatesoft/hg/repo/Revlog.java	Thu Dec 01 05:21:40 2011 +0100
+++ b/src/org/tmatesoft/hg/repo/Revlog.java	Tue Dec 06 12:57:21 2011 +0100
@@ -36,11 +36,13 @@
 import org.tmatesoft.hg.internal.ArrayHelper;
 import org.tmatesoft.hg.internal.DataAccess;
 import org.tmatesoft.hg.internal.Experimental;
+import org.tmatesoft.hg.internal.Preview;
 import org.tmatesoft.hg.internal.RevlogStream;
 import org.tmatesoft.hg.util.Adaptable;
 import org.tmatesoft.hg.util.ByteChannel;
 import org.tmatesoft.hg.util.CancelSupport;
 import org.tmatesoft.hg.util.CancelledException;
+import org.tmatesoft.hg.util.LogFacility;
 import org.tmatesoft.hg.util.ProgressSupport;
 
 
@@ -178,7 +180,7 @@
 		if (sink == null) {
 			throw new IllegalArgumentException();
 		}
-		ContentPipe insp = new ContentPipe(sink, 0);
+		ContentPipe insp = new ContentPipe(sink, 0, repo.getContext().getLog());
 		insp.checkCancelled();
 		content.iterate(revision, revision, true, insp);
 		insp.checkFailed();
@@ -596,16 +598,19 @@
 	protected static class ContentPipe extends ErrorHandlingInspector implements RevlogStream.Inspector, CancelSupport {
 		private final ByteChannel sink;
 		private final int offset;
+		private final LogFacility logFacility;
 
 		/**
 		 * @param _sink - cannot be <code>null</code>
 		 * @param seekOffset - when positive, orders to pipe bytes to the sink starting from specified offset, not from the first byte available in DataAccess
+		 * @param log optional facility to put warnings/debug messages into, may be null.
 		 */
-		public ContentPipe(ByteChannel _sink, int seekOffset) {
+		public ContentPipe(ByteChannel _sink, int seekOffset, LogFacility log) {
 			assert _sink != null;
 			sink = _sink;
 			setCancelSupport(CancelSupport.Factory.get(_sink));
 			offset = seekOffset;
+			logFacility = log;
 		}
 		
 		protected void prepare(int revisionNumber, DataAccess da) throws HgException, IOException {
@@ -618,16 +623,37 @@
 			try {
 				prepare(revisionNumber, da); // XXX perhaps, prepare shall return DA (sliced, if needed)
 				final ProgressSupport progressSupport = ProgressSupport.Factory.get(sink);
-				ByteBuffer buf = ByteBuffer.allocate(512);
-				progressSupport.start(da.length());
+				ByteBuffer buf = ByteBuffer.allocate(actualLen > 8192 ? 8192 : actualLen);
+				Preview p = getAdapter(sink, Preview.class);
+				if (p != null) {
+					progressSupport.start(2 * da.length());
+					while (!da.isEmpty()) {
+						checkCancelled();
+						da.readBytes(buf);
+						p.preview(buf);
+						buf.clear();
+					}
+					da.reset();
+					prepare(revisionNumber, da);
+					progressSupport.worked(da.length());
+					buf.clear();
+				} else {
+					progressSupport.start(da.length());
+				}
 				while (!da.isEmpty()) {
 					checkCancelled();
 					da.readBytes(buf);
-					buf.flip();
+					buf.flip(); // post: position == 0
 					// XXX I may not rely on returned number of bytes but track change in buf position instead.
-					int consumed = sink.write(buf); 
-					// FIXME in fact, bad sink implementation (that consumes no bytes) would result in endless loop. Need to account for this 
-					buf.compact();
+					
+					int consumed = sink.write(buf);
+					if ((consumed == 0 || consumed != buf.position()) && logFacility != null) {
+						logFacility.warn(getClass(), "Bad data sink when reading revision %d. Reported %d bytes consumed, byt actually read %d", revisionNumber, consumed, buf.position());
+					}
+					if (buf.position() == 0) {
+						throw new HgBadStateException("Bad sink implementation (consumes no bytes) results in endless loop");
+					}
+					buf.compact(); // ensure (a) there's space for new (b) data starts at 0
 					progressSupport.worked(consumed);
 				}
 				progressSupport.done(); // XXX shall specify whether #done() is invoked always or only if completed successfully.
--- a/test/org/tmatesoft/hg/test/TestNewlineFilter.java	Thu Dec 01 05:21:40 2011 +0100
+++ b/test/org/tmatesoft/hg/test/TestNewlineFilter.java	Tue Dec 06 12:57:21 2011 +0100
@@ -89,22 +89,16 @@
 
 	@Test
 	public void testStrictMixed_CRLF_2_LF() {
-		try {
-			byte[] result = apply(NewlineFilter.createWin2Nix(false), crlf_2.getBytes());
-			Assert.fail("Shall fail when eol.only-consistent is true:" + new String(result));
-		} catch (RuntimeException ex) {
-			// fine
-		}
+		final byte[] input = crlf_2.getBytes();
+		byte[] result = apply(NewlineFilter.createWin2Nix(false), input);
+		Assert.assertArrayEquals(input, result);
 	}
 
 	@Test
 	public void testStrictMixed_LF_2_CRLF() {
-		try {
-			byte[] result = apply(NewlineFilter.createNix2Win(false), lf_2.getBytes());
-			Assert.fail("Shall fail when eol.only-consistent is true:" + new String(result));
-		} catch (RuntimeException ex) {
-			// fine
-		}
+		final byte[] input = lf_2.getBytes();
+		byte[] result = apply(NewlineFilter.createNix2Win(false), input);
+		Assert.assertArrayEquals(input, result);
 	}
 	
 	@Test
@@ -116,6 +110,9 @@
 		NewlineFilter nlFilter = NewlineFilter.createWin2Nix(false);
 		ByteBuffer input = ByteBuffer.allocate(i1.length + i2.length);
 		ByteBuffer res = ByteBuffer.allocate(i1.length + i2.length); // at most of the original size
+		nlFilter.preview(ByteBuffer.wrap(i1));
+		nlFilter.preview(ByteBuffer.wrap(i2));
+		//
 		input.put(i1).flip();
 		res.put(nlFilter.filter(input));
 		Assert.assertTrue("Unpocessed chars shall be left in input buffer", input.remaining() > 0);
@@ -136,6 +133,11 @@
 		res.clear();
 		input.clear();
 		input.put(i1).put("\r\r\r".getBytes()).flip();
+		// preview requred
+		nlFilter.preview(input);
+		nlFilter.preview(ByteBuffer.wrap(i2));
+		// input.position(0); correctly written preview shall not affect buffer position 
+		//
 		res.put(nlFilter.filter(input));
 		Assert.assertTrue("Unpocessed chars shall be left in input buffer", input.remaining() > 0);
 		input.compact();
@@ -179,7 +181,10 @@
 	}
 
 	private static byte[] apply(NewlineFilter nlFilter, byte[] input) {
-		ByteBuffer result = nlFilter.filter(ByteBuffer.wrap(input));
+		final ByteBuffer inputBuffer = ByteBuffer.wrap(input);
+		nlFilter.preview(inputBuffer);
+		// inputBuffer.position(0); correctly written filter shall not affect buffer position
+		ByteBuffer result = nlFilter.filter(inputBuffer);
 		byte[] res = new byte[result.remaining()];
 		result.get(res);
 		return res;