changeset 353:0f3687e79f5a

Treat content with target line endings as correct regardless eol.only-consistent setting
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Thu, 01 Dec 2011 03:05:28 +0100 (2011-12-01)
parents 7b34d24b8f4d
children 5f9073eabf06
files src/org/tmatesoft/hg/internal/NewlineFilter.java test/org/tmatesoft/hg/test/TestNewlineFilter.java
diffstat 2 files changed, 159 insertions(+), 68 deletions(-) [+]
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/internal/NewlineFilter.java	Wed Nov 30 05:11:07 2011 +0100
+++ b/src/org/tmatesoft/hg/internal/NewlineFilter.java	Thu Dec 01 03:05:28 2011 +0100
@@ -39,7 +39,7 @@
 
 	// if allowInconsistent is true, filter simply pass incorrect newline characters (single \r or \r\n on *nix and single \n on Windows) as is,
 	// i.e. doesn't try to convert them into appropriate newline characters. XXX revisit if Keyword extension behaves differently
-	private final boolean allowInconsistent;
+	private final boolean processInconsistent;
 	private final boolean winToNix;
 
 	// next two factory methods for testsing purposes
@@ -51,9 +51,9 @@
 		return new NewlineFilter(!allowMixed, 1);
 	}
 
-	private NewlineFilter(boolean failIfInconsistent, int transform) {
+	private NewlineFilter(boolean onlyConsistent, int transform) {
 		winToNix = transform == 0;
-		allowInconsistent = !failIfInconsistent;
+		processInconsistent = !onlyConsistent;
 	}
 
 	public ByteBuffer filter(ByteBuffer src) {
@@ -63,82 +63,117 @@
 			return nix2win(src);
 		}
 	}
+	
+	private boolean foundLoneLF = false;
+	private boolean foundCRLF = false;
 
 	private ByteBuffer win2nix(ByteBuffer src) {
-		int x = src.position(); // source index
-		int lookupStart = x;
+		int lookupStart = src.position(); // source index
 		ByteBuffer dst = null;
-		while (x < src.limit()) {
+		final byte CR = (byte) '\r';
+		final byte LF = (byte) '\n';
+		while (lookupStart < src.limit()) {
 			// x, lookupStart, ir and in are absolute positions within src buffer, which is never read with modifying operations
-			int ir = indexOf('\r', src, lookupStart);
-			int in = indexOf('\n', src, lookupStart);
-			if (ir == -1) {
-				if (in == -1 || allowInconsistent) {
-					if (dst != null) {
-						copySlice(src, x, src.limit(), dst);
-						x = src.limit(); // consumed all
-					}
-					break;
+			int ir = indexOf(CR, src, lookupStart);
+			int in = indexOf(LF, src, lookupStart);
+			if (in != -1) {
+				if (ir == -1 || ir > in) {
+					// lone LF. CR, if present, goes after LF, process up to closest LF, let next iteration decide what to do with CR@ir
+					foundLoneLF = true;
+					// XXX respect onlyConsistent. if foundCRLF then shall not process further
+					dst = consume(src, lookupStart, in+1, dst);
+					lookupStart = in + 1;
 				} else {
-					fail(src, in);
+					// ir < in
+					if (onlyCRup2limit(src, ir, in)) {
+						// CR...CRLF;
+						foundCRLF = true;
+						// XXX respect onlyConsistent. if foundLoneLF then shall not process further
+						dst = consume(src, lookupStart, ir, dst);
+						dst.put(LF);
+						lookupStart = in+1;
+					} else {
+						// CR...CR...^CR....LF
+						dst = consume(src, lookupStart, ir+1, dst);
+						// although can search for ^CR, here I copy CR one by one as I don't expect huge sequences of CR to optimize for
+						lookupStart = ir+1;
+					}
 				}
-			}
-			// in == -1 while ir != -1 may be valid case if ir is the last char of the buffer, we check below for that
-			if (in != -1 && in != ir+1 && !allowInconsistent) {
-				fail(src, in);
-			}
-			if (dst == null) {
-				dst = ByteBuffer.allocate(src.remaining());
-			}
-			copySlice(src, x, ir, dst);
-			if (ir+1 == src.limit()) {
-				// last char of the buffer - 
-				// consume src till that char and let next iteration work on it
-				x = ir;
+			} else {
+				// no newlines
+				if (ir != -1 && onlyCRup2limit(src, ir, src.limit())) {
+					// \r as last character(s) is the only case we care about when there're no LF found
+					// cases like \r\r\r<EOB>\n shall be handled like \r\n, hence onlyCRup2limit
+					dst = consume(src, lookupStart, ir, dst);
+					lookupStart = src.limit() - 1; // leave only last CR for next buffer
+				} else {
+					// consume all. don't create a copy of src if there's no dst yet
+					if (dst != null) {
+						copySlice(src, lookupStart, src.limit(), dst);
+						lookupStart = src.limit();
+					}
+				}
 				break;
 			}
-			if (in != ir + 1) {
-				x = ir+1; // generally in, but if allowInconsistent==true and \r is not followed by \n, then 
-				// cases like "one \r two \r\n three" shall be processed correctly (second pair would be ignored if x==in)
-				lookupStart = ir+1;
-			} else {
-				x = in;
-				lookupStart = x+1; // skip \n for next lookup
+		}
+		src.position(lookupStart); // mark we've consumed up to x
+		return dst == null ? src : (ByteBuffer) dst.flip();
+	}
+	
+	// true if [from..limit) are CR
+	private static boolean onlyCRup2limit(ByteBuffer src, int from, int limit) {
+		// extended version of (ir+1 == src.limit()): check all in [ir..src.limit) are CR
+		for (int i = from; i < limit; i++) {
+			if (src.get(i) != '\r') {
+				return false;
 			}
 		}
-		src.position(x); // mark we've consumed up to x
-		return dst == null ? src : (ByteBuffer) dst.flip();
+		return true;
+	}
+	private static ByteBuffer consume(ByteBuffer src, int from, int to, ByteBuffer dst) {
+		if (dst == null) {
+			dst = ByteBuffer.allocate(src.remaining());
+		}
+		copySlice(src, from, to, dst);
+		return dst;
 	}
 
 	private ByteBuffer nix2win(ByteBuffer src) {
 		int x = src.position();
 		ByteBuffer dst = null;
+		final byte CR = (byte) '\r';
+		final byte LF = (byte) '\n';
 		while (x < src.limit()) {
-			int in = indexOf('\n', src, x);
-			int ir = indexOf('\r', src, x, in == -1 ? src.limit() : in);
-			if (in == -1) {
-				if (ir == -1 || allowInconsistent) {
-					break;
+			int in = indexOf(LF, src, x);
+			if (in != -1) {
+				if (in > x && src.get(in - 1) == CR) {
+					foundCRLF = true;
+					// XXX respect onlyConsistent. if foundLoneLF then shall not process further
+					if (dst == null) {
+						dst = ByteBuffer.allocate(src.remaining() * 2);
+					}
+					copySlice(src, x, in+1, dst);
+					x = in + 1;
 				} else {
-					fail(src, ir);
+					// found stand-alone LF, need to output CRLF
+					foundLoneLF = true;
+					// XXX respect onlyConsistent. if foundCRLF then shall not process further
+					if (dst == null) {
+						dst = ByteBuffer.allocate(src.remaining() * 2);
+					}
+					copySlice(src, x, in, dst);
+					dst.put(CR);
+					dst.put(LF);
+					x = in + 1;
 				}
-			} else if (ir != -1 && !allowInconsistent) {
-				fail(src, ir);
+			} else {
+				// no newlines (no LF), just copy what left
+				if (dst != null) {
+					copySlice(src, x, src.limit(), dst);
+					x = src.limit();
+				}
+				break;
 			}
-			
-			// x <= in < src.limit
-			// allowInconsistent && x <= ir < in   || ir == -1  
-			if (dst == null) {
-				// buffer full of \n grows as much as twice in size
-				dst = ByteBuffer.allocate(src.remaining() * 2);
-			}
-			copySlice(src, x, in, dst);
-			if (ir == -1 || ir+1 != in) {
-				dst.put((byte) '\r');
-			} // otherwise (ir!=-1 && ir+1==in) we found \r\n pair, don't convert to \r\r\n
-			// we may copy \n at src[in] on the next iteration, but would need extra lookupIndex variable then.
-			dst.put((byte) '\n');
-			x = in+1;
 		}
 		src.position(x);
 		return dst == null ? src : (ByteBuffer) dst.flip();
@@ -146,15 +181,16 @@
 
 
 	private void fail(ByteBuffer b, int pos) {
+		// FIXME checked(?) HgFilterException instead
 		throw new RuntimeException(String.format("Inconsistent newline characters in the stream (char 0x%x, local index:%d)", b.get(pos), pos));
 	}
 
-	private static int indexOf(char ch, ByteBuffer b, int from) {
+	private static int indexOf(byte ch, ByteBuffer b, int from) {
 		return indexOf(ch, b, from, b.limit());
 	}
 
 	// looks up in buf[from..to)
-	private static int indexOf(char ch, ByteBuffer b, int from, int to) {
+	private static int indexOf(byte ch, ByteBuffer b, int from, int to) {
 		for (int i = from; i < to; i++) {
 			byte c = b.get(i);
 			if (ch == c) {
@@ -165,7 +201,7 @@
 	}
 
 	public static class Factory implements Filter.Factory {
-		private boolean failIfInconsistent = true;
+		private boolean processOnlyConsistent = true;
 		private Path.Matcher lfMatcher;
 		private Path.Matcher crlfMatcher;
 		private Path.Matcher binMatcher;
@@ -174,7 +210,7 @@
 		private String nativeOSFormat;
 
 		public void initialize(HgRepository hgRepo) {
-			failIfInconsistent = hgRepo.getConfiguration().getBooleanValue("eol", "only-consistent", true);
+			processOnlyConsistent = hgRepo.getConfiguration().getBooleanValue("eol", "only-consistent", true);
 			File cfgFile = new File(hgRepo.getWorkingDir(), ".hgeol");
 			if (!cfgFile.canRead()) {
 				return;
@@ -238,19 +274,19 @@
 				return null;
 			}
 			if (crlfMatcher != null && crlfMatcher.accept(path)) {
-				return new NewlineFilter(failIfInconsistent, 1);
+				return new NewlineFilter(processOnlyConsistent, 1);
 			} else if (lfMatcher != null && lfMatcher.accept(path)) {
-				return new NewlineFilter(failIfInconsistent, 0);
+				return new NewlineFilter(processOnlyConsistent, 0);
 			} else if (nativeMatcher != null && nativeMatcher.accept(path)) {
 				if (nativeOSFormat.equals(nativeRepoFormat)) {
 					return null;
 				}
 				if (opts.getDirection() == FromRepo) {
 					int transform = "CRLF".equals(nativeOSFormat) ? 1 : 0;
-					return new NewlineFilter(failIfInconsistent, transform);
+					return new NewlineFilter(processOnlyConsistent, transform);
 				} else if (opts.getDirection() == ToRepo) {
 					int transform = "CRLF".equals(nativeOSFormat) ? 0 : 1;
-					return new NewlineFilter(failIfInconsistent, transform);
+					return new NewlineFilter(processOnlyConsistent, transform);
 				}
 				return null;
 			}
--- a/test/org/tmatesoft/hg/test/TestNewlineFilter.java	Wed Nov 30 05:11:07 2011 +0100
+++ b/test/org/tmatesoft/hg/test/TestNewlineFilter.java	Thu Dec 01 03:05:28 2011 +0100
@@ -106,6 +106,61 @@
 			// fine
 		}
 	}
+	
+	@Test
+	public void testBufferEndInTheMiddle_CRLF_2_LF() {
+		// filter works with ByteBuffer that may end with \r, and the next one starting with \n
+		// need to make sure this is handled correctly.
+		byte[] i1 = "\r\nA\r\nBC\r".getBytes();
+		byte[] i2 = "\n\r\nDEF\r\n".getBytes();
+		NewlineFilter nlFilter = NewlineFilter.createWin2Nix(false);
+		ByteBuffer input = ByteBuffer.allocate(i1.length + i2.length);
+		ByteBuffer res = ByteBuffer.allocate(i1.length + i2.length); // at most of the original size
+		input.put(i1).flip();
+		res.put(nlFilter.filter(input));
+		Assert.assertTrue("Unpocessed chars shall be left in input buffer", input.remaining() > 0);
+		input.compact();
+		input.put(i2);
+		input.flip();
+		res.put(nlFilter.filter(input));
+		Assert.assertTrue("Input shall be consumed completely", input.remaining() == 0);
+		//
+		res.flip();
+		byte[] result = new byte[res.remaining()];
+		res.get(result);
+		Assert.assertArrayEquals(lf_1.getBytes(), result);
+		//
+		//
+		// check the same, with extra \r at the end of first portion
+		nlFilter = NewlineFilter.createWin2Nix(false);
+		res.clear();
+		input.clear();
+		input.put(i1).put("\r\r\r".getBytes()).flip();
+		res.put(nlFilter.filter(input));
+		Assert.assertTrue("Unpocessed chars shall be left in input buffer", input.remaining() > 0);
+		input.compact();
+		input.put(i2);
+		input.flip();
+		res.put(nlFilter.filter(input));
+		Assert.assertTrue("Input shall be consumed completely", input.remaining() == 0);
+		res.flip();
+		result = new byte[res.remaining()];
+		res.get(result);
+		Assert.assertArrayEquals(lf_1.getBytes(), result);
+	}
+	
+	@Test
+	public void testNoConversionLoneCR() {
+		// CRLF -> LF
+		final byte[] input = "\r\nA\rBC\r\rDE\r\nFGH".getBytes();
+		final byte[] output = "\nA\rBC\r\rDE\nFGH".getBytes();
+		byte[] result = apply(NewlineFilter.createWin2Nix(false), input);
+		Assert.assertArrayEquals(output, result);
+		//
+		// LF -> CRLF
+		result = apply(NewlineFilter.createNix2Win(false), output);
+		Assert.assertArrayEquals(input, result);
+	}
 
 
 	@Test