# HG changeset patch # User Artem Tikhomirov # Date 1322705128 -3600 # Node ID 0f3687e79f5a730b0a510354d1e8dccf1ccd0b90 # Parent 7b34d24b8f4da51de5309da3e96a6cfa2f5ffd89 Treat content with target line endings as correct regardless eol.only-consistent setting diff -r 7b34d24b8f4d -r 0f3687e79f5a src/org/tmatesoft/hg/internal/NewlineFilter.java --- a/src/org/tmatesoft/hg/internal/NewlineFilter.java Wed Nov 30 05:11:07 2011 +0100 +++ b/src/org/tmatesoft/hg/internal/NewlineFilter.java Thu Dec 01 03:05:28 2011 +0100 @@ -39,7 +39,7 @@ // if allowInconsistent is true, filter simply pass incorrect newline characters (single \r or \r\n on *nix and single \n on Windows) as is, // i.e. doesn't try to convert them into appropriate newline characters. XXX revisit if Keyword extension behaves differently - private final boolean allowInconsistent; + private final boolean processInconsistent; private final boolean winToNix; // next two factory methods for testsing purposes @@ -51,9 +51,9 @@ return new NewlineFilter(!allowMixed, 1); } - private NewlineFilter(boolean failIfInconsistent, int transform) { + private NewlineFilter(boolean onlyConsistent, int transform) { winToNix = transform == 0; - allowInconsistent = !failIfInconsistent; + processInconsistent = !onlyConsistent; } public ByteBuffer filter(ByteBuffer src) { @@ -63,82 +63,117 @@ return nix2win(src); } } + + private boolean foundLoneLF = false; + private boolean foundCRLF = false; private ByteBuffer win2nix(ByteBuffer src) { - int x = src.position(); // source index - int lookupStart = x; + int lookupStart = src.position(); // source index ByteBuffer dst = null; - while (x < src.limit()) { + final byte CR = (byte) '\r'; + final byte LF = (byte) '\n'; + while (lookupStart < src.limit()) { // x, lookupStart, ir and in are absolute positions within src buffer, which is never read with modifying operations - int ir = indexOf('\r', src, lookupStart); - int in = indexOf('\n', src, lookupStart); - if (ir == -1) { - if (in == -1 || allowInconsistent) { - if (dst != null) { - copySlice(src, x, src.limit(), dst); - x = src.limit(); // consumed all - } - break; + int ir = indexOf(CR, src, lookupStart); + int in = indexOf(LF, src, lookupStart); + if (in != -1) { + if (ir == -1 || ir > in) { + // lone LF. CR, if present, goes after LF, process up to closest LF, let next iteration decide what to do with CR@ir + foundLoneLF = true; + // XXX respect onlyConsistent. if foundCRLF then shall not process further + dst = consume(src, lookupStart, in+1, dst); + lookupStart = in + 1; } else { - fail(src, in); + // ir < in + if (onlyCRup2limit(src, ir, in)) { + // CR...CRLF; + foundCRLF = true; + // XXX respect onlyConsistent. if foundLoneLF then shall not process further + dst = consume(src, lookupStart, ir, dst); + dst.put(LF); + lookupStart = in+1; + } else { + // CR...CR...^CR....LF + dst = consume(src, lookupStart, ir+1, dst); + // although can search for ^CR, here I copy CR one by one as I don't expect huge sequences of CR to optimize for + lookupStart = ir+1; + } } - } - // in == -1 while ir != -1 may be valid case if ir is the last char of the buffer, we check below for that - if (in != -1 && in != ir+1 && !allowInconsistent) { - fail(src, in); - } - if (dst == null) { - dst = ByteBuffer.allocate(src.remaining()); - } - copySlice(src, x, ir, dst); - if (ir+1 == src.limit()) { - // last char of the buffer - - // consume src till that char and let next iteration work on it - x = ir; + } else { + // no newlines + if (ir != -1 && onlyCRup2limit(src, ir, src.limit())) { + // \r as last character(s) is the only case we care about when there're no LF found + // cases like \r\r\r\n shall be handled like \r\n, hence onlyCRup2limit + dst = consume(src, lookupStart, ir, dst); + lookupStart = src.limit() - 1; // leave only last CR for next buffer + } else { + // consume all. don't create a copy of src if there's no dst yet + if (dst != null) { + copySlice(src, lookupStart, src.limit(), dst); + lookupStart = src.limit(); + } + } break; } - if (in != ir + 1) { - x = ir+1; // generally in, but if allowInconsistent==true and \r is not followed by \n, then - // cases like "one \r two \r\n three" shall be processed correctly (second pair would be ignored if x==in) - lookupStart = ir+1; - } else { - x = in; - lookupStart = x+1; // skip \n for next lookup + } + src.position(lookupStart); // mark we've consumed up to x + return dst == null ? src : (ByteBuffer) dst.flip(); + } + + // true if [from..limit) are CR + private static boolean onlyCRup2limit(ByteBuffer src, int from, int limit) { + // extended version of (ir+1 == src.limit()): check all in [ir..src.limit) are CR + for (int i = from; i < limit; i++) { + if (src.get(i) != '\r') { + return false; } } - src.position(x); // mark we've consumed up to x - return dst == null ? src : (ByteBuffer) dst.flip(); + return true; + } + private static ByteBuffer consume(ByteBuffer src, int from, int to, ByteBuffer dst) { + if (dst == null) { + dst = ByteBuffer.allocate(src.remaining()); + } + copySlice(src, from, to, dst); + return dst; } private ByteBuffer nix2win(ByteBuffer src) { int x = src.position(); ByteBuffer dst = null; + final byte CR = (byte) '\r'; + final byte LF = (byte) '\n'; while (x < src.limit()) { - int in = indexOf('\n', src, x); - int ir = indexOf('\r', src, x, in == -1 ? src.limit() : in); - if (in == -1) { - if (ir == -1 || allowInconsistent) { - break; + int in = indexOf(LF, src, x); + if (in != -1) { + if (in > x && src.get(in - 1) == CR) { + foundCRLF = true; + // XXX respect onlyConsistent. if foundLoneLF then shall not process further + if (dst == null) { + dst = ByteBuffer.allocate(src.remaining() * 2); + } + copySlice(src, x, in+1, dst); + x = in + 1; } else { - fail(src, ir); + // found stand-alone LF, need to output CRLF + foundLoneLF = true; + // XXX respect onlyConsistent. if foundCRLF then shall not process further + if (dst == null) { + dst = ByteBuffer.allocate(src.remaining() * 2); + } + copySlice(src, x, in, dst); + dst.put(CR); + dst.put(LF); + x = in + 1; } - } else if (ir != -1 && !allowInconsistent) { - fail(src, ir); + } else { + // no newlines (no LF), just copy what left + if (dst != null) { + copySlice(src, x, src.limit(), dst); + x = src.limit(); + } + break; } - - // x <= in < src.limit - // allowInconsistent && x <= ir < in || ir == -1 - if (dst == null) { - // buffer full of \n grows as much as twice in size - dst = ByteBuffer.allocate(src.remaining() * 2); - } - copySlice(src, x, in, dst); - if (ir == -1 || ir+1 != in) { - dst.put((byte) '\r'); - } // otherwise (ir!=-1 && ir+1==in) we found \r\n pair, don't convert to \r\r\n - // we may copy \n at src[in] on the next iteration, but would need extra lookupIndex variable then. - dst.put((byte) '\n'); - x = in+1; } src.position(x); return dst == null ? src : (ByteBuffer) dst.flip(); @@ -146,15 +181,16 @@ private void fail(ByteBuffer b, int pos) { + // FIXME checked(?) HgFilterException instead throw new RuntimeException(String.format("Inconsistent newline characters in the stream (char 0x%x, local index:%d)", b.get(pos), pos)); } - private static int indexOf(char ch, ByteBuffer b, int from) { + private static int indexOf(byte ch, ByteBuffer b, int from) { return indexOf(ch, b, from, b.limit()); } // looks up in buf[from..to) - private static int indexOf(char ch, ByteBuffer b, int from, int to) { + private static int indexOf(byte ch, ByteBuffer b, int from, int to) { for (int i = from; i < to; i++) { byte c = b.get(i); if (ch == c) { @@ -165,7 +201,7 @@ } public static class Factory implements Filter.Factory { - private boolean failIfInconsistent = true; + private boolean processOnlyConsistent = true; private Path.Matcher lfMatcher; private Path.Matcher crlfMatcher; private Path.Matcher binMatcher; @@ -174,7 +210,7 @@ private String nativeOSFormat; public void initialize(HgRepository hgRepo) { - failIfInconsistent = hgRepo.getConfiguration().getBooleanValue("eol", "only-consistent", true); + processOnlyConsistent = hgRepo.getConfiguration().getBooleanValue("eol", "only-consistent", true); File cfgFile = new File(hgRepo.getWorkingDir(), ".hgeol"); if (!cfgFile.canRead()) { return; @@ -238,19 +274,19 @@ return null; } if (crlfMatcher != null && crlfMatcher.accept(path)) { - return new NewlineFilter(failIfInconsistent, 1); + return new NewlineFilter(processOnlyConsistent, 1); } else if (lfMatcher != null && lfMatcher.accept(path)) { - return new NewlineFilter(failIfInconsistent, 0); + return new NewlineFilter(processOnlyConsistent, 0); } else if (nativeMatcher != null && nativeMatcher.accept(path)) { if (nativeOSFormat.equals(nativeRepoFormat)) { return null; } if (opts.getDirection() == FromRepo) { int transform = "CRLF".equals(nativeOSFormat) ? 1 : 0; - return new NewlineFilter(failIfInconsistent, transform); + return new NewlineFilter(processOnlyConsistent, transform); } else if (opts.getDirection() == ToRepo) { int transform = "CRLF".equals(nativeOSFormat) ? 0 : 1; - return new NewlineFilter(failIfInconsistent, transform); + return new NewlineFilter(processOnlyConsistent, transform); } return null; } diff -r 7b34d24b8f4d -r 0f3687e79f5a test/org/tmatesoft/hg/test/TestNewlineFilter.java --- a/test/org/tmatesoft/hg/test/TestNewlineFilter.java Wed Nov 30 05:11:07 2011 +0100 +++ b/test/org/tmatesoft/hg/test/TestNewlineFilter.java Thu Dec 01 03:05:28 2011 +0100 @@ -106,6 +106,61 @@ // fine } } + + @Test + public void testBufferEndInTheMiddle_CRLF_2_LF() { + // filter works with ByteBuffer that may end with \r, and the next one starting with \n + // need to make sure this is handled correctly. + byte[] i1 = "\r\nA\r\nBC\r".getBytes(); + byte[] i2 = "\n\r\nDEF\r\n".getBytes(); + NewlineFilter nlFilter = NewlineFilter.createWin2Nix(false); + ByteBuffer input = ByteBuffer.allocate(i1.length + i2.length); + ByteBuffer res = ByteBuffer.allocate(i1.length + i2.length); // at most of the original size + input.put(i1).flip(); + res.put(nlFilter.filter(input)); + Assert.assertTrue("Unpocessed chars shall be left in input buffer", input.remaining() > 0); + input.compact(); + input.put(i2); + input.flip(); + res.put(nlFilter.filter(input)); + Assert.assertTrue("Input shall be consumed completely", input.remaining() == 0); + // + res.flip(); + byte[] result = new byte[res.remaining()]; + res.get(result); + Assert.assertArrayEquals(lf_1.getBytes(), result); + // + // + // check the same, with extra \r at the end of first portion + nlFilter = NewlineFilter.createWin2Nix(false); + res.clear(); + input.clear(); + input.put(i1).put("\r\r\r".getBytes()).flip(); + res.put(nlFilter.filter(input)); + Assert.assertTrue("Unpocessed chars shall be left in input buffer", input.remaining() > 0); + input.compact(); + input.put(i2); + input.flip(); + res.put(nlFilter.filter(input)); + Assert.assertTrue("Input shall be consumed completely", input.remaining() == 0); + res.flip(); + result = new byte[res.remaining()]; + res.get(result); + Assert.assertArrayEquals(lf_1.getBytes(), result); + } + + @Test + public void testNoConversionLoneCR() { + // CRLF -> LF + final byte[] input = "\r\nA\rBC\r\rDE\r\nFGH".getBytes(); + final byte[] output = "\nA\rBC\r\rDE\nFGH".getBytes(); + byte[] result = apply(NewlineFilter.createWin2Nix(false), input); + Assert.assertArrayEquals(output, result); + // + // LF -> CRLF + result = apply(NewlineFilter.createNix2Win(false), output); + Assert.assertArrayEquals(input, result); + } @Test