# HG changeset patch # User Artem Tikhomirov # Date 1323172641 -3600 # Node ID f2c11fe7f3e9f3359b7a752094d787a86978dcee # Parent 5f9073eabf06e33a837b2627cac1ae4e61f2738b Newline filter shall respect whole stream when deciding whether to process line terminators, hence added stream preview functionality diff -r 5f9073eabf06 -r f2c11fe7f3e9 build.xml --- a/build.xml Thu Dec 01 05:21:40 2011 +0100 +++ b/build.xml Tue Dec 06 12:57:21 2011 +0100 @@ -87,6 +87,7 @@ + diff -r 5f9073eabf06 -r f2c11fe7f3e9 src/org/tmatesoft/hg/internal/Filter.java --- a/src/org/tmatesoft/hg/internal/Filter.java Thu Dec 01 05:21:40 2011 +0100 +++ b/src/org/tmatesoft/hg/internal/Filter.java Tue Dec 06 12:57:21 2011 +0100 @@ -28,10 +28,18 @@ */ public interface Filter { - // returns a buffer ready to be read. may return original buffer. - // original buffer may not be fully consumed, #compact() might be operation to perform + /** + * Filters data and returns a new buffer with data or an original buffer. + * Original buffer may not be fully consumed, #compact() might be operation to perform + * @param src + * @return a buffer ready to be read + */ + // ByteBuffer filter(ByteBuffer src); + /* + * Factory doesn't look into data (i.e. could't do a preview), works solely with path + */ interface Factory { void initialize(HgRepository hgRepo); // may return null if for a given path and/or options this filter doesn't make any sense diff -r 5f9073eabf06 -r f2c11fe7f3e9 src/org/tmatesoft/hg/internal/FilterByteChannel.java --- a/src/org/tmatesoft/hg/internal/FilterByteChannel.java Thu Dec 01 05:21:40 2011 +0100 +++ b/src/org/tmatesoft/hg/internal/FilterByteChannel.java Tue Dec 06 12:57:21 2011 +0100 @@ -18,7 +18,9 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Collection; +import java.util.List; import org.tmatesoft.hg.util.Adaptable; import org.tmatesoft.hg.util.ByteChannel; @@ -55,6 +57,20 @@ // adapters or implemented interfaces of the original class shall not be obfuscated by filter public T getAdapter(Class adapterClass) { + if (adapterClass == Preview.class) { + ArrayList previewers = new ArrayList(filters.length); + for (Filter f : filters) { + if (f instanceof Preview /*FIXME or getAdapter != null*/) { + previewers.add((Preview) f); + } + } + if (!previewers.isEmpty()) { + @SuppressWarnings("unchecked") + T rv = (T) new PreviewSupport(previewers); + return rv; + } + // fall through to let delegate answer + } if (delegate instanceof Adaptable) { return ((Adaptable) delegate).getAdapter(adapterClass); } @@ -63,4 +79,22 @@ } return null; } + + private static class PreviewSupport implements Preview { + private final Preview[] participants; + + public PreviewSupport(List previewers) { + participants = new Preview[previewers.size()]; + previewers.toArray(participants); + } + + public void preview(ByteBuffer src) { + final int originalPos = src.position(); + for (Preview p : participants) { + p.preview(src); + // reset to initial state + src.position(originalPos); + } + } + } } diff -r 5f9073eabf06 -r f2c11fe7f3e9 src/org/tmatesoft/hg/internal/NewlineFilter.java --- a/src/org/tmatesoft/hg/internal/NewlineFilter.java Thu Dec 01 05:21:40 2011 +0100 +++ b/src/org/tmatesoft/hg/internal/NewlineFilter.java Tue Dec 06 12:57:21 2011 +0100 @@ -16,6 +16,8 @@ */ package org.tmatesoft.hg.internal; +import static java.lang.Math.max; +import static java.lang.Math.min; import static org.tmatesoft.hg.internal.Filter.Direction.FromRepo; import static org.tmatesoft.hg.internal.Filter.Direction.ToRepo; import static org.tmatesoft.hg.internal.KeywordFilter.copySlice; @@ -26,6 +28,7 @@ import java.util.ArrayList; import java.util.Map; +import org.tmatesoft.hg.core.HgBadStateException; import org.tmatesoft.hg.repo.HgInternals; import org.tmatesoft.hg.repo.HgRepository; import org.tmatesoft.hg.util.Path; @@ -35,20 +38,25 @@ * @author Artem Tikhomirov * @author TMate Software Ltd. */ -public class NewlineFilter implements Filter { +public class NewlineFilter implements Filter, Preview { - // if allowInconsistent is true, filter simply pass incorrect newline characters (single \r or \r\n on *nix and single \n on Windows) as is, - // i.e. doesn't try to convert them into appropriate newline characters. XXX revisit if Keyword extension behaves differently + // if processInconsistent is false, filter simply pass incorrect newline characters (single \r or \r\n on *nix and single \n on Windows) as is, + // i.e. doesn't try to convert them into appropriate newline characters. + // XXX revisit if Keyword extension behaves differently - WTF??? private final boolean processInconsistent; private final boolean winToNix; + + // NOTE, if processInconsistent == true, foundCRLF and foundLoneLF are not initialized + private boolean foundLoneLF = false; + private boolean foundCRLF = false; - // next two factory methods for testsing purposes - public static NewlineFilter createWin2Nix(boolean allowMixed) { - return new NewlineFilter(!allowMixed, 0); + // next two factory methods for test purposes + public static NewlineFilter createWin2Nix(boolean processMixed) { + return new NewlineFilter(!processMixed, 0); } - public static NewlineFilter createNix2Win(boolean allowMixed) { - return new NewlineFilter(!allowMixed, 1); + public static NewlineFilter createNix2Win(boolean processMixed) { + return new NewlineFilter(!processMixed, 1); } private NewlineFilter(boolean onlyConsistent, int transform) { @@ -57,15 +65,70 @@ } public ByteBuffer filter(ByteBuffer src) { + if (!previewDone) { + throw new HgBadStateException("This filter requires preview operation prior to actual filtering"); + } + if (!processInconsistent && foundLoneLF && foundCRLF) { + // do not process inconsistent newlines + return src; + } if (winToNix) { + if (!processInconsistent && !foundCRLF) { + // no reason to process if no CRLF in the data stream + return src; + } return win2nix(src); } else { + if (!processInconsistent && !foundLoneLF) { + return src; + } return nix2win(src); } } - private boolean foundLoneLF = false; - private boolean foundCRLF = false; + private boolean prevBufLastByteWasCR = false; + private boolean previewDone = false; + + public void preview(ByteBuffer src) { + previewDone = true; // guard + if (processInconsistent) { + // gonna handle them anyway, no need to check. TODO Do not implement Preview directly, but rather + // conditionally through getAdapter when processInconsistent is false (sic!) + return; + } + if (foundLoneLF && foundCRLF) { + // already know it's inconsistent + return; + } + final byte CR = (byte) '\r'; + final byte LF = (byte) '\n'; + int x = src.position(); + while (x < src.limit()) { + int in = indexOf(LF, src, x); + if (in == -1) { + // no line feed, but what if it's CRLF broken in the middle? + prevBufLastByteWasCR = CR == src.get(src.limit() - 1); + return; + } + if (in == 0) { + if (prevBufLastByteWasCR) { + foundCRLF = true; + } else { + foundLoneLF = true; + } + } else { // in > 0 && in >= x + if (src.get(in - 1) == CR) { + foundCRLF = true; + } else { + foundLoneLF = true; + } + } + if (foundCRLF && foundLoneLF) { + return; + } + x = in + 1; + } + } private ByteBuffer win2nix(ByteBuffer src) { int lookupStart = src.position(); // source index @@ -78,17 +141,21 @@ int in = indexOf(LF, src, lookupStart); if (in != -1) { if (ir == -1 || ir > in) { - // lone LF. CR, if present, goes after LF, process up to closest LF, let next iteration decide what to do with CR@ir - foundLoneLF = true; - // XXX respect onlyConsistent. if foundCRLF then shall not process further + // lone LF. CR, if present, goes after LF, process up to that lone, closest LF; let next iteration decide what to do with CR@ir + if (!processInconsistent && foundCRLF) { + assert foundLoneLF == true : "preview() shall initialize this"; + fail(src, in); + } dst = consume(src, lookupStart, in+1, dst); lookupStart = in + 1; } else { // ir < in if (onlyCRup2limit(src, ir, in)) { // CR...CRLF; - foundCRLF = true; - // XXX respect onlyConsistent. if foundLoneLF then shall not process further + if (!processInconsistent && foundLoneLF) { + assert foundCRLF == true : "preview() shall initialize this"; + fail(src, ir); + } dst = consume(src, lookupStart, ir, dst); dst.put(LF); lookupStart = in+1; @@ -147,8 +214,11 @@ int in = indexOf(LF, src, x); if (in != -1) { if (in > x && src.get(in - 1) == CR) { - foundCRLF = true; - // XXX respect onlyConsistent. if foundLoneLF then shall not process further + // found CRLF + if (!processInconsistent && foundLoneLF) { + assert foundCRLF == true : "preview() shall initialize this"; + fail(src, in-1); + } if (dst == null) { dst = ByteBuffer.allocate(src.remaining() * 2); } @@ -156,8 +226,10 @@ x = in + 1; } else { // found stand-alone LF, need to output CRLF - foundLoneLF = true; - // XXX respect onlyConsistent. if foundCRLF then shall not process further + if (!processInconsistent && foundCRLF) { + assert foundLoneLF == true : "preview() shall initialize this"; + fail(src, in); + } if (dst == null) { dst = ByteBuffer.allocate(src.remaining() * 2); } @@ -180,9 +252,13 @@ } + // Test: nlFilter.fail(ByteBuffer.wrap(new "test string".getBytes()), 5); private void fail(ByteBuffer b, int pos) { - // FIXME checked(?) HgFilterException instead - throw new RuntimeException(String.format("Inconsistent newline characters in the stream (char 0x%x, local index:%d)", b.get(pos), pos)); + StringBuilder sb = new StringBuilder(); + for (int i = max(pos-10, 0), x = min(pos + 10, b.limit()); i < x; i++) { + sb.append(String.format("%02x ", b.get(i))); + } + throw new HgBadStateException(String.format("Inconsistent newline characters in the stream %s (char 0x%x, local index:%d)", sb.toString(), b.get(pos), pos)); } private static int indexOf(byte ch, ByteBuffer b, int from) { diff -r 5f9073eabf06 -r f2c11fe7f3e9 src/org/tmatesoft/hg/internal/Preview.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/org/tmatesoft/hg/internal/Preview.java Tue Dec 06 12:57:21 2011 +0100 @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2011 TMate Software Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * For information on how to redistribute this software under + * the terms of a license other than GNU General Public License + * contact TMate Software at support@hg4j.com + */ +package org.tmatesoft.hg.internal; + +import java.nio.ByteBuffer; +import java.nio.channels.ByteChannel; + +/** + * Filters may need to look into data stream before actual processing takes place, + * for example, to find out whether filter shall be applied at all. + * + * {@link ByteChannel ByteChannels} that may use filters shall be checked for {@link Preview} adaptable before writing to them. + * + * E.g. newline filter handles streams with mixed newline endings according to configuration option + * (either process or ignore), hence, need to check complete stream first. + * + * @author Artem Tikhomirov + * @author TMate Software Ltd. + */ +public interface Preview { + /** + * Supplied buffer may be shared between few Preview filter instances, hence implementers shall NOT consume (move position) + * of the buffer. Caller enforces this reseting buffer position between calls to previewers. + *

+ * XXX if this approach turns out to impose a great burden on filter implementations, FilterByteChannel may be modified to keep + * record of how far each filter had read it's buffer. + */ + void preview(ByteBuffer src); +} \ No newline at end of file diff -r 5f9073eabf06 -r f2c11fe7f3e9 src/org/tmatesoft/hg/repo/HgDataFile.java --- a/src/org/tmatesoft/hg/repo/HgDataFile.java Thu Dec 01 05:21:40 2011 +0100 +++ b/src/org/tmatesoft/hg/repo/HgDataFile.java Tue Dec 06 12:57:21 2011 +0100 @@ -159,6 +159,8 @@ } } } else { + // FIXME not TIP, but revision according to dirstate!!! + // add tests for this case contentWithFilters(TIP, sink); } } @@ -219,12 +221,12 @@ } ErrorHandlingInspector insp; if (metadata.none(revision)) { - insp = new ContentPipe(sink, 0); + insp = new ContentPipe(sink, 0, getRepo().getContext().getLog()); } else if (metadata.known(revision)) { - insp = new ContentPipe(sink, metadata.dataOffset(revision)); + insp = new ContentPipe(sink, metadata.dataOffset(revision), getRepo().getContext().getLog()); } else { // do not know if there's metadata - insp = new MetadataInspector(metadata, getPath(), new ContentPipe(sink, 0)); + insp = new MetadataInspector(metadata, getPath(), new ContentPipe(sink, 0, getRepo().getContext().getLog())); } insp.checkCancelled(); super.content.iterate(revision, revision, true, insp); diff -r 5f9073eabf06 -r f2c11fe7f3e9 src/org/tmatesoft/hg/repo/HgWorkingCopyStatusCollector.java --- a/src/org/tmatesoft/hg/repo/HgWorkingCopyStatusCollector.java Thu Dec 01 05:21:40 2011 +0100 +++ b/src/org/tmatesoft/hg/repo/HgWorkingCopyStatusCollector.java Tue Dec 06 12:57:21 2011 +0100 @@ -39,6 +39,7 @@ import org.tmatesoft.hg.internal.FilterByteChannel; import org.tmatesoft.hg.internal.ManifestRevision; import org.tmatesoft.hg.internal.PathScope; +import org.tmatesoft.hg.internal.Preview; import org.tmatesoft.hg.util.ByteChannel; import org.tmatesoft.hg.util.CancelledException; import org.tmatesoft.hg.util.FileInfo; @@ -469,6 +470,22 @@ is = f.newInputChannel(); ByteBuffer fb = ByteBuffer.allocate(min(1 + data.length * 2 /*to fit couple of lines appended; never zero*/, 8192)); FilterByteChannel filters = new FilterByteChannel(check, repo.getFiltersFromWorkingDirToRepo(p)); + Preview preview = filters.getAdapter(Preview.class); + if (preview != null) { + while (is.read(fb) != -1) { + fb.flip(); + preview.preview(fb); + fb.clear(); + } + // reset channel to read once again + try { + is.close(); + } catch (IOException ex) { + repo.getContext().getLog().info(getClass(), ex, null); + } + is = f.newInputChannel(); + fb.clear(); + } while (is.read(fb) != -1 && check.sameSoFar()) { fb.flip(); filters.write(fb); diff -r 5f9073eabf06 -r f2c11fe7f3e9 src/org/tmatesoft/hg/repo/Revlog.java --- a/src/org/tmatesoft/hg/repo/Revlog.java Thu Dec 01 05:21:40 2011 +0100 +++ b/src/org/tmatesoft/hg/repo/Revlog.java Tue Dec 06 12:57:21 2011 +0100 @@ -36,11 +36,13 @@ import org.tmatesoft.hg.internal.ArrayHelper; import org.tmatesoft.hg.internal.DataAccess; import org.tmatesoft.hg.internal.Experimental; +import org.tmatesoft.hg.internal.Preview; import org.tmatesoft.hg.internal.RevlogStream; import org.tmatesoft.hg.util.Adaptable; import org.tmatesoft.hg.util.ByteChannel; import org.tmatesoft.hg.util.CancelSupport; import org.tmatesoft.hg.util.CancelledException; +import org.tmatesoft.hg.util.LogFacility; import org.tmatesoft.hg.util.ProgressSupport; @@ -178,7 +180,7 @@ if (sink == null) { throw new IllegalArgumentException(); } - ContentPipe insp = new ContentPipe(sink, 0); + ContentPipe insp = new ContentPipe(sink, 0, repo.getContext().getLog()); insp.checkCancelled(); content.iterate(revision, revision, true, insp); insp.checkFailed(); @@ -596,16 +598,19 @@ protected static class ContentPipe extends ErrorHandlingInspector implements RevlogStream.Inspector, CancelSupport { private final ByteChannel sink; private final int offset; + private final LogFacility logFacility; /** * @param _sink - cannot be null * @param seekOffset - when positive, orders to pipe bytes to the sink starting from specified offset, not from the first byte available in DataAccess + * @param log optional facility to put warnings/debug messages into, may be null. */ - public ContentPipe(ByteChannel _sink, int seekOffset) { + public ContentPipe(ByteChannel _sink, int seekOffset, LogFacility log) { assert _sink != null; sink = _sink; setCancelSupport(CancelSupport.Factory.get(_sink)); offset = seekOffset; + logFacility = log; } protected void prepare(int revisionNumber, DataAccess da) throws HgException, IOException { @@ -618,16 +623,37 @@ try { prepare(revisionNumber, da); // XXX perhaps, prepare shall return DA (sliced, if needed) final ProgressSupport progressSupport = ProgressSupport.Factory.get(sink); - ByteBuffer buf = ByteBuffer.allocate(512); - progressSupport.start(da.length()); + ByteBuffer buf = ByteBuffer.allocate(actualLen > 8192 ? 8192 : actualLen); + Preview p = getAdapter(sink, Preview.class); + if (p != null) { + progressSupport.start(2 * da.length()); + while (!da.isEmpty()) { + checkCancelled(); + da.readBytes(buf); + p.preview(buf); + buf.clear(); + } + da.reset(); + prepare(revisionNumber, da); + progressSupport.worked(da.length()); + buf.clear(); + } else { + progressSupport.start(da.length()); + } while (!da.isEmpty()) { checkCancelled(); da.readBytes(buf); - buf.flip(); + buf.flip(); // post: position == 0 // XXX I may not rely on returned number of bytes but track change in buf position instead. - int consumed = sink.write(buf); - // FIXME in fact, bad sink implementation (that consumes no bytes) would result in endless loop. Need to account for this - buf.compact(); + + int consumed = sink.write(buf); + if ((consumed == 0 || consumed != buf.position()) && logFacility != null) { + logFacility.warn(getClass(), "Bad data sink when reading revision %d. Reported %d bytes consumed, byt actually read %d", revisionNumber, consumed, buf.position()); + } + if (buf.position() == 0) { + throw new HgBadStateException("Bad sink implementation (consumes no bytes) results in endless loop"); + } + buf.compact(); // ensure (a) there's space for new (b) data starts at 0 progressSupport.worked(consumed); } progressSupport.done(); // XXX shall specify whether #done() is invoked always or only if completed successfully. diff -r 5f9073eabf06 -r f2c11fe7f3e9 test/org/tmatesoft/hg/test/TestNewlineFilter.java --- a/test/org/tmatesoft/hg/test/TestNewlineFilter.java Thu Dec 01 05:21:40 2011 +0100 +++ b/test/org/tmatesoft/hg/test/TestNewlineFilter.java Tue Dec 06 12:57:21 2011 +0100 @@ -89,22 +89,16 @@ @Test public void testStrictMixed_CRLF_2_LF() { - try { - byte[] result = apply(NewlineFilter.createWin2Nix(false), crlf_2.getBytes()); - Assert.fail("Shall fail when eol.only-consistent is true:" + new String(result)); - } catch (RuntimeException ex) { - // fine - } + final byte[] input = crlf_2.getBytes(); + byte[] result = apply(NewlineFilter.createWin2Nix(false), input); + Assert.assertArrayEquals(input, result); } @Test public void testStrictMixed_LF_2_CRLF() { - try { - byte[] result = apply(NewlineFilter.createNix2Win(false), lf_2.getBytes()); - Assert.fail("Shall fail when eol.only-consistent is true:" + new String(result)); - } catch (RuntimeException ex) { - // fine - } + final byte[] input = lf_2.getBytes(); + byte[] result = apply(NewlineFilter.createNix2Win(false), input); + Assert.assertArrayEquals(input, result); } @Test @@ -116,6 +110,9 @@ NewlineFilter nlFilter = NewlineFilter.createWin2Nix(false); ByteBuffer input = ByteBuffer.allocate(i1.length + i2.length); ByteBuffer res = ByteBuffer.allocate(i1.length + i2.length); // at most of the original size + nlFilter.preview(ByteBuffer.wrap(i1)); + nlFilter.preview(ByteBuffer.wrap(i2)); + // input.put(i1).flip(); res.put(nlFilter.filter(input)); Assert.assertTrue("Unpocessed chars shall be left in input buffer", input.remaining() > 0); @@ -136,6 +133,11 @@ res.clear(); input.clear(); input.put(i1).put("\r\r\r".getBytes()).flip(); + // preview requred + nlFilter.preview(input); + nlFilter.preview(ByteBuffer.wrap(i2)); + // input.position(0); correctly written preview shall not affect buffer position + // res.put(nlFilter.filter(input)); Assert.assertTrue("Unpocessed chars shall be left in input buffer", input.remaining() > 0); input.compact(); @@ -179,7 +181,10 @@ } private static byte[] apply(NewlineFilter nlFilter, byte[] input) { - ByteBuffer result = nlFilter.filter(ByteBuffer.wrap(input)); + final ByteBuffer inputBuffer = ByteBuffer.wrap(input); + nlFilter.preview(inputBuffer); + // inputBuffer.position(0); correctly written filter shall not affect buffer position + ByteBuffer result = nlFilter.filter(inputBuffer); byte[] res = new byte[result.remaining()]; result.get(res); return res;