tikhomirov@113: /* tikhomirov@456: * Copyright (c) 2011-2012 TMate Software Ltd tikhomirov@113: * tikhomirov@113: * This program is free software; you can redistribute it and/or modify tikhomirov@113: * it under the terms of the GNU General Public License as published by tikhomirov@113: * the Free Software Foundation; version 2 of the License. tikhomirov@113: * tikhomirov@113: * This program is distributed in the hope that it will be useful, tikhomirov@113: * but WITHOUT ANY WARRANTY; without even the implied warranty of tikhomirov@113: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the tikhomirov@113: * GNU General Public License for more details. tikhomirov@113: * tikhomirov@113: * For information on how to redistribute this software under tikhomirov@113: * the terms of a license other than GNU General Public License tikhomirov@130: * contact TMate Software at support@hg4j.com tikhomirov@113: */ tikhomirov@113: package org.tmatesoft.hg.internal; tikhomirov@113: tikhomirov@355: import static java.lang.Math.max; tikhomirov@355: import static java.lang.Math.min; tikhomirov@113: import static org.tmatesoft.hg.internal.Filter.Direction.FromRepo; tikhomirov@113: import static org.tmatesoft.hg.internal.Filter.Direction.ToRepo; tikhomirov@113: import static org.tmatesoft.hg.internal.KeywordFilter.copySlice; tikhomirov@456: import static org.tmatesoft.hg.util.LogFacility.Severity.Warn; tikhomirov@113: tikhomirov@113: import java.io.File; tikhomirov@113: import java.nio.ByteBuffer; tikhomirov@114: import java.util.ArrayList; tikhomirov@114: import java.util.Map; tikhomirov@113: tikhomirov@483: import org.tmatesoft.hg.repo.HgInvalidFileException; tikhomirov@423: import org.tmatesoft.hg.repo.HgInvalidStateException; tikhomirov@113: import org.tmatesoft.hg.repo.HgRepository; tikhomirov@356: import org.tmatesoft.hg.util.Adaptable; tikhomirov@133: import org.tmatesoft.hg.util.Path; tikhomirov@113: tikhomirov@113: /** tikhomirov@113: * tikhomirov@113: * @author Artem Tikhomirov tikhomirov@113: * @author TMate Software Ltd. tikhomirov@113: */ tikhomirov@356: public class NewlineFilter implements Filter, Preview, Adaptable { tikhomirov@113: tikhomirov@355: // if processInconsistent is false, filter simply pass incorrect newline characters (single \r or \r\n on *nix and single \n on Windows) as is, tikhomirov@355: // i.e. doesn't try to convert them into appropriate newline characters. tikhomirov@355: // XXX revisit if Keyword extension behaves differently - WTF??? tikhomirov@353: private final boolean processInconsistent; tikhomirov@113: private final boolean winToNix; tikhomirov@355: tikhomirov@355: // NOTE, if processInconsistent == true, foundCRLF and foundLoneLF are not initialized tikhomirov@355: private boolean foundLoneLF = false; tikhomirov@355: private boolean foundCRLF = false; tikhomirov@113: tikhomirov@355: // next two factory methods for test purposes tikhomirov@355: public static NewlineFilter createWin2Nix(boolean processMixed) { tikhomirov@355: return new NewlineFilter(!processMixed, 0); tikhomirov@352: } tikhomirov@352: tikhomirov@355: public static NewlineFilter createNix2Win(boolean processMixed) { tikhomirov@355: return new NewlineFilter(!processMixed, 1); tikhomirov@352: } tikhomirov@352: tikhomirov@353: private NewlineFilter(boolean onlyConsistent, int transform) { tikhomirov@113: winToNix = transform == 0; tikhomirov@353: processInconsistent = !onlyConsistent; tikhomirov@113: } tikhomirov@113: tikhomirov@113: public ByteBuffer filter(ByteBuffer src) { tikhomirov@356: if (!processInconsistent && !previewDone) { tikhomirov@423: throw new HgInvalidStateException("This filter requires preview operation prior to actual filtering when eol.only-consistent is true"); tikhomirov@355: } tikhomirov@355: if (!processInconsistent && foundLoneLF && foundCRLF) { tikhomirov@355: // do not process inconsistent newlines tikhomirov@355: return src; tikhomirov@355: } tikhomirov@113: if (winToNix) { tikhomirov@355: if (!processInconsistent && !foundCRLF) { tikhomirov@355: // no reason to process if no CRLF in the data stream tikhomirov@355: return src; tikhomirov@355: } tikhomirov@113: return win2nix(src); tikhomirov@113: } else { tikhomirov@355: if (!processInconsistent && !foundLoneLF) { tikhomirov@355: return src; tikhomirov@355: } tikhomirov@113: return nix2win(src); tikhomirov@113: } tikhomirov@113: } tikhomirov@353: tikhomirov@356: public T getAdapter(Class adapterClass) { tikhomirov@356: // conditionally through getAdapter tikhomirov@356: if (Preview.class == adapterClass) { tikhomirov@356: // when processInconsistent is false, we need to preview data stream to ensure line terminators are consistent. tikhomirov@356: // otherwise, no need to look into the stream tikhomirov@356: if (!processInconsistent) { tikhomirov@356: return adapterClass.cast(this); tikhomirov@356: } tikhomirov@356: } tikhomirov@356: return null; tikhomirov@356: } tikhomirov@356: tikhomirov@355: private boolean prevBufLastByteWasCR = false; tikhomirov@355: private boolean previewDone = false; tikhomirov@355: tikhomirov@355: public void preview(ByteBuffer src) { tikhomirov@355: previewDone = true; // guard tikhomirov@355: if (processInconsistent) { tikhomirov@355: // gonna handle them anyway, no need to check. TODO Do not implement Preview directly, but rather tikhomirov@355: return; tikhomirov@355: } tikhomirov@355: if (foundLoneLF && foundCRLF) { tikhomirov@355: // already know it's inconsistent tikhomirov@355: return; tikhomirov@355: } tikhomirov@355: final byte CR = (byte) '\r'; tikhomirov@355: final byte LF = (byte) '\n'; tikhomirov@355: int x = src.position(); tikhomirov@355: while (x < src.limit()) { tikhomirov@355: int in = indexOf(LF, src, x); tikhomirov@355: if (in == -1) { tikhomirov@355: // no line feed, but what if it's CRLF broken in the middle? tikhomirov@355: prevBufLastByteWasCR = CR == src.get(src.limit() - 1); tikhomirov@355: return; tikhomirov@355: } tikhomirov@355: if (in == 0) { tikhomirov@355: if (prevBufLastByteWasCR) { tikhomirov@355: foundCRLF = true; tikhomirov@355: } else { tikhomirov@355: foundLoneLF = true; tikhomirov@355: } tikhomirov@355: } else { // in > 0 && in >= x tikhomirov@355: if (src.get(in - 1) == CR) { tikhomirov@355: foundCRLF = true; tikhomirov@355: } else { tikhomirov@355: foundLoneLF = true; tikhomirov@355: } tikhomirov@355: } tikhomirov@355: if (foundCRLF && foundLoneLF) { tikhomirov@355: return; tikhomirov@355: } tikhomirov@355: x = in + 1; tikhomirov@355: } tikhomirov@355: } tikhomirov@113: tikhomirov@113: private ByteBuffer win2nix(ByteBuffer src) { tikhomirov@353: int lookupStart = src.position(); // source index tikhomirov@113: ByteBuffer dst = null; tikhomirov@353: final byte CR = (byte) '\r'; tikhomirov@353: final byte LF = (byte) '\n'; tikhomirov@353: while (lookupStart < src.limit()) { tikhomirov@113: // x, lookupStart, ir and in are absolute positions within src buffer, which is never read with modifying operations tikhomirov@353: int ir = indexOf(CR, src, lookupStart); tikhomirov@353: int in = indexOf(LF, src, lookupStart); tikhomirov@353: if (in != -1) { tikhomirov@353: if (ir == -1 || ir > in) { tikhomirov@355: // lone LF. CR, if present, goes after LF, process up to that lone, closest LF; let next iteration decide what to do with CR@ir tikhomirov@355: if (!processInconsistent && foundCRLF) { tikhomirov@355: assert foundLoneLF == true : "preview() shall initialize this"; tikhomirov@355: fail(src, in); tikhomirov@355: } tikhomirov@353: dst = consume(src, lookupStart, in+1, dst); tikhomirov@353: lookupStart = in + 1; tikhomirov@113: } else { tikhomirov@353: // ir < in tikhomirov@353: if (onlyCRup2limit(src, ir, in)) { tikhomirov@353: // CR...CRLF; tikhomirov@355: if (!processInconsistent && foundLoneLF) { tikhomirov@355: assert foundCRLF == true : "preview() shall initialize this"; tikhomirov@355: fail(src, ir); tikhomirov@355: } tikhomirov@353: dst = consume(src, lookupStart, ir, dst); tikhomirov@353: dst.put(LF); tikhomirov@353: lookupStart = in+1; tikhomirov@353: } else { tikhomirov@353: // CR...CR...^CR....LF tikhomirov@353: dst = consume(src, lookupStart, ir+1, dst); tikhomirov@353: // although can search for ^CR, here I copy CR one by one as I don't expect huge sequences of CR to optimize for tikhomirov@353: lookupStart = ir+1; tikhomirov@353: } tikhomirov@113: } tikhomirov@353: } else { tikhomirov@353: // no newlines tikhomirov@353: if (ir != -1 && onlyCRup2limit(src, ir, src.limit())) { tikhomirov@353: // \r as last character(s) is the only case we care about when there're no LF found tikhomirov@353: // cases like \r\r\r\n shall be handled like \r\n, hence onlyCRup2limit tikhomirov@353: dst = consume(src, lookupStart, ir, dst); tikhomirov@353: lookupStart = src.limit() - 1; // leave only last CR for next buffer tikhomirov@353: } else { tikhomirov@353: // consume all. don't create a copy of src if there's no dst yet tikhomirov@353: if (dst != null) { tikhomirov@353: copySlice(src, lookupStart, src.limit(), dst); tikhomirov@353: lookupStart = src.limit(); tikhomirov@353: } tikhomirov@353: } tikhomirov@113: break; tikhomirov@113: } tikhomirov@353: } tikhomirov@353: src.position(lookupStart); // mark we've consumed up to x tikhomirov@353: return dst == null ? src : (ByteBuffer) dst.flip(); tikhomirov@353: } tikhomirov@353: tikhomirov@353: // true if [from..limit) are CR tikhomirov@353: private static boolean onlyCRup2limit(ByteBuffer src, int from, int limit) { tikhomirov@353: // extended version of (ir+1 == src.limit()): check all in [ir..src.limit) are CR tikhomirov@353: for (int i = from; i < limit; i++) { tikhomirov@353: if (src.get(i) != '\r') { tikhomirov@353: return false; tikhomirov@113: } tikhomirov@113: } tikhomirov@353: return true; tikhomirov@353: } tikhomirov@353: private static ByteBuffer consume(ByteBuffer src, int from, int to, ByteBuffer dst) { tikhomirov@353: if (dst == null) { tikhomirov@353: dst = ByteBuffer.allocate(src.remaining()); tikhomirov@353: } tikhomirov@353: copySlice(src, from, to, dst); tikhomirov@353: return dst; tikhomirov@113: } tikhomirov@113: tikhomirov@113: private ByteBuffer nix2win(ByteBuffer src) { tikhomirov@113: int x = src.position(); tikhomirov@113: ByteBuffer dst = null; tikhomirov@353: final byte CR = (byte) '\r'; tikhomirov@353: final byte LF = (byte) '\n'; tikhomirov@113: while (x < src.limit()) { tikhomirov@353: int in = indexOf(LF, src, x); tikhomirov@353: if (in != -1) { tikhomirov@353: if (in > x && src.get(in - 1) == CR) { tikhomirov@355: // found CRLF tikhomirov@355: if (!processInconsistent && foundLoneLF) { tikhomirov@355: assert foundCRLF == true : "preview() shall initialize this"; tikhomirov@355: fail(src, in-1); tikhomirov@355: } tikhomirov@353: if (dst == null) { tikhomirov@353: dst = ByteBuffer.allocate(src.remaining() * 2); tikhomirov@353: } tikhomirov@353: copySlice(src, x, in+1, dst); tikhomirov@353: x = in + 1; tikhomirov@113: } else { tikhomirov@353: // found stand-alone LF, need to output CRLF tikhomirov@355: if (!processInconsistent && foundCRLF) { tikhomirov@355: assert foundLoneLF == true : "preview() shall initialize this"; tikhomirov@355: fail(src, in); tikhomirov@355: } tikhomirov@353: if (dst == null) { tikhomirov@353: dst = ByteBuffer.allocate(src.remaining() * 2); tikhomirov@353: } tikhomirov@353: copySlice(src, x, in, dst); tikhomirov@353: dst.put(CR); tikhomirov@353: dst.put(LF); tikhomirov@353: x = in + 1; tikhomirov@113: } tikhomirov@353: } else { tikhomirov@353: // no newlines (no LF), just copy what left tikhomirov@353: if (dst != null) { tikhomirov@353: copySlice(src, x, src.limit(), dst); tikhomirov@353: x = src.limit(); tikhomirov@353: } tikhomirov@353: break; tikhomirov@113: } tikhomirov@113: } tikhomirov@113: src.position(x); tikhomirov@113: return dst == null ? src : (ByteBuffer) dst.flip(); tikhomirov@113: } tikhomirov@113: tikhomirov@113: tikhomirov@355: // Test: nlFilter.fail(ByteBuffer.wrap(new "test string".getBytes()), 5); tikhomirov@113: private void fail(ByteBuffer b, int pos) { tikhomirov@355: StringBuilder sb = new StringBuilder(); tikhomirov@355: for (int i = max(pos-10, 0), x = min(pos + 10, b.limit()); i < x; i++) { tikhomirov@355: sb.append(String.format("%02x ", b.get(i))); tikhomirov@355: } tikhomirov@423: // TODO post-1.0 need HgBadDataException (not InvalidState but smth closer to data stream error) tikhomirov@423: // but don't want to add class for the single use now tikhomirov@423: throw new HgInvalidStateException(String.format("Inconsistent newline characters in the stream %s (char 0x%x, local index:%d)", sb.toString(), b.get(pos), pos)); tikhomirov@113: } tikhomirov@113: tikhomirov@353: private static int indexOf(byte ch, ByteBuffer b, int from) { tikhomirov@113: return indexOf(ch, b, from, b.limit()); tikhomirov@113: } tikhomirov@113: tikhomirov@113: // looks up in buf[from..to) tikhomirov@353: private static int indexOf(byte ch, ByteBuffer b, int from, int to) { tikhomirov@113: for (int i = from; i < to; i++) { tikhomirov@113: byte c = b.get(i); tikhomirov@113: if (ch == c) { tikhomirov@113: return i; tikhomirov@113: } tikhomirov@113: } tikhomirov@113: return -1; tikhomirov@113: } tikhomirov@113: tikhomirov@113: public static class Factory implements Filter.Factory { tikhomirov@353: private boolean processOnlyConsistent = true; tikhomirov@114: private Path.Matcher lfMatcher; tikhomirov@114: private Path.Matcher crlfMatcher; tikhomirov@114: private Path.Matcher binMatcher; tikhomirov@114: private Path.Matcher nativeMatcher; tikhomirov@114: private String nativeRepoFormat; tikhomirov@114: private String nativeOSFormat; tikhomirov@113: tikhomirov@331: public void initialize(HgRepository hgRepo) { tikhomirov@353: processOnlyConsistent = hgRepo.getConfiguration().getBooleanValue("eol", "only-consistent", true); tikhomirov@237: File cfgFile = new File(hgRepo.getWorkingDir(), ".hgeol"); tikhomirov@114: if (!cfgFile.canRead()) { tikhomirov@114: return; tikhomirov@113: } tikhomirov@114: // XXX if .hgeol is not checked out, we may get it from repository tikhomirov@114: // HgDataFile cfgFileNode = hgRepo.getFileNode(".hgeol"); tikhomirov@114: // if (!cfgFileNode.exists()) { tikhomirov@114: // return; tikhomirov@114: // } tikhomirov@114: // XXX perhaps, add HgDataFile.hasWorkingCopy and workingCopyContent()? tikhomirov@490: ConfigFile hgeol = new ConfigFile(hgRepo.getSessionContext()); tikhomirov@295: try { tikhomirov@295: hgeol.addLocation(cfgFile); tikhomirov@483: } catch (HgInvalidFileException ex) { tikhomirov@490: hgRepo.getSessionContext().getLog().dump(getClass(), Warn, ex, null); tikhomirov@295: } tikhomirov@114: nativeRepoFormat = hgeol.getSection("repository").get("native"); tikhomirov@114: if (nativeRepoFormat == null) { tikhomirov@114: nativeRepoFormat = "LF"; tikhomirov@114: } tikhomirov@114: final String os = System.getProperty("os.name"); // XXX need centralized set of properties tikhomirov@114: nativeOSFormat = os.indexOf("Windows") != -1 ? "CRLF" : "LF"; tikhomirov@114: // I assume pattern ordering in .hgeol is not important tikhomirov@114: ArrayList lfPatterns = new ArrayList(); tikhomirov@114: ArrayList crlfPatterns = new ArrayList(); tikhomirov@114: ArrayList nativePatterns = new ArrayList(); tikhomirov@114: ArrayList binPatterns = new ArrayList(); tikhomirov@114: for (Map.Entry e : hgeol.getSection("patterns").entrySet()) { tikhomirov@114: if ("CRLF".equals(e.getValue())) { tikhomirov@114: crlfPatterns.add(e.getKey()); tikhomirov@114: } else if ("LF".equals(e.getValue())) { tikhomirov@114: lfPatterns.add(e.getKey()); tikhomirov@114: } else if ("native".equals(e.getValue())) { tikhomirov@114: nativePatterns.add(e.getKey()); tikhomirov@114: } else if ("BIN".equals(e.getValue())) { tikhomirov@114: binPatterns.add(e.getKey()); tikhomirov@114: } else { tikhomirov@490: hgRepo.getSessionContext().getLog().dump(getClass(), Warn, "Can't recognize .hgeol entry: %s for %s", e.getValue(), e.getKey()); tikhomirov@114: } tikhomirov@114: } tikhomirov@114: if (!crlfPatterns.isEmpty()) { tikhomirov@114: crlfMatcher = new PathGlobMatcher(crlfPatterns.toArray(new String[crlfPatterns.size()])); tikhomirov@114: } tikhomirov@114: if (!lfPatterns.isEmpty()) { tikhomirov@114: lfMatcher = new PathGlobMatcher(lfPatterns.toArray(new String[lfPatterns.size()])); tikhomirov@114: } tikhomirov@114: if (!binPatterns.isEmpty()) { tikhomirov@114: binMatcher = new PathGlobMatcher(binPatterns.toArray(new String[binPatterns.size()])); tikhomirov@114: } tikhomirov@114: if (!nativePatterns.isEmpty()) { tikhomirov@114: nativeMatcher = new PathGlobMatcher(nativePatterns.toArray(new String[nativePatterns.size()])); tikhomirov@114: } tikhomirov@114: } tikhomirov@114: tikhomirov@114: public Filter create(Path path, Options opts) { tikhomirov@114: if (binMatcher == null && crlfMatcher == null && lfMatcher == null && nativeMatcher == null) { tikhomirov@114: // not initialized - perhaps, no .hgeol found tikhomirov@114: return null; tikhomirov@114: } tikhomirov@114: if (binMatcher != null && binMatcher.accept(path)) { tikhomirov@114: return null; tikhomirov@114: } tikhomirov@114: if (crlfMatcher != null && crlfMatcher.accept(path)) { tikhomirov@353: return new NewlineFilter(processOnlyConsistent, 1); tikhomirov@114: } else if (lfMatcher != null && lfMatcher.accept(path)) { tikhomirov@353: return new NewlineFilter(processOnlyConsistent, 0); tikhomirov@114: } else if (nativeMatcher != null && nativeMatcher.accept(path)) { tikhomirov@114: if (nativeOSFormat.equals(nativeRepoFormat)) { tikhomirov@114: return null; tikhomirov@114: } tikhomirov@114: if (opts.getDirection() == FromRepo) { tikhomirov@114: int transform = "CRLF".equals(nativeOSFormat) ? 1 : 0; tikhomirov@353: return new NewlineFilter(processOnlyConsistent, transform); tikhomirov@114: } else if (opts.getDirection() == ToRepo) { tikhomirov@114: int transform = "CRLF".equals(nativeOSFormat) ? 0 : 1; tikhomirov@353: return new NewlineFilter(processOnlyConsistent, transform); tikhomirov@114: } tikhomirov@114: return null; tikhomirov@114: } tikhomirov@114: return null; tikhomirov@113: } tikhomirov@113: } tikhomirov@113: }