comparison src/org/tmatesoft/hg/internal/NewlineFilter.java @ 355:f2c11fe7f3e9

The newline filter must take the whole stream into account when deciding whether to process line terminators; stream preview functionality was added to make that possible
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Tue, 06 Dec 2011 12:57:21 +0100
parents 0f3687e79f5a
children 91d75e1bac9f
comparison
equal deleted inserted replaced
354:5f9073eabf06 355:f2c11fe7f3e9
14 * the terms of a license other than GNU General Public License 14 * the terms of a license other than GNU General Public License
15 * contact TMate Software at support@hg4j.com 15 * contact TMate Software at support@hg4j.com
16 */ 16 */
17 package org.tmatesoft.hg.internal; 17 package org.tmatesoft.hg.internal;
18 18
19 import static java.lang.Math.max;
20 import static java.lang.Math.min;
19 import static org.tmatesoft.hg.internal.Filter.Direction.FromRepo; 21 import static org.tmatesoft.hg.internal.Filter.Direction.FromRepo;
20 import static org.tmatesoft.hg.internal.Filter.Direction.ToRepo; 22 import static org.tmatesoft.hg.internal.Filter.Direction.ToRepo;
21 import static org.tmatesoft.hg.internal.KeywordFilter.copySlice; 23 import static org.tmatesoft.hg.internal.KeywordFilter.copySlice;
22 24
23 import java.io.File; 25 import java.io.File;
24 import java.io.IOException; 26 import java.io.IOException;
25 import java.nio.ByteBuffer; 27 import java.nio.ByteBuffer;
26 import java.util.ArrayList; 28 import java.util.ArrayList;
27 import java.util.Map; 29 import java.util.Map;
28 30
31 import org.tmatesoft.hg.core.HgBadStateException;
29 import org.tmatesoft.hg.repo.HgInternals; 32 import org.tmatesoft.hg.repo.HgInternals;
30 import org.tmatesoft.hg.repo.HgRepository; 33 import org.tmatesoft.hg.repo.HgRepository;
31 import org.tmatesoft.hg.util.Path; 34 import org.tmatesoft.hg.util.Path;
32 35
33 /** 36 /**
34 * 37 *
35 * @author Artem Tikhomirov 38 * @author Artem Tikhomirov
36 * @author TMate Software Ltd. 39 * @author TMate Software Ltd.
37 */ 40 */
38 public class NewlineFilter implements Filter { 41 public class NewlineFilter implements Filter, Preview {
39 42
40 // if allowInconsistent is true, filter simply pass incorrect newline characters (single \r or \r\n on *nix and single \n on Windows) as is, 43 // if processInconsistent is false, filter simply pass incorrect newline characters (single \r or \r\n on *nix and single \n on Windows) as is,
41 // i.e. doesn't try to convert them into appropriate newline characters. XXX revisit if Keyword extension behaves differently 44 // i.e. doesn't try to convert them into appropriate newline characters.
45 // XXX revisit if Keyword extension behaves differently - WTF???
42 private final boolean processInconsistent; 46 private final boolean processInconsistent;
43 private final boolean winToNix; 47 private final boolean winToNix;
44
45 // next two factory methods for testsing purposes
46 public static NewlineFilter createWin2Nix(boolean allowMixed) {
47 return new NewlineFilter(!allowMixed, 0);
48 }
49 48
50 public static NewlineFilter createNix2Win(boolean allowMixed) { 49 // NOTE, if processInconsistent == true, foundCRLF and foundLoneLF are not initialized
51 return new NewlineFilter(!allowMixed, 1); 50 private boolean foundLoneLF = false;
51 private boolean foundCRLF = false;
52
53 // next two factory methods for test purposes
54 public static NewlineFilter createWin2Nix(boolean processMixed) {
55 return new NewlineFilter(!processMixed, 0);
56 }
57
58 public static NewlineFilter createNix2Win(boolean processMixed) {
59 return new NewlineFilter(!processMixed, 1);
52 } 60 }
53 61
54 private NewlineFilter(boolean onlyConsistent, int transform) { 62 private NewlineFilter(boolean onlyConsistent, int transform) {
55 winToNix = transform == 0; 63 winToNix = transform == 0;
56 processInconsistent = !onlyConsistent; 64 processInconsistent = !onlyConsistent;
57 } 65 }
58 66
59 public ByteBuffer filter(ByteBuffer src) { 67 public ByteBuffer filter(ByteBuffer src) {
68 if (!previewDone) {
69 throw new HgBadStateException("This filter requires preview operation prior to actual filtering");
70 }
71 if (!processInconsistent && foundLoneLF && foundCRLF) {
72 // do not process inconsistent newlines
73 return src;
74 }
60 if (winToNix) { 75 if (winToNix) {
76 if (!processInconsistent && !foundCRLF) {
77 // no reason to process if no CRLF in the data stream
78 return src;
79 }
61 return win2nix(src); 80 return win2nix(src);
62 } else { 81 } else {
82 if (!processInconsistent && !foundLoneLF) {
83 return src;
84 }
63 return nix2win(src); 85 return nix2win(src);
64 } 86 }
65 } 87 }
66 88
67 private boolean foundLoneLF = false; 89 private boolean prevBufLastByteWasCR = false;
68 private boolean foundCRLF = false; 90 private boolean previewDone = false;
91
92 public void preview(ByteBuffer src) {
93 previewDone = true; // guard
94 if (processInconsistent) {
95 // inconsistent newlines are processed anyway, so there is no need to scan for them. TODO Do not implement Preview directly, but rather
96 // expose it conditionally through getAdapter, only when processInconsistent is false (sic!)
97 return;
98 }
99 if (foundLoneLF && foundCRLF) {
100 // already know it's inconsistent
101 return;
102 }
103 final byte CR = (byte) '\r';
104 final byte LF = (byte) '\n';
105 int x = src.position();
106 while (x < src.limit()) {
107 int in = indexOf(LF, src, x);
108 if (in == -1) {
109 // no line feed, but what if it's CRLF broken in the middle?
110 prevBufLastByteWasCR = CR == src.get(src.limit() - 1);
111 return;
112 }
113 if (in == 0) {
114 if (prevBufLastByteWasCR) {
115 foundCRLF = true;
116 } else {
117 foundLoneLF = true;
118 }
119 } else { // in > 0 && in >= x
120 if (src.get(in - 1) == CR) {
121 foundCRLF = true;
122 } else {
123 foundLoneLF = true;
124 }
125 }
126 if (foundCRLF && foundLoneLF) {
127 return;
128 }
129 x = in + 1;
130 }
131 }
69 132
70 private ByteBuffer win2nix(ByteBuffer src) { 133 private ByteBuffer win2nix(ByteBuffer src) {
71 int lookupStart = src.position(); // source index 134 int lookupStart = src.position(); // source index
72 ByteBuffer dst = null; 135 ByteBuffer dst = null;
73 final byte CR = (byte) '\r'; 136 final byte CR = (byte) '\r';
76 // x, lookupStart, ir and in are absolute positions within src buffer, which is never read with modifying operations 139 // x, lookupStart, ir and in are absolute positions within src buffer, which is never read with modifying operations
77 int ir = indexOf(CR, src, lookupStart); 140 int ir = indexOf(CR, src, lookupStart);
78 int in = indexOf(LF, src, lookupStart); 141 int in = indexOf(LF, src, lookupStart);
79 if (in != -1) { 142 if (in != -1) {
80 if (ir == -1 || ir > in) { 143 if (ir == -1 || ir > in) {
81 // lone LF. CR, if present, goes after LF, process up to closest LF, let next iteration decide what to do with CR@ir 144 // lone LF. CR, if present, goes after LF, process up to that lone, closest LF; let next iteration decide what to do with CR@ir
82 foundLoneLF = true; 145 if (!processInconsistent && foundCRLF) {
83 // XXX respect onlyConsistent. if foundCRLF then shall not process further 146 assert foundLoneLF == true : "preview() shall initialize this";
147 fail(src, in);
148 }
84 dst = consume(src, lookupStart, in+1, dst); 149 dst = consume(src, lookupStart, in+1, dst);
85 lookupStart = in + 1; 150 lookupStart = in + 1;
86 } else { 151 } else {
87 // ir < in 152 // ir < in
88 if (onlyCRup2limit(src, ir, in)) { 153 if (onlyCRup2limit(src, ir, in)) {
89 // CR...CRLF; 154 // CR...CRLF;
90 foundCRLF = true; 155 if (!processInconsistent && foundLoneLF) {
91 // XXX respect onlyConsistent. if foundLoneLF then shall not process further 156 assert foundCRLF == true : "preview() shall initialize this";
157 fail(src, ir);
158 }
92 dst = consume(src, lookupStart, ir, dst); 159 dst = consume(src, lookupStart, ir, dst);
93 dst.put(LF); 160 dst.put(LF);
94 lookupStart = in+1; 161 lookupStart = in+1;
95 } else { 162 } else {
96 // CR...CR...^CR....LF 163 // CR...CR...^CR....LF
145 final byte LF = (byte) '\n'; 212 final byte LF = (byte) '\n';
146 while (x < src.limit()) { 213 while (x < src.limit()) {
147 int in = indexOf(LF, src, x); 214 int in = indexOf(LF, src, x);
148 if (in != -1) { 215 if (in != -1) {
149 if (in > x && src.get(in - 1) == CR) { 216 if (in > x && src.get(in - 1) == CR) {
150 foundCRLF = true; 217 // found CRLF
151 // XXX respect onlyConsistent. if foundLoneLF then shall not process further 218 if (!processInconsistent && foundLoneLF) {
219 assert foundCRLF == true : "preview() shall initialize this";
220 fail(src, in-1);
221 }
152 if (dst == null) { 222 if (dst == null) {
153 dst = ByteBuffer.allocate(src.remaining() * 2); 223 dst = ByteBuffer.allocate(src.remaining() * 2);
154 } 224 }
155 copySlice(src, x, in+1, dst); 225 copySlice(src, x, in+1, dst);
156 x = in + 1; 226 x = in + 1;
157 } else { 227 } else {
158 // found stand-alone LF, need to output CRLF 228 // found stand-alone LF, need to output CRLF
159 foundLoneLF = true; 229 if (!processInconsistent && foundCRLF) {
160 // XXX respect onlyConsistent. if foundCRLF then shall not process further 230 assert foundLoneLF == true : "preview() shall initialize this";
231 fail(src, in);
232 }
161 if (dst == null) { 233 if (dst == null) {
162 dst = ByteBuffer.allocate(src.remaining() * 2); 234 dst = ByteBuffer.allocate(src.remaining() * 2);
163 } 235 }
164 copySlice(src, x, in, dst); 236 copySlice(src, x, in, dst);
165 dst.put(CR); 237 dst.put(CR);
178 src.position(x); 250 src.position(x);
179 return dst == null ? src : (ByteBuffer) dst.flip(); 251 return dst == null ? src : (ByteBuffer) dst.flip();
180 } 252 }
181 253
182 254
255 // Test: nlFilter.fail(ByteBuffer.wrap(new "test string".getBytes()), 5);
183 private void fail(ByteBuffer b, int pos) { 256 private void fail(ByteBuffer b, int pos) {
184 // FIXME checked(?) HgFilterException instead 257 StringBuilder sb = new StringBuilder();
185 throw new RuntimeException(String.format("Inconsistent newline characters in the stream (char 0x%x, local index:%d)", b.get(pos), pos)); 258 for (int i = max(pos-10, 0), x = min(pos + 10, b.limit()); i < x; i++) {
259 sb.append(String.format("%02x ", b.get(i)));
260 }
261 throw new HgBadStateException(String.format("Inconsistent newline characters in the stream %s (char 0x%x, local index:%d)", sb.toString(), b.get(pos), pos));
186 } 262 }
187 263
188 private static int indexOf(byte ch, ByteBuffer b, int from) { 264 private static int indexOf(byte ch, ByteBuffer b, int from) {
189 return indexOf(ch, b, from, b.limit()); 265 return indexOf(ch, b, from, b.limit());
190 } 266 }