comparison src/org/tmatesoft/hg/internal/NewlineFilter.java @ 353:0f3687e79f5a

Treat content with target line endings as correct regardless of the eol.only-consistent setting
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Thu, 01 Dec 2011 03:05:28 +0100
parents 7b34d24b8f4d
children f2c11fe7f3e9
comparison
equal deleted inserted replaced
352:7b34d24b8f4d 353:0f3687e79f5a
37 */ 37 */
38 public class NewlineFilter implements Filter { 38 public class NewlineFilter implements Filter {
39 39
40 // if allowInconsistent is true, filter simply pass incorrect newline characters (single \r or \r\n on *nix and single \n on Windows) as is, 40 // if allowInconsistent is true, filter simply pass incorrect newline characters (single \r or \r\n on *nix and single \n on Windows) as is,
41 // i.e. doesn't try to convert them into appropriate newline characters. XXX revisit if Keyword extension behaves differently 41 // i.e. doesn't try to convert them into appropriate newline characters. XXX revisit if Keyword extension behaves differently
42 private final boolean allowInconsistent; 42 private final boolean processInconsistent;
43 private final boolean winToNix; 43 private final boolean winToNix;
44 44
45 // next two factory methods for testing purposes 45 // next two factory methods for testing purposes
46 public static NewlineFilter createWin2Nix(boolean allowMixed) { 46 public static NewlineFilter createWin2Nix(boolean allowMixed) {
47 return new NewlineFilter(!allowMixed, 0); 47 return new NewlineFilter(!allowMixed, 0);
49 49
50 public static NewlineFilter createNix2Win(boolean allowMixed) { 50 public static NewlineFilter createNix2Win(boolean allowMixed) {
51 return new NewlineFilter(!allowMixed, 1); 51 return new NewlineFilter(!allowMixed, 1);
52 } 52 }
53 53
54 private NewlineFilter(boolean failIfInconsistent, int transform) { 54 private NewlineFilter(boolean onlyConsistent, int transform) {
55 winToNix = transform == 0; 55 winToNix = transform == 0;
56 allowInconsistent = !failIfInconsistent; 56 processInconsistent = !onlyConsistent;
57 } 57 }
58 58
59 public ByteBuffer filter(ByteBuffer src) { 59 public ByteBuffer filter(ByteBuffer src) {
60 if (winToNix) { 60 if (winToNix) {
61 return win2nix(src); 61 return win2nix(src);
62 } else { 62 } else {
63 return nix2win(src); 63 return nix2win(src);
64 } 64 }
65 } 65 }
66
67 private boolean foundLoneLF = false;
68 private boolean foundCRLF = false;
66 69
67 private ByteBuffer win2nix(ByteBuffer src) { 70 private ByteBuffer win2nix(ByteBuffer src) {
68 int x = src.position(); // source index 71 int lookupStart = src.position(); // source index
69 int lookupStart = x;
70 ByteBuffer dst = null; 72 ByteBuffer dst = null;
71 while (x < src.limit()) { 73 final byte CR = (byte) '\r';
74 final byte LF = (byte) '\n';
75 while (lookupStart < src.limit()) {
72 // x, lookupStart, ir and in are absolute positions within src buffer, which is never read with modifying operations 76 // x, lookupStart, ir and in are absolute positions within src buffer, which is never read with modifying operations
73 int ir = indexOf('\r', src, lookupStart); 77 int ir = indexOf(CR, src, lookupStart);
74 int in = indexOf('\n', src, lookupStart); 78 int in = indexOf(LF, src, lookupStart);
75 if (ir == -1) { 79 if (in != -1) {
76 if (in == -1 || allowInconsistent) { 80 if (ir == -1 || ir > in) {
81 // lone LF. CR, if present, goes after LF, process up to closest LF, let next iteration decide what to do with CR@ir
82 foundLoneLF = true;
83 // XXX respect onlyConsistent. if foundCRLF then shall not process further
84 dst = consume(src, lookupStart, in+1, dst);
85 lookupStart = in + 1;
86 } else {
87 // ir < in
88 if (onlyCRup2limit(src, ir, in)) {
89 // CR...CRLF;
90 foundCRLF = true;
91 // XXX respect onlyConsistent. if foundLoneLF then shall not process further
92 dst = consume(src, lookupStart, ir, dst);
93 dst.put(LF);
94 lookupStart = in+1;
95 } else {
96 // CR...CR...^CR....LF
97 dst = consume(src, lookupStart, ir+1, dst);
98 // although can search for ^CR, here I copy CR one by one as I don't expect huge sequences of CR to optimize for
99 lookupStart = ir+1;
100 }
101 }
102 } else {
103 // no newlines
104 if (ir != -1 && onlyCRup2limit(src, ir, src.limit())) {
105 // \r as last character(s) is the only case we care about when there're no LF found
106 // cases like \r\r\r<EOB>\n shall be handled like \r\n, hence onlyCRup2limit
107 dst = consume(src, lookupStart, ir, dst);
108 lookupStart = src.limit() - 1; // leave only last CR for next buffer
109 } else {
110 // consume all. don't create a copy of src if there's no dst yet
77 if (dst != null) { 111 if (dst != null) {
78 copySlice(src, x, src.limit(), dst); 112 copySlice(src, lookupStart, src.limit(), dst);
79 x = src.limit(); // consumed all 113 lookupStart = src.limit();
80 } 114 }
81 break; 115 }
82 } else {
83 fail(src, in);
84 }
85 }
86 // in == -1 while ir != -1 may be valid case if ir is the last char of the buffer, we check below for that
87 if (in != -1 && in != ir+1 && !allowInconsistent) {
88 fail(src, in);
89 }
90 if (dst == null) {
91 dst = ByteBuffer.allocate(src.remaining());
92 }
93 copySlice(src, x, ir, dst);
94 if (ir+1 == src.limit()) {
95 // last char of the buffer -
96 // consume src till that char and let next iteration work on it
97 x = ir;
98 break; 116 break;
99 } 117 }
100 if (in != ir + 1) { 118 }
101 x = ir+1; // generally in, but if allowInconsistent==true and \r is not followed by \n, then 119 src.position(lookupStart); // mark we've consumed up to x
102 // cases like "one \r two \r\n three" shall be processed correctly (second pair would be ignored if x==in)
103 lookupStart = ir+1;
104 } else {
105 x = in;
106 lookupStart = x+1; // skip \n for next lookup
107 }
108 }
109 src.position(x); // mark we've consumed up to x
110 return dst == null ? src : (ByteBuffer) dst.flip(); 120 return dst == null ? src : (ByteBuffer) dst.flip();
121 }
122
123 // true if [from..limit) are CR
124 private static boolean onlyCRup2limit(ByteBuffer src, int from, int limit) {
125 // extended version of (ir+1 == src.limit()): check all in [ir..src.limit) are CR
126 for (int i = from; i < limit; i++) {
127 if (src.get(i) != '\r') {
128 return false;
129 }
130 }
131 return true;
132 }
133 private static ByteBuffer consume(ByteBuffer src, int from, int to, ByteBuffer dst) {
134 if (dst == null) {
135 dst = ByteBuffer.allocate(src.remaining());
136 }
137 copySlice(src, from, to, dst);
138 return dst;
111 } 139 }
112 140
113 private ByteBuffer nix2win(ByteBuffer src) { 141 private ByteBuffer nix2win(ByteBuffer src) {
114 int x = src.position(); 142 int x = src.position();
115 ByteBuffer dst = null; 143 ByteBuffer dst = null;
144 final byte CR = (byte) '\r';
145 final byte LF = (byte) '\n';
116 while (x < src.limit()) { 146 while (x < src.limit()) {
117 int in = indexOf('\n', src, x); 147 int in = indexOf(LF, src, x);
118 int ir = indexOf('\r', src, x, in == -1 ? src.limit() : in); 148 if (in != -1) {
119 if (in == -1) { 149 if (in > x && src.get(in - 1) == CR) {
120 if (ir == -1 || allowInconsistent) { 150 foundCRLF = true;
121 break; 151 // XXX respect onlyConsistent. if foundLoneLF then shall not process further
122 } else { 152 if (dst == null) {
123 fail(src, ir); 153 dst = ByteBuffer.allocate(src.remaining() * 2);
124 } 154 }
125 } else if (ir != -1 && !allowInconsistent) { 155 copySlice(src, x, in+1, dst);
126 fail(src, ir); 156 x = in + 1;
127 } 157 } else {
128 158 // found stand-alone LF, need to output CRLF
129 // x <= in < src.limit 159 foundLoneLF = true;
130 // allowInconsistent && x <= ir < in || ir == -1 160 // XXX respect onlyConsistent. if foundCRLF then shall not process further
131 if (dst == null) { 161 if (dst == null) {
132 // buffer full of \n grows as much as twice in size 162 dst = ByteBuffer.allocate(src.remaining() * 2);
133 dst = ByteBuffer.allocate(src.remaining() * 2); 163 }
134 } 164 copySlice(src, x, in, dst);
135 copySlice(src, x, in, dst); 165 dst.put(CR);
136 if (ir == -1 || ir+1 != in) { 166 dst.put(LF);
137 dst.put((byte) '\r'); 167 x = in + 1;
138 } // otherwise (ir!=-1 && ir+1==in) we found \r\n pair, don't convert to \r\r\n 168 }
139 // we may copy \n at src[in] on the next iteration, but would need extra lookupIndex variable then. 169 } else {
140 dst.put((byte) '\n'); 170 // no newlines (no LF), just copy what left
141 x = in+1; 171 if (dst != null) {
172 copySlice(src, x, src.limit(), dst);
173 x = src.limit();
174 }
175 break;
176 }
142 } 177 }
143 src.position(x); 178 src.position(x);
144 return dst == null ? src : (ByteBuffer) dst.flip(); 179 return dst == null ? src : (ByteBuffer) dst.flip();
145 } 180 }
146 181
147 182
148 private void fail(ByteBuffer b, int pos) { 183 private void fail(ByteBuffer b, int pos) {
184 // FIXME checked(?) HgFilterException instead
149 throw new RuntimeException(String.format("Inconsistent newline characters in the stream (char 0x%x, local index:%d)", b.get(pos), pos)); 185 throw new RuntimeException(String.format("Inconsistent newline characters in the stream (char 0x%x, local index:%d)", b.get(pos), pos));
150 } 186 }
151 187
152 private static int indexOf(char ch, ByteBuffer b, int from) { 188 private static int indexOf(byte ch, ByteBuffer b, int from) {
153 return indexOf(ch, b, from, b.limit()); 189 return indexOf(ch, b, from, b.limit());
154 } 190 }
155 191
156 // looks up in buf[from..to) 192 // looks up in buf[from..to)
157 private static int indexOf(char ch, ByteBuffer b, int from, int to) { 193 private static int indexOf(byte ch, ByteBuffer b, int from, int to) {
158 for (int i = from; i < to; i++) { 194 for (int i = from; i < to; i++) {
159 byte c = b.get(i); 195 byte c = b.get(i);
160 if (ch == c) { 196 if (ch == c) {
161 return i; 197 return i;
162 } 198 }
163 } 199 }
164 return -1; 200 return -1;
165 } 201 }
166 202
167 public static class Factory implements Filter.Factory { 203 public static class Factory implements Filter.Factory {
168 private boolean failIfInconsistent = true; 204 private boolean processOnlyConsistent = true;
169 private Path.Matcher lfMatcher; 205 private Path.Matcher lfMatcher;
170 private Path.Matcher crlfMatcher; 206 private Path.Matcher crlfMatcher;
171 private Path.Matcher binMatcher; 207 private Path.Matcher binMatcher;
172 private Path.Matcher nativeMatcher; 208 private Path.Matcher nativeMatcher;
173 private String nativeRepoFormat; 209 private String nativeRepoFormat;
174 private String nativeOSFormat; 210 private String nativeOSFormat;
175 211
176 public void initialize(HgRepository hgRepo) { 212 public void initialize(HgRepository hgRepo) {
177 failIfInconsistent = hgRepo.getConfiguration().getBooleanValue("eol", "only-consistent", true); 213 processOnlyConsistent = hgRepo.getConfiguration().getBooleanValue("eol", "only-consistent", true);
178 File cfgFile = new File(hgRepo.getWorkingDir(), ".hgeol"); 214 File cfgFile = new File(hgRepo.getWorkingDir(), ".hgeol");
179 if (!cfgFile.canRead()) { 215 if (!cfgFile.canRead()) {
180 return; 216 return;
181 } 217 }
182 // XXX if .hgeol is not checked out, we may get it from repository 218 // XXX if .hgeol is not checked out, we may get it from repository
236 } 272 }
237 if (binMatcher != null && binMatcher.accept(path)) { 273 if (binMatcher != null && binMatcher.accept(path)) {
238 return null; 274 return null;
239 } 275 }
240 if (crlfMatcher != null && crlfMatcher.accept(path)) { 276 if (crlfMatcher != null && crlfMatcher.accept(path)) {
241 return new NewlineFilter(failIfInconsistent, 1); 277 return new NewlineFilter(processOnlyConsistent, 1);
242 } else if (lfMatcher != null && lfMatcher.accept(path)) { 278 } else if (lfMatcher != null && lfMatcher.accept(path)) {
243 return new NewlineFilter(failIfInconsistent, 0); 279 return new NewlineFilter(processOnlyConsistent, 0);
244 } else if (nativeMatcher != null && nativeMatcher.accept(path)) { 280 } else if (nativeMatcher != null && nativeMatcher.accept(path)) {
245 if (nativeOSFormat.equals(nativeRepoFormat)) { 281 if (nativeOSFormat.equals(nativeRepoFormat)) {
246 return null; 282 return null;
247 } 283 }
248 if (opts.getDirection() == FromRepo) { 284 if (opts.getDirection() == FromRepo) {
249 int transform = "CRLF".equals(nativeOSFormat) ? 1 : 0; 285 int transform = "CRLF".equals(nativeOSFormat) ? 1 : 0;
250 return new NewlineFilter(failIfInconsistent, transform); 286 return new NewlineFilter(processOnlyConsistent, transform);
251 } else if (opts.getDirection() == ToRepo) { 287 } else if (opts.getDirection() == ToRepo) {
252 int transform = "CRLF".equals(nativeOSFormat) ? 0 : 1; 288 int transform = "CRLF".equals(nativeOSFormat) ? 0 : 1;
253 return new NewlineFilter(failIfInconsistent, transform); 289 return new NewlineFilter(processOnlyConsistent, transform);
254 } 290 }
255 return null; 291 return null;
256 } 292 }
257 return null; 293 return null;
258 } 294 }