Mercurial > hg4j
comparison src/org/tmatesoft/hg/internal/NewlineFilter.java @ 353:0f3687e79f5a
Treat content with target line endings as correct regardless of the eol.only-consistent setting
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Thu, 01 Dec 2011 03:05:28 +0100 |
parents | 7b34d24b8f4d |
children | f2c11fe7f3e9 |
comparison
equal
deleted
inserted
replaced
352:7b34d24b8f4d | 353:0f3687e79f5a |
---|---|
37 */ | 37 */ |
38 public class NewlineFilter implements Filter { | 38 public class NewlineFilter implements Filter { |
39 | 39 |
40 // if allowInconsistent is true, the filter simply passes incorrect newline characters (single \r or \r\n on *nix and single \n on Windows) as is, | 40 // if allowInconsistent is true, the filter simply passes incorrect newline characters (single \r or \r\n on *nix and single \n on Windows) as is, |
41 // i.e. doesn't try to convert them into appropriate newline characters. XXX revisit if Keyword extension behaves differently | 41 // i.e. doesn't try to convert them into appropriate newline characters. XXX revisit if Keyword extension behaves differently |
42 private final boolean allowInconsistent; | 42 private final boolean processInconsistent; |
43 private final boolean winToNix; | 43 private final boolean winToNix; |
44 | 44 |
45 // next two factory methods for testing purposes | 45 // next two factory methods for testing purposes |
46 public static NewlineFilter createWin2Nix(boolean allowMixed) { | 46 public static NewlineFilter createWin2Nix(boolean allowMixed) { |
47 return new NewlineFilter(!allowMixed, 0); | 47 return new NewlineFilter(!allowMixed, 0); |
49 | 49 |
50 public static NewlineFilter createNix2Win(boolean allowMixed) { | 50 public static NewlineFilter createNix2Win(boolean allowMixed) { |
51 return new NewlineFilter(!allowMixed, 1); | 51 return new NewlineFilter(!allowMixed, 1); |
52 } | 52 } |
53 | 53 |
54 private NewlineFilter(boolean failIfInconsistent, int transform) { | 54 private NewlineFilter(boolean onlyConsistent, int transform) { |
55 winToNix = transform == 0; | 55 winToNix = transform == 0; |
56 allowInconsistent = !failIfInconsistent; | 56 processInconsistent = !onlyConsistent; |
57 } | 57 } |
58 | 58 |
59 public ByteBuffer filter(ByteBuffer src) { | 59 public ByteBuffer filter(ByteBuffer src) { |
60 if (winToNix) { | 60 if (winToNix) { |
61 return win2nix(src); | 61 return win2nix(src); |
62 } else { | 62 } else { |
63 return nix2win(src); | 63 return nix2win(src); |
64 } | 64 } |
65 } | 65 } |
66 | |
67 private boolean foundLoneLF = false; | |
68 private boolean foundCRLF = false; | |
66 | 69 |
67 private ByteBuffer win2nix(ByteBuffer src) { | 70 private ByteBuffer win2nix(ByteBuffer src) { |
68 int x = src.position(); // source index | 71 int lookupStart = src.position(); // source index |
69 int lookupStart = x; | |
70 ByteBuffer dst = null; | 72 ByteBuffer dst = null; |
71 while (x < src.limit()) { | 73 final byte CR = (byte) '\r'; |
74 final byte LF = (byte) '\n'; | |
75 while (lookupStart < src.limit()) { | |
72 // x, lookupStart, ir and in are absolute positions within src buffer, which is never read with modifying operations | 76 // x, lookupStart, ir and in are absolute positions within src buffer, which is never read with modifying operations |
73 int ir = indexOf('\r', src, lookupStart); | 77 int ir = indexOf(CR, src, lookupStart); |
74 int in = indexOf('\n', src, lookupStart); | 78 int in = indexOf(LF, src, lookupStart); |
75 if (ir == -1) { | 79 if (in != -1) { |
76 if (in == -1 || allowInconsistent) { | 80 if (ir == -1 || ir > in) { |
81 // lone LF. CR, if present, goes after LF, process up to closest LF, let next iteration decide what to do with CR@ir | |
82 foundLoneLF = true; | |
83 // XXX respect onlyConsistent. if foundCRLF then shall not process further | |
84 dst = consume(src, lookupStart, in+1, dst); | |
85 lookupStart = in + 1; | |
86 } else { | |
87 // ir < in | |
88 if (onlyCRup2limit(src, ir, in)) { | |
89 // CR...CRLF; | |
90 foundCRLF = true; | |
91 // XXX respect onlyConsistent. if foundLoneLF then shall not process further | |
92 dst = consume(src, lookupStart, ir, dst); | |
93 dst.put(LF); | |
94 lookupStart = in+1; | |
95 } else { | |
96 // CR...CR...^CR....LF | |
97 dst = consume(src, lookupStart, ir+1, dst); | |
98 // although can search for ^CR, here I copy CR one by one as I don't expect huge sequences of CR to optimize for | |
99 lookupStart = ir+1; | |
100 } | |
101 } | |
102 } else { | |
103 // no newlines | |
104 if (ir != -1 && onlyCRup2limit(src, ir, src.limit())) { | |
105 // \r as last character(s) is the only case we care about when there're no LF found | |
106 // cases like \r\r\r<EOB>\n shall be handled like \r\n, hence onlyCRup2limit | |
107 dst = consume(src, lookupStart, ir, dst); | |
108 lookupStart = src.limit() - 1; // leave only last CR for next buffer | |
109 } else { | |
110 // consume all. don't create a copy of src if there's no dst yet | |
77 if (dst != null) { | 111 if (dst != null) { |
78 copySlice(src, x, src.limit(), dst); | 112 copySlice(src, lookupStart, src.limit(), dst); |
79 x = src.limit(); // consumed all | 113 lookupStart = src.limit(); |
80 } | 114 } |
81 break; | 115 } |
82 } else { | |
83 fail(src, in); | |
84 } | |
85 } | |
86 // in == -1 while ir != -1 may be valid case if ir is the last char of the buffer, we check below for that | |
87 if (in != -1 && in != ir+1 && !allowInconsistent) { | |
88 fail(src, in); | |
89 } | |
90 if (dst == null) { | |
91 dst = ByteBuffer.allocate(src.remaining()); | |
92 } | |
93 copySlice(src, x, ir, dst); | |
94 if (ir+1 == src.limit()) { | |
95 // last char of the buffer - | |
96 // consume src till that char and let next iteration work on it | |
97 x = ir; | |
98 break; | 116 break; |
99 } | 117 } |
100 if (in != ir + 1) { | 118 } |
101 x = ir+1; // generally in, but if allowInconsistent==true and \r is not followed by \n, then | 119 src.position(lookupStart); // mark we've consumed up to x |
102 // cases like "one \r two \r\n three" shall be processed correctly (second pair would be ignored if x==in) | |
103 lookupStart = ir+1; | |
104 } else { | |
105 x = in; | |
106 lookupStart = x+1; // skip \n for next lookup | |
107 } | |
108 } | |
109 src.position(x); // mark we've consumed up to x | |
110 return dst == null ? src : (ByteBuffer) dst.flip(); | 120 return dst == null ? src : (ByteBuffer) dst.flip(); |
121 } | |
122 | |
123 // true if [from..limit) are CR | |
124 private static boolean onlyCRup2limit(ByteBuffer src, int from, int limit) { | |
125 // extended version of (ir+1 == src.limit()): check all in [ir..src.limit) are CR | |
126 for (int i = from; i < limit; i++) { | |
127 if (src.get(i) != '\r') { | |
128 return false; | |
129 } | |
130 } | |
131 return true; | |
132 } | |
133 private static ByteBuffer consume(ByteBuffer src, int from, int to, ByteBuffer dst) { | |
134 if (dst == null) { | |
135 dst = ByteBuffer.allocate(src.remaining()); | |
136 } | |
137 copySlice(src, from, to, dst); | |
138 return dst; | |
111 } | 139 } |
112 | 140 |
113 private ByteBuffer nix2win(ByteBuffer src) { | 141 private ByteBuffer nix2win(ByteBuffer src) { |
114 int x = src.position(); | 142 int x = src.position(); |
115 ByteBuffer dst = null; | 143 ByteBuffer dst = null; |
144 final byte CR = (byte) '\r'; | |
145 final byte LF = (byte) '\n'; | |
116 while (x < src.limit()) { | 146 while (x < src.limit()) { |
117 int in = indexOf('\n', src, x); | 147 int in = indexOf(LF, src, x); |
118 int ir = indexOf('\r', src, x, in == -1 ? src.limit() : in); | 148 if (in != -1) { |
119 if (in == -1) { | 149 if (in > x && src.get(in - 1) == CR) { |
120 if (ir == -1 || allowInconsistent) { | 150 foundCRLF = true; |
121 break; | 151 // XXX respect onlyConsistent. if foundLoneLF then shall not process further |
122 } else { | 152 if (dst == null) { |
123 fail(src, ir); | 153 dst = ByteBuffer.allocate(src.remaining() * 2); |
124 } | 154 } |
125 } else if (ir != -1 && !allowInconsistent) { | 155 copySlice(src, x, in+1, dst); |
126 fail(src, ir); | 156 x = in + 1; |
127 } | 157 } else { |
128 | 158 // found stand-alone LF, need to output CRLF |
129 // x <= in < src.limit | 159 foundLoneLF = true; |
130 // allowInconsistent && x <= ir < in || ir == -1 | 160 // XXX respect onlyConsistent. if foundCRLF then shall not process further |
131 if (dst == null) { | 161 if (dst == null) { |
132 // buffer full of \n grows as much as twice in size | 162 dst = ByteBuffer.allocate(src.remaining() * 2); |
133 dst = ByteBuffer.allocate(src.remaining() * 2); | 163 } |
134 } | 164 copySlice(src, x, in, dst); |
135 copySlice(src, x, in, dst); | 165 dst.put(CR); |
136 if (ir == -1 || ir+1 != in) { | 166 dst.put(LF); |
137 dst.put((byte) '\r'); | 167 x = in + 1; |
138 } // otherwise (ir!=-1 && ir+1==in) we found \r\n pair, don't convert to \r\r\n | 168 } |
139 // we may copy \n at src[in] on the next iteration, but would need extra lookupIndex variable then. | 169 } else { |
140 dst.put((byte) '\n'); | 170 // no newlines (no LF), just copy what left |
141 x = in+1; | 171 if (dst != null) { |
172 copySlice(src, x, src.limit(), dst); | |
173 x = src.limit(); | |
174 } | |
175 break; | |
176 } | |
142 } | 177 } |
143 src.position(x); | 178 src.position(x); |
144 return dst == null ? src : (ByteBuffer) dst.flip(); | 179 return dst == null ? src : (ByteBuffer) dst.flip(); |
145 } | 180 } |
146 | 181 |
147 | 182 |
148 private void fail(ByteBuffer b, int pos) { | 183 private void fail(ByteBuffer b, int pos) { |
184 // FIXME checked(?) HgFilterException instead | |
149 throw new RuntimeException(String.format("Inconsistent newline characters in the stream (char 0x%x, local index:%d)", b.get(pos), pos)); | 185 throw new RuntimeException(String.format("Inconsistent newline characters in the stream (char 0x%x, local index:%d)", b.get(pos), pos)); |
150 } | 186 } |
151 | 187 |
152 private static int indexOf(char ch, ByteBuffer b, int from) { | 188 private static int indexOf(byte ch, ByteBuffer b, int from) { |
153 return indexOf(ch, b, from, b.limit()); | 189 return indexOf(ch, b, from, b.limit()); |
154 } | 190 } |
155 | 191 |
156 // looks up in buf[from..to) | 192 // looks up in buf[from..to) |
157 private static int indexOf(char ch, ByteBuffer b, int from, int to) { | 193 private static int indexOf(byte ch, ByteBuffer b, int from, int to) { |
158 for (int i = from; i < to; i++) { | 194 for (int i = from; i < to; i++) { |
159 byte c = b.get(i); | 195 byte c = b.get(i); |
160 if (ch == c) { | 196 if (ch == c) { |
161 return i; | 197 return i; |
162 } | 198 } |
163 } | 199 } |
164 return -1; | 200 return -1; |
165 } | 201 } |
166 | 202 |
167 public static class Factory implements Filter.Factory { | 203 public static class Factory implements Filter.Factory { |
168 private boolean failIfInconsistent = true; | 204 private boolean processOnlyConsistent = true; |
169 private Path.Matcher lfMatcher; | 205 private Path.Matcher lfMatcher; |
170 private Path.Matcher crlfMatcher; | 206 private Path.Matcher crlfMatcher; |
171 private Path.Matcher binMatcher; | 207 private Path.Matcher binMatcher; |
172 private Path.Matcher nativeMatcher; | 208 private Path.Matcher nativeMatcher; |
173 private String nativeRepoFormat; | 209 private String nativeRepoFormat; |
174 private String nativeOSFormat; | 210 private String nativeOSFormat; |
175 | 211 |
176 public void initialize(HgRepository hgRepo) { | 212 public void initialize(HgRepository hgRepo) { |
177 failIfInconsistent = hgRepo.getConfiguration().getBooleanValue("eol", "only-consistent", true); | 213 processOnlyConsistent = hgRepo.getConfiguration().getBooleanValue("eol", "only-consistent", true); |
178 File cfgFile = new File(hgRepo.getWorkingDir(), ".hgeol"); | 214 File cfgFile = new File(hgRepo.getWorkingDir(), ".hgeol"); |
179 if (!cfgFile.canRead()) { | 215 if (!cfgFile.canRead()) { |
180 return; | 216 return; |
181 } | 217 } |
182 // XXX if .hgeol is not checked out, we may get it from repository | 218 // XXX if .hgeol is not checked out, we may get it from repository |
236 } | 272 } |
237 if (binMatcher != null && binMatcher.accept(path)) { | 273 if (binMatcher != null && binMatcher.accept(path)) { |
238 return null; | 274 return null; |
239 } | 275 } |
240 if (crlfMatcher != null && crlfMatcher.accept(path)) { | 276 if (crlfMatcher != null && crlfMatcher.accept(path)) { |
241 return new NewlineFilter(failIfInconsistent, 1); | 277 return new NewlineFilter(processOnlyConsistent, 1); |
242 } else if (lfMatcher != null && lfMatcher.accept(path)) { | 278 } else if (lfMatcher != null && lfMatcher.accept(path)) { |
243 return new NewlineFilter(failIfInconsistent, 0); | 279 return new NewlineFilter(processOnlyConsistent, 0); |
244 } else if (nativeMatcher != null && nativeMatcher.accept(path)) { | 280 } else if (nativeMatcher != null && nativeMatcher.accept(path)) { |
245 if (nativeOSFormat.equals(nativeRepoFormat)) { | 281 if (nativeOSFormat.equals(nativeRepoFormat)) { |
246 return null; | 282 return null; |
247 } | 283 } |
248 if (opts.getDirection() == FromRepo) { | 284 if (opts.getDirection() == FromRepo) { |
249 int transform = "CRLF".equals(nativeOSFormat) ? 1 : 0; | 285 int transform = "CRLF".equals(nativeOSFormat) ? 1 : 0; |
250 return new NewlineFilter(failIfInconsistent, transform); | 286 return new NewlineFilter(processOnlyConsistent, transform); |
251 } else if (opts.getDirection() == ToRepo) { | 287 } else if (opts.getDirection() == ToRepo) { |
252 int transform = "CRLF".equals(nativeOSFormat) ? 0 : 1; | 288 int transform = "CRLF".equals(nativeOSFormat) ? 0 : 1; |
253 return new NewlineFilter(failIfInconsistent, transform); | 289 return new NewlineFilter(processOnlyConsistent, transform); |
254 } | 290 } |
255 return null; | 291 return null; |
256 } | 292 } |
257 return null; | 293 return null; |
258 } | 294 } |