Mercurial > hg4j
comparison src/org/tmatesoft/hg/internal/KeywordFilter.java @ 711:a62079bc422b
Keyword filtering that doesn't depend on input buffer size and the way input lines got split between filter() calls. KeywordFilter got state to keep processed suspicious ...$ lines
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Fri, 11 Oct 2013 21:35:41 +0200 |
parents | cf200271439a |
children |
comparison
equal
deleted
inserted
replaced
710:cf200271439a | 711:a62079bc422b |
---|---|
19 import static org.tmatesoft.hg.util.LogFacility.Severity.Error; | 19 import static org.tmatesoft.hg.util.LogFacility.Severity.Error; |
20 | 20 |
21 import java.nio.ByteBuffer; | 21 import java.nio.ByteBuffer; |
22 import java.util.ArrayList; | 22 import java.util.ArrayList; |
23 import java.util.Date; | 23 import java.util.Date; |
24 import java.util.Map; | |
24 import java.util.TreeMap; | 25 import java.util.TreeMap; |
25 | 26 |
26 import org.tmatesoft.hg.core.Nodeid; | 27 import org.tmatesoft.hg.core.Nodeid; |
27 import org.tmatesoft.hg.repo.HgChangelog.RawChangeset; | 28 import org.tmatesoft.hg.repo.HgChangelog.RawChangeset; |
28 import org.tmatesoft.hg.repo.HgRepository; | 29 import org.tmatesoft.hg.repo.HgRepository; |
34 * | 35 * |
35 * @author Artem Tikhomirov | 36 * @author Artem Tikhomirov |
36 * @author TMate Software Ltd. | 37 * @author TMate Software Ltd. |
37 */ | 38 */ |
38 public class KeywordFilter implements Filter { | 39 public class KeywordFilter implements Filter { |
39 // present implementation is stateless, however, filter use pattern shall not assume that. In fact, Factory may us that | |
40 private final HgRepository repo; | 40 private final HgRepository repo; |
41 private final boolean isExpanding; | 41 private final boolean isExpanding; |
42 private final TreeMap<String,String> keywords; | 42 private final Map<String,String> keywords; |
43 private final int minBufferLen; | |
44 private final Path path; | 43 private final Path path; |
45 private RawChangeset latestFileCset; | 44 private RawChangeset latestFileCset; |
45 private final ByteVector unprocessedBuffer; | |
46 | 46 |
47 /** | 47 /** |
48 * | 48 * |
49 * @param hgRepo | 49 * @param hgRepo |
50 * @param path | 50 * @param path |
51 * @param expand <code>true</code> to expand keywords, <code>false</code> to shrink | 51 * @param expand <code>true</code> to expand keywords, <code>false</code> to shrink |
52 */ | 52 */ |
53 private KeywordFilter(HgRepository hgRepo, Path p, boolean expand) { | 53 private KeywordFilter(HgRepository hgRepo, Path p, Map<String, String> kw, boolean expand) { |
54 repo = hgRepo; | 54 repo = hgRepo; |
55 path = p; | 55 path = p; |
56 isExpanding = expand; | 56 isExpanding = expand; |
57 keywords = new TreeMap<String,String>(); | 57 keywords = kw; |
58 keywords.put("Id", "Id"); | 58 unprocessedBuffer = expand ? new ByteVector(0, 0) : new ByteVector(120, 50); |
59 keywords.put("Revision", "Revision"); | |
60 keywords.put("Author", "Author"); | |
61 keywords.put("Date", "Date"); | |
62 keywords.put("LastChangedRevision", "LastChangedRevision"); | |
63 keywords.put("LastChangedBy", "LastChangedBy"); | |
64 keywords.put("LastChangedDate", "LastChangedDate"); | |
65 keywords.put("Source", "Source"); | |
66 keywords.put("Header", "Header"); | |
67 | |
68 int l = 0; | |
69 for (String s : keywords.keySet()) { | |
70 if (s.length() > l) { | |
71 l = s.length(); | |
72 } | |
73 } | |
74 // TODO post-1.0 later may implement #filter() not to read full kw value (just "$kw:"). However, limit of maxLen + 2 would keep valid. | |
75 // for buffers less then minBufferLen, there are chances #filter() implementation would never end | |
76 // (i.e. for input "$LongestKey"$ | |
77 minBufferLen = l + 2 + (isExpanding ? 0 : 120 /*any reasonable constant for max possible kw value length*/); | |
78 } | 59 } |
79 | 60 |
80 /** | 61 /** |
81 * @param src buffer ready to be read | 62 * @param src buffer ready to be read |
82 * @return buffer ready to be read and original buffer's position modified to reflect consumed bytes. IOW, if source buffer | 63 * @return buffer ready to be read and original buffer's position modified to reflect consumed bytes. IOW, if source buffer |
83 * on return has remaining bytes, they are assumed not-read (not processed) and next chunk passed to filter is supposed to | 64 * on return has remaining bytes, they are assumed not-read (not processed) and next chunk passed to filter is supposed to |
84 * start with them | 65 * start with them |
85 */ | 66 */ |
86 public ByteBuffer filter(ByteBuffer src) { | 67 public ByteBuffer filter(ByteBuffer src) { |
87 int keywordStart = indexOf(src, '$', src.position(), false); | 68 // when unprocessedBuffer is empty, we are looking for first $ in the input, |
88 if (keywordStart != -1 && src.capacity() < minBufferLen) { | 69 // when we've already got anything unprocessed, newline is of interest, too |
89 // FIXME this check is unlucky when small files are read for status 'areTheSame' check - small buffer is allocated. | 70 int kwBreak = indexOf(src, '$', src.position(), !unprocessedBuffer.isEmpty()); |
90 // the check for keywordStart('$') is a temp solution to minimize the chances to get this exception. | 71 ByteBuffer outBuffer = null; |
91 // Complete solution requires complete rewriting of this method to respect cases when keywords are split between buffers. | 72 while (kwBreak != -1) { |
92 // With 'honest' partial kw handling, need for this check would be gone. | 73 if (unprocessedBuffer.isEmpty()) { |
93 throw new IllegalStateException(String.format("Need buffer of at least %d bytes to ensure filter won't hang", minBufferLen)); | 74 // both expand and collapse cases |
94 } | 75 assert src.get(kwBreak) == '$'; |
95 ByteBuffer rv = null; | 76 |
96 int x = src.position(); | 77 int end = indexOf(src, '$', kwBreak+1, true); |
97 int copyFrom = x; // needs to be updated each time we copy a slice, but not each time we modify source index (x) | 78 if (end == -1) { |
98 while (x < src.limit()) { | 79 for (int i = kwBreak; i < src.limit(); i++) { |
99 if (keywordStart == -1) { | 80 unprocessedBuffer.add(src.get(i)); |
100 int i = indexOf(src, '$', x, false); | 81 } |
101 if (i == -1) { | 82 src.limit(kwBreak); |
102 if (rv == null) { | 83 kwBreak = -1; |
103 return src; | 84 // src up to kwBreak is left and returned either with outBuffer or alone |
85 } else if (src.get(end) == '$') { | |
86 StringBuilder sb = new StringBuilder(end - kwBreak); | |
87 for (int i = kwBreak+1; i < end; i++) { | |
88 if (src.get(i) == ':' || src.get(i) == ' ') { | |
89 break; | |
90 } | |
91 sb.append((char) src.get(i)); | |
92 } | |
93 final String keyword = sb.toString(); | |
94 if (knownKeyword(keyword)) { | |
95 // copy src up to kw, including starting $keyword | |
96 outBuffer = append(outBuffer, src, kwBreak - src.position() + 1+keyword.length()); | |
97 // replace kwStart..end with new content | |
98 outBuffer = ensureCapacityFor(outBuffer, (isExpanding ? 200 : 1)); | |
99 if (isExpanding) { | |
100 outBuffer.put((byte) ':'); | |
101 outBuffer.put((byte) ' '); | |
102 outBuffer = expandKeywordValue(keyword, outBuffer); | |
103 outBuffer.put((byte) ' '); | |
104 } | |
105 outBuffer.put((byte) '$'); | |
106 // src is consumed up to end | |
107 src.position(end+1); | |
108 kwBreak = indexOf(src, '$', end+1, false); | |
104 } else { | 109 } else { |
105 copySlice(src, copyFrom, src.limit(), rv); | 110 // no (or unknown) keyword, try with '$' at src[end] |
106 rv.flip(); | 111 kwBreak = end; |
107 src.position(src.limit()); | 112 } |
108 return rv; | 113 } else { |
109 } | 114 // newline, ignore keyword start |
115 kwBreak = indexOf(src, '$', end+1, false); | |
110 } | 116 } |
111 keywordStart = i; | 117 } else { |
112 // fall-through | 118 // we've got smth unprocessed, and we've matched either $ or NL |
113 } | 119 // the only chance to get here is when src is in the very start |
114 if (keywordStart >= 0) { | 120 if (src.get(kwBreak) == '$') { |
115 int i = indexOf(src, '$', keywordStart+1, true); | 121 // closed tag |
116 if (i == -1) { | 122 for (int i = src.position(); i <= kwBreak; i++) { |
117 // end of buffer reached | 123 // consume src: going to handle its [position*()..kwBreak] as part of unprocessedBuffer |
118 if (rv == null) { | 124 unprocessedBuffer.add(src.get()); |
119 if (keywordStart == x) { | 125 } |
120 // TODO post-1.0 in fact, x might be equal to keywordStart and to src.position() here ('$' is first character in the buffer, | 126 StringBuilder sb = new StringBuilder(unprocessedBuffer.size()); |
121 // and there are no other '$' not eols till the end of the buffer). This would lead to deadlock (filter won't consume any | 127 assert unprocessedBuffer.get(0) == '$'; |
122 // bytes). To prevent this, either shall copy bytes [keywordStart..buffer.limit()) to local buffer and use it on the next invocation, | 128 for (int i = 1; i < unprocessedBuffer.size(); i++) { |
123 // or add lookup of the keywords right after first '$' is found (do not wait for closing '$'). For now, large enough src buffer would be sufficient | 129 char ch = (char) unprocessedBuffer.get(i); |
124 // not to run into such situation | 130 if (ch == ':' || ch == ' ') { |
125 throw new IllegalStateException("Try src buffer of a greater size"); | 131 break; |
126 } | 132 } |
127 rv = ByteBuffer.allocate(keywordStart - copyFrom); | 133 sb.append(ch); |
128 } | 134 } |
129 // copy all from source till latest possible kw start | 135 final String keyword = sb.toString(); |
130 copySlice(src, copyFrom, keywordStart, rv); | 136 if (knownKeyword(keyword)) { |
131 rv.flip(); | 137 outBuffer = ensureCapacityFor(outBuffer, keyword.length() + (isExpanding ? 200 : 2)); |
132 // and tell caller we've consumed only to the potential kw start | 138 outBuffer.put((byte) '$'); |
133 src.position(keywordStart); | 139 outBuffer.put(keyword.getBytes()); |
134 return rv; | 140 if (isExpanding) { |
135 } else if (src.get(i) == '$') { | 141 outBuffer.put((byte) ':'); |
136 // end of keyword, or start of a new one. | 142 outBuffer.put((byte) ' '); |
137 String keyword; | 143 outBuffer = expandKeywordValue(keyword, outBuffer); |
138 if ((keyword = matchKeyword(src, keywordStart, i)) != null) { | 144 outBuffer.put((byte) ' '); |
139 if (rv == null) { | |
140 // src.remaining(), not .capacity because src is not read, and remaining represents | |
141 // actual bytes count, while capacity - potential. | |
142 // Factor of 4 is pure guess and a HACK, need to be fixed with re-expanding buffer on demand | |
143 rv = ByteBuffer.allocate(isExpanding ? src.remaining() * 4 : src.remaining()); | |
144 } | 145 } |
145 copySlice(src, copyFrom, keywordStart+1, rv); | 146 outBuffer.put((byte) '$'); |
146 rv.put(keyword.getBytes()); | |
147 if (isExpanding) { | |
148 rv.put((byte) ':'); | |
149 rv.put((byte) ' '); | |
150 expandKeywordValue(keyword, rv); | |
151 rv.put((byte) ' '); | |
152 } | |
153 rv.put((byte) '$'); | |
154 keywordStart = -1; | |
155 x = i+1; | |
156 copyFrom = x; | |
157 continue; | |
158 } else { | 147 } else { |
159 if (rv != null) { | 148 outBuffer = append(outBuffer, unprocessedBuffer.toByteArray()); |
160 // we've already did some substitution, thus need to copy bytes we've scanned. | 149 } |
161 copySlice(src, x, i, rv); | 150 // src part is consumed already, do nothing here, look for next possible kw |
162 copyFrom = i; | 151 kwBreak = indexOf(src, '$', kwBreak+1, false); |
163 } // no else in attempt to avoid rv creation if no real kw would be found | |
164 keywordStart = i; | |
165 x = i; // '$' at i wasn't consumed, hence x points to i, not i+1. This is to avoid problems with case: "sdfsd $ asdfs $Id$ sdf" | |
166 continue; | |
167 } | |
168 } else { | 152 } else { |
169 assert src.get(i) == '\n' || src.get(i) == '\r'; | 153 // newline => tag without close |
170 // line break | 154 outBuffer = append(outBuffer, unprocessedBuffer.toByteArray()); |
171 if (rv != null) { | 155 kwBreak = indexOf(src, '$', kwBreak+1, false); |
172 copySlice(src, x, i+1, rv); | |
173 copyFrom = i+1; | |
174 } | |
175 x = i+1; | |
176 keywordStart = -1; // Wasn't keyword, really | |
177 continue; // try once again | |
178 } | 156 } |
179 } | 157 unprocessedBuffer.clear(); |
180 } | 158 } |
181 if (keywordStart != -1) { | 159 } while (kwBreak != -1); |
182 if (rv == null) { | 160 if (outBuffer == null) { |
183 // no expansion happened yet, and we have potential kw start | 161 return src; |
184 rv = ByteBuffer.allocate(keywordStart - src.position()); | 162 } |
185 copySlice(src, src.position(), keywordStart, rv); | 163 outBuffer = ensureCapacityFor(outBuffer, src.remaining()); |
186 } | 164 outBuffer.put(src); |
187 src.position(keywordStart); | 165 outBuffer.flip(); |
188 } | 166 return outBuffer; |
189 if (rv != null) { | 167 } |
190 rv.flip(); | 168 private boolean knownKeyword(String kw) { |
191 return rv; | 169 return keywords.containsKey(kw); |
192 } | 170 } |
193 return src; | 171 |
194 } | 172 private static ByteBuffer append(ByteBuffer out, byte[] data) { |
195 | 173 out = ensureCapacityFor(out, data.length); |
196 /** | 174 out.put(data); |
197 * @param keyword | 175 return out; |
198 * @param rv | 176 } |
199 */ | 177 private static ByteBuffer append(ByteBuffer out, ByteBuffer in, int count) { |
200 private void expandKeywordValue(String keyword, ByteBuffer rv) { | 178 out = ensureCapacityFor(out, count); |
179 while (count-- > 0) { | |
180 out.put(in.get()); | |
181 } | |
182 return out; | |
183 } | |
184 private static ByteBuffer ensureCapacityFor(ByteBuffer out, int exansion) { | |
185 if (out == null || out.remaining() < exansion) { | |
186 ByteBuffer newOut = ByteBuffer.allocate(out == null ? exansion*2 : out.capacity() + exansion); | |
187 if (out != null) { | |
188 out.flip(); | |
189 newOut.put(out); | |
190 } | |
191 return newOut; | |
192 } | |
193 return out; | |
194 } | |
195 | |
196 private ByteBuffer expandKeywordValue(String keyword, ByteBuffer rv) { | |
197 byte[] toInject; | |
201 if ("Id".equals(keyword)) { | 198 if ("Id".equals(keyword)) { |
202 rv.put(identityString().getBytes()); | 199 toInject = identityString().getBytes(); |
203 } else if ("Revision".equals(keyword)) { | 200 } else if ("Revision".equals(keyword)) { |
204 rv.put(revision().getBytes()); | 201 toInject = revision().getBytes(); |
205 } else if ("Author".equals(keyword)) { | 202 } else if ("Author".equals(keyword)) { |
206 rv.put(username().getBytes()); | 203 toInject = username().getBytes(); |
207 } else if ("Date".equals(keyword)) { | 204 } else if ("Date".equals(keyword)) { |
208 rv.put(date().getBytes()); | 205 toInject = date().getBytes(); |
209 } else { | 206 } else { |
210 throw new IllegalStateException(String.format("Keyword %s is not yet supported", keyword)); | 207 throw new IllegalStateException(String.format("Keyword %s is not yet supported", keyword)); |
211 } | 208 } |
212 } | 209 rv = ensureCapacityFor(rv, toInject.length); |
213 | 210 rv.put(toInject); |
214 private String matchKeyword(ByteBuffer src, int kwStart, int kwEnd) { | 211 return rv; |
215 assert kwEnd - kwStart - 1 > 0; | |
216 assert src.get(kwStart) == src.get(kwEnd) && src.get(kwEnd) == '$'; | |
217 char[] chars = new char[kwEnd - kwStart - 1]; | |
218 int i; | |
219 for (i = 0; i < chars.length; i++) { | |
220 char c = (char) src.get(kwStart + 1 + i); | |
221 if (c == ':') { | |
222 break; | |
223 } | |
224 chars[i] = c; | |
225 } | |
226 String kw = new String(chars, 0, i); | |
227 // XXX may use subMap to look up keywords based on few available characters (not waiting till closing $) | |
228 // System.out.println(keywords.subMap("I", "J")); | |
229 // System.out.println(keywords.subMap("A", "B")); | |
230 // System.out.println(keywords.subMap("Au", "B")); | |
231 return keywords.get(kw); | |
232 } | 212 } |
233 | 213 |
234 // copies part of the src buffer, [from..to). doesn't modify src position | 214 // copies part of the src buffer, [from..to). doesn't modify src position |
235 static void copySlice(ByteBuffer src, int from, int to, ByteBuffer dst) { | 215 static void copySlice(ByteBuffer src, int from, int to, ByteBuffer dst) { |
236 if (to > src.limit()) { | 216 if (to > src.limit()) { |
304 } | 284 } |
305 return latestFileCset; | 285 return latestFileCset; |
306 } | 286 } |
307 | 287 |
308 public static class Factory implements Filter.Factory { | 288 public static class Factory implements Filter.Factory { |
309 | 289 private final Map<String,String> keywords; |
310 private HgRepository repo; | 290 private HgRepository repo; |
311 private Path.Matcher matcher; | 291 private Path.Matcher matcher; |
292 | |
293 public Factory() { | |
294 keywords = new TreeMap<String,String>(); | |
295 keywords.put("Id", "Id"); | |
296 keywords.put("Revision", "Revision"); | |
297 keywords.put("Author", "Author"); | |
298 keywords.put("Date", "Date"); | |
299 keywords.put("LastChangedRevision", "LastChangedRevision"); | |
300 keywords.put("LastChangedBy", "LastChangedBy"); | |
301 keywords.put("LastChangedDate", "LastChangedDate"); | |
302 keywords.put("Source", "Source"); | |
303 keywords.put("Header", "Header"); | |
304 } | |
312 | 305 |
313 public void initialize(HgRepository hgRepo) { | 306 public void initialize(HgRepository hgRepo) { |
314 repo = hgRepo; | 307 repo = hgRepo; |
315 ArrayList<String> patterns = new ArrayList<String>(); | 308 ArrayList<String> patterns = new ArrayList<String>(); |
316 for (Pair<String,String> e : hgRepo.getConfiguration().getSection("keyword")) { | 309 for (Pair<String,String> e : hgRepo.getConfiguration().getSection("keyword")) { |
322 // TODO post-1.0 read and respect keyword patterns from [keywordmaps] | 315 // TODO post-1.0 read and respect keyword patterns from [keywordmaps] |
323 } | 316 } |
324 | 317 |
325 public Filter create(Path path, Options opts) { | 318 public Filter create(Path path, Options opts) { |
326 if (matcher.accept(path)) { | 319 if (matcher.accept(path)) { |
327 return new KeywordFilter(repo, path, opts.getDirection() == Filter.Direction.FromRepo); | 320 return new KeywordFilter(repo, path, keywords, opts.getDirection() == Filter.Direction.FromRepo); |
328 } | 321 } |
329 return null; | 322 return null; |
330 } | 323 } |
331 } | 324 } |
332 | 325 |