comparison src/org/tmatesoft/hg/internal/KeywordFilter.java @ 711:a62079bc422b

Keyword filtering that doesn't depend on input buffer size and the way input lines got split between filter() calls. KeywordFilter got state to keep processed suspicious ...$ lines
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Fri, 11 Oct 2013 21:35:41 +0200
parents cf200271439a
children
comparison
equal deleted inserted replaced
710:cf200271439a 711:a62079bc422b
19 import static org.tmatesoft.hg.util.LogFacility.Severity.Error; 19 import static org.tmatesoft.hg.util.LogFacility.Severity.Error;
20 20
21 import java.nio.ByteBuffer; 21 import java.nio.ByteBuffer;
22 import java.util.ArrayList; 22 import java.util.ArrayList;
23 import java.util.Date; 23 import java.util.Date;
24 import java.util.Map;
24 import java.util.TreeMap; 25 import java.util.TreeMap;
25 26
26 import org.tmatesoft.hg.core.Nodeid; 27 import org.tmatesoft.hg.core.Nodeid;
27 import org.tmatesoft.hg.repo.HgChangelog.RawChangeset; 28 import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
28 import org.tmatesoft.hg.repo.HgRepository; 29 import org.tmatesoft.hg.repo.HgRepository;
34 * 35 *
35 * @author Artem Tikhomirov 36 * @author Artem Tikhomirov
36 * @author TMate Software Ltd. 37 * @author TMate Software Ltd.
37 */ 38 */
38 public class KeywordFilter implements Filter { 39 public class KeywordFilter implements Filter {
39 // present implementation is stateless, however, filter use pattern shall not assume that. In fact, Factory may us that
40 private final HgRepository repo; 40 private final HgRepository repo;
41 private final boolean isExpanding; 41 private final boolean isExpanding;
42 private final TreeMap<String,String> keywords; 42 private final Map<String,String> keywords;
43 private final int minBufferLen;
44 private final Path path; 43 private final Path path;
45 private RawChangeset latestFileCset; 44 private RawChangeset latestFileCset;
45 private final ByteVector unprocessedBuffer;
46 46
47 /** 47 /**
48 * 48 *
49 * @param hgRepo 49 * @param hgRepo
50 * @param path 50 * @param path
51 * @param expand <code>true</code> to expand keywords, <code>false</code> to shrink 51 * @param expand <code>true</code> to expand keywords, <code>false</code> to shrink
52 */ 52 */
53 private KeywordFilter(HgRepository hgRepo, Path p, boolean expand) { 53 private KeywordFilter(HgRepository hgRepo, Path p, Map<String, String> kw, boolean expand) {
54 repo = hgRepo; 54 repo = hgRepo;
55 path = p; 55 path = p;
56 isExpanding = expand; 56 isExpanding = expand;
57 keywords = new TreeMap<String,String>(); 57 keywords = kw;
58 keywords.put("Id", "Id"); 58 unprocessedBuffer = expand ? new ByteVector(0, 0) : new ByteVector(120, 50);
59 keywords.put("Revision", "Revision");
60 keywords.put("Author", "Author");
61 keywords.put("Date", "Date");
62 keywords.put("LastChangedRevision", "LastChangedRevision");
63 keywords.put("LastChangedBy", "LastChangedBy");
64 keywords.put("LastChangedDate", "LastChangedDate");
65 keywords.put("Source", "Source");
66 keywords.put("Header", "Header");
67
68 int l = 0;
69 for (String s : keywords.keySet()) {
70 if (s.length() > l) {
71 l = s.length();
72 }
73 }
74 // TODO post-1.0 later may implement #filter() not to read full kw value (just "$kw:"). However, limit of maxLen + 2 would keep valid.
75 // for buffers less then minBufferLen, there are chances #filter() implementation would never end
76 // (i.e. for input "$LongestKey"$
77 minBufferLen = l + 2 + (isExpanding ? 0 : 120 /*any reasonable constant for max possible kw value length*/);
78 } 59 }
79 60
80 /** 61 /**
81 * @param src buffer ready to be read 62 * @param src buffer ready to be read
82 * @return buffer ready to be read and original buffer's position modified to reflect consumed bytes. IOW, if source buffer 63 * @return buffer ready to be read and original buffer's position modified to reflect consumed bytes. IOW, if source buffer
83 * on return has remaining bytes, they are assumed not-read (not processed) and next chunk passed to filter is supposed to 64 * on return has remaining bytes, they are assumed not-read (not processed) and next chunk passed to filter is supposed to
84 * start with them 65 * start with them
85 */ 66 */
86 public ByteBuffer filter(ByteBuffer src) { 67 public ByteBuffer filter(ByteBuffer src) {
87 int keywordStart = indexOf(src, '$', src.position(), false); 68 // when unprocessedBuffer is empty, we are looking for first $ in the input,
88 if (keywordStart != -1 && src.capacity() < minBufferLen) { 69 // when we've already got anything unprocessed, newline is of interest, too
89 // FIXME this check is unlucky when small files are read for status 'areTheSame' check - small buffer is allocated. 70 int kwBreak = indexOf(src, '$', src.position(), !unprocessedBuffer.isEmpty());
90 // the check for keywordStart('$') is a temp solution to minimize the chances to get this exception. 71 ByteBuffer outBuffer = null;
91 // Complete solution requires complete rewriting of this method to respect cases when keywords are split between buffers. 72 while (kwBreak != -1) {
92 // With 'honest' partial kw handling, need for this check would be gone. 73 if (unprocessedBuffer.isEmpty()) {
93 throw new IllegalStateException(String.format("Need buffer of at least %d bytes to ensure filter won't hang", minBufferLen)); 74 // both expand and collapse cases
94 } 75 assert src.get(kwBreak) == '$';
95 ByteBuffer rv = null; 76
96 int x = src.position(); 77 int end = indexOf(src, '$', kwBreak+1, true);
97 int copyFrom = x; // needs to be updated each time we copy a slice, but not each time we modify source index (x) 78 if (end == -1) {
98 while (x < src.limit()) { 79 for (int i = kwBreak; i < src.limit(); i++) {
99 if (keywordStart == -1) { 80 unprocessedBuffer.add(src.get(i));
100 int i = indexOf(src, '$', x, false); 81 }
101 if (i == -1) { 82 src.limit(kwBreak);
102 if (rv == null) { 83 kwBreak = -1;
103 return src; 84 // src up to kwBreak is left and returned either with outBuffer or alone
85 } else if (src.get(end) == '$') {
86 StringBuilder sb = new StringBuilder(end - kwBreak);
87 for (int i = kwBreak+1; i < end; i++) {
88 if (src.get(i) == ':' || src.get(i) == ' ') {
89 break;
90 }
91 sb.append((char) src.get(i));
92 }
93 final String keyword = sb.toString();
94 if (knownKeyword(keyword)) {
95 // copy src up to kw, including starting $keyword
96 outBuffer = append(outBuffer, src, kwBreak - src.position() + 1+keyword.length());
97 // replace kwStart..end with new content
98 outBuffer = ensureCapacityFor(outBuffer, (isExpanding ? 200 : 1));
99 if (isExpanding) {
100 outBuffer.put((byte) ':');
101 outBuffer.put((byte) ' ');
102 outBuffer = expandKeywordValue(keyword, outBuffer);
103 outBuffer.put((byte) ' ');
104 }
105 outBuffer.put((byte) '$');
106 // src is consumed up to end
107 src.position(end+1);
108 kwBreak = indexOf(src, '$', end+1, false);
104 } else { 109 } else {
105 copySlice(src, copyFrom, src.limit(), rv); 110 // no (or unknown) keyword, try with '$' at src[end]
106 rv.flip(); 111 kwBreak = end;
107 src.position(src.limit()); 112 }
108 return rv; 113 } else {
109 } 114 // newline, ignore keyword start
115 kwBreak = indexOf(src, '$', end+1, false);
110 } 116 }
111 keywordStart = i; 117 } else {
112 // fall-through 118 // we've got smth unprocessed, and we've matched either $ or NL
113 } 119 // the only chance to get here is when src is in the very start
114 if (keywordStart >= 0) { 120 if (src.get(kwBreak) == '$') {
115 int i = indexOf(src, '$', keywordStart+1, true); 121 // closed tag
116 if (i == -1) { 122 for (int i = src.position(); i <= kwBreak; i++) {
117 // end of buffer reached 123 // consume src: going to handle its [position*()..kwBreak] as part of unprocessedBuffer
118 if (rv == null) { 124 unprocessedBuffer.add(src.get());
119 if (keywordStart == x) { 125 }
120 // TODO post-1.0 in fact, x might be equal to keywordStart and to src.position() here ('$' is first character in the buffer, 126 StringBuilder sb = new StringBuilder(unprocessedBuffer.size());
121 // and there are no other '$' not eols till the end of the buffer). This would lead to deadlock (filter won't consume any 127 assert unprocessedBuffer.get(0) == '$';
122 // bytes). To prevent this, either shall copy bytes [keywordStart..buffer.limit()) to local buffer and use it on the next invocation, 128 for (int i = 1; i < unprocessedBuffer.size(); i++) {
123 // or add lookup of the keywords right after first '$' is found (do not wait for closing '$'). For now, large enough src buffer would be sufficient 129 char ch = (char) unprocessedBuffer.get(i);
124 // not to run into such situation 130 if (ch == ':' || ch == ' ') {
125 throw new IllegalStateException("Try src buffer of a greater size"); 131 break;
126 } 132 }
127 rv = ByteBuffer.allocate(keywordStart - copyFrom); 133 sb.append(ch);
128 } 134 }
129 // copy all from source till latest possible kw start 135 final String keyword = sb.toString();
130 copySlice(src, copyFrom, keywordStart, rv); 136 if (knownKeyword(keyword)) {
131 rv.flip(); 137 outBuffer = ensureCapacityFor(outBuffer, keyword.length() + (isExpanding ? 200 : 2));
132 // and tell caller we've consumed only to the potential kw start 138 outBuffer.put((byte) '$');
133 src.position(keywordStart); 139 outBuffer.put(keyword.getBytes());
134 return rv; 140 if (isExpanding) {
135 } else if (src.get(i) == '$') { 141 outBuffer.put((byte) ':');
136 // end of keyword, or start of a new one. 142 outBuffer.put((byte) ' ');
137 String keyword; 143 outBuffer = expandKeywordValue(keyword, outBuffer);
138 if ((keyword = matchKeyword(src, keywordStart, i)) != null) { 144 outBuffer.put((byte) ' ');
139 if (rv == null) {
140 // src.remaining(), not .capacity because src is not read, and remaining represents
141 // actual bytes count, while capacity - potential.
142 // Factor of 4 is pure guess and a HACK, need to be fixed with re-expanding buffer on demand
143 rv = ByteBuffer.allocate(isExpanding ? src.remaining() * 4 : src.remaining());
144 } 145 }
145 copySlice(src, copyFrom, keywordStart+1, rv); 146 outBuffer.put((byte) '$');
146 rv.put(keyword.getBytes());
147 if (isExpanding) {
148 rv.put((byte) ':');
149 rv.put((byte) ' ');
150 expandKeywordValue(keyword, rv);
151 rv.put((byte) ' ');
152 }
153 rv.put((byte) '$');
154 keywordStart = -1;
155 x = i+1;
156 copyFrom = x;
157 continue;
158 } else { 147 } else {
159 if (rv != null) { 148 outBuffer = append(outBuffer, unprocessedBuffer.toByteArray());
160 // we've already did some substitution, thus need to copy bytes we've scanned. 149 }
161 copySlice(src, x, i, rv); 150 // src part is consumed already, do nothing here, look for next possible kw
162 copyFrom = i; 151 kwBreak = indexOf(src, '$', kwBreak+1, false);
163 } // no else in attempt to avoid rv creation if no real kw would be found
164 keywordStart = i;
165 x = i; // '$' at i wasn't consumed, hence x points to i, not i+1. This is to avoid problems with case: "sdfsd $ asdfs $Id$ sdf"
166 continue;
167 }
168 } else { 152 } else {
169 assert src.get(i) == '\n' || src.get(i) == '\r'; 153 // newline => tag without close
170 // line break 154 outBuffer = append(outBuffer, unprocessedBuffer.toByteArray());
171 if (rv != null) { 155 kwBreak = indexOf(src, '$', kwBreak+1, false);
172 copySlice(src, x, i+1, rv);
173 copyFrom = i+1;
174 }
175 x = i+1;
176 keywordStart = -1; // Wasn't keyword, really
177 continue; // try once again
178 } 156 }
179 } 157 unprocessedBuffer.clear();
180 } 158 }
181 if (keywordStart != -1) { 159 } while (kwBreak != -1);
182 if (rv == null) { 160 if (outBuffer == null) {
183 // no expansion happened yet, and we have potential kw start 161 return src;
184 rv = ByteBuffer.allocate(keywordStart - src.position()); 162 }
185 copySlice(src, src.position(), keywordStart, rv); 163 outBuffer = ensureCapacityFor(outBuffer, src.remaining());
186 } 164 outBuffer.put(src);
187 src.position(keywordStart); 165 outBuffer.flip();
188 } 166 return outBuffer;
189 if (rv != null) { 167 }
190 rv.flip(); 168 private boolean knownKeyword(String kw) {
191 return rv; 169 return keywords.containsKey(kw);
192 } 170 }
193 return src; 171
194 } 172 private static ByteBuffer append(ByteBuffer out, byte[] data) {
195 173 out = ensureCapacityFor(out, data.length);
196 /** 174 out.put(data);
197 * @param keyword 175 return out;
198 * @param rv 176 }
199 */ 177 private static ByteBuffer append(ByteBuffer out, ByteBuffer in, int count) {
200 private void expandKeywordValue(String keyword, ByteBuffer rv) { 178 out = ensureCapacityFor(out, count);
179 while (count-- > 0) {
180 out.put(in.get());
181 }
182 return out;
183 }
184 private static ByteBuffer ensureCapacityFor(ByteBuffer out, int exansion) {
185 if (out == null || out.remaining() < exansion) {
186 ByteBuffer newOut = ByteBuffer.allocate(out == null ? exansion*2 : out.capacity() + exansion);
187 if (out != null) {
188 out.flip();
189 newOut.put(out);
190 }
191 return newOut;
192 }
193 return out;
194 }
195
196 private ByteBuffer expandKeywordValue(String keyword, ByteBuffer rv) {
197 byte[] toInject;
201 if ("Id".equals(keyword)) { 198 if ("Id".equals(keyword)) {
202 rv.put(identityString().getBytes()); 199 toInject = identityString().getBytes();
203 } else if ("Revision".equals(keyword)) { 200 } else if ("Revision".equals(keyword)) {
204 rv.put(revision().getBytes()); 201 toInject = revision().getBytes();
205 } else if ("Author".equals(keyword)) { 202 } else if ("Author".equals(keyword)) {
206 rv.put(username().getBytes()); 203 toInject = username().getBytes();
207 } else if ("Date".equals(keyword)) { 204 } else if ("Date".equals(keyword)) {
208 rv.put(date().getBytes()); 205 toInject = date().getBytes();
209 } else { 206 } else {
210 throw new IllegalStateException(String.format("Keyword %s is not yet supported", keyword)); 207 throw new IllegalStateException(String.format("Keyword %s is not yet supported", keyword));
211 } 208 }
212 } 209 rv = ensureCapacityFor(rv, toInject.length);
213 210 rv.put(toInject);
214 private String matchKeyword(ByteBuffer src, int kwStart, int kwEnd) { 211 return rv;
215 assert kwEnd - kwStart - 1 > 0;
216 assert src.get(kwStart) == src.get(kwEnd) && src.get(kwEnd) == '$';
217 char[] chars = new char[kwEnd - kwStart - 1];
218 int i;
219 for (i = 0; i < chars.length; i++) {
220 char c = (char) src.get(kwStart + 1 + i);
221 if (c == ':') {
222 break;
223 }
224 chars[i] = c;
225 }
226 String kw = new String(chars, 0, i);
227 // XXX may use subMap to look up keywords based on few available characters (not waiting till closing $)
228 // System.out.println(keywords.subMap("I", "J"));
229 // System.out.println(keywords.subMap("A", "B"));
230 // System.out.println(keywords.subMap("Au", "B"));
231 return keywords.get(kw);
232 } 212 }
233 213
234 // copies part of the src buffer, [from..to). doesn't modify src position 214 // copies part of the src buffer, [from..to). doesn't modify src position
235 static void copySlice(ByteBuffer src, int from, int to, ByteBuffer dst) { 215 static void copySlice(ByteBuffer src, int from, int to, ByteBuffer dst) {
236 if (to > src.limit()) { 216 if (to > src.limit()) {
304 } 284 }
305 return latestFileCset; 285 return latestFileCset;
306 } 286 }
307 287
308 public static class Factory implements Filter.Factory { 288 public static class Factory implements Filter.Factory {
309 289 private final Map<String,String> keywords;
310 private HgRepository repo; 290 private HgRepository repo;
311 private Path.Matcher matcher; 291 private Path.Matcher matcher;
292
293 public Factory() {
294 keywords = new TreeMap<String,String>();
295 keywords.put("Id", "Id");
296 keywords.put("Revision", "Revision");
297 keywords.put("Author", "Author");
298 keywords.put("Date", "Date");
299 keywords.put("LastChangedRevision", "LastChangedRevision");
300 keywords.put("LastChangedBy", "LastChangedBy");
301 keywords.put("LastChangedDate", "LastChangedDate");
302 keywords.put("Source", "Source");
303 keywords.put("Header", "Header");
304 }
312 305
313 public void initialize(HgRepository hgRepo) { 306 public void initialize(HgRepository hgRepo) {
314 repo = hgRepo; 307 repo = hgRepo;
315 ArrayList<String> patterns = new ArrayList<String>(); 308 ArrayList<String> patterns = new ArrayList<String>();
316 for (Pair<String,String> e : hgRepo.getConfiguration().getSection("keyword")) { 309 for (Pair<String,String> e : hgRepo.getConfiguration().getSection("keyword")) {
322 // TODO post-1.0 read and respect keyword patterns from [keywordmaps] 315 // TODO post-1.0 read and respect keyword patterns from [keywordmaps]
323 } 316 }
324 317
325 public Filter create(Path path, Options opts) { 318 public Filter create(Path path, Options opts) {
326 if (matcher.accept(path)) { 319 if (matcher.accept(path)) {
327 return new KeywordFilter(repo, path, opts.getDirection() == Filter.Direction.FromRepo); 320 return new KeywordFilter(repo, path, keywords, opts.getDirection() == Filter.Direction.FromRepo);
328 } 321 }
329 return null; 322 return null;
330 } 323 }
331 } 324 }
332 325