comparison src/org/tmatesoft/hg/internal/KeywordFilter.java @ 711:a62079bc422b

Keyword filtering that doesn't depend on input buffer size and the way input lines got split between filter() calls. KeywordFilter got state to keep processed suspicious ...$ lines
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Fri, 11 Oct 2013 21:35:41 +0200
parents cf200271439a
children
comparison
equal deleted inserted replaced
710:cf200271439a 711:a62079bc422b
19 import static org.tmatesoft.hg.util.LogFacility.Severity.Error; 19 import static org.tmatesoft.hg.util.LogFacility.Severity.Error;
20 20
21 import java.nio.ByteBuffer; 21 import java.nio.ByteBuffer;
22 import java.util.ArrayList; 22 import java.util.ArrayList;
23 import java.util.Date; 23 import java.util.Date;
24 import java.util.Map;
24 import java.util.TreeMap; 25 import java.util.TreeMap;
25 26
26 import org.tmatesoft.hg.core.Nodeid; 27 import org.tmatesoft.hg.core.Nodeid;
27 import org.tmatesoft.hg.repo.HgChangelog.RawChangeset; 28 import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
28 import org.tmatesoft.hg.repo.HgRepository; 29 import org.tmatesoft.hg.repo.HgRepository;
34 * 35 *
35 * @author Artem Tikhomirov 36 * @author Artem Tikhomirov
36 * @author TMate Software Ltd. 37 * @author TMate Software Ltd.
37 */ 38 */
38 public class KeywordFilter implements Filter { 39 public class KeywordFilter implements Filter {
39 // present implementation is stateless, however, filter use pattern shall not assume that. In fact, Factory may us that
40 private final HgRepository repo; 40 private final HgRepository repo;
41 private final boolean isExpanding; 41 private final boolean isExpanding;
42 private final TreeMap<String,String> keywords; 42 private final Map<String,String> keywords;
43 private final int minBufferLen;
44 private final Path path; 43 private final Path path;
45 private RawChangeset latestFileCset; 44 private RawChangeset latestFileCset;
45 private final ByteVector unprocessedBuffer;
46 46
47 /** 47 /**
48 * 48 *
49 * @param hgRepo 49 * @param hgRepo
50 * @param path 50 * @param path
51 * @param expand <code>true</code> to expand keywords, <code>false</code> to shrink 51 * @param expand <code>true</code> to expand keywords, <code>false</code> to shrink
52 */ 52 */
53 private KeywordFilter(HgRepository hgRepo, Path p, boolean expand) { 53 private KeywordFilter(HgRepository hgRepo, Path p, Map<String, String> kw, boolean expand) {
54 repo = hgRepo; 54 repo = hgRepo;
55 path = p; 55 path = p;
56 isExpanding = expand; 56 isExpanding = expand;
57 keywords = new TreeMap<String,String>(); 57 keywords = kw;
58 keywords.put("Id", "Id"); 58 unprocessedBuffer = expand ? new ByteVector(0, 0) : new ByteVector(120, 50);
59 keywords.put("Revision", "Revision");
60 keywords.put("Author", "Author");
61 keywords.put("Date", "Date");
62 keywords.put("LastChangedRevision", "LastChangedRevision");
63 keywords.put("LastChangedBy", "LastChangedBy");
64 keywords.put("LastChangedDate", "LastChangedDate");
65 keywords.put("Source", "Source");
66 keywords.put("Header", "Header");
67
68 int l = 0;
69 for (String s : keywords.keySet()) {
70 if (s.length() > l) {
71 l = s.length();
72 }
73 }
74 // TODO post-1.0 later may implement #filter() not to read full kw value (just "$kw:"). However, limit of maxLen + 2 would keep valid.
75 // for buffers less then minBufferLen, there are chances #filter() implementation would never end
76 // (i.e. for input "$LongestKey"$
77 minBufferLen = l + 2 + (isExpanding ? 0 : 120 /*any reasonable constant for max possible kw value length*/);
78 } 59 }
79 60
80 /** 61 /**
81 * @param src buffer ready to be read 62 * @param src buffer ready to be read
82 * @return buffer ready to be read and original buffer's position modified to reflect consumed bytes. IOW, if source buffer 63 * @return buffer ready to be read and original buffer's position modified to reflect consumed bytes. IOW, if source buffer
83 * on return has remaining bytes, they are assumed not-read (not processed) and next chunk passed to filter is supposed to 64 * on return has remaining bytes, they are assumed not-read (not processed) and next chunk passed to filter is supposed to
84 * start with them 65 * start with them
85 */ 66 */
86 public ByteBuffer filter(ByteBuffer src) { 67 public ByteBuffer filter(ByteBuffer src) {
87 int keywordStart = indexOf(src, '$', src.position(), false); 68 // when unprocessedBuffer is empty, we are looking for first $ in the input,
88 if (keywordStart != -1 && src.capacity() < minBufferLen) { 69 // when we've already got anything unprocessed, newline is of interest, too
89 // FIXME this check is unlucky when small files are read for status 'areTheSame' check - small buffer is allocated. 70 int kwBreak = indexOf(src, '$', src.position(), !unprocessedBuffer.isEmpty());
90 // the check for keywordStart('$') is a temp solution to minimize the chances to get this exception. 71 ByteBuffer outBuffer = null;
91 // Complete solution requires complete rewriting of this method to respect cases when keywords are split between buffers. 72 while (kwBreak != -1) {
92 // With 'honest' partial kw handling, need for this check would be gone. 73 if (unprocessedBuffer.isEmpty()) {
93 throw new IllegalStateException(String.format("Need buffer of at least %d bytes to ensure filter won't hang", minBufferLen)); 74 // both expand and collapse cases
94 } 75 assert src.get(kwBreak) == '$';
95 ByteBuffer rv = null; 76
96 int x = src.position(); 77 int end = indexOf(src, '$', kwBreak+1, true);
97 int copyFrom = x; // needs to be updated each time we copy a slice, but not each time we modify source index (x) 78 if (end == -1) {
98 while (x < src.limit()) { 79 for (int i = kwBreak; i < src.limit(); i++) {
99 if (keywordStart == -1) { 80 unprocessedBuffer.add(src.get(i));
100 int i = indexOf(src, '$', x, false); 81 }
101 if (i == -1) { 82 src.limit(kwBreak);
102 if (rv == null) { 83 kwBreak = -1;
103 return src; 84 // src up to kwBreak is left and returned either with outBuffer or alone
85 } else if (src.get(end) == '$') {
86 StringBuilder sb = new StringBuilder(end - kwBreak);
87 for (int i = kwBreak+1; i < end; i++) {
88 if (src.get(i) == ':' || src.get(i) == ' ') {
89 break;
90 }
91 sb.append((char) src.get(i));
92 }
93 final String keyword = sb.toString();
94 if (knownKeyword(keyword)) {
95 // copy src up to kw, including starting $keyword
96 outBuffer = append(outBuffer, src, kwBreak - src.position() + 1+keyword.length());
97 // replace kwStart..end with new content
98 outBuffer = ensureCapacityFor(outBuffer, (isExpanding ? 200 : 1));
99 if (isExpanding) {
100 outBuffer.put((byte) ':');
101 outBuffer.put((byte) ' ');
102 outBuffer = expandKeywordValue(keyword, outBuffer);
103 outBuffer.put((byte) ' ');
104 }
105 outBuffer.put((byte) '$');
106 // src is consumed up to end
107 src.position(end+1);
108 kwBreak = indexOf(src, '$', end+1, false);
104 } else { 109 } else {
105 copySlice(src, copyFrom, src.limit(), rv); 110 // no (or unknown) keyword, try with '$' at src[end]
106 rv.flip(); 111 kwBreak = end;
107 src.position(src.limit()); 112 }
108 return rv; 113 } else {
109 } 114 // newline, ignore keyword start
115 kwBreak = indexOf(src, '$', end+1, false);
110 } 116 }
111 keywordStart = i; 117 } else {
112 // fall-through 118 // we've got smth unprocessed, and we've matched either $ or NL
113 } 119 // the only chance to get here is when src is in the very start
114 if (keywordStart >= 0) { 120 if (src.get(kwBreak) == '$') {
115 int i = indexOf(src, '$', keywordStart+1, true); 121 // closed tag
116 if (i == -1) { 122 for (int i = src.position(); i <= kwBreak; i++) {
117 // end of buffer reached 123 // consume src: going to handle its [position*()..kwBreak] as part of unprocessedBuffer
118 if (rv == null) { 124 unprocessedBuffer.add(src.get());
119 if (keywordStart == x) { 125 }
120 // TODO post-1.0 in fact, x might be equal to keywordStart and to src.position() here ('$' is first character in the buffer, 126 StringBuilder sb = new StringBuilder(unprocessedBuffer.size());
121 // and there are no other '$' not eols till the end of the buffer). This would lead to deadlock (filter won't consume any 127 assert unprocessedBuffer.get(0) == '$';
122 // bytes). To prevent this, either shall copy bytes [keywordStart..buffer.limit()) to local buffer and use it on the next invocation, 128 for (int i = 1; i < unprocessedBuffer.size(); i++) {
123 // or add lookup of the keywords right after first '$' is found (do not wait for closing '$'). For now, large enough src buffer would be sufficient 129 char ch = (char) unprocessedBuffer.get(i);
124 // not to run into such situation 130 if (ch == ':' || ch == ' ') {
125 throw new IllegalStateException("Try src buffer of a greater size"); 131 break;
126 } 132 }
127 rv = ByteBuffer.allocate(keywordStart - copyFrom); 133 sb.append(ch);
128 } 134 }
129 // copy all from source till latest possible kw start 135 final String keyword = sb.toString();
130 copySlice(src, copyFrom, keywordStart, rv); 136 if (knownKeyword(keyword)) {
131 rv.flip(); 137 outBuffer = ensureCapacityFor(outBuffer, keyword.length() + (isExpanding ? 200 : 2));
132 // and tell caller we've consumed only to the potential kw start 138 outBuffer.put((byte) '$');
133 src.position(keywordStart); 139 outBuffer.put(keyword.getBytes());
134 return rv; 140 if (isExpanding) {
135 } else if (src.get(i) == '$') { 141 outBuffer.put((byte) ':');
136 // end of keyword, or start of a new one. 142 outBuffer.put((byte) ' ');
137 String keyword; 143 outBuffer = expandKeywordValue(keyword, outBuffer);
138 if ((keyword = matchKeyword(src, keywordStart, i)) != null) { 144 outBuffer.put((byte) ' ');
139 if (rv == null) {
140 // src.remaining(), not .capacity because src is not read, and remaining represents
141 // actual bytes count, while capacity - potential.
142 // Factor of 4 is pure guess and a HACK, need to be fixed with re-expanding buffer on demand
143 rv = ByteBuffer.allocate(isExpanding ? src.remaining() * 4 : src.remaining());
144 } 145 }
145 copySlice(src, copyFrom, keywordStart+1, rv); 146 outBuffer.put((byte) '$');
146 rv.put(keyword.getBytes());
147 if (isExpanding) {
148 rv.put((byte) ':');
149 rv.put((byte) ' ');
150 expandKeywordValue(keyword, rv);
151 rv.put((byte) ' ');
152 }
153 rv.put((byte) '$');
154 keywordStart = -1;
155 x = i+1;
156 copyFrom = x;
157 continue;
158 } else { 147 } else {
159 if (rv != null) { 148 outBuffer = append(outBuffer, unprocessedBuffer.toByteArray());
160 // we've already did some substitution, thus need to copy bytes we've scanned. 149 }
161 copySlice(src, x, i, rv); 150 // src part is consumed already, do nothing here, look for next possible kw
162 copyFrom = i; 151 kwBreak = indexOf(src, '$', kwBreak+1, false);
163 } // no else in attempt to avoid rv creation if no real kw would be found
164 keywordStart = i;
165 x = i; // '$' at i wasn't consumed, hence x points to i, not i+1. This is to avoid problems with case: "sdfsd $ asdfs $Id$ sdf"
166 continue;
167 }
168 } else { 152 } else {
169 assert src.get(i) == '\n' || src.get(i) == '\r'; 153 // newline => tag without close
170 // line break 154 outBuffer = append(outBuffer, unprocessedBuffer.toByteArray());
171 if (rv != null) { 155 kwBreak = indexOf(src, '$', kwBreak+1, false);
172 copySlice(src, x, i+1, rv);
173 copyFrom = i+1;
174 }
175 x = i+1;
176 keywordStart = -1; // Wasn't keyword, really
177 continue; // try once again
178 } 156 }
179 } 157 unprocessedBuffer.clear();
180 } 158 }
181 if (keywordStart != -1) { 159 } while (kwBreak != -1);
182 if (rv == null) { 160 if (outBuffer == null) {
183 // no expansion happened yet, and we have potential kw start 161 return src;
184 rv = ByteBuffer.allocate(keywordStart - src.position()); 162 }
185 copySlice(src, src.position(), keywordStart, rv); 163 outBuffer = ensureCapacityFor(outBuffer, src.remaining());
186 } 164 outBuffer.put(src);
187 src.position(keywordStart); 165 outBuffer.flip();
188 } 166 return outBuffer;
189 if (rv != null) { 167 }
190 rv.flip(); 168 private boolean knownKeyword(String kw) {
191 return rv; 169 return keywords.containsKey(kw);
192 } 170 }
193 return src; 171
194 } 172 private static ByteBuffer append(ByteBuffer out, byte[] data) {
195 173 out = ensureCapacityFor(out, data.length);
196 /** 174 out.put(data);
197 * @param keyword 175 return out;
198 * @param rv 176 }
199 */ 177 private static ByteBuffer append(ByteBuffer out, ByteBuffer in, int count) {
200 private void expandKeywordValue(String keyword, ByteBuffer rv) { 178 out = ensureCapacityFor(out, count);
179 while (count-- > 0) {
180 out.put(in.get());
181 }
182 return out;
183 }
184 private static ByteBuffer ensureCapacityFor(ByteBuffer out, int exansion) {
185 if (out == null || out.remaining() < exansion) {
186 ByteBuffer newOut = ByteBuffer.allocate(out == null ? exansion*2 : out.capacity() + exansion);
187 if (out != null) {
188 out.flip();
189 newOut.put(out);
190 }
191 return newOut;
192 }
193 return out;
194 }
195
196 private ByteBuffer expandKeywordValue(String keyword, ByteBuffer rv) {
197 byte[] toInject;
201 if ("Id".equals(keyword)) { 198 if ("Id".equals(keyword)) {
202 rv.put(identityString().getBytes()); 199 toInject = identityString().getBytes();
203 } else if ("Revision".equals(keyword)) { 200 } else if ("Revision".equals(keyword)) {
204 rv.put(revision().getBytes()); 201 toInject = revision().getBytes();
205 } else if ("Author".equals(keyword)) { 202 } else if ("Author".equals(keyword)) {
206 rv.put(username().getBytes()); 203 toInject = username().getBytes();
207 } else if ("Date".equals(keyword)) { 204 } else if ("Date".equals(keyword)) {
208 rv.put(date().getBytes()); 205 toInject = date().getBytes();
209 } else { 206 } else {
210 throw new IllegalStateException(String.format("Keyword %s is not yet supported", keyword)); 207 throw new IllegalStateException(String.format("Keyword %s is not yet supported", keyword));
211 } 208 }
212 } 209 rv = ensureCapacityFor(rv, toInject.length);
213 210 rv.put(toInject);
214 private String matchKeyword(ByteBuffer src, int kwStart, int kwEnd) { 211 return rv;
215 assert kwEnd - kwStart - 1 > 0;
216 assert src.get(kwStart) == src.get(kwEnd) && src.get(kwEnd) == '$';
217 char[] chars = new char[kwEnd - kwStart - 1];
218 int i;
219 for (i = 0; i < chars.length; i++) {
220 char c = (char) src.get(kwStart + 1 + i);
221 if (c == ':') {
222 break;
223 }
224 chars[i] = c;
225 }
226 String kw = new String(chars, 0, i);
227 // XXX may use subMap to look up keywords based on few available characters (not waiting till closing $)
228 // System.out.println(keywords.subMap("I", "J"));
229 // System.out.println(keywords.subMap("A", "B"));
230 // System.out.println(keywords.subMap("Au", "B"));
231 return keywords.get(kw);
232 } 212 }
233 213
234 // copies part of the src buffer, [from..to). doesn't modify src position 214 // copies part of the src buffer, [from..to). doesn't modify src position
235 static void copySlice(ByteBuffer src, int from, int to, ByteBuffer dst) { 215 static void copySlice(ByteBuffer src, int from, int to, ByteBuffer dst) {
236 if (to > src.limit()) { 216 if (to > src.limit()) {
304 } 284 }
305 return latestFileCset; 285 return latestFileCset;
306 } 286 }
307 287
308 public static class Factory implements Filter.Factory { 288 public static class Factory implements Filter.Factory {
309 289 private final Map<String,String> keywords;
310 private HgRepository repo; 290 private HgRepository repo;
311 private Path.Matcher matcher; 291 private Path.Matcher matcher;
292
293 public Factory() {
294 keywords = new TreeMap<String,String>();
295 keywords.put("Id", "Id");
296 keywords.put("Revision", "Revision");
297 keywords.put("Author", "Author");
298 keywords.put("Date", "Date");
299 keywords.put("LastChangedRevision", "LastChangedRevision");
300 keywords.put("LastChangedBy", "LastChangedBy");
301 keywords.put("LastChangedDate", "LastChangedDate");
302 keywords.put("Source", "Source");
303 keywords.put("Header", "Header");
304 }
312 305
313 public void initialize(HgRepository hgRepo) { 306 public void initialize(HgRepository hgRepo) {
314 repo = hgRepo; 307 repo = hgRepo;
315 ArrayList<String> patterns = new ArrayList<String>(); 308 ArrayList<String> patterns = new ArrayList<String>();
316 for (Pair<String,String> e : hgRepo.getConfiguration().getSection("keyword")) { 309 for (Pair<String,String> e : hgRepo.getConfiguration().getSection("keyword")) {
322 // TODO post-1.0 read and respect keyword patterns from [keywordmaps] 315 // TODO post-1.0 read and respect keyword patterns from [keywordmaps]
323 } 316 }
324 317
325 public Filter create(Path path, Options opts) { 318 public Filter create(Path path, Options opts) {
326 if (matcher.accept(path)) { 319 if (matcher.accept(path)) {
327 return new KeywordFilter(repo, path, opts.getDirection() == Filter.Direction.FromRepo); 320 return new KeywordFilter(repo, path, keywords, opts.getDirection() == Filter.Direction.FromRepo);
328 } 321 }
329 return null; 322 return null;
330 } 323 }
331 } 324 }
332 325