changeset 91:c2ce1cfaeb9e

ignore file with regex and 'honest' glob support
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Thu, 27 Jan 2011 06:06:42 +0100
parents a95c700408a9
children bf304cb14247
files TODO src/org/tmatesoft/hg/repo/HgIgnore.java src/org/tmatesoft/hg/repo/HgRepository.java src/org/tmatesoft/hg/repo/Internals.java src/org/tmatesoft/hg/repo/WorkingCopyStatusCollector.java
diffstat 5 files changed, 107 insertions(+), 46 deletions(-) [+]
line wrap: on
line diff
--- a/TODO	Wed Jan 26 06:31:40 2011 +0100
+++ b/TODO	Thu Jan 27 06:06:42 2011 +0100
@@ -1,6 +1,8 @@
 Read-only support, version 1.0
 ==============================
 Committed:
+* store+fncache, RevlogNG (i.e. no support for older store formats) 
+
 * hg log
   + user, branch, limit
   - date, 
@@ -12,17 +14,25 @@
 
   
 * hg status
-  - copies for revisions
-
+  + copies for [revision..revision] and for [revision..working dir]
+  - path or anything meaningful instead of Strings
+  - matchers
 
 * hg cat
 
+* hgignore
+  + glob
+  + pattern
+
 
 Proposed:
 - LogCommand.revision(int... rev)+ to walk selected revisions only (list->sort(array) on execute, binary search)
 - LogCommand.before(Date date) and .after()
 - LogCommand.match() to specify pattern, no selected file()s only?
 * RepositoryFacade and CommandContext  
+- hgignore: read extra ignore files from config file (ui.ignore)
+- tags
+
 
 Read-only support, version 1.1
 ==============================
--- a/src/org/tmatesoft/hg/repo/HgIgnore.java	Wed Jan 26 06:31:40 2011 +0100
+++ b/src/org/tmatesoft/hg/repo/HgIgnore.java	Thu Jan 27 06:06:42 2011 +0100
@@ -20,69 +20,109 @@
 import java.io.File;
 import java.io.FileReader;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collections;
-import java.util.Set;
-import java.util.TreeSet;
+import java.util.List;
+import java.util.regex.Pattern;
 
 /**
- *
+ * 
  * @author Artem Tikhomirov
  * @author TMate Software Ltd.
  */
 public class HgIgnore {
 
-	private final HgRepository repo;
-	private Set<String> entries;
+	private List<Pattern> entries;
 
-	public HgIgnore(HgRepository localRepo) {
-		this.repo = localRepo;
+	HgIgnore() {
+		entries = Collections.emptyList();
 	}
 
-	private void read() {
-		entries = Collections.emptySet();
-		File hgignoreFile = new File(repo.getRepositoryRoot().getParentFile(), ".hgignore");
+	/* package-local */void read(File hgignoreFile) throws IOException {
 		if (!hgignoreFile.exists()) {
 			return;
 		}
-		entries = new TreeSet<String>();
-		try {
-			BufferedReader fr = new BufferedReader(new FileReader(hgignoreFile));
-			String line;
-			while ((line = fr.readLine()) != null) {
-				// FIXME need to detect syntax:glob and other parameters
-				entries.add(line.trim()); // shall I account for local paths in the file (i.e. back-slashed on windows)?
+		ArrayList<Pattern> result = new ArrayList<Pattern>(entries); // start with existing
+		String syntax = "regex"; // or "glob"
+		BufferedReader fr = new BufferedReader(new FileReader(hgignoreFile));
+		String line;
+		while ((line = fr.readLine()) != null) {
+			line = line.trim();
+			if (line.startsWith("syntax:")) {
+				syntax = line.substring("syntax:".length()).trim();
+				if (!"regex".equals(syntax) && !"glob".equals(syntax)) {
+					throw new IllegalStateException(line);
+				}
+			} else if (line.length() > 0) {
+				// shall I account for local paths in the file (i.e.
+				// back-slashed on windows)?
+				int x;
+				if ((x = line.indexOf('#')) >= 0) {
+					line = line.substring(0, x).trim();
+					if (line.length() == 0) {
+						continue;
+					}
+				}
+				if ("glob".equals(syntax)) {
+					// hgignore(5)
+					// (http://www.selenic.com/mercurial/hgignore.5.html) says backslashes '\' are escape characters,
+					// hence no special treatment of Windows paths here;
+					// however, my own experiments suggest that '\' is not treated as an escape on Windows
+					line = glob2regex(line);
+				}
+				result.add(Pattern.compile(line)); // case-sensitive
 			}
-		} catch (IOException ex) {
-			ex.printStackTrace(); // log warn
 		}
+		result.trimToSize();
+		entries = result;
 	}
 
-	public void reset() {
-		// FIXME does anyone really need to clear HgIgnore? Perhaps, repo may return new instance each time,
-		// which is used throughout invocation and then discarded?
-		entries = null;
+	// Note: even if #isIgnored() was queried for a directory and gave a positive reply, it may still be asked
+	// to examine a file from that ignored folder. Thus, patterns like "bin" shall match not only the folder itself,
+	// but any file under that folder as well.
+	// Alternatively, the file walker could remember that a folder is ignored and reuse that for all nested files.
+	// However, that approach would require the walker to (a) return directories and (b) provide nesting information,
+	// which may become troublesome when one walks not over io.File, but Eclipse's IResource or any other custom VFS.
+	//
+	//
+	// The discussion linked below might be interesting, although it appears to be of no direct use in this case.
+	// @see http://stackoverflow.com/questions/1247772/is-there-an-equivalent-of-java-util-regex-for-glob-type-patterns
+	private String glob2regex(String line) {
+		assert line.length() > 0;
+		StringBuilder sb = new StringBuilder(line.length() + 10);
+		sb.append('^'); // help avoid matcher.find() to match 'bin' pattern in the middle of the filename
+		int start = 0, end = line.length() - 1;
+		// '*' at the beginning and end of a line are useless for Pattern
+		while (start <= end && line.charAt(start) == '*') start++;
+		while (end > start && line.charAt(end) == '*') end--;
+
+		for (int i = start; i <= end; i++) {
+			char ch = line.charAt(i);
+			if (ch == '.' || ch == '\\') {
+				sb.append('\\');
+			} else if (ch == '?') {
+				// simple '.' substitution might work out, however, more formally 
+				// a char class seems more appropriate to avoid accidentally
+				// matching a subdirectory with ? char (i.e. /a/b?d against /a/bad, /a/bed and /a/b/d)
+				// @see http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_13_03
+				// quote: "The slash character in a pathname shall be explicitly matched by using one or more slashes in the pattern; 
+				// it shall neither be matched by the asterisk or question-mark special characters nor by a bracket expression" 
+				sb.append("[^/]");
+				continue;
+			} else if (ch == '*') {
+				sb.append("[^/]*?");
+				continue;
+			}
+			sb.append(ch);
+		}
+		return sb.toString();
 	}
 
 	public boolean isIgnored(String path) {
-		if (entries == null) {
-			read();
-		}
-		if (entries.contains(path)) {
-			// easy part
-			return true;
-		}
-		// substrings are memory-friendly 
-		int x = 0, i = path.indexOf('/', 0);
-		while (i != -1) {
-			if (entries.contains(path.substring(x, i))) {
+		for (Pattern p : entries) {
+			if (p.matcher(path).find()) {
 				return true;
 			}
-			// try one with ending slash
-			if (entries.contains(path.substring(x, i+1))) { // even if i is last index, i+1 is safe here
-				return true;
-			}
-			x = i+1;
-			i = path.indexOf('/', x);
 		}
 		return false;
 	}
--- a/src/org/tmatesoft/hg/repo/HgRepository.java	Wed Jan 26 06:31:40 2011 +0100
+++ b/src/org/tmatesoft/hg/repo/HgRepository.java	Thu Jan 27 06:06:42 2011 +0100
@@ -71,6 +71,7 @@
 	private final HashMap<Path, SoftReference<RevlogStream>> streamsCache = new HashMap<Path, SoftReference<RevlogStream>>();
 	
 	private final org.tmatesoft.hg.internal.Internals impl = new org.tmatesoft.hg.internal.Internals();
+	private HgIgnore ignore;
 
 	HgRepository(String repositoryPath) {
 		repoDir = null;
@@ -150,8 +151,18 @@
 	}
 
 	// package-local, see comment for loadDirstate
-	/*package-local*/ final HgIgnore loadIgnore() {
-		return new HgIgnore(this);
+	/*package-local*/ final HgIgnore getIgnore() {
+		// TODO read config for additional locations
+		if (ignore == null) {
+			ignore = new HgIgnore();
+			try {
+				File ignoreFile = new File(repoDir.getParentFile(), ".hgignore");
+				ignore.read(ignoreFile);
+			} catch (IOException ex) {
+				ex.printStackTrace(); // log warn
+			}
+		}
+		return ignore;
 	}
 
 	/*package-local*/ DataAccessProvider getDataAccess() {
--- a/src/org/tmatesoft/hg/repo/Internals.java	Wed Jan 26 06:31:40 2011 +0100
+++ b/src/org/tmatesoft/hg/repo/Internals.java	Thu Jan 27 06:06:42 2011 +0100
@@ -39,7 +39,7 @@
 	}
 
 	public boolean[] checkIgnored(String... toCheck) {
-		HgIgnore ignore = repo.loadIgnore();
+		HgIgnore ignore = repo.getIgnore();
 		boolean[] rv = new boolean[toCheck.length];
 		for (int i = 0; i < toCheck.length; i++) {
 			rv[i] = ignore.isIgnored(toCheck[i]);
--- a/src/org/tmatesoft/hg/repo/WorkingCopyStatusCollector.java	Wed Jan 26 06:31:40 2011 +0100
+++ b/src/org/tmatesoft/hg/repo/WorkingCopyStatusCollector.java	Thu Jan 27 06:06:42 2011 +0100
@@ -69,7 +69,7 @@
 
 	// may be invoked few times
 	public void walk(int baseRevision, StatusCollector.Inspector inspector) {
-		final HgIgnore hgIgnore = repo.loadIgnore();
+		final HgIgnore hgIgnore = repo.getIgnore();
 		TreeSet<String> knownEntries = getDirstate().all();
 		final boolean isTipBase;
 		if (baseRevision == TIP) {