# HG changeset patch # User Artem Tikhomirov # Date 1296104802 -3600 # Node ID c2ce1cfaeb9e739edad5fd26d03f89842079a22d # Parent a95c700408a906cf98e2c7742f00272f5f848155 ignore file with regex and 'honest' glob support diff -r a95c700408a9 -r c2ce1cfaeb9e TODO --- a/TODO Wed Jan 26 06:31:40 2011 +0100 +++ b/TODO Thu Jan 27 06:06:42 2011 +0100 @@ -1,6 +1,8 @@ Read-only support, version 1.0 ============================== Committed: +* store+fncache, RevlogNG (i.e. no support for older store formats) + * hg log + user, branch, limit - date, @@ -12,17 +14,25 @@ * hg status - - copies for revisions - + + copies for [revision..revision] and for [revision..working dir] + - path or anything meaningful instead of Strings + - matchers * hg cat +* hgignore + + glob + + pattern + Proposed: - LogCommand.revision(int... rev)+ to walk selected revisions only (list->sort(array) on execute, binary search) - LogCommand.before(Date date) and .after() - LogCommand.match() to specify pattern, no selected file()s only? * RepositoryFacade and CommandContext +- hgignore: read extra ignore files from config file (ui.ignore) +- tags + Read-only support, version 1.1 ============================== diff -r a95c700408a9 -r c2ce1cfaeb9e src/org/tmatesoft/hg/repo/HgIgnore.java --- a/src/org/tmatesoft/hg/repo/HgIgnore.java Wed Jan 26 06:31:40 2011 +0100 +++ b/src/org/tmatesoft/hg/repo/HgIgnore.java Thu Jan 27 06:06:42 2011 +0100 @@ -20,69 +20,109 @@ import java.io.File; import java.io.FileReader; import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; -import java.util.Set; -import java.util.TreeSet; +import java.util.List; +import java.util.regex.Pattern; /** - * + * * @author Artem Tikhomirov * @author TMate Software Ltd. */ public class HgIgnore { - private final HgRepository repo; - private Set entries; + private List entries; - public HgIgnore(HgRepository localRepo) { - this.repo = localRepo; + HgIgnore() { + entries = Collections.emptyList(); } - private void read() { - entries = Collections.emptySet(); - File hgignoreFile = new File(repo.getRepositoryRoot().getParentFile(), ".hgignore"); + /* package-local */void read(File hgignoreFile) throws IOException { if (!hgignoreFile.exists()) { return; } - entries = new TreeSet(); - try { - BufferedReader fr = new BufferedReader(new FileReader(hgignoreFile)); - String line; - while ((line = fr.readLine()) != null) { - // FIXME need to detect syntax:glob and other parameters - entries.add(line.trim()); // shall I account for local paths in the file (i.e. back-slashed on windows)? + ArrayList result = new ArrayList(entries); // start with existing + String syntax = "regex"; // or "glob" + BufferedReader fr = new BufferedReader(new FileReader(hgignoreFile)); + String line; + while ((line = fr.readLine()) != null) { + line = line.trim(); + if (line.startsWith("syntax:")) { + syntax = line.substring("syntax:".length()).trim(); + if (!"regex".equals(syntax) && !"glob".equals(syntax)) { + throw new IllegalStateException(line); + } + } else if (line.length() > 0) { + // shall I account for local paths in the file (i.e. + // back-slashed on windows)? + int x; + if ((x = line.indexOf('#')) >= 0) { + line = line.substring(0, x).trim(); + if (line.length() == 0) { + continue; + } + } + if ("glob".equals(syntax)) { + // hgignore(5) + // (http://www.selenic.com/mercurial/hgignore.5.html) says slashes '\' are escape characters, + // hence no special treatment of Windows path + // however, own attempts make me think '\' on Windows are not treated as escapes + line = glob2regex(line); + } + result.add(Pattern.compile(line)); // case-sensitive } - } catch (IOException ex) { - ex.printStackTrace(); // log warn } + result.trimToSize(); + entries = result; } - public void reset() { - // FIXME does anyone really need to clear HgIgnore? Perhaps, repo may return new instance each time, - // which is used throughout invocation and then discarded? - entries = null; + // note, #isIgnored(), even if queried for directories and returned positive reply, may still get + // a file from that ignored folder to get examined. Thus, patterns like "bin" shall match not only a folder, + // but any file under that folder as well + // Alternatively, file walker may memorize folder is ignored and uses this information for all nested files. However, + // this approach would require walker (a) return directories (b) provide nesting information. This may become + // troublesome when one walks not over io.File, but Eclipse's IResource or any other custom VFS. + // + // + // might be interesting, although looks like of no direct use in my case + // @see http://stackoverflow.com/questions/1247772/is-there-an-equivalent-of-java-util-regex-for-glob-type-patterns + private String glob2regex(String line) { + assert line.length() > 0; + StringBuilder sb = new StringBuilder(line.length() + 10); + sb.append('^'); // help avoid matcher.find() to match 'bin' pattern in the middle of the filename + int start = 0, end = line.length() - 1; + // '*' at the beginning and end of a line are useless for Pattern + while (start <= end && line.charAt(start) == '*') start++; + while (end > start && line.charAt(end) == '*') end--; + + for (int i = start; i <= end; i++) { + char ch = line.charAt(i); + if (ch == '.' || ch == '\\') { + sb.append('\\'); + } else if (ch == '?') { + // simple '.' substitution might work out, however, more formally + // a char class seems more appropriate to avoid accidentally + // matching a subdirectory with ? char (i.e. /a/b?d against /a/bad, /a/bed and /a/b/d) + // @see http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_13_03 + // quote: "The slash character in a pathname shall be explicitly matched by using one or more slashes in the pattern; + // it shall neither be matched by the asterisk or question-mark special characters nor by a bracket expression" + sb.append("[^/]"); + continue; + } else if (ch == '*') { + sb.append("[^/]*?"); + continue; + } + sb.append(ch); + } + return sb.toString(); } public boolean isIgnored(String path) { - if (entries == null) { - read(); - } - if (entries.contains(path)) { - // easy part - return true; - } - // substrings are memory-friendly - int x = 0, i = path.indexOf('/', 0); - while (i != -1) { - if (entries.contains(path.substring(x, i))) { + for (Pattern p : entries) { + if (p.matcher(path).find()) { return true; } - // try one with ending slash - if (entries.contains(path.substring(x, i+1))) { // even if i is last index, i+1 is safe here - return true; - } - x = i+1; - i = path.indexOf('/', x); } return false; } diff -r a95c700408a9 -r c2ce1cfaeb9e src/org/tmatesoft/hg/repo/HgRepository.java --- a/src/org/tmatesoft/hg/repo/HgRepository.java Wed Jan 26 06:31:40 2011 +0100 +++ b/src/org/tmatesoft/hg/repo/HgRepository.java Thu Jan 27 06:06:42 2011 +0100 @@ -71,6 +71,7 @@ private final HashMap> streamsCache = new HashMap>(); private final org.tmatesoft.hg.internal.Internals impl = new org.tmatesoft.hg.internal.Internals(); + private HgIgnore ignore; HgRepository(String repositoryPath) { repoDir = null; @@ -150,8 +151,18 @@ } // package-local, see comment for loadDirstate - /*package-local*/ final HgIgnore loadIgnore() { - return new HgIgnore(this); + /*package-local*/ final HgIgnore getIgnore() { + // TODO read config for additional locations + if (ignore == null) { + ignore = new HgIgnore(); + try { + File ignoreFile = new File(repoDir.getParentFile(), ".hgignore"); + ignore.read(ignoreFile); + } catch (IOException ex) { + ex.printStackTrace(); // log warn + } + } + return ignore; } /*package-local*/ DataAccessProvider getDataAccess() { diff -r a95c700408a9 -r c2ce1cfaeb9e src/org/tmatesoft/hg/repo/Internals.java --- a/src/org/tmatesoft/hg/repo/Internals.java Wed Jan 26 06:31:40 2011 +0100 +++ b/src/org/tmatesoft/hg/repo/Internals.java Thu Jan 27 06:06:42 2011 +0100 @@ -39,7 +39,7 @@ } public boolean[] checkIgnored(String... toCheck) { - HgIgnore ignore = repo.loadIgnore(); + HgIgnore ignore = repo.getIgnore(); boolean[] rv = new boolean[toCheck.length]; for (int i = 0; i < toCheck.length; i++) { rv[i] = ignore.isIgnored(toCheck[i]); diff -r a95c700408a9 -r c2ce1cfaeb9e src/org/tmatesoft/hg/repo/WorkingCopyStatusCollector.java --- a/src/org/tmatesoft/hg/repo/WorkingCopyStatusCollector.java Wed Jan 26 06:31:40 2011 +0100 +++ b/src/org/tmatesoft/hg/repo/WorkingCopyStatusCollector.java Thu Jan 27 06:06:42 2011 +0100 @@ -69,7 +69,7 @@ // may be invoked few times public void walk(int baseRevision, StatusCollector.Inspector inspector) { - final HgIgnore hgIgnore = repo.loadIgnore(); + final HgIgnore hgIgnore = repo.getIgnore(); TreeSet knownEntries = getDirstate().all(); final boolean isTipBase; if (baseRevision == TIP) {