comparison src/org/tmatesoft/hg/repo/HgIgnore.java @ 91:c2ce1cfaeb9e

ignore file with regex and 'honest' glob support
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Thu, 27 Jan 2011 06:06:42 +0100
parents 6f1b88693d48
children a3a2e5deb320
comparison
equal deleted inserted replaced
90:a95c700408a9 91:c2ce1cfaeb9e
18 18
19 import java.io.BufferedReader; 19 import java.io.BufferedReader;
20 import java.io.File; 20 import java.io.File;
21 import java.io.FileReader; 21 import java.io.FileReader;
22 import java.io.IOException; 22 import java.io.IOException;
23 import java.util.ArrayList;
23 import java.util.Collections; 24 import java.util.Collections;
24 import java.util.Set; 25 import java.util.List;
25 import java.util.TreeSet; 26 import java.util.regex.Pattern;
26 27
27 /** 28 /**
28 * 29 *
29 * @author Artem Tikhomirov 30 * @author Artem Tikhomirov
30 * @author TMate Software Ltd. 31 * @author TMate Software Ltd.
31 */ 32 */
32 public class HgIgnore { 33 public class HgIgnore {
33 34
34 private final HgRepository repo; 35 private List<Pattern> entries;
35 private Set<String> entries;
36 36
37 public HgIgnore(HgRepository localRepo) { 37 HgIgnore() {
38 this.repo = localRepo; 38 entries = Collections.emptyList();
39 } 39 }
40 40
41 private void read() { 41 /* package-local */void read(File hgignoreFile) throws IOException {
42 entries = Collections.emptySet();
43 File hgignoreFile = new File(repo.getRepositoryRoot().getParentFile(), ".hgignore");
44 if (!hgignoreFile.exists()) { 42 if (!hgignoreFile.exists()) {
45 return; 43 return;
46 } 44 }
47 entries = new TreeSet<String>(); 45 ArrayList<Pattern> result = new ArrayList<Pattern>(entries); // start with existing
48 try { 46 String syntax = "regex"; // or "glob"
49 BufferedReader fr = new BufferedReader(new FileReader(hgignoreFile)); 47 BufferedReader fr = new BufferedReader(new FileReader(hgignoreFile));
50 String line; 48 String line;
51 while ((line = fr.readLine()) != null) { 49 while ((line = fr.readLine()) != null) {
52 // FIXME need to detect syntax:glob and other parameters 50 line = line.trim();
53 entries.add(line.trim()); // shall I account for local paths in the file (i.e. back-slashed on windows)? 51 if (line.startsWith("syntax:")) {
52 syntax = line.substring("syntax:".length()).trim();
53 if (!"regex".equals(syntax) && !"glob".equals(syntax)) {
54 throw new IllegalStateException(line);
55 }
56 } else if (line.length() > 0) {
57 // shall I account for local paths in the file (i.e.
58 // back-slashed on windows)?
59 int x;
60 if ((x = line.indexOf('#')) >= 0) {
61 line = line.substring(0, x).trim();
62 if (line.length() == 0) {
63 continue;
64 }
65 }
66 if ("glob".equals(syntax)) {
67 // hgignore(5)
68 // (http://www.selenic.com/mercurial/hgignore.5.html) says backslashes '\' are escape characters,
69 // hence no special treatment of Windows paths;
70 // however, my own experiments suggest that '\' on Windows is not treated as an escape
71 line = glob2regex(line);
72 }
73 result.add(Pattern.compile(line)); // case-sensitive
54 } 74 }
55 } catch (IOException ex) {
56 ex.printStackTrace(); // log warn
57 } 75 }
76 result.trimToSize();
77 entries = result;
58 } 78 }
59 79
60 public void reset() { 80 // note: even if #isIgnored() was queried for a directory and gave a positive reply, it may still be asked
61 // FIXME does anyone really need to clear HgIgnore? Perhaps, repo may return new instance each time, 81 // to examine a file from that ignored folder. Thus, patterns like "bin" shall match not only a folder,
62 // which is used throughout invocation and then discarded? 82 // but any file under that folder as well.
63 entries = null; 83 // Alternatively, the file walker may remember that a folder is ignored and use this information for all nested files. However,
84 // this approach would require the walker to (a) return directories and (b) provide nesting information. This may become
85 // troublesome when one walks not over io.File, but over Eclipse's IResource or any other custom VFS.
86 //
87 //
88 // might be interesting, although it looks to be of no direct use in my case
89 // @see http://stackoverflow.com/questions/1247772/is-there-an-equivalent-of-java-util-regex-for-glob-type-patterns
90 private String glob2regex(String line) {
91 assert line.length() > 0;
92 StringBuilder sb = new StringBuilder(line.length() + 10);
93 sb.append('^'); // help avoid matcher.find() to match 'bin' pattern in the middle of the filename
94 int start = 0, end = line.length() - 1;
95 // '*' at the beginning and end of a line are useless for Pattern
96 while (start <= end && line.charAt(start) == '*') start++;
97 while (end > start && line.charAt(end) == '*') end--;
98
99 for (int i = start; i <= end; i++) {
100 char ch = line.charAt(i);
101 if (ch == '.' || ch == '\\') {
102 sb.append('\\');
103 } else if (ch == '?') {
104 // simple '.' substitution might work out, however, more formally
105 // a char class seems more appropriate to avoid accidentally
106 // matching a subdirectory with ? char (i.e. /a/b?d against /a/bad, /a/bed and /a/b/d)
107 // @see http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_13_03
108 // quote: "The slash character in a pathname shall be explicitly matched by using one or more slashes in the pattern;
109 // it shall neither be matched by the asterisk or question-mark special characters nor by a bracket expression"
110 sb.append("[^/]");
111 continue;
112 } else if (ch == '*') {
113 sb.append("[^/]*?");
114 continue;
115 }
116 sb.append(ch);
117 }
118 return sb.toString();
64 } 119 }
65 120
66 public boolean isIgnored(String path) { 121 public boolean isIgnored(String path) {
67 if (entries == null) { 122 for (Pattern p : entries) {
68 read(); 123 if (p.matcher(path).find()) {
69 }
70 if (entries.contains(path)) {
71 // easy part
72 return true;
73 }
74 // substrings are memory-friendly
75 int x = 0, i = path.indexOf('/', 0);
76 while (i != -1) {
77 if (entries.contains(path.substring(x, i))) {
78 return true; 124 return true;
79 } 125 }
80 // try one with ending slash
81 if (entries.contains(path.substring(x, i+1))) { // even if i is last index, i+1 is safe here
82 return true;
83 }
84 x = i+1;
85 i = path.indexOf('/', x);
86 } 126 }
87 return false; 127 return false;
88 } 128 }
89 } 129 }