/*
 * Copyright (c) 2010-2013 TMate Software Ltd
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * For information on how to redistribute this software under
 * the terms of a license other than GNU General Public License
 * contact TMate Software at support@hg4j.com
 */
package org.tmatesoft.hg.repo;

import static org.tmatesoft.hg.repo.HgRepositoryFiles.HgIgnore;
import static org.tmatesoft.hg.util.LogFacility.Severity.Warn;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.tmatesoft.hg.internal.FileChangeMonitor;
import org.tmatesoft.hg.internal.Internals;
import org.tmatesoft.hg.util.Path;
import org.tmatesoft.hg.util.PathRewrite;

/**
 * Handling of ignored paths according to .hgignore configuration.
 * <p>
 * Patterns are read from the ignore file, translated to {@link Pattern regular expressions}
 * (glob patterns are converted, regexp patterns are used almost verbatim) and kept in the
 * order they were read. A path is ignored when it, or any of its leading directory
 * fragments, matches at least one of the patterns.
 *
 * @author Artem Tikhomirov
 * @author TMate Software Ltd.
 */
public class HgIgnore implements Path.Matcher {

	private static final String SYNTAX_REGEXP = "regexp";
	private static final String SYNTAX_GLOB = "glob";
	private static final String SYNTAX_DIRECTIVE = "syntax:";
	private static final String REGEXP_PREFIX1 = SYNTAX_REGEXP + ":";
	private static final String REGEXP_PREFIX2 = "re:";
	private static final String GLOB_PREFIX = SYNTAX_GLOB + ":";
	// characters that are literal in a glob but carry special meaning in java.util.regex
	private static final String GLOB_LITERALS_TO_ESCAPE = "\\.+()|^$";

	// compiled patterns, in the order they were read; never null
	private List<Pattern> entries;
	// optional (may be null) rewrite applied to glob patterns prior to translation
	private final PathRewrite globPathHelper;
	// created on first read(Internals), used by reloadIfChanged(Internals)
	private FileChangeMonitor ignoreFileTracker;

	/**
	 * @param globPathRewrite rewrite to apply to glob patterns before they are translated to regular expressions,
	 *   or <code>null</code> if patterns shall be used as is
	 */
	HgIgnore(PathRewrite globPathRewrite) {
		entries = Collections.emptyList();
		globPathHelper = globPathRewrite;
	}

	/**
	 * Reads the repository's ignore file, if it exists and is readable, and appends its patterns to those
	 * already known. Syntax errors are reported to the repository log with {@link org.tmatesoft.hg.util.LogFacility.Severity#Warn}
	 * severity and do not abort the read. The file change tracker is created on first use and updated on every call.
	 *
	 * @param repo access to repository files and log
	 * @throws HgInvalidControlFileException if the ignore file could not be read
	 */
	/* package-local */ void read(Internals repo) throws HgInvalidControlFileException {
		File ignoreFile = repo.getRepositoryFile(HgIgnore);
		BufferedReader fr = null;
		try {
			if (ignoreFile.canRead() && ignoreFile.isFile()) {
				fr = new BufferedReader(new FileReader(ignoreFile));
				final List<String> errors = read(fr);
				if (errors != null) {
					repo.getLog().dump(getClass(), Warn, "Syntax errors parsing %s:\n%s", ignoreFile.getName(), Internals.join(errors, ",\n"));
				}
			}
			if (ignoreFileTracker == null) {
				ignoreFileTracker = new FileChangeMonitor(ignoreFile);
			}
			ignoreFileTracker.touch(this);
		} catch (IOException ex) {
			final String m = String.format("Error reading %s file", ignoreFile);
			throw new HgInvalidControlFileException(m, ex, ignoreFile);
		} finally {
			try {
				if (fr != null) {
					fr.close();
				}
			} catch (IOException ex) {
				repo.getLog().dump(getClass(), Warn, ex, null); // it's read, don't treat as error
			}
		}
	}

	/**
	 * Drops known patterns and reads the ignore file anew if the tracker reports the file as changed.
	 * Expects {@link #read(Internals)} to have been invoked at least once (that's where the tracker gets created).
	 *
	 * @param repo access to repository files and log
	 * @throws HgInvalidControlFileException if the ignore file could not be read
	 */
	/*package-local*/ void reloadIfChanged(Internals repo) throws HgInvalidControlFileException {
		assert ignoreFileTracker != null;
		if (ignoreFileTracker.changed(this)) {
			entries = Collections.emptyList();
			read(repo);
		}
	}

	/**
	 * Parses ignore configuration and appends successfully compiled patterns to the ones already known.
	 * <p>
	 * Default pattern syntax is <code>regexp</code>; a <code>syntax: glob</code> or <code>syntax: regexp</code> line
	 * switches the syntax for the lines that follow. A line with an unknown syntax is reported as an error and
	 * leaves the syntax in effect untouched. Individual lines may override the syntax with a
	 * <code>glob:</code>, <code>regexp:</code> or <code>re:</code> prefix.
	 *
	 * @param content ignore configuration to read, line by line; not closed by this method
	 * @return list of lines that failed to parse, or <code>null</code> if there were no errors
	 * @throws IOException propagated from the reader
	 */
	/* package-local */ List<String> read(BufferedReader content) throws IOException {
		ArrayList<String> errors = new ArrayList<String>();
		ArrayList<Pattern> result = new ArrayList<Pattern>(entries); // start with existing
		String syntax = SYNTAX_REGEXP;
		String line;
		while ((line = content.readLine()) != null) {
			line = line.trim();
			if (line.startsWith(SYNTAX_DIRECTIVE)) {
				String requested = line.substring(SYNTAX_DIRECTIVE.length()).trim();
				if (SYNTAX_REGEXP.equals(requested) || SYNTAX_GLOB.equals(requested)) {
					syntax = requested;
				} else {
					// report, but keep the syntax selected earlier, otherwise subsequent
					// patterns would be processed with a syntax kind we know nothing about
					errors.add(line);
				}
				continue;
			}
			if (line.length() == 0) {
				continue;
			}
			// shall I account for local paths in the file (i.e.
			// back-slashed on windows)?
			line = stripComment(line);
			// due to the nature of Mercurial implementation, lines prefixed with syntax kind
			// are processed correctly (despite the fact hgignore(5) suggest "syntax:" as the
			// only way to specify it). lineSyntax below leaves a chance for the line to switch
			// syntax in use without affecting default kind.
			String lineSyntax;
			if (line.startsWith(GLOB_PREFIX)) {
				line = line.substring(GLOB_PREFIX.length()).trim();
				lineSyntax = SYNTAX_GLOB;
			} else if (line.startsWith(REGEXP_PREFIX1)) {
				line = line.substring(REGEXP_PREFIX1.length()).trim();
				lineSyntax = SYNTAX_REGEXP;
			} else if (line.startsWith(REGEXP_PREFIX2)) {
				line = line.substring(REGEXP_PREFIX2.length()).trim();
				lineSyntax = SYNTAX_REGEXP;
			} else {
				lineSyntax = syntax;
			}
			if (line.length() == 0) {
				// nothing left once comment and/or prefix are gone
				continue;
			}
			if (SYNTAX_GLOB.equals(lineSyntax)) {
				// hgignore(5) says slashes '\' are escape characters,
				// however, for glob patterns on Windows first get backslashes converted to slashes
				if (globPathHelper != null) {
					line = globPathHelper.rewrite(line).toString();
				}
				line = glob2regex(line);
			} else {
				assert SYNTAX_REGEXP.equals(lineSyntax);
				// regular expression patterns need not match start of the line unless demanded explicitly
				line = line.charAt(0) == '^' ? line : ".*" + line;
			}
			try {
				result.add(Pattern.compile(line)); // case-sensitive
			} catch (PatternSyntaxException ex) {
				errors.add(line + "@" + ex.getMessage());
			}
		}
		result.trimToSize();
		entries = result;
		return errors.isEmpty() ? null : errors;
	}

	/**
	 * Removes trailing comment, i.e. everything starting with the first '#' that is not escaped with a backslash.
	 * Escaped hashes ("\#") are replaced with a plain '#'.
	 *
	 * @param line trimmed, non-empty line
	 * @return line without comment, possibly empty
	 */
	private static String stripComment(String line) {
		int x, s = 0;
		while ((x = line.indexOf('#', s)) >= 0) {
			if (x > 0 && line.charAt(x-1) == '\\') {
				// remove escape char
				line = line.substring(0, x-1).concat(line.substring(x));
				s = x; // with exclusion of char at [x], s now points to what used to be at [x+1]
			} else {
				line = line.substring(0, x).trim();
				break; // nothing but comment past this point
			}
		}
		return line;
	}

	// note, #isIgnored(), even if queried for directories and returned positive reply, may still get
	// a file from that ignored folder to get examined. Thus, patterns like "bin" shall match not only a folder,
	// but any file under that folder as well
	// Alternatively, file walker may memorize folder is ignored and uses this information for all nested files. However,
	// this approach would require walker (a) return directories (b) provide nesting information. This may become
	// troublesome when one walks not over io.File, but Eclipse's IResource or any other custom VFS.
	//
	//
	// might be interesting, although looks like of no direct use in my case
	// @see http://stackoverflow.com/questions/1247772/is-there-an-equivalent-of-java-util-regex-for-glob-type-patterns
	//
	// TODO consider refactoring to reuse in PathGlobMatcher#glob2regexp
	/**
	 * Translates a glob pattern into a regular expression that matches the pattern in any directory.
	 * '?' and '*' never match a path separator, "{a,b}" becomes an alternation, and characters that are
	 * literal in a glob but special in a regular expression get escaped.
	 *
	 * @param line non-empty glob pattern
	 * @return regular expression source
	 */
	private static String glob2regex(String line) {
		assert line.length() > 0;
		StringBuilder sb = new StringBuilder(line.length() + 10);
		int start = 0, end = line.length() - 1;
		sb.append("(?:|.*/)"); // glob patterns shall match file in any directory

		int inCurly = 0;
		for (int i = start; i <= end; i++) {
			char ch = line.charAt(i);
			if (GLOB_LITERALS_TO_ESCAPE.indexOf(ch) >= 0) {
				// plain characters from glob's point of view, escape and append below
				sb.append('\\');
			} else if (ch == '?') {
				// simple '.' substitution might work out, however, more formally
				// a char class seems more appropriate to avoid accidentally
				// matching a subdirectory with ? char (i.e. /a/b?d against /a/bad, /a/bed and /a/b/d)
				// @see http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_13_03
				// quote: "The slash character in a pathname shall be explicitly matched by using one or more slashes in the pattern;
				// it shall neither be matched by the asterisk or question-mark special characters nor by a bracket expression"
				sb.append("[^/]");
				continue;
			} else if (ch == '*') {
				sb.append("[^/]*?");
				continue;
			} else if (ch == '{') {
				// XXX in fact, need to respect if last char was escaping ('\\'), then don't need to treat this as special
				// see link at javadoc above for reasonable example
				inCurly++;
				sb.append('(');
				continue;
			} else if (ch == '}') {
				if (inCurly > 0) {
					inCurly--;
					sb.append(')');
					continue;
				}
				// unbalanced '}' falls through and is appended as is
			} else if (ch == ',' && inCurly > 0) {
				sb.append('|');
				continue;
			}
			sb.append(ch);
		}
		// Python impl doesn't keep empty segments in directory names (ntpath.normpath and posixpath.normpath),
		// effectively removing trailing separators, thus patterns like "bin/" get translated into "bin$"
		// Our glob rewriter doesn't strip last empty segment, and "bin/$" would be incorrect pattern,
		// (e.g. isIgnored("bin/file") performs two matches, against "bin/file" and "bin") hence the check.
		if (sb.charAt(sb.length() - 1) != '/') {
			sb.append('$');
		}
		return sb.toString();
	}

	/**
	 * @param path file or directory name in question
	 * @return <code>true</code> if matches repository configuration of ignored files.
	 */
	public boolean isIgnored(Path path) {
		String ps = path.toString();
		final int firstSlash = ps.indexOf('/');
		for (Pattern p : entries) {
			if (p.matcher(ps).find()) {
				return true;
			}
			// try every leading directory fragment ("a", "a/b", ...) as the pattern may denote an ignored directory
			int x = firstSlash; // reset for each pattern
			while (x != -1 && x+1 != ps.length() /*skip very last segment not to check complete string twice*/) {
				String fragment = ps.substring(0, x);
				if (p.matcher(fragment).matches()) {
					return true;
				}
				x = ps.indexOf('/', x+1);
			}
		}
		return false;
	}

	/**
	 * A handy wrap of {@link #isIgnored(Path)} into {@link org.tmatesoft.hg.util.Path.Matcher}. Yields same result as {@link #isIgnored(Path)}.
	 * @return <code>true</code> if file is deemed ignored.
	 */
	public boolean accept(Path path) {
		return isIgnored(path);
	}
}