changeset 497:02140be396d5

Issue 38. Towards gentle handling of config files - parse them and keep every possible user change
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Thu, 25 Oct 2012 19:59:08 +0200 (2012-10-25)
parents c1c8f6859d3f
children 0205a5c4566b
files build.xml src/org/tmatesoft/hg/internal/ConfigFileParser.java test/org/tmatesoft/hg/test/TestConfigFileParser.java
diffstat 3 files changed, 581 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/build.xml	Mon Oct 22 20:33:20 2012 +0200
+++ b/build.xml	Thu Oct 25 19:59:08 2012 +0200
@@ -83,6 +83,7 @@
 			<sysproperty key="hg4j.tests.remote" value="http://hg.serpentine.com/tutorial/hello"/>
 			<test name="org.tmatesoft.hg.test.TestIntMap" />
 			<test name="org.tmatesoft.hg.test.TestAuxUtilities" />
+			<test name="org.tmatesoft.hg.test.TestConfigFileParser" />
 			<test name="org.tmatesoft.hg.test.TestHistory" />
 			<test name="org.tmatesoft.hg.test.TestManifest" />
 			<test name="org.tmatesoft.hg.test.TestStatus" />
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/tmatesoft/hg/internal/ConfigFileParser.java	Thu Oct 25 19:59:08 2012 +0200
@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) 2012 TMate Software Ltd
+ *  
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.internal;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+
+/**
+ * Simplistic parser to allow altering configuration files without touching user modifications/formatting/comments
+ * 
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public class ConfigFileParser {
+	private enum ParseState {Initial, Section, Entry}; // outside any section / inside a section / inside an entry (which may continue on next lines)
+	private ParseState state = ParseState.Initial; 
+	private int lastNonEmptyLineEndOffset = -1; // offset of EOL char of the latest line that had any bytes; contents length if that line had no EOL
+	private String sectionName; // section being parsed, null if none
+	private int sectionStart = -1; // offset of '[' of the section being parsed
+	private String entryKey; // key of the entry being parsed, null if none
+	private int entryStart = -1; // offset of the first char of entryKey
+	private int valueStart = -1, valueEnd = -1; // valueStart stays -1 until a value is seen; valueEnd is the last byte of the entry's latest line
+	private ArrayList<Entry> entries; // entries closed so far in the section being parsed, null until the first one
+	private ArrayList<Section> sections = new ArrayList<Section>(); // closed sections, in file order
+	private byte[] contents; // bytes of the parsed stream, null until parse() completes
+	
+	private List<String> deletions = new ArrayList<String>(5); // flat list of (section, key) pairs
+	private List<String> additions = new ArrayList<String>(5), changes = new ArrayList<String>(5); // flat lists of (section, key, value) triples
+
+	
+	public boolean exists(String section, String key) {
+		assert contents != null;
+		for (Section s : sections) {
+			if (s.name.equals(section)) {
+				for (Entry e : s.entries) {
+					if (e.name.equals(key)) {
+						return true;
+					}
+				}
+				return false;
+			}
+		}
+		return false;
+	}
+	
+	// Queues a new entry for the given section. Nothing is written
+	// until update() is invoked
+	public void add(String section, String key, String newValue) {
+		Collections.addAll(additions, section, key, newValue);
+	}
+	
+	// Queues a replacement value for an entry of the given section. Nothing is written
+	// until update() is invoked
+	public void change(String section, String key, String newValue) {
+		Collections.addAll(changes, section, key, newValue);
+	}
+	
+	// Queues removal of an entry of the given section; nothing is written until update() is invoked
+	public void delete(String section, String key) {
+		Collections.addAll(deletions, section, key);
+	}
+
+	/**
+	 * Reads the stream to its end, keeps the bytes and locates sections and entries in them. Outcome of an earlier parse
+	 * (including a failed one) is dropped; queued add/change/delete requests stay. Throws IOException on read failure or malformed content
+	 */
+	public void parse(InputStream is) throws IOException {
+		// a parse that failed half-way leaves collected entries and offsets behind, start from scratch
+		state = ParseState.Initial;
+		sections.clear();
+		contents = null;
+		entries = null;
+		sectionName = entryKey = null;
+		sectionStart = entryStart = valueStart = valueEnd = lastNonEmptyLineEndOffset = -1;
+		ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
+		ByteArrayOutputStream line = new ByteArrayOutputStream(80);
+		int lineOffset = -1, lineNumber = 1;
+		boolean crDetected = false; // true when previous char was \r
+		int b;
+		while ( (b = is.read()) != -1) {
+			bos.write(b);
+			if (b == '\n' || b == '\r') {
+				if (line.size() > 0) {
+					processLine(lineNumber, lineOffset, line.toByteArray());
+					line.reset();
+					lastNonEmptyLineEndOffset = bos.size() - 1; // offset points to EOL char
+				} // else: XXX shall an empty line close the entry?
+				if (!(b == '\n' && crDetected)) { // when \n follows \r, increment line count only once
+					lineNumber++;
+				}
+				crDetected = b == '\r';
+			} else {
+				crDetected = false;
+				if (line.size() == 0) {
+					lineOffset = bos.size() - 1; // offset of the byte just read, first one in the line
+				}
+				line.write(b);
+			}
+		}
+		if (line.size() > 0) { // last line is not EOL-terminated
+			processLine(lineNumber, lineOffset, line.toByteArray());
+			lastNonEmptyLineEndOffset = bos.size(); // might need it for #closeSection() below
+		}
+		if (state == ParseState.Entry) {
+			closeEntry();
+		}
+		if (state == ParseState.Section) {
+			closeSection();
+		}
+		contents = bos.toByteArray();
+	}
+	
+	/**
+	 * Writes parsed contents to <code>out</code> with queued deletions, changes and additions applied; bytes not
+	 * affected by a request are copied as is. When a section name comes more than once, only its first occurrence
+	 * is updated. Requests that refer to a section or (for change/delete) a key not found by parse() produce no output.
+	 * @throws IOException if nothing has been parsed yet or <code>out</code> fails
+	 */
+	public void update(OutputStream out) throws IOException {
+		if (contents == null) {
+			throw new IOException("Shall parse first");
+		}
+		HashSet<String> processedSections = new HashSet<String>();
+		int contentsOffset = 0;
+		for (Section section : sections) {
+			LinkedHashMap<String,String> additionsInSection = new LinkedHashMap<String,String>();
+			LinkedHashMap<String,String> changesInSection = new LinkedHashMap<String,String>();
+			LinkedHashSet<String> deletionsInSection = new LinkedHashSet<String>();
+			if (!processedSections.contains(section.name)) {
+				for (Iterator<String> it = additions.iterator(); it.hasNext();) {
+					String s = it.next(), k = it.next(), v = it.next();
+					if (section.name.equals(s)) {
+						additionsInSection.put(k, v);
+					}
+				}
+				for (Iterator<String> it = changes.iterator(); it.hasNext();) {
+					String s = it.next(), k = it.next(), v = it.next();
+					if (section.name.equals(s)) {
+						changesInSection.put(k, v);
+					}
+				}
+				for (Iterator<String> it = deletions.iterator(); it.hasNext();) {
+					String s = it.next(), k = it.next();
+					if (section.name.equals(s)) {
+						deletionsInSection.add(k);
+					}
+				}
+			}
+			for (Entry e : section.entries) {
+				if (deletionsInSection.contains(e.name)) {
+					// write up to key start only
+					out.write(contents, contentsOffset, e.start - contentsOffset);
+					contentsOffset = e.valueEnd + 1;
+				} else if (changesInSection.containsKey(e.name)) {
+					if (e.valueStart == -1) {
+						// e.valueEnd determines insertion point
+						out.write(contents, contentsOffset, e.valueEnd + 1 - contentsOffset);
+					} else {
+						// e.valueEnd points to last character of the value 
+						out.write(contents, contentsOffset, e.valueStart - contentsOffset);
+					}
+					String value = changesInSection.get(e.name);
+					out.write(value == null ? new byte[0] : value.getBytes());
+					contentsOffset = e.valueEnd + 1;
+				}
+				// else: keep contentsOffset to point to first uncopied character
+			}
+			if (section.entries.length == 0) {
+				// no entries (empty section, or comments only): copy up to section.end, normally the EOL char
+				// that terminates the last non-empty line of the section (its header or a comment)
+				out.write(contents, contentsOffset, section.end - contentsOffset);
+				contentsOffset = section.end;
+				// additions below are written with \n prepended, right before that EOL char, so the
+				// original EOL (\n or \r\n) ends up after the last added entry
+			}
+			if (!additionsInSection.isEmpty()) {
+				// make sure additions are written once everything else is there
+				out.write(contents, contentsOffset, section.end - contentsOffset);
+				contentsOffset = section.end;
+				for (String k : additionsInSection.keySet()) {
+					String v = additionsInSection.get(k);
+					out.write(String.format("\n%s = %s", k, v == null ? "" : v).getBytes());
+				}
+			}
+			// if section comes more than once, update only first one.
+			processedSections.add(section.name);
+		}
+		out.write(contents, contentsOffset, contents.length - contentsOffset);
+	}
+	
+	/**
+	 * Handles a single line (EOL stripped, at least one byte long): section header, comment, new entry or
+	 * whitespace-indented continuation of the previous entry's value. Blank (whitespace only) lines are skipped.
+	 * <code>offset</code> is the position of the line's first byte in the input, <code>lineNumber</code> goes to error messages only.
+	 */
+	private void processLine(int lineNumber, int offset, byte[] line) throws IOException {
+		int localOffset = 0, i = 0;
+		while (i < line.length && Character.isWhitespace(line[i])) {
+			i++;
+		}
+		if (i == line.length) {
+			return;
+		}
+		localOffset = i; // index of the first non-whitespace byte
+		if (line[i] == '[') {
+			if (state == ParseState.Entry) {
+				closeEntry();
+			}
+			if (state == ParseState.Section) {
+				closeSection();
+			}
+			while (i < line.length && line[i] != ']') {
+				i++;
+			}
+			if (i == line.length) {
+				throw new IOException(String.format("Can't find closing ']' for section name in line %d", lineNumber));
+			}
+			sectionName = new String(line, localOffset+1, i-localOffset-1);
+			sectionStart = offset + localOffset;
+			state = ParseState.Section;
+		} else if (line[i] == '#' || line[i] == ';') {
+			// comment line, nothing to process
+			return;
+		} else { // entry
+			if (state == ParseState.Initial) {
+				throw new IOException(String.format("Line %d doesn't belong to any section", lineNumber));
+			}
+			if (localOffset > 0) {
+				if (state == ParseState.Section) {
+					throw new IOException(String.format("Non-indented key is expected in line %d", lineNumber));
+				}
+				assert state == ParseState.Entry;
+				// whitespace-indented continuation of the previous entry  
+				if (valueStart == -1) {
+					// value didn't start at the same line the key was found at
+					valueStart = offset + localOffset;
+				}
+				// value ends with eol (assumption is trailing comments are not allowed)
+				valueEnd = offset + line.length - 1;
+			} else {
+				if (state == ParseState.Entry) {
+					closeEntry();
+				}
+				assert state == ParseState.Section;
+				// it's a new entry
+				state  = ParseState.Entry;
+				while (i < line.length && !Character.isWhitespace(line[i]) && line[i] != '=') { // get name of the entry
+					i++;
+				}
+				if (i == line.length) {
+					throw new IOException(String.format("Can't process entry in line %d", lineNumber));
+				}
+				entryKey = new String(line, localOffset, i - localOffset);
+				entryStart = offset + localOffset;
+				// look for '=' after key name
+				while (i < line.length && line[i] != '=') {
+					i++;
+				}
+				if (i == line.length) {
+					throw new IOException(String.format("Can't find '=' after key %s in line %d", entryKey, lineNumber));
+				}
+				// skip whitespaces after '='
+				i++; // line[i] == '='
+				while (i < line.length && Character.isWhitespace(line[i])) {
+					i++;
+				}
+				// valueStart might be -1 in case no value is specified in the same line as key
+				// but valueEnd is always initialized just in case there's no next, value continuation line
+				if (i == line.length) {
+					valueStart = -1;
+				} else {
+					valueStart = offset + i;
+				}
+				// if trailing comments are allowed, shall look up comment char and set valueEnd to its position-1
+				valueEnd = offset + line.length - 1;
+			}
+		}
+	}
+	
+	private void closeSection() {
+		assert state == ParseState.Section;
+		assert sectionName != null;
+		assert lastNonEmptyLineEndOffset != -1;
+		Section s = new Section(sectionName, sectionStart, lastNonEmptyLineEndOffset, entries == null ? Collections.<Entry>emptyList() : entries);
+		sections.add(s);
+		sectionName = null;
+		sectionStart = -1;
+		state = ParseState.Initial;
+		entries = null;
+	}
+	
+	// Stores the entry being parsed into the current section's list, clears entry-related fields and steps back to Section state
+	private void closeEntry() {
+		assert state == ParseState.Entry;
+		assert entryKey != null;
+		state = ParseState.Section;
+		if (entries == null) {
+			entries = new ArrayList<Entry>();
+		}
+		entries.add(new Entry(entryKey, entryStart, valueStart, valueEnd));
+		entryKey = null;
+		entryStart = valueStart = valueEnd = -1; // reset all three, no offset is known until the next entry starts
+	}
+
+	
+	// Base for parsed elements; remembers the offset of the element's first byte in file contents
+	private static class Block {
+		public final int start;
+		// offset is taken as is, no validation
+		Block(int offset) { this.start = offset; }
+	}
+	
+	// A key with its value: start is the offset of the key, valueEnd is the offset of the last byte of the entry's last line
+	private static class Entry extends Block {
+		public final String name;
+		public final int valueStart, valueEnd; // valueStart is -1 when no value was found for the key
+		Entry(String key, int keyOffset, int valueFirst, int valueLast) {
+			super(keyOffset);
+			this.name = key;
+			this.valueStart = valueFirst;
+			this.valueEnd = valueLast;
+		}
+	}
+	
+	// A [name] block of the file: start is the offset of '[', end is the offset just past the last byte of its last non-empty line
+	private static class Section extends Block {
+		public final String name;
+		public final int end;
+		public final Entry[] entries;
+
+		Section(String sectionName, int startOffset, int endOffset, List<Entry> sectionEntries) {
+			super(startOffset);
+			this.name = sectionName;
+			this.end = endOffset;
+			this.entries = sectionEntries.toArray(new Entry[sectionEntries.size()]);
+		}
+	}
+
+	/**
+	 * Manual smoke test: parses the built-in sample, queues addition of key5 to [sect1]
+	 * and prints the sample followed by the updated text to standard output.
+	 * Arguments are ignored.
+	 */
+	public static void main(String[] args) throws Exception {
+		final ConfigFileParser parser = new ConfigFileParser();
+		final InputStream sample = new ByteArrayInputStream(xx.getBytes());
+		parser.parse(sample);
+		// original text first
+		System.out.println(">>>");
+		System.out.println(xx);
+		System.out.println("===");
+		// then the text with the addition applied
+		parser.add("sect1", "key5", "x");
+		final ByteArrayOutputStream updated = new ByteArrayOutputStream(xx.length());
+		parser.update(updated);
+		System.out.println(new String(updated.toByteArray()));
+	}
+	private static final String xx = "#comment1\n [sect1]\nkey = value #not a comment2\n#comment3\nkey2=   \nkey3 =  \n  value1, #cc\n  value2\nkey4 = v1,\n  v2 \n  ,v3\n\n\n[sect2]\nx = a"; // sample configuration that main() parses and prints
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/org/tmatesoft/hg/test/TestConfigFileParser.java	Thu Oct 25 19:59:08 2012 +0200
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2012 TMate Software Ltd
+ *  
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.test;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.tmatesoft.hg.internal.ConfigFileParser;
+
+/**
+ * 
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public class TestConfigFileParser {
+
+	/**
+	 * Parse followed by update, with no requests queued in between,
+	 * shall reproduce the input byte for byte.
+	 */
+	@Test
+	public void testParseOnly() throws IOException {
+		// empty input
+		doTest("".getBytes(), new byte[0]);
+		final String[] samples = {
+			" # line comment \n; and another one", // line comments
+			"[section1]\nkey1 = value\n # line comment\n[section2]\nkey2 = ;just presence\n", // comments inside sections
+			"[section1]\nkey1 = \n", // empty value
+			"[section1]\nkey1 = a,\n  b,\n  c\nkey2=\n  xyz\n", // multiline values
+			"[section1]\nkey1 = value", // entry without EOL
+			"[section1]\nkey1 = value\n[section2]\n", // empty section
+		};
+		// every sample is its own expected outcome
+		for (String sample : samples) {
+			byte[] inp = sample.getBytes();
+			doTest(inp, inp);
+		}
+	}
+	
+	@Test
+	public void testLookup() throws IOException {
+		// each key shall be reported for the section it is defined in, and only for that one
+		final byte[] config = "[section1]\nkey1 = a,\n  b,\n  c\nkey2=\n  xyz\n\n[section2]\nkey3=\n".getBytes();
+		final Inspector lookup = new Inspector() {
+			public void visit(ConfigFileParser parser) {
+				Assert.assertTrue(parser.exists("section1", "key1"));
+				Assert.assertTrue(parser.exists("section1", "key2"));
+				Assert.assertFalse(parser.exists("section1", "key3"));
+				Assert.assertTrue(parser.exists("section2", "key3"));
+				Assert.assertFalse(parser.exists("section2", "key1"));
+				Assert.assertFalse(parser.exists("section2", "key2"));
+			}
+		};
+		doTest(config, config, lookup);
+	}
+
+	@Test
+	public void testAddChangeEntries() throws IOException {
+		// key3 shall show up right after the last entry of [sect-a], key2 shall keep its place with the new value
+		final byte[] original = "\n; line comment1\n[sect-a]\nkey1 = value1\n\n[sect-b]\nkey2=value2\n".getBytes();
+		final byte[] expected = "\n; line comment1\n[sect-a]\nkey1 = value1\nkey3 = value3\n\n[sect-b]\nkey2=valueX\n".getBytes();
+		doTest(original, expected, new Inspector() {
+			public void visit(ConfigFileParser parser) {
+				Assert.assertTrue(parser.exists("sect-b", "key2"));
+				parser.add("sect-a", "key3", "value3");
+				parser.change("sect-b", "key2", "valueX");
+			}
+		});
+	}
+
+	@Test
+	public void testAdditionTwoSectionsSameName() throws IOException {
+		// [sect-a] comes twice, the addition is expected in its first occurrence only
+		final byte[] original = "[sect-a]\nkey1=value1\n\n[sect-b]\nkey2=\n\n[sect-a]\nkey3=value3\n".getBytes();
+		final byte[] expected = "[sect-a]\nkey1=value1\nkey4 = value4\n\n[sect-b]\nkey2=\n\n[sect-a]\nkey3=value3\n".getBytes();
+		doTest(original, expected, new Inspector() {
+			public void visit(ConfigFileParser parser) {
+				parser.add("sect-a", "key4", "value4");
+			}
+		});
+	}
+	
+	@Test
+	public void testDeleteTwoSubsequentKeys() throws IOException{
+		// line breaks of the deleted entries are expected to stay, hence two empty lines after the header
+		final byte[] original = "# line comment1\n\n[sect-a]\nkey1=value1\nkey2=value2\n#line comment2\nkey3=value3\n".getBytes();
+		final byte[] expected = "# line comment1\n\n[sect-a]\n\n\n#line comment2\nkey3=value3\n".getBytes();
+		doTest(original, expected, new Inspector() {
+			public void visit(ConfigFileParser parser) {
+				parser.delete("sect-a", "key1");
+				parser.delete("sect-a", "key2");
+			}
+		});
+	}
+	
+	@Test // key1 is the last entry of [sect-a]; %s are slots for optional comment lines
+	public void testDeleteLastKeyInSection() throws IOException {
+		withTwoCommentsDeleteKey1(
+			"[sect-a]\nkey0 = value 0\n%skey1=value1\n%s[sect-b]\nkey3=value3\n", // input
+			"[sect-a]\nkey0 = value 0\n%s\n%s[sect-b]\nkey3=value3\n"); // expected
+	}
+
+	@Test // key1 is the first entry of [sect-a]; %s are slots for optional comment lines
+	public void testDeleteFirstKeyInSection() throws IOException {
+		withTwoCommentsDeleteKey1(
+			"[sect-a]\n%skey1=value1\n%skey2 = value 2\n[sect-b]\nkey3=value3\n", // input
+			"[sect-a]\n%s\n%skey2 = value 2\n[sect-b]\nkey3=value3\n"); // expected
+	}
+	
+	@Test // key1 is the only entry of [sect-a]; %s are slots for optional comment lines
+	public void testOnlyKeyInSection() throws IOException {
+		withTwoCommentsDeleteKey1(
+			"[sect-a]\n%skey1=value1\n%s[sect-b]\nkey3=value3\n", // input
+			"[sect-a]\n%s\n%s[sect-b]\nkey3=value3\n"); // expected
+	}
+	
+	private void withTwoCommentsDeleteKey1(String text1, String text2) throws IOException {
+		String comment = "# line comment\n";
+		Inspector insp = new Inspector() {
+			
+			public void visit(ConfigFileParser p) {
+				p.delete("sect-a", "key1");
+			}
+		};
+
+		byte[] inp = String.format(text1, "", "").getBytes();
+		byte[] exp = String.format(text2, "", "").getBytes();
+		doTest(inp, exp, insp);
+		inp = String.format(text1, comment, "").getBytes();
+		exp = String.format(text2, comment, "").getBytes();
+		doTest(inp, exp, insp);
+		inp = String.format(text1, "", comment).getBytes();
+		exp = String.format(text2, "", comment).getBytes();
+		doTest(inp, exp, insp);
+		inp = String.format(text1, comment, comment).getBytes();
+		exp = String.format(text2, comment, comment).getBytes();
+		doTest(inp, exp, insp);
+	}
+
+	@Test
+	public void testAddEntryToEmptySection() throws IOException {
+		// [sect-a] has no entries, with and without a comment line inside;
+		// new entry is expected right before the header of the next section
+		final String before = "[sect-a]\n%s[sect-b]\nkey3=value3\n";
+		final String after = "[sect-a]\n%skey1 = value1\n[sect-b]\nkey3=value3\n";
+		final Inspector addKey1 = new Inspector() {
+			public void visit(ConfigFileParser parser) {
+				parser.add("sect-a", "key1", "value1");
+			}
+		};
+		// completely empty section first, then a section with a comment only
+		final String[] sectionBodies = { "", "# line comment2\n" };
+		for (String body : sectionBodies) {
+			byte[] inp = String.format(before, body).getBytes();
+			byte[] exp = String.format(after, body).getBytes();
+			doTest(inp, exp, addKey1);
+		}
+	}
+	
+
+	private void doTest(byte[] input, byte[] expected) throws IOException { // round trip without an Inspector
+		doTest(input, expected, (Inspector) null);
+	}
+
+	private void doTest(byte[] input, byte[] expected, Inspector insp) throws IOException {
+		ConfigFileParser p = new ConfigFileParser();
+		p.parse(new ByteArrayInputStream(input));
+		if (insp != null) {
+			insp.visit(p);
+		}
+		ByteArrayOutputStream out = new ByteArrayOutputStream(input.length);
+		p.update(out);
+		byte[] result = out.toByteArray();
+		Assert.assertArrayEquals(expected, result);
+	}
+
+	interface Inspector { // hook doTest invokes between parse() and update()
+		void visit(ConfigFileParser parser);
+	}
+}