diff src/org/tmatesoft/hg/internal/ChangesetParser.java @ 673:545b1d4cc11d

Refactor HgBundle.GroupElement (clear experimental mark), resolve few technical debt issues
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Fri, 12 Jul 2013 20:14:24 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/tmatesoft/hg/internal/ChangesetParser.java	Fri Jul 12 20:14:24 2013 +0200
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2010-2013 TMate Software Ltd
+ *  
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@hg4j.com
+ */
+package org.tmatesoft.hg.internal;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.tmatesoft.hg.core.Nodeid;
+import org.tmatesoft.hg.core.SessionContext;
+import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
+import org.tmatesoft.hg.repo.HgInvalidDataFormatException;
+import org.tmatesoft.hg.repo.HgRepository;
+
+/**
+ * @see mercurial/changelog.py:read()
+ * 
+ *      <pre>
+ *         format used:
+ *         nodeid\n        : manifest node in ascii
+ *         user\n          : user, no \n or \r allowed
+ *         time tz extra\n : date (time is int or float, timezone is int)
+ *                         : extra is metadatas, encoded and separated by '\0'
+ *                         : older versions ignore it
+ *         files\n\n       : files modified by the cset, no \n or \r allowed
+ *         (.*)            : comment (free text, ideally utf-8)
+ * 
+ *         changelog v0 doesn't use extra
+ * </pre>
+ * 
+ * Extracted from internals of HgChangelog (the code initially from inside RawChangeset)
+ * 
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public final class ChangesetParser {
+	private final EncodingHelper encHelper;
+	// it's likely user names get repeated again and again throughout repository. 
+	private final Pool<String> usersPool;
+	private final Pool<String> filesPool;
+	private final CsetFactory factory;
+
+	/**
+	 * @param sessionContex source of the session context, passed to {@link Internals#buildFileNameEncodingHelper}
+	 * @param csetFactory creates the object returned from <code>parse</code> methods, shall not be <code>null</code>
+	 */
+	public ChangesetParser(SessionContext.Source sessionContex, CsetFactory csetFactory) {
+		assert csetFactory != null;
+		encHelper = Internals.buildFileNameEncodingHelper(sessionContex);
+		usersPool = new Pool<String>();
+		filesPool = new Pool<String>();
+		factory = csetFactory;
+	}
+
+	/**
+	 * Clears the pools of user and file names.
+	 */
+	public void dispose() {
+		usersPool.clear();
+		filesPool.clear();
+	}
+
+	/**
+	 * Parses changeset from the bytes <code>da.byteArray()</code> gives, see {@link #parse(byte[])}.
+	 */
+	public RawChangeset parse(DataAccess da) throws IOException, HgInvalidDataFormatException {
+		byte[] data = da.byteArray();
+		return parse(data);
+	}
+
+	/**
+	 * Parses complete changeset record, in the format described in the class comment.
+	 * 
+	 * @param data changeset bytes, the whole array is used
+	 * @return whatever {@link CsetFactory#create} builds from the parsed fields
+	 * @throws HgInvalidDataFormatException if line breaks that delimit mandatory fields are missing,
+	 *         or an extras entry lacks ':'
+	 * @throws NumberFormatException if time or timezone is not a number
+	 */
+	public RawChangeset parse(byte[] data) throws HgInvalidDataFormatException {
+		return init(data, 0, data.length);
+	}
+
+	/**
+	 * Splits <code>length</code> bytes of <code>data</code>, starting at <code>offset</code>, into manifest
+	 * node, user, time, timezone, extras, files and comment, and passes them to the factory.
+	 */
+	private RawChangeset init(byte[] data, int offset, int length) throws HgInvalidDataFormatException {
+		final int bufferEndIndex = offset + length;
+		final byte lineBreak = (byte) '\n';
+		int breakIndex1 = indexOf(data, lineBreak, offset, bufferEndIndex);
+		if (breakIndex1 == -1) {
+			throw new HgInvalidDataFormatException("Bad Changeset data");
+		}
+		// manifest node occupies [offset..breakIndex1), don't assume the record starts at index 0
+		Nodeid _nodeid = Nodeid.fromAscii(data, offset, breakIndex1 - offset);
+		int breakIndex2 = indexOf(data, lineBreak, breakIndex1 + 1, bufferEndIndex);
+		if (breakIndex2 == -1) {
+			throw new HgInvalidDataFormatException("Bad Changeset data");
+		}
+		String _user;
+		_user = encHelper.userFromChangeset(data, breakIndex1 + 1, breakIndex2 - breakIndex1 - 1);
+		_user = usersPool.unify(_user);
+
+		int breakIndex3 = indexOf(data, lineBreak, breakIndex2 + 1, bufferEndIndex);
+		if (breakIndex3 == -1) {
+			throw new HgInvalidDataFormatException("Bad Changeset data");
+		}
+		// NOTE(review): bytes are decoded with platform default charset here; that's fine for time and
+		// timezone digits, but confirm it is what's intended for non-ASCII values in extras
+		String _timeString = new String(data, breakIndex2 + 1, breakIndex3 - breakIndex2 - 1);
+		int space1 = _timeString.indexOf(' ');
+		if (space1 == -1) {
+			throw new HgInvalidDataFormatException(String.format("Bad Changeset data: %s in [%d..%d]", "time string", breakIndex2+1, breakIndex3));
+		}
+		int space2 = _timeString.indexOf(' ', space1 + 1);
+		if (space2 == -1) {
+			space2 = _timeString.length();
+		}
+		// time is int or float (see class comment), only whole seconds are of interest
+		String _seconds = _timeString.substring(0, space1);
+		int fraction = _seconds.indexOf('.');
+		long unixTime = Long.parseLong(fraction == -1 ? _seconds : _seconds.substring(0, fraction));
+		int _timezone = Integer.parseInt(_timeString.substring(space1 + 1, space2));
+		// unixTime is local time, and timezone records difference of the local time to UTC.
+		Date _time = new Date(unixTime * 1000);
+		String _extras = space2 < _timeString.length() ? _timeString.substring(space2 + 1) : null;
+		Map<String, String> _extrasMap = parseExtras(_extras);
+		//
+		int lastStart = breakIndex3 + 1;
+		int breakIndex4 = indexOf(data, lineBreak, lastStart, bufferEndIndex);
+		if (breakIndex4 == -1) {
+			// not a single line break after the time line, i.e. no \n\n in front of the comment
+			throw new HgInvalidDataFormatException("Bad Changeset data");
+		}
+		ArrayList<String> _files = null;
+		if (breakIndex4 > lastStart) {
+			// if breakIndex4 == lastStart, we already found \n\n and hence there are no files (e.g. merge revision)
+			_files = new ArrayList<String>(5);
+			boolean foundEmptyLine = false;
+			while (breakIndex4 != -1 && breakIndex4 + 1 < bufferEndIndex) {
+				String fname = encHelper.fileFromChangeset(data, lastStart, breakIndex4 - lastStart);
+				_files.add(filesPool.unify(fname));
+				lastStart = breakIndex4 + 1;
+				if (data[lastStart] == lineBreak) {
+					// found \n\n, breakIndex4 points to the first line break of the two
+					foundEmptyLine = true;
+					break;
+				}
+				breakIndex4 = indexOf(data, lineBreak, lastStart, bufferEndIndex);
+			}
+			if (!foundEmptyLine) {
+				// list of files runs up to the end of the data, there's no \n\n to start the comment
+				throw new HgInvalidDataFormatException("Bad Changeset data");
+			}
+		} else {
+			// no files, step back so that breakIndex4 + 2 below is the first byte past \n\n
+			breakIndex4--;
+		}
+		String _comment = encHelper.commentFromChangeset(data, breakIndex4 + 2, bufferEndIndex - breakIndex4 - 2);
+		RawChangeset target = factory.create(_nodeid, _user, _time, _timezone, _files, _comment, _extrasMap);
+		return target;
+	}
+
+	/**
+	 * Builds a map out of <code>key:value</code> pairs separated by '\0'.
+	 * 
+	 * @param _extras extras portion of the time line, may be <code>null</code>
+	 * @return unmodifiable map that always has a value for the branch key, {@link HgRepository#DEFAULT_BRANCH_NAME} by default
+	 * @throws HgInvalidDataFormatException if there's an entry without ':'
+	 */
+	private Map<String, String> parseExtras(String _extras) throws HgInvalidDataFormatException {
+		final String extras_branch_key = "branch";
+		_extras = _extras == null ? null : _extras.trim();
+		if (_extras == null || _extras.length() == 0) {
+			return Collections.singletonMap(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME);
+		}
+		Map<String, String> _extrasMap = new HashMap<String, String>();
+		int lastIndex = 0;
+		do {
+			int sp = _extras.indexOf('\0', lastIndex);
+			if (sp == -1) {
+				sp = _extras.length();
+			}
+			if (sp > lastIndex) {
+				String pair = decode(_extras.substring(lastIndex, sp));
+				int eq = pair.indexOf(':');
+				if (eq == -1) {
+					throw new HgInvalidDataFormatException(String.format("Bad Changeset data: no ':' in extras entry '%s'", pair));
+				}
+				_extrasMap.put(pair.substring(0, eq), pair.substring(eq + 1));
+			}
+			// move past the separator even if there was nothing in front of it, otherwise two '\0'
+			// in a row keep the loop at the same index forever
+			lastIndex = sp + 1;
+		} while (lastIndex < _extras.length());
+		if (!_extrasMap.containsKey(extras_branch_key)) {
+			_extrasMap.put(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME);
+		}
+		return Collections.unmodifiableMap(_extrasMap);
+	}
+
+	/**
+	 * @return index of the first occurrence of <code>what</code> in <code>src</code> within
+	 *         [<code>startOffset</code>..<code>endIndex</code>), or -1 if there's none
+	 */
+	private static int indexOf(byte[] src, byte what, int startOffset, int endIndex) {
+		for (int i = startOffset; i < endIndex; i++) {
+			if (src[i] == what) {
+				return i;
+			}
+		}
+		return -1;
+	}
+
+	/**
+	 * Reverts escaping of backslash, line feed, carriage return and '\0' in an extras entry.
+	 * The string is processed in a single pass, so that an escaped backslash followed by 'n', 'r' or '0'
+	 * yields these two characters and is not taken for one more escape sequence (chained
+	 * <code>String.replace()</code> calls would do that). Backslash in front of any other character is kept.
+	 */
+	private static String decode(String s) {
+		if (s == null || s.indexOf('\\') == -1) {
+			return s;
+		}
+		// TestAuxUtilities#testChangelogExtrasDecode
+		final int len = s.length();
+		StringBuilder sb = new StringBuilder(len);
+		for (int i = 0; i < len; i++) {
+			char c = s.charAt(i);
+			if (c != '\\' || i + 1 == len) {
+				sb.append(c);
+				continue;
+			}
+			switch (s.charAt(i + 1)) {
+			case '\\':
+				sb.append('\\');
+				i++;
+				break;
+			case 'n':
+				sb.append('\n');
+				i++;
+				break;
+			case 'r':
+				sb.append('\r');
+				i++;
+				break;
+			case '0':
+				sb.append('\0');
+				i++;
+				break;
+			default:
+				// not an escape sequence, the character that follows is handled at the next iteration
+				sb.append(c);
+				break;
+			}
+		}
+		return sb.toString();
+	}
+
+	/**
+	 * Builds changeset objects from the fields {@link ChangesetParser} extracts.
+	 */
+	public interface CsetFactory {
+		/**
+		 * @param files names of the files listed in the changeset, <code>null</code> if there are none
+		 * @param extrasMap unmodifiable, never empty as it has at least the branch name
+		 */
+		public RawChangeset create(Nodeid nodeid, String user, Date time, int timezone, List<String> files, String comment, Map<String, String> extrasMap);
+	}
+}
\ No newline at end of file