tikhomirov@673: /*
tikhomirov@673: * Copyright (c) 2010-2013 TMate Software Ltd
tikhomirov@673: *
tikhomirov@673: * This program is free software; you can redistribute it and/or modify
tikhomirov@673: * it under the terms of the GNU General Public License as published by
tikhomirov@673: * the Free Software Foundation; version 2 of the License.
tikhomirov@673: *
tikhomirov@673: * This program is distributed in the hope that it will be useful,
tikhomirov@673: * but WITHOUT ANY WARRANTY; without even the implied warranty of
tikhomirov@673: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
tikhomirov@673: * GNU General Public License for more details.
tikhomirov@673: *
tikhomirov@673: * For information on how to redistribute this software under
tikhomirov@673: * the terms of a license other than GNU General Public License
tikhomirov@673: * contact TMate Software at support@hg4j.com
tikhomirov@673: */
tikhomirov@673: package org.tmatesoft.hg.internal;
tikhomirov@673:
tikhomirov@673: import java.io.IOException;
tikhomirov@673: import java.util.ArrayList;
tikhomirov@673: import java.util.Collections;
tikhomirov@673: import java.util.Date;
tikhomirov@673: import java.util.HashMap;
tikhomirov@673: import java.util.List;
tikhomirov@673: import java.util.Map;
tikhomirov@673:
tikhomirov@673: import org.tmatesoft.hg.core.Nodeid;
tikhomirov@673: import org.tmatesoft.hg.core.SessionContext;
tikhomirov@673: import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
tikhomirov@673: import org.tmatesoft.hg.repo.HgInvalidDataFormatException;
tikhomirov@673: import org.tmatesoft.hg.repo.HgRepository;
tikhomirov@673:
tikhomirov@673: /**
tikhomirov@673: * @see mercurial/changelog.py:read()
tikhomirov@673: *
tikhomirov@673: *
tikhomirov@673: * format used:
tikhomirov@673: * nodeid\n : manifest node in ascii
tikhomirov@673: * user\n : user, no \n or \r allowed
tikhomirov@673: * time tz extra\n : date (time is int or float, timezone is int)
tikhomirov@673: * : extra is metadatas, encoded and separated by '\0'
tikhomirov@673: * : older versions ignore it
tikhomirov@673: * files\n\n : files modified by the cset, no \n or \r allowed
tikhomirov@673: * (.*) : comment (free text, ideally utf-8)
tikhomirov@673: *
tikhomirov@673: * changelog v0 doesn't use extra
tikhomirov@673: *
tikhomirov@673: *
tikhomirov@673: * Extracted from internals of HgChangelog (the code initially from inside RawChangeset)
tikhomirov@673: *
tikhomirov@673: * @author Artem Tikhomirov
tikhomirov@673: * @author TMate Software Ltd.
tikhomirov@673: */
tikhomirov@673: public final class ChangesetParser {
tikhomirov@673: private final EncodingHelper encHelper;
tikhomirov@673: // it's likely user names get repeated again and again throughout repository.
tikhomirov@673: private final Pool usersPool;
tikhomirov@673: private final Pool filesPool;
tikhomirov@673: private final CsetFactory factory;
tikhomirov@673:
tikhomirov@673: public ChangesetParser(SessionContext.Source sessionContex, CsetFactory csetFactory) {
tikhomirov@673: assert csetFactory != null;
tikhomirov@673: encHelper = Internals.buildFileNameEncodingHelper(sessionContex);
tikhomirov@673: usersPool = new Pool();
tikhomirov@673: filesPool = new Pool();
tikhomirov@673: factory = csetFactory;
tikhomirov@673: }
tikhomirov@673:
tikhomirov@673: public void dispose() {
tikhomirov@673: usersPool.clear();
tikhomirov@673: filesPool.clear();
tikhomirov@673: }
tikhomirov@673:
tikhomirov@673: public RawChangeset parse(DataAccess da) throws IOException, HgInvalidDataFormatException {
tikhomirov@673: byte[] data = da.byteArray();
tikhomirov@673: return parse(data);
tikhomirov@673: }
tikhomirov@673:
tikhomirov@673: public RawChangeset parse(byte[] data) throws HgInvalidDataFormatException {
tikhomirov@673: return init(data, 0, data.length);
tikhomirov@673: }
tikhomirov@673:
tikhomirov@673: private RawChangeset init(byte[] data, int offset, int length) throws HgInvalidDataFormatException {
tikhomirov@673: final int bufferEndIndex = offset + length;
tikhomirov@673: final byte lineBreak = (byte) '\n';
tikhomirov@673: int breakIndex1 = indexOf(data, lineBreak, offset, bufferEndIndex);
tikhomirov@673: if (breakIndex1 == -1) {
tikhomirov@673: throw new HgInvalidDataFormatException("Bad Changeset data");
tikhomirov@673: }
tikhomirov@673: Nodeid _nodeid = Nodeid.fromAscii(data, 0, breakIndex1);
tikhomirov@673: int breakIndex2 = indexOf(data, lineBreak, breakIndex1 + 1, bufferEndIndex);
tikhomirov@673: if (breakIndex2 == -1) {
tikhomirov@673: throw new HgInvalidDataFormatException("Bad Changeset data");
tikhomirov@673: }
tikhomirov@673: String _user;
tikhomirov@673: _user = encHelper.userFromChangeset(data, breakIndex1 + 1, breakIndex2 - breakIndex1 - 1);
tikhomirov@673: _user = usersPool.unify(_user);
tikhomirov@673:
tikhomirov@673: int breakIndex3 = indexOf(data, lineBreak, breakIndex2 + 1, bufferEndIndex);
tikhomirov@673: if (breakIndex3 == -1) {
tikhomirov@673: throw new HgInvalidDataFormatException("Bad Changeset data");
tikhomirov@673: }
tikhomirov@673: String _timeString = new String(data, breakIndex2 + 1, breakIndex3 - breakIndex2 - 1);
tikhomirov@673: int space1 = _timeString.indexOf(' ');
tikhomirov@673: if (space1 == -1) {
tikhomirov@673: throw new HgInvalidDataFormatException(String.format("Bad Changeset data: %s in [%d..%d]", "time string", breakIndex2+1, breakIndex3));
tikhomirov@673: }
tikhomirov@673: int space2 = _timeString.indexOf(' ', space1 + 1);
tikhomirov@673: if (space2 == -1) {
tikhomirov@673: space2 = _timeString.length();
tikhomirov@673: }
tikhomirov@673: long unixTime = Long.parseLong(_timeString.substring(0, space1));
tikhomirov@673: int _timezone = Integer.parseInt(_timeString.substring(space1 + 1, space2));
tikhomirov@673: // unixTime is local time, and timezone records difference of the local time to UTC.
tikhomirov@673: Date _time = new Date(unixTime * 1000);
tikhomirov@673: String _extras = space2 < _timeString.length() ? _timeString.substring(space2 + 1) : null;
tikhomirov@673: Map _extrasMap = parseExtras(_extras);
tikhomirov@673: //
tikhomirov@673: int lastStart = breakIndex3 + 1;
tikhomirov@673: int breakIndex4 = indexOf(data, lineBreak, lastStart, bufferEndIndex);
tikhomirov@673: ArrayList _files = null;
tikhomirov@673: if (breakIndex4 > lastStart) {
tikhomirov@673: // if breakIndex4 == lastStart, we already found \n\n and hence there are no files (e.g. merge revision)
tikhomirov@673: _files = new ArrayList(5);
tikhomirov@673: while (breakIndex4 != -1 && breakIndex4 + 1 < bufferEndIndex) {
tikhomirov@673: String fname = encHelper.fileFromChangeset(data, lastStart, breakIndex4 - lastStart);
tikhomirov@673: _files.add(filesPool.unify(fname));
tikhomirov@673: lastStart = breakIndex4 + 1;
tikhomirov@673: if (data[breakIndex4 + 1] == lineBreak) {
tikhomirov@673: // found \n\n
tikhomirov@673: break;
tikhomirov@673: } else {
tikhomirov@673: breakIndex4 = indexOf(data, lineBreak, lastStart, bufferEndIndex);
tikhomirov@673: }
tikhomirov@673: }
tikhomirov@673: if (breakIndex4 == -1 || breakIndex4 >= bufferEndIndex) {
tikhomirov@673: throw new HgInvalidDataFormatException("Bad Changeset data");
tikhomirov@673: }
tikhomirov@673: } else {
tikhomirov@673: breakIndex4--;
tikhomirov@673: }
tikhomirov@673: String _comment = encHelper.commentFromChangeset(data, breakIndex4 + 2, bufferEndIndex - breakIndex4 - 2);
tikhomirov@673: RawChangeset target = factory.create(_nodeid, _user, _time, _timezone, _files, _comment, _extrasMap);
tikhomirov@673: return target;
tikhomirov@673: }
tikhomirov@673:
tikhomirov@673: private Map parseExtras(String _extras) {
tikhomirov@673: final String extras_branch_key = "branch";
tikhomirov@673: _extras = _extras == null ? null : _extras.trim();
tikhomirov@673: if (_extras == null || _extras.length() == 0) {
tikhomirov@673: return Collections.singletonMap(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME);
tikhomirov@673: }
tikhomirov@673: Map _extrasMap = new HashMap();
tikhomirov@673: int lastIndex = 0;
tikhomirov@673: do {
tikhomirov@673: String pair;
tikhomirov@673: int sp = _extras.indexOf('\0', lastIndex);
tikhomirov@673: if (sp == -1) {
tikhomirov@673: sp = _extras.length();
tikhomirov@673: }
tikhomirov@673: if (sp > lastIndex) {
tikhomirov@673: pair = _extras.substring(lastIndex, sp);
tikhomirov@673: pair = decode(pair);
tikhomirov@673: int eq = pair.indexOf(':');
tikhomirov@673: _extrasMap.put(pair.substring(0, eq), pair.substring(eq + 1));
tikhomirov@673: lastIndex = sp + 1;
tikhomirov@673: }
tikhomirov@673: } while (lastIndex < _extras.length());
tikhomirov@673: if (!_extrasMap.containsKey(extras_branch_key)) {
tikhomirov@673: _extrasMap.put(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME);
tikhomirov@673: }
tikhomirov@673: return Collections.unmodifiableMap(_extrasMap);
tikhomirov@673: }
tikhomirov@673:
tikhomirov@673: private static int indexOf(byte[] src, byte what, int startOffset, int endIndex) {
tikhomirov@673: for (int i = startOffset; i < endIndex; i++) {
tikhomirov@673: if (src[i] == what) {
tikhomirov@673: return i;
tikhomirov@673: }
tikhomirov@673: }
tikhomirov@673: return -1;
tikhomirov@673: }
tikhomirov@673:
tikhomirov@673: private static String decode(String s) {
tikhomirov@673: if (s != null && s.indexOf('\\') != -1) {
tikhomirov@673: // TestAuxUtilities#testChangelogExtrasDecode
tikhomirov@673: return s.replace("\\\\", "\\").replace("\\n", "\n").replace("\\r", "\r").replace("\\0", "\00");
tikhomirov@673: }
tikhomirov@673: return s;
tikhomirov@673: }
tikhomirov@673:
tikhomirov@673: public interface CsetFactory {
tikhomirov@673: public RawChangeset create(Nodeid nodeid, String user, Date time, int timezone, List files, String comment, Map extrasMap);
tikhomirov@673: }
tikhomirov@673: }