tikhomirov@673: /* tikhomirov@673: * Copyright (c) 2010-2013 TMate Software Ltd tikhomirov@673: * tikhomirov@673: * This program is free software; you can redistribute it and/or modify tikhomirov@673: * it under the terms of the GNU General Public License as published by tikhomirov@673: * the Free Software Foundation; version 2 of the License. tikhomirov@673: * tikhomirov@673: * This program is distributed in the hope that it will be useful, tikhomirov@673: * but WITHOUT ANY WARRANTY; without even the implied warranty of tikhomirov@673: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the tikhomirov@673: * GNU General Public License for more details. tikhomirov@673: * tikhomirov@673: * For information on how to redistribute this software under tikhomirov@673: * the terms of a license other than GNU General Public License tikhomirov@673: * contact TMate Software at support@hg4j.com tikhomirov@673: */ tikhomirov@673: package org.tmatesoft.hg.internal; tikhomirov@673: tikhomirov@673: import java.io.IOException; tikhomirov@673: import java.util.ArrayList; tikhomirov@673: import java.util.Collections; tikhomirov@673: import java.util.Date; tikhomirov@673: import java.util.HashMap; tikhomirov@673: import java.util.List; tikhomirov@673: import java.util.Map; tikhomirov@673: tikhomirov@673: import org.tmatesoft.hg.core.Nodeid; tikhomirov@673: import org.tmatesoft.hg.core.SessionContext; tikhomirov@673: import org.tmatesoft.hg.repo.HgChangelog.RawChangeset; tikhomirov@673: import org.tmatesoft.hg.repo.HgInvalidDataFormatException; tikhomirov@673: import org.tmatesoft.hg.repo.HgRepository; tikhomirov@673: tikhomirov@673: /** tikhomirov@673: * @see mercurial/changelog.py:read() tikhomirov@673: * tikhomirov@673: *
tikhomirov@673:  *         format used:
tikhomirov@673:  *         nodeid\n        : manifest node in ascii
tikhomirov@673:  *         user\n          : user, no \n or \r allowed
tikhomirov@673:  *         time tz extra\n : date (time is int or float, timezone is int)
tikhomirov@673:  *                         : extra is metadatas, encoded and separated by '\0'
tikhomirov@673:  *                         : older versions ignore it
tikhomirov@673:  *         files\n\n       : files modified by the cset, no \n or \r allowed
tikhomirov@673:  *         (.*)            : comment (free text, ideally utf-8)
tikhomirov@673:  * 
tikhomirov@673:  *         changelog v0 doesn't use extra
tikhomirov@673:  * 
*
* Extracted from internals of HgChangelog (the code initially from inside RawChangeset)
*
* @author Artem Tikhomirov
* @author TMate Software Ltd.
*/
public final class ChangesetParser {
    private static final byte LINE_BREAK = (byte) '\n';
    private static final String BAD_DATA = "Bad Changeset data";
    private static final String EXTRAS_BRANCH_KEY = "branch";

    private final EncodingHelper encHelper;
    // it's likely user names get repeated again and again throughout repository.
    private final Pool<String> usersPool;
    private final Pool<String> filesPool;
    private final CsetFactory factory;

    /**
     * @param sessionContex source of the session context, handed to {@link Internals#buildFileNameEncodingHelper}
     * @param csetFactory creates the resulting changeset objects, shall not be <code>null</code>
     */
    public ChangesetParser(SessionContext.Source sessionContex, CsetFactory csetFactory) {
        assert csetFactory != null;
        encHelper = Internals.buildFileNameEncodingHelper(sessionContex);
        usersPool = new Pool<String>();
        filesPool = new Pool<String>();
        factory = csetFactory;
    }

    /**
     * Clears the pools of user and file names accumulated by previous {@link #parse(byte[])} calls.
     */
    public void dispose() {
        usersPool.clear();
        filesPool.clear();
    }

    /**
     * Parses a changeset from the complete content of the supplied data access object.
     *
     * @param da provides raw changeset bytes through <code>byteArray()</code>
     * @return changeset instance built by the factory
     * @throws IOException propagated from the data access object
     * @throws HgInvalidDataFormatException if the data doesn't follow changelog entry format
     */
    public RawChangeset parse(DataAccess da) throws IOException, HgInvalidDataFormatException {
        return parse(da.byteArray());
    }

    /**
     * Parses a changeset from a byte array that holds exactly one changelog entry.
     *
     * @param data raw changeset bytes
     * @return changeset instance built by the factory
     * @throws HgInvalidDataFormatException if the data doesn't follow changelog entry format
     */
    public RawChangeset parse(byte[] data) throws HgInvalidDataFormatException {
        return init(data, 0, data.length);
    }

    /**
     * Walks the entry line by line: manifest node, user, "time tz extra", file names up to an empty line,
     * then the comment that takes the rest of the buffer.
     */
    private RawChangeset init(byte[] data, int offset, int length) throws HgInvalidDataFormatException {
        final int bufferEndIndex = offset + length;

        // line 1: manifest node
        final int breakIndex1 = indexOf(data, LINE_BREAK, offset, bufferEndIndex);
        if (breakIndex1 == -1) {
            throw new HgInvalidDataFormatException(BAD_DATA);
        }
        // Start at the supplied offset, not at 0. Third argument is treated as a length, same as in the
        // other (data, start, length) calls of this method -- NOTE(review): confirm against Nodeid.fromAscii.
        final Nodeid _nodeid = Nodeid.fromAscii(data, offset, breakIndex1 - offset);

        // line 2: user
        final int breakIndex2 = indexOf(data, LINE_BREAK, breakIndex1 + 1, bufferEndIndex);
        if (breakIndex2 == -1) {
            throw new HgInvalidDataFormatException(BAD_DATA);
        }
        String _user = encHelper.userFromChangeset(data, breakIndex1 + 1, breakIndex2 - breakIndex1 - 1);
        _user = usersPool.unify(_user);

        // line 3: time, timezone and optional extras, separated with spaces
        final int breakIndex3 = indexOf(data, LINE_BREAK, breakIndex2 + 1, bufferEndIndex);
        if (breakIndex3 == -1) {
            throw new HgInvalidDataFormatException(BAD_DATA);
        }
        // NOTE(review): decoded with the platform default charset, as before; extras outside of ASCII may need an explicit one.
        final String _timeString = new String(data, breakIndex2 + 1, breakIndex3 - breakIndex2 - 1);
        final int space1 = _timeString.indexOf(' ');
        if (space1 == -1) {
            throw new HgInvalidDataFormatException(String.format("Bad Changeset data: %s in [%d..%d]", "time string", breakIndex2+1, breakIndex3));
        }
        int space2 = _timeString.indexOf(' ', space1 + 1);
        if (space2 == -1) {
            // no extras
            space2 = _timeString.length();
        }
        long unixTime;
        int _timezone;
        try {
            unixTime = parseUnixTime(_timeString.substring(0, space1));
            _timezone = Integer.parseInt(_timeString.substring(space1 + 1, space2));
        } catch (NumberFormatException ex) {
            // malformed numbers are a data format problem, report them like any other one
            throw new HgInvalidDataFormatException(String.format("Bad Changeset data: %s in [%d..%d]", "time string", breakIndex2+1, breakIndex3));
        }
        // unixTime is local time, and timezone records difference of the local time to UTC.
        final Date _time = new Date(unixTime * 1000);
        final String _extras = space2 < _timeString.length() ? _timeString.substring(space2 + 1) : null;
        final Map<String, String> _extrasMap = parseExtras(_extras);

        // lines 4+: file names, one per line, terminated with an empty line
        int lineStart = breakIndex3 + 1;
        int lineEnd = indexOf(data, LINE_BREAK, lineStart, bufferEndIndex);
        if (lineEnd == -1) {
            // neither file list nor the empty line that precedes the comment
            throw new HgInvalidDataFormatException(BAD_DATA);
        }
        List<String> _files = null;
        int commentStart;
        if (lineEnd == lineStart) {
            // we already found \n\n and hence there are no files (e.g. merge revision)
            commentStart = lineStart + 1;
        } else {
            _files = new ArrayList<String>(5);
            boolean listTerminated = false;
            while (lineEnd != -1) {
                String fname = encHelper.fileFromChangeset(data, lineStart, lineEnd - lineStart);
                _files.add(filesPool.unify(fname));
                lineStart = lineEnd + 1;
                if (lineStart < bufferEndIndex && data[lineStart] == LINE_BREAK) {
                    // found \n\n
                    listTerminated = true;
                    break;
                }
                lineEnd = indexOf(data, LINE_BREAK, lineStart, bufferEndIndex);
            }
            if (!listTerminated) {
                // buffer ended before the empty line that separates files from the comment
                throw new HgInvalidDataFormatException(BAD_DATA);
            }
            // lineStart points to the second \n of the \n\n pair
            commentStart = lineStart + 1;
        }
        final String _comment = encHelper.commentFromChangeset(data, commentStart, bufferEndIndex - commentStart);
        return factory.create(_nodeid, _user, _time, _timezone, _files, _comment, _extrasMap);
    }

    /**
     * Parses the time field, which according to the format description is either int or float.
     * Fractional part, if any, is dropped.
     *
     * @throws NumberFormatException if the integer part is not a valid number
     */
    private static long parseUnixTime(String value) {
        final int dot = value.indexOf('.');
        return Long.parseLong(dot == -1 ? value : value.substring(0, dot));
    }

    /**
     * Splits extras into key:value pairs. Pairs are separated with '\0', each pair is unescaped before
     * it is split at the first ':'.
     *
     * @param _extras extras portion of the time line, may be <code>null</code>
     * @return unmodifiable map that always has a value for the "branch" key
     * @throws HgInvalidDataFormatException if a pair lacks the ':' separator
     */
    private Map<String, String> parseExtras(String _extras) throws HgInvalidDataFormatException {
        // String.trim() strips any leading/trailing character up to ' ', i.e. '\0' as well
        _extras = _extras == null ? null : _extras.trim();
        if (_extras == null || _extras.length() == 0) {
            return Collections.singletonMap(EXTRAS_BRANCH_KEY, HgRepository.DEFAULT_BRANCH_NAME);
        }
        final Map<String, String> _extrasMap = new HashMap<String, String>();
        int lastIndex = 0;
        while (lastIndex < _extras.length()) {
            int sp = _extras.indexOf('\0', lastIndex);
            if (sp == -1) {
                sp = _extras.length();
            }
            if (sp > lastIndex) {
                final String pair = decode(_extras.substring(lastIndex, sp));
                final int eq = pair.indexOf(':');
                if (eq == -1) {
                    throw new HgInvalidDataFormatException(String.format("Bad Changeset data: %s in [%d..%d]", "extras", lastIndex, sp));
                }
                _extrasMap.put(pair.substring(0, eq), pair.substring(eq + 1));
            }
            // advance unconditionally, otherwise an empty segment (two adjacent '\0') never ends the loop
            lastIndex = sp + 1;
        }
        if (!_extrasMap.containsKey(EXTRAS_BRANCH_KEY)) {
            _extrasMap.put(EXTRAS_BRANCH_KEY, HgRepository.DEFAULT_BRANCH_NAME);
        }
        return Collections.unmodifiableMap(_extrasMap);
    }

    /**
     * @return index of the first occurrence of <code>what</code> in [startOffset..endIndex), or -1 if there's none
     */
    private static int indexOf(byte[] src, byte what, int startOffset, int endIndex) {
        for (int i = startOffset; i < endIndex; i++) {
            if (src[i] == what) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Reverts escaping of backslash, \n, \r and \0 in a single pass, so that the outcome of one
     * escape sequence is never taken for the beginning of another. Unknown sequences and a trailing
     * lone backslash are kept as is.
     */
    private static String decode(String s) {
        if (s == null || s.indexOf('\\') == -1) {
            return s;
        }
        // TestAuxUtilities#testChangelogExtrasDecode
        final int len = s.length();
        final StringBuilder sb = new StringBuilder(len);
        int i = 0;
        while (i < len) {
            final char c = s.charAt(i);
            if (c == '\\' && i + 1 < len) {
                final char next = s.charAt(i + 1);
                char decoded = 0;
                boolean known = true;
                switch (next) {
                case '\\':
                    decoded = '\\';
                    break;
                case 'n':
                    decoded = '\n';
                    break;
                case 'r':
                    decoded = '\r';
                    break;
                case '0':
                    decoded = '\0';
                    break;
                default:
                    known = false;
                    break;
                }
                if (known) {
                    sb.append(decoded);
                    i += 2;
                    continue;
                }
            }
            sb.append(c);
            i++;
        }
        return sb.toString();
    }

    /**
     * Builds changeset objects from the values the parser extracted.
     */
    public interface CsetFactory {
        /**
         * @param nodeid manifest revision, first line of the entry
         * @param user user name, second line of the entry
         * @param time time field of the third line, turned into milliseconds
         * @param timezone timezone field of the third line
         * @param files names of the files listed in the entry, <code>null</code> if there are none
         * @param comment the rest of the entry
         * @param extrasMap unmodifiable map of extras, always with a value for "branch" key
         * @return changeset instance to return from <code>parse</code> methods
         */
        public RawChangeset create(Nodeid nodeid, String user, Date time, int timezone, List<String> files, String comment, Map<String, String> extrasMap);
    }
}