comparison src/org/tmatesoft/hg/internal/ChangesetParser.java @ 673:545b1d4cc11d

Refactor HgBundle.GroupElement (clear experimental mark), resolve few technical debt issues
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Fri, 12 Jul 2013 20:14:24 +0200
parents
children
comparison
equal deleted inserted replaced
672:d2552e6a5af6 673:545b1d4cc11d
1 /*
2 * Copyright (c) 2010-2013 TMate Software Ltd
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; version 2 of the License.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * For information on how to redistribute this software under
14 * the terms of a license other than GNU General Public License
15 * contact TMate Software at support@hg4j.com
16 */
17 package org.tmatesoft.hg.internal;
18
19 import java.io.IOException;
20 import java.util.ArrayList;
21 import java.util.Collections;
22 import java.util.Date;
23 import java.util.HashMap;
24 import java.util.List;
25 import java.util.Map;
26
27 import org.tmatesoft.hg.core.Nodeid;
28 import org.tmatesoft.hg.core.SessionContext;
29 import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
30 import org.tmatesoft.hg.repo.HgInvalidDataFormatException;
31 import org.tmatesoft.hg.repo.HgRepository;
32
33 /**
34 * @see mercurial/changelog.py:read()
35 *
36 * <pre>
37 * format used:
38 * nodeid\n : manifest node in ascii
39 * user\n : user, no \n or \r allowed
40 * time tz extra\n : date (time is int or float, timezone is int)
41 * : extra is metadatas, encoded and separated by '\0'
42 * : older versions ignore it
43 * files\n\n : files modified by the cset, no \n or \r allowed
44 * (.*) : comment (free text, ideally utf-8)
45 *
46 * changelog v0 doesn't use extra
47 * </pre>
48 *
49 * Extracted from internals of HgChangelog (the code initially from inside RawChangeset)
50 *
51 * @author Artem Tikhomirov
52 * @author TMate Software Ltd.
53 */
54 public final class ChangesetParser {
55 private final EncodingHelper encHelper;
56 // it's likely user names get repeated again and again throughout repository.
57 private final Pool<String> usersPool;
58 private final Pool<String> filesPool;
59 private final CsetFactory factory;
60
61 public ChangesetParser(SessionContext.Source sessionContex, CsetFactory csetFactory) {
62 assert csetFactory != null;
63 encHelper = Internals.buildFileNameEncodingHelper(sessionContex);
64 usersPool = new Pool<String>();
65 filesPool = new Pool<String>();
66 factory = csetFactory;
67 }
68
69 public void dispose() {
70 usersPool.clear();
71 filesPool.clear();
72 }
73
74 public RawChangeset parse(DataAccess da) throws IOException, HgInvalidDataFormatException {
75 byte[] data = da.byteArray();
76 return parse(data);
77 }
78
79 public RawChangeset parse(byte[] data) throws HgInvalidDataFormatException {
80 return init(data, 0, data.length);
81 }
82
83 private RawChangeset init(byte[] data, int offset, int length) throws HgInvalidDataFormatException {
84 final int bufferEndIndex = offset + length;
85 final byte lineBreak = (byte) '\n';
86 int breakIndex1 = indexOf(data, lineBreak, offset, bufferEndIndex);
87 if (breakIndex1 == -1) {
88 throw new HgInvalidDataFormatException("Bad Changeset data");
89 }
90 Nodeid _nodeid = Nodeid.fromAscii(data, 0, breakIndex1);
91 int breakIndex2 = indexOf(data, lineBreak, breakIndex1 + 1, bufferEndIndex);
92 if (breakIndex2 == -1) {
93 throw new HgInvalidDataFormatException("Bad Changeset data");
94 }
95 String _user;
96 _user = encHelper.userFromChangeset(data, breakIndex1 + 1, breakIndex2 - breakIndex1 - 1);
97 _user = usersPool.unify(_user);
98
99 int breakIndex3 = indexOf(data, lineBreak, breakIndex2 + 1, bufferEndIndex);
100 if (breakIndex3 == -1) {
101 throw new HgInvalidDataFormatException("Bad Changeset data");
102 }
103 String _timeString = new String(data, breakIndex2 + 1, breakIndex3 - breakIndex2 - 1);
104 int space1 = _timeString.indexOf(' ');
105 if (space1 == -1) {
106 throw new HgInvalidDataFormatException(String.format("Bad Changeset data: %s in [%d..%d]", "time string", breakIndex2+1, breakIndex3));
107 }
108 int space2 = _timeString.indexOf(' ', space1 + 1);
109 if (space2 == -1) {
110 space2 = _timeString.length();
111 }
112 long unixTime = Long.parseLong(_timeString.substring(0, space1));
113 int _timezone = Integer.parseInt(_timeString.substring(space1 + 1, space2));
114 // unixTime is local time, and timezone records difference of the local time to UTC.
115 Date _time = new Date(unixTime * 1000);
116 String _extras = space2 < _timeString.length() ? _timeString.substring(space2 + 1) : null;
117 Map<String, String> _extrasMap = parseExtras(_extras);
118 //
119 int lastStart = breakIndex3 + 1;
120 int breakIndex4 = indexOf(data, lineBreak, lastStart, bufferEndIndex);
121 ArrayList<String> _files = null;
122 if (breakIndex4 > lastStart) {
123 // if breakIndex4 == lastStart, we already found \n\n and hence there are no files (e.g. merge revision)
124 _files = new ArrayList<String>(5);
125 while (breakIndex4 != -1 && breakIndex4 + 1 < bufferEndIndex) {
126 String fname = encHelper.fileFromChangeset(data, lastStart, breakIndex4 - lastStart);
127 _files.add(filesPool.unify(fname));
128 lastStart = breakIndex4 + 1;
129 if (data[breakIndex4 + 1] == lineBreak) {
130 // found \n\n
131 break;
132 } else {
133 breakIndex4 = indexOf(data, lineBreak, lastStart, bufferEndIndex);
134 }
135 }
136 if (breakIndex4 == -1 || breakIndex4 >= bufferEndIndex) {
137 throw new HgInvalidDataFormatException("Bad Changeset data");
138 }
139 } else {
140 breakIndex4--;
141 }
142 String _comment = encHelper.commentFromChangeset(data, breakIndex4 + 2, bufferEndIndex - breakIndex4 - 2);
143 RawChangeset target = factory.create(_nodeid, _user, _time, _timezone, _files, _comment, _extrasMap);
144 return target;
145 }
146
147 private Map<String, String> parseExtras(String _extras) {
148 final String extras_branch_key = "branch";
149 _extras = _extras == null ? null : _extras.trim();
150 if (_extras == null || _extras.length() == 0) {
151 return Collections.singletonMap(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME);
152 }
153 Map<String, String> _extrasMap = new HashMap<String, String>();
154 int lastIndex = 0;
155 do {
156 String pair;
157 int sp = _extras.indexOf('\0', lastIndex);
158 if (sp == -1) {
159 sp = _extras.length();
160 }
161 if (sp > lastIndex) {
162 pair = _extras.substring(lastIndex, sp);
163 pair = decode(pair);
164 int eq = pair.indexOf(':');
165 _extrasMap.put(pair.substring(0, eq), pair.substring(eq + 1));
166 lastIndex = sp + 1;
167 }
168 } while (lastIndex < _extras.length());
169 if (!_extrasMap.containsKey(extras_branch_key)) {
170 _extrasMap.put(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME);
171 }
172 return Collections.unmodifiableMap(_extrasMap);
173 }
174
175 private static int indexOf(byte[] src, byte what, int startOffset, int endIndex) {
176 for (int i = startOffset; i < endIndex; i++) {
177 if (src[i] == what) {
178 return i;
179 }
180 }
181 return -1;
182 }
183
184 private static String decode(String s) {
185 if (s != null && s.indexOf('\\') != -1) {
186 // TestAuxUtilities#testChangelogExtrasDecode
187 return s.replace("\\\\", "\\").replace("\\n", "\n").replace("\\r", "\r").replace("\\0", "\00");
188 }
189 return s;
190 }
191
192 public interface CsetFactory {
193 public RawChangeset create(Nodeid nodeid, String user, Date time, int timezone, List<String> files, String comment, Map<String, String> extrasMap);
194 }
195 }