comparison src/org/tmatesoft/hg/repo/HgChangelog.java @ 667:fba85bc1dfb8

Refactoring: move all encoding/decoding operations into single place, EncodingHelper
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Thu, 11 Jul 2013 17:54:08 +0200
parents a937e63b6e02
children 545b1d4cc11d
comparison
equal deleted inserted replaced
666:27a3ddedd6cc 667:fba85bc1dfb8
15 * contact TMate Software at support@hg4j.com 15 * contact TMate Software at support@hg4j.com
16 */ 16 */
17 package org.tmatesoft.hg.repo; 17 package org.tmatesoft.hg.repo;
18 18
19 import java.io.IOException; 19 import java.io.IOException;
20 import java.io.UnsupportedEncodingException;
21 import java.util.ArrayList; 20 import java.util.ArrayList;
22 import java.util.Arrays; 21 import java.util.Arrays;
23 import java.util.Calendar; 22 import java.util.Calendar;
24 import java.util.Collections; 23 import java.util.Collections;
25 import java.util.Date; 24 import java.util.Date;
29 import java.util.Locale; 28 import java.util.Locale;
30 import java.util.Map; 29 import java.util.Map;
31 import java.util.TimeZone; 30 import java.util.TimeZone;
32 31
33 import org.tmatesoft.hg.core.Nodeid; 32 import org.tmatesoft.hg.core.Nodeid;
33 import org.tmatesoft.hg.core.SessionContext;
34 import org.tmatesoft.hg.internal.Callback; 34 import org.tmatesoft.hg.internal.Callback;
35 import org.tmatesoft.hg.internal.DataAccess; 35 import org.tmatesoft.hg.internal.DataAccess;
36 import org.tmatesoft.hg.internal.EncodingHelper;
37 import org.tmatesoft.hg.internal.Internals;
36 import org.tmatesoft.hg.internal.Lifecycle; 38 import org.tmatesoft.hg.internal.Lifecycle;
37 import org.tmatesoft.hg.internal.LifecycleBridge; 39 import org.tmatesoft.hg.internal.LifecycleBridge;
38 import org.tmatesoft.hg.internal.Pool; 40 import org.tmatesoft.hg.internal.Pool;
39 import org.tmatesoft.hg.internal.RevlogStream; 41 import org.tmatesoft.hg.internal.RevlogStream;
40 import org.tmatesoft.hg.util.Adaptable; 42 import org.tmatesoft.hg.util.Adaptable;
74 */ 76 */
75 public void range(int start, int end, final HgChangelog.Inspector inspector) throws HgRuntimeException { 77 public void range(int start, int end, final HgChangelog.Inspector inspector) throws HgRuntimeException {
76 if (inspector == null) { 78 if (inspector == null) {
77 throw new IllegalArgumentException(); 79 throw new IllegalArgumentException();
78 } 80 }
79 content.iterate(start, end, true, new RawCsetParser(inspector)); 81 content.iterate(start, end, true, new RawCsetParser(getRepo(), inspector));
80 } 82 }
81 83
82 /** 84 /**
83 * @see #range(int, int, Inspector) 85 * @see #range(int, int, Inspector)
84 * @return changeset entry objects, never <code>null</code> 86 * @return changeset entry objects, never <code>null</code>
114 return; 116 return;
115 } 117 }
116 if (inspector == null) { 118 if (inspector == null) {
117 throw new IllegalArgumentException(); 119 throw new IllegalArgumentException();
118 } 120 }
119 content.iterate(sortedRevisions, true, new RawCsetParser(inspector)); 121 content.iterate(sortedRevisions, true, new RawCsetParser(getRepo(), inspector));
120 } 122 }
121 123
122 /** 124 /**
123 * Get changeset entry object 125 * Get changeset entry object
124 * @throws HgInvalidRevisionException if supplied nodeid doesn't identify any revision from this revlog. <em>Runtime exception</em> 126 * @throws HgInvalidRevisionException if supplied nodeid doesn't identify any revision from this revlog. <em>Runtime exception</em>
144 } 146 }
145 147
146 /** 148 /**
147 * Entry in the Changelog 149 * Entry in the Changelog
148 */ 150 */
149 public static class RawChangeset implements Cloneable /* for those that would like to keep a copy */{ 151 public static final class RawChangeset implements Cloneable /* for those that would like to keep a copy */{
150 // TODO immutable 152 // would be nice to get it immutable, but then we can't reuse instances
151 private/* final */Nodeid manifest; 153 private/* final */Nodeid manifest;
152 private String user; 154 private String user;
153 private String comment; 155 private String comment;
154 private List<String> files; // unmodifiable collection (otherwise #files() and implicit #clone() shall be revised) 156 private String[] files; // shall not be modified (#clone() does shallow copy)
155 private Date time; 157 private Date time;
156 private int timezone; 158 private int timezone;
157 // http://mercurial.selenic.com/wiki/PruningDeadBranches - Closing changesets can be identified by close=1 in the changeset's extra field. 159 // http://mercurial.selenic.com/wiki/PruningDeadBranches - Closing changesets can be identified by close=1 in the changeset's extra field.
158 private Map<String, String> extras; 160 private Map<String, String> extras;
159 161
160 /**
161 * @see mercurial/changelog.py:read()
162 *
163 * <pre>
164 * format used:
165 * nodeid\n : manifest node in ascii
166 * user\n : user, no \n or \r allowed
167 * time tz extra\n : date (time is int or float, timezone is int)
168 * : extra is metadatas, encoded and separated by '\0'
169 * : older versions ignore it
170 * files\n\n : files modified by the cset, no \n or \r allowed
171 * (.*) : comment (free text, ideally utf-8)
172 *
173 * changelog v0 doesn't use extra
174 * </pre>
175 */
176 private RawChangeset() { 162 private RawChangeset() {
177 } 163 }
178 164
179 public Nodeid manifest() { 165 public Nodeid manifest() {
180 return manifest; 166 return manifest;
187 public String comment() { 173 public String comment() {
188 return comment; 174 return comment;
189 } 175 }
190 176
191 public List<String> files() { 177 public List<String> files() {
192 return files; 178 return Arrays.asList(files);
193 } 179 }
194 180
195 public Date date() { 181 public Date date() {
196 return time; 182 return time;
197 } 183 }
232 sb.append("Changeset {"); 218 sb.append("Changeset {");
233 sb.append("User: ").append(user).append(", "); 219 sb.append("User: ").append(user).append(", ");
234 sb.append("Comment: ").append(comment).append(", "); 220 sb.append("Comment: ").append(comment).append(", ");
235 sb.append("Manifest: ").append(manifest).append(", "); 221 sb.append("Manifest: ").append(manifest).append(", ");
236 sb.append("Date: ").append(time).append(", "); 222 sb.append("Date: ").append(time).append(", ");
237 sb.append("Files: ").append(files.size()); 223 sb.append("Files: ").append(files.length);
238 for (String s : files) { 224 for (String s : files) {
239 sb.append(", ").append(s); 225 sb.append(", ").append(s);
240 } 226 }
241 if (extras != null) { 227 if (extras != null) {
242 sb.append(", Extra: ").append(extras); 228 sb.append(", Extra: ").append(extras);
251 return (RawChangeset) super.clone(); 237 return (RawChangeset) super.clone();
252 } catch (CloneNotSupportedException ex) { 238 } catch (CloneNotSupportedException ex) {
253 throw new InternalError(ex.toString()); 239 throw new InternalError(ex.toString());
254 } 240 }
255 } 241 }
256 242 }
257 /*package*/ static RawChangeset parse(DataAccess da) throws IOException, HgInvalidDataFormatException { 243
244 /**
245 * @see mercurial/changelog.py:read()
246 *
247 * <pre>
248 * format used:
249 * nodeid\n : manifest node in ascii
250 * user\n : user, no \n or \r allowed
251 * time tz extra\n : date (time is int or float, timezone is int)
252 * : extra is metadatas, encoded and separated by '\0'
253 * : older versions ignore it
254 * files\n\n : files modified by the cset, no \n or \r allowed
255 * (.*) : comment (free text, ideally utf-8)
256 *
257 * changelog v0 doesn't use extra
258 * </pre>
259 */
260 /*package-local*/static final class ChangesetParser {
261 private final EncodingHelper encHelper;
262 // it's likely user names get repeated again and again throughout repository.
263 private final Pool<String> usersPool;
264 private final Pool<String> filesPool;
265 private final boolean reuseChangesetInstance;
266 private RawChangeset target;
267
268 public ChangesetParser(SessionContext.Source sessionContex, boolean shallReuseCsetInstance) {
269 encHelper = Internals.buildFileNameEncodingHelper(sessionContex);
270 usersPool = new Pool<String>();
271 filesPool = new Pool<String>();
272 reuseChangesetInstance = shallReuseCsetInstance;
273 if (shallReuseCsetInstance) {
274 target = new RawChangeset();
275 }
276 }
277
278 public void dispose() {
279 usersPool.clear();
280 filesPool.clear();
281 }
282
283 public RawChangeset parse(DataAccess da) throws IOException, HgInvalidDataFormatException {
258 byte[] data = da.byteArray(); 284 byte[] data = da.byteArray();
259 RawChangeset rv = new RawChangeset(); 285 if (!reuseChangesetInstance) {
260 rv.init(data, 0, data.length, null); 286 target = new RawChangeset();
261 return rv; 287 }
262 } 288 init(data, 0, data.length);
263 289 return target;
264 // @param usersPool - it's likely user names get repeated again and again throughout repository. can be null 290 }
265 /* package-local */void init(byte[] data, int offset, int length, Pool<String> usersPool) throws HgInvalidDataFormatException { 291
292 private void init(byte[] data, int offset, int length) throws HgInvalidDataFormatException {
266 final int bufferEndIndex = offset + length; 293 final int bufferEndIndex = offset + length;
267 final byte lineBreak = (byte) '\n'; 294 final byte lineBreak = (byte) '\n';
268 int breakIndex1 = indexOf(data, lineBreak, offset, bufferEndIndex); 295 int breakIndex1 = indexOf(data, lineBreak, offset, bufferEndIndex);
269 if (breakIndex1 == -1) { 296 if (breakIndex1 == -1) {
270 throw new HgInvalidDataFormatException("Bad Changeset data"); 297 throw new HgInvalidDataFormatException("Bad Changeset data");
273 int breakIndex2 = indexOf(data, lineBreak, breakIndex1 + 1, bufferEndIndex); 300 int breakIndex2 = indexOf(data, lineBreak, breakIndex1 + 1, bufferEndIndex);
274 if (breakIndex2 == -1) { 301 if (breakIndex2 == -1) {
275 throw new HgInvalidDataFormatException("Bad Changeset data"); 302 throw new HgInvalidDataFormatException("Bad Changeset data");
276 } 303 }
277 String _user; 304 String _user;
278 try { 305 _user = encHelper.userFromChangeset(data, breakIndex1 + 1, breakIndex2 - breakIndex1 - 1);
279 // TODO use encoding helper? Although where encoding is fixed (like here), seems to be just too much 306 _user = usersPool.unify(_user);
280 _user = new String(data, breakIndex1 + 1, breakIndex2 - breakIndex1 - 1, "UTF-8");
281 if (usersPool != null) {
282 _user = usersPool.unify(_user);
283 }
284 } catch (UnsupportedEncodingException ex) {
285 _user = "";
286 // Could hardly happen
287 throw new HgInvalidDataFormatException("Bad Changeset data", ex);
288 }
289 307
290 int breakIndex3 = indexOf(data, lineBreak, breakIndex2 + 1, bufferEndIndex); 308 int breakIndex3 = indexOf(data, lineBreak, breakIndex2 + 1, bufferEndIndex);
291 if (breakIndex3 == -1) { 309 if (breakIndex3 == -1) {
292 throw new HgInvalidDataFormatException("Bad Changeset data"); 310 throw new HgInvalidDataFormatException("Bad Changeset data");
293 } 311 }
311 int breakIndex4 = indexOf(data, lineBreak, lastStart, bufferEndIndex); 329 int breakIndex4 = indexOf(data, lineBreak, lastStart, bufferEndIndex);
312 ArrayList<String> _files = null; 330 ArrayList<String> _files = null;
313 if (breakIndex4 > lastStart) { 331 if (breakIndex4 > lastStart) {
314 // if breakIndex4 == lastStart, we already found \n\n and hence there are no files (e.g. merge revision) 332 // if breakIndex4 == lastStart, we already found \n\n and hence there are no files (e.g. merge revision)
315 _files = new ArrayList<String>(5); 333 _files = new ArrayList<String>(5);
316 // TODO pool file names
317 // TODO encoding of filenames?
318 while (breakIndex4 != -1 && breakIndex4 + 1 < bufferEndIndex) { 334 while (breakIndex4 != -1 && breakIndex4 + 1 < bufferEndIndex) {
319 _files.add(new String(data, lastStart, breakIndex4 - lastStart)); 335 String fname = encHelper.fileFromChangeset(data, lastStart, breakIndex4 - lastStart);
336 _files.add(filesPool.unify(fname));
320 lastStart = breakIndex4 + 1; 337 lastStart = breakIndex4 + 1;
321 if (data[breakIndex4 + 1] == lineBreak) { 338 if (data[breakIndex4 + 1] == lineBreak) {
322 // found \n\n 339 // found \n\n
323 break; 340 break;
324 } else { 341 } else {
329 throw new HgInvalidDataFormatException("Bad Changeset data"); 346 throw new HgInvalidDataFormatException("Bad Changeset data");
330 } 347 }
331 } else { 348 } else {
332 breakIndex4--; 349 breakIndex4--;
333 } 350 }
334 String _comment; 351 String _comment = encHelper.commentFromChangeset(data, breakIndex4 + 2, bufferEndIndex - breakIndex4 - 2);
335 try {
336 _comment = new String(data, breakIndex4 + 2, bufferEndIndex - breakIndex4 - 2, "UTF-8");
337 // TODO post-1.0 respect ui.fallbackencoding and try to decode if set; use EncodingHelper
338 } catch (UnsupportedEncodingException ex) {
339 _comment = "";
340 // Could hardly happen
341 throw new HgInvalidDataFormatException("Bad Changeset data", ex);
342 }
343 // change this instance at once, don't leave it partially changes in case of error 352 // change this instance at once, don't leave it partially changes in case of error
344 this.manifest = _nodeid; 353 target.manifest = _nodeid;
345 this.user = _user; 354 target.user = _user;
346 this.time = _time; 355 target.time = _time;
347 this.timezone = _timezone; 356 target.timezone = _timezone;
348 this.files = _files == null ? Collections.<String> emptyList() : Collections.unmodifiableList(_files); 357 target.files = _files == null ? new String[0] : _files.toArray(new String[_files.size()]);
349 this.comment = _comment; 358 target.comment = _comment;
350 this.extras = _extrasMap; 359 target.extras = _extrasMap;
351 } 360 }
352 361
353 private Map<String, String> parseExtras(String _extras) { 362 private Map<String, String> parseExtras(String _extras) {
354 final String extras_branch_key = "branch"; 363 final String extras_branch_key = "branch";
355 _extras = _extras == null ? null : _extras.trim(); 364 _extras = _extras == null ? null : _extras.trim();
406 public void next(int revisionNumber, Nodeid nodeid, RawChangeset cset) { 415 public void next(int revisionNumber, Nodeid nodeid, RawChangeset cset) {
407 result.add(cset.clone()); 416 result.add(cset.clone());
408 } 417 }
409 } 418 }
410 419
411 private static class RawCsetParser implements RevlogStream.Inspector, Adaptable { 420 private static final class RawCsetParser implements RevlogStream.Inspector, Adaptable, Lifecycle {
412 421
413 private final Inspector inspector; 422 private final Inspector inspector;
414 private final Pool<String> usersPool; 423 private final ChangesetParser csetBuilder;
415 private final RawChangeset cset = new RawChangeset();
416 // non-null when inspector uses high-level lifecycle entities (progress and/or cancel supports) 424 // non-null when inspector uses high-level lifecycle entities (progress and/or cancel supports)
417 private final LifecycleBridge lifecycleStub; 425 private final LifecycleBridge lifecycleStub;
418 // non-null when inspector relies on low-level lifecycle and is responsible 426 // non-null when inspector relies on low-level lifecycle and is responsible
419 // to proceed any possible high-level entities himself. 427 // to proceed any possible high-level entities himself.
420 private final Lifecycle inspectorLifecycle; 428 private final Lifecycle inspectorLifecycle;
421 429
422 public RawCsetParser(HgChangelog.Inspector delegate) { 430 public RawCsetParser(SessionContext.Source sessionContext, HgChangelog.Inspector delegate) {
423 assert delegate != null; 431 assert delegate != null;
424 inspector = delegate; 432 inspector = delegate;
425 usersPool = new Pool<String>(); 433 csetBuilder = new ChangesetParser(sessionContext, true);
426 inspectorLifecycle = Adaptable.Factory.getAdapter(delegate, Lifecycle.class, null); 434 inspectorLifecycle = Adaptable.Factory.getAdapter(delegate, Lifecycle.class, null);
427 if (inspectorLifecycle == null) { 435 if (inspectorLifecycle == null) {
428 ProgressSupport ph = Adaptable.Factory.getAdapter(delegate, ProgressSupport.class, null); 436 ProgressSupport ph = Adaptable.Factory.getAdapter(delegate, ProgressSupport.class, null);
429 CancelSupport cs = Adaptable.Factory.getAdapter(delegate, CancelSupport.class, null); 437 CancelSupport cs = Adaptable.Factory.getAdapter(delegate, CancelSupport.class, null);
430 if (cs != null || ph != null) { 438 if (cs != null || ph != null) {
437 } 445 }
438 } 446 }
439 447
440 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) throws HgRuntimeException { 448 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) throws HgRuntimeException {
441 try { 449 try {
442 byte[] data = da.byteArray(); 450 RawChangeset cset = csetBuilder.parse(da);
443 cset.init(data, 0, data.length, usersPool);
444 // XXX there's no guarantee for Changeset.Callback that distinct instance comes each time, consider instance reuse 451 // XXX there's no guarantee for Changeset.Callback that distinct instance comes each time, consider instance reuse
445 inspector.next(revisionNumber, Nodeid.fromBinary(nodeid, 0), cset); 452 inspector.next(revisionNumber, Nodeid.fromBinary(nodeid, 0), cset);
446 if (lifecycleStub != null) { 453 if (lifecycleStub != null) {
447 lifecycleStub.nextStep(); 454 lifecycleStub.nextStep();
448 } 455 }
454 } 461 }
455 } 462 }
456 463
457 public <T> T getAdapter(Class<T> adapterClass) { 464 public <T> T getAdapter(Class<T> adapterClass) {
458 if (adapterClass == Lifecycle.class) { 465 if (adapterClass == Lifecycle.class) {
459 if (inspectorLifecycle != null) { 466 return adapterClass.cast(this);
460 return adapterClass.cast(inspectorLifecycle); 467 }
461 } 468 // XXX what if caller takes Progress/Cancel (which we update through lifecycleStub, too)
462 // reveal interest in lifecycle only when either progress or cancel support is there
463 // and inspector itself doesn't respond to lifecycle request
464 // lifecycleStub may still be null here (no progress and cancel), it's ok to cast(null)
465 return adapterClass.cast(lifecycleStub);
466
467 }
468 return Adaptable.Factory.getAdapter(inspector, adapterClass, null); 469 return Adaptable.Factory.getAdapter(inspector, adapterClass, null);
469 } 470 }
470 471
472 public void start(int count, Callback callback, Object token) {
473 if (inspectorLifecycle != null) {
474 inspectorLifecycle.start(count, callback, token);
475 } else if (lifecycleStub != null) {
476 lifecycleStub.start(count, callback, token);
477 }
478 }
479
480 public void finish(Object token) {
481 if (inspectorLifecycle != null) {
482 inspectorLifecycle.finish(token);
483 } else if (lifecycleStub != null) {
484 lifecycleStub.finish(token);
485 }
486 csetBuilder.dispose();
487 }
488
471 } 489 }
472 } 490 }