Mercurial > hg4j
comparison src/org/tmatesoft/hg/repo/HgChangelog.java @ 667:fba85bc1dfb8
Refactoring: move all encoding/decoding operations into single place, EncodingHelper
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Thu, 11 Jul 2013 17:54:08 +0200 |
parents | a937e63b6e02 |
children | 545b1d4cc11d |
comparison
equal
deleted
inserted
replaced
666:27a3ddedd6cc | 667:fba85bc1dfb8 |
---|---|
15 * contact TMate Software at support@hg4j.com | 15 * contact TMate Software at support@hg4j.com |
16 */ | 16 */ |
17 package org.tmatesoft.hg.repo; | 17 package org.tmatesoft.hg.repo; |
18 | 18 |
19 import java.io.IOException; | 19 import java.io.IOException; |
20 import java.io.UnsupportedEncodingException; | |
21 import java.util.ArrayList; | 20 import java.util.ArrayList; |
22 import java.util.Arrays; | 21 import java.util.Arrays; |
23 import java.util.Calendar; | 22 import java.util.Calendar; |
24 import java.util.Collections; | 23 import java.util.Collections; |
25 import java.util.Date; | 24 import java.util.Date; |
29 import java.util.Locale; | 28 import java.util.Locale; |
30 import java.util.Map; | 29 import java.util.Map; |
31 import java.util.TimeZone; | 30 import java.util.TimeZone; |
32 | 31 |
33 import org.tmatesoft.hg.core.Nodeid; | 32 import org.tmatesoft.hg.core.Nodeid; |
33 import org.tmatesoft.hg.core.SessionContext; | |
34 import org.tmatesoft.hg.internal.Callback; | 34 import org.tmatesoft.hg.internal.Callback; |
35 import org.tmatesoft.hg.internal.DataAccess; | 35 import org.tmatesoft.hg.internal.DataAccess; |
36 import org.tmatesoft.hg.internal.EncodingHelper; | |
37 import org.tmatesoft.hg.internal.Internals; | |
36 import org.tmatesoft.hg.internal.Lifecycle; | 38 import org.tmatesoft.hg.internal.Lifecycle; |
37 import org.tmatesoft.hg.internal.LifecycleBridge; | 39 import org.tmatesoft.hg.internal.LifecycleBridge; |
38 import org.tmatesoft.hg.internal.Pool; | 40 import org.tmatesoft.hg.internal.Pool; |
39 import org.tmatesoft.hg.internal.RevlogStream; | 41 import org.tmatesoft.hg.internal.RevlogStream; |
40 import org.tmatesoft.hg.util.Adaptable; | 42 import org.tmatesoft.hg.util.Adaptable; |
74 */ | 76 */ |
75 public void range(int start, int end, final HgChangelog.Inspector inspector) throws HgRuntimeException { | 77 public void range(int start, int end, final HgChangelog.Inspector inspector) throws HgRuntimeException { |
76 if (inspector == null) { | 78 if (inspector == null) { |
77 throw new IllegalArgumentException(); | 79 throw new IllegalArgumentException(); |
78 } | 80 } |
79 content.iterate(start, end, true, new RawCsetParser(inspector)); | 81 content.iterate(start, end, true, new RawCsetParser(getRepo(), inspector)); |
80 } | 82 } |
81 | 83 |
82 /** | 84 /** |
83 * @see #range(int, int, Inspector) | 85 * @see #range(int, int, Inspector) |
84 * @return changeset entry objects, never <code>null</code> | 86 * @return changeset entry objects, never <code>null</code> |
114 return; | 116 return; |
115 } | 117 } |
116 if (inspector == null) { | 118 if (inspector == null) { |
117 throw new IllegalArgumentException(); | 119 throw new IllegalArgumentException(); |
118 } | 120 } |
119 content.iterate(sortedRevisions, true, new RawCsetParser(inspector)); | 121 content.iterate(sortedRevisions, true, new RawCsetParser(getRepo(), inspector)); |
120 } | 122 } |
121 | 123 |
122 /** | 124 /** |
123 * Get changeset entry object | 125 * Get changeset entry object |
124 * @throws HgInvalidRevisionException if supplied nodeid doesn't identify any revision from this revlog. <em>Runtime exception</em> | 126 * @throws HgInvalidRevisionException if supplied nodeid doesn't identify any revision from this revlog. <em>Runtime exception</em> |
144 } | 146 } |
145 | 147 |
146 /** | 148 /** |
147 * Entry in the Changelog | 149 * Entry in the Changelog |
148 */ | 150 */ |
149 public static class RawChangeset implements Cloneable /* for those that would like to keep a copy */{ | 151 public static final class RawChangeset implements Cloneable /* for those that would like to keep a copy */{ |
150 // TODO immutable | 152 // would be nice to get it immutable, but then we can't reuse instances |
151 private/* final */Nodeid manifest; | 153 private/* final */Nodeid manifest; |
152 private String user; | 154 private String user; |
153 private String comment; | 155 private String comment; |
154 private List<String> files; // unmodifiable collection (otherwise #files() and implicit #clone() shall be revised) | 156 private String[] files; // shall not be modified (#clone() does shallow copy) |
155 private Date time; | 157 private Date time; |
156 private int timezone; | 158 private int timezone; |
157 // http://mercurial.selenic.com/wiki/PruningDeadBranches - Closing changesets can be identified by close=1 in the changeset's extra field. | 159 // http://mercurial.selenic.com/wiki/PruningDeadBranches - Closing changesets can be identified by close=1 in the changeset's extra field. |
158 private Map<String, String> extras; | 160 private Map<String, String> extras; |
159 | 161 |
160 /** | |
161 * @see mercurial/changelog.py:read() | |
162 * | |
163 * <pre> | |
164 * format used: | |
165 * nodeid\n : manifest node in ascii | |
166 * user\n : user, no \n or \r allowed | |
167 * time tz extra\n : date (time is int or float, timezone is int) | |
168 * : extra is metadatas, encoded and separated by '\0' | |
169 * : older versions ignore it | |
170 * files\n\n : files modified by the cset, no \n or \r allowed | |
171 * (.*) : comment (free text, ideally utf-8) | |
172 * | |
173 * changelog v0 doesn't use extra | |
174 * </pre> | |
175 */ | |
176 private RawChangeset() { | 162 private RawChangeset() { |
177 } | 163 } |
178 | 164 |
179 public Nodeid manifest() { | 165 public Nodeid manifest() { |
180 return manifest; | 166 return manifest; |
187 public String comment() { | 173 public String comment() { |
188 return comment; | 174 return comment; |
189 } | 175 } |
190 | 176 |
191 public List<String> files() { | 177 public List<String> files() { |
192 return files; | 178 return Arrays.asList(files); |
193 } | 179 } |
194 | 180 |
195 public Date date() { | 181 public Date date() { |
196 return time; | 182 return time; |
197 } | 183 } |
232 sb.append("Changeset {"); | 218 sb.append("Changeset {"); |
233 sb.append("User: ").append(user).append(", "); | 219 sb.append("User: ").append(user).append(", "); |
234 sb.append("Comment: ").append(comment).append(", "); | 220 sb.append("Comment: ").append(comment).append(", "); |
235 sb.append("Manifest: ").append(manifest).append(", "); | 221 sb.append("Manifest: ").append(manifest).append(", "); |
236 sb.append("Date: ").append(time).append(", "); | 222 sb.append("Date: ").append(time).append(", "); |
237 sb.append("Files: ").append(files.size()); | 223 sb.append("Files: ").append(files.length); |
238 for (String s : files) { | 224 for (String s : files) { |
239 sb.append(", ").append(s); | 225 sb.append(", ").append(s); |
240 } | 226 } |
241 if (extras != null) { | 227 if (extras != null) { |
242 sb.append(", Extra: ").append(extras); | 228 sb.append(", Extra: ").append(extras); |
251 return (RawChangeset) super.clone(); | 237 return (RawChangeset) super.clone(); |
252 } catch (CloneNotSupportedException ex) { | 238 } catch (CloneNotSupportedException ex) { |
253 throw new InternalError(ex.toString()); | 239 throw new InternalError(ex.toString()); |
254 } | 240 } |
255 } | 241 } |
256 | 242 } |
257 /*package*/ static RawChangeset parse(DataAccess da) throws IOException, HgInvalidDataFormatException { | 243 |
244 /** | |
245 * @see mercurial/changelog.py:read() | |
246 * | |
247 * <pre> | |
248 * format used: | |
249 * nodeid\n : manifest node in ascii | |
250 * user\n : user, no \n or \r allowed | |
251 * time tz extra\n : date (time is int or float, timezone is int) | |
252 * : extra is metadatas, encoded and separated by '\0' | |
253 * : older versions ignore it | |
254 * files\n\n : files modified by the cset, no \n or \r allowed | |
255 * (.*) : comment (free text, ideally utf-8) | |
256 * | |
257 * changelog v0 doesn't use extra | |
258 * </pre> | |
259 */ | |
260 /*package-local*/static final class ChangesetParser { | |
261 private final EncodingHelper encHelper; | |
262 // it's likely user names get repeated again and again throughout repository. | |
263 private final Pool<String> usersPool; | |
264 private final Pool<String> filesPool; | |
265 private final boolean reuseChangesetInstance; | |
266 private RawChangeset target; | |
267 | |
268 public ChangesetParser(SessionContext.Source sessionContex, boolean shallReuseCsetInstance) { | |
269 encHelper = Internals.buildFileNameEncodingHelper(sessionContex); | |
270 usersPool = new Pool<String>(); | |
271 filesPool = new Pool<String>(); | |
272 reuseChangesetInstance = shallReuseCsetInstance; | |
273 if (shallReuseCsetInstance) { | |
274 target = new RawChangeset(); | |
275 } | |
276 } | |
277 | |
278 public void dispose() { | |
279 usersPool.clear(); | |
280 filesPool.clear(); | |
281 } | |
282 | |
283 public RawChangeset parse(DataAccess da) throws IOException, HgInvalidDataFormatException { | |
258 byte[] data = da.byteArray(); | 284 byte[] data = da.byteArray(); |
259 RawChangeset rv = new RawChangeset(); | 285 if (!reuseChangesetInstance) { |
260 rv.init(data, 0, data.length, null); | 286 target = new RawChangeset(); |
261 return rv; | 287 } |
262 } | 288 init(data, 0, data.length); |
263 | 289 return target; |
264 // @param usersPool - it's likely user names get repeated again and again throughout repository. can be null | 290 } |
265 /* package-local */void init(byte[] data, int offset, int length, Pool<String> usersPool) throws HgInvalidDataFormatException { | 291 |
292 private void init(byte[] data, int offset, int length) throws HgInvalidDataFormatException { | |
266 final int bufferEndIndex = offset + length; | 293 final int bufferEndIndex = offset + length; |
267 final byte lineBreak = (byte) '\n'; | 294 final byte lineBreak = (byte) '\n'; |
268 int breakIndex1 = indexOf(data, lineBreak, offset, bufferEndIndex); | 295 int breakIndex1 = indexOf(data, lineBreak, offset, bufferEndIndex); |
269 if (breakIndex1 == -1) { | 296 if (breakIndex1 == -1) { |
270 throw new HgInvalidDataFormatException("Bad Changeset data"); | 297 throw new HgInvalidDataFormatException("Bad Changeset data"); |
273 int breakIndex2 = indexOf(data, lineBreak, breakIndex1 + 1, bufferEndIndex); | 300 int breakIndex2 = indexOf(data, lineBreak, breakIndex1 + 1, bufferEndIndex); |
274 if (breakIndex2 == -1) { | 301 if (breakIndex2 == -1) { |
275 throw new HgInvalidDataFormatException("Bad Changeset data"); | 302 throw new HgInvalidDataFormatException("Bad Changeset data"); |
276 } | 303 } |
277 String _user; | 304 String _user; |
278 try { | 305 _user = encHelper.userFromChangeset(data, breakIndex1 + 1, breakIndex2 - breakIndex1 - 1); |
279 // TODO use encoding helper? Although where encoding is fixed (like here), seems to be just too much | 306 _user = usersPool.unify(_user); |
280 _user = new String(data, breakIndex1 + 1, breakIndex2 - breakIndex1 - 1, "UTF-8"); | |
281 if (usersPool != null) { | |
282 _user = usersPool.unify(_user); | |
283 } | |
284 } catch (UnsupportedEncodingException ex) { | |
285 _user = ""; | |
286 // Could hardly happen | |
287 throw new HgInvalidDataFormatException("Bad Changeset data", ex); | |
288 } | |
289 | 307 |
290 int breakIndex3 = indexOf(data, lineBreak, breakIndex2 + 1, bufferEndIndex); | 308 int breakIndex3 = indexOf(data, lineBreak, breakIndex2 + 1, bufferEndIndex); |
291 if (breakIndex3 == -1) { | 309 if (breakIndex3 == -1) { |
292 throw new HgInvalidDataFormatException("Bad Changeset data"); | 310 throw new HgInvalidDataFormatException("Bad Changeset data"); |
293 } | 311 } |
311 int breakIndex4 = indexOf(data, lineBreak, lastStart, bufferEndIndex); | 329 int breakIndex4 = indexOf(data, lineBreak, lastStart, bufferEndIndex); |
312 ArrayList<String> _files = null; | 330 ArrayList<String> _files = null; |
313 if (breakIndex4 > lastStart) { | 331 if (breakIndex4 > lastStart) { |
314 // if breakIndex4 == lastStart, we already found \n\n and hence there are no files (e.g. merge revision) | 332 // if breakIndex4 == lastStart, we already found \n\n and hence there are no files (e.g. merge revision) |
315 _files = new ArrayList<String>(5); | 333 _files = new ArrayList<String>(5); |
316 // TODO pool file names | |
317 // TODO encoding of filenames? | |
318 while (breakIndex4 != -1 && breakIndex4 + 1 < bufferEndIndex) { | 334 while (breakIndex4 != -1 && breakIndex4 + 1 < bufferEndIndex) { |
319 _files.add(new String(data, lastStart, breakIndex4 - lastStart)); | 335 String fname = encHelper.fileFromChangeset(data, lastStart, breakIndex4 - lastStart); |
336 _files.add(filesPool.unify(fname)); | |
320 lastStart = breakIndex4 + 1; | 337 lastStart = breakIndex4 + 1; |
321 if (data[breakIndex4 + 1] == lineBreak) { | 338 if (data[breakIndex4 + 1] == lineBreak) { |
322 // found \n\n | 339 // found \n\n |
323 break; | 340 break; |
324 } else { | 341 } else { |
329 throw new HgInvalidDataFormatException("Bad Changeset data"); | 346 throw new HgInvalidDataFormatException("Bad Changeset data"); |
330 } | 347 } |
331 } else { | 348 } else { |
332 breakIndex4--; | 349 breakIndex4--; |
333 } | 350 } |
334 String _comment; | 351 String _comment = encHelper.commentFromChangeset(data, breakIndex4 + 2, bufferEndIndex - breakIndex4 - 2); |
335 try { | |
336 _comment = new String(data, breakIndex4 + 2, bufferEndIndex - breakIndex4 - 2, "UTF-8"); | |
337 // TODO post-1.0 respect ui.fallbackencoding and try to decode if set; use EncodingHelper | |
338 } catch (UnsupportedEncodingException ex) { | |
339 _comment = ""; | |
340 // Could hardly happen | |
341 throw new HgInvalidDataFormatException("Bad Changeset data", ex); | |
342 } | |
343 // change this instance at once, don't leave it partially changed in case of error | 352 // change this instance at once, don't leave it partially changed in case of error |
344 this.manifest = _nodeid; | 353 target.manifest = _nodeid; |
345 this.user = _user; | 354 target.user = _user; |
346 this.time = _time; | 355 target.time = _time; |
347 this.timezone = _timezone; | 356 target.timezone = _timezone; |
348 this.files = _files == null ? Collections.<String> emptyList() : Collections.unmodifiableList(_files); | 357 target.files = _files == null ? new String[0] : _files.toArray(new String[_files.size()]); |
349 this.comment = _comment; | 358 target.comment = _comment; |
350 this.extras = _extrasMap; | 359 target.extras = _extrasMap; |
351 } | 360 } |
352 | 361 |
353 private Map<String, String> parseExtras(String _extras) { | 362 private Map<String, String> parseExtras(String _extras) { |
354 final String extras_branch_key = "branch"; | 363 final String extras_branch_key = "branch"; |
355 _extras = _extras == null ? null : _extras.trim(); | 364 _extras = _extras == null ? null : _extras.trim(); |
406 public void next(int revisionNumber, Nodeid nodeid, RawChangeset cset) { | 415 public void next(int revisionNumber, Nodeid nodeid, RawChangeset cset) { |
407 result.add(cset.clone()); | 416 result.add(cset.clone()); |
408 } | 417 } |
409 } | 418 } |
410 | 419 |
411 private static class RawCsetParser implements RevlogStream.Inspector, Adaptable { | 420 private static final class RawCsetParser implements RevlogStream.Inspector, Adaptable, Lifecycle { |
412 | 421 |
413 private final Inspector inspector; | 422 private final Inspector inspector; |
414 private final Pool<String> usersPool; | 423 private final ChangesetParser csetBuilder; |
415 private final RawChangeset cset = new RawChangeset(); | |
416 // non-null when inspector uses high-level lifecycle entities (progress and/or cancel supports) | 424 // non-null when inspector uses high-level lifecycle entities (progress and/or cancel supports) |
417 private final LifecycleBridge lifecycleStub; | 425 private final LifecycleBridge lifecycleStub; |
418 // non-null when inspector relies on low-level lifecycle and is responsible | 426 // non-null when inspector relies on low-level lifecycle and is responsible |
419 // for processing any possible high-level entities itself. | 427 // for processing any possible high-level entities itself. |
420 private final Lifecycle inspectorLifecycle; | 428 private final Lifecycle inspectorLifecycle; |
421 | 429 |
422 public RawCsetParser(HgChangelog.Inspector delegate) { | 430 public RawCsetParser(SessionContext.Source sessionContext, HgChangelog.Inspector delegate) { |
423 assert delegate != null; | 431 assert delegate != null; |
424 inspector = delegate; | 432 inspector = delegate; |
425 usersPool = new Pool<String>(); | 433 csetBuilder = new ChangesetParser(sessionContext, true); |
426 inspectorLifecycle = Adaptable.Factory.getAdapter(delegate, Lifecycle.class, null); | 434 inspectorLifecycle = Adaptable.Factory.getAdapter(delegate, Lifecycle.class, null); |
427 if (inspectorLifecycle == null) { | 435 if (inspectorLifecycle == null) { |
428 ProgressSupport ph = Adaptable.Factory.getAdapter(delegate, ProgressSupport.class, null); | 436 ProgressSupport ph = Adaptable.Factory.getAdapter(delegate, ProgressSupport.class, null); |
429 CancelSupport cs = Adaptable.Factory.getAdapter(delegate, CancelSupport.class, null); | 437 CancelSupport cs = Adaptable.Factory.getAdapter(delegate, CancelSupport.class, null); |
430 if (cs != null || ph != null) { | 438 if (cs != null || ph != null) { |
437 } | 445 } |
438 } | 446 } |
439 | 447 |
440 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) throws HgRuntimeException { | 448 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) throws HgRuntimeException { |
441 try { | 449 try { |
442 byte[] data = da.byteArray(); | 450 RawChangeset cset = csetBuilder.parse(da); |
443 cset.init(data, 0, data.length, usersPool); | |
444 // XXX there's no guarantee for Changeset.Callback that distinct instance comes each time, consider instance reuse | 451 // XXX there's no guarantee for Changeset.Callback that distinct instance comes each time, consider instance reuse |
445 inspector.next(revisionNumber, Nodeid.fromBinary(nodeid, 0), cset); | 452 inspector.next(revisionNumber, Nodeid.fromBinary(nodeid, 0), cset); |
446 if (lifecycleStub != null) { | 453 if (lifecycleStub != null) { |
447 lifecycleStub.nextStep(); | 454 lifecycleStub.nextStep(); |
448 } | 455 } |
454 } | 461 } |
455 } | 462 } |
456 | 463 |
457 public <T> T getAdapter(Class<T> adapterClass) { | 464 public <T> T getAdapter(Class<T> adapterClass) { |
458 if (adapterClass == Lifecycle.class) { | 465 if (adapterClass == Lifecycle.class) { |
459 if (inspectorLifecycle != null) { | 466 return adapterClass.cast(this); |
460 return adapterClass.cast(inspectorLifecycle); | 467 } |
461 } | 468 // XXX what if caller takes Progress/Cancel (which we update through lifecycleStub, too) |
462 // reveal interest in lifecycle only when either progress or cancel support is there | |
463 // and inspector itself doesn't respond to lifecycle request | |
464 // lifecycleStub may still be null here (no progress and cancel), it's ok to cast(null) | |
465 return adapterClass.cast(lifecycleStub); | |
466 | |
467 } | |
468 return Adaptable.Factory.getAdapter(inspector, adapterClass, null); | 469 return Adaptable.Factory.getAdapter(inspector, adapterClass, null); |
469 } | 470 } |
470 | 471 |
472 public void start(int count, Callback callback, Object token) { | |
473 if (inspectorLifecycle != null) { | |
474 inspectorLifecycle.start(count, callback, token); | |
475 } else if (lifecycleStub != null) { | |
476 lifecycleStub.start(count, callback, token); | |
477 } | |
478 } | |
479 | |
480 public void finish(Object token) { | |
481 if (inspectorLifecycle != null) { | |
482 inspectorLifecycle.finish(token); | |
483 } else if (lifecycleStub != null) { | |
484 lifecycleStub.finish(token); | |
485 } | |
486 csetBuilder.dispose(); | |
487 } | |
488 | |
471 } | 489 } |
472 } | 490 } |