Mercurial > hg4j
comparison src/org/tmatesoft/hg/internal/RevlogStream.java @ 242:ad6a046943be
Improved reading of sparse revisions from a revlog
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Thu, 23 Jun 2011 15:19:07 +0200 |
parents | 80a3433ace91 |
children | 0e01f9182e16 |
comparison
equal
deleted
inserted
replaced
241:d3ab16739736 | 242:ad6a046943be |
---|---|
198 if (end < start || end >= indexSize) { | 198 if (end < start || end >= indexSize) { |
199 throw new IllegalArgumentException(String.format("Bad right range boundary %d in [0..%d]", end, indexSize-1)); | 199 throw new IllegalArgumentException(String.format("Bad right range boundary %d in [0..%d]", end, indexSize-1)); |
200 } | 200 } |
201 // XXX may cache [start .. end] from index with a single read (pre-read) | 201 // XXX may cache [start .. end] from index with a single read (pre-read) |
202 | 202 |
203 Lifecycle.BasicCallback cb = null; | 203 ReaderN1 r = new ReaderN1(needData, inspector); |
204 DataAccess daIndex = null, daData = null; | 204 try { |
205 daIndex = getIndexStream(); | 205 r.start(end - start + 1); |
206 if (needData && !inline) { | 206 r.range(start, end); |
207 daData = getDataStream(); | 207 } catch (IOException ex) { |
208 } | 208 throw new HgBadStateException(ex); // FIXME need better handling |
209 try { | 209 } finally { |
210 r.finish(); | |
211 } | |
212 } | |
213 | |
214 /** | |
215 * Effective alternative to {@link #iterate(int, int, boolean, Inspector) batch read}, when only few selected | |
216 * revisions are of interest. | |
217 * @param sortedRevisions revisions to walk, in ascending order. | |
218 * @param needData whether inspector needs access to header only | |
219 * @param inspector callback to process entries | |
220 */ | |
221 public void iterate(int[] sortedRevisions, boolean needData, Inspector inspector) { | |
222 final int indexSize = revisionCount(); | |
223 if (indexSize == 0 || sortedRevisions.length == 0) { | |
224 return; | |
225 } | |
226 if (sortedRevisions[0] > indexSize || sortedRevisions[sortedRevisions.length - 1] > indexSize) { | |
227 throw new IllegalArgumentException(String.format("Can't iterate [%d, %d] in range [0..%d]", sortedRevisions[0], sortedRevisions[sortedRevisions.length - 1], indexSize)); | |
228 } | |
229 | |
230 ReaderN1 r = new ReaderN1(needData, inspector); | |
231 try { | |
232 r.start(sortedRevisions.length); | |
233 for (int i = 0; i < sortedRevisions.length; ) { | |
234 int x = i; | |
235 i++; | |
236 while (i < sortedRevisions.length) { | |
237 if (sortedRevisions[i] == sortedRevisions[i-1] + 1) { | |
238 i++; | |
239 } else { | |
240 break; | |
241 } | |
242 } | |
243 // commitRevisions[x..i-1] are sequential | |
244 if (!r.range(sortedRevisions[x], sortedRevisions[i-1])) { | |
245 return; | |
246 } | |
247 } | |
248 } catch (IOException ex) { | |
249 throw new HgBadStateException(ex); // FIXME need better handling | |
250 } finally { | |
251 r.finish(); | |
252 } | |
253 } | |
254 | |
255 private int getBaseRevision(int revision) { | |
256 return baseRevisions[revision]; | |
257 } | |
258 | |
259 /** | |
260 * @return offset of the revision's record in the index (.i) stream | |
261 */ | |
262 private int getIndexOffsetInt(int revision) { | |
263 return inline ? indexRecordOffset[revision] : revision * REVLOGV1_RECORD_SIZE; | |
264 } | |
265 | |
266 private void initOutline() { | |
267 if (baseRevisions != null && baseRevisions.length > 0) { | |
268 return; | |
269 } | |
270 ArrayList<Integer> resBases = new ArrayList<Integer>(); | |
271 ArrayList<Integer> resOffsets = new ArrayList<Integer>(); | |
272 DataAccess da = getIndexStream(); | |
273 try { | |
274 if (da.isEmpty()) { | |
275 // do not fail with exception if stream is empty, it's likely intentional | |
276 baseRevisions = new int[0]; | |
277 return; | |
278 } | |
279 int versionField = da.readInt(); | |
280 da.readInt(); // just to skip next 4 bytes of offset + flags | |
281 final int INLINEDATA = 1 << 16; | |
282 inline = (versionField & INLINEDATA) != 0; | |
283 long offset = 0; // first offset is always 0, thus Hg uses it for other purposes | |
284 while(true) { | |
285 int compressedLen = da.readInt(); | |
286 // 8+4 = 12 bytes total read here | |
287 @SuppressWarnings("unused") | |
288 int actualLen = da.readInt(); | |
289 int baseRevision = da.readInt(); | |
290 // 12 + 8 = 20 bytes read here | |
291 // int linkRevision = di.readInt(); | |
292 // int parent1Revision = di.readInt(); | |
293 // int parent2Revision = di.readInt(); | |
294 // byte[] nodeid = new byte[32]; | |
295 resBases.add(baseRevision); | |
296 if (inline) { | |
297 int o = (int) offset; | |
298 if (o != offset) { | |
299 // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file | |
300 // with inlined data of size greater than 2 Gb. | |
301 throw new HgBadStateException("Data too big, offset didn't fit to sizeof(int)"); | |
302 } | |
303 resOffsets.add(o + REVLOGV1_RECORD_SIZE * resOffsets.size()); | |
304 da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size) | |
305 } else { | |
306 da.skip(3*4 + 32); | |
307 } | |
308 if (da.isEmpty()) { | |
309 // fine, done then | |
310 baseRevisions = toArray(resBases); | |
311 if (inline) { | |
312 indexRecordOffset = toArray(resOffsets); | |
313 } | |
314 break; | |
315 } else { | |
316 // start reading next record | |
317 long l = da.readLong(); | |
318 offset = l >>> 16; | |
319 } | |
320 } | |
321 } catch (IOException ex) { | |
322 ex.printStackTrace(); // log error | |
323 // too bad, no outline then, but don't fail with NPE | |
324 baseRevisions = new int[0]; | |
325 } finally { | |
326 da.done(); | |
327 } | |
328 } | |
329 | |
330 /** | |
331 * operation with single file open/close and multiple diverse reads. | |
332 * XXX initOutline might need similar extraction to keen N1 format knowledge | |
333 */ | |
334 class ReaderN1 { | |
335 private final Inspector inspector; | |
336 private final boolean needData; | |
337 private DataAccess daIndex = null, daData = null; | |
338 private Lifecycle.BasicCallback cb = null; | |
339 private int lastRevisionRead = BAD_REVISION; | |
340 private DataAccess lastUserData; | |
341 | |
342 public ReaderN1(boolean needData, Inspector insp) { | |
343 assert insp != null; | |
344 this.needData = needData; | |
345 inspector = insp; | |
346 } | |
347 | |
348 public void start(int totalWork) { | |
349 daIndex = getIndexStream(); | |
350 if (needData && !inline) { | |
351 daData = getDataStream(); | |
352 } | |
353 if (inspector instanceof Lifecycle) { | |
354 cb = new Lifecycle.BasicCallback(); | |
355 ((Lifecycle) inspector).start(totalWork, cb, cb); | |
356 } | |
357 } | |
358 | |
359 public void finish() { | |
360 if (lastUserData != null) { | |
361 lastUserData.done(); | |
362 lastUserData = null; | |
363 } | |
364 if (inspector instanceof Lifecycle) { | |
365 ((Lifecycle) inspector).finish(cb); | |
366 } | |
367 daIndex.done(); | |
368 if (daData != null) { | |
369 daData.done(); | |
370 } | |
371 } | |
372 | |
373 public boolean range(int start, int end) throws IOException { | |
374 // System.out.printf("RevlogStream.ReaderN1.range(): [%d, %d]\n", start, end); | |
210 byte[] nodeidBuf = new byte[20]; | 375 byte[] nodeidBuf = new byte[20]; |
211 DataAccess lastUserData = null; | |
212 int i; | 376 int i; |
213 boolean extraReadsToBaseRev = false; | 377 boolean extraReadsToBaseRev = false; // to indicate we read revision prior to start. XXX not sure can't do without |
214 if (needData && getBaseRevision(start) < start) { | 378 // it (i.e. replace with i >= start) |
215 i = getBaseRevision(start); | 379 if (needData && (i = getBaseRevision(start)) < start) { |
216 extraReadsToBaseRev = true; | 380 // if lastRevisionRead in [baseRevision(start), start) can reuse lastUserData |
381 // doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below). | |
382 if (lastRevisionRead != BAD_REVISION && i <= lastRevisionRead && lastRevisionRead < start) { | |
383 i = lastRevisionRead + 1; // start with first not-yet-read revision | |
384 extraReadsToBaseRev = i < start; | |
385 } else { | |
386 if (lastUserData != null) { | |
387 lastUserData.done(); | |
388 lastUserData = null; | |
389 } | |
390 extraReadsToBaseRev = true; | |
391 } | |
217 } else { | 392 } else { |
393 // don't need to clean lastUserData as it's always null when !needData | |
218 i = start; | 394 i = start; |
219 } | 395 } |
220 | 396 |
221 daIndex.seek(getIndexOffsetInt(i)); | 397 daIndex.seek(getIndexOffsetInt(i)); |
222 | |
223 if (inspector instanceof Lifecycle) { | |
224 cb = new Lifecycle.BasicCallback(); | |
225 ((Lifecycle) inspector).start(end - start + 1, cb, cb); | |
226 } | |
227 | 398 |
228 for (; i <= end; i++ ) { | 399 for (; i <= end; i++ ) { |
229 if (inline && needData) { | 400 if (inline && needData) { |
230 // inspector reading data (though FilterDataAccess) may have affected index position | 401 // inspector reading data (though FilterDataAccess) may have affected index position |
231 daIndex.seek(getIndexOffsetInt(i)); | 402 daIndex.seek(getIndexOffsetInt(i)); |
292 if (!extraReadsToBaseRev || i >= start) { | 463 if (!extraReadsToBaseRev || i >= start) { |
293 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); | 464 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); |
294 } | 465 } |
295 if (cb != null) { | 466 if (cb != null) { |
296 if (cb.isStopped()) { | 467 if (cb.isStopped()) { |
297 break; | 468 return false; |
298 } | 469 } |
299 } | 470 } |
300 if (userDataAccess != null) { | 471 if (userDataAccess != null) { |
301 userDataAccess.reset(); | 472 userDataAccess.reset(); |
302 if (lastUserData != null) { | 473 if (lastUserData != null) { |
303 lastUserData.done(); | 474 lastUserData.done(); |
304 } | 475 } |
305 lastUserData = userDataAccess; | 476 lastUserData = userDataAccess; |
306 } | 477 } |
307 } | 478 } |
308 } catch (IOException ex) { | 479 lastRevisionRead = end; |
309 throw new HgBadStateException(ex); // FIXME need better handling | 480 return true; |
310 } finally { | 481 } |
311 if (inspector instanceof Lifecycle) { | 482 } |
312 ((Lifecycle) inspector).finish(cb); | 483 |
313 } | |
314 daIndex.done(); | |
315 if (daData != null) { | |
316 daData.done(); | |
317 } | |
318 } | |
319 } | |
320 | |
321 private int getBaseRevision(int revision) { | |
322 return baseRevisions[revision]; | |
323 } | |
324 | |
325 /** | |
326 * @return offset of the revision's record in the index (.i) stream | |
327 */ | |
328 private int getIndexOffsetInt(int revision) { | |
329 return inline ? indexRecordOffset[revision] : revision * REVLOGV1_RECORD_SIZE; | |
330 } | |
331 | |
332 private void initOutline() { | |
333 if (baseRevisions != null && baseRevisions.length > 0) { | |
334 return; | |
335 } | |
336 ArrayList<Integer> resBases = new ArrayList<Integer>(); | |
337 ArrayList<Integer> resOffsets = new ArrayList<Integer>(); | |
338 DataAccess da = getIndexStream(); | |
339 try { | |
340 if (da.isEmpty()) { | |
341 // do not fail with exception if stream is empty, it's likely intentional | |
342 baseRevisions = new int[0]; | |
343 return; | |
344 } | |
345 int versionField = da.readInt(); | |
346 da.readInt(); // just to skip next 4 bytes of offset + flags | |
347 final int INLINEDATA = 1 << 16; | |
348 inline = (versionField & INLINEDATA) != 0; | |
349 long offset = 0; // first offset is always 0, thus Hg uses it for other purposes | |
350 while(true) { | |
351 int compressedLen = da.readInt(); | |
352 // 8+4 = 12 bytes total read here | |
353 @SuppressWarnings("unused") | |
354 int actualLen = da.readInt(); | |
355 int baseRevision = da.readInt(); | |
356 // 12 + 8 = 20 bytes read here | |
357 // int linkRevision = di.readInt(); | |
358 // int parent1Revision = di.readInt(); | |
359 // int parent2Revision = di.readInt(); | |
360 // byte[] nodeid = new byte[32]; | |
361 resBases.add(baseRevision); | |
362 if (inline) { | |
363 int o = (int) offset; | |
364 if (o != offset) { | |
365 // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file | |
366 // with inlined data of size greater than 2 Gb. | |
367 throw new HgBadStateException("Data too big, offset didn't fit to sizeof(int)"); | |
368 } | |
369 resOffsets.add(o + REVLOGV1_RECORD_SIZE * resOffsets.size()); | |
370 da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size) | |
371 } else { | |
372 da.skip(3*4 + 32); | |
373 } | |
374 if (da.isEmpty()) { | |
375 // fine, done then | |
376 baseRevisions = toArray(resBases); | |
377 if (inline) { | |
378 indexRecordOffset = toArray(resOffsets); | |
379 } | |
380 break; | |
381 } else { | |
382 // start reading next record | |
383 long l = da.readLong(); | |
384 offset = l >>> 16; | |
385 } | |
386 } | |
387 } catch (IOException ex) { | |
388 ex.printStackTrace(); // log error | |
389 // too bad, no outline then, but don't fail with NPE | |
390 baseRevisions = new int[0]; | |
391 } finally { | |
392 da.done(); | |
393 } | |
394 } | |
395 | 484 |
396 private static int[] toArray(List<Integer> l) { | 485 private static int[] toArray(List<Integer> l) { |
397 int[] rv = new int[l.size()]; | 486 int[] rv = new int[l.size()]; |
398 for (int i = 0; i < rv.length; i++) { | 487 for (int i = 0; i < rv.length; i++) { |
399 rv[i] = l.get(i); | 488 rv[i] = l.get(i); |