comparison src/org/tmatesoft/hg/internal/RevlogStream.java @ 242:ad6a046943be

Improved reading of sparse revisions from a revlog
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Thu, 23 Jun 2011 15:19:07 +0200
parents 80a3433ace91
children 0e01f9182e16
comparison
equal deleted inserted replaced
241:d3ab16739736 242:ad6a046943be
198 if (end < start || end >= indexSize) { 198 if (end < start || end >= indexSize) {
199 throw new IllegalArgumentException(String.format("Bad right range boundary %d in [0..%d]", end, indexSize-1)); 199 throw new IllegalArgumentException(String.format("Bad right range boundary %d in [0..%d]", end, indexSize-1));
200 } 200 }
201 // XXX may cache [start .. end] from index with a single read (pre-read) 201 // XXX may cache [start .. end] from index with a single read (pre-read)
202 202
203 Lifecycle.BasicCallback cb = null; 203 ReaderN1 r = new ReaderN1(needData, inspector);
204 DataAccess daIndex = null, daData = null; 204 try {
205 daIndex = getIndexStream(); 205 r.start(end - start + 1);
206 if (needData && !inline) { 206 r.range(start, end);
207 daData = getDataStream(); 207 } catch (IOException ex) {
208 } 208 throw new HgBadStateException(ex); // FIXME need better handling
209 try { 209 } finally {
210 r.finish();
211 }
212 }
213
214 /**
215 * Effective alternative to {@link #iterate(int, int, boolean, Inspector) batch read}, when only few selected
216 * revisions are of interest.
217 * @param sortedRevisions revisions to walk, in ascending order.
218 * @param needData whether inspector needs access to header only
219 * @param inspector callback to process entries
220 */
221 public void iterate(int[] sortedRevisions, boolean needData, Inspector inspector) {
222 final int indexSize = revisionCount();
223 if (indexSize == 0 || sortedRevisions.length == 0) {
224 return;
225 }
226 if (sortedRevisions[0] > indexSize || sortedRevisions[sortedRevisions.length - 1] > indexSize) {
227 throw new IllegalArgumentException(String.format("Can't iterate [%d, %d] in range [0..%d]", sortedRevisions[0], sortedRevisions[sortedRevisions.length - 1], indexSize));
228 }
229
230 ReaderN1 r = new ReaderN1(needData, inspector);
231 try {
232 r.start(sortedRevisions.length);
233 for (int i = 0; i < sortedRevisions.length; ) {
234 int x = i;
235 i++;
236 while (i < sortedRevisions.length) {
237 if (sortedRevisions[i] == sortedRevisions[i-1] + 1) {
238 i++;
239 } else {
240 break;
241 }
242 }
243 // commitRevisions[x..i-1] are sequential
244 if (!r.range(sortedRevisions[x], sortedRevisions[i-1])) {
245 return;
246 }
247 }
248 } catch (IOException ex) {
249 throw new HgBadStateException(ex); // FIXME need better handling
250 } finally {
251 r.finish();
252 }
253 }
254
255 private int getBaseRevision(int revision) {
256 return baseRevisions[revision];
257 }
258
259 /**
260 * @return offset of the revision's record in the index (.i) stream
261 */
262 private int getIndexOffsetInt(int revision) {
263 return inline ? indexRecordOffset[revision] : revision * REVLOGV1_RECORD_SIZE;
264 }
265
266 private void initOutline() {
267 if (baseRevisions != null && baseRevisions.length > 0) {
268 return;
269 }
270 ArrayList<Integer> resBases = new ArrayList<Integer>();
271 ArrayList<Integer> resOffsets = new ArrayList<Integer>();
272 DataAccess da = getIndexStream();
273 try {
274 if (da.isEmpty()) {
275 // do not fail with exception if stream is empty, it's likely intentional
276 baseRevisions = new int[0];
277 return;
278 }
279 int versionField = da.readInt();
280 da.readInt(); // just to skip next 4 bytes of offset + flags
281 final int INLINEDATA = 1 << 16;
282 inline = (versionField & INLINEDATA) != 0;
283 long offset = 0; // first offset is always 0, thus Hg uses it for other purposes
284 while(true) {
285 int compressedLen = da.readInt();
286 // 8+4 = 12 bytes total read here
287 @SuppressWarnings("unused")
288 int actualLen = da.readInt();
289 int baseRevision = da.readInt();
290 // 12 + 8 = 20 bytes read here
291 // int linkRevision = di.readInt();
292 // int parent1Revision = di.readInt();
293 // int parent2Revision = di.readInt();
294 // byte[] nodeid = new byte[32];
295 resBases.add(baseRevision);
296 if (inline) {
297 int o = (int) offset;
298 if (o != offset) {
299 // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file
300 // with inlined data of size greater than 2 Gb.
301 throw new HgBadStateException("Data too big, offset didn't fit to sizeof(int)");
302 }
303 resOffsets.add(o + REVLOGV1_RECORD_SIZE * resOffsets.size());
304 da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size)
305 } else {
306 da.skip(3*4 + 32);
307 }
308 if (da.isEmpty()) {
309 // fine, done then
310 baseRevisions = toArray(resBases);
311 if (inline) {
312 indexRecordOffset = toArray(resOffsets);
313 }
314 break;
315 } else {
316 // start reading next record
317 long l = da.readLong();
318 offset = l >>> 16;
319 }
320 }
321 } catch (IOException ex) {
322 ex.printStackTrace(); // log error
323 // too bad, no outline then, but don't fail with NPE
324 baseRevisions = new int[0];
325 } finally {
326 da.done();
327 }
328 }
329
330 /**
331 * operation with single file open/close and multiple diverse reads.
332 * XXX initOutline might need similar extraction to keen N1 format knowledge
333 */
334 class ReaderN1 {
335 private final Inspector inspector;
336 private final boolean needData;
337 private DataAccess daIndex = null, daData = null;
338 private Lifecycle.BasicCallback cb = null;
339 private int lastRevisionRead = BAD_REVISION;
340 private DataAccess lastUserData;
341
342 public ReaderN1(boolean needData, Inspector insp) {
343 assert insp != null;
344 this.needData = needData;
345 inspector = insp;
346 }
347
348 public void start(int totalWork) {
349 daIndex = getIndexStream();
350 if (needData && !inline) {
351 daData = getDataStream();
352 }
353 if (inspector instanceof Lifecycle) {
354 cb = new Lifecycle.BasicCallback();
355 ((Lifecycle) inspector).start(totalWork, cb, cb);
356 }
357 }
358
359 public void finish() {
360 if (lastUserData != null) {
361 lastUserData.done();
362 lastUserData = null;
363 }
364 if (inspector instanceof Lifecycle) {
365 ((Lifecycle) inspector).finish(cb);
366 }
367 daIndex.done();
368 if (daData != null) {
369 daData.done();
370 }
371 }
372
373 public boolean range(int start, int end) throws IOException {
374 // System.out.printf("RevlogStream.ReaderN1.range(): [%d, %d]\n", start, end);
210 byte[] nodeidBuf = new byte[20]; 375 byte[] nodeidBuf = new byte[20];
211 DataAccess lastUserData = null;
212 int i; 376 int i;
213 boolean extraReadsToBaseRev = false; 377 boolean extraReadsToBaseRev = false; // to indicate we read revision prior to start. XXX not sure can't do without
214 if (needData && getBaseRevision(start) < start) { 378 // it (i.e. replace with i >= start)
215 i = getBaseRevision(start); 379 if (needData && (i = getBaseRevision(start)) < start) {
216 extraReadsToBaseRev = true; 380 // if lastRevisionRead in [baseRevision(start), start) can reuse lastUserData
381 // doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below).
382 if (lastRevisionRead != BAD_REVISION && i <= lastRevisionRead && lastRevisionRead < start) {
383 i = lastRevisionRead + 1; // start with first not-yet-read revision
384 extraReadsToBaseRev = i < start;
385 } else {
386 if (lastUserData != null) {
387 lastUserData.done();
388 lastUserData = null;
389 }
390 extraReadsToBaseRev = true;
391 }
217 } else { 392 } else {
393 // don't need to clean lastUserData as it's always null when !needData
218 i = start; 394 i = start;
219 } 395 }
220 396
221 daIndex.seek(getIndexOffsetInt(i)); 397 daIndex.seek(getIndexOffsetInt(i));
222
223 if (inspector instanceof Lifecycle) {
224 cb = new Lifecycle.BasicCallback();
225 ((Lifecycle) inspector).start(end - start + 1, cb, cb);
226 }
227 398
228 for (; i <= end; i++ ) { 399 for (; i <= end; i++ ) {
229 if (inline && needData) { 400 if (inline && needData) {
230 // inspector reading data (though FilterDataAccess) may have affected index position 401 // inspector reading data (though FilterDataAccess) may have affected index position
231 daIndex.seek(getIndexOffsetInt(i)); 402 daIndex.seek(getIndexOffsetInt(i));
292 if (!extraReadsToBaseRev || i >= start) { 463 if (!extraReadsToBaseRev || i >= start) {
293 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); 464 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess);
294 } 465 }
295 if (cb != null) { 466 if (cb != null) {
296 if (cb.isStopped()) { 467 if (cb.isStopped()) {
297 break; 468 return false;
298 } 469 }
299 } 470 }
300 if (userDataAccess != null) { 471 if (userDataAccess != null) {
301 userDataAccess.reset(); 472 userDataAccess.reset();
302 if (lastUserData != null) { 473 if (lastUserData != null) {
303 lastUserData.done(); 474 lastUserData.done();
304 } 475 }
305 lastUserData = userDataAccess; 476 lastUserData = userDataAccess;
306 } 477 }
307 } 478 }
308 } catch (IOException ex) { 479 lastRevisionRead = end;
309 throw new HgBadStateException(ex); // FIXME need better handling 480 return true;
310 } finally { 481 }
311 if (inspector instanceof Lifecycle) { 482 }
312 ((Lifecycle) inspector).finish(cb); 483
313 }
314 daIndex.done();
315 if (daData != null) {
316 daData.done();
317 }
318 }
319 }
320
321 private int getBaseRevision(int revision) {
322 return baseRevisions[revision];
323 }
324
325 /**
326 * @return offset of the revision's record in the index (.i) stream
327 */
328 private int getIndexOffsetInt(int revision) {
329 return inline ? indexRecordOffset[revision] : revision * REVLOGV1_RECORD_SIZE;
330 }
331
332 private void initOutline() {
333 if (baseRevisions != null && baseRevisions.length > 0) {
334 return;
335 }
336 ArrayList<Integer> resBases = new ArrayList<Integer>();
337 ArrayList<Integer> resOffsets = new ArrayList<Integer>();
338 DataAccess da = getIndexStream();
339 try {
340 if (da.isEmpty()) {
341 // do not fail with exception if stream is empty, it's likely intentional
342 baseRevisions = new int[0];
343 return;
344 }
345 int versionField = da.readInt();
346 da.readInt(); // just to skip next 4 bytes of offset + flags
347 final int INLINEDATA = 1 << 16;
348 inline = (versionField & INLINEDATA) != 0;
349 long offset = 0; // first offset is always 0, thus Hg uses it for other purposes
350 while(true) {
351 int compressedLen = da.readInt();
352 // 8+4 = 12 bytes total read here
353 @SuppressWarnings("unused")
354 int actualLen = da.readInt();
355 int baseRevision = da.readInt();
356 // 12 + 8 = 20 bytes read here
357 // int linkRevision = di.readInt();
358 // int parent1Revision = di.readInt();
359 // int parent2Revision = di.readInt();
360 // byte[] nodeid = new byte[32];
361 resBases.add(baseRevision);
362 if (inline) {
363 int o = (int) offset;
364 if (o != offset) {
365 // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file
366 // with inlined data of size greater than 2 Gb.
367 throw new HgBadStateException("Data too big, offset didn't fit to sizeof(int)");
368 }
369 resOffsets.add(o + REVLOGV1_RECORD_SIZE * resOffsets.size());
370 da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size)
371 } else {
372 da.skip(3*4 + 32);
373 }
374 if (da.isEmpty()) {
375 // fine, done then
376 baseRevisions = toArray(resBases);
377 if (inline) {
378 indexRecordOffset = toArray(resOffsets);
379 }
380 break;
381 } else {
382 // start reading next record
383 long l = da.readLong();
384 offset = l >>> 16;
385 }
386 }
387 } catch (IOException ex) {
388 ex.printStackTrace(); // log error
389 // too bad, no outline then, but don't fail with NPE
390 baseRevisions = new int[0];
391 } finally {
392 da.done();
393 }
394 }
395 484
396 private static int[] toArray(List<Integer> l) { 485 private static int[] toArray(List<Integer> l) {
397 int[] rv = new int[l.size()]; 486 int[] rv = new int[l.size()];
398 for (int i = 0; i < rv.length; i++) { 487 for (int i = 0; i < rv.length; i++) {
399 rv[i] = l.get(i); 488 rv[i] = l.get(i);