Mercurial > hg4j
comparison src/org/tmatesoft/hg/internal/RevlogStream.java @ 242:ad6a046943be
Improved reading of sparse revisions from a revlog
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
|---|---|
| date | Thu, 23 Jun 2011 15:19:07 +0200 |
| parents | 80a3433ace91 |
| children | 0e01f9182e16 |
comparison
equal
deleted
inserted
replaced
| 241:d3ab16739736 | 242:ad6a046943be |
|---|---|
| 198 if (end < start || end >= indexSize) { | 198 if (end < start || end >= indexSize) { |
| 199 throw new IllegalArgumentException(String.format("Bad right range boundary %d in [0..%d]", end, indexSize-1)); | 199 throw new IllegalArgumentException(String.format("Bad right range boundary %d in [0..%d]", end, indexSize-1)); |
| 200 } | 200 } |
| 201 // XXX may cache [start .. end] from index with a single read (pre-read) | 201 // XXX may cache [start .. end] from index with a single read (pre-read) |
| 202 | 202 |
| 203 Lifecycle.BasicCallback cb = null; | 203 ReaderN1 r = new ReaderN1(needData, inspector); |
| 204 DataAccess daIndex = null, daData = null; | 204 try { |
| 205 daIndex = getIndexStream(); | 205 r.start(end - start + 1); |
| 206 if (needData && !inline) { | 206 r.range(start, end); |
| 207 daData = getDataStream(); | 207 } catch (IOException ex) { |
| 208 } | 208 throw new HgBadStateException(ex); // FIXME need better handling |
| 209 try { | 209 } finally { |
| 210 r.finish(); | |
| 211 } | |
| 212 } | |
| 213 | |
| 214 /** | |
| 215 * Effective alternative to {@link #iterate(int, int, boolean, Inspector) batch read}, when only few selected | |
| 216 * revisions are of interest. | |
| 217 * @param sortedRevisions revisions to walk, in ascending order. | |
| 218 @param needData whether inspector needs access to revision data, not only headers | |
| 219 * @param inspector callback to process entries | |
| 220 */ | |
| 221 public void iterate(int[] sortedRevisions, boolean needData, Inspector inspector) { | |
| 222 final int indexSize = revisionCount(); | |
| 223 if (indexSize == 0 || sortedRevisions.length == 0) { | |
| 224 return; | |
| 225 } | |
| 226 if (sortedRevisions[0] > indexSize || sortedRevisions[sortedRevisions.length - 1] > indexSize) { | |
| 227 throw new IllegalArgumentException(String.format("Can't iterate [%d, %d] in range [0..%d]", sortedRevisions[0], sortedRevisions[sortedRevisions.length - 1], indexSize)); | |
| 228 } | |
| 229 | |
| 230 ReaderN1 r = new ReaderN1(needData, inspector); | |
| 231 try { | |
| 232 r.start(sortedRevisions.length); | |
| 233 for (int i = 0; i < sortedRevisions.length; ) { | |
| 234 int x = i; | |
| 235 i++; | |
| 236 while (i < sortedRevisions.length) { | |
| 237 if (sortedRevisions[i] == sortedRevisions[i-1] + 1) { | |
| 238 i++; | |
| 239 } else { | |
| 240 break; | |
| 241 } | |
| 242 } | |
| 243 // commitRevisions[x..i-1] are sequential | |
| 244 if (!r.range(sortedRevisions[x], sortedRevisions[i-1])) { | |
| 245 return; | |
| 246 } | |
| 247 } | |
| 248 } catch (IOException ex) { | |
| 249 throw new HgBadStateException(ex); // FIXME need better handling | |
| 250 } finally { | |
| 251 r.finish(); | |
| 252 } | |
| 253 } | |
| 254 | |
| 255 private int getBaseRevision(int revision) { | |
| 256 return baseRevisions[revision]; | |
| 257 } | |
| 258 | |
| 259 /** | |
| 260 * @return offset of the revision's record in the index (.i) stream | |
| 261 */ | |
| 262 private int getIndexOffsetInt(int revision) { | |
| 263 return inline ? indexRecordOffset[revision] : revision * REVLOGV1_RECORD_SIZE; | |
| 264 } | |
| 265 | |
| 266 private void initOutline() { | |
| 267 if (baseRevisions != null && baseRevisions.length > 0) { | |
| 268 return; | |
| 269 } | |
| 270 ArrayList<Integer> resBases = new ArrayList<Integer>(); | |
| 271 ArrayList<Integer> resOffsets = new ArrayList<Integer>(); | |
| 272 DataAccess da = getIndexStream(); | |
| 273 try { | |
| 274 if (da.isEmpty()) { | |
| 275 // do not fail with exception if stream is empty, it's likely intentional | |
| 276 baseRevisions = new int[0]; | |
| 277 return; | |
| 278 } | |
| 279 int versionField = da.readInt(); | |
| 280 da.readInt(); // just to skip next 4 bytes of offset + flags | |
| 281 final int INLINEDATA = 1 << 16; | |
| 282 inline = (versionField & INLINEDATA) != 0; | |
| 283 long offset = 0; // first offset is always 0, thus Hg uses it for other purposes | |
| 284 while(true) { | |
| 285 int compressedLen = da.readInt(); | |
| 286 // 8+4 = 12 bytes total read here | |
| 287 @SuppressWarnings("unused") | |
| 288 int actualLen = da.readInt(); | |
| 289 int baseRevision = da.readInt(); | |
| 290 // 12 + 8 = 20 bytes read here | |
| 291 // int linkRevision = di.readInt(); | |
| 292 // int parent1Revision = di.readInt(); | |
| 293 // int parent2Revision = di.readInt(); | |
| 294 // byte[] nodeid = new byte[32]; | |
| 295 resBases.add(baseRevision); | |
| 296 if (inline) { | |
| 297 int o = (int) offset; | |
| 298 if (o != offset) { | |
| 299 // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file | |
| 300 // with inlined data of size greater than 2 Gb. | |
| 301 throw new HgBadStateException("Data too big, offset didn't fit to sizeof(int)"); | |
| 302 } | |
| 303 resOffsets.add(o + REVLOGV1_RECORD_SIZE * resOffsets.size()); | |
| 304 da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size) | |
| 305 } else { | |
| 306 da.skip(3*4 + 32); | |
| 307 } | |
| 308 if (da.isEmpty()) { | |
| 309 // fine, done then | |
| 310 baseRevisions = toArray(resBases); | |
| 311 if (inline) { | |
| 312 indexRecordOffset = toArray(resOffsets); | |
| 313 } | |
| 314 break; | |
| 315 } else { | |
| 316 // start reading next record | |
| 317 long l = da.readLong(); | |
| 318 offset = l >>> 16; | |
| 319 } | |
| 320 } | |
| 321 } catch (IOException ex) { | |
| 322 ex.printStackTrace(); // log error | |
| 323 // too bad, no outline then, but don't fail with NPE | |
| 324 baseRevisions = new int[0]; | |
| 325 } finally { | |
| 326 da.done(); | |
| 327 } | |
| 328 } | |
| 329 | |
| 330 /** | |
| 331 * operation with single file open/close and multiple diverse reads. | |
| 332 * XXX initOutline might need similar extraction to keep N1 format knowledge | |
| 333 */ | |
| 334 class ReaderN1 { | |
| 335 private final Inspector inspector; | |
| 336 private final boolean needData; | |
| 337 private DataAccess daIndex = null, daData = null; | |
| 338 private Lifecycle.BasicCallback cb = null; | |
| 339 private int lastRevisionRead = BAD_REVISION; | |
| 340 private DataAccess lastUserData; | |
| 341 | |
| 342 public ReaderN1(boolean needData, Inspector insp) { | |
| 343 assert insp != null; | |
| 344 this.needData = needData; | |
| 345 inspector = insp; | |
| 346 } | |
| 347 | |
| 348 public void start(int totalWork) { | |
| 349 daIndex = getIndexStream(); | |
| 350 if (needData && !inline) { | |
| 351 daData = getDataStream(); | |
| 352 } | |
| 353 if (inspector instanceof Lifecycle) { | |
| 354 cb = new Lifecycle.BasicCallback(); | |
| 355 ((Lifecycle) inspector).start(totalWork, cb, cb); | |
| 356 } | |
| 357 } | |
| 358 | |
| 359 public void finish() { | |
| 360 if (lastUserData != null) { | |
| 361 lastUserData.done(); | |
| 362 lastUserData = null; | |
| 363 } | |
| 364 if (inspector instanceof Lifecycle) { | |
| 365 ((Lifecycle) inspector).finish(cb); | |
| 366 } | |
| 367 daIndex.done(); | |
| 368 if (daData != null) { | |
| 369 daData.done(); | |
| 370 } | |
| 371 } | |
| 372 | |
| 373 public boolean range(int start, int end) throws IOException { | |
| 374 // System.out.printf("RevlogStream.ReaderN1.range(): [%d, %d]\n", start, end); | |
| 210 byte[] nodeidBuf = new byte[20]; | 375 byte[] nodeidBuf = new byte[20]; |
| 211 DataAccess lastUserData = null; | |
| 212 int i; | 376 int i; |
| 213 boolean extraReadsToBaseRev = false; | 377 boolean extraReadsToBaseRev = false; // to indicate we read revision prior to start. XXX not sure can't do without |
| 214 if (needData && getBaseRevision(start) < start) { | 378 // it (i.e. replace with i >= start) |
| 215 i = getBaseRevision(start); | 379 if (needData && (i = getBaseRevision(start)) < start) { |
| 216 extraReadsToBaseRev = true; | 380 // if lastRevisionRead in [baseRevision(start), start) can reuse lastUserData |
| 381 // doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below). | |
| 382 if (lastRevisionRead != BAD_REVISION && i <= lastRevisionRead && lastRevisionRead < start) { | |
| 383 i = lastRevisionRead + 1; // start with first not-yet-read revision | |
| 384 extraReadsToBaseRev = i < start; | |
| 385 } else { | |
| 386 if (lastUserData != null) { | |
| 387 lastUserData.done(); | |
| 388 lastUserData = null; | |
| 389 } | |
| 390 extraReadsToBaseRev = true; | |
| 391 } | |
| 217 } else { | 392 } else { |
| 393 // don't need to clean lastUserData as it's always null when !needData | |
| 218 i = start; | 394 i = start; |
| 219 } | 395 } |
| 220 | 396 |
| 221 daIndex.seek(getIndexOffsetInt(i)); | 397 daIndex.seek(getIndexOffsetInt(i)); |
| 222 | |
| 223 if (inspector instanceof Lifecycle) { | |
| 224 cb = new Lifecycle.BasicCallback(); | |
| 225 ((Lifecycle) inspector).start(end - start + 1, cb, cb); | |
| 226 } | |
| 227 | 398 |
| 228 for (; i <= end; i++ ) { | 399 for (; i <= end; i++ ) { |
| 229 if (inline && needData) { | 400 if (inline && needData) { |
| 230 // inspector reading data (through FilterDataAccess) may have affected index position | 401 // inspector reading data (through FilterDataAccess) may have affected index position |
| 231 daIndex.seek(getIndexOffsetInt(i)); | 402 daIndex.seek(getIndexOffsetInt(i)); |
| 292 if (!extraReadsToBaseRev || i >= start) { | 463 if (!extraReadsToBaseRev || i >= start) { |
| 293 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); | 464 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); |
| 294 } | 465 } |
| 295 if (cb != null) { | 466 if (cb != null) { |
| 296 if (cb.isStopped()) { | 467 if (cb.isStopped()) { |
| 297 break; | 468 return false; |
| 298 } | 469 } |
| 299 } | 470 } |
| 300 if (userDataAccess != null) { | 471 if (userDataAccess != null) { |
| 301 userDataAccess.reset(); | 472 userDataAccess.reset(); |
| 302 if (lastUserData != null) { | 473 if (lastUserData != null) { |
| 303 lastUserData.done(); | 474 lastUserData.done(); |
| 304 } | 475 } |
| 305 lastUserData = userDataAccess; | 476 lastUserData = userDataAccess; |
| 306 } | 477 } |
| 307 } | 478 } |
| 308 } catch (IOException ex) { | 479 lastRevisionRead = end; |
| 309 throw new HgBadStateException(ex); // FIXME need better handling | 480 return true; |
| 310 } finally { | 481 } |
| 311 if (inspector instanceof Lifecycle) { | 482 } |
| 312 ((Lifecycle) inspector).finish(cb); | 483 |
| 313 } | |
| 314 daIndex.done(); | |
| 315 if (daData != null) { | |
| 316 daData.done(); | |
| 317 } | |
| 318 } | |
| 319 } | |
| 320 | |
| 321 private int getBaseRevision(int revision) { | |
| 322 return baseRevisions[revision]; | |
| 323 } | |
| 324 | |
| 325 /** | |
| 326 * @return offset of the revision's record in the index (.i) stream | |
| 327 */ | |
| 328 private int getIndexOffsetInt(int revision) { | |
| 329 return inline ? indexRecordOffset[revision] : revision * REVLOGV1_RECORD_SIZE; | |
| 330 } | |
| 331 | |
| 332 private void initOutline() { | |
| 333 if (baseRevisions != null && baseRevisions.length > 0) { | |
| 334 return; | |
| 335 } | |
| 336 ArrayList<Integer> resBases = new ArrayList<Integer>(); | |
| 337 ArrayList<Integer> resOffsets = new ArrayList<Integer>(); | |
| 338 DataAccess da = getIndexStream(); | |
| 339 try { | |
| 340 if (da.isEmpty()) { | |
| 341 // do not fail with exception if stream is empty, it's likely intentional | |
| 342 baseRevisions = new int[0]; | |
| 343 return; | |
| 344 } | |
| 345 int versionField = da.readInt(); | |
| 346 da.readInt(); // just to skip next 4 bytes of offset + flags | |
| 347 final int INLINEDATA = 1 << 16; | |
| 348 inline = (versionField & INLINEDATA) != 0; | |
| 349 long offset = 0; // first offset is always 0, thus Hg uses it for other purposes | |
| 350 while(true) { | |
| 351 int compressedLen = da.readInt(); | |
| 352 // 8+4 = 12 bytes total read here | |
| 353 @SuppressWarnings("unused") | |
| 354 int actualLen = da.readInt(); | |
| 355 int baseRevision = da.readInt(); | |
| 356 // 12 + 8 = 20 bytes read here | |
| 357 // int linkRevision = di.readInt(); | |
| 358 // int parent1Revision = di.readInt(); | |
| 359 // int parent2Revision = di.readInt(); | |
| 360 // byte[] nodeid = new byte[32]; | |
| 361 resBases.add(baseRevision); | |
| 362 if (inline) { | |
| 363 int o = (int) offset; | |
| 364 if (o != offset) { | |
| 365 // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file | |
| 366 // with inlined data of size greater than 2 Gb. | |
| 367 throw new HgBadStateException("Data too big, offset didn't fit to sizeof(int)"); | |
| 368 } | |
| 369 resOffsets.add(o + REVLOGV1_RECORD_SIZE * resOffsets.size()); | |
| 370 da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size) | |
| 371 } else { | |
| 372 da.skip(3*4 + 32); | |
| 373 } | |
| 374 if (da.isEmpty()) { | |
| 375 // fine, done then | |
| 376 baseRevisions = toArray(resBases); | |
| 377 if (inline) { | |
| 378 indexRecordOffset = toArray(resOffsets); | |
| 379 } | |
| 380 break; | |
| 381 } else { | |
| 382 // start reading next record | |
| 383 long l = da.readLong(); | |
| 384 offset = l >>> 16; | |
| 385 } | |
| 386 } | |
| 387 } catch (IOException ex) { | |
| 388 ex.printStackTrace(); // log error | |
| 389 // too bad, no outline then, but don't fail with NPE | |
| 390 baseRevisions = new int[0]; | |
| 391 } finally { | |
| 392 da.done(); | |
| 393 } | |
| 394 } | |
| 395 | 484 |
| 396 private static int[] toArray(List<Integer> l) { | 485 private static int[] toArray(List<Integer> l) { |
| 397 int[] rv = new int[l.size()]; | 486 int[] rv = new int[l.size()]; |
| 398 for (int i = 0; i < rv.length; i++) { | 487 for (int i = 0; i < rv.length; i++) { |
| 399 rv[i] = l.get(i); | 488 rv[i] = l.get(i); |
