Mercurial > hg4j
comparison src/org/tmatesoft/hg/internal/RevlogStream.java @ 242:ad6a046943be
Improved reading of sparse revisions from a revlog
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
|---|---|
| date | Thu, 23 Jun 2011 15:19:07 +0200 |
| parents | 80a3433ace91 |
| children | 0e01f9182e16 |
comparison
equal
deleted
inserted
replaced
| 241:d3ab16739736 | 242:ad6a046943be |
|---|---|
| 198 if (end < start || end >= indexSize) { | 198 if (end < start || end >= indexSize) { |
| 199 throw new IllegalArgumentException(String.format("Bad right range boundary %d in [0..%d]", end, indexSize-1)); | 199 throw new IllegalArgumentException(String.format("Bad right range boundary %d in [0..%d]", end, indexSize-1)); |
| 200 } | 200 } |
| 201 // XXX may cache [start .. end] from index with a single read (pre-read) | 201 // XXX may cache [start .. end] from index with a single read (pre-read) |
| 202 | 202 |
| 203 Lifecycle.BasicCallback cb = null; | 203 ReaderN1 r = new ReaderN1(needData, inspector); |
| 204 DataAccess daIndex = null, daData = null; | 204 try { |
| 205 daIndex = getIndexStream(); | 205 r.start(end - start + 1); |
| 206 if (needData && !inline) { | 206 r.range(start, end); |
| 207 daData = getDataStream(); | 207 } catch (IOException ex) { |
| 208 } | 208 throw new HgBadStateException(ex); // FIXME need better handling |
| 209 try { | 209 } finally { |
| 210 r.finish(); | |
| 211 } | |
| 212 } | |
| 213 | |
| 214 /** | |
| 215 * Effective alternative to {@link #iterate(int, int, boolean, Inspector) batch read}, when only few selected | |
| 216 * revisions are of interest. | |
| 217 * @param sortedRevisions revisions to walk, in ascending order. | |
| 218 @param needData whether inspector needs access to revision data, not only headers | |
| 219 * @param inspector callback to process entries | |
| 220 */ | |
| 221 public void iterate(int[] sortedRevisions, boolean needData, Inspector inspector) { | |
| 222 final int indexSize = revisionCount(); | |
| 223 if (indexSize == 0 || sortedRevisions.length == 0) { | |
| 224 return; | |
| 225 } | |
| 226 if (sortedRevisions[0] > indexSize || sortedRevisions[sortedRevisions.length - 1] > indexSize) { | |
| 227 throw new IllegalArgumentException(String.format("Can't iterate [%d, %d] in range [0..%d]", sortedRevisions[0], sortedRevisions[sortedRevisions.length - 1], indexSize)); | |
| 228 } | |
| 229 | |
| 230 ReaderN1 r = new ReaderN1(needData, inspector); | |
| 231 try { | |
| 232 r.start(sortedRevisions.length); | |
| 233 for (int i = 0; i < sortedRevisions.length; ) { | |
| 234 int x = i; | |
| 235 i++; | |
| 236 while (i < sortedRevisions.length) { | |
| 237 if (sortedRevisions[i] == sortedRevisions[i-1] + 1) { | |
| 238 i++; | |
| 239 } else { | |
| 240 break; | |
| 241 } | |
| 242 } | |
| 243 // commitRevisions[x..i-1] are sequential | |
| 244 if (!r.range(sortedRevisions[x], sortedRevisions[i-1])) { | |
| 245 return; | |
| 246 } | |
| 247 } | |
| 248 } catch (IOException ex) { | |
| 249 throw new HgBadStateException(ex); // FIXME need better handling | |
| 250 } finally { | |
| 251 r.finish(); | |
| 252 } | |
| 253 } | |
| 254 | |
| 255 private int getBaseRevision(int revision) { | |
| 256 return baseRevisions[revision]; | |
| 257 } | |
| 258 | |
| 259 /** | |
| 260 * @return offset of the revision's record in the index (.i) stream | |
| 261 */ | |
| 262 private int getIndexOffsetInt(int revision) { | |
| 263 return inline ? indexRecordOffset[revision] : revision * REVLOGV1_RECORD_SIZE; | |
| 264 } | |
| 265 | |
| 266 private void initOutline() { | |
| 267 if (baseRevisions != null && baseRevisions.length > 0) { | |
| 268 return; | |
| 269 } | |
| 270 ArrayList<Integer> resBases = new ArrayList<Integer>(); | |
| 271 ArrayList<Integer> resOffsets = new ArrayList<Integer>(); | |
| 272 DataAccess da = getIndexStream(); | |
| 273 try { | |
| 274 if (da.isEmpty()) { | |
| 275 // do not fail with exception if stream is empty, it's likely intentional | |
| 276 baseRevisions = new int[0]; | |
| 277 return; | |
| 278 } | |
| 279 int versionField = da.readInt(); | |
| 280 da.readInt(); // just to skip next 4 bytes of offset + flags | |
| 281 final int INLINEDATA = 1 << 16; | |
| 282 inline = (versionField & INLINEDATA) != 0; | |
| 283 long offset = 0; // first offset is always 0, thus Hg uses it for other purposes | |
| 284 while(true) { | |
| 285 int compressedLen = da.readInt(); | |
| 286 // 8+4 = 12 bytes total read here | |
| 287 @SuppressWarnings("unused") | |
| 288 int actualLen = da.readInt(); | |
| 289 int baseRevision = da.readInt(); | |
| 290 // 12 + 8 = 20 bytes read here | |
| 291 // int linkRevision = di.readInt(); | |
| 292 // int parent1Revision = di.readInt(); | |
| 293 // int parent2Revision = di.readInt(); | |
| 294 // byte[] nodeid = new byte[32]; | |
| 295 resBases.add(baseRevision); | |
| 296 if (inline) { | |
| 297 int o = (int) offset; | |
| 298 if (o != offset) { | |
| 299 // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file | |
| 300 // with inlined data of size greater than 2 Gb. | |
| 301 throw new HgBadStateException("Data too big, offset didn't fit to sizeof(int)"); | |
| 302 } | |
| 303 resOffsets.add(o + REVLOGV1_RECORD_SIZE * resOffsets.size()); | |
| 304 da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size) | |
| 305 } else { | |
| 306 da.skip(3*4 + 32); | |
| 307 } | |
| 308 if (da.isEmpty()) { | |
| 309 // fine, done then | |
| 310 baseRevisions = toArray(resBases); | |
| 311 if (inline) { | |
| 312 indexRecordOffset = toArray(resOffsets); | |
| 313 } | |
| 314 break; | |
| 315 } else { | |
| 316 // start reading next record | |
| 317 long l = da.readLong(); | |
| 318 offset = l >>> 16; | |
| 319 } | |
| 320 } | |
| 321 } catch (IOException ex) { | |
| 322 ex.printStackTrace(); // log error | |
| 323 // too bad, no outline then, but don't fail with NPE | |
| 324 baseRevisions = new int[0]; | |
| 325 } finally { | |
| 326 da.done(); | |
| 327 } | |
| 328 } | |
| 329 | |
| 330 /** | |
| 331 * operation with single file open/close and multiple diverse reads. | |
| 332 * XXX initOutline might need similar extraction to keep N1 format knowledge | |
| 333 */ | |
| 334 class ReaderN1 { | |
| 335 private final Inspector inspector; | |
| 336 private final boolean needData; | |
| 337 private DataAccess daIndex = null, daData = null; | |
| 338 private Lifecycle.BasicCallback cb = null; | |
| 339 private int lastRevisionRead = BAD_REVISION; | |
| 340 private DataAccess lastUserData; | |
| 341 | |
| 342 public ReaderN1(boolean needData, Inspector insp) { | |
| 343 assert insp != null; | |
| 344 this.needData = needData; | |
| 345 inspector = insp; | |
| 346 } | |
| 347 | |
| 348 public void start(int totalWork) { | |
| 349 daIndex = getIndexStream(); | |
| 350 if (needData && !inline) { | |
| 351 daData = getDataStream(); | |
| 352 } | |
| 353 if (inspector instanceof Lifecycle) { | |
| 354 cb = new Lifecycle.BasicCallback(); | |
| 355 ((Lifecycle) inspector).start(totalWork, cb, cb); | |
| 356 } | |
| 357 } | |
| 358 | |
| 359 public void finish() { | |
| 360 if (lastUserData != null) { | |
| 361 lastUserData.done(); | |
| 362 lastUserData = null; | |
| 363 } | |
| 364 if (inspector instanceof Lifecycle) { | |
| 365 ((Lifecycle) inspector).finish(cb); | |
| 366 } | |
| 367 daIndex.done(); | |
| 368 if (daData != null) { | |
| 369 daData.done(); | |
| 370 } | |
| 371 } | |
| 372 | |
| 373 public boolean range(int start, int end) throws IOException { | |
| 374 // System.out.printf("RevlogStream.ReaderN1.range(): [%d, %d]\n", start, end); | |
| 210 byte[] nodeidBuf = new byte[20]; | 375 byte[] nodeidBuf = new byte[20]; |
| 211 DataAccess lastUserData = null; | |
| 212 int i; | 376 int i; |
| 213 boolean extraReadsToBaseRev = false; | 377 boolean extraReadsToBaseRev = false; // to indicate we read revision prior to start. XXX not sure can't do without |
| 214 if (needData && getBaseRevision(start) < start) { | 378 // it (i.e. replace with i >= start) |
| 215 i = getBaseRevision(start); | 379 if (needData && (i = getBaseRevision(start)) < start) { |
| 216 extraReadsToBaseRev = true; | 380 // if lastRevisionRead in [baseRevision(start), start) can reuse lastUserData |
| 381 // doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below). | |
| 382 if (lastRevisionRead != BAD_REVISION && i <= lastRevisionRead && lastRevisionRead < start) { | |
| 383 i = lastRevisionRead + 1; // start with first not-yet-read revision | |
| 384 extraReadsToBaseRev = i < start; | |
| 385 } else { | |
| 386 if (lastUserData != null) { | |
| 387 lastUserData.done(); | |
| 388 lastUserData = null; | |
| 389 } | |
| 390 extraReadsToBaseRev = true; | |
| 391 } | |
| 217 } else { | 392 } else { |
| 393 // don't need to clean lastUserData as it's always null when !needData | |
| 218 i = start; | 394 i = start; |
| 219 } | 395 } |
| 220 | 396 |
| 221 daIndex.seek(getIndexOffsetInt(i)); | 397 daIndex.seek(getIndexOffsetInt(i)); |
| 222 | |
| 223 if (inspector instanceof Lifecycle) { | |
| 224 cb = new Lifecycle.BasicCallback(); | |
| 225 ((Lifecycle) inspector).start(end - start + 1, cb, cb); | |
| 226 } | |
| 227 | 398 |
| 228 for (; i <= end; i++ ) { | 399 for (; i <= end; i++ ) { |
| 229 if (inline && needData) { | 400 if (inline && needData) { |
| 230 // inspector reading data (through FilterDataAccess) may have affected index position | 401 // inspector reading data (through FilterDataAccess) may have affected index position |
| 231 daIndex.seek(getIndexOffsetInt(i)); | 402 daIndex.seek(getIndexOffsetInt(i)); |
| 292 if (!extraReadsToBaseRev || i >= start) { | 463 if (!extraReadsToBaseRev || i >= start) { |
| 293 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); | 464 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); |
| 294 } | 465 } |
| 295 if (cb != null) { | 466 if (cb != null) { |
| 296 if (cb.isStopped()) { | 467 if (cb.isStopped()) { |
| 297 break; | 468 return false; |
| 298 } | 469 } |
| 299 } | 470 } |
| 300 if (userDataAccess != null) { | 471 if (userDataAccess != null) { |
| 301 userDataAccess.reset(); | 472 userDataAccess.reset(); |
| 302 if (lastUserData != null) { | 473 if (lastUserData != null) { |
| 303 lastUserData.done(); | 474 lastUserData.done(); |
| 304 } | 475 } |
| 305 lastUserData = userDataAccess; | 476 lastUserData = userDataAccess; |
| 306 } | 477 } |
| 307 } | 478 } |
| 308 } catch (IOException ex) { | 479 lastRevisionRead = end; |
| 309 throw new HgBadStateException(ex); // FIXME need better handling | 480 return true; |
| 310 } finally { | 481 } |
| 311 if (inspector instanceof Lifecycle) { | 482 } |
| 312 ((Lifecycle) inspector).finish(cb); | 483 |
| 313 } | |
| 314 daIndex.done(); | |
| 315 if (daData != null) { | |
| 316 daData.done(); | |
| 317 } | |
| 318 } | |
| 319 } | |
| 320 | |
| 321 private int getBaseRevision(int revision) { | |
| 322 return baseRevisions[revision]; | |
| 323 } | |
| 324 | |
| 325 /** | |
| 326 * @return offset of the revision's record in the index (.i) stream | |
| 327 */ | |
| 328 private int getIndexOffsetInt(int revision) { | |
| 329 return inline ? indexRecordOffset[revision] : revision * REVLOGV1_RECORD_SIZE; | |
| 330 } | |
| 331 | |
| 332 private void initOutline() { | |
| 333 if (baseRevisions != null && baseRevisions.length > 0) { | |
| 334 return; | |
| 335 } | |
| 336 ArrayList<Integer> resBases = new ArrayList<Integer>(); | |
| 337 ArrayList<Integer> resOffsets = new ArrayList<Integer>(); | |
| 338 DataAccess da = getIndexStream(); | |
| 339 try { | |
| 340 if (da.isEmpty()) { | |
| 341 // do not fail with exception if stream is empty, it's likely intentional | |
| 342 baseRevisions = new int[0]; | |
| 343 return; | |
| 344 } | |
| 345 int versionField = da.readInt(); | |
| 346 da.readInt(); // just to skip next 4 bytes of offset + flags | |
| 347 final int INLINEDATA = 1 << 16; | |
| 348 inline = (versionField & INLINEDATA) != 0; | |
| 349 long offset = 0; // first offset is always 0, thus Hg uses it for other purposes | |
| 350 while(true) { | |
| 351 int compressedLen = da.readInt(); | |
| 352 // 8+4 = 12 bytes total read here | |
| 353 @SuppressWarnings("unused") | |
| 354 int actualLen = da.readInt(); | |
| 355 int baseRevision = da.readInt(); | |
| 356 // 12 + 8 = 20 bytes read here | |
| 357 // int linkRevision = di.readInt(); | |
| 358 // int parent1Revision = di.readInt(); | |
| 359 // int parent2Revision = di.readInt(); | |
| 360 // byte[] nodeid = new byte[32]; | |
| 361 resBases.add(baseRevision); | |
| 362 if (inline) { | |
| 363 int o = (int) offset; | |
| 364 if (o != offset) { | |
| 365 // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file | |
| 366 // with inlined data of size greater than 2 Gb. | |
| 367 throw new HgBadStateException("Data too big, offset didn't fit to sizeof(int)"); | |
| 368 } | |
| 369 resOffsets.add(o + REVLOGV1_RECORD_SIZE * resOffsets.size()); | |
| 370 da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size) | |
| 371 } else { | |
| 372 da.skip(3*4 + 32); | |
| 373 } | |
| 374 if (da.isEmpty()) { | |
| 375 // fine, done then | |
| 376 baseRevisions = toArray(resBases); | |
| 377 if (inline) { | |
| 378 indexRecordOffset = toArray(resOffsets); | |
| 379 } | |
| 380 break; | |
| 381 } else { | |
| 382 // start reading next record | |
| 383 long l = da.readLong(); | |
| 384 offset = l >>> 16; | |
| 385 } | |
| 386 } | |
| 387 } catch (IOException ex) { | |
| 388 ex.printStackTrace(); // log error | |
| 389 // too bad, no outline then, but don't fail with NPE | |
| 390 baseRevisions = new int[0]; | |
| 391 } finally { | |
| 392 da.done(); | |
| 393 } | |
| 394 } | |
| 395 | 484 |
| 396 private static int[] toArray(List<Integer> l) { | 485 private static int[] toArray(List<Integer> l) { |
| 397 int[] rv = new int[l.size()]; | 486 int[] rv = new int[l.size()]; |
| 398 for (int i = 0; i < rv.length; i++) { | 487 for (int i = 0; i < rv.length; i++) { |
| 399 rv[i] = l.get(i); | 488 rv[i] = l.get(i); |
