diff src/org/tmatesoft/hg/internal/RevlogDump.java @ 583:47dfa0ec7e35

Effective revlog patching
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 24 Apr 2013 15:39:53 +0200
parents e6f72c9829a6
children
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/internal/RevlogDump.java	Mon Apr 22 19:17:29 2013 +0200
+++ b/src/org/tmatesoft/hg/internal/RevlogDump.java	Wed Apr 24 15:39:53 2013 +0200
@@ -29,6 +29,7 @@
 import java.nio.channels.FileChannel;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import java.util.zip.DataFormatException;
 import java.util.zip.Inflater;
 
 /**
@@ -50,7 +51,7 @@
 		String filename = "store/00changelog.i";
 //		String filename = "store/data/hello.c.i";
 //		String filename = "store/data/docs/readme.i";
-		System.out.println(escape("abc\0def\nzxc\tmnb"));
+//		System.out.println(escape("abc\0def\nzxc\tmnb"));
 		boolean dumpDataFull = true;
 		boolean dumpDataStats = false;
 		if (args.length > 1) {
@@ -61,77 +62,45 @@
 		}
 		final boolean needRevData = dumpDataFull || dumpDataStats; 
 		//
-		DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(new File(repo, filename))));
-		DataInput di = dis;
-		dis.mark(10);
-		int versionField = di.readInt();
-		dis.reset();
-		final int INLINEDATA = 1 << 16;
-		
-		final boolean inlineData = (versionField & INLINEDATA) != 0;
-		System.out.printf("%#8x, inline: %b\n", versionField, inlineData);
-		FileChannel dataStream = null; 
-		if (!inlineData && needRevData) {
-			dataStream = new FileInputStream(new File(repo, filename.substring(0, filename.length()-2) + ".d")).getChannel();
-		}
+		RevlogReader rr = new RevlogReader(new File(repo, filename)).needData(needRevData);
+		rr.init(needRevData);
+		System.out.printf("%#8x, inline: %b\n", rr.versionField, rr.inlineData);
 		System.out.println("Index    Offset      Flags     Packed     Actual   Base Rev   Link Rev  Parent1  Parent2     nodeid");
-		int entryIndex = 0;
-		while (dis.available() > 0) {
-			long l = di.readLong();
-			long offset = entryIndex == 0 ? 0 : (l >>> 16);
-			int flags = (int) (l & 0x0FFFF);
-			int compressedLen = di.readInt();
-			int actualLen = di.readInt();
-			int baseRevision = di.readInt();
-			int linkRevision = di.readInt();
-			int parent1Revision = di.readInt();
-			int parent2Revision = di.readInt();
-			byte[] buf = new byte[32];
-			di.readFully(buf, 12, 20);
-			dis.skipBytes(12); 
-			// CAN'T USE skip() here without extra precautions. E.g. I ran into situation when 
-			// buffer was 8192 and BufferedInputStream was at position 8182 before attempt to skip(12). 
-			// BIS silently skips available bytes and leaves me two extra bytes that ruin the rest of the code.
-			System.out.printf("%4d:%14d %6X %10d %10d %10d %10d %8d %8d     %040x\n", entryIndex, offset, flags, compressedLen, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, new BigInteger(buf));
-			String resultString;
-			byte[] data = new byte[compressedLen];
-			if (inlineData) {
-				di.readFully(data);
-			} else if (needRevData) {
-				dataStream.position(offset);
-				dataStream.read(ByteBuffer.wrap(data));
-			}
+		ByteBuffer data = null;
+		while (rr.hasMore()) {
+			rr.readNext();
+			System.out.printf("%4d:%14d %6X %10d %10d %10d %10d %8d %8d     %040x\n", rr.entryIndex, rr.offset, rr.flags, rr.compressedLen, rr.actualLen, rr.baseRevision, rr.linkRevision, rr.parent1Revision, rr.parent2Revision, new BigInteger(rr.nodeid));
 			if (needRevData) {
-				if (compressedLen == 0) {
+				String resultString;
+				if (rr.getDataLength() == 0) {
 					resultString = "<NO DATA>";
 				} else {
-					if (data[0] == 0x78 /* 'x' */) {
-						Inflater zlib = new Inflater();
-						zlib.setInput(data, 0, compressedLen);
-						byte[] result = new byte[actualLen*2];
-						int resultLen = zlib.inflate(result);
-						zlib.end();
-						resultString = buildString(result, 0, resultLen, baseRevision != entryIndex, dumpDataFull);
-					} else if (data[0] == 0x75 /* 'u' */) {
-						resultString = buildString(data, 1, data.length - 1, baseRevision != entryIndex, dumpDataFull);
-					} else {
-						resultString = buildString(data, 0, data.length, baseRevision != entryIndex, dumpDataFull);
-					}
+					data = ensureCapacity(data, rr.getDataLength());
+					rr.getData(data);
+					data.flip();
+					resultString = buildString(data, rr.isPatch(), dumpDataFull);
 				}
-				System.out.println(resultString);
+				if (resultString.endsWith("\n")) {
+					System.out.print(resultString);
+				} else {
+					System.out.println(resultString);
+				}
 			}
-			entryIndex++;
 		}
-		dis.close();
-		if (dataStream != null) {
-			dataStream.close();
-		}
-		//
+		rr.done();
 	}
 	
-	private static String buildString(byte[] data, int offset, int len, boolean isPatch, boolean completeDataDump) throws IOException, UnsupportedEncodingException {
+	// Hands back a cleared buffer able to hold requiredCap bytes, reusing src whenever it is big enough.
+	private static ByteBuffer ensureCapacity(ByteBuffer src, int requiredCap) {
+		if (src != null && src.capacity() >= requiredCap) {
+			return (ByteBuffer) src.clear(); // position=0, limit=capacity, old content is left to be overwritten
+		}
+		return ByteBuffer.allocate((1 + requiredCap) * 3 / 2); // new heap buffer with ~50% spare room
+	}
+	
+	private static String buildString(ByteBuffer data, boolean isPatch, boolean completeDataDump) throws IOException, UnsupportedEncodingException {
 		if (isPatch) {
-			DataInputStream dis = new DataInputStream(new ByteArrayInputStream(data, offset, len));
+			DataInputStream dis = new DataInputStream(new ByteArrayInputStream(data.array(), data.arrayOffset(), data.remaining()));
 			StringBuilder sb = new StringBuilder();
 			sb.append("<PATCH>:\n");
 			while (dis.available() > 0) {
@@ -152,9 +121,9 @@
 			return sb.toString();
 		} else {
 			if (completeDataDump) {
-				return escape(new String(data, offset, len, "UTF-8"));
+				return escape(new String(data.array(), data.arrayOffset(), data.remaining(), "UTF-8"));
 			}
-			return String.format("<DATA>:%d bytes", len-offset);
+			return String.format("<DATA>:%d bytes", data.remaining());
 		}
 	}
 	
@@ -186,4 +155,277 @@
 		m.appendTail(rv);
 		return rv.toString();
 	}
+
+	/**
+	 * Sequential reader of revlog index (<code>.i</code>) records that optionally supplies uncompressed
+	 * revision data, taken either from the index itself (inline revlog) or from the sibling <code>.d</code> file.
+	 * Usage: {@link #init(boolean)}, then {@link #readNext()} while {@link #hasMore()}, finally {@link #done()}.
+	 * Values of the most recently read record are available through the public fields.
+	 */
+	public static class RevlogReader {
+
+		private final File file;
+		private boolean needRevData;
+		private DataInputStream dis;
+		private boolean inlineData;
+		public int versionField;
+		private FileChannel dataStream;
+		public int entryIndex;
+		private byte[] data;
+		private int dataOffset, dataLen;
+		public long offset;
+		public int flags;
+		public int baseRevision;
+		public int linkRevision;
+		public int parent1Revision;
+		public int parent2Revision;
+		public int compressedLen;
+		public int actualLen;
+		public byte[] nodeid = new byte[21]; // need 1 byte in the front to be 0 to avoid negative BigInts
+
+		/**
+		 * @param f revlog index file, its name is expected to end with <code>.i</code>
+		 */
+		public RevlogReader(File f) {
+			assert f.getName().endsWith(".i");
+			file = f;
+		}
+
+		/**
+		 * Tells whether {@link #readNext()} shall load and uncompress revision data.
+		 * @return <code>this</code>, to chain calls
+		 */
+		public RevlogReader needData(boolean needData) {
+			needRevData = needData;
+			return this;
+		}
+
+		/**
+		 * Opens the index file, reads its version field and, when revlog is not inline and data
+		 * may be requested, opens the <code>.d</code> file as well.
+		 * @param mayRequireData <code>true</code> to open separate data file of a non-inline revlog
+		 * @throws IOException if either file can't be opened or the version field can't be read
+		 */
+		public void init(boolean mayRequireData) throws IOException {
+			dis = new DataInputStream(new BufferedInputStream(new FileInputStream(file)));
+			boolean ok = false;
+			try {
+				// version field shares its bytes with the first record, hence mark/reset to peek at it
+				dis.mark(10);
+				versionField = dis.readInt();
+				dis.reset();
+				final int INLINEDATA = 1 << 16;
+				inlineData = (versionField & INLINEDATA) != 0;
+				dataStream = null;
+				if (!inlineData && mayRequireData) {
+					String fname = file.getAbsolutePath();
+					dataStream = new FileInputStream(new File(fname.substring(0, fname.length()-2) + ".d")).getChannel();
+				}
+				entryIndex = -1;
+				ok = true;
+			} finally {
+				if (!ok) {
+					// don't leak the index stream when header is unreadable or data file is missing
+					done();
+				}
+			}
+		}
+
+		/**
+		 * Skips index records so that the next {@link #readNext()} yields the record with the given index.
+		 * Allowed only between {@link #init(boolean)} and the first {@link #readNext()}, for non-inline revlog.
+		 * @param startEntryIndex zero-based index of the first record to read
+		 * @throws IOException if index file can't be read
+		 * @throws IllegalArgumentException if index is negative
+		 * @throws IllegalStateException if invoked at wrong time, for inline revlog, or when stream refuses to skip
+		 */
+		public void startFrom(int startEntryIndex) throws IOException {
+			if (dis == null) {
+				throw new IllegalStateException("Call #init() first");
+			}
+			if (startEntryIndex < 0) {
+				throw new IllegalArgumentException(String.valueOf(startEntryIndex));
+			}
+			if (entryIndex != -1) {
+				throw new IllegalStateException("Can't seek once iteration has started");
+			}
+			if (inlineData) {
+				// records of inline revlog are interleaved with data of variable length, can't calculate position
+				throw new IllegalStateException("Sorry, initial seek is now supported for separate .i/.d only");
+			}
+			// long arithmetic, int product may overflow for big indexes
+			long bytesLeft = (long) startEntryIndex * Internals.REVLOGV1_RECORD_SIZE;
+			// check before skip: skip(0) legitimately answers 0, which is not a failure for startEntryIndex == 0
+			while (bytesLeft > 0) {
+				long actualSkip = dis.skip(bytesLeft);
+				if (actualSkip <= 0) {
+					throw new IllegalStateException(String.format("Can't get to record %d, failed to skip %d bytes", startEntryIndex, bytesLeft));
+				}
+				bytesLeft -= actualSkip;
+			}
+			entryIndex = startEntryIndex - 1;
+		}
+
+		/**
+		 * @return <code>true</code> if index stream reports more bytes available, i.e. there's a record to read
+		 */
+		public boolean hasMore() throws IOException {
+			return dis.available() > 0;
+		}
+
+		/**
+		 * Reads next index record into the public fields and, if data was requested with
+		 * {@link #needData(boolean)}, loads and uncompresses data of the revision.
+		 * @throws IOException if index or data can't be read completely
+		 * @throws DataFormatException if zlib-compressed data is corrupt or truncated
+		 * @throws IllegalStateException if data is requested but separate data file was not opened by {@link #init(boolean)}
+		 */
+		public void readNext() throws IOException, DataFormatException {
+			entryIndex++;
+			DataInput di = dis;
+			long l = di.readLong();
+			offset = entryIndex == 0 ? 0 : (l >>> 16);
+			flags = (int) (l & 0x0FFFF);
+			compressedLen = di.readInt();
+			actualLen = di.readInt();
+			baseRevision = di.readInt();
+			linkRevision = di.readInt();
+			parent1Revision = di.readInt();
+			parent2Revision = di.readInt();
+			di.readFully(nodeid, 1, 20);
+			dis.skipBytes(12);
+			// CAN'T USE skip() here without extra precautions. E.g. I ran into situation when
+			// buffer was 8192 and BufferedInputStream was at position 8182 before attempt to skip(12).
+			// BIS silently skips available bytes and leaves me two extra bytes that ruin the rest of the code.
+			if (compressedLen < 0) {
+				throw new IOException(String.format("Bad data length %d in record %d", compressedLen, entryIndex));
+			}
+			// forget data of the previous record, so that #getDataLength() never answers stale value
+			data = null;
+			dataOffset = dataLen = 0;
+			byte[] raw = null;
+			if (inlineData) {
+				// inline data follows the record and has to be consumed even when nobody needs it
+				raw = new byte[compressedLen];
+				di.readFully(raw);
+			} else if (needRevData) {
+				if (dataStream == null) {
+					throw new IllegalStateException("Data file is not open, use #init(true)");
+				}
+				raw = new byte[compressedLen];
+				readDataFile(raw);
+			}
+			if (!needRevData || compressedLen == 0) {
+				return;
+			}
+			if (raw[0] == 0x78 /* 'x' */) {
+				inflate(raw);
+			} else if (raw[0] == 0x75 /* 'u' */) {
+				// leading 'u' is a marker, data starts right after it
+				data = raw;
+				dataOffset = 1;
+				dataLen = raw.length - 1;
+			} else {
+				data = raw;
+				dataOffset = 0;
+				dataLen = raw.length;
+			}
+		}
+
+		// Fills the whole array from the data file starting at #offset; a single read() may deliver less than asked.
+		private void readDataFile(byte[] dst) throws IOException {
+			ByteBuffer bb = ByteBuffer.wrap(dst);
+			dataStream.position(offset);
+			while (bb.hasRemaining()) {
+				if (dataStream.read(bb) < 0) {
+					throw new IOException(String.format("Data file ended %d bytes short of record %d", bb.remaining(), entryIndex));
+				}
+			}
+		}
+
+		// Uncompresses zlib stream into #data. Output buffer grows on demand, #actualLen serves as initial
+		// size hint only, it doesn't tell length of the data for patch revisions (see #getDataLength()).
+		private void inflate(byte[] compressed) throws DataFormatException {
+			Inflater zlib = new Inflater();
+			try {
+				zlib.setInput(compressed, 0, compressed.length);
+				byte[] result = new byte[Math.max(actualLen, 64)];
+				int resultLen = 0;
+				while (!zlib.finished()) {
+					if (resultLen == result.length) {
+						byte[] bigger = new byte[result.length * 2];
+						System.arraycopy(result, 0, bigger, 0, resultLen);
+						result = bigger;
+					}
+					int n = zlib.inflate(result, resultLen, result.length - resultLen);
+					if (n == 0 && (zlib.needsInput() || zlib.needsDictionary())) {
+						throw new DataFormatException(String.format("Incomplete zlib stream in record %d", entryIndex));
+					}
+					resultLen += n;
+				}
+				data = result;
+				dataOffset = 0;
+				dataLen = resultLen;
+			} finally {
+				// release native resources even if data is corrupt
+				zlib.end();
+			}
+		}
+
+		/**
+		 * @return length of the uncompressed data of the last read record, 0 if there's none or it was not requested
+		 */
+		public int getDataLength() {
+			// NOT actualLen - there are empty patch revisions (dataLen == 0, but actualLen == previous length)
+			// NOT compressedLen - zip data is uncompressed
+			return dataLen;
+		}
+
+		/**
+		 * Appends data of the last read record to the buffer.
+		 * @param bb destination with at least {@link #getDataLength()} bytes remaining
+		 */
+		public void getData(ByteBuffer bb) {
+			assert bb.remaining() >= dataLen;
+			if (dataLen > 0) { // there may be no array to copy from when revision is empty
+				bb.put(data, dataOffset, dataLen);
+			}
+		}
+
+		/**
+		 * @return <code>true</code> if base revision of the last read record differs from the record itself, i.e. its data is a patch
+		 */
+		public boolean isPatch() {
+			assert entryIndex != -1;
+			return baseRevision != entryIndex;
+		}
+
+		/**
+		 * @return <code>true</code> if version field of the index has inline data flag set
+		 */
+		public boolean isInline() {
+			assert dis != null;
+			return inlineData;
+		}
+
+		/**
+		 * Closes index and data files, if open. Safe to call more than once and without prior {@link #init(boolean)}.
+		 * @throws IOException if any of the files fails to close
+		 */
+		public void done() throws IOException {
+			try {
+				if (dis != null) {
+					dis.close();
+				}
+			} finally {
+				dis = null;
+				// close data file even when index stream failed to
+				if (dataStream != null) {
+					FileChannel ch = dataStream;
+					dataStream = null;
+					ch.close();
+				}
+			}
+		}
+	}
 }