Mercurial > hg4j
diff src/org/tmatesoft/hg/internal/RevlogDump.java @ 583:47dfa0ec7e35
Effective revlog patching
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Wed, 24 Apr 2013 15:39:53 +0200 |
parents | e6f72c9829a6 |
children |
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/internal/RevlogDump.java Mon Apr 22 19:17:29 2013 +0200 +++ b/src/org/tmatesoft/hg/internal/RevlogDump.java Wed Apr 24 15:39:53 2013 +0200 @@ -29,6 +29,7 @@ import java.nio.channels.FileChannel; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.zip.DataFormatException; import java.util.zip.Inflater; /** @@ -50,7 +51,7 @@ String filename = "store/00changelog.i"; // String filename = "store/data/hello.c.i"; // String filename = "store/data/docs/readme.i"; - System.out.println(escape("abc\0def\nzxc\tmnb")); +// System.out.println(escape("abc\0def\nzxc\tmnb")); boolean dumpDataFull = true; boolean dumpDataStats = false; if (args.length > 1) { @@ -61,77 +62,45 @@ } final boolean needRevData = dumpDataFull || dumpDataStats; // - DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(new File(repo, filename)))); - DataInput di = dis; - dis.mark(10); - int versionField = di.readInt(); - dis.reset(); - final int INLINEDATA = 1 << 16; - - final boolean inlineData = (versionField & INLINEDATA) != 0; - System.out.printf("%#8x, inline: %b\n", versionField, inlineData); - FileChannel dataStream = null; - if (!inlineData && needRevData) { - dataStream = new FileInputStream(new File(repo, filename.substring(0, filename.length()-2) + ".d")).getChannel(); - } + RevlogReader rr = new RevlogReader(new File(repo, filename)).needData(needRevData); + rr.init(needRevData); + System.out.printf("%#8x, inline: %b\n", rr.versionField, rr.inlineData); System.out.println("Index Offset Flags Packed Actual Base Rev Link Rev Parent1 Parent2 nodeid"); - int entryIndex = 0; - while (dis.available() > 0) { - long l = di.readLong(); - long offset = entryIndex == 0 ? 0 : (l >>> 16); - int flags = (int) (l & 0x0FFFF); - int compressedLen = di.readInt(); - int actualLen = di.readInt(); - int baseRevision = di.readInt(); - int linkRevision = di.readInt(); - int parent1Revision = di.readInt(); - int parent2Revision = di.readInt(); - byte[] buf = new byte[32]; - di.readFully(buf, 12, 20); - dis.skipBytes(12); - // CAN'T USE skip() here without extra precautions. E.g. I ran into situation when - // buffer was 8192 and BufferedInputStream was at position 8182 before attempt to skip(12). - // BIS silently skips available bytes and leaves me two extra bytes that ruin the rest of the code. - System.out.printf("%4d:%14d %6X %10d %10d %10d %10d %8d %8d %040x\n", entryIndex, offset, flags, compressedLen, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, new BigInteger(buf)); - String resultString; - byte[] data = new byte[compressedLen]; - if (inlineData) { - di.readFully(data); - } else if (needRevData) { - dataStream.position(offset); - dataStream.read(ByteBuffer.wrap(data)); - } + ByteBuffer data = null; + while (rr.hasMore()) { + rr.readNext(); + System.out.printf("%4d:%14d %6X %10d %10d %10d %10d %8d %8d %040x\n", rr.entryIndex, rr.offset, rr.flags, rr.compressedLen, rr.actualLen, rr.baseRevision, rr.linkRevision, rr.parent1Revision, rr.parent2Revision, new BigInteger(rr.nodeid)); if (needRevData) { - if (compressedLen == 0) { + String resultString; + if (rr.getDataLength() == 0) { resultString = "<NO DATA>"; } else { - if (data[0] == 0x78 /* 'x' */) { - Inflater zlib = new Inflater(); - zlib.setInput(data, 0, compressedLen); - byte[] result = new byte[actualLen*2]; - int resultLen = zlib.inflate(result); - zlib.end(); - resultString = buildString(result, 0, resultLen, baseRevision != entryIndex, dumpDataFull); - } else if (data[0] == 0x75 /* 'u' */) { - resultString = buildString(data, 1, data.length - 1, baseRevision != entryIndex, dumpDataFull); - } else { - resultString = buildString(data, 0, data.length, baseRevision != entryIndex, dumpDataFull); - } + data = ensureCapacity(data, rr.getDataLength()); + rr.getData(data); + data.flip(); + resultString = buildString(data, rr.isPatch(), dumpDataFull); } - System.out.println(resultString); + if (resultString.endsWith("\n")) { + System.out.print(resultString); + } else { + System.out.println(resultString); + } } - entryIndex++; } - dis.close(); - if (dataStream != null) { - dataStream.close(); - } - // + rr.done(); } - private static String buildString(byte[] data, int offset, int len, boolean isPatch, boolean completeDataDump) throws IOException, UnsupportedEncodingException { + private static ByteBuffer ensureCapacity(ByteBuffer src, int requiredCap) { + if (src == null || src.capacity() < requiredCap) { + return ByteBuffer.allocate((1 + requiredCap) * 3 / 2); + } + src.clear(); + return src; + } + + private static String buildString(ByteBuffer data, boolean isPatch, boolean completeDataDump) throws IOException, UnsupportedEncodingException { if (isPatch) { - DataInputStream dis = new DataInputStream(new ByteArrayInputStream(data, offset, len)); + DataInputStream dis = new DataInputStream(new ByteArrayInputStream(data.array(), data.arrayOffset(), data.remaining())); StringBuilder sb = new StringBuilder(); sb.append("<PATCH>:\n"); while (dis.available() > 0) { @@ -152,9 +121,9 @@ return sb.toString(); } else { if (completeDataDump) { - return escape(new String(data, offset, len, "UTF-8")); + return escape(new String(data.array(), data.arrayOffset(), data.remaining(), "UTF-8")); } - return String.format("<DATA>:%d bytes", len-offset); + return String.format("<DATA>:%d bytes", data.remaining()); } } @@ -186,4 +155,159 @@ m.appendTail(rv); return rv.toString(); } + + public static class RevlogReader { + + private final File file; + private boolean needRevData; + private DataInputStream dis; + private boolean inlineData; + public int versionField; + private FileChannel dataStream; + public int entryIndex; + private byte[] data; + private int dataOffset, dataLen; + public long offset; + public int flags; + public int baseRevision; + public int linkRevision; + public int parent1Revision; + public int parent2Revision; + public int compressedLen; + public int actualLen; + public byte[] nodeid = new byte[21]; // need 1 byte in the front to be 0 to avoid negative BigInts + + public RevlogReader(File f) { + assert f.getName().endsWith(".i"); + file = f; + } + + // affects #readNext() + public RevlogReader needData(boolean needData) { + needRevData = needData; + return this; + } + + public void init(boolean mayRequireData) throws IOException { + dis = new DataInputStream(new BufferedInputStream(new FileInputStream(file))); + DataInput di = dis; + dis.mark(10); + versionField = di.readInt(); + dis.reset(); + final int INLINEDATA = 1 << 16; + inlineData = (versionField & INLINEDATA) != 0; + + dataStream = null; + if (!inlineData && mayRequireData) { + String fname = file.getAbsolutePath(); + dataStream = new FileInputStream(new File(fname.substring(0, fname.length()-2) + ".d")).getChannel(); + } + + entryIndex = -1; + } + + public void startFrom(int startEntryIndex) throws IOException { + if (dis == null) { + throw new IllegalStateException("Call #init() first"); + } + if (entryIndex != -1 && startEntryIndex != 0) { + throw new IllegalStateException("Can't seek once iteration has started"); + } + if (dataStream == null) { + throw new IllegalStateException("Sorry, initial seek is now supported for separate .i/.d only"); + } + long newPos = startEntryIndex * Internals.REVLOGV1_RECORD_SIZE, actualSkip; + do { + actualSkip = dis.skip(newPos); + if (actualSkip <= 0) { + throw new IllegalStateException(String.valueOf(actualSkip)); + } + newPos -= actualSkip; + } while (newPos > 0); + entryIndex = startEntryIndex - 1; + } + + public boolean hasMore() throws IOException { + return dis.available() > 0; + } + + public void readNext() throws IOException, DataFormatException { + entryIndex++; + DataInput di = dis; + long l = di.readLong(); + offset = entryIndex == 0 ? 0 : (l >>> 16); + flags = (int) (l & 0x0FFFF); + compressedLen = di.readInt(); + actualLen = di.readInt(); + baseRevision = di.readInt(); + linkRevision = di.readInt(); + parent1Revision = di.readInt(); + parent2Revision = di.readInt(); + di.readFully(nodeid, 1, 20); + dis.skipBytes(12); + // CAN'T USE skip() here without extra precautions. E.g. I ran into situation when + // buffer was 8192 and BufferedInputStream was at position 8182 before attempt to skip(12). + // BIS silently skips available bytes and leaves me two extra bytes that ruin the rest of the code. + data = new byte[compressedLen]; + if (inlineData) { + di.readFully(data); + } else if (needRevData) { + dataStream.position(offset); + dataStream.read(ByteBuffer.wrap(data)); + } + if (needRevData) { + if (compressedLen == 0) { + data = null; + dataOffset = dataLen = 0; + } else { + if (data[0] == 0x78 /* 'x' */) { + Inflater zlib = new Inflater(); + zlib.setInput(data, 0, compressedLen); + byte[] result = new byte[actualLen * 3]; + int resultLen = zlib.inflate(result); + zlib.end(); + data = result; + dataOffset = 0; + dataLen = resultLen; + } else if (data[0] == 0x75 /* 'u' */) { + dataOffset = 1; + dataLen = data.length - 1; + } else { + dataOffset = 0; + dataLen = data.length; + } + } + } + } + + public int getDataLength() { + // NOT actualLen - there are empty patch revisions (dataLen == 0, but actualLen == previous length) + // NOT compressedLen - zip data is uncompressed + return dataLen; + } + + public void getData(ByteBuffer bb) { + assert bb.remaining() >= dataLen; + bb.put(data, dataOffset, dataLen); + } + + public boolean isPatch() { + assert entryIndex != -1; + return baseRevision != entryIndex; + } + + public boolean isInline() { + assert dis != null; + return inlineData; + } + + public void done() throws IOException { + dis.close(); + dis = null; + if (dataStream != null) { + dataStream.close(); + dataStream = null; + } + } + } }