comparison src/org/tmatesoft/hg/internal/RevlogDump.java @ 583:47dfa0ec7e35

Effective revlog patching
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 24 Apr 2013 15:39:53 +0200
parents e6f72c9829a6
children
comparison
equal deleted inserted replaced
582:90df078d6418 583:47dfa0ec7e35
27 import java.math.BigInteger; 27 import java.math.BigInteger;
28 import java.nio.ByteBuffer; 28 import java.nio.ByteBuffer;
29 import java.nio.channels.FileChannel; 29 import java.nio.channels.FileChannel;
30 import java.util.regex.Matcher; 30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern; 31 import java.util.regex.Pattern;
32 import java.util.zip.DataFormatException;
32 import java.util.zip.Inflater; 33 import java.util.zip.Inflater;
33 34
34 /** 35 /**
35 * Utility to test/debug/troubleshoot 36 * Utility to test/debug/troubleshoot
36 * 37 *
48 public static void main(String[] args) throws Exception { 49 public static void main(String[] args) throws Exception {
49 String repo = "/temp/hg/hello/.hg/"; 50 String repo = "/temp/hg/hello/.hg/";
50 String filename = "store/00changelog.i"; 51 String filename = "store/00changelog.i";
51 // String filename = "store/data/hello.c.i"; 52 // String filename = "store/data/hello.c.i";
52 // String filename = "store/data/docs/readme.i"; 53 // String filename = "store/data/docs/readme.i";
53 System.out.println(escape("abc\0def\nzxc\tmnb")); 54 // System.out.println(escape("abc\0def\nzxc\tmnb"));
54 boolean dumpDataFull = true; 55 boolean dumpDataFull = true;
55 boolean dumpDataStats = false; 56 boolean dumpDataStats = false;
56 if (args.length > 1) { 57 if (args.length > 1) {
57 repo = args[0]; 58 repo = args[0];
58 filename = args[1]; 59 filename = args[1];
59 dumpDataFull = args.length > 2 ? "dumpData".equals(args[2]) : false; 60 dumpDataFull = args.length > 2 ? "dumpData".equals(args[2]) : false;
60 dumpDataStats = args.length > 2 ? "dumpDataStats".equals(args[2]) : false; 61 dumpDataStats = args.length > 2 ? "dumpDataStats".equals(args[2]) : false;
61 } 62 }
62 final boolean needRevData = dumpDataFull || dumpDataStats; 63 final boolean needRevData = dumpDataFull || dumpDataStats;
63 // 64 //
64 DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(new File(repo, filename)))); 65 RevlogReader rr = new RevlogReader(new File(repo, filename)).needData(needRevData);
65 DataInput di = dis; 66 rr.init(needRevData);
66 dis.mark(10); 67 System.out.printf("%#8x, inline: %b\n", rr.versionField, rr.inlineData);
67 int versionField = di.readInt();
68 dis.reset();
69 final int INLINEDATA = 1 << 16;
70
71 final boolean inlineData = (versionField & INLINEDATA) != 0;
72 System.out.printf("%#8x, inline: %b\n", versionField, inlineData);
73 FileChannel dataStream = null;
74 if (!inlineData && needRevData) {
75 dataStream = new FileInputStream(new File(repo, filename.substring(0, filename.length()-2) + ".d")).getChannel();
76 }
77 System.out.println("Index Offset Flags Packed Actual Base Rev Link Rev Parent1 Parent2 nodeid"); 68 System.out.println("Index Offset Flags Packed Actual Base Rev Link Rev Parent1 Parent2 nodeid");
78 int entryIndex = 0; 69 ByteBuffer data = null;
79 while (dis.available() > 0) { 70 while (rr.hasMore()) {
80 long l = di.readLong(); 71 rr.readNext();
81 long offset = entryIndex == 0 ? 0 : (l >>> 16); 72 System.out.printf("%4d:%14d %6X %10d %10d %10d %10d %8d %8d %040x\n", rr.entryIndex, rr.offset, rr.flags, rr.compressedLen, rr.actualLen, rr.baseRevision, rr.linkRevision, rr.parent1Revision, rr.parent2Revision, new BigInteger(rr.nodeid));
82 int flags = (int) (l & 0x0FFFF);
83 int compressedLen = di.readInt();
84 int actualLen = di.readInt();
85 int baseRevision = di.readInt();
86 int linkRevision = di.readInt();
87 int parent1Revision = di.readInt();
88 int parent2Revision = di.readInt();
89 byte[] buf = new byte[32];
90 di.readFully(buf, 12, 20);
91 dis.skipBytes(12);
92 // CAN'T USE skip() here without extra precautions. E.g. I ran into situation when
93 // buffer was 8192 and BufferedInputStream was at position 8182 before attempt to skip(12).
94 // BIS silently skips available bytes and leaves me two extra bytes that ruin the rest of the code.
95 System.out.printf("%4d:%14d %6X %10d %10d %10d %10d %8d %8d %040x\n", entryIndex, offset, flags, compressedLen, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, new BigInteger(buf));
96 String resultString;
97 byte[] data = new byte[compressedLen];
98 if (inlineData) {
99 di.readFully(data);
100 } else if (needRevData) {
101 dataStream.position(offset);
102 dataStream.read(ByteBuffer.wrap(data));
103 }
104 if (needRevData) { 73 if (needRevData) {
105 if (compressedLen == 0) { 74 String resultString;
75 if (rr.getDataLength() == 0) {
106 resultString = "<NO DATA>"; 76 resultString = "<NO DATA>";
107 } else { 77 } else {
108 if (data[0] == 0x78 /* 'x' */) { 78 data = ensureCapacity(data, rr.getDataLength());
109 Inflater zlib = new Inflater(); 79 rr.getData(data);
110 zlib.setInput(data, 0, compressedLen); 80 data.flip();
111 byte[] result = new byte[actualLen*2]; 81 resultString = buildString(data, rr.isPatch(), dumpDataFull);
112 int resultLen = zlib.inflate(result); 82 }
113 zlib.end(); 83 if (resultString.endsWith("\n")) {
114 resultString = buildString(result, 0, resultLen, baseRevision != entryIndex, dumpDataFull); 84 System.out.print(resultString);
115 } else if (data[0] == 0x75 /* 'u' */) { 85 } else {
116 resultString = buildString(data, 1, data.length - 1, baseRevision != entryIndex, dumpDataFull); 86 System.out.println(resultString);
117 } else { 87 }
118 resultString = buildString(data, 0, data.length, baseRevision != entryIndex, dumpDataFull); 88 }
119 } 89 }
120 } 90 rr.done();
121 System.out.println(resultString);
122 }
123 entryIndex++;
124 }
125 dis.close();
126 if (dataStream != null) {
127 dataStream.close();
128 }
129 //
130 } 91 }
131 92
132 private static String buildString(byte[] data, int offset, int len, boolean isPatch, boolean completeDataDump) throws IOException, UnsupportedEncodingException { 93 private static ByteBuffer ensureCapacity(ByteBuffer src, int requiredCap) {
94 if (src == null || src.capacity() < requiredCap) {
95 return ByteBuffer.allocate((1 + requiredCap) * 3 / 2);
96 }
97 src.clear();
98 return src;
99 }
100
101 private static String buildString(ByteBuffer data, boolean isPatch, boolean completeDataDump) throws IOException, UnsupportedEncodingException {
133 if (isPatch) { 102 if (isPatch) {
134 DataInputStream dis = new DataInputStream(new ByteArrayInputStream(data, offset, len)); 103 DataInputStream dis = new DataInputStream(new ByteArrayInputStream(data.array(), data.arrayOffset(), data.remaining()));
135 StringBuilder sb = new StringBuilder(); 104 StringBuilder sb = new StringBuilder();
136 sb.append("<PATCH>:\n"); 105 sb.append("<PATCH>:\n");
137 while (dis.available() > 0) { 106 while (dis.available() > 0) {
138 int s = dis.readInt(); 107 int s = dis.readInt();
139 int e = dis.readInt(); 108 int e = dis.readInt();
150 sb.append('\n'); 119 sb.append('\n');
151 } 120 }
152 return sb.toString(); 121 return sb.toString();
153 } else { 122 } else {
154 if (completeDataDump) { 123 if (completeDataDump) {
155 return escape(new String(data, offset, len, "UTF-8")); 124 return escape(new String(data.array(), data.arrayOffset(), data.remaining(), "UTF-8"));
156 } 125 }
157 return String.format("<DATA>:%d bytes", len-offset); 126 return String.format("<DATA>:%d bytes", data.remaining());
158 } 127 }
159 } 128 }
160 129
161 private static Pattern controlCharPattern = Pattern.compile("\\p{Cntrl}"); 130 private static Pattern controlCharPattern = Pattern.compile("\\p{Cntrl}");
162 // \p{Cntrl} A control character: [\x00-\x1F\x7F] 131 // \p{Cntrl} A control character: [\x00-\x1F\x7F]
184 m.appendReplacement(rv, replacements[x == 127 ? 32 : x]); 153 m.appendReplacement(rv, replacements[x == 127 ? 32 : x]);
185 } 154 }
186 m.appendTail(rv); 155 m.appendTail(rv);
187 return rv.toString(); 156 return rv.toString();
188 } 157 }
158
159 public static class RevlogReader {
160
161 private final File file;
162 private boolean needRevData;
163 private DataInputStream dis;
164 private boolean inlineData;
165 public int versionField;
166 private FileChannel dataStream;
167 public int entryIndex;
168 private byte[] data;
169 private int dataOffset, dataLen;
170 public long offset;
171 public int flags;
172 public int baseRevision;
173 public int linkRevision;
174 public int parent1Revision;
175 public int parent2Revision;
176 public int compressedLen;
177 public int actualLen;
178 public byte[] nodeid = new byte[21]; // need 1 byte in the front to be 0 to avoid negative BigInts
179
180 public RevlogReader(File f) {
181 assert f.getName().endsWith(".i");
182 file = f;
183 }
184
185 // affects #readNext()
186 public RevlogReader needData(boolean needData) {
187 needRevData = needData;
188 return this;
189 }
190
191 public void init(boolean mayRequireData) throws IOException {
192 dis = new DataInputStream(new BufferedInputStream(new FileInputStream(file)));
193 DataInput di = dis;
194 dis.mark(10);
195 versionField = di.readInt();
196 dis.reset();
197 final int INLINEDATA = 1 << 16;
198 inlineData = (versionField & INLINEDATA) != 0;
199
200 dataStream = null;
201 if (!inlineData && mayRequireData) {
202 String fname = file.getAbsolutePath();
203 dataStream = new FileInputStream(new File(fname.substring(0, fname.length()-2) + ".d")).getChannel();
204 }
205
206 entryIndex = -1;
207 }
208
209 public void startFrom(int startEntryIndex) throws IOException {
210 if (dis == null) {
211 throw new IllegalStateException("Call #init() first");
212 }
213 if (entryIndex != -1 && startEntryIndex != 0) {
214 throw new IllegalStateException("Can't seek once iteration has started");
215 }
216 if (dataStream == null) {
217 throw new IllegalStateException("Sorry, initial seek is now supported for separate .i/.d only");
218 }
219 long newPos = startEntryIndex * Internals.REVLOGV1_RECORD_SIZE, actualSkip;
220 do {
221 actualSkip = dis.skip(newPos);
222 if (actualSkip <= 0) {
223 throw new IllegalStateException(String.valueOf(actualSkip));
224 }
225 newPos -= actualSkip;
226 } while (newPos > 0);
227 entryIndex = startEntryIndex - 1;
228 }
229
230 public boolean hasMore() throws IOException {
231 return dis.available() > 0;
232 }
233
234 public void readNext() throws IOException, DataFormatException {
235 entryIndex++;
236 DataInput di = dis;
237 long l = di.readLong();
238 offset = entryIndex == 0 ? 0 : (l >>> 16);
239 flags = (int) (l & 0x0FFFF);
240 compressedLen = di.readInt();
241 actualLen = di.readInt();
242 baseRevision = di.readInt();
243 linkRevision = di.readInt();
244 parent1Revision = di.readInt();
245 parent2Revision = di.readInt();
246 di.readFully(nodeid, 1, 20);
247 dis.skipBytes(12);
248 // CAN'T USE skip() here without extra precautions. E.g. I ran into situation when
249 // buffer was 8192 and BufferedInputStream was at position 8182 before attempt to skip(12).
250 // BIS silently skips available bytes and leaves me two extra bytes that ruin the rest of the code.
251 data = new byte[compressedLen];
252 if (inlineData) {
253 di.readFully(data);
254 } else if (needRevData) {
255 dataStream.position(offset);
256 dataStream.read(ByteBuffer.wrap(data));
257 }
258 if (needRevData) {
259 if (compressedLen == 0) {
260 data = null;
261 dataOffset = dataLen = 0;
262 } else {
263 if (data[0] == 0x78 /* 'x' */) {
264 Inflater zlib = new Inflater();
265 zlib.setInput(data, 0, compressedLen);
266 byte[] result = new byte[actualLen * 3];
267 int resultLen = zlib.inflate(result);
268 zlib.end();
269 data = result;
270 dataOffset = 0;
271 dataLen = resultLen;
272 } else if (data[0] == 0x75 /* 'u' */) {
273 dataOffset = 1;
274 dataLen = data.length - 1;
275 } else {
276 dataOffset = 0;
277 dataLen = data.length;
278 }
279 }
280 }
281 }
282
283 public int getDataLength() {
284 // NOT actualLen - there are empty patch revisions (dataLen == 0, but actualLen == previous length)
285 // NOT compressedLen - zip data is uncompressed
286 return dataLen;
287 }
288
289 public void getData(ByteBuffer bb) {
290 assert bb.remaining() >= dataLen;
291 bb.put(data, dataOffset, dataLen);
292 }
293
294 public boolean isPatch() {
295 assert entryIndex != -1;
296 return baseRevision != entryIndex;
297 }
298
299 public boolean isInline() {
300 assert dis != null;
301 return inlineData;
302 }
303
304 public void done() throws IOException {
305 dis.close();
306 dis = null;
307 if (dataStream != null) {
308 dataStream.close();
309 dataStream = null;
310 }
311 }
312 }
189 } 313 }