comparison hg4j/src/main/java/org/tmatesoft/hg/internal/RevlogStream.java @ 213:6ec4af642ba8 gradle

Project uses Gradle for build - actual changes
author Alexander Kitaev <kitaev@gmail.com>
date Tue, 10 May 2011 10:52:53 +0200
parents
children
comparison
equal deleted inserted replaced
212:edb2e2829352 213:6ec4af642ba8
1 /*
2 * Copyright (c) 2010-2011 TMate Software Ltd
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; version 2 of the License.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * For information on how to redistribute this software under
14 * the terms of a license other than GNU General Public License
15 * contact TMate Software at support@hg4j.com
16 */
17 package org.tmatesoft.hg.internal;
18
19 import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION;
20 import static org.tmatesoft.hg.repo.HgRepository.TIP;
21
22 import java.io.File;
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.LinkedList;
26 import java.util.List;
27
28 import org.tmatesoft.hg.core.HgBadStateException;
29 import org.tmatesoft.hg.core.Nodeid;
30 import org.tmatesoft.hg.repo.HgRepository;
31
32
33 /**
34 * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations),
35 * or numerous RevlogStream with separate representation of the underlying data (cached, lazy ChunkStream)?
36 *
37 * @see http://mercurial.selenic.com/wiki/Revlog
38 * @see http://mercurial.selenic.com/wiki/RevlogNG
39 *
40 * @author Artem Tikhomirov
41 * @author TMate Software Ltd.
42 */
43 public class RevlogStream {
44
45 /*
46 * makes sense for index with inline data only - actual offset of the record in the .i file (record entry + revision * record size))
47 *
48 * long[] in fact (there are 8-bytes field in the revlog)
49 * However, (a) DataAccess currently doesn't operate with long seek/length
50 * and, of greater significance, (b) files with inlined data are designated for smaller files,
51 * guess, about 130 Kb, and offset there won't ever break int capacity
52 */
53 private int[] indexRecordOffset;
54 private int[] baseRevisions;
55 private boolean inline = false;
56 private final File indexFile;
57 private final DataAccessProvider dataAccess;
58
59 // if we need anything else from HgRepo, might replace DAP parameter with HgRepo and query it for DAP.
60 public RevlogStream(DataAccessProvider dap, File indexFile) {
61 this.dataAccess = dap;
62 this.indexFile = indexFile;
63 }
64
65 /*package*/ DataAccess getIndexStream() {
66 return dataAccess.create(indexFile);
67 }
68
69 /*package*/ DataAccess getDataStream() {
70 final String indexName = indexFile.getName();
71 File dataFile = new File(indexFile.getParentFile(), indexName.substring(0, indexName.length() - 1) + "d");
72 return dataAccess.create(dataFile);
73 }
74
75 public int revisionCount() {
76 initOutline();
77 return baseRevisions.length;
78 }
79
80 public int dataLength(int revision) {
81 // XXX in fact, use of iterate() instead of this implementation may be quite reasonable.
82 //
83 final int indexSize = revisionCount();
84 DataAccess daIndex = getIndexStream(); // XXX may supply a hint that I'll need really few bytes of data (although at some offset)
85 if (revision == TIP) {
86 revision = indexSize - 1;
87 }
88 try {
89 int recordOffset = getIndexOffsetInt(revision);
90 daIndex.seek(recordOffset + 12); // 6+2+4
91 int actualLen = daIndex.readInt();
92 return actualLen;
93 } catch (IOException ex) {
94 ex.printStackTrace(); // log error. FIXME better handling
95 throw new IllegalStateException(ex);
96 } finally {
97 daIndex.done();
98 }
99 }
100
101 public byte[] nodeid(int revision) {
102 final int indexSize = revisionCount();
103 if (revision == TIP) {
104 revision = indexSize - 1;
105 }
106 if (revision < 0 || revision >= indexSize) {
107 throw new IllegalArgumentException(Integer.toString(revision));
108 }
109 DataAccess daIndex = getIndexStream();
110 try {
111 int recordOffset = getIndexOffsetInt(revision);
112 daIndex.seek(recordOffset + 32);
113 byte[] rv = new byte[20];
114 daIndex.readBytes(rv, 0, 20);
115 return rv;
116 } catch (IOException ex) {
117 ex.printStackTrace();
118 throw new IllegalStateException();
119 } finally {
120 daIndex.done();
121 }
122 }
123
124 public int linkRevision(int revision) {
125 final int last = revisionCount() - 1;
126 if (revision == TIP) {
127 revision = last;
128 }
129 if (revision < 0 || revision > last) {
130 throw new IllegalArgumentException(Integer.toString(revision));
131 }
132 DataAccess daIndex = getIndexStream();
133 try {
134 int recordOffset = getIndexOffsetInt(revision);
135 daIndex.seek(recordOffset + 20);
136 int linkRev = daIndex.readInt();
137 return linkRev;
138 } catch (IOException ex) {
139 ex.printStackTrace();
140 throw new IllegalStateException();
141 } finally {
142 daIndex.done();
143 }
144 }
145
146 // Perhaps, RevlogStream should be limited to use of plain int revisions for access,
147 // while Nodeids should be kept on the level up, in Revlog. Guess, Revlog better keep
148 // map of nodeids, and once this comes true, we may get rid of this method.
149 // Unlike its counterpart, {@link Revlog#getLocalRevisionNumber()}, doesn't fail with exception if node not found,
150 /**
151 * @return integer in [0..revisionCount()) or {@link HgRepository#BAD_REVISION} if not found
152 */
153 public int findLocalRevisionNumber(Nodeid nodeid) {
154 // XXX this one may be implemented with iterate() once there's mechanism to stop iterations
155 final int indexSize = revisionCount();
156 DataAccess daIndex = getIndexStream();
157 try {
158 byte[] nodeidBuf = new byte[20];
159 for (int i = 0; i < indexSize; i++) {
160 daIndex.skip(8);
161 int compressedLen = daIndex.readInt();
162 daIndex.skip(20);
163 daIndex.readBytes(nodeidBuf, 0, 20);
164 if (nodeid.equalsTo(nodeidBuf)) {
165 return i;
166 }
167 daIndex.skip(inline ? 12 + compressedLen : 12);
168 }
169 } catch (IOException ex) {
170 ex.printStackTrace(); // log error. FIXME better handling
171 throw new IllegalStateException(ex);
172 } finally {
173 daIndex.done();
174 }
175 return BAD_REVISION;
176 }
177
178
179 private final int REVLOGV1_RECORD_SIZE = 64;
180
181 // should be possible to use TIP, ALL, or -1, -2, -n notation of Hg
182 // ? boolean needsNodeid
183 public void iterate(int start, int end, boolean needData, Inspector inspector) {
184 initOutline();
185 final int indexSize = revisionCount();
186 if (indexSize == 0) {
187 return;
188 }
189 if (end == TIP) {
190 end = indexSize - 1;
191 }
192 if (start == TIP) {
193 start = indexSize - 1;
194 }
195 if (start < 0 || start >= indexSize) {
196 throw new IllegalArgumentException(String.format("Bad left range boundary %d in [0..%d]", start, indexSize-1));
197 }
198 if (end < start || end >= indexSize) {
199 throw new IllegalArgumentException(String.format("Bad right range boundary %d in [0..%d]", end, indexSize-1));
200 }
201 // XXX may cache [start .. end] from index with a single read (pre-read)
202
203 DataAccess daIndex = null, daData = null;
204 daIndex = getIndexStream();
205 if (needData && !inline) {
206 daData = getDataStream();
207 }
208 try {
209 byte[] nodeidBuf = new byte[20];
210 DataAccess lastUserData = null;
211 int i;
212 boolean extraReadsToBaseRev = false;
213 if (needData && getBaseRevision(start) < start) {
214 i = getBaseRevision(start);
215 extraReadsToBaseRev = true;
216 } else {
217 i = start;
218 }
219
220 daIndex.seek(getIndexOffsetInt(i));
221 for (; i <= end; i++ ) {
222 if (inline && needData) {
223 // inspector reading data (though FilterDataAccess) may have affected index position
224 daIndex.seek(getIndexOffsetInt(i));
225 }
226 long l = daIndex.readLong(); // 0
227 long offset = i == 0 ? 0 : (l >>> 16);
228 @SuppressWarnings("unused")
229 int flags = (int) (l & 0X0FFFF);
230 int compressedLen = daIndex.readInt(); // +8
231 int actualLen = daIndex.readInt(); // +12
232 int baseRevision = daIndex.readInt(); // +16
233 int linkRevision = daIndex.readInt(); // +20
234 int parent1Revision = daIndex.readInt();
235 int parent2Revision = daIndex.readInt();
236 // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty
237 daIndex.readBytes(nodeidBuf, 0, 20); // +32
238 daIndex.skip(12);
239 DataAccess userDataAccess = null;
240 if (needData) {
241 final byte firstByte;
242 int streamOffset;
243 DataAccess streamDataAccess;
244 if (inline) {
245 streamDataAccess = daIndex;
246 streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream
247 } else {
248 streamOffset = (int) offset;
249 streamDataAccess = daData;
250 daData.seek(streamOffset);
251 }
252 final boolean patchToPrevious = baseRevision != i; // the only way I found to tell if it's a patch
253 firstByte = streamDataAccess.readByte();
254 if (firstByte == 0x78 /* 'x' */) {
255 userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, patchToPrevious ? -1 : actualLen);
256 } else if (firstByte == 0x75 /* 'u' */) {
257 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1);
258 } else {
259 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0'
260 // but I don't see reason not to return data as is
261 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen);
262 }
263 // XXX
264 if (patchToPrevious) {
265 // this is a patch
266 LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>();
267 while (!userDataAccess.isEmpty()) {
268 PatchRecord pr = PatchRecord.read(userDataAccess);
269 // System.out.printf("PatchRecord:%d %d %d\n", pr.start, pr.end, pr.len);
270 patches.add(pr);
271 }
272 userDataAccess.done();
273 //
274 byte[] userData = apply(lastUserData, actualLen, patches);
275 userDataAccess = new ByteArrayDataAccess(userData);
276 }
277 } else {
278 if (inline) {
279 daIndex.skip(compressedLen);
280 }
281 }
282 if (!extraReadsToBaseRev || i >= start) {
283 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess);
284 }
285 if (userDataAccess != null) {
286 userDataAccess.reset();
287 if (lastUserData != null) {
288 lastUserData.done();
289 }
290 lastUserData = userDataAccess;
291 }
292 }
293 } catch (IOException ex) {
294 throw new HgBadStateException(ex); // FIXME need better handling
295 } finally {
296 daIndex.done();
297 if (daData != null) {
298 daData.done();
299 }
300 }
301 }
302
303 private int getBaseRevision(int revision) {
304 return baseRevisions[revision];
305 }
306
307 /**
308 * @return offset of the revision's record in the index (.i) stream
309 */
310 private int getIndexOffsetInt(int revision) {
311 return inline ? indexRecordOffset[revision] : revision * REVLOGV1_RECORD_SIZE;
312 }
313
314 private void initOutline() {
315 if (baseRevisions != null && baseRevisions.length > 0) {
316 return;
317 }
318 ArrayList<Integer> resBases = new ArrayList<Integer>();
319 ArrayList<Integer> resOffsets = new ArrayList<Integer>();
320 DataAccess da = getIndexStream();
321 try {
322 if (da.isEmpty()) {
323 // do not fail with exception if stream is empty, it's likely intentional
324 baseRevisions = new int[0];
325 return;
326 }
327 int versionField = da.readInt();
328 da.readInt(); // just to skip next 4 bytes of offset + flags
329 final int INLINEDATA = 1 << 16;
330 inline = (versionField & INLINEDATA) != 0;
331 long offset = 0; // first offset is always 0, thus Hg uses it for other purposes
332 while(true) {
333 int compressedLen = da.readInt();
334 // 8+4 = 12 bytes total read here
335 @SuppressWarnings("unused")
336 int actualLen = da.readInt();
337 int baseRevision = da.readInt();
338 // 12 + 8 = 20 bytes read here
339 // int linkRevision = di.readInt();
340 // int parent1Revision = di.readInt();
341 // int parent2Revision = di.readInt();
342 // byte[] nodeid = new byte[32];
343 resBases.add(baseRevision);
344 if (inline) {
345 int o = (int) offset;
346 if (o != offset) {
347 // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file
348 // with inlined data of size greater than 2 Gb.
349 throw new HgBadStateException("Data too big, offset didn't fit to sizeof(int)");
350 }
351 resOffsets.add(o + REVLOGV1_RECORD_SIZE * resOffsets.size());
352 da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size)
353 } else {
354 da.skip(3*4 + 32);
355 }
356 if (da.isEmpty()) {
357 // fine, done then
358 baseRevisions = toArray(resBases);
359 if (inline) {
360 indexRecordOffset = toArray(resOffsets);
361 }
362 break;
363 } else {
364 // start reading next record
365 long l = da.readLong();
366 offset = l >>> 16;
367 }
368 }
369 } catch (IOException ex) {
370 ex.printStackTrace(); // log error
371 // too bad, no outline then, but don't fail with NPE
372 baseRevisions = new int[0];
373 } finally {
374 da.done();
375 }
376 }
377
378 private static int[] toArray(List<Integer> l) {
379 int[] rv = new int[l.size()];
380 for (int i = 0; i < rv.length; i++) {
381 rv[i] = l.get(i);
382 }
383 return rv;
384 }
385
386
387 // mpatch.c : apply()
388 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF
389 public/*for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List<PatchRecord> patch) throws IOException {
390 int last = 0, destIndex = 0;
391 if (outcomeLen == -1) {
392 outcomeLen = baseRevisionContent.length();
393 for (PatchRecord pr : patch) {
394 outcomeLen += pr.start - last + pr.len;
395 last = pr.end;
396 }
397 outcomeLen -= last;
398 last = 0;
399 }
400 byte[] rv = new byte[outcomeLen];
401 for (PatchRecord pr : patch) {
402 baseRevisionContent.seek(last);
403 baseRevisionContent.readBytes(rv, destIndex, pr.start-last);
404 destIndex += pr.start - last;
405 System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length);
406 destIndex += pr.data.length;
407 last = pr.end;
408 }
409 baseRevisionContent.seek(last);
410 baseRevisionContent.readBytes(rv, destIndex, (int) (baseRevisionContent.length() - last));
411 return rv;
412 }
413
414 // @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description
415 public static class PatchRecord {
416 /*
417 Given there are pr1 and pr2:
418 pr1.start to pr1.end will be replaced with pr's data (of pr1.len)
419 pr1.end to pr2.start gets copied from base
420 */
421 public int start, end, len;
422 public byte[] data;
423
424 // TODO consider PatchRecord that only records data position (absolute in data source), and acquires data as needed
425 private PatchRecord(int p1, int p2, int length, byte[] src) {
426 start = p1;
427 end = p2;
428 len = length;
429 data = src;
430 }
431
432 /*package-local*/ static PatchRecord read(byte[] data, int offset) {
433 final int x = offset; // shorthand
434 int p1 = ((data[x] & 0xFF)<< 24) | ((data[x+1] & 0xFF) << 16) | ((data[x+2] & 0xFF) << 8) | (data[x+3] & 0xFF);
435 int p2 = ((data[x+4] & 0xFF) << 24) | ((data[x+5] & 0xFF) << 16) | ((data[x+6] & 0xFF) << 8) | (data[x+7] & 0xFF);
436 int len = ((data[x+8] & 0xFF) << 24) | ((data[x+9] & 0xFF) << 16) | ((data[x+10] & 0xFF) << 8) | (data[x+11] & 0xFF);
437 byte[] dataCopy = new byte[len];
438 System.arraycopy(data, x+12, dataCopy, 0, len);
439 return new PatchRecord(p1, p2, len, dataCopy);
440 }
441
442 public /*for HgBundle*/ static PatchRecord read(DataAccess da) throws IOException {
443 int p1 = da.readInt();
444 int p2 = da.readInt();
445 int len = da.readInt();
446 byte[] src = new byte[len];
447 da.readBytes(src, 0, len);
448 return new PatchRecord(p1, p2, len, src);
449 }
450 }
451
452 // FIXME byte[] data might be too expensive, for few usecases it may be better to have intermediate Access object (when we don't need full data
453 // instantly - e.g. calculate hash, or comparing two revisions
454 public interface Inspector {
455 // XXX boolean retVal to indicate whether to continue?
456 // TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call)
457 // implementers shall not invoke DataAccess.done(), it's accomplished by #iterate at appropraite moment
458 void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, DataAccess data);
459 }
460 }