Mercurial > jhg
comparison hg4j/src/main/java/org/tmatesoft/hg/internal/RevlogStream.java @ 213:6ec4af642ba8 gradle
Project uses Gradle for build - actual changes
author | Alexander Kitaev <kitaev@gmail.com> |
---|---|
date | Tue, 10 May 2011 10:52:53 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
212:edb2e2829352 | 213:6ec4af642ba8 |
---|---|
1 /* | |
2 * Copyright (c) 2010-2011 TMate Software Ltd | |
3 * | |
4 * This program is free software; you can redistribute it and/or modify | |
5 * it under the terms of the GNU General Public License as published by | |
6 * the Free Software Foundation; version 2 of the License. | |
7 * | |
8 * This program is distributed in the hope that it will be useful, | |
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
11 * GNU General Public License for more details. | |
12 * | |
13 * For information on how to redistribute this software under | |
14 * the terms of a license other than GNU General Public License | |
15 * contact TMate Software at support@hg4j.com | |
16 */ | |
17 package org.tmatesoft.hg.internal; | |
18 | |
19 import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION; | |
20 import static org.tmatesoft.hg.repo.HgRepository.TIP; | |
21 | |
22 import java.io.File; | |
23 import java.io.IOException; | |
24 import java.util.ArrayList; | |
25 import java.util.LinkedList; | |
26 import java.util.List; | |
27 | |
28 import org.tmatesoft.hg.core.HgBadStateException; | |
29 import org.tmatesoft.hg.core.Nodeid; | |
30 import org.tmatesoft.hg.repo.HgRepository; | |
31 | |
32 | |
33 /** | |
34 * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations), | |
35 * or numerous RevlogStream with separate representation of the underlying data (cached, lazy ChunkStream)? | |
36 * | |
37 * @see http://mercurial.selenic.com/wiki/Revlog | |
38 * @see http://mercurial.selenic.com/wiki/RevlogNG | |
39 * | |
40 * @author Artem Tikhomirov | |
41 * @author TMate Software Ltd. | |
42 */ | |
43 public class RevlogStream { | |
44 | |
45 /* | |
46 * makes sense for index with inline data only - actual offset of the record in the .i file (record entry + revision * record size)) | |
47 * | |
48 * long[] in fact (there are 8-bytes field in the revlog) | |
49 * However, (a) DataAccess currently doesn't operate with long seek/length | |
50 * and, of greater significance, (b) files with inlined data are designated for smaller files, | |
51 * guess, about 130 Kb, and offset there won't ever break int capacity | |
52 */ | |
53 private int[] indexRecordOffset; | |
54 private int[] baseRevisions; | |
55 private boolean inline = false; | |
56 private final File indexFile; | |
57 private final DataAccessProvider dataAccess; | |
58 | |
59 // if we need anything else from HgRepo, might replace DAP parameter with HgRepo and query it for DAP. | |
60 public RevlogStream(DataAccessProvider dap, File indexFile) { | |
61 this.dataAccess = dap; | |
62 this.indexFile = indexFile; | |
63 } | |
64 | |
65 /*package*/ DataAccess getIndexStream() { | |
66 return dataAccess.create(indexFile); | |
67 } | |
68 | |
69 /*package*/ DataAccess getDataStream() { | |
70 final String indexName = indexFile.getName(); | |
71 File dataFile = new File(indexFile.getParentFile(), indexName.substring(0, indexName.length() - 1) + "d"); | |
72 return dataAccess.create(dataFile); | |
73 } | |
74 | |
75 public int revisionCount() { | |
76 initOutline(); | |
77 return baseRevisions.length; | |
78 } | |
79 | |
80 public int dataLength(int revision) { | |
81 // XXX in fact, use of iterate() instead of this implementation may be quite reasonable. | |
82 // | |
83 final int indexSize = revisionCount(); | |
84 DataAccess daIndex = getIndexStream(); // XXX may supply a hint that I'll need really few bytes of data (although at some offset) | |
85 if (revision == TIP) { | |
86 revision = indexSize - 1; | |
87 } | |
88 try { | |
89 int recordOffset = getIndexOffsetInt(revision); | |
90 daIndex.seek(recordOffset + 12); // 6+2+4 | |
91 int actualLen = daIndex.readInt(); | |
92 return actualLen; | |
93 } catch (IOException ex) { | |
94 ex.printStackTrace(); // log error. FIXME better handling | |
95 throw new IllegalStateException(ex); | |
96 } finally { | |
97 daIndex.done(); | |
98 } | |
99 } | |
100 | |
101 public byte[] nodeid(int revision) { | |
102 final int indexSize = revisionCount(); | |
103 if (revision == TIP) { | |
104 revision = indexSize - 1; | |
105 } | |
106 if (revision < 0 || revision >= indexSize) { | |
107 throw new IllegalArgumentException(Integer.toString(revision)); | |
108 } | |
109 DataAccess daIndex = getIndexStream(); | |
110 try { | |
111 int recordOffset = getIndexOffsetInt(revision); | |
112 daIndex.seek(recordOffset + 32); | |
113 byte[] rv = new byte[20]; | |
114 daIndex.readBytes(rv, 0, 20); | |
115 return rv; | |
116 } catch (IOException ex) { | |
117 ex.printStackTrace(); | |
118 throw new IllegalStateException(); | |
119 } finally { | |
120 daIndex.done(); | |
121 } | |
122 } | |
123 | |
124 public int linkRevision(int revision) { | |
125 final int last = revisionCount() - 1; | |
126 if (revision == TIP) { | |
127 revision = last; | |
128 } | |
129 if (revision < 0 || revision > last) { | |
130 throw new IllegalArgumentException(Integer.toString(revision)); | |
131 } | |
132 DataAccess daIndex = getIndexStream(); | |
133 try { | |
134 int recordOffset = getIndexOffsetInt(revision); | |
135 daIndex.seek(recordOffset + 20); | |
136 int linkRev = daIndex.readInt(); | |
137 return linkRev; | |
138 } catch (IOException ex) { | |
139 ex.printStackTrace(); | |
140 throw new IllegalStateException(); | |
141 } finally { | |
142 daIndex.done(); | |
143 } | |
144 } | |
145 | |
146 // Perhaps, RevlogStream should be limited to use of plain int revisions for access, | |
147 // while Nodeids should be kept on the level up, in Revlog. Guess, Revlog better keep | |
148 // map of nodeids, and once this comes true, we may get rid of this method. | |
149 // Unlike its counterpart, {@link Revlog#getLocalRevisionNumber()}, doesn't fail with exception if node not found, | |
150 /** | |
151 * @return integer in [0..revisionCount()) or {@link HgRepository#BAD_REVISION} if not found | |
152 */ | |
153 public int findLocalRevisionNumber(Nodeid nodeid) { | |
154 // XXX this one may be implemented with iterate() once there's mechanism to stop iterations | |
155 final int indexSize = revisionCount(); | |
156 DataAccess daIndex = getIndexStream(); | |
157 try { | |
158 byte[] nodeidBuf = new byte[20]; | |
159 for (int i = 0; i < indexSize; i++) { | |
160 daIndex.skip(8); | |
161 int compressedLen = daIndex.readInt(); | |
162 daIndex.skip(20); | |
163 daIndex.readBytes(nodeidBuf, 0, 20); | |
164 if (nodeid.equalsTo(nodeidBuf)) { | |
165 return i; | |
166 } | |
167 daIndex.skip(inline ? 12 + compressedLen : 12); | |
168 } | |
169 } catch (IOException ex) { | |
170 ex.printStackTrace(); // log error. FIXME better handling | |
171 throw new IllegalStateException(ex); | |
172 } finally { | |
173 daIndex.done(); | |
174 } | |
175 return BAD_REVISION; | |
176 } | |
177 | |
178 | |
179 private final int REVLOGV1_RECORD_SIZE = 64; | |
180 | |
181 // should be possible to use TIP, ALL, or -1, -2, -n notation of Hg | |
182 // ? boolean needsNodeid | |
183 public void iterate(int start, int end, boolean needData, Inspector inspector) { | |
184 initOutline(); | |
185 final int indexSize = revisionCount(); | |
186 if (indexSize == 0) { | |
187 return; | |
188 } | |
189 if (end == TIP) { | |
190 end = indexSize - 1; | |
191 } | |
192 if (start == TIP) { | |
193 start = indexSize - 1; | |
194 } | |
195 if (start < 0 || start >= indexSize) { | |
196 throw new IllegalArgumentException(String.format("Bad left range boundary %d in [0..%d]", start, indexSize-1)); | |
197 } | |
198 if (end < start || end >= indexSize) { | |
199 throw new IllegalArgumentException(String.format("Bad right range boundary %d in [0..%d]", end, indexSize-1)); | |
200 } | |
201 // XXX may cache [start .. end] from index with a single read (pre-read) | |
202 | |
203 DataAccess daIndex = null, daData = null; | |
204 daIndex = getIndexStream(); | |
205 if (needData && !inline) { | |
206 daData = getDataStream(); | |
207 } | |
208 try { | |
209 byte[] nodeidBuf = new byte[20]; | |
210 DataAccess lastUserData = null; | |
211 int i; | |
212 boolean extraReadsToBaseRev = false; | |
213 if (needData && getBaseRevision(start) < start) { | |
214 i = getBaseRevision(start); | |
215 extraReadsToBaseRev = true; | |
216 } else { | |
217 i = start; | |
218 } | |
219 | |
220 daIndex.seek(getIndexOffsetInt(i)); | |
221 for (; i <= end; i++ ) { | |
222 if (inline && needData) { | |
223 // inspector reading data (though FilterDataAccess) may have affected index position | |
224 daIndex.seek(getIndexOffsetInt(i)); | |
225 } | |
226 long l = daIndex.readLong(); // 0 | |
227 long offset = i == 0 ? 0 : (l >>> 16); | |
228 @SuppressWarnings("unused") | |
229 int flags = (int) (l & 0X0FFFF); | |
230 int compressedLen = daIndex.readInt(); // +8 | |
231 int actualLen = daIndex.readInt(); // +12 | |
232 int baseRevision = daIndex.readInt(); // +16 | |
233 int linkRevision = daIndex.readInt(); // +20 | |
234 int parent1Revision = daIndex.readInt(); | |
235 int parent2Revision = daIndex.readInt(); | |
236 // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty | |
237 daIndex.readBytes(nodeidBuf, 0, 20); // +32 | |
238 daIndex.skip(12); | |
239 DataAccess userDataAccess = null; | |
240 if (needData) { | |
241 final byte firstByte; | |
242 int streamOffset; | |
243 DataAccess streamDataAccess; | |
244 if (inline) { | |
245 streamDataAccess = daIndex; | |
246 streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream | |
247 } else { | |
248 streamOffset = (int) offset; | |
249 streamDataAccess = daData; | |
250 daData.seek(streamOffset); | |
251 } | |
252 final boolean patchToPrevious = baseRevision != i; // the only way I found to tell if it's a patch | |
253 firstByte = streamDataAccess.readByte(); | |
254 if (firstByte == 0x78 /* 'x' */) { | |
255 userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, patchToPrevious ? -1 : actualLen); | |
256 } else if (firstByte == 0x75 /* 'u' */) { | |
257 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1); | |
258 } else { | |
259 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' | |
260 // but I don't see reason not to return data as is | |
261 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen); | |
262 } | |
263 // XXX | |
264 if (patchToPrevious) { | |
265 // this is a patch | |
266 LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>(); | |
267 while (!userDataAccess.isEmpty()) { | |
268 PatchRecord pr = PatchRecord.read(userDataAccess); | |
269 // System.out.printf("PatchRecord:%d %d %d\n", pr.start, pr.end, pr.len); | |
270 patches.add(pr); | |
271 } | |
272 userDataAccess.done(); | |
273 // | |
274 byte[] userData = apply(lastUserData, actualLen, patches); | |
275 userDataAccess = new ByteArrayDataAccess(userData); | |
276 } | |
277 } else { | |
278 if (inline) { | |
279 daIndex.skip(compressedLen); | |
280 } | |
281 } | |
282 if (!extraReadsToBaseRev || i >= start) { | |
283 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); | |
284 } | |
285 if (userDataAccess != null) { | |
286 userDataAccess.reset(); | |
287 if (lastUserData != null) { | |
288 lastUserData.done(); | |
289 } | |
290 lastUserData = userDataAccess; | |
291 } | |
292 } | |
293 } catch (IOException ex) { | |
294 throw new HgBadStateException(ex); // FIXME need better handling | |
295 } finally { | |
296 daIndex.done(); | |
297 if (daData != null) { | |
298 daData.done(); | |
299 } | |
300 } | |
301 } | |
302 | |
303 private int getBaseRevision(int revision) { | |
304 return baseRevisions[revision]; | |
305 } | |
306 | |
307 /** | |
308 * @return offset of the revision's record in the index (.i) stream | |
309 */ | |
310 private int getIndexOffsetInt(int revision) { | |
311 return inline ? indexRecordOffset[revision] : revision * REVLOGV1_RECORD_SIZE; | |
312 } | |
313 | |
314 private void initOutline() { | |
315 if (baseRevisions != null && baseRevisions.length > 0) { | |
316 return; | |
317 } | |
318 ArrayList<Integer> resBases = new ArrayList<Integer>(); | |
319 ArrayList<Integer> resOffsets = new ArrayList<Integer>(); | |
320 DataAccess da = getIndexStream(); | |
321 try { | |
322 if (da.isEmpty()) { | |
323 // do not fail with exception if stream is empty, it's likely intentional | |
324 baseRevisions = new int[0]; | |
325 return; | |
326 } | |
327 int versionField = da.readInt(); | |
328 da.readInt(); // just to skip next 4 bytes of offset + flags | |
329 final int INLINEDATA = 1 << 16; | |
330 inline = (versionField & INLINEDATA) != 0; | |
331 long offset = 0; // first offset is always 0, thus Hg uses it for other purposes | |
332 while(true) { | |
333 int compressedLen = da.readInt(); | |
334 // 8+4 = 12 bytes total read here | |
335 @SuppressWarnings("unused") | |
336 int actualLen = da.readInt(); | |
337 int baseRevision = da.readInt(); | |
338 // 12 + 8 = 20 bytes read here | |
339 // int linkRevision = di.readInt(); | |
340 // int parent1Revision = di.readInt(); | |
341 // int parent2Revision = di.readInt(); | |
342 // byte[] nodeid = new byte[32]; | |
343 resBases.add(baseRevision); | |
344 if (inline) { | |
345 int o = (int) offset; | |
346 if (o != offset) { | |
347 // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file | |
348 // with inlined data of size greater than 2 Gb. | |
349 throw new HgBadStateException("Data too big, offset didn't fit to sizeof(int)"); | |
350 } | |
351 resOffsets.add(o + REVLOGV1_RECORD_SIZE * resOffsets.size()); | |
352 da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size) | |
353 } else { | |
354 da.skip(3*4 + 32); | |
355 } | |
356 if (da.isEmpty()) { | |
357 // fine, done then | |
358 baseRevisions = toArray(resBases); | |
359 if (inline) { | |
360 indexRecordOffset = toArray(resOffsets); | |
361 } | |
362 break; | |
363 } else { | |
364 // start reading next record | |
365 long l = da.readLong(); | |
366 offset = l >>> 16; | |
367 } | |
368 } | |
369 } catch (IOException ex) { | |
370 ex.printStackTrace(); // log error | |
371 // too bad, no outline then, but don't fail with NPE | |
372 baseRevisions = new int[0]; | |
373 } finally { | |
374 da.done(); | |
375 } | |
376 } | |
377 | |
378 private static int[] toArray(List<Integer> l) { | |
379 int[] rv = new int[l.size()]; | |
380 for (int i = 0; i < rv.length; i++) { | |
381 rv[i] = l.get(i); | |
382 } | |
383 return rv; | |
384 } | |
385 | |
386 | |
387 // mpatch.c : apply() | |
388 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF | |
389 public/*for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List<PatchRecord> patch) throws IOException { | |
390 int last = 0, destIndex = 0; | |
391 if (outcomeLen == -1) { | |
392 outcomeLen = baseRevisionContent.length(); | |
393 for (PatchRecord pr : patch) { | |
394 outcomeLen += pr.start - last + pr.len; | |
395 last = pr.end; | |
396 } | |
397 outcomeLen -= last; | |
398 last = 0; | |
399 } | |
400 byte[] rv = new byte[outcomeLen]; | |
401 for (PatchRecord pr : patch) { | |
402 baseRevisionContent.seek(last); | |
403 baseRevisionContent.readBytes(rv, destIndex, pr.start-last); | |
404 destIndex += pr.start - last; | |
405 System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length); | |
406 destIndex += pr.data.length; | |
407 last = pr.end; | |
408 } | |
409 baseRevisionContent.seek(last); | |
410 baseRevisionContent.readBytes(rv, destIndex, (int) (baseRevisionContent.length() - last)); | |
411 return rv; | |
412 } | |
413 | |
414 // @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description | |
415 public static class PatchRecord { | |
416 /* | |
417 Given there are pr1 and pr2: | |
418 pr1.start to pr1.end will be replaced with pr's data (of pr1.len) | |
419 pr1.end to pr2.start gets copied from base | |
420 */ | |
421 public int start, end, len; | |
422 public byte[] data; | |
423 | |
424 // TODO consider PatchRecord that only records data position (absolute in data source), and acquires data as needed | |
425 private PatchRecord(int p1, int p2, int length, byte[] src) { | |
426 start = p1; | |
427 end = p2; | |
428 len = length; | |
429 data = src; | |
430 } | |
431 | |
432 /*package-local*/ static PatchRecord read(byte[] data, int offset) { | |
433 final int x = offset; // shorthand | |
434 int p1 = ((data[x] & 0xFF)<< 24) | ((data[x+1] & 0xFF) << 16) | ((data[x+2] & 0xFF) << 8) | (data[x+3] & 0xFF); | |
435 int p2 = ((data[x+4] & 0xFF) << 24) | ((data[x+5] & 0xFF) << 16) | ((data[x+6] & 0xFF) << 8) | (data[x+7] & 0xFF); | |
436 int len = ((data[x+8] & 0xFF) << 24) | ((data[x+9] & 0xFF) << 16) | ((data[x+10] & 0xFF) << 8) | (data[x+11] & 0xFF); | |
437 byte[] dataCopy = new byte[len]; | |
438 System.arraycopy(data, x+12, dataCopy, 0, len); | |
439 return new PatchRecord(p1, p2, len, dataCopy); | |
440 } | |
441 | |
442 public /*for HgBundle*/ static PatchRecord read(DataAccess da) throws IOException { | |
443 int p1 = da.readInt(); | |
444 int p2 = da.readInt(); | |
445 int len = da.readInt(); | |
446 byte[] src = new byte[len]; | |
447 da.readBytes(src, 0, len); | |
448 return new PatchRecord(p1, p2, len, src); | |
449 } | |
450 } | |
451 | |
452 // FIXME byte[] data might be too expensive, for few usecases it may be better to have intermediate Access object (when we don't need full data | |
453 // instantly - e.g. calculate hash, or comparing two revisions | |
454 public interface Inspector { | |
455 // XXX boolean retVal to indicate whether to continue? | |
456 // TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call) | |
457 // implementers shall not invoke DataAccess.done(), it's accomplished by #iterate at appropraite moment | |
458 void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, DataAccess data); | |
459 } | |
460 } |