Mercurial > hg4j
comparison src/org/tmatesoft/hg/internal/RevlogStream.java @ 198:33a7d76f067b
Performance optimization: reduce memory to keep revlog cached info
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Wed, 20 Apr 2011 05:40:14 +0200 |
parents | 71ddbf8603e8 |
children | 706bcc7cfee4 |
comparison
equal
deleted
inserted
replaced
197:3a7696fb457c | 198:33a7d76f067b |
---|---|
20 import static org.tmatesoft.hg.repo.HgRepository.TIP; | 20 import static org.tmatesoft.hg.repo.HgRepository.TIP; |
21 | 21 |
22 import java.io.File; | 22 import java.io.File; |
23 import java.io.IOException; | 23 import java.io.IOException; |
24 import java.util.ArrayList; | 24 import java.util.ArrayList; |
25 import java.util.Collections; | |
26 import java.util.LinkedList; | 25 import java.util.LinkedList; |
27 import java.util.List; | 26 import java.util.List; |
28 | 27 |
29 import org.tmatesoft.hg.core.HgBadStateException; | 28 import org.tmatesoft.hg.core.HgBadStateException; |
30 import org.tmatesoft.hg.core.Nodeid; | 29 import org.tmatesoft.hg.core.Nodeid; |
31 import org.tmatesoft.hg.repo.HgRepository; | 30 import org.tmatesoft.hg.repo.HgRepository; |
32 | 31 |
33 | 32 |
34 /** | 33 /** |
35 * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations), | 34 * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations), |
36 * or numerous RevlogStream with separate representation of the underlaying data (cached, lazy ChunkStream)? | 35 * or numerous RevlogStream with separate representation of the underlying data (cached, lazy ChunkStream)? |
37 * | 36 * |
38 * @see http://mercurial.selenic.com/wiki/Revlog | 37 * @see http://mercurial.selenic.com/wiki/Revlog |
39 * @see http://mercurial.selenic.com/wiki/RevlogNG | 38 * @see http://mercurial.selenic.com/wiki/RevlogNG |
40 * | 39 * |
41 * @author Artem Tikhomirov | 40 * @author Artem Tikhomirov |
42 * @author TMate Software Ltd. | 41 * @author TMate Software Ltd. |
43 */ | 42 */ |
44 public class RevlogStream { | 43 public class RevlogStream { |
45 | 44 |
46 private List<IndexEntry> index; // indexed access highly needed | 45 /* |
46 * makes sense for index with inline data only - actual offset of the record in the .i file (record entry + revision * record size)) | |
47 * | |
48 * long[] in fact (there are 8-bytes field in the revlog) | |
49 * However, (a) DataAccess currently doesn't operate with long seek/length | |
50 * and, of greater significance, (b) files with inlined data are designated for smaller files, | |
51 * guess, about 130 Kb, and offset there won't ever break int capacity | |
52 */ | |
53 private int[] indexRecordOffset; | |
54 private int[] baseRevisions; | |
47 private boolean inline = false; | 55 private boolean inline = false; |
48 private final File indexFile; | 56 private final File indexFile; |
49 private final DataAccessProvider dataAccess; | 57 private final DataAccessProvider dataAccess; |
50 | 58 |
51 // if we need anything else from HgRepo, might replace DAP parameter with HgRepo and query it for DAP. | 59 // if we need anything else from HgRepo, might replace DAP parameter with HgRepo and query it for DAP. |
64 return dataAccess.create(dataFile); | 72 return dataAccess.create(dataFile); |
65 } | 73 } |
66 | 74 |
67 public int revisionCount() { | 75 public int revisionCount() { |
68 initOutline(); | 76 initOutline(); |
69 return index.size(); | 77 return baseRevisions.length; |
70 } | 78 } |
71 | 79 |
72 public int dataLength(int revision) { | 80 public int dataLength(int revision) { |
73 // XXX in fact, use of iterate() instead of this implementation may be quite reasonable. | 81 // XXX in fact, use of iterate() instead of this implementation may be quite reasonable. |
74 // | 82 // |
76 DataAccess daIndex = getIndexStream(); // XXX may supply a hint that I'll need really few bytes of data (although at some offset) | 84 DataAccess daIndex = getIndexStream(); // XXX may supply a hint that I'll need really few bytes of data (although at some offset) |
77 if (revision == TIP) { | 85 if (revision == TIP) { |
78 revision = indexSize - 1; | 86 revision = indexSize - 1; |
79 } | 87 } |
80 try { | 88 try { |
81 int recordOffset = inline ? index.get(revision).getIntOffset() : revision * REVLOGV1_RECORD_SIZE; | 89 int recordOffset = getIndexOffsetInt(revision); |
82 daIndex.seek(recordOffset + 12); // 6+2+4 | 90 daIndex.seek(recordOffset + 12); // 6+2+4 |
83 int actualLen = daIndex.readInt(); | 91 int actualLen = daIndex.readInt(); |
84 return actualLen; | 92 return actualLen; |
85 } catch (IOException ex) { | 93 } catch (IOException ex) { |
86 ex.printStackTrace(); // log error. FIXME better handling | 94 ex.printStackTrace(); // log error. FIXME better handling |
98 if (revision < 0 || revision >= indexSize) { | 106 if (revision < 0 || revision >= indexSize) { |
99 throw new IllegalArgumentException(Integer.toString(revision)); | 107 throw new IllegalArgumentException(Integer.toString(revision)); |
100 } | 108 } |
101 DataAccess daIndex = getIndexStream(); | 109 DataAccess daIndex = getIndexStream(); |
102 try { | 110 try { |
103 int recordOffset = inline ? index.get(revision).getIntOffset() : revision * REVLOGV1_RECORD_SIZE; | 111 int recordOffset = getIndexOffsetInt(revision); |
104 daIndex.seek(recordOffset + 32); | 112 daIndex.seek(recordOffset + 32); |
105 byte[] rv = new byte[20]; | 113 byte[] rv = new byte[20]; |
106 daIndex.readBytes(rv, 0, 20); | 114 daIndex.readBytes(rv, 0, 20); |
107 return rv; | 115 return rv; |
108 } catch (IOException ex) { | 116 } catch (IOException ex) { |
121 if (revision < 0 || revision > last) { | 129 if (revision < 0 || revision > last) { |
122 throw new IllegalArgumentException(Integer.toString(revision)); | 130 throw new IllegalArgumentException(Integer.toString(revision)); |
123 } | 131 } |
124 DataAccess daIndex = getIndexStream(); | 132 DataAccess daIndex = getIndexStream(); |
125 try { | 133 try { |
126 int recordOffset = inline ? index.get(revision).getIntOffset() : revision * REVLOGV1_RECORD_SIZE; | 134 int recordOffset = getIndexOffsetInt(revision); |
127 daIndex.seek(recordOffset + 20); | 135 daIndex.seek(recordOffset + 20); |
128 int linkRev = daIndex.readInt(); | 136 int linkRev = daIndex.readInt(); |
129 return linkRev; | 137 return linkRev; |
130 } catch (IOException ex) { | 138 } catch (IOException ex) { |
131 ex.printStackTrace(); | 139 ex.printStackTrace(); |
172 | 180 |
173 // should be possible to use TIP, ALL, or -1, -2, -n notation of Hg | 181 // should be possible to use TIP, ALL, or -1, -2, -n notation of Hg |
174 // ? boolean needsNodeid | 182 // ? boolean needsNodeid |
175 public void iterate(int start, int end, boolean needData, Inspector inspector) { | 183 public void iterate(int start, int end, boolean needData, Inspector inspector) { |
176 initOutline(); | 184 initOutline(); |
177 final int indexSize = index.size(); | 185 final int indexSize = revisionCount(); |
178 if (indexSize == 0) { | 186 if (indexSize == 0) { |
179 return; | 187 return; |
180 } | 188 } |
181 if (end == TIP) { | 189 if (end == TIP) { |
182 end = indexSize - 1; | 190 end = indexSize - 1; |
200 try { | 208 try { |
201 byte[] nodeidBuf = new byte[20]; | 209 byte[] nodeidBuf = new byte[20]; |
202 DataAccess lastUserData = null; | 210 DataAccess lastUserData = null; |
203 int i; | 211 int i; |
204 boolean extraReadsToBaseRev = false; | 212 boolean extraReadsToBaseRev = false; |
205 if (needData && index.get(start).baseRevision < start) { | 213 if (needData && getBaseRevision(start) < start) { |
206 i = index.get(start).baseRevision; | 214 i = getBaseRevision(start); |
207 extraReadsToBaseRev = true; | 215 extraReadsToBaseRev = true; |
208 } else { | 216 } else { |
209 i = start; | 217 i = start; |
210 } | 218 } |
211 | 219 |
212 daIndex.seek(inline ? index.get(i).getIntOffset() : i * REVLOGV1_RECORD_SIZE); | 220 daIndex.seek(getIndexOffsetInt(i)); |
213 for (; i <= end; i++ ) { | 221 for (; i <= end; i++ ) { |
214 if (inline && needData) { | 222 if (inline && needData) { |
215 // inspector reading data (though FilterDataAccess) may have affected index position | 223 // inspector reading data (though FilterDataAccess) may have affected index position |
216 daIndex.seek(index.get(i).getIntOffset()); | 224 daIndex.seek(getIndexOffsetInt(i)); |
217 } | 225 } |
218 long l = daIndex.readLong(); // 0 | 226 long l = daIndex.readLong(); // 0 |
219 @SuppressWarnings("unused") | 227 long offset = i == 0 ? 0 : (l >>> 16); |
220 long offset = l >>> 16; | |
221 @SuppressWarnings("unused") | 228 @SuppressWarnings("unused") |
222 int flags = (int) (l & 0X0FFFF); | 229 int flags = (int) (l & 0X0FFFF); |
223 int compressedLen = daIndex.readInt(); // +8 | 230 int compressedLen = daIndex.readInt(); // +8 |
224 int actualLen = daIndex.readInt(); // +12 | 231 int actualLen = daIndex.readInt(); // +12 |
225 int baseRevision = daIndex.readInt(); // +16 | 232 int baseRevision = daIndex.readInt(); // +16 |
230 daIndex.readBytes(nodeidBuf, 0, 20); // +32 | 237 daIndex.readBytes(nodeidBuf, 0, 20); // +32 |
231 daIndex.skip(12); | 238 daIndex.skip(12); |
232 DataAccess userDataAccess = null; | 239 DataAccess userDataAccess = null; |
233 if (needData) { | 240 if (needData) { |
234 final byte firstByte; | 241 final byte firstByte; |
235 int streamOffset = index.get(i).getIntOffset(); | 242 int streamOffset; |
236 DataAccess streamDataAccess; | 243 DataAccess streamDataAccess; |
237 if (inline) { | 244 if (inline) { |
238 streamDataAccess = daIndex; | 245 streamDataAccess = daIndex; |
239 streamOffset += REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream | 246 streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream |
240 } else { | 247 } else { |
248 streamOffset = (int) offset; | |
241 streamDataAccess = daData; | 249 streamDataAccess = daData; |
242 daData.seek(streamOffset); | 250 daData.seek(streamOffset); |
243 } | 251 } |
244 final boolean patchToPrevious = baseRevision != i; // the only way I found to tell if it's a patch | 252 final boolean patchToPrevious = baseRevision != i; // the only way I found to tell if it's a patch |
245 firstByte = streamDataAccess.readByte(); | 253 firstByte = streamDataAccess.readByte(); |
289 if (daData != null) { | 297 if (daData != null) { |
290 daData.done(); | 298 daData.done(); |
291 } | 299 } |
292 } | 300 } |
293 } | 301 } |
294 | 302 |
303 private int getBaseRevision(int revision) { | |
304 return baseRevisions[revision]; | |
305 } | |
306 | |
307 /** | |
308 * @return offset of the revision's record in the index (.i) stream | |
309 */ | |
310 private int getIndexOffsetInt(int revision) { | |
311 return inline ? indexRecordOffset[revision] : revision * REVLOGV1_RECORD_SIZE; | |
312 } | |
313 | |
295 private void initOutline() { | 314 private void initOutline() { |
296 if (index != null && !index.isEmpty()) { | 315 if (baseRevisions != null && baseRevisions.length > 0) { |
297 return; | 316 return; |
298 } | 317 } |
299 ArrayList<IndexEntry> res = new ArrayList<IndexEntry>(); | 318 ArrayList<Integer> resBases = new ArrayList<Integer>(); |
319 ArrayList<Integer> resOffsets = new ArrayList<Integer>(); | |
300 DataAccess da = getIndexStream(); | 320 DataAccess da = getIndexStream(); |
301 try { | 321 try { |
302 int versionField = da.readInt(); | 322 int versionField = da.readInt(); |
303 da.readInt(); // just to skip next 4 bytes of offset + flags | 323 da.readInt(); // just to skip next 4 bytes of offset + flags |
304 final int INLINEDATA = 1 << 16; | 324 final int INLINEDATA = 1 << 16; |
313 // 12 + 8 = 20 bytes read here | 333 // 12 + 8 = 20 bytes read here |
314 // int linkRevision = di.readInt(); | 334 // int linkRevision = di.readInt(); |
315 // int parent1Revision = di.readInt(); | 335 // int parent1Revision = di.readInt(); |
316 // int parent2Revision = di.readInt(); | 336 // int parent2Revision = di.readInt(); |
317 // byte[] nodeid = new byte[32]; | 337 // byte[] nodeid = new byte[32]; |
338 resBases.add(baseRevision); | |
318 if (inline) { | 339 if (inline) { |
319 res.add(new IndexEntry(offset + REVLOGV1_RECORD_SIZE * res.size(), baseRevision)); | 340 int o = (int) offset; |
341 if (o != offset) { | |
342 // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file | |
343 // with inlined data of size greater than 2 Gb. | |
344 throw new HgBadStateException("Data too big, offset didn't fit to sizeof(int)"); | |
345 } | |
346 resOffsets.add(o + REVLOGV1_RECORD_SIZE * resOffsets.size()); | |
320 da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size) | 347 da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size) |
321 } else { | 348 } else { |
322 res.add(new IndexEntry(offset, baseRevision)); | |
323 da.skip(3*4 + 32); | 349 da.skip(3*4 + 32); |
324 } | 350 } |
325 if (da.isEmpty()) { | 351 if (da.isEmpty()) { |
326 // fine, done then | 352 // fine, done then |
327 res.trimToSize(); | 353 baseRevisions = toArray(resBases); |
328 index = res; | 354 if (inline) { |
355 indexRecordOffset = toArray(resOffsets); | |
356 } | |
329 break; | 357 break; |
330 } else { | 358 } else { |
331 // start reading next record | 359 // start reading next record |
332 long l = da.readLong(); | 360 long l = da.readLong(); |
333 offset = l >>> 16; | 361 offset = l >>> 16; |
334 } | 362 } |
335 } | 363 } |
336 } catch (IOException ex) { | 364 } catch (IOException ex) { |
337 ex.printStackTrace(); // log error | 365 ex.printStackTrace(); // log error |
338 // too bad, no outline then. | 366 // too bad, no outline then, but don't fail with NPE |
339 index = Collections.emptyList(); | 367 baseRevisions = new int[0]; |
340 } finally { | 368 } finally { |
341 da.done(); | 369 da.done(); |
342 } | 370 } |
343 | 371 } |
344 } | 372 |
345 | 373 private static int[] toArray(List<Integer> l) { |
346 | 374 int[] rv = new int[l.size()]; |
347 // perhaps, package-local or protected, if anyone else from low-level needs them | 375 for (int i = 0; i < rv.length; i++) { |
348 // XXX think over if we should keep offset in case of separate data file - we read the field anyway. Perhaps, distinct entry classes for Inline and non-inline indexes? | 376 rv[i] = l.get(i); |
349 private static class IndexEntry { | 377 } |
350 private final int/*long*/ offset; // for separate .i and .d - copy of index record entry, for inline index - actual offset of the record in the .i file (record entry + revision * record size)) | 378 return rv; |
351 //public final int length; // data past fixed record (need to decide whether including header size or not), and whether length is of compressed data or not | 379 } |
352 public final int baseRevision; | 380 |
353 | |
354 public IndexEntry(long o, int baseRev) { | |
355 offset = (int) o; | |
356 // Index file stores offsets as long, but since DataAccess doesn't support long length() and others yet, | |
357 // no reason to operate with long offsets | |
358 if (o != offset) { | |
359 throw new HgBadStateException("Data too big, offset didn't fit to sizeof(int)"); | |
360 } | |
361 baseRevision = baseRev; | |
362 } | |
363 | |
364 public int getIntOffset() { | |
365 return offset; | |
366 } | |
367 } | |
368 | 381 |
369 // mpatch.c : apply() | 382 // mpatch.c : apply() |
370 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF | 383 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF |
371 public/*for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List<PatchRecord> patch) throws IOException { | 384 public/*for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List<PatchRecord> patch) throws IOException { |
372 int last = 0, destIndex = 0; | 385 int last = 0, destIndex = 0; |