comparison src/org/tmatesoft/hg/internal/RevlogStream.java @ 198:33a7d76f067b

Performance optimization: reduce memory to keep revlog cached info
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 20 Apr 2011 05:40:14 +0200
parents 71ddbf8603e8
children 706bcc7cfee4
comparison
equal deleted inserted replaced
197:3a7696fb457c 198:33a7d76f067b
20 import static org.tmatesoft.hg.repo.HgRepository.TIP; 20 import static org.tmatesoft.hg.repo.HgRepository.TIP;
21 21
22 import java.io.File; 22 import java.io.File;
23 import java.io.IOException; 23 import java.io.IOException;
24 import java.util.ArrayList; 24 import java.util.ArrayList;
25 import java.util.Collections;
26 import java.util.LinkedList; 25 import java.util.LinkedList;
27 import java.util.List; 26 import java.util.List;
28 27
29 import org.tmatesoft.hg.core.HgBadStateException; 28 import org.tmatesoft.hg.core.HgBadStateException;
30 import org.tmatesoft.hg.core.Nodeid; 29 import org.tmatesoft.hg.core.Nodeid;
31 import org.tmatesoft.hg.repo.HgRepository; 30 import org.tmatesoft.hg.repo.HgRepository;
32 31
33 32
34 /** 33 /**
35 * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations), 34 * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations),
36 * or numerous RevlogStream with separate representation of the underlaying data (cached, lazy ChunkStream)? 35 * or numerous RevlogStream with separate representation of the underlying data (cached, lazy ChunkStream)?
37 * 36 *
38 * @see http://mercurial.selenic.com/wiki/Revlog 37 * @see http://mercurial.selenic.com/wiki/Revlog
39 * @see http://mercurial.selenic.com/wiki/RevlogNG 38 * @see http://mercurial.selenic.com/wiki/RevlogNG
40 * 39 *
41 * @author Artem Tikhomirov 40 * @author Artem Tikhomirov
42 * @author TMate Software Ltd. 41 * @author TMate Software Ltd.
43 */ 42 */
44 public class RevlogStream { 43 public class RevlogStream {
45 44
46 private List<IndexEntry> index; // indexed access highly needed 45 /*
46 * makes sense for index with inline data only - actual offset of the record in the .i file (record entry + revision * record size))
47 *
48 * long[] in fact (there are 8-bytes field in the revlog)
49 * However, (a) DataAccess currently doesn't operate with long seek/length
50 * and, of greater significance, (b) files with inlined data are designated for smaller files,
51 * guess, about 130 Kb, and offset there won't ever break int capacity
52 */
53 private int[] indexRecordOffset;
54 private int[] baseRevisions;
47 private boolean inline = false; 55 private boolean inline = false;
48 private final File indexFile; 56 private final File indexFile;
49 private final DataAccessProvider dataAccess; 57 private final DataAccessProvider dataAccess;
50 58
51 // if we need anything else from HgRepo, might replace DAP parameter with HgRepo and query it for DAP. 59 // if we need anything else from HgRepo, might replace DAP parameter with HgRepo and query it for DAP.
64 return dataAccess.create(dataFile); 72 return dataAccess.create(dataFile);
65 } 73 }
66 74
67 public int revisionCount() { 75 public int revisionCount() {
68 initOutline(); 76 initOutline();
69 return index.size(); 77 return baseRevisions.length;
70 } 78 }
71 79
72 public int dataLength(int revision) { 80 public int dataLength(int revision) {
73 // XXX in fact, use of iterate() instead of this implementation may be quite reasonable. 81 // XXX in fact, use of iterate() instead of this implementation may be quite reasonable.
74 // 82 //
76 DataAccess daIndex = getIndexStream(); // XXX may supply a hint that I'll need really few bytes of data (although at some offset) 84 DataAccess daIndex = getIndexStream(); // XXX may supply a hint that I'll need really few bytes of data (although at some offset)
77 if (revision == TIP) { 85 if (revision == TIP) {
78 revision = indexSize - 1; 86 revision = indexSize - 1;
79 } 87 }
80 try { 88 try {
81 int recordOffset = inline ? index.get(revision).getIntOffset() : revision * REVLOGV1_RECORD_SIZE; 89 int recordOffset = getIndexOffsetInt(revision);
82 daIndex.seek(recordOffset + 12); // 6+2+4 90 daIndex.seek(recordOffset + 12); // 6+2+4
83 int actualLen = daIndex.readInt(); 91 int actualLen = daIndex.readInt();
84 return actualLen; 92 return actualLen;
85 } catch (IOException ex) { 93 } catch (IOException ex) {
86 ex.printStackTrace(); // log error. FIXME better handling 94 ex.printStackTrace(); // log error. FIXME better handling
98 if (revision < 0 || revision >= indexSize) { 106 if (revision < 0 || revision >= indexSize) {
99 throw new IllegalArgumentException(Integer.toString(revision)); 107 throw new IllegalArgumentException(Integer.toString(revision));
100 } 108 }
101 DataAccess daIndex = getIndexStream(); 109 DataAccess daIndex = getIndexStream();
102 try { 110 try {
103 int recordOffset = inline ? index.get(revision).getIntOffset() : revision * REVLOGV1_RECORD_SIZE; 111 int recordOffset = getIndexOffsetInt(revision);
104 daIndex.seek(recordOffset + 32); 112 daIndex.seek(recordOffset + 32);
105 byte[] rv = new byte[20]; 113 byte[] rv = new byte[20];
106 daIndex.readBytes(rv, 0, 20); 114 daIndex.readBytes(rv, 0, 20);
107 return rv; 115 return rv;
108 } catch (IOException ex) { 116 } catch (IOException ex) {
121 if (revision < 0 || revision > last) { 129 if (revision < 0 || revision > last) {
122 throw new IllegalArgumentException(Integer.toString(revision)); 130 throw new IllegalArgumentException(Integer.toString(revision));
123 } 131 }
124 DataAccess daIndex = getIndexStream(); 132 DataAccess daIndex = getIndexStream();
125 try { 133 try {
126 int recordOffset = inline ? index.get(revision).getIntOffset() : revision * REVLOGV1_RECORD_SIZE; 134 int recordOffset = getIndexOffsetInt(revision);
127 daIndex.seek(recordOffset + 20); 135 daIndex.seek(recordOffset + 20);
128 int linkRev = daIndex.readInt(); 136 int linkRev = daIndex.readInt();
129 return linkRev; 137 return linkRev;
130 } catch (IOException ex) { 138 } catch (IOException ex) {
131 ex.printStackTrace(); 139 ex.printStackTrace();
172 180
173 // should be possible to use TIP, ALL, or -1, -2, -n notation of Hg 181 // should be possible to use TIP, ALL, or -1, -2, -n notation of Hg
174 // ? boolean needsNodeid 182 // ? boolean needsNodeid
175 public void iterate(int start, int end, boolean needData, Inspector inspector) { 183 public void iterate(int start, int end, boolean needData, Inspector inspector) {
176 initOutline(); 184 initOutline();
177 final int indexSize = index.size(); 185 final int indexSize = revisionCount();
178 if (indexSize == 0) { 186 if (indexSize == 0) {
179 return; 187 return;
180 } 188 }
181 if (end == TIP) { 189 if (end == TIP) {
182 end = indexSize - 1; 190 end = indexSize - 1;
200 try { 208 try {
201 byte[] nodeidBuf = new byte[20]; 209 byte[] nodeidBuf = new byte[20];
202 DataAccess lastUserData = null; 210 DataAccess lastUserData = null;
203 int i; 211 int i;
204 boolean extraReadsToBaseRev = false; 212 boolean extraReadsToBaseRev = false;
205 if (needData && index.get(start).baseRevision < start) { 213 if (needData && getBaseRevision(start) < start) {
206 i = index.get(start).baseRevision; 214 i = getBaseRevision(start);
207 extraReadsToBaseRev = true; 215 extraReadsToBaseRev = true;
208 } else { 216 } else {
209 i = start; 217 i = start;
210 } 218 }
211 219
212 daIndex.seek(inline ? index.get(i).getIntOffset() : i * REVLOGV1_RECORD_SIZE); 220 daIndex.seek(getIndexOffsetInt(i));
213 for (; i <= end; i++ ) { 221 for (; i <= end; i++ ) {
214 if (inline && needData) { 222 if (inline && needData) {
215 // inspector reading data (though FilterDataAccess) may have affected index position 223 // inspector reading data (though FilterDataAccess) may have affected index position
216 daIndex.seek(index.get(i).getIntOffset()); 224 daIndex.seek(getIndexOffsetInt(i));
217 } 225 }
218 long l = daIndex.readLong(); // 0 226 long l = daIndex.readLong(); // 0
219 @SuppressWarnings("unused") 227 long offset = i == 0 ? 0 : (l >>> 16);
220 long offset = l >>> 16;
221 @SuppressWarnings("unused") 228 @SuppressWarnings("unused")
222 int flags = (int) (l & 0X0FFFF); 229 int flags = (int) (l & 0X0FFFF);
223 int compressedLen = daIndex.readInt(); // +8 230 int compressedLen = daIndex.readInt(); // +8
224 int actualLen = daIndex.readInt(); // +12 231 int actualLen = daIndex.readInt(); // +12
225 int baseRevision = daIndex.readInt(); // +16 232 int baseRevision = daIndex.readInt(); // +16
230 daIndex.readBytes(nodeidBuf, 0, 20); // +32 237 daIndex.readBytes(nodeidBuf, 0, 20); // +32
231 daIndex.skip(12); 238 daIndex.skip(12);
232 DataAccess userDataAccess = null; 239 DataAccess userDataAccess = null;
233 if (needData) { 240 if (needData) {
234 final byte firstByte; 241 final byte firstByte;
235 int streamOffset = index.get(i).getIntOffset(); 242 int streamOffset;
236 DataAccess streamDataAccess; 243 DataAccess streamDataAccess;
237 if (inline) { 244 if (inline) {
238 streamDataAccess = daIndex; 245 streamDataAccess = daIndex;
239 streamOffset += REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream 246 streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream
240 } else { 247 } else {
248 streamOffset = (int) offset;
241 streamDataAccess = daData; 249 streamDataAccess = daData;
242 daData.seek(streamOffset); 250 daData.seek(streamOffset);
243 } 251 }
244 final boolean patchToPrevious = baseRevision != i; // the only way I found to tell if it's a patch 252 final boolean patchToPrevious = baseRevision != i; // the only way I found to tell if it's a patch
245 firstByte = streamDataAccess.readByte(); 253 firstByte = streamDataAccess.readByte();
289 if (daData != null) { 297 if (daData != null) {
290 daData.done(); 298 daData.done();
291 } 299 }
292 } 300 }
293 } 301 }
294 302
303 private int getBaseRevision(int revision) {
304 return baseRevisions[revision];
305 }
306
307 /**
308 * @return offset of the revision's record in the index (.i) stream
309 */
310 private int getIndexOffsetInt(int revision) {
311 return inline ? indexRecordOffset[revision] : revision * REVLOGV1_RECORD_SIZE;
312 }
313
295 private void initOutline() { 314 private void initOutline() {
296 if (index != null && !index.isEmpty()) { 315 if (baseRevisions != null && baseRevisions.length > 0) {
297 return; 316 return;
298 } 317 }
299 ArrayList<IndexEntry> res = new ArrayList<IndexEntry>(); 318 ArrayList<Integer> resBases = new ArrayList<Integer>();
319 ArrayList<Integer> resOffsets = new ArrayList<Integer>();
300 DataAccess da = getIndexStream(); 320 DataAccess da = getIndexStream();
301 try { 321 try {
302 int versionField = da.readInt(); 322 int versionField = da.readInt();
303 da.readInt(); // just to skip next 4 bytes of offset + flags 323 da.readInt(); // just to skip next 4 bytes of offset + flags
304 final int INLINEDATA = 1 << 16; 324 final int INLINEDATA = 1 << 16;
313 // 12 + 8 = 20 bytes read here 333 // 12 + 8 = 20 bytes read here
314 // int linkRevision = di.readInt(); 334 // int linkRevision = di.readInt();
315 // int parent1Revision = di.readInt(); 335 // int parent1Revision = di.readInt();
316 // int parent2Revision = di.readInt(); 336 // int parent2Revision = di.readInt();
317 // byte[] nodeid = new byte[32]; 337 // byte[] nodeid = new byte[32];
338 resBases.add(baseRevision);
318 if (inline) { 339 if (inline) {
319 res.add(new IndexEntry(offset + REVLOGV1_RECORD_SIZE * res.size(), baseRevision)); 340 int o = (int) offset;
341 if (o != offset) {
342 // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file
343 // with inlined data of size greater than 2 Gb.
344 throw new HgBadStateException("Data too big, offset didn't fit to sizeof(int)");
345 }
346 resOffsets.add(o + REVLOGV1_RECORD_SIZE * resOffsets.size());
320 da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size) 347 da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size)
321 } else { 348 } else {
322 res.add(new IndexEntry(offset, baseRevision));
323 da.skip(3*4 + 32); 349 da.skip(3*4 + 32);
324 } 350 }
325 if (da.isEmpty()) { 351 if (da.isEmpty()) {
326 // fine, done then 352 // fine, done then
327 res.trimToSize(); 353 baseRevisions = toArray(resBases);
328 index = res; 354 if (inline) {
355 indexRecordOffset = toArray(resOffsets);
356 }
329 break; 357 break;
330 } else { 358 } else {
331 // start reading next record 359 // start reading next record
332 long l = da.readLong(); 360 long l = da.readLong();
333 offset = l >>> 16; 361 offset = l >>> 16;
334 } 362 }
335 } 363 }
336 } catch (IOException ex) { 364 } catch (IOException ex) {
337 ex.printStackTrace(); // log error 365 ex.printStackTrace(); // log error
338 // too bad, no outline then. 366 // too bad, no outline then, but don't fail with NPE
339 index = Collections.emptyList(); 367 baseRevisions = new int[0];
340 } finally { 368 } finally {
341 da.done(); 369 da.done();
342 } 370 }
343 371 }
344 } 372
345 373 private static int[] toArray(List<Integer> l) {
346 374 int[] rv = new int[l.size()];
347 // perhaps, package-local or protected, if anyone else from low-level needs them 375 for (int i = 0; i < rv.length; i++) {
348 // XXX think over if we should keep offset in case of separate data file - we read the field anyway. Perhaps, distinct entry classes for Inline and non-inline indexes? 376 rv[i] = l.get(i);
349 private static class IndexEntry { 377 }
350 private final int/*long*/ offset; // for separate .i and .d - copy of index record entry, for inline index - actual offset of the record in the .i file (record entry + revision * record size)) 378 return rv;
351 //public final int length; // data past fixed record (need to decide whether including header size or not), and whether length is of compressed data or not 379 }
352 public final int baseRevision; 380
353
354 public IndexEntry(long o, int baseRev) {
355 offset = (int) o;
356 // Index file stores offsets as long, but since DataAccess doesn't support long length() and others yet,
357 // no reason to operate with long offsets
358 if (o != offset) {
359 throw new HgBadStateException("Data too big, offset didn't fit to sizeof(int)");
360 }
361 baseRevision = baseRev;
362 }
363
364 public int getIntOffset() {
365 return offset;
366 }
367 }
368 381
369 // mpatch.c : apply() 382 // mpatch.c : apply()
370 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF 383 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF
371 public/*for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List<PatchRecord> patch) throws IOException { 384 public/*for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List<PatchRecord> patch) throws IOException {
372 int last = 0, destIndex = 0; 385 int last = 0, destIndex = 0;