comparison src/org/tmatesoft/hg/internal/RevlogStream.java @ 157:d5268ca7715b

Merged branch wrap-data-access into default for resource-friendly data access. Updated API to promote that friendliness to clients (channels, not byte[]). More exceptions
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 09 Mar 2011 05:22:17 +0100
parents src/com/tmate/hgkit/ll/RevlogStream.java@a6f39e595b2b src/com/tmate/hgkit/ll/RevlogStream.java@a3a2e5deb320
children b413b16d10a5
comparison
equal deleted inserted replaced
156:643ddec3be36 157:d5268ca7715b
23 import java.io.IOException; 23 import java.io.IOException;
24 import java.util.ArrayList; 24 import java.util.ArrayList;
25 import java.util.Collections; 25 import java.util.Collections;
26 import java.util.LinkedList; 26 import java.util.LinkedList;
27 import java.util.List; 27 import java.util.List;
28 import java.util.zip.DataFormatException;
29 import java.util.zip.Inflater;
30 28
31 import org.tmatesoft.hg.core.Nodeid; 29 import org.tmatesoft.hg.core.Nodeid;
32 import org.tmatesoft.hg.repo.HgRepository; 30 import org.tmatesoft.hg.repo.HgRepository;
33 31
34 32
184 } 182 }
185 if (start == TIP) { 183 if (start == TIP) {
186 start = indexSize - 1; 184 start = indexSize - 1;
187 } 185 }
188 if (start < 0 || start >= indexSize) { 186 if (start < 0 || start >= indexSize) {
189 throw new IllegalArgumentException("Bad left range boundary " + start); 187 throw new IllegalArgumentException(String.format("Bad left range boundary %d in [0..%d]", start, indexSize-1));
190 } 188 }
191 if (end < start || end >= indexSize) { 189 if (end < start || end >= indexSize) {
192 throw new IllegalArgumentException("Bad right range boundary " + end); 190 throw new IllegalArgumentException(String.format("Bad right range boundary %d in [0..%d]", end, indexSize-1));
193 } 191 }
194 // XXX may cache [start .. end] from index with a single read (pre-read) 192 // XXX may cache [start .. end] from index with a single read (pre-read)
195 193
196 DataAccess daIndex = null, daData = null; 194 DataAccess daIndex = null, daData = null;
197 daIndex = getIndexStream(); 195 daIndex = getIndexStream();
198 if (needData && !inline) { 196 if (needData && !inline) {
199 daData = getDataStream(); 197 daData = getDataStream();
200 } 198 }
201 try { 199 try {
202 byte[] nodeidBuf = new byte[20]; 200 byte[] nodeidBuf = new byte[20];
203 byte[] lastData = null; 201 DataAccess lastUserData = null;
204 int i; 202 int i;
205 boolean extraReadsToBaseRev = false; 203 boolean extraReadsToBaseRev = false;
206 if (needData && index.get(start).baseRevision < start) { 204 if (needData && index.get(start).baseRevision < start) {
207 i = index.get(start).baseRevision; 205 i = index.get(start).baseRevision;
208 extraReadsToBaseRev = true; 206 extraReadsToBaseRev = true;
209 } else { 207 } else {
210 i = start; 208 i = start;
211 } 209 }
212 210
213 daIndex.seek(inline ? (int) index.get(i).offset : i * REVLOGV1_RECORD_SIZE); 211 daIndex.seek(inline ? index.get(i).offset : i * REVLOGV1_RECORD_SIZE);
214 for (; i <= end; i++ ) { 212 for (; i <= end; i++ ) {
215 long l = daIndex.readLong(); // 0 213 if (inline && needData) {
214 // inspector reading data (though FilterDataAccess) may have affected index position
215 daIndex.seek(index.get(i).offset);
216 }
217 long l = daIndex.readLong(); // 0
216 @SuppressWarnings("unused") 218 @SuppressWarnings("unused")
217 long offset = l >>> 16; 219 long offset = l >>> 16;
218 @SuppressWarnings("unused") 220 @SuppressWarnings("unused")
219 int flags = (int) (l & 0X0FFFF); 221 int flags = (int) (l & 0X0FFFF);
220 int compressedLen = daIndex.readInt(); // +8 222 int compressedLen = daIndex.readInt(); // +8
224 int parent1Revision = daIndex.readInt(); 226 int parent1Revision = daIndex.readInt();
225 int parent2Revision = daIndex.readInt(); 227 int parent2Revision = daIndex.readInt();
226 // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty 228 // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty
227 daIndex.readBytes(nodeidBuf, 0, 20); // +32 229 daIndex.readBytes(nodeidBuf, 0, 20); // +32
228 daIndex.skip(12); 230 daIndex.skip(12);
229 byte[] data = null; 231 DataAccess userDataAccess = null;
230 if (needData) { 232 if (needData) {
231 byte[] dataBuf = new byte[compressedLen]; 233 final byte firstByte;
234 long streamOffset = index.get(i).offset;
235 DataAccess streamDataAccess;
232 if (inline) { 236 if (inline) {
233 daIndex.readBytes(dataBuf, 0, compressedLen); 237 streamDataAccess = daIndex;
238 streamOffset += REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream
234 } else { 239 } else {
235 daData.seek(index.get(i).offset); 240 streamDataAccess = daData;
236 daData.readBytes(dataBuf, 0, compressedLen); 241 daData.seek(streamOffset);
237 } 242 }
238 if (dataBuf[0] == 0x78 /* 'x' */) { 243 firstByte = streamDataAccess.readByte();
239 try { 244 if (firstByte == 0x78 /* 'x' */) {
240 Inflater zlib = new Inflater(); // XXX Consider reuse of Inflater, and/or stream alternative 245 userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen);
241 zlib.setInput(dataBuf, 0, compressedLen); 246 } else if (firstByte == 0x75 /* 'u' */) {
242 byte[] result = new byte[actualLen*2]; // FIXME need to use zlib.finished() instead 247 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1);
243 int resultLen = zlib.inflate(result);
244 zlib.end();
245 data = new byte[resultLen];
246 System.arraycopy(result, 0, data, 0, resultLen);
247 } catch (DataFormatException ex) {
248 ex.printStackTrace();
249 data = new byte[0]; // FIXME need better failure strategy
250 }
251 } else if (dataBuf[0] == 0x75 /* 'u' */) {
252 data = new byte[dataBuf.length - 1];
253 System.arraycopy(dataBuf, 1, data, 0, data.length);
254 } else { 248 } else {
255 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' 249 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0'
256 // but I don't see reason not to return data as is 250 // but I don't see reason not to return data as is
257 data = dataBuf; 251 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen);
258 } 252 }
259 // XXX 253 // XXX
260 if (baseRevision != i) { // XXX not sure if this is the right way to detect a patch 254 if (baseRevision != i) { // XXX not sure if this is the right way to detect a patch
261 // this is a patch 255 // this is a patch
262 LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>(); 256 LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>();
263 int patchElementIndex = 0; 257 while (!userDataAccess.isEmpty()) {
264 do { 258 PatchRecord pr = PatchRecord.read(userDataAccess);
265 PatchRecord pr = PatchRecord.read(data, patchElementIndex); 259 // System.out.printf("PatchRecord:%d %d %d\n", pr.start, pr.end, pr.len);
266 patches.add(pr); 260 patches.add(pr);
267 patchElementIndex += 12 + pr.len; 261 }
268 } while (patchElementIndex < data.length); 262 userDataAccess.done();
269 // 263 //
270 byte[] baseRevContent = lastData; 264 byte[] userData = apply(lastUserData, actualLen, patches);
271 data = apply(baseRevContent, actualLen, patches); 265 userDataAccess = new ByteArrayDataAccess(userData);
272 } 266 }
273 } else { 267 } else {
274 if (inline) { 268 if (inline) {
275 daIndex.skip(compressedLen); 269 daIndex.skip(compressedLen);
276 } 270 }
277 } 271 }
278 if (!extraReadsToBaseRev || i >= start) { 272 if (!extraReadsToBaseRev || i >= start) {
279 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, data); 273 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess);
280 } 274 }
281 lastData = data; 275 if (userDataAccess != null) {
276 userDataAccess.reset();
277 if (lastUserData != null) {
278 lastUserData.done();
279 }
280 lastUserData = userDataAccess;
281 }
282 } 282 }
283 } catch (IOException ex) { 283 } catch (IOException ex) {
284 throw new IllegalStateException(ex); // FIXME need better handling 284 throw new IllegalStateException(ex); // FIXME need better handling
285 } finally { 285 } finally {
286 daIndex.done(); 286 daIndex.done();
355 } 355 }
356 } 356 }
357 357
358 // mpatch.c : apply() 358 // mpatch.c : apply()
359 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF 359 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF
360 public/*for HgBundle; until moved to better place*/static byte[] apply(byte[] baseRevisionContent, int outcomeLen, List<PatchRecord> patch) { 360 public/*for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List<PatchRecord> patch) throws IOException {
361 int last = 0, destIndex = 0; 361 int last = 0, destIndex = 0;
362 if (outcomeLen == -1) { 362 if (outcomeLen == -1) {
363 outcomeLen = baseRevisionContent.length; 363 outcomeLen = (int) baseRevisionContent.length();
364 for (PatchRecord pr : patch) { 364 for (PatchRecord pr : patch) {
365 outcomeLen += pr.start - last + pr.len; 365 outcomeLen += pr.start - last + pr.len;
366 last = pr.end; 366 last = pr.end;
367 } 367 }
368 outcomeLen -= last; 368 outcomeLen -= last;
369 last = 0; 369 last = 0;
370 } 370 }
371 byte[] rv = new byte[outcomeLen]; 371 byte[] rv = new byte[outcomeLen];
372 for (PatchRecord pr : patch) { 372 for (PatchRecord pr : patch) {
373 System.arraycopy(baseRevisionContent, last, rv, destIndex, pr.start-last); 373 baseRevisionContent.seek(last);
374 baseRevisionContent.readBytes(rv, destIndex, pr.start-last);
374 destIndex += pr.start - last; 375 destIndex += pr.start - last;
375 System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length); 376 System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length);
376 destIndex += pr.data.length; 377 destIndex += pr.data.length;
377 last = pr.end; 378 last = pr.end;
378 } 379 }
379 System.arraycopy(baseRevisionContent, last, rv, destIndex, baseRevisionContent.length - last); 380 baseRevisionContent.seek(last);
381 baseRevisionContent.readBytes(rv, destIndex, (int) (baseRevisionContent.length() - last));
380 return rv; 382 return rv;
381 } 383 }
382 384
383 // @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description 385 // @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description
384 public static class PatchRecord { 386 public static class PatchRecord {
420 422
421 // FIXME byte[] data might be too expensive, for few usecases it may be better to have intermediate Access object (when we don't need full data 423 // FIXME byte[] data might be too expensive, for few usecases it may be better to have intermediate Access object (when we don't need full data
422 // instantly - e.g. calculate hash, or comparing two revisions 424 // instantly - e.g. calculate hash, or comparing two revisions
423 public interface Inspector { 425 public interface Inspector {
424 // XXX boolean retVal to indicate whether to continue? 426 // XXX boolean retVal to indicate whether to continue?
425 // TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call) 427 // TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call)
426 void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, byte[] data); 428 // implementers shall not invoke DataAccess.done(), it's accomplished by #iterate at appropraite moment
429 void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, DataAccess data);
427 } 430 }
428 } 431 }