Mercurial > hg4j
comparison src/org/tmatesoft/hg/internal/RevlogStream.java @ 157:d5268ca7715b
Merged branch wrap-data-access into default for resource-friendly data access. Updated API to promote that friendliness to clients (channels, not byte[]). More exceptions
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Wed, 09 Mar 2011 05:22:17 +0100 |
parents | src/com/tmate/hgkit/ll/RevlogStream.java@a6f39e595b2b src/com/tmate/hgkit/ll/RevlogStream.java@a3a2e5deb320 |
children | b413b16d10a5 |
comparison
equal
deleted
inserted
replaced
156:643ddec3be36 | 157:d5268ca7715b |
---|---|
23 import java.io.IOException; | 23 import java.io.IOException; |
24 import java.util.ArrayList; | 24 import java.util.ArrayList; |
25 import java.util.Collections; | 25 import java.util.Collections; |
26 import java.util.LinkedList; | 26 import java.util.LinkedList; |
27 import java.util.List; | 27 import java.util.List; |
28 import java.util.zip.DataFormatException; | |
29 import java.util.zip.Inflater; | |
30 | 28 |
31 import org.tmatesoft.hg.core.Nodeid; | 29 import org.tmatesoft.hg.core.Nodeid; |
32 import org.tmatesoft.hg.repo.HgRepository; | 30 import org.tmatesoft.hg.repo.HgRepository; |
33 | 31 |
34 | 32 |
184 } | 182 } |
185 if (start == TIP) { | 183 if (start == TIP) { |
186 start = indexSize - 1; | 184 start = indexSize - 1; |
187 } | 185 } |
188 if (start < 0 || start >= indexSize) { | 186 if (start < 0 || start >= indexSize) { |
189 throw new IllegalArgumentException("Bad left range boundary " + start); | 187 throw new IllegalArgumentException(String.format("Bad left range boundary %d in [0..%d]", start, indexSize-1)); |
190 } | 188 } |
191 if (end < start || end >= indexSize) { | 189 if (end < start || end >= indexSize) { |
192 throw new IllegalArgumentException("Bad right range boundary " + end); | 190 throw new IllegalArgumentException(String.format("Bad right range boundary %d in [0..%d]", end, indexSize-1)); |
193 } | 191 } |
194 // XXX may cache [start .. end] from index with a single read (pre-read) | 192 // XXX may cache [start .. end] from index with a single read (pre-read) |
195 | 193 |
196 DataAccess daIndex = null, daData = null; | 194 DataAccess daIndex = null, daData = null; |
197 daIndex = getIndexStream(); | 195 daIndex = getIndexStream(); |
198 if (needData && !inline) { | 196 if (needData && !inline) { |
199 daData = getDataStream(); | 197 daData = getDataStream(); |
200 } | 198 } |
201 try { | 199 try { |
202 byte[] nodeidBuf = new byte[20]; | 200 byte[] nodeidBuf = new byte[20]; |
203 byte[] lastData = null; | 201 DataAccess lastUserData = null; |
204 int i; | 202 int i; |
205 boolean extraReadsToBaseRev = false; | 203 boolean extraReadsToBaseRev = false; |
206 if (needData && index.get(start).baseRevision < start) { | 204 if (needData && index.get(start).baseRevision < start) { |
207 i = index.get(start).baseRevision; | 205 i = index.get(start).baseRevision; |
208 extraReadsToBaseRev = true; | 206 extraReadsToBaseRev = true; |
209 } else { | 207 } else { |
210 i = start; | 208 i = start; |
211 } | 209 } |
212 | 210 |
213 daIndex.seek(inline ? (int) index.get(i).offset : i * REVLOGV1_RECORD_SIZE); | 211 daIndex.seek(inline ? index.get(i).offset : i * REVLOGV1_RECORD_SIZE); |
214 for (; i <= end; i++ ) { | 212 for (; i <= end; i++ ) { |
215 long l = daIndex.readLong(); // 0 | 213 if (inline && needData) { |
214 // inspector reading data (though FilterDataAccess) may have affected index position | |
215 daIndex.seek(index.get(i).offset); | |
216 } | |
217 long l = daIndex.readLong(); // 0 | |
216 @SuppressWarnings("unused") | 218 @SuppressWarnings("unused") |
217 long offset = l >>> 16; | 219 long offset = l >>> 16; |
218 @SuppressWarnings("unused") | 220 @SuppressWarnings("unused") |
219 int flags = (int) (l & 0X0FFFF); | 221 int flags = (int) (l & 0X0FFFF); |
220 int compressedLen = daIndex.readInt(); // +8 | 222 int compressedLen = daIndex.readInt(); // +8 |
224 int parent1Revision = daIndex.readInt(); | 226 int parent1Revision = daIndex.readInt(); |
225 int parent2Revision = daIndex.readInt(); | 227 int parent2Revision = daIndex.readInt(); |
226 // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty | 228 // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty |
227 daIndex.readBytes(nodeidBuf, 0, 20); // +32 | 229 daIndex.readBytes(nodeidBuf, 0, 20); // +32 |
228 daIndex.skip(12); | 230 daIndex.skip(12); |
229 byte[] data = null; | 231 DataAccess userDataAccess = null; |
230 if (needData) { | 232 if (needData) { |
231 byte[] dataBuf = new byte[compressedLen]; | 233 final byte firstByte; |
234 long streamOffset = index.get(i).offset; | |
235 DataAccess streamDataAccess; | |
232 if (inline) { | 236 if (inline) { |
233 daIndex.readBytes(dataBuf, 0, compressedLen); | 237 streamDataAccess = daIndex; |
238 streamOffset += REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream | |
234 } else { | 239 } else { |
235 daData.seek(index.get(i).offset); | 240 streamDataAccess = daData; |
236 daData.readBytes(dataBuf, 0, compressedLen); | 241 daData.seek(streamOffset); |
237 } | 242 } |
238 if (dataBuf[0] == 0x78 /* 'x' */) { | 243 firstByte = streamDataAccess.readByte(); |
239 try { | 244 if (firstByte == 0x78 /* 'x' */) { |
240 Inflater zlib = new Inflater(); // XXX Consider reuse of Inflater, and/or stream alternative | 245 userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen); |
241 zlib.setInput(dataBuf, 0, compressedLen); | 246 } else if (firstByte == 0x75 /* 'u' */) { |
242 byte[] result = new byte[actualLen*2]; // FIXME need to use zlib.finished() instead | 247 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1); |
243 int resultLen = zlib.inflate(result); | |
244 zlib.end(); | |
245 data = new byte[resultLen]; | |
246 System.arraycopy(result, 0, data, 0, resultLen); | |
247 } catch (DataFormatException ex) { | |
248 ex.printStackTrace(); | |
249 data = new byte[0]; // FIXME need better failure strategy | |
250 } | |
251 } else if (dataBuf[0] == 0x75 /* 'u' */) { | |
252 data = new byte[dataBuf.length - 1]; | |
253 System.arraycopy(dataBuf, 1, data, 0, data.length); | |
254 } else { | 248 } else { |
255 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' | 249 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' |
256 // but I don't see reason not to return data as is | 250 // but I don't see reason not to return data as is |
257 data = dataBuf; | 251 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen); |
258 } | 252 } |
259 // XXX | 253 // XXX |
260 if (baseRevision != i) { // XXX not sure if this is the right way to detect a patch | 254 if (baseRevision != i) { // XXX not sure if this is the right way to detect a patch |
261 // this is a patch | 255 // this is a patch |
262 LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>(); | 256 LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>(); |
263 int patchElementIndex = 0; | 257 while (!userDataAccess.isEmpty()) { |
264 do { | 258 PatchRecord pr = PatchRecord.read(userDataAccess); |
265 PatchRecord pr = PatchRecord.read(data, patchElementIndex); | 259 // System.out.printf("PatchRecord:%d %d %d\n", pr.start, pr.end, pr.len); |
266 patches.add(pr); | 260 patches.add(pr); |
267 patchElementIndex += 12 + pr.len; | 261 } |
268 } while (patchElementIndex < data.length); | 262 userDataAccess.done(); |
269 // | 263 // |
270 byte[] baseRevContent = lastData; | 264 byte[] userData = apply(lastUserData, actualLen, patches); |
271 data = apply(baseRevContent, actualLen, patches); | 265 userDataAccess = new ByteArrayDataAccess(userData); |
272 } | 266 } |
273 } else { | 267 } else { |
274 if (inline) { | 268 if (inline) { |
275 daIndex.skip(compressedLen); | 269 daIndex.skip(compressedLen); |
276 } | 270 } |
277 } | 271 } |
278 if (!extraReadsToBaseRev || i >= start) { | 272 if (!extraReadsToBaseRev || i >= start) { |
279 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, data); | 273 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); |
280 } | 274 } |
281 lastData = data; | 275 if (userDataAccess != null) { |
276 userDataAccess.reset(); | |
277 if (lastUserData != null) { | |
278 lastUserData.done(); | |
279 } | |
280 lastUserData = userDataAccess; | |
281 } | |
282 } | 282 } |
283 } catch (IOException ex) { | 283 } catch (IOException ex) { |
284 throw new IllegalStateException(ex); // FIXME need better handling | 284 throw new IllegalStateException(ex); // FIXME need better handling |
285 } finally { | 285 } finally { |
286 daIndex.done(); | 286 daIndex.done(); |
355 } | 355 } |
356 } | 356 } |
357 | 357 |
358 // mpatch.c : apply() | 358 // mpatch.c : apply() |
359 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF | 359 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF |
360 public/*for HgBundle; until moved to better place*/static byte[] apply(byte[] baseRevisionContent, int outcomeLen, List<PatchRecord> patch) { | 360 public/*for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List<PatchRecord> patch) throws IOException { |
361 int last = 0, destIndex = 0; | 361 int last = 0, destIndex = 0; |
362 if (outcomeLen == -1) { | 362 if (outcomeLen == -1) { |
363 outcomeLen = baseRevisionContent.length; | 363 outcomeLen = (int) baseRevisionContent.length(); |
364 for (PatchRecord pr : patch) { | 364 for (PatchRecord pr : patch) { |
365 outcomeLen += pr.start - last + pr.len; | 365 outcomeLen += pr.start - last + pr.len; |
366 last = pr.end; | 366 last = pr.end; |
367 } | 367 } |
368 outcomeLen -= last; | 368 outcomeLen -= last; |
369 last = 0; | 369 last = 0; |
370 } | 370 } |
371 byte[] rv = new byte[outcomeLen]; | 371 byte[] rv = new byte[outcomeLen]; |
372 for (PatchRecord pr : patch) { | 372 for (PatchRecord pr : patch) { |
373 System.arraycopy(baseRevisionContent, last, rv, destIndex, pr.start-last); | 373 baseRevisionContent.seek(last); |
374 baseRevisionContent.readBytes(rv, destIndex, pr.start-last); | |
374 destIndex += pr.start - last; | 375 destIndex += pr.start - last; |
375 System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length); | 376 System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length); |
376 destIndex += pr.data.length; | 377 destIndex += pr.data.length; |
377 last = pr.end; | 378 last = pr.end; |
378 } | 379 } |
379 System.arraycopy(baseRevisionContent, last, rv, destIndex, baseRevisionContent.length - last); | 380 baseRevisionContent.seek(last); |
381 baseRevisionContent.readBytes(rv, destIndex, (int) (baseRevisionContent.length() - last)); | |
380 return rv; | 382 return rv; |
381 } | 383 } |
382 | 384 |
383 // @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description | 385 // @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description |
384 public static class PatchRecord { | 386 public static class PatchRecord { |
420 | 422 |
421 // FIXME byte[] data might be too expensive, for few usecases it may be better to have intermediate Access object (when we don't need full data | 423 // FIXME byte[] data might be too expensive, for few usecases it may be better to have intermediate Access object (when we don't need full data |
422 // instantly - e.g. calculate hash, or comparing two revisions | 424 // instantly - e.g. calculate hash, or comparing two revisions |
423 public interface Inspector { | 425 public interface Inspector { |
424 // XXX boolean retVal to indicate whether to continue? | 426 // XXX boolean retVal to indicate whether to continue? |
425 // TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call) | 427 // TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call) |
426 void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, byte[] data); | 428 // implementers shall not invoke DataAccess.done(), it's accomplished by #iterate at appropraite moment |
429 void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, DataAccess data); | |
427 } | 430 } |
428 } | 431 } |