Mercurial > jhg
comparison src/org/tmatesoft/hg/internal/RevlogStream.java @ 157:d5268ca7715b
Merged branch wrap-data-access into default for resource-friendly data access. Updated API to promote that friendliness to clients (channels, not byte[]). More exceptions
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
|---|---|
| date | Wed, 09 Mar 2011 05:22:17 +0100 |
| parents | src/com/tmate/hgkit/ll/RevlogStream.java@a6f39e595b2b src/com/tmate/hgkit/ll/RevlogStream.java@a3a2e5deb320 |
| children | b413b16d10a5 |
comparison
equal
deleted
inserted
replaced
| 156:643ddec3be36 | 157:d5268ca7715b |
|---|---|
| 23 import java.io.IOException; | 23 import java.io.IOException; |
| 24 import java.util.ArrayList; | 24 import java.util.ArrayList; |
| 25 import java.util.Collections; | 25 import java.util.Collections; |
| 26 import java.util.LinkedList; | 26 import java.util.LinkedList; |
| 27 import java.util.List; | 27 import java.util.List; |
| 28 import java.util.zip.DataFormatException; | |
| 29 import java.util.zip.Inflater; | |
| 30 | 28 |
| 31 import org.tmatesoft.hg.core.Nodeid; | 29 import org.tmatesoft.hg.core.Nodeid; |
| 32 import org.tmatesoft.hg.repo.HgRepository; | 30 import org.tmatesoft.hg.repo.HgRepository; |
| 33 | 31 |
| 34 | 32 |
| 184 } | 182 } |
| 185 if (start == TIP) { | 183 if (start == TIP) { |
| 186 start = indexSize - 1; | 184 start = indexSize - 1; |
| 187 } | 185 } |
| 188 if (start < 0 || start >= indexSize) { | 186 if (start < 0 || start >= indexSize) { |
| 189 throw new IllegalArgumentException("Bad left range boundary " + start); | 187 throw new IllegalArgumentException(String.format("Bad left range boundary %d in [0..%d]", start, indexSize-1)); |
| 190 } | 188 } |
| 191 if (end < start || end >= indexSize) { | 189 if (end < start || end >= indexSize) { |
| 192 throw new IllegalArgumentException("Bad right range boundary " + end); | 190 throw new IllegalArgumentException(String.format("Bad right range boundary %d in [0..%d]", end, indexSize-1)); |
| 193 } | 191 } |
| 194 // XXX may cache [start .. end] from index with a single read (pre-read) | 192 // XXX may cache [start .. end] from index with a single read (pre-read) |
| 195 | 193 |
| 196 DataAccess daIndex = null, daData = null; | 194 DataAccess daIndex = null, daData = null; |
| 197 daIndex = getIndexStream(); | 195 daIndex = getIndexStream(); |
| 198 if (needData && !inline) { | 196 if (needData && !inline) { |
| 199 daData = getDataStream(); | 197 daData = getDataStream(); |
| 200 } | 198 } |
| 201 try { | 199 try { |
| 202 byte[] nodeidBuf = new byte[20]; | 200 byte[] nodeidBuf = new byte[20]; |
| 203 byte[] lastData = null; | 201 DataAccess lastUserData = null; |
| 204 int i; | 202 int i; |
| 205 boolean extraReadsToBaseRev = false; | 203 boolean extraReadsToBaseRev = false; |
| 206 if (needData && index.get(start).baseRevision < start) { | 204 if (needData && index.get(start).baseRevision < start) { |
| 207 i = index.get(start).baseRevision; | 205 i = index.get(start).baseRevision; |
| 208 extraReadsToBaseRev = true; | 206 extraReadsToBaseRev = true; |
| 209 } else { | 207 } else { |
| 210 i = start; | 208 i = start; |
| 211 } | 209 } |
| 212 | 210 |
| 213 daIndex.seek(inline ? (int) index.get(i).offset : i * REVLOGV1_RECORD_SIZE); | 211 daIndex.seek(inline ? index.get(i).offset : i * REVLOGV1_RECORD_SIZE); |
| 214 for (; i <= end; i++ ) { | 212 for (; i <= end; i++ ) { |
| 215 long l = daIndex.readLong(); // 0 | 213 if (inline && needData) { |
| 214 // inspector reading data (though FilterDataAccess) may have affected index position | |
| 215 daIndex.seek(index.get(i).offset); | |
| 216 } | |
| 217 long l = daIndex.readLong(); // 0 | |
| 216 @SuppressWarnings("unused") | 218 @SuppressWarnings("unused") |
| 217 long offset = l >>> 16; | 219 long offset = l >>> 16; |
| 218 @SuppressWarnings("unused") | 220 @SuppressWarnings("unused") |
| 219 int flags = (int) (l & 0X0FFFF); | 221 int flags = (int) (l & 0X0FFFF); |
| 220 int compressedLen = daIndex.readInt(); // +8 | 222 int compressedLen = daIndex.readInt(); // +8 |
| 224 int parent1Revision = daIndex.readInt(); | 226 int parent1Revision = daIndex.readInt(); |
| 225 int parent2Revision = daIndex.readInt(); | 227 int parent2Revision = daIndex.readInt(); |
| 226 // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty | 228 // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty |
| 227 daIndex.readBytes(nodeidBuf, 0, 20); // +32 | 229 daIndex.readBytes(nodeidBuf, 0, 20); // +32 |
| 228 daIndex.skip(12); | 230 daIndex.skip(12); |
| 229 byte[] data = null; | 231 DataAccess userDataAccess = null; |
| 230 if (needData) { | 232 if (needData) { |
| 231 byte[] dataBuf = new byte[compressedLen]; | 233 final byte firstByte; |
| 234 long streamOffset = index.get(i).offset; | |
| 235 DataAccess streamDataAccess; | |
| 232 if (inline) { | 236 if (inline) { |
| 233 daIndex.readBytes(dataBuf, 0, compressedLen); | 237 streamDataAccess = daIndex; |
| 238 streamOffset += REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream | |
| 234 } else { | 239 } else { |
| 235 daData.seek(index.get(i).offset); | 240 streamDataAccess = daData; |
| 236 daData.readBytes(dataBuf, 0, compressedLen); | 241 daData.seek(streamOffset); |
| 237 } | 242 } |
| 238 if (dataBuf[0] == 0x78 /* 'x' */) { | 243 firstByte = streamDataAccess.readByte(); |
| 239 try { | 244 if (firstByte == 0x78 /* 'x' */) { |
| 240 Inflater zlib = new Inflater(); // XXX Consider reuse of Inflater, and/or stream alternative | 245 userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen); |
| 241 zlib.setInput(dataBuf, 0, compressedLen); | 246 } else if (firstByte == 0x75 /* 'u' */) { |
| 242 byte[] result = new byte[actualLen*2]; // FIXME need to use zlib.finished() instead | 247 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1); |
| 243 int resultLen = zlib.inflate(result); | |
| 244 zlib.end(); | |
| 245 data = new byte[resultLen]; | |
| 246 System.arraycopy(result, 0, data, 0, resultLen); | |
| 247 } catch (DataFormatException ex) { | |
| 248 ex.printStackTrace(); | |
| 249 data = new byte[0]; // FIXME need better failure strategy | |
| 250 } | |
| 251 } else if (dataBuf[0] == 0x75 /* 'u' */) { | |
| 252 data = new byte[dataBuf.length - 1]; | |
| 253 System.arraycopy(dataBuf, 1, data, 0, data.length); | |
| 254 } else { | 248 } else { |
| 255 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' | 249 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' |
| 256 // but I don't see reason not to return data as is | 250 // but I don't see reason not to return data as is |
| 257 data = dataBuf; | 251 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen); |
| 258 } | 252 } |
| 259 // XXX | 253 // XXX |
| 260 if (baseRevision != i) { // XXX not sure if this is the right way to detect a patch | 254 if (baseRevision != i) { // XXX not sure if this is the right way to detect a patch |
| 261 // this is a patch | 255 // this is a patch |
| 262 LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>(); | 256 LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>(); |
| 263 int patchElementIndex = 0; | 257 while (!userDataAccess.isEmpty()) { |
| 264 do { | 258 PatchRecord pr = PatchRecord.read(userDataAccess); |
| 265 PatchRecord pr = PatchRecord.read(data, patchElementIndex); | 259 // System.out.printf("PatchRecord:%d %d %d\n", pr.start, pr.end, pr.len); |
| 266 patches.add(pr); | 260 patches.add(pr); |
| 267 patchElementIndex += 12 + pr.len; | 261 } |
| 268 } while (patchElementIndex < data.length); | 262 userDataAccess.done(); |
| 269 // | 263 // |
| 270 byte[] baseRevContent = lastData; | 264 byte[] userData = apply(lastUserData, actualLen, patches); |
| 271 data = apply(baseRevContent, actualLen, patches); | 265 userDataAccess = new ByteArrayDataAccess(userData); |
| 272 } | 266 } |
| 273 } else { | 267 } else { |
| 274 if (inline) { | 268 if (inline) { |
| 275 daIndex.skip(compressedLen); | 269 daIndex.skip(compressedLen); |
| 276 } | 270 } |
| 277 } | 271 } |
| 278 if (!extraReadsToBaseRev || i >= start) { | 272 if (!extraReadsToBaseRev || i >= start) { |
| 279 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, data); | 273 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); |
| 280 } | 274 } |
| 281 lastData = data; | 275 if (userDataAccess != null) { |
| 276 userDataAccess.reset(); | |
| 277 if (lastUserData != null) { | |
| 278 lastUserData.done(); | |
| 279 } | |
| 280 lastUserData = userDataAccess; | |
| 281 } | |
| 282 } | 282 } |
| 283 } catch (IOException ex) { | 283 } catch (IOException ex) { |
| 284 throw new IllegalStateException(ex); // FIXME need better handling | 284 throw new IllegalStateException(ex); // FIXME need better handling |
| 285 } finally { | 285 } finally { |
| 286 daIndex.done(); | 286 daIndex.done(); |
| 355 } | 355 } |
| 356 } | 356 } |
| 357 | 357 |
| 358 // mpatch.c : apply() | 358 // mpatch.c : apply() |
| 359 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF | 359 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF |
| 360 public/*for HgBundle; until moved to better place*/static byte[] apply(byte[] baseRevisionContent, int outcomeLen, List<PatchRecord> patch) { | 360 public/*for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List<PatchRecord> patch) throws IOException { |
| 361 int last = 0, destIndex = 0; | 361 int last = 0, destIndex = 0; |
| 362 if (outcomeLen == -1) { | 362 if (outcomeLen == -1) { |
| 363 outcomeLen = baseRevisionContent.length; | 363 outcomeLen = (int) baseRevisionContent.length(); |
| 364 for (PatchRecord pr : patch) { | 364 for (PatchRecord pr : patch) { |
| 365 outcomeLen += pr.start - last + pr.len; | 365 outcomeLen += pr.start - last + pr.len; |
| 366 last = pr.end; | 366 last = pr.end; |
| 367 } | 367 } |
| 368 outcomeLen -= last; | 368 outcomeLen -= last; |
| 369 last = 0; | 369 last = 0; |
| 370 } | 370 } |
| 371 byte[] rv = new byte[outcomeLen]; | 371 byte[] rv = new byte[outcomeLen]; |
| 372 for (PatchRecord pr : patch) { | 372 for (PatchRecord pr : patch) { |
| 373 System.arraycopy(baseRevisionContent, last, rv, destIndex, pr.start-last); | 373 baseRevisionContent.seek(last); |
| 374 baseRevisionContent.readBytes(rv, destIndex, pr.start-last); | |
| 374 destIndex += pr.start - last; | 375 destIndex += pr.start - last; |
| 375 System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length); | 376 System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length); |
| 376 destIndex += pr.data.length; | 377 destIndex += pr.data.length; |
| 377 last = pr.end; | 378 last = pr.end; |
| 378 } | 379 } |
| 379 System.arraycopy(baseRevisionContent, last, rv, destIndex, baseRevisionContent.length - last); | 380 baseRevisionContent.seek(last); |
| 381 baseRevisionContent.readBytes(rv, destIndex, (int) (baseRevisionContent.length() - last)); | |
| 380 return rv; | 382 return rv; |
| 381 } | 383 } |
| 382 | 384 |
| 383 // @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description | 385 // @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description |
| 384 public static class PatchRecord { | 386 public static class PatchRecord { |
| 420 | 422 |
| 421 // FIXME byte[] data might be too expensive, for few usecases it may be better to have intermediate Access object (when we don't need full data | 423 // FIXME byte[] data might be too expensive, for few usecases it may be better to have intermediate Access object (when we don't need full data |
| 422 // instantly - e.g. calculate hash, or comparing two revisions | 424 // instantly - e.g. calculate hash, or comparing two revisions |
| 423 public interface Inspector { | 425 public interface Inspector { |
| 424 // XXX boolean retVal to indicate whether to continue? | 426 // XXX boolean retVal to indicate whether to continue? |
| 425 // TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call) | 427 // TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call) |
| 426 void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, byte[] data); | 428 // implementers shall not invoke DataAccess.done(), it's accomplished by #iterate at appropraite moment |
| 429 void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, DataAccess data); | |
| 427 } | 430 } |
| 428 } | 431 } |
