Mercurial > jhg
comparison hg4j/src/main/java/org/tmatesoft/hg/internal/RevlogStream.java @ 213:6ec4af642ba8 gradle
Project uses Gradle for build - actual changes
| author | Alexander Kitaev <kitaev@gmail.com> |
|---|---|
| date | Tue, 10 May 2011 10:52:53 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 212:edb2e2829352 | 213:6ec4af642ba8 |
|---|---|
| 1 /* | |
| 2 * Copyright (c) 2010-2011 TMate Software Ltd | |
| 3 * | |
| 4 * This program is free software; you can redistribute it and/or modify | |
| 5 * it under the terms of the GNU General Public License as published by | |
| 6 * the Free Software Foundation; version 2 of the License. | |
| 7 * | |
| 8 * This program is distributed in the hope that it will be useful, | |
| 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 11 * GNU General Public License for more details. | |
| 12 * | |
| 13 * For information on how to redistribute this software under | |
| 14 * the terms of a license other than GNU General Public License | |
| 15 * contact TMate Software at support@hg4j.com | |
| 16 */ | |
| 17 package org.tmatesoft.hg.internal; | |
| 18 | |
| 19 import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION; | |
| 20 import static org.tmatesoft.hg.repo.HgRepository.TIP; | |
| 21 | |
| 22 import java.io.File; | |
| 23 import java.io.IOException; | |
| 24 import java.util.ArrayList; | |
| 25 import java.util.LinkedList; | |
| 26 import java.util.List; | |
| 27 | |
| 28 import org.tmatesoft.hg.core.HgBadStateException; | |
| 29 import org.tmatesoft.hg.core.Nodeid; | |
| 30 import org.tmatesoft.hg.repo.HgRepository; | |
| 31 | |
| 32 | |
| 33 /** | |
| 34 * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations), | |
| 35 * or numerous RevlogStream with separate representation of the underlying data (cached, lazy ChunkStream)? | |
| 36 * | |
| 37 * @see http://mercurial.selenic.com/wiki/Revlog | |
| 38 * @see http://mercurial.selenic.com/wiki/RevlogNG | |
| 39 * | |
| 40 * @author Artem Tikhomirov | |
| 41 * @author TMate Software Ltd. | |
| 42 */ | |
| 43 public class RevlogStream { | |
| 44 | |
| 45 /* | |
| 46 * makes sense for index with inline data only - actual offset of the record in the .i file (record entry + revision * record size)) | |
| 47 * | |
| 48 * long[] in fact (there are 8-bytes field in the revlog) | |
| 49 * However, (a) DataAccess currently doesn't operate with long seek/length | |
| 50 * and, of greater significance, (b) files with inlined data are designated for smaller files, | |
| 51 * guess, about 130 Kb, and offset there won't ever break int capacity | |
| 52 */ | |
| 53 private int[] indexRecordOffset; | |
| 54 private int[] baseRevisions; | |
| 55 private boolean inline = false; | |
| 56 private final File indexFile; | |
| 57 private final DataAccessProvider dataAccess; | |
| 58 | |
| 59 // if we need anything else from HgRepo, might replace DAP parameter with HgRepo and query it for DAP. | |
| 60 public RevlogStream(DataAccessProvider dap, File indexFile) { | |
| 61 this.dataAccess = dap; | |
| 62 this.indexFile = indexFile; | |
| 63 } | |
| 64 | |
| 65 /*package*/ DataAccess getIndexStream() { | |
| 66 return dataAccess.create(indexFile); | |
| 67 } | |
| 68 | |
| 69 /*package*/ DataAccess getDataStream() { | |
| 70 final String indexName = indexFile.getName(); | |
| 71 File dataFile = new File(indexFile.getParentFile(), indexName.substring(0, indexName.length() - 1) + "d"); | |
| 72 return dataAccess.create(dataFile); | |
| 73 } | |
| 74 | |
| 75 public int revisionCount() { | |
| 76 initOutline(); | |
| 77 return baseRevisions.length; | |
| 78 } | |
| 79 | |
| 80 public int dataLength(int revision) { | |
| 81 // XXX in fact, use of iterate() instead of this implementation may be quite reasonable. | |
| 82 // | |
| 83 final int indexSize = revisionCount(); | |
| 84 DataAccess daIndex = getIndexStream(); // XXX may supply a hint that I'll need really few bytes of data (although at some offset) | |
| 85 if (revision == TIP) { | |
| 86 revision = indexSize - 1; | |
| 87 } | |
| 88 try { | |
| 89 int recordOffset = getIndexOffsetInt(revision); | |
| 90 daIndex.seek(recordOffset + 12); // 6+2+4 | |
| 91 int actualLen = daIndex.readInt(); | |
| 92 return actualLen; | |
| 93 } catch (IOException ex) { | |
| 94 ex.printStackTrace(); // log error. FIXME better handling | |
| 95 throw new IllegalStateException(ex); | |
| 96 } finally { | |
| 97 daIndex.done(); | |
| 98 } | |
| 99 } | |
| 100 | |
| 101 public byte[] nodeid(int revision) { | |
| 102 final int indexSize = revisionCount(); | |
| 103 if (revision == TIP) { | |
| 104 revision = indexSize - 1; | |
| 105 } | |
| 106 if (revision < 0 || revision >= indexSize) { | |
| 107 throw new IllegalArgumentException(Integer.toString(revision)); | |
| 108 } | |
| 109 DataAccess daIndex = getIndexStream(); | |
| 110 try { | |
| 111 int recordOffset = getIndexOffsetInt(revision); | |
| 112 daIndex.seek(recordOffset + 32); | |
| 113 byte[] rv = new byte[20]; | |
| 114 daIndex.readBytes(rv, 0, 20); | |
| 115 return rv; | |
| 116 } catch (IOException ex) { | |
| 117 ex.printStackTrace(); | |
| 118 throw new IllegalStateException(); | |
| 119 } finally { | |
| 120 daIndex.done(); | |
| 121 } | |
| 122 } | |
| 123 | |
| 124 public int linkRevision(int revision) { | |
| 125 final int last = revisionCount() - 1; | |
| 126 if (revision == TIP) { | |
| 127 revision = last; | |
| 128 } | |
| 129 if (revision < 0 || revision > last) { | |
| 130 throw new IllegalArgumentException(Integer.toString(revision)); | |
| 131 } | |
| 132 DataAccess daIndex = getIndexStream(); | |
| 133 try { | |
| 134 int recordOffset = getIndexOffsetInt(revision); | |
| 135 daIndex.seek(recordOffset + 20); | |
| 136 int linkRev = daIndex.readInt(); | |
| 137 return linkRev; | |
| 138 } catch (IOException ex) { | |
| 139 ex.printStackTrace(); | |
| 140 throw new IllegalStateException(); | |
| 141 } finally { | |
| 142 daIndex.done(); | |
| 143 } | |
| 144 } | |
| 145 | |
| 146 // Perhaps, RevlogStream should be limited to use of plain int revisions for access, | |
| 147 // while Nodeids should be kept on the level up, in Revlog. Guess, Revlog better keep | |
| 148 // map of nodeids, and once this comes true, we may get rid of this method. | |
| 149 // Unlike its counterpart, {@link Revlog#getLocalRevisionNumber()}, doesn't fail with exception if node not found, | |
| 150 /** | |
| 151 * @return integer in [0..revisionCount()) or {@link HgRepository#BAD_REVISION} if not found | |
| 152 */ | |
| 153 public int findLocalRevisionNumber(Nodeid nodeid) { | |
| 154 // XXX this one may be implemented with iterate() once there's mechanism to stop iterations | |
| 155 final int indexSize = revisionCount(); | |
| 156 DataAccess daIndex = getIndexStream(); | |
| 157 try { | |
| 158 byte[] nodeidBuf = new byte[20]; | |
| 159 for (int i = 0; i < indexSize; i++) { | |
| 160 daIndex.skip(8); | |
| 161 int compressedLen = daIndex.readInt(); | |
| 162 daIndex.skip(20); | |
| 163 daIndex.readBytes(nodeidBuf, 0, 20); | |
| 164 if (nodeid.equalsTo(nodeidBuf)) { | |
| 165 return i; | |
| 166 } | |
| 167 daIndex.skip(inline ? 12 + compressedLen : 12); | |
| 168 } | |
| 169 } catch (IOException ex) { | |
| 170 ex.printStackTrace(); // log error. FIXME better handling | |
| 171 throw new IllegalStateException(ex); | |
| 172 } finally { | |
| 173 daIndex.done(); | |
| 174 } | |
| 175 return BAD_REVISION; | |
| 176 } | |
| 177 | |
| 178 | |
| 179 private final int REVLOGV1_RECORD_SIZE = 64; | |
| 180 | |
| 181 // should be possible to use TIP, ALL, or -1, -2, -n notation of Hg | |
| 182 // ? boolean needsNodeid | |
| 183 public void iterate(int start, int end, boolean needData, Inspector inspector) { | |
| 184 initOutline(); | |
| 185 final int indexSize = revisionCount(); | |
| 186 if (indexSize == 0) { | |
| 187 return; | |
| 188 } | |
| 189 if (end == TIP) { | |
| 190 end = indexSize - 1; | |
| 191 } | |
| 192 if (start == TIP) { | |
| 193 start = indexSize - 1; | |
| 194 } | |
| 195 if (start < 0 || start >= indexSize) { | |
| 196 throw new IllegalArgumentException(String.format("Bad left range boundary %d in [0..%d]", start, indexSize-1)); | |
| 197 } | |
| 198 if (end < start || end >= indexSize) { | |
| 199 throw new IllegalArgumentException(String.format("Bad right range boundary %d in [0..%d]", end, indexSize-1)); | |
| 200 } | |
| 201 // XXX may cache [start .. end] from index with a single read (pre-read) | |
| 202 | |
| 203 DataAccess daIndex = null, daData = null; | |
| 204 daIndex = getIndexStream(); | |
| 205 if (needData && !inline) { | |
| 206 daData = getDataStream(); | |
| 207 } | |
| 208 try { | |
| 209 byte[] nodeidBuf = new byte[20]; | |
| 210 DataAccess lastUserData = null; | |
| 211 int i; | |
| 212 boolean extraReadsToBaseRev = false; | |
| 213 if (needData && getBaseRevision(start) < start) { | |
| 214 i = getBaseRevision(start); | |
| 215 extraReadsToBaseRev = true; | |
| 216 } else { | |
| 217 i = start; | |
| 218 } | |
| 219 | |
| 220 daIndex.seek(getIndexOffsetInt(i)); | |
| 221 for (; i <= end; i++ ) { | |
| 222 if (inline && needData) { | |
| 223 // inspector reading data (though FilterDataAccess) may have affected index position | |
| 224 daIndex.seek(getIndexOffsetInt(i)); | |
| 225 } | |
| 226 long l = daIndex.readLong(); // 0 | |
| 227 long offset = i == 0 ? 0 : (l >>> 16); | |
| 228 @SuppressWarnings("unused") | |
| 229 int flags = (int) (l & 0X0FFFF); | |
| 230 int compressedLen = daIndex.readInt(); // +8 | |
| 231 int actualLen = daIndex.readInt(); // +12 | |
| 232 int baseRevision = daIndex.readInt(); // +16 | |
| 233 int linkRevision = daIndex.readInt(); // +20 | |
| 234 int parent1Revision = daIndex.readInt(); | |
| 235 int parent2Revision = daIndex.readInt(); | |
| 236 // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty | |
| 237 daIndex.readBytes(nodeidBuf, 0, 20); // +32 | |
| 238 daIndex.skip(12); | |
| 239 DataAccess userDataAccess = null; | |
| 240 if (needData) { | |
| 241 final byte firstByte; | |
| 242 int streamOffset; | |
| 243 DataAccess streamDataAccess; | |
| 244 if (inline) { | |
| 245 streamDataAccess = daIndex; | |
| 246 streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream | |
| 247 } else { | |
| 248 streamOffset = (int) offset; | |
| 249 streamDataAccess = daData; | |
| 250 daData.seek(streamOffset); | |
| 251 } | |
| 252 final boolean patchToPrevious = baseRevision != i; // the only way I found to tell if it's a patch | |
| 253 firstByte = streamDataAccess.readByte(); | |
| 254 if (firstByte == 0x78 /* 'x' */) { | |
| 255 userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, patchToPrevious ? -1 : actualLen); | |
| 256 } else if (firstByte == 0x75 /* 'u' */) { | |
| 257 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1); | |
| 258 } else { | |
| 259 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' | |
| 260 // but I don't see reason not to return data as is | |
| 261 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen); | |
| 262 } | |
| 263 // XXX | |
| 264 if (patchToPrevious) { | |
| 265 // this is a patch | |
| 266 LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>(); | |
| 267 while (!userDataAccess.isEmpty()) { | |
| 268 PatchRecord pr = PatchRecord.read(userDataAccess); | |
| 269 // System.out.printf("PatchRecord:%d %d %d\n", pr.start, pr.end, pr.len); | |
| 270 patches.add(pr); | |
| 271 } | |
| 272 userDataAccess.done(); | |
| 273 // | |
| 274 byte[] userData = apply(lastUserData, actualLen, patches); | |
| 275 userDataAccess = new ByteArrayDataAccess(userData); | |
| 276 } | |
| 277 } else { | |
| 278 if (inline) { | |
| 279 daIndex.skip(compressedLen); | |
| 280 } | |
| 281 } | |
| 282 if (!extraReadsToBaseRev || i >= start) { | |
| 283 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); | |
| 284 } | |
| 285 if (userDataAccess != null) { | |
| 286 userDataAccess.reset(); | |
| 287 if (lastUserData != null) { | |
| 288 lastUserData.done(); | |
| 289 } | |
| 290 lastUserData = userDataAccess; | |
| 291 } | |
| 292 } | |
| 293 } catch (IOException ex) { | |
| 294 throw new HgBadStateException(ex); // FIXME need better handling | |
| 295 } finally { | |
| 296 daIndex.done(); | |
| 297 if (daData != null) { | |
| 298 daData.done(); | |
| 299 } | |
| 300 } | |
| 301 } | |
| 302 | |
| 303 private int getBaseRevision(int revision) { | |
| 304 return baseRevisions[revision]; | |
| 305 } | |
| 306 | |
| 307 /** | |
| 308 * @return offset of the revision's record in the index (.i) stream | |
| 309 */ | |
| 310 private int getIndexOffsetInt(int revision) { | |
| 311 return inline ? indexRecordOffset[revision] : revision * REVLOGV1_RECORD_SIZE; | |
| 312 } | |
| 313 | |
| 314 private void initOutline() { | |
| 315 if (baseRevisions != null && baseRevisions.length > 0) { | |
| 316 return; | |
| 317 } | |
| 318 ArrayList<Integer> resBases = new ArrayList<Integer>(); | |
| 319 ArrayList<Integer> resOffsets = new ArrayList<Integer>(); | |
| 320 DataAccess da = getIndexStream(); | |
| 321 try { | |
| 322 if (da.isEmpty()) { | |
| 323 // do not fail with exception if stream is empty, it's likely intentional | |
| 324 baseRevisions = new int[0]; | |
| 325 return; | |
| 326 } | |
| 327 int versionField = da.readInt(); | |
| 328 da.readInt(); // just to skip next 4 bytes of offset + flags | |
| 329 final int INLINEDATA = 1 << 16; | |
| 330 inline = (versionField & INLINEDATA) != 0; | |
| 331 long offset = 0; // first offset is always 0, thus Hg uses it for other purposes | |
| 332 while(true) { | |
| 333 int compressedLen = da.readInt(); | |
| 334 // 8+4 = 12 bytes total read here | |
| 335 @SuppressWarnings("unused") | |
| 336 int actualLen = da.readInt(); | |
| 337 int baseRevision = da.readInt(); | |
| 338 // 12 + 8 = 20 bytes read here | |
| 339 // int linkRevision = di.readInt(); | |
| 340 // int parent1Revision = di.readInt(); | |
| 341 // int parent2Revision = di.readInt(); | |
| 342 // byte[] nodeid = new byte[32]; | |
| 343 resBases.add(baseRevision); | |
| 344 if (inline) { | |
| 345 int o = (int) offset; | |
| 346 if (o != offset) { | |
| 347 // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file | |
| 348 // with inlined data of size greater than 2 Gb. | |
| 349 throw new HgBadStateException("Data too big, offset didn't fit to sizeof(int)"); | |
| 350 } | |
| 351 resOffsets.add(o + REVLOGV1_RECORD_SIZE * resOffsets.size()); | |
| 352 da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size) | |
| 353 } else { | |
| 354 da.skip(3*4 + 32); | |
| 355 } | |
| 356 if (da.isEmpty()) { | |
| 357 // fine, done then | |
| 358 baseRevisions = toArray(resBases); | |
| 359 if (inline) { | |
| 360 indexRecordOffset = toArray(resOffsets); | |
| 361 } | |
| 362 break; | |
| 363 } else { | |
| 364 // start reading next record | |
| 365 long l = da.readLong(); | |
| 366 offset = l >>> 16; | |
| 367 } | |
| 368 } | |
| 369 } catch (IOException ex) { | |
| 370 ex.printStackTrace(); // log error | |
| 371 // too bad, no outline then, but don't fail with NPE | |
| 372 baseRevisions = new int[0]; | |
| 373 } finally { | |
| 374 da.done(); | |
| 375 } | |
| 376 } | |
| 377 | |
| 378 private static int[] toArray(List<Integer> l) { | |
| 379 int[] rv = new int[l.size()]; | |
| 380 for (int i = 0; i < rv.length; i++) { | |
| 381 rv[i] = l.get(i); | |
| 382 } | |
| 383 return rv; | |
| 384 } | |
| 385 | |
| 386 | |
| 387 // mpatch.c : apply() | |
| 388 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF | |
| 389 public/*for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List<PatchRecord> patch) throws IOException { | |
| 390 int last = 0, destIndex = 0; | |
| 391 if (outcomeLen == -1) { | |
| 392 outcomeLen = baseRevisionContent.length(); | |
| 393 for (PatchRecord pr : patch) { | |
| 394 outcomeLen += pr.start - last + pr.len; | |
| 395 last = pr.end; | |
| 396 } | |
| 397 outcomeLen -= last; | |
| 398 last = 0; | |
| 399 } | |
| 400 byte[] rv = new byte[outcomeLen]; | |
| 401 for (PatchRecord pr : patch) { | |
| 402 baseRevisionContent.seek(last); | |
| 403 baseRevisionContent.readBytes(rv, destIndex, pr.start-last); | |
| 404 destIndex += pr.start - last; | |
| 405 System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length); | |
| 406 destIndex += pr.data.length; | |
| 407 last = pr.end; | |
| 408 } | |
| 409 baseRevisionContent.seek(last); | |
| 410 baseRevisionContent.readBytes(rv, destIndex, (int) (baseRevisionContent.length() - last)); | |
| 411 return rv; | |
| 412 } | |
| 413 | |
| 414 // @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description | |
| 415 public static class PatchRecord { | |
| 416 /* | |
| 417 Given there are pr1 and pr2: | |
| 418 pr1.start to pr1.end will be replaced with pr's data (of pr1.len) | |
| 419 pr1.end to pr2.start gets copied from base | |
| 420 */ | |
| 421 public int start, end, len; | |
| 422 public byte[] data; | |
| 423 | |
| 424 // TODO consider PatchRecord that only records data position (absolute in data source), and acquires data as needed | |
| 425 private PatchRecord(int p1, int p2, int length, byte[] src) { | |
| 426 start = p1; | |
| 427 end = p2; | |
| 428 len = length; | |
| 429 data = src; | |
| 430 } | |
| 431 | |
| 432 /*package-local*/ static PatchRecord read(byte[] data, int offset) { | |
| 433 final int x = offset; // shorthand | |
| 434 int p1 = ((data[x] & 0xFF)<< 24) | ((data[x+1] & 0xFF) << 16) | ((data[x+2] & 0xFF) << 8) | (data[x+3] & 0xFF); | |
| 435 int p2 = ((data[x+4] & 0xFF) << 24) | ((data[x+5] & 0xFF) << 16) | ((data[x+6] & 0xFF) << 8) | (data[x+7] & 0xFF); | |
| 436 int len = ((data[x+8] & 0xFF) << 24) | ((data[x+9] & 0xFF) << 16) | ((data[x+10] & 0xFF) << 8) | (data[x+11] & 0xFF); | |
| 437 byte[] dataCopy = new byte[len]; | |
| 438 System.arraycopy(data, x+12, dataCopy, 0, len); | |
| 439 return new PatchRecord(p1, p2, len, dataCopy); | |
| 440 } | |
| 441 | |
| 442 public /*for HgBundle*/ static PatchRecord read(DataAccess da) throws IOException { | |
| 443 int p1 = da.readInt(); | |
| 444 int p2 = da.readInt(); | |
| 445 int len = da.readInt(); | |
| 446 byte[] src = new byte[len]; | |
| 447 da.readBytes(src, 0, len); | |
| 448 return new PatchRecord(p1, p2, len, src); | |
| 449 } | |
| 450 } | |
| 451 | |
| 452 // FIXME byte[] data might be too expensive, for few usecases it may be better to have intermediate Access object (when we don't need full data | |
| 453 // instantly - e.g. calculate hash, or comparing two revisions | |
| 454 public interface Inspector { | |
| 455 // XXX boolean retVal to indicate whether to continue? | |
| 456 // TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call) | |
| 457 // implementers shall not invoke DataAccess.done(), it's accomplished by #iterate at appropraite moment | |
| 458 void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, DataAccess data); | |
| 459 } | |
| 460 } |
