kitaev@213: /* kitaev@213: * Copyright (c) 2010-2011 TMate Software Ltd kitaev@213: * kitaev@213: * This program is free software; you can redistribute it and/or modify kitaev@213: * it under the terms of the GNU General Public License as published by kitaev@213: * the Free Software Foundation; version 2 of the License. kitaev@213: * kitaev@213: * This program is distributed in the hope that it will be useful, kitaev@213: * but WITHOUT ANY WARRANTY; without even the implied warranty of kitaev@213: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the kitaev@213: * GNU General Public License for more details. kitaev@213: * kitaev@213: * For information on how to redistribute this software under kitaev@213: * the terms of a license other than GNU General Public License kitaev@213: * contact TMate Software at support@hg4j.com kitaev@213: */ kitaev@213: package org.tmatesoft.hg.internal; kitaev@213: kitaev@213: import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION; kitaev@213: import static org.tmatesoft.hg.repo.HgRepository.TIP; kitaev@213: kitaev@213: import java.io.File; kitaev@213: import java.io.IOException; kitaev@213: import java.util.ArrayList; kitaev@213: import java.util.LinkedList; kitaev@213: import java.util.List; kitaev@213: kitaev@213: import org.tmatesoft.hg.core.HgBadStateException; kitaev@213: import org.tmatesoft.hg.core.Nodeid; kitaev@213: import org.tmatesoft.hg.repo.HgRepository; kitaev@213: kitaev@213: kitaev@213: /** kitaev@213: * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations), kitaev@213: * or numerous RevlogStream with separate representation of the underlying data (cached, lazy ChunkStream)? kitaev@213: * kitaev@213: * @see http://mercurial.selenic.com/wiki/Revlog kitaev@213: * @see http://mercurial.selenic.com/wiki/RevlogNG kitaev@213: * kitaev@213: * @author Artem Tikhomirov kitaev@213: * @author TMate Software Ltd. kitaev@213: */ kitaev@213: public class RevlogStream { kitaev@213: kitaev@213: /* kitaev@213: * makes sense for index with inline data only - actual offset of the record in the .i file (record entry + revision * record size)) kitaev@213: * kitaev@213: * long[] in fact (there are 8-bytes field in the revlog) kitaev@213: * However, (a) DataAccess currently doesn't operate with long seek/length kitaev@213: * and, of greater significance, (b) files with inlined data are designated for smaller files, kitaev@213: * guess, about 130 Kb, and offset there won't ever break int capacity kitaev@213: */ kitaev@213: private int[] indexRecordOffset; kitaev@213: private int[] baseRevisions; kitaev@213: private boolean inline = false; kitaev@213: private final File indexFile; kitaev@213: private final DataAccessProvider dataAccess; kitaev@213: kitaev@213: // if we need anything else from HgRepo, might replace DAP parameter with HgRepo and query it for DAP. kitaev@213: public RevlogStream(DataAccessProvider dap, File indexFile) { kitaev@213: this.dataAccess = dap; kitaev@213: this.indexFile = indexFile; kitaev@213: } kitaev@213: kitaev@213: /*package*/ DataAccess getIndexStream() { kitaev@213: return dataAccess.create(indexFile); kitaev@213: } kitaev@213: kitaev@213: /*package*/ DataAccess getDataStream() { kitaev@213: final String indexName = indexFile.getName(); kitaev@213: File dataFile = new File(indexFile.getParentFile(), indexName.substring(0, indexName.length() - 1) + "d"); kitaev@213: return dataAccess.create(dataFile); kitaev@213: } kitaev@213: kitaev@213: public int revisionCount() { kitaev@213: initOutline(); kitaev@213: return baseRevisions.length; kitaev@213: } kitaev@213: kitaev@213: public int dataLength(int revision) { kitaev@213: // XXX in fact, use of iterate() instead of this implementation may be quite reasonable. kitaev@213: // kitaev@213: final int indexSize = revisionCount(); kitaev@213: DataAccess daIndex = getIndexStream(); // XXX may supply a hint that I'll need really few bytes of data (although at some offset) kitaev@213: if (revision == TIP) { kitaev@213: revision = indexSize - 1; kitaev@213: } kitaev@213: try { kitaev@213: int recordOffset = getIndexOffsetInt(revision); kitaev@213: daIndex.seek(recordOffset + 12); // 6+2+4 kitaev@213: int actualLen = daIndex.readInt(); kitaev@213: return actualLen; kitaev@213: } catch (IOException ex) { kitaev@213: ex.printStackTrace(); // log error. FIXME better handling kitaev@213: throw new IllegalStateException(ex); kitaev@213: } finally { kitaev@213: daIndex.done(); kitaev@213: } kitaev@213: } kitaev@213: kitaev@213: public byte[] nodeid(int revision) { kitaev@213: final int indexSize = revisionCount(); kitaev@213: if (revision == TIP) { kitaev@213: revision = indexSize - 1; kitaev@213: } kitaev@213: if (revision < 0 || revision >= indexSize) { kitaev@213: throw new IllegalArgumentException(Integer.toString(revision)); kitaev@213: } kitaev@213: DataAccess daIndex = getIndexStream(); kitaev@213: try { kitaev@213: int recordOffset = getIndexOffsetInt(revision); kitaev@213: daIndex.seek(recordOffset + 32); kitaev@213: byte[] rv = new byte[20]; kitaev@213: daIndex.readBytes(rv, 0, 20); kitaev@213: return rv; kitaev@213: } catch (IOException ex) { kitaev@213: ex.printStackTrace(); kitaev@213: throw new IllegalStateException(); kitaev@213: } finally { kitaev@213: daIndex.done(); kitaev@213: } kitaev@213: } kitaev@213: kitaev@213: public int linkRevision(int revision) { kitaev@213: final int last = revisionCount() - 1; kitaev@213: if (revision == TIP) { kitaev@213: revision = last; kitaev@213: } kitaev@213: if (revision < 0 || revision > last) { kitaev@213: throw new IllegalArgumentException(Integer.toString(revision)); kitaev@213: } kitaev@213: DataAccess daIndex = getIndexStream(); kitaev@213: try { kitaev@213: int recordOffset = getIndexOffsetInt(revision); kitaev@213: daIndex.seek(recordOffset + 20); kitaev@213: int linkRev = daIndex.readInt(); kitaev@213: return linkRev; kitaev@213: } catch (IOException ex) { kitaev@213: ex.printStackTrace(); kitaev@213: throw new IllegalStateException(); kitaev@213: } finally { kitaev@213: daIndex.done(); kitaev@213: } kitaev@213: } kitaev@213: kitaev@213: // Perhaps, RevlogStream should be limited to use of plain int revisions for access, kitaev@213: // while Nodeids should be kept on the level up, in Revlog. Guess, Revlog better keep kitaev@213: // map of nodeids, and once this comes true, we may get rid of this method. kitaev@213: // Unlike its counterpart, {@link Revlog#getLocalRevisionNumber()}, doesn't fail with exception if node not found, kitaev@213: /** kitaev@213: * @return integer in [0..revisionCount()) or {@link HgRepository#BAD_REVISION} if not found kitaev@213: */ kitaev@213: public int findLocalRevisionNumber(Nodeid nodeid) { kitaev@213: // XXX this one may be implemented with iterate() once there's mechanism to stop iterations kitaev@213: final int indexSize = revisionCount(); kitaev@213: DataAccess daIndex = getIndexStream(); kitaev@213: try { kitaev@213: byte[] nodeidBuf = new byte[20]; kitaev@213: for (int i = 0; i < indexSize; i++) { kitaev@213: daIndex.skip(8); kitaev@213: int compressedLen = daIndex.readInt(); kitaev@213: daIndex.skip(20); kitaev@213: daIndex.readBytes(nodeidBuf, 0, 20); kitaev@213: if (nodeid.equalsTo(nodeidBuf)) { kitaev@213: return i; kitaev@213: } kitaev@213: daIndex.skip(inline ? 12 + compressedLen : 12); kitaev@213: } kitaev@213: } catch (IOException ex) { kitaev@213: ex.printStackTrace(); // log error. FIXME better handling kitaev@213: throw new IllegalStateException(ex); kitaev@213: } finally { kitaev@213: daIndex.done(); kitaev@213: } kitaev@213: return BAD_REVISION; kitaev@213: } kitaev@213: kitaev@213: kitaev@213: private final int REVLOGV1_RECORD_SIZE = 64; kitaev@213: kitaev@213: // should be possible to use TIP, ALL, or -1, -2, -n notation of Hg kitaev@213: // ? boolean needsNodeid kitaev@213: public void iterate(int start, int end, boolean needData, Inspector inspector) { kitaev@213: initOutline(); kitaev@213: final int indexSize = revisionCount(); kitaev@213: if (indexSize == 0) { kitaev@213: return; kitaev@213: } kitaev@213: if (end == TIP) { kitaev@213: end = indexSize - 1; kitaev@213: } kitaev@213: if (start == TIP) { kitaev@213: start = indexSize - 1; kitaev@213: } kitaev@213: if (start < 0 || start >= indexSize) { kitaev@213: throw new IllegalArgumentException(String.format("Bad left range boundary %d in [0..%d]", start, indexSize-1)); kitaev@213: } kitaev@213: if (end < start || end >= indexSize) { kitaev@213: throw new IllegalArgumentException(String.format("Bad right range boundary %d in [0..%d]", end, indexSize-1)); kitaev@213: } kitaev@213: // XXX may cache [start .. end] from index with a single read (pre-read) kitaev@213: kitaev@213: DataAccess daIndex = null, daData = null; kitaev@213: daIndex = getIndexStream(); kitaev@213: if (needData && !inline) { kitaev@213: daData = getDataStream(); kitaev@213: } kitaev@213: try { kitaev@213: byte[] nodeidBuf = new byte[20]; kitaev@213: DataAccess lastUserData = null; kitaev@213: int i; kitaev@213: boolean extraReadsToBaseRev = false; kitaev@213: if (needData && getBaseRevision(start) < start) { kitaev@213: i = getBaseRevision(start); kitaev@213: extraReadsToBaseRev = true; kitaev@213: } else { kitaev@213: i = start; kitaev@213: } kitaev@213: kitaev@213: daIndex.seek(getIndexOffsetInt(i)); kitaev@213: for (; i <= end; i++ ) { kitaev@213: if (inline && needData) { kitaev@213: // inspector reading data (though FilterDataAccess) may have affected index position kitaev@213: daIndex.seek(getIndexOffsetInt(i)); kitaev@213: } kitaev@213: long l = daIndex.readLong(); // 0 kitaev@213: long offset = i == 0 ? 0 : (l >>> 16); kitaev@213: @SuppressWarnings("unused") kitaev@213: int flags = (int) (l & 0X0FFFF); kitaev@213: int compressedLen = daIndex.readInt(); // +8 kitaev@213: int actualLen = daIndex.readInt(); // +12 kitaev@213: int baseRevision = daIndex.readInt(); // +16 kitaev@213: int linkRevision = daIndex.readInt(); // +20 kitaev@213: int parent1Revision = daIndex.readInt(); kitaev@213: int parent2Revision = daIndex.readInt(); kitaev@213: // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty kitaev@213: daIndex.readBytes(nodeidBuf, 0, 20); // +32 kitaev@213: daIndex.skip(12); kitaev@213: DataAccess userDataAccess = null; kitaev@213: if (needData) { kitaev@213: final byte firstByte; kitaev@213: int streamOffset; kitaev@213: DataAccess streamDataAccess; kitaev@213: if (inline) { kitaev@213: streamDataAccess = daIndex; kitaev@213: streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream kitaev@213: } else { kitaev@213: streamOffset = (int) offset; kitaev@213: streamDataAccess = daData; kitaev@213: daData.seek(streamOffset); kitaev@213: } kitaev@213: final boolean patchToPrevious = baseRevision != i; // the only way I found to tell if it's a patch kitaev@213: firstByte = streamDataAccess.readByte(); kitaev@213: if (firstByte == 0x78 /* 'x' */) { kitaev@213: userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, patchToPrevious ? -1 : actualLen); kitaev@213: } else if (firstByte == 0x75 /* 'u' */) { kitaev@213: userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1); kitaev@213: } else { kitaev@213: // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' kitaev@213: // but I don't see reason not to return data as is kitaev@213: userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen); kitaev@213: } kitaev@213: // XXX kitaev@213: if (patchToPrevious) { kitaev@213: // this is a patch kitaev@213: LinkedList patches = new LinkedList(); kitaev@213: while (!userDataAccess.isEmpty()) { kitaev@213: PatchRecord pr = PatchRecord.read(userDataAccess); kitaev@213: // System.out.printf("PatchRecord:%d %d %d\n", pr.start, pr.end, pr.len); kitaev@213: patches.add(pr); kitaev@213: } kitaev@213: userDataAccess.done(); kitaev@213: // kitaev@213: byte[] userData = apply(lastUserData, actualLen, patches); kitaev@213: userDataAccess = new ByteArrayDataAccess(userData); kitaev@213: } kitaev@213: } else { kitaev@213: if (inline) { kitaev@213: daIndex.skip(compressedLen); kitaev@213: } kitaev@213: } kitaev@213: if (!extraReadsToBaseRev || i >= start) { kitaev@213: inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess); kitaev@213: } kitaev@213: if (userDataAccess != null) { kitaev@213: userDataAccess.reset(); kitaev@213: if (lastUserData != null) { kitaev@213: lastUserData.done(); kitaev@213: } kitaev@213: lastUserData = userDataAccess; kitaev@213: } kitaev@213: } kitaev@213: } catch (IOException ex) { kitaev@213: throw new HgBadStateException(ex); // FIXME need better handling kitaev@213: } finally { kitaev@213: daIndex.done(); kitaev@213: if (daData != null) { kitaev@213: daData.done(); kitaev@213: } kitaev@213: } kitaev@213: } kitaev@213: kitaev@213: private int getBaseRevision(int revision) { kitaev@213: return baseRevisions[revision]; kitaev@213: } kitaev@213: kitaev@213: /** kitaev@213: * @return offset of the revision's record in the index (.i) stream kitaev@213: */ kitaev@213: private int getIndexOffsetInt(int revision) { kitaev@213: return inline ? indexRecordOffset[revision] : revision * REVLOGV1_RECORD_SIZE; kitaev@213: } kitaev@213: kitaev@213: private void initOutline() { kitaev@213: if (baseRevisions != null && baseRevisions.length > 0) { kitaev@213: return; kitaev@213: } kitaev@213: ArrayList resBases = new ArrayList(); kitaev@213: ArrayList resOffsets = new ArrayList(); kitaev@213: DataAccess da = getIndexStream(); kitaev@213: try { kitaev@213: if (da.isEmpty()) { kitaev@213: // do not fail with exception if stream is empty, it's likely intentional kitaev@213: baseRevisions = new int[0]; kitaev@213: return; kitaev@213: } kitaev@213: int versionField = da.readInt(); kitaev@213: da.readInt(); // just to skip next 4 bytes of offset + flags kitaev@213: final int INLINEDATA = 1 << 16; kitaev@213: inline = (versionField & INLINEDATA) != 0; kitaev@213: long offset = 0; // first offset is always 0, thus Hg uses it for other purposes kitaev@213: while(true) { kitaev@213: int compressedLen = da.readInt(); kitaev@213: // 8+4 = 12 bytes total read here kitaev@213: @SuppressWarnings("unused") kitaev@213: int actualLen = da.readInt(); kitaev@213: int baseRevision = da.readInt(); kitaev@213: // 12 + 8 = 20 bytes read here kitaev@213: // int linkRevision = di.readInt(); kitaev@213: // int parent1Revision = di.readInt(); kitaev@213: // int parent2Revision = di.readInt(); kitaev@213: // byte[] nodeid = new byte[32]; kitaev@213: resBases.add(baseRevision); kitaev@213: if (inline) { kitaev@213: int o = (int) offset; kitaev@213: if (o != offset) { kitaev@213: // just in case, can't happen, ever, unless HG (or some other bad tool) produces index file kitaev@213: // with inlined data of size greater than 2 Gb. kitaev@213: throw new HgBadStateException("Data too big, offset didn't fit to sizeof(int)"); kitaev@213: } kitaev@213: resOffsets.add(o + REVLOGV1_RECORD_SIZE * resOffsets.size()); kitaev@213: da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size) kitaev@213: } else { kitaev@213: da.skip(3*4 + 32); kitaev@213: } kitaev@213: if (da.isEmpty()) { kitaev@213: // fine, done then kitaev@213: baseRevisions = toArray(resBases); kitaev@213: if (inline) { kitaev@213: indexRecordOffset = toArray(resOffsets); kitaev@213: } kitaev@213: break; kitaev@213: } else { kitaev@213: // start reading next record kitaev@213: long l = da.readLong(); kitaev@213: offset = l >>> 16; kitaev@213: } kitaev@213: } kitaev@213: } catch (IOException ex) { kitaev@213: ex.printStackTrace(); // log error kitaev@213: // too bad, no outline then, but don't fail with NPE kitaev@213: baseRevisions = new int[0]; kitaev@213: } finally { kitaev@213: da.done(); kitaev@213: } kitaev@213: } kitaev@213: kitaev@213: private static int[] toArray(List l) { kitaev@213: int[] rv = new int[l.size()]; kitaev@213: for (int i = 0; i < rv.length; i++) { kitaev@213: rv[i] = l.get(i); kitaev@213: } kitaev@213: return rv; kitaev@213: } kitaev@213: kitaev@213: kitaev@213: // mpatch.c : apply() kitaev@213: // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF kitaev@213: public/*for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List patch) throws IOException { kitaev@213: int last = 0, destIndex = 0; kitaev@213: if (outcomeLen == -1) { kitaev@213: outcomeLen = baseRevisionContent.length(); kitaev@213: for (PatchRecord pr : patch) { kitaev@213: outcomeLen += pr.start - last + pr.len; kitaev@213: last = pr.end; kitaev@213: } kitaev@213: outcomeLen -= last; kitaev@213: last = 0; kitaev@213: } kitaev@213: byte[] rv = new byte[outcomeLen]; kitaev@213: for (PatchRecord pr : patch) { kitaev@213: baseRevisionContent.seek(last); kitaev@213: baseRevisionContent.readBytes(rv, destIndex, pr.start-last); kitaev@213: destIndex += pr.start - last; kitaev@213: System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length); kitaev@213: destIndex += pr.data.length; kitaev@213: last = pr.end; kitaev@213: } kitaev@213: baseRevisionContent.seek(last); kitaev@213: baseRevisionContent.readBytes(rv, destIndex, (int) (baseRevisionContent.length() - last)); kitaev@213: return rv; kitaev@213: } kitaev@213: kitaev@213: // @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description kitaev@213: public static class PatchRecord { kitaev@213: /* kitaev@213: Given there are pr1 and pr2: kitaev@213: pr1.start to pr1.end will be replaced with pr's data (of pr1.len) kitaev@213: pr1.end to pr2.start gets copied from base kitaev@213: */ kitaev@213: public int start, end, len; kitaev@213: public byte[] data; kitaev@213: kitaev@213: // TODO consider PatchRecord that only records data position (absolute in data source), and acquires data as needed kitaev@213: private PatchRecord(int p1, int p2, int length, byte[] src) { kitaev@213: start = p1; kitaev@213: end = p2; kitaev@213: len = length; kitaev@213: data = src; kitaev@213: } kitaev@213: kitaev@213: /*package-local*/ static PatchRecord read(byte[] data, int offset) { kitaev@213: final int x = offset; // shorthand kitaev@213: int p1 = ((data[x] & 0xFF)<< 24) | ((data[x+1] & 0xFF) << 16) | ((data[x+2] & 0xFF) << 8) | (data[x+3] & 0xFF); kitaev@213: int p2 = ((data[x+4] & 0xFF) << 24) | ((data[x+5] & 0xFF) << 16) | ((data[x+6] & 0xFF) << 8) | (data[x+7] & 0xFF); kitaev@213: int len = ((data[x+8] & 0xFF) << 24) | ((data[x+9] & 0xFF) << 16) | ((data[x+10] & 0xFF) << 8) | (data[x+11] & 0xFF); kitaev@213: byte[] dataCopy = new byte[len]; kitaev@213: System.arraycopy(data, x+12, dataCopy, 0, len); kitaev@213: return new PatchRecord(p1, p2, len, dataCopy); kitaev@213: } kitaev@213: kitaev@213: public /*for HgBundle*/ static PatchRecord read(DataAccess da) throws IOException { kitaev@213: int p1 = da.readInt(); kitaev@213: int p2 = da.readInt(); kitaev@213: int len = da.readInt(); kitaev@213: byte[] src = new byte[len]; kitaev@213: da.readBytes(src, 0, len); kitaev@213: return new PatchRecord(p1, p2, len, src); kitaev@213: } kitaev@213: } kitaev@213: kitaev@213: // FIXME byte[] data might be too expensive, for few usecases it may be better to have intermediate Access object (when we don't need full data kitaev@213: // instantly - e.g. calculate hash, or comparing two revisions kitaev@213: public interface Inspector { kitaev@213: // XXX boolean retVal to indicate whether to continue? kitaev@213: // TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call) kitaev@213: // implementers shall not invoke DataAccess.done(), it's accomplished by #iterate at appropraite moment kitaev@213: void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, DataAccess data); kitaev@213: } kitaev@213: }