Mercurial > jhg
changeset 77:c677e1593919
Moved RevlogStream implementation into .internal
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Mon, 24 Jan 2011 05:33:47 +0100 (2011-01-24) |
parents | 658fa6b3a371 |
children | c25c5c348d1b |
files | TODO cmdline/org/tmatesoft/hg/console/Log.java src/org/tmatesoft/hg/core/LogCommand.java src/org/tmatesoft/hg/core/Path.java src/org/tmatesoft/hg/internal/RevlogStream.java src/org/tmatesoft/hg/repo/Changelog.java src/org/tmatesoft/hg/repo/HgBundle.java src/org/tmatesoft/hg/repo/HgDataFile.java src/org/tmatesoft/hg/repo/HgManifest.java src/org/tmatesoft/hg/repo/HgRepository.java src/org/tmatesoft/hg/repo/Revlog.java src/org/tmatesoft/hg/repo/RevlogStream.java |
diffstat | 12 files changed, 432 insertions(+), 402 deletions(-) [+] |
line wrap: on
line diff
--- a/TODO Mon Jan 24 04:38:09 2011 +0100 +++ b/TODO Mon Jan 24 05:33:47 2011 +0100 @@ -2,12 +2,16 @@ ============================== Committed: * hg log - user, date, branch, limit - filename(multiple?) + + user, branch, limit + - date, + + filename + - filename and follow history + - * hg manifest (aka ls) * hg status + - copies for revisions * hg cat
--- a/cmdline/org/tmatesoft/hg/console/Log.java Mon Jan 24 04:38:09 2011 +0100 +++ b/cmdline/org/tmatesoft/hg/console/Log.java Mon Jan 24 05:33:47 2011 +0100 @@ -82,15 +82,16 @@ for (String fname : cmdLineOpts.files) { HgDataFile f1 = hgRepo.getFileNode(fname); System.out.println("History of the file: " + f1.getPath()); + String normalizesName = hgRepo.getPathHelper().rewrite(fname); if (cmdLineOpts.limit == -1) { - cmd.file(Path.create(fname)).execute(dump); + cmd.file(Path.create(normalizesName)).execute(dump); } else { int[] r = new int[] { 0, f1.getRevisionCount() }; if (fixRange(r, dump.reverseOrder, cmdLineOpts.limit) == 0) { System.out.println("No changes"); continue; } - cmd.range(r[0], r[1]).file(Path.create(fname)).execute(dump); + cmd.range(r[0], r[1]).file(Path.create(normalizesName)).execute(dump); } dump.complete(); }
--- a/src/org/tmatesoft/hg/core/LogCommand.java Mon Jan 24 04:38:09 2011 +0100 +++ b/src/org/tmatesoft/hg/core/LogCommand.java Mon Jan 24 05:33:47 2011 +0100 @@ -50,6 +50,7 @@ private int startRev = 0, endRev = TIP; private Handler delegate; private Calendar date; + private Path file; private Cset changeset; public LogCommand(HgRepository hgRepo) { @@ -129,10 +130,15 @@ return this; } - // multiple? Bad idea, would need to include extra method into Handler to tell start of next file + /** + * Visit history of a given file only. + * @param file path relative to repository root. Pass <code>null</code> to reset. + */ public LogCommand file(Path file) { + // multiple? Bad idea, would need to include extra method into Handler to tell start of next file // implicit --follow in this case - throw HgRepository.notImplemented(); + this.file = file; + return this; } /** @@ -161,7 +167,11 @@ delegate = handler; count = 0; changeset = new Cset(new StatusCollector(repo), new PathPool(repo.getPathHelper())); - repo.getChangelog().range(startRev, endRev, this); + if (file == null) { + repo.getChangelog().range(startRev, endRev, this); + } else { + repo.getFileNode(file).history(startRev, endRev, this); + } } finally { delegate = null; changeset = null;
--- a/src/org/tmatesoft/hg/core/Path.java Mon Jan 24 04:38:09 2011 +0100 +++ b/src/org/tmatesoft/hg/core/Path.java Mon Jan 24 05:33:47 2011 +0100 @@ -69,6 +69,9 @@ if (path == null) { throw new IllegalArgumentException(); } + if (path.indexOf('\\') != -1) { + throw new IllegalArgumentException(); + } Path rv = new Path(path); return rv; }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/org/tmatesoft/hg/internal/RevlogStream.java Mon Jan 24 05:33:47 2011 +0100 @@ -0,0 +1,379 @@ +/* + * Copyright (c) 2010-2011 TMate Software Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * For information on how to redistribute this software under + * the terms of a license other than GNU General Public License + * contact TMate Software at support@svnkit.com + */ +package org.tmatesoft.hg.internal; + +import static org.tmatesoft.hg.repo.HgRepository.TIP; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; +import java.util.zip.DataFormatException; +import java.util.zip.Inflater; + +import org.tmatesoft.hg.core.Nodeid; + + +/** + * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations), + * or numerous RevlogStream with separate representation of the underlaying data (cached, lazy ChunkStream)? + * + * @see http://mercurial.selenic.com/wiki/Revlog + * @see http://mercurial.selenic.com/wiki/RevlogNG + * + * @author Artem Tikhomirov + * @author TMate Software Ltd. + */ +public class RevlogStream { + + private List<IndexEntry> index; // indexed access highly needed + private boolean inline = false; + private final File indexFile; + private final DataAccessProvider dataAccess; + + // if we need anything else from HgRepo, might replace DAP parameter with HgRepo and query it for DAP. + public RevlogStream(DataAccessProvider dap, File indexFile) { + this.dataAccess = dap; + this.indexFile = indexFile; + } + + /*package*/ DataAccess getIndexStream() { + return dataAccess.create(indexFile); + } + + /*package*/ DataAccess getDataStream() { + final String indexName = indexFile.getName(); + File dataFile = new File(indexFile.getParentFile(), indexName.substring(0, indexName.length() - 1) + "d"); + return dataAccess.create(dataFile); + } + + public int revisionCount() { + initOutline(); + return index.size(); + } + + public int dataLength(int revision) { + // XXX in fact, use of iterate() instead of this implementation may be quite reasonable. + // + final int indexSize = revisionCount(); + DataAccess daIndex = getIndexStream(); // XXX may supply a hint that I'll need really few bytes of data (although at some offset) + if (revision == TIP) { + revision = indexSize - 1; + } + try { + int recordOffset = inline ? (int) index.get(revision).offset : revision * REVLOGV1_RECORD_SIZE; + daIndex.seek(recordOffset + 12); // 6+2+4 + int actualLen = daIndex.readInt(); + return actualLen; + } catch (IOException ex) { + ex.printStackTrace(); // log error. FIXME better handling + throw new IllegalStateException(ex); + } finally { + daIndex.done(); + } + } + + // Perhaps, RevlogStream should be limited to use of plain int revisions for access, + // while Nodeids should be kept on the level up, in Revlog. Guess, Revlog better keep + // map of nodeids, and once this comes true, we may get rid of this method. + // Unlike its counterpart, Revlog#getLocalRevisionNumber, doesn't fail with exception if node not found, + // returns a predefined constant instead + public int findLocalRevisionNumber(Nodeid nodeid) { + // XXX this one may be implemented with iterate() once there's mechanism to stop iterations + final int indexSize = revisionCount(); + DataAccess daIndex = getIndexStream(); + try { + byte[] nodeidBuf = new byte[20]; + for (int i = 0; i < indexSize; i++) { + daIndex.skip(8); + int compressedLen = daIndex.readInt(); + daIndex.skip(20); + daIndex.readBytes(nodeidBuf, 0, 20); + if (nodeid.equalsTo(nodeidBuf)) { + return i; + } + daIndex.skip(inline ? 12 + compressedLen : 12); + } + } catch (IOException ex) { + ex.printStackTrace(); // log error. FIXME better handling + throw new IllegalStateException(ex); + } finally { + daIndex.done(); + } + return Integer.MIN_VALUE; + } + + + private final int REVLOGV1_RECORD_SIZE = 64; + + // should be possible to use TIP, ALL, or -1, -2, -n notation of Hg + // ? boolean needsNodeid + public void iterate(int start, int end, boolean needData, Inspector inspector) { + initOutline(); + final int indexSize = index.size(); + if (indexSize == 0) { + return; + } + if (end == TIP) { + end = indexSize - 1; + } + if (start == TIP) { + start = indexSize - 1; + } + if (start < 0 || start >= indexSize) { + throw new IllegalArgumentException("Bad left range boundary " + start); + } + if (end < start || end >= indexSize) { + throw new IllegalArgumentException("Bad right range boundary " + end); + } + // XXX may cache [start .. end] from index with a single read (pre-read) + + DataAccess daIndex = null, daData = null; + daIndex = getIndexStream(); + if (needData && !inline) { + daData = getDataStream(); + } + try { + byte[] nodeidBuf = new byte[20]; + byte[] lastData = null; + int i; + boolean extraReadsToBaseRev = false; + if (needData && index.get(start).baseRevision < start) { + i = index.get(start).baseRevision; + extraReadsToBaseRev = true; + } else { + i = start; + } + + daIndex.seek(inline ? (int) index.get(i).offset : i * REVLOGV1_RECORD_SIZE); + for (; i <= end; i++ ) { + long l = daIndex.readLong(); + @SuppressWarnings("unused") + long offset = l >>> 16; + @SuppressWarnings("unused") + int flags = (int) (l & 0X0FFFF); + int compressedLen = daIndex.readInt(); + int actualLen = daIndex.readInt(); + int baseRevision = daIndex.readInt(); + int linkRevision = daIndex.readInt(); + int parent1Revision = daIndex.readInt(); + int parent2Revision = daIndex.readInt(); + // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty + daIndex.readBytes(nodeidBuf, 0, 20); + daIndex.skip(12); + byte[] data = null; + if (needData) { + byte[] dataBuf = new byte[compressedLen]; + if (inline) { + daIndex.readBytes(dataBuf, 0, compressedLen); + } else { + daData.seek(index.get(i).offset); + daData.readBytes(dataBuf, 0, compressedLen); + } + if (dataBuf[0] == 0x78 /* 'x' */) { + try { + Inflater zlib = new Inflater(); // XXX Consider reuse of Inflater, and/or stream alternative + zlib.setInput(dataBuf, 0, compressedLen); + byte[] result = new byte[actualLen*2]; // FIXME need to use zlib.finished() instead + int resultLen = zlib.inflate(result); + zlib.end(); + data = new byte[resultLen]; + System.arraycopy(result, 0, data, 0, resultLen); + } catch (DataFormatException ex) { + ex.printStackTrace(); + data = new byte[0]; // FIXME need better failure strategy + } + } else if (dataBuf[0] == 0x75 /* 'u' */) { + data = new byte[dataBuf.length - 1]; + System.arraycopy(dataBuf, 1, data, 0, data.length); + } else { + // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' + // but I don't see reason not to return data as is + data = dataBuf; + } + // XXX + if (baseRevision != i) { // XXX not sure if this is the right way to detect a patch + // this is a patch + LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>(); + int patchElementIndex = 0; + do { + PatchRecord pr = PatchRecord.read(data, patchElementIndex); + patches.add(pr); + patchElementIndex += 12 + pr.len; + } while (patchElementIndex < data.length); + // + byte[] baseRevContent = lastData; + data = apply(baseRevContent, actualLen, patches); + } + } else { + if (inline) { + daIndex.skip(compressedLen); + } + } + if (!extraReadsToBaseRev || i >= start) { + inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, data); + } + lastData = data; + } + } catch (IOException ex) { + throw new IllegalStateException(ex); // FIXME need better handling + } finally { + daIndex.done(); + if (daData != null) { + daData.done(); + } + } + } + + private void initOutline() { + if (index != null && !index.isEmpty()) { + return; + } + ArrayList<IndexEntry> res = new ArrayList<IndexEntry>(); + DataAccess da = getIndexStream(); + try { + int versionField = da.readInt(); + da.readInt(); // just to skip next 2 bytes of offset + flags + final int INLINEDATA = 1 << 16; + inline = (versionField & INLINEDATA) != 0; + long offset = 0; // first offset is always 0, thus Hg uses it for other purposes + while(true) { + int compressedLen = da.readInt(); + // 8+4 = 12 bytes total read here + @SuppressWarnings("unused") + int actualLen = da.readInt(); + int baseRevision = da.readInt(); + // 12 + 8 = 20 bytes read here +// int linkRevision = di.readInt(); +// int parent1Revision = di.readInt(); +// int parent2Revision = di.readInt(); +// byte[] nodeid = new byte[32]; + if (inline) { + res.add(new IndexEntry(offset + REVLOGV1_RECORD_SIZE * res.size(), baseRevision)); + da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size) + } else { + res.add(new IndexEntry(offset, baseRevision)); + da.skip(3*4 + 32); + } + if (da.isEmpty()) { + // fine, done then + res.trimToSize(); + index = res; + break; + } else { + // start reading next record + long l = da.readLong(); + offset = l >>> 16; + } + } + } catch (IOException ex) { + ex.printStackTrace(); // log error + // too bad, no outline then. + index = Collections.emptyList(); + } finally { + da.done(); + } + + } + + + // perhaps, package-local or protected, if anyone else from low-level needs them + // XXX think over if we should keep offset in case of separate data file - we read the field anyway. Perhaps, distinct entry classes for Inline and non-inline indexes? + private static class IndexEntry { + public final long offset; // for separate .i and .d - copy of index record entry, for inline index - actual offset of the record in the .i file (record entry + revision * record size)) + //public final int length; // data past fixed record (need to decide whether including header size or not), and whether length is of compressed data or not + public final int baseRevision; + + public IndexEntry(long o, int baseRev) { + offset = o; + baseRevision = baseRev; + } + } + + // mpatch.c : apply() + // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF + public/*for HgBundle; until moved to better place*/static byte[] apply(byte[] baseRevisionContent, int outcomeLen, List<PatchRecord> patch) { + int last = 0, destIndex = 0; + if (outcomeLen == -1) { + outcomeLen = baseRevisionContent.length; + for (PatchRecord pr : patch) { + outcomeLen += pr.start - last + pr.len; + last = pr.end; + } + outcomeLen -= last; + last = 0; + } + byte[] rv = new byte[outcomeLen]; + for (PatchRecord pr : patch) { + System.arraycopy(baseRevisionContent, last, rv, destIndex, pr.start-last); + destIndex += pr.start - last; + System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length); + destIndex += pr.data.length; + last = pr.end; + } + System.arraycopy(baseRevisionContent, last, rv, destIndex, baseRevisionContent.length - last); + return rv; + } + + // @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description + public static class PatchRecord { + /* + Given there are pr1 and pr2: + pr1.start to pr1.end will be replaced with pr's data (of pr1.len) + pr1.end to pr2.start gets copied from base + */ + public int start, end, len; + public byte[] data; + + // TODO consider PatchRecord that only records data position (absolute in data source), and acquires data as needed + private PatchRecord(int p1, int p2, int length, byte[] src) { + start = p1; + end = p2; + len = length; + data = src; + } + + /*package-local*/ static PatchRecord read(byte[] data, int offset) { + final int x = offset; // shorthand + int p1 = ((data[x] & 0xFF)<< 24) | ((data[x+1] & 0xFF) << 16) | ((data[x+2] & 0xFF) << 8) | (data[x+3] & 0xFF); + int p2 = ((data[x+4] & 0xFF) << 24) | ((data[x+5] & 0xFF) << 16) | ((data[x+6] & 0xFF) << 8) | (data[x+7] & 0xFF); + int len = ((data[x+8] & 0xFF) << 24) | ((data[x+9] & 0xFF) << 16) | ((data[x+10] & 0xFF) << 8) | (data[x+11] & 0xFF); + byte[] dataCopy = new byte[len]; + System.arraycopy(data, x+12, dataCopy, 0, len); + return new PatchRecord(p1, p2, len, dataCopy); + } + + public /*for HgBundle*/ static PatchRecord read(DataAccess da) throws IOException { + int p1 = da.readInt(); + int p2 = da.readInt(); + int len = da.readInt(); + byte[] src = new byte[len]; + da.readBytes(src, 0, len); + return new PatchRecord(p1, p2, len, src); + } + } + + // FIXME byte[] data might be too expensive, for few usecases it may be better to have intermediate Access object (when we don't need full data + // instantly - e.g. calculate hash, or comparing two revisions + public interface Inspector { + // XXX boolean retVal to indicate whether to continue? + // TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call) + void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, byte[] data); + } +}
--- a/src/org/tmatesoft/hg/repo/Changelog.java Mon Jan 24 04:38:09 2011 +0100 +++ b/src/org/tmatesoft/hg/repo/Changelog.java Mon Jan 24 05:33:47 2011 +0100 @@ -21,6 +21,7 @@ import java.util.List; import org.tmatesoft.hg.core.Nodeid; +import org.tmatesoft.hg.internal.RevlogStream; /** @@ -40,7 +41,7 @@ } public void range(int start, int end, final Changeset.Inspector inspector) { - Revlog.Inspector i = new Revlog.Inspector() { + RevlogStream.Inspector i = new RevlogStream.Inspector() { public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, byte[] data) { Changeset cset = Changeset.parse(data, 0, data.length); @@ -53,7 +54,7 @@ public List<Changeset> range(int start, int end) { final ArrayList<Changeset> rv = new ArrayList<Changeset>(end - start + 1); - Revlog.Inspector i = new Revlog.Inspector() { + RevlogStream.Inspector i = new RevlogStream.Inspector() { public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, byte[] data) { Changeset cset = Changeset.parse(data, 0, data.length); @@ -68,7 +69,7 @@ if (revisions == null || revisions.length == 0) { return; } - Revlog.Inspector i = new Revlog.Inspector() { + RevlogStream.Inspector i = new RevlogStream.Inspector() { public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, byte[] data) { if (Arrays.binarySearch(revisions, revisionNumber) >= 0) {
--- a/src/org/tmatesoft/hg/repo/HgBundle.java Mon Jan 24 04:38:09 2011 +0100 +++ b/src/org/tmatesoft/hg/repo/HgBundle.java Mon Jan 24 05:33:47 2011 +0100 @@ -25,6 +25,7 @@ import org.tmatesoft.hg.internal.DataAccess; import org.tmatesoft.hg.internal.DataAccessProvider; import org.tmatesoft.hg.internal.DigestHelper; +import org.tmatesoft.hg.internal.RevlogStream; /**
--- a/src/org/tmatesoft/hg/repo/HgDataFile.java Mon Jan 24 04:38:09 2011 +0100 +++ b/src/org/tmatesoft/hg/repo/HgDataFile.java Mon Jan 24 05:33:47 2011 +0100 @@ -20,6 +20,7 @@ import org.tmatesoft.hg.core.Nodeid; import org.tmatesoft.hg.core.Path; +import org.tmatesoft.hg.internal.RevlogStream; @@ -45,6 +46,7 @@ return content != null; // XXX need better impl } + // human-readable (i.e. "COPYING", not "store/data/_c_o_p_y_i_n_g.i") public Path getPath() { return path; // hgRepo.backresolve(this) -> name? } @@ -65,8 +67,17 @@ if (!exists()) { throw new IllegalStateException("Can't get history of invalid repository file node"); } + final int last = content.revisionCount() - 1; + if (start < 0 || start > last) { + throw new IllegalArgumentException(); + } + if (end == TIP) { + end = last; + } else if (end < start || end > last) { + throw new IllegalArgumentException(); + } final int[] commitRevisions = new int[end - start + 1]; - Revlog.Inspector insp = new Revlog.Inspector() { + RevlogStream.Inspector insp = new RevlogStream.Inspector() { int count = 0; public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, byte[] data) {
--- a/src/org/tmatesoft/hg/repo/HgManifest.java Mon Jan 24 04:38:09 2011 +0100 +++ b/src/org/tmatesoft/hg/repo/HgManifest.java Mon Jan 24 05:33:47 2011 +0100 @@ -17,6 +17,7 @@ package org.tmatesoft.hg.repo; import org.tmatesoft.hg.core.Nodeid; +import org.tmatesoft.hg.internal.RevlogStream; /** @@ -31,7 +32,7 @@ } public void walk(int start, int end, final Inspector inspector) { - Revlog.Inspector insp = new Revlog.Inspector() { + RevlogStream.Inspector insp = new RevlogStream.Inspector() { private boolean gtg = true; // good to go
--- a/src/org/tmatesoft/hg/repo/HgRepository.java Mon Jan 24 04:38:09 2011 +0100 +++ b/src/org/tmatesoft/hg/repo/HgRepository.java Mon Jan 24 05:33:47 2011 +0100 @@ -24,6 +24,7 @@ import org.tmatesoft.hg.core.Path; import org.tmatesoft.hg.internal.DataAccessProvider; import org.tmatesoft.hg.internal.RequiresFile; +import org.tmatesoft.hg.internal.RevlogStream; import org.tmatesoft.hg.util.FileWalker; import org.tmatesoft.hg.util.PathRewrite; @@ -124,11 +125,13 @@ public HgDataFile getFileNode(String path) { String nPath = normalizePath.rewrite(path); String storagePath = dataPathHelper.rewrite(nPath); - return getFileNode(Path.create(storagePath)); + RevlogStream content = resolve(Path.create(storagePath)); + return new HgDataFile(this, Path.create(nPath), content); } public HgDataFile getFileNode(Path path) { - RevlogStream content = resolve(path); + String storagePath = dataPathHelper.rewrite(path.toString()); + RevlogStream content = resolve(Path.create(storagePath)); // XXX no content when no file? or HgDataFile.exists() to detect that? How about files that were removed in previous releases? return new HgDataFile(this, path, content); }
--- a/src/org/tmatesoft/hg/repo/Revlog.java Mon Jan 24 04:38:09 2011 +0100 +++ b/src/org/tmatesoft/hg/repo/Revlog.java Mon Jan 24 05:33:47 2011 +0100 @@ -27,6 +27,7 @@ import java.util.Set; import org.tmatesoft.hg.core.Nodeid; +import org.tmatesoft.hg.internal.RevlogStream; /** @@ -92,7 +93,7 @@ */ public byte[] content(int revision) { final byte[][] dataPtr = new byte[1][]; - Revlog.Inspector insp = new Revlog.Inspector() { + RevlogStream.Inspector insp = new RevlogStream.Inspector() { public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, byte[] data) { dataPtr[0] = data; } @@ -123,7 +124,7 @@ if (parent2 != null && parent2.length < 20) { throw new IllegalArgumentException(parent2.toString()); } - class ParentCollector implements Revlog.Inspector { + class ParentCollector implements RevlogStream.Inspector { public int p1 = -1; public int p2 = -1; public byte[] nodeid; @@ -158,15 +159,6 @@ } } - // FIXME byte[] data might be too expensive, for few usecases it may be better to have intermediate Access object (when we don't need full data - // instantly - e.g. calculate hash, or comparing two revisions - // XXX seems that RevlogStream is better place for this class. - public interface Inspector { - // XXX boolean retVal to indicate whether to continue? - // TODO specify nodeid and data length, and reuse policy (i.e. if revlog stream doesn't reuse nodeid[] for each call) - void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, byte[] data); - } - /* * XXX think over if it's better to do either: * pw = getChangelog().new ParentWalker(); pw.init() and pass pw instance around as needed @@ -192,7 +184,7 @@ secondParent = new HashMap<Nodeid, Nodeid>(firstParent.size() >> 1); // assume branches/merges are less frequent allNodes = new LinkedHashSet<Nodeid>(); - Inspector insp = new Inspector() { + RevlogStream.Inspector insp = new RevlogStream.Inspector() { final Nodeid[] sequentialRevisionNodeids = new Nodeid[revisionCount]; int ix = 0; public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, byte[] data) {
--- a/src/org/tmatesoft/hg/repo/RevlogStream.java Mon Jan 24 04:38:09 2011 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,376 +0,0 @@ -/* - * Copyright (c) 2010-2011 TMate Software Ltd - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * For information on how to redistribute this software under - * the terms of a license other than GNU General Public License - * contact TMate Software at support@svnkit.com - */ -package org.tmatesoft.hg.repo; - -import static org.tmatesoft.hg.repo.HgRepository.TIP; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.LinkedList; -import java.util.List; -import java.util.zip.DataFormatException; -import java.util.zip.Inflater; - -import org.tmatesoft.hg.core.Nodeid; -import org.tmatesoft.hg.internal.DataAccess; -import org.tmatesoft.hg.internal.DataAccessProvider; - - -/** - * XXX move to .internal? - * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations), - * or numerous RevlogStream with separate representation of the underlaying data (cached, lazy ChunkStream)? - * - * @see http://mercurial.selenic.com/wiki/Revlog - * @see http://mercurial.selenic.com/wiki/RevlogNG - * - * @author Artem Tikhomirov - * @author TMate Software Ltd. - */ -public class RevlogStream { - - private List<IndexEntry> index; // indexed access highly needed - private boolean inline = false; - private final File indexFile; - private final DataAccessProvider dataAccess; - - // if we need anything else from HgRepo, might replace DAP parameter with HgRepo and query it for DAP. - RevlogStream(DataAccessProvider dap, File indexFile) { - this.dataAccess = dap; - this.indexFile = indexFile; - } - - /*package*/ DataAccess getIndexStream() { - return dataAccess.create(indexFile); - } - - /*package*/ DataAccess getDataStream() { - final String indexName = indexFile.getName(); - File dataFile = new File(indexFile.getParentFile(), indexName.substring(0, indexName.length() - 1) + "d"); - return dataAccess.create(dataFile); - } - - public int revisionCount() { - initOutline(); - return index.size(); - } - - public int dataLength(int revision) { - // XXX in fact, use of iterate() instead of this implementation may be quite reasonable. - // - final int indexSize = revisionCount(); - DataAccess daIndex = getIndexStream(); // XXX may supply a hint that I'll need really few bytes of data (although at some offset) - if (revision == TIP) { - revision = indexSize - 1; - } - try { - int recordOffset = inline ? (int) index.get(revision).offset : revision * REVLOGV1_RECORD_SIZE; - daIndex.seek(recordOffset + 12); // 6+2+4 - int actualLen = daIndex.readInt(); - return actualLen; - } catch (IOException ex) { - ex.printStackTrace(); // log error. FIXME better handling - throw new IllegalStateException(ex); - } finally { - daIndex.done(); - } - } - - // Perhaps, RevlogStream should be limited to use of plain int revisions for access, - // while Nodeids should be kept on the level up, in Revlog. Guess, Revlog better keep - // map of nodeids, and once this comes true, we may get rid of this method. - // Unlike its counterpart, Revlog#getLocalRevisionNumber, doesn't fail with exception if node not found, - // returns a predefined constant instead - /*package-local*/ int findLocalRevisionNumber(Nodeid nodeid) { - // XXX this one may be implemented with iterate() once there's mechanism to stop iterations - final int indexSize = revisionCount(); - DataAccess daIndex = getIndexStream(); - try { - byte[] nodeidBuf = new byte[20]; - for (int i = 0; i < indexSize; i++) { - daIndex.skip(8); - int compressedLen = daIndex.readInt(); - daIndex.skip(20); - daIndex.readBytes(nodeidBuf, 0, 20); - if (nodeid.equalsTo(nodeidBuf)) { - return i; - } - daIndex.skip(inline ? 12 + compressedLen : 12); - } - } catch (IOException ex) { - ex.printStackTrace(); // log error. FIXME better handling - throw new IllegalStateException(ex); - } finally { - daIndex.done(); - } - return Integer.MIN_VALUE; - } - - - private final int REVLOGV1_RECORD_SIZE = 64; - - // should be possible to use TIP, ALL, or -1, -2, -n notation of Hg - // ? boolean needsNodeid - public void iterate(int start, int end, boolean needData, Revlog.Inspector inspector) { - initOutline(); - final int indexSize = index.size(); - if (indexSize == 0) { - return; - } - if (end == TIP) { - end = indexSize - 1; - } - if (start == TIP) { - start = indexSize - 1; - } - if (start < 0 || start >= indexSize) { - throw new IllegalArgumentException("Bad left range boundary " + start); - } - if (end < start || end >= indexSize) { - throw new IllegalArgumentException("Bad right range boundary " + end); - } - // XXX may cache [start .. end] from index with a single read (pre-read) - - DataAccess daIndex = null, daData = null; - daIndex = getIndexStream(); - if (needData && !inline) { - daData = getDataStream(); - } - try { - byte[] nodeidBuf = new byte[20]; - byte[] lastData = null; - int i; - boolean extraReadsToBaseRev = false; - if (needData && index.get(start).baseRevision < start) { - i = index.get(start).baseRevision; - extraReadsToBaseRev = true; - } else { - i = start; - } - - daIndex.seek(inline ? (int) index.get(i).offset : i * REVLOGV1_RECORD_SIZE); - for (; i <= end; i++ ) { - long l = daIndex.readLong(); - @SuppressWarnings("unused") - long offset = l >>> 16; - @SuppressWarnings("unused") - int flags = (int) (l & 0X0FFFF); - int compressedLen = daIndex.readInt(); - int actualLen = daIndex.readInt(); - int baseRevision = daIndex.readInt(); - int linkRevision = daIndex.readInt(); - int parent1Revision = daIndex.readInt(); - int parent2Revision = daIndex.readInt(); - // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty - daIndex.readBytes(nodeidBuf, 0, 20); - daIndex.skip(12); - byte[] data = null; - if (needData) { - byte[] dataBuf = new byte[compressedLen]; - if (inline) { - daIndex.readBytes(dataBuf, 0, compressedLen); - } else { - daData.seek(index.get(i).offset); - daData.readBytes(dataBuf, 0, compressedLen); - } - if (dataBuf[0] == 0x78 /* 'x' */) { - try { - Inflater zlib = new Inflater(); // XXX Consider reuse of Inflater, and/or stream alternative - zlib.setInput(dataBuf, 0, compressedLen); - byte[] result = new byte[actualLen*2]; // FIXME need to use zlib.finished() instead - int resultLen = zlib.inflate(result); - zlib.end(); - data = new byte[resultLen]; - System.arraycopy(result, 0, data, 0, resultLen); - } catch (DataFormatException ex) { - ex.printStackTrace(); - data = new byte[0]; // FIXME need better failure strategy - } - } else if (dataBuf[0] == 0x75 /* 'u' */) { - data = new byte[dataBuf.length - 1]; - System.arraycopy(dataBuf, 1, data, 0, data.length); - } else { - // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' - // but I don't see reason not to return data as is - data = dataBuf; - } - // XXX - if (baseRevision != i) { // XXX not sure if this is the right way to detect a patch - // this is a patch - LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>(); - int patchElementIndex = 0; - do { - PatchRecord pr = PatchRecord.read(data, patchElementIndex); - patches.add(pr); - patchElementIndex += 12 + pr.len; - } while (patchElementIndex < data.length); - // - byte[] baseRevContent = lastData; - data = apply(baseRevContent, actualLen, patches); - } - } else { - if (inline) { - daIndex.skip(compressedLen); - } - } - if (!extraReadsToBaseRev || i >= start) { - inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, data); - } - lastData = data; - } - } catch (IOException ex) { - throw new IllegalStateException(ex); // FIXME need better handling - } finally { - daIndex.done(); - if (daData != null) { - daData.done(); - } - } - } - - private void initOutline() { - if (index != null && !index.isEmpty()) { - return; - } - ArrayList<IndexEntry> res = new ArrayList<IndexEntry>(); - DataAccess da = getIndexStream(); - try { - int versionField = da.readInt(); - da.readInt(); // just to skip next 2 bytes of offset + flags - final int INLINEDATA = 1 << 16; - inline = (versionField & INLINEDATA) != 0; - long offset = 0; // first offset is always 0, thus Hg uses it for other purposes - while(true) { - int compressedLen = da.readInt(); - // 8+4 = 12 bytes total read here - @SuppressWarnings("unused") - int actualLen = da.readInt(); - int baseRevision = da.readInt(); - // 12 + 8 = 20 bytes read here -// int linkRevision = di.readInt(); -// int parent1Revision = di.readInt(); -// int parent2Revision = di.readInt(); -// byte[] nodeid = new byte[32]; - if (inline) { - res.add(new IndexEntry(offset + REVLOGV1_RECORD_SIZE * res.size(), baseRevision)); - da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size) - } else { - res.add(new IndexEntry(offset, baseRevision)); - da.skip(3*4 + 32); - } - if (da.isEmpty()) { - // fine, done then - res.trimToSize(); - index = res; - break; - } else { - // start reading next record - long l = da.readLong(); - offset = l >>> 16; - } - } - } catch (IOException ex) { - ex.printStackTrace(); // log error - // too bad, no outline then. - index = Collections.emptyList(); - } finally { - da.done(); - } - - } - - - // perhaps, package-local or protected, if anyone else from low-level needs them - // XXX think over if we should keep offset in case of separate data file - we read the field anyway. Perhaps, distinct entry classes for Inline and non-inline indexes? - private static class IndexEntry { - public final long offset; // for separate .i and .d - copy of index record entry, for inline index - actual offset of the record in the .i file (record entry + revision * record size)) - //public final int length; // data past fixed record (need to decide whether including header size or not), and whether length is of compressed data or not - public final int baseRevision; - - public IndexEntry(long o, int baseRev) { - offset = o; - baseRevision = baseRev; - } - } - - // mpatch.c : apply() - // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF - /*package-local for HgBundle; until moved to better place*/static byte[] apply(byte[] baseRevisionContent, int outcomeLen, List<PatchRecord> patch) { - int last = 0, destIndex = 0; - if (outcomeLen == -1) { - outcomeLen = baseRevisionContent.length; - for (PatchRecord pr : patch) { - outcomeLen += pr.start - last + pr.len; - last = pr.end; - } - outcomeLen -= last; - last = 0; - } - byte[] rv = new byte[outcomeLen]; - for (PatchRecord pr : patch) { - System.arraycopy(baseRevisionContent, last, rv, destIndex, pr.start-last); - destIndex += pr.start - last; - System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length); - destIndex += pr.data.length; - last = pr.end; - } - System.arraycopy(baseRevisionContent, last, rv, destIndex, baseRevisionContent.length - last); - return rv; - } - - // @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description - /*package-local*/ static class PatchRecord { // copy of struct frag from mpatch.c - /* - Given there are pr1 and pr2: - pr1.start to pr1.end will be replaced with pr's data (of pr1.len) - pr1.end to pr2.start gets copied from base - */ - int start, end, len; - byte[] data; - - // TODO consider PatchRecord that only records data position (absolute in data source), and acquires data as needed - private PatchRecord(int p1, int p2, int length, byte[] src) { - start = p1; - end = p2; - len = length; - data = src; - } - - /*package-local*/ static PatchRecord read(byte[] data, int offset) { - final int x = offset; // shorthand - int p1 = ((data[x] & 0xFF)<< 24) | ((data[x+1] & 0xFF) << 16) | ((data[x+2] & 0xFF) << 8) | (data[x+3] & 0xFF); - int p2 = ((data[x+4] & 0xFF) << 24) | ((data[x+5] & 0xFF) << 16) | ((data[x+6] & 0xFF) << 8) | (data[x+7] & 0xFF); - int len = ((data[x+8] & 0xFF) << 24) | ((data[x+9] & 0xFF) << 16) | ((data[x+10] & 0xFF) << 8) | (data[x+11] & 0xFF); - byte[] dataCopy = new byte[len]; - System.arraycopy(data, x+12, dataCopy, 0, len); - return new PatchRecord(p1, p2, len, dataCopy); - } - - /*package-local*/ static PatchRecord read(DataAccess da) throws IOException { - int p1 = da.readInt(); - int p2 = da.readInt(); - int len = da.readInt(); - byte[] src = new byte[len]; - da.readBytes(src, 0, len); - return new PatchRecord(p1, p2, len, src); - } - - - } -}