# HG changeset patch # User Artem Tikhomirov # Date 1295918033 -3600 # Node ID c25c5c348d1bcdd8764bd0ea708427f4281cbd69 # Parent c677e159391925a50b9a23f557426b2246bc9c5d Skip metadata in the beginning of a file content. Parse metadata, recognize copies/renames diff -r c677e1593919 -r c25c5c348d1b TODO --- a/TODO Mon Jan 24 05:33:47 2011 +0100 +++ b/TODO Tue Jan 25 02:13:53 2011 +0100 @@ -6,15 +6,18 @@ - date, + filename - filename and follow history - - + * hg manifest (aka ls) + * hg status - copies for revisions + * hg cat + Proposed: - LogCommand.revision(int... rev)+ to walk selected revisions only (list->sort(array) on execute, binary search) - LogCommand.before(Date date) and .after() diff -r c677e1593919 -r c25c5c348d1b design.txt --- a/design.txt Mon Jan 24 05:33:47 2011 +0100 +++ b/design.txt Tue Jan 25 02:13:53 2011 +0100 @@ -35,12 +35,13 @@ +RevisionWalker (on manifest) and WorkingCopyWalker (io.File) talking to ? and/or dirstate (StatusCollector and WCSC) +RevlogStream - Inflater. Perhaps, InflaterStream instead? branch:wrap-data-access +repo.status - use same collector class twice, difference as external code. add external walker that keeps collected maps and use it in Log operation to give files+,files- - ++ strip \1\n metadata out from RevlogStream Implement use of fncache (use names from it - perhaps, would help for Mac issues Alex mentioned) along with 'digest'-ing long file names delta merge DataAccess - collect debug info (buffer misses, file size/total read operations) to find out better strategy to buffer size detection. Compare performance. 
+Strip off metadata from beg of the stream - DataAccess (with rebase/moveBaseOffset(int)) would be handy Status operation from GUI - guess, usually on a file/subfolder, hence API should allow for starting path (unlike cmdline, seems useless to implement include/exclide patterns - GUI users hardly enter them, ever) -> recently introduced FileWalker may perhaps help solving this (if starts walking from selected folder) for status op against WorkingDir? diff -r c677e1593919 -r c25c5c348d1b src/org/tmatesoft/hg/core/Nodeid.java --- a/src/org/tmatesoft/hg/core/Nodeid.java Mon Jan 24 05:33:47 2011 +0100 +++ b/src/org/tmatesoft/hg/core/Nodeid.java Tue Jan 25 02:13:53 2011 +0100 @@ -117,6 +117,13 @@ return new Nodeid(b, false); } + public static Nodeid fromAscii(String asciiRepresentation) { + if (asciiRepresentation.length() != 40) { + throw new IllegalArgumentException(); + } + // XXX is better impl for String possible? + return fromAscii(asciiRepresentation.getBytes(), 0, 40); + } public static Nodeid fromAscii(byte[] asciiRepresentation, int offset, int length) { if (length != 40) { throw new IllegalArgumentException(); diff -r c677e1593919 -r c25c5c348d1b src/org/tmatesoft/hg/repo/HgDataFile.java --- a/src/org/tmatesoft/hg/repo/HgDataFile.java Mon Jan 24 05:33:47 2011 +0100 +++ b/src/org/tmatesoft/hg/repo/HgDataFile.java Tue Jan 25 02:13:53 2011 +0100 @@ -18,6 +18,10 @@ import static org.tmatesoft.hg.repo.HgRepository.TIP; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; + import org.tmatesoft.hg.core.Nodeid; import org.tmatesoft.hg.core.Path; import org.tmatesoft.hg.internal.RevlogStream; @@ -36,6 +40,7 @@ // slashes, unix-style? 
// repo location agnostic, just to give info to user, not to access real storage private final Path path; + private Metadata metadata; /*package-local*/HgDataFile(HgRepository hgRepo, Path path, RevlogStream content) { super(hgRepo, content); @@ -59,6 +64,61 @@ return content(TIP); } + // Data files may start with a metadata block enclosed in \1\n markers (lines of "key: value"); it is parsed once per revision and stripped from the returned content + // @see http://mercurial.selenic.com/wiki/FileFormats#data.2BAC8- + @Override + public byte[] content(int revision) { + if (revision == TIP) { + revision = content.revisionCount() - 1; // FIXME maxRevision. + } + byte[] data = super.content(revision); + if (data.length < 4 || data[0] != 1 || data[1] != 10) { // metadata is present only when content starts with both marker bytes, \1 and \n + return data; + } + int toSkip = 0; + if (metadata == null || !metadata.known(revision)) { + int lastEntryStart = 2; + int lastColon = -1; + ArrayList<MetadataEntry> _metadata = new ArrayList<MetadataEntry>(); + String key = null, value = null; + for (int i = 2; i < data.length; i++) { + if (key == null && data[i] == (int) ':') { // only the first colon of an entry separates key from value, the value itself may contain colons + key = new String(data, lastEntryStart, i - lastEntryStart); + lastColon = i; + } else if (data[i] == '\n') { + if (key == null || lastColon == -1 || i <= lastColon) { + throw new IllegalStateException(); // FIXME log instead and record null key in the metadata. 
Ex just to fail fast during dev + } + value = new String(data, lastColon + 1, i - lastColon - 1).trim(); + _metadata.add(new MetadataEntry(key, value)); + key = value = null; + lastColon = -1; + lastEntryStart = i+1; + } else if (data[i] == 1 && i + 1 < data.length && data[i+1] == 10) { + if (key != null && lastColon != -1 && i > lastColon) { + // just in case last entry didn't end with newline + value = new String(data, lastColon + 1, i - lastColon - 1); + _metadata.add(new MetadataEntry(key, value)); + } + lastEntryStart = i+2; // actual content starts right after both bytes of the closing \1\n marker + break; + } + } + _metadata.trimToSize(); + if (metadata == null) { + metadata = new Metadata(); + } + metadata.add(revision, lastEntryStart, _metadata); + toSkip = lastEntryStart; + } else { + toSkip = metadata.dataOffset(revision); + } + // XXX copy of an array may be memory-hostile, a wrapper with baseOffsetShift(lastEntryStart) would be more convenient + byte[] rv = new byte[data.length - toSkip]; + System.arraycopy(data, toSkip, rv, 0, rv.length); + return rv; + } + public void history(Changeset.Inspector inspector) { history(0, content.revisionCount() - 1, inspector); } @@ -87,4 +147,71 @@ content.iterate(start, end, false, insp); getRepo().getChangelog().range(inspector, commitRevisions); } + + public boolean isCopy() { + if (metadata == null || !metadata.known(0)) { + content(0); // FIXME expensive way to find out metadata, distinct RevlogStream.Iterator would be better. + } + if (metadata == null || !metadata.known(0)) { + return false; + } + return metadata.find(0, "copy") != null; + } + + public Path getCopySourceName() { + if (isCopy()) { + return Path.create(metadata.find(0, "copy")); + } + throw new UnsupportedOperationException(); // XXX REVISIT, think over if Exception is good (clients would check isCopy() anyway, perhaps null is sufficient?) 
+ } + + public Nodeid getCopySourceRevision() { + if (isCopy()) { + return Nodeid.fromAscii(metadata.find(0, "copyrev")); // XXX reuse/cache Nodeid + } + throw new UnsupportedOperationException(); + } + + public static final class MetadataEntry { + private final String entry; + private final int valueStart; + /*package-local*/MetadataEntry(String key, String value) { + entry = key + value; + valueStart = key.length(); + } + /*package-local*/boolean matchKey(String key) { + return key.length() == valueStart && entry.startsWith(key); + } + public String key() { + return entry.substring(0, valueStart); + } + public String value() { + return entry.substring(valueStart); + } + } + + private static class Metadata { + // XXX sparse array needed + private final HashMap<Integer, Integer> offsets = new HashMap<Integer, Integer>(5); + private final HashMap<Integer, MetadataEntry[]> entries = new HashMap<Integer, MetadataEntry[]>(5); + boolean known(int revision) { + return offsets.containsKey(revision); + } + // since this is internal class, callers are supposed to ensure arg correctness (i.e. ask known() before) + int dataOffset(int revision) { + return offsets.get(revision); + } + void add(int revision, int dataOffset, Collection<MetadataEntry> e) { + offsets.put(revision, dataOffset); + entries.put(revision, e.toArray(new MetadataEntry[e.size()])); + } + String find(int revision, String key) { + for (MetadataEntry me : entries.get(revision)) { + if (me.matchKey(key)) { + return me.value(); + } + } + return null; + } + } }