changeset 78:c25c5c348d1b

Skip metadata at the beginning of file content. Parse metadata, recognize copies/renames
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Tue, 25 Jan 2011 02:13:53 +0100 (2011-01-25)
parents c677e1593919
children 5f9635c01681
files TODO design.txt src/org/tmatesoft/hg/core/Nodeid.java src/org/tmatesoft/hg/repo/HgDataFile.java
diffstat 4 files changed, 140 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/TODO	Mon Jan 24 05:33:47 2011 +0100
+++ b/TODO	Tue Jan 25 02:13:53 2011 +0100
@@ -6,15 +6,18 @@
   - date, 
   + filename
   - filename and follow history
-  - 
+
   
 * hg manifest (aka ls)
+
   
 * hg status
   - copies for revisions
 
+
 * hg cat
 
+
 Proposed:
 - LogCommand.revision(int... rev)+ to walk selected revisions only (list->sort(array) on execute, binary search)
 - LogCommand.before(Date date) and .after()
--- a/design.txt	Mon Jan 24 05:33:47 2011 +0100
+++ b/design.txt	Tue Jan 25 02:13:53 2011 +0100
@@ -35,12 +35,13 @@
 +RevisionWalker (on manifest) and WorkingCopyWalker (io.File) talking to ? and/or dirstate (StatusCollector and WCSC) 
 +RevlogStream - Inflater. Perhaps, InflaterStream instead? branch:wrap-data-access
 +repo.status - use same collector class twice, difference as external code. add external walker that keeps collected maps and use it in Log operation to give files+,files-  
-
++ strip \1\n metadata out from RevlogStream
 
 Implement use of fncache (use names from it - perhaps, would help for Mac issues Alex mentioned) along with 'digest'-ing long file names
 delta merge
 DataAccess - collect debug info (buffer misses, file size/total read operations) to find out better strategy to buffer size detection. Compare performance.
 
+Strip off metadata from the beginning of the stream - DataAccess (with rebase/moveBaseOffset(int)) would be handy
 
 Status operation from GUI - guess, usually on a file/subfolder, hence API should allow for starting path (unlike cmdline, seems useless to implement include/exclide patterns - GUI users hardly enter them, ever)
   -> recently introduced FileWalker may perhaps help solving this (if starts walking from selected folder) for status op against WorkingDir?
--- a/src/org/tmatesoft/hg/core/Nodeid.java	Mon Jan 24 05:33:47 2011 +0100
+++ b/src/org/tmatesoft/hg/core/Nodeid.java	Tue Jan 25 02:13:53 2011 +0100
@@ -117,6 +117,13 @@
 		return new Nodeid(b, false);
 	}
 
+	// String counterpart of fromAscii(byte[], int, int): exactly 40 characters expected, IllegalArgumentException otherwise
+	public static Nodeid fromAscii(String asciiRepresentation) {
+		if (asciiRepresentation.length() == 40) {
+			return fromAscii(asciiRepresentation.getBytes(), 0, 40); // XXX is better impl for String possible?
+		}
+		throw new IllegalArgumentException();
+	}
 	public static Nodeid fromAscii(byte[] asciiRepresentation, int offset, int length) {
 		if (length != 40) {
 			throw new IllegalArgumentException();
--- a/src/org/tmatesoft/hg/repo/HgDataFile.java	Mon Jan 24 05:33:47 2011 +0100
+++ b/src/org/tmatesoft/hg/repo/HgDataFile.java	Tue Jan 25 02:13:53 2011 +0100
@@ -18,6 +18,10 @@
 
 import static org.tmatesoft.hg.repo.HgRepository.TIP;
 
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+
 import org.tmatesoft.hg.core.Nodeid;
 import org.tmatesoft.hg.core.Path;
 import org.tmatesoft.hg.internal.RevlogStream;
@@ -36,6 +40,7 @@
 	// slashes, unix-style?
 	// repo location agnostic, just to give info to user, not to access real storage
 	private final Path path;
+	private Metadata metadata;
 	
 	/*package-local*/HgDataFile(HgRepository hgRepo, Path path, RevlogStream content) {
 		super(hgRepo, content);
@@ -59,6 +64,61 @@
 		return content(TIP);
 	}
 
+	// Revision data of a file may start with a metadata block: \1\n, "key: value" lines, \1\n. The block is not part of the file content,
+	// it's parsed (once per revision) and stripped off here. @see http://mercurial.selenic.com/wiki/FileFormats#data.2BAC8-
+	@Override
+	public byte[] content(int revision) {
+		if (revision == TIP) {
+			revision = content.revisionCount() - 1; // FIXME maxRevision.
+		}
+		byte[] data = super.content(revision);
+		if (data.length < 4 || data[0] != 1 || data[1] != 10) { // no metadata unless BOTH marker bytes, \1 and \n, start the data
+			return data;
+		}
+		int toSkip = 0;
+		if (metadata == null || !metadata.known(revision)) {
+			int lastEntryStart = 2;
+			int lastColon = -1;
+			ArrayList<MetadataEntry> _metadata = new ArrayList<MetadataEntry>();
+			String key = null, value = null;
+			for (int i = 2; i < data.length; i++) {
+				if (key == null && data[i] == (int) ':') { // only first colon of an entry separates the key, value (e.g. file name) may contain colons
+					key = new String(data, lastEntryStart, i - lastEntryStart);
+					lastColon = i;
+				} else if (data[i] == '\n') {
+					if (key == null || lastColon == -1 || i <= lastColon) {
+						throw new IllegalStateException(); // FIXME log instead and record null key in the metadata. Ex just to fail fast during dev
+					}
+					value = new String(data, lastColon + 1, i - lastColon - 1).trim();
+					_metadata.add(new MetadataEntry(key, value));
+					key = value = null;
+					lastColon = -1;
+					lastEntryStart = i+1;
+				} else if (data[i] == 1 && i + 1 < data.length && data[i+1] == 10) {
+					if (key != null && lastColon != -1 && i > lastColon) {
+						// just in case last entry didn't end with newline; trimmed the same way regular entries are
+						value = new String(data, lastColon + 1, i - lastColon - 1).trim();
+						_metadata.add(new MetadataEntry(key, value));
+					}
+					lastEntryStart = i+2; // actual content starts past both bytes of the closing \1\n marker
+					break;
+				}
+			}
+			_metadata.trimToSize();
+			if (metadata == null) {
+				metadata = new Metadata();
+			}
+			metadata.add(revision, lastEntryStart, _metadata);
+			toSkip = lastEntryStart;
+		} else {
+			toSkip = metadata.dataOffset(revision);
+		}
+		// XXX copy of an array may be memory-hostile, a wrapper with baseOffsetShift(lastEntryStart) would be more convenient
+		byte[] rv = new byte[data.length - toSkip];
+		System.arraycopy(data, toSkip, rv, 0, rv.length);
+		return rv;
+	}
+
 	public void history(Changeset.Inspector inspector) {
 		history(0, content.revisionCount() - 1, inspector);
 	}
@@ -87,4 +147,71 @@
 		content.iterate(start, end, false, insp);
 		getRepo().getChangelog().range(inspector, commitRevisions);
 	}
+
+	public boolean isCopy() { // true when metadata of revision 0 has "copy" entry
+		if (metadata == null || !metadata.known(0)) { // metadata may have been collected for other revisions only, make sure revision 0 gets parsed
+			content(0); // FIXME expensive way to find out metadata, distinct RevlogStream.Iterator would be better.
+		}
+		if (metadata == null || !metadata.known(0)) {
+			return false; // content(0) recorded nothing for revision 0
+		}
+		return metadata.find(0, "copy") != null;
+	}
+
+	public Path getCopySourceName() { // path built from "copy" metadata entry of revision 0
+		if (!isCopy()) {
+			throw new UnsupportedOperationException(); // XXX REVISIT, think over if Exception is good (clients would check isCopy() anyway, perhaps null is sufficient?)
+		}
+		return Path.create(metadata.find(0, "copy"));
+	}
+	
+	public Nodeid getCopySourceRevision() { // Nodeid parsed from "copyrev" metadata entry of revision 0
+		if (!isCopy()) {
+			throw new UnsupportedOperationException();
+		}
+		return Nodeid.fromAscii(metadata.find(0, "copyrev")); // XXX reuse/cache Nodeid
+	}
+
+	public static final class MetadataEntry { // single metadata entry, key and value kept in one string
+		private final int valueStart; // index in entry where value begins, equals to key length
+		private final String entry; // key immediately followed by value, no separator
+		/*package-local*/MetadataEntry(String key, String value) {
+			valueStart = key.length();
+			entry = key + value;
+		}
+		/*package-local*/boolean matchKey(String key) {
+			return entry.startsWith(key) && valueStart == key.length();
+		}
+		public String key() {
+			return entry.substring(0, valueStart);
+		}
+		public String value() {
+			return entry.substring(valueStart, entry.length());
+		}
+	}
+
+	private static class Metadata { // per-revision registry: offset of actual content and parsed metadata entries
+		// XXX sparse array needed
+		private final HashMap<Integer, Integer> offsets = new HashMap<Integer, Integer>(5);
+		private final HashMap<Integer, MetadataEntry[]> entries = new HashMap<Integer, MetadataEntry[]>(5);
+		boolean known(int revision) {
+			return offsets.containsKey(Integer.valueOf(revision));
+		}
+		int dataOffset(int revision) { // since this is internal class, callers are supposed to ensure arg correctness (i.e. ask known() before)
+			return offsets.get(Integer.valueOf(revision)).intValue();
+		}
+		void add(int revision, int dataOffset, Collection<MetadataEntry> e) {
+			offsets.put(Integer.valueOf(revision), Integer.valueOf(dataOffset));
+			entries.put(Integer.valueOf(revision), e.toArray(new MetadataEntry[e.size()]));
+		}
+		String find(int revision, String key) {
+			final MetadataEntry[] all = entries.get(Integer.valueOf(revision));
+			for (int i = 0; i < all.length; i++) {
+				if (all[i].matchKey(key)) {
+					return all[i].value(); // first entry with the key wins
+				}
+			}
+			return null;
+		}
+	}
 }