Mercurial > hg4j

diff src/org/tmatesoft/hg/repo/HgDataFile.java @ 78:c25c5c348d1b
Skip metadata at the beginning of file content. Parse the metadata and recognize copies/renames
author: Artem Tikhomirov <tikhomirov.artem@gmail.com>
date: Tue, 25 Jan 2011 02:13:53 +0100
parents: c677e1593919
children: 5f9635c01681
--- a/src/org/tmatesoft/hg/repo/HgDataFile.java	Mon Jan 24 05:33:47 2011 +0100
+++ b/src/org/tmatesoft/hg/repo/HgDataFile.java	Tue Jan 25 02:13:53 2011 +0100
@@ -18,6 +18,10 @@
 
 import static org.tmatesoft.hg.repo.HgRepository.TIP;
 
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+
 import org.tmatesoft.hg.core.Nodeid;
 import org.tmatesoft.hg.core.Path;
 import org.tmatesoft.hg.internal.RevlogStream;
@@ -36,6 +40,7 @@
 	// slashes, unix-style?
 	// repo location agnostic, just to give info to user, not to access real storage
 	private final Path path;
+	private Metadata metadata;
 	
 	/*package-local*/HgDataFile(HgRepository hgRepo, Path path, RevlogStream content) {
 		super(hgRepo, content);
@@ -59,6 +64,61 @@
 		return content(TIP);
 	}
 
+	/**
+	 * Gets content of the given file revision with the leading metadata block, if any, stripped.
+	 * <p>Content of a data file may start with a metadata block: <code>\1\n</code> marker, then
+	 * <code>key: value</code> lines, then closing <code>\1\n</code> marker. Entries found and the offset
+	 * of the actual content are remembered per revision, so metadata of a revision is parsed only once.
+	 * See http://mercurial.selenic.com/wiki/FileFormats#data.2BAC8-
+	 *
+	 * @param revision local revision index, or TIP for the last revision
+	 * @return file content without the metadata block; content as is when there is no metadata
+	 * @throws IllegalStateException when a metadata line lacks <code>key:</code> part
+	 */
+	@Override
+	public byte[] content(int revision) {
+		if (revision == TIP) {
+			revision = content.revisionCount() - 1; // FIXME maxRevision.
+		}
+		byte[] data = super.content(revision);
+		// metadata is there only when BOTH marker bytes, \1 and \n, open the content;
+		// a mismatch in either of them means regular file data
+		if (data.length < 4 || data[0] != 1 || data[1] != 10) {
+			return data;
+		}
+		int toSkip = 0;
+		if (metadata == null || !metadata.known(revision)) {
+			int lastEntryStart = 2;
+			int lastColon = -1;
+			ArrayList<MetadataEntry> _metadata = new ArrayList<MetadataEntry>();
+			String key = null, value = null;
+			for (int i = 2; i < data.length; i++) {
+				if (data[i] == (int) ':' && lastColon == -1) {
+					// only the first colon of a line separates key from value, value may hold colons of its own
+					key = new String(data, lastEntryStart, i - lastEntryStart);
+					lastColon = i;
+				} else if (data[i] == '\n') {
+					if (key == null || lastColon == -1 || i <= lastColon) {
+						throw new IllegalStateException(); // FIXME log instead and record null key in the metadata. Ex just to fail fast during dev
+					}
+					value = new String(data, lastColon + 1, i - lastColon - 1).trim();
+					_metadata.add(new MetadataEntry(key, value));
+					key = value = null;
+					lastColon = -1;
+					lastEntryStart = i+1;
+				} else if (data[i] == 1 && i + 1 < data.length && data[i+1] == 10) {
+					if (key != null && lastColon != -1 && i > lastColon) {
+						// just in case last entry didn't end with newline; trim the same way regular entries are
+						value = new String(data, lastColon + 1, i - lastColon - 1).trim();
+						_metadata.add(new MetadataEntry(key, value));
+					}
+					// closing marker is two bytes long, actual content starts right after it
+					lastEntryStart = i+2;
+					break;
+				}
+			}
+			_metadata.trimToSize();
+			if (metadata == null) {
+				metadata = new Metadata();
+			}
+			metadata.add(revision, lastEntryStart, _metadata);
+			toSkip = lastEntryStart;
+		} else {
+			toSkip = metadata.dataOffset(revision);
+		}
+		// XXX copy of an array may be memory-hostile, a wrapper with baseOffsetShift(lastEntryStart) would be more convenient
+		byte[] rv = new byte[data.length - toSkip];
+		System.arraycopy(data, toSkip, rv, 0, rv.length);
+		return rv;
+	}
+
 	public void history(Changeset.Inspector inspector) {
 		history(0, content.revisionCount() - 1, inspector);
 	}
@@ -87,4 +147,71 @@
 		content.iterate(start, end, false, insp);
 		getRepo().getChangelog().range(inspector, commitRevisions);
 	}
+
+	/**
+	 * Tells whether metadata of the first revision (index 0) of this file carries a <code>copy</code> entry.
+	 *
+	 * @return <code>true</code> when revision 0 has metadata with <code>copy</code> key,
+	 * <code>false</code> when there is no metadata or no such key
+	 */
+	public boolean isCopy() {
+		// metadata of other revisions may have been cached already, hence need to check
+		// revision 0 explicitly, not merely the presence of the cache
+		if (metadata == null || !metadata.known(0)) {
+			content(0); // FIXME expensive way to find out metadata, distinct RevlogStream.Iterator would be better.
+		}
+		if (metadata == null || !metadata.known(0)) {
+			// revision 0 has no metadata block at all
+			return false;
+		}
+		return metadata.find(0, "copy") != null;
+	}
+
+	/**
+	 * Gives value of the <code>copy</code> metadata entry of revision 0 as a {@link Path}.
+	 *
+	 * @return path built from the <code>copy</code> entry
+	 * @throws UnsupportedOperationException when {@link #isCopy()} is <code>false</code>
+	 */
+	public Path getCopySourceName() {
+		if (!isCopy()) {
+			throw new UnsupportedOperationException(); // XXX REVISIT, think over if Exception is good (clients would check isCopy() anyway, perhaps null is sufficient?)
+		}
+		final String copySource = metadata.find(0, "copy");
+		return Path.create(copySource);
+	}
+	
+	/**
+	 * Gives value of the <code>copyrev</code> metadata entry of revision 0 as a {@link Nodeid}.
+	 *
+	 * @return node id parsed from the <code>copyrev</code> entry
+	 * @throws UnsupportedOperationException when {@link #isCopy()} is <code>false</code>
+	 */
+	public Nodeid getCopySourceRevision() {
+		if (!isCopy()) {
+			throw new UnsupportedOperationException();
+		}
+		final String copyRevision = metadata.find(0, "copyrev");
+		return Nodeid.fromAscii(copyRevision); // XXX reuse/cache Nodeid
+	}
+
+	/**
+	 * Single <code>key: value</code> pair from file metadata.
+	 * Key and value are kept glued together in one string, with the length of the key telling them apart.
+	 */
+	public static final class MetadataEntry {
+		// key immediately followed by value, no separator in between
+		private final String keyAndValue;
+		// length of the key, which is the index where value starts as well
+		private final int keyLength;
+
+		/*package-local*/MetadataEntry(String key, String value) {
+			keyAndValue = key + value;
+			keyLength = key.length();
+		}
+
+		// true only when the argument is exactly the key of this entry, not merely a prefix of it
+		/*package-local*/boolean matchKey(String key) {
+			if (key.length() != keyLength) {
+				return false;
+			}
+			return keyAndValue.startsWith(key);
+		}
+
+		/**
+		 * @return key part of the entry
+		 */
+		public String key() {
+			return keyAndValue.substring(0, keyLength);
+		}
+
+		/**
+		 * @return value part of the entry
+		 */
+		public String value() {
+			return keyAndValue.substring(keyLength);
+		}
+	}
+
+	private static class Metadata {
+		// XXX sparse array needed
+		private final HashMap<Integer, Integer> offsets = new HashMap<Integer, Integer>(5);
+		private final HashMap<Integer, MetadataEntry[]> entries = new HashMap<Integer, MetadataEntry[]>(5);
+		boolean known(int revision) {
+			return offsets.containsKey(revision);
+		}
+		// since this is internal class, callers are supposed to ensure arg correctness (i.e. ask known() before)
+		int dataOffset(int revision) {
+			return offsets.get(revision);
+		}
+		void add(int revision, int dataOffset, Collection<MetadataEntry> e) {
+			offsets.put(revision, dataOffset);
+			entries.put(revision, e.toArray(new MetadataEntry[e.size()]));
+		}
+		String find(int revision, String key) {
+			for (MetadataEntry me : entries.get(revision)) {
+				if (me.matchKey(key)) {
+					return me.value();
+				}
+			}
+			return null;
+		}
+	}
 }
author	Artem Tikhomirov <tikhomirov.artem@gmail.com>
date	Tue, 25 Jan 2011 02:13:53 +0100
parents	c677e1593919
children	5f9635c01681