changeset 83:a5275143664c

Complete path hash calculation of fncache requirement
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Tue, 25 Jan 2011 22:44:14 +0100 (2011-01-25)
parents 7255c971dd66
children 08754fce5778
files TODO src/org/tmatesoft/hg/internal/DigestHelper.java src/org/tmatesoft/hg/internal/Internals.java src/org/tmatesoft/hg/internal/StoragePathHelper.java test/org/tmatesoft/hg/test/TestStorePath.java
diffstat 5 files changed, 189 insertions(+), 44 deletions(-) [+]
line wrap: on
line diff
--- a/TODO	Tue Jan 25 04:14:42 2011 +0100
+++ b/TODO	Tue Jan 25 22:44:14 2011 +0100
@@ -5,7 +5,7 @@
   + user, branch, limit
   - date, 
   + filename
-  - filename and follow history
+  + filename and follow history
 
   
 * hg manifest (aka ls)
@@ -21,7 +21,8 @@
 Proposed:
 - LogCommand.revision(int... rev)+ to walk selected revisions only (list->sort(array) on execute, binary search)
 - LogCommand.before(Date date) and .after()
-- LogCommand.match() to specify pattern, no selected file()s only?  
+- LogCommand.match() to specify pattern, no selected file()s only?
+* RepositoryFacade and CommandContext  
 
 Read-only support, version 1.1
 ==============================
--- a/src/org/tmatesoft/hg/internal/DigestHelper.java	Tue Jan 25 04:14:42 2011 +0100
+++ b/src/org/tmatesoft/hg/internal/DigestHelper.java	Tue Jan 25 22:44:14 2011 +0100
@@ -101,6 +101,16 @@
 		digest = alg.digest();
 		return this;
 	}
+	
+	/**
+	 * Calculates SHA-1 digest over the supplied character sequences, fed to the algorithm one after another,
+	 * in the order given. Result is kept in this helper's <code>digest</code> field.
+	 * <p>NOTE(review): each sequence is turned into bytes with {@link String#getBytes()}, i.e. with platform default
+	 * charset, hence digest of non-ASCII input may vary from platform to platform - confirm what callers expect.
+	 *
+	 * @param seq sequences to digest; neither the array nor any of its elements may be <code>null</code>
+	 * @return <code>this</code> for convenience
+	 */
+	public DigestHelper sha1(CharSequence... seq) {
+		final MessageDigest sha1 = getSHA1();
+		for (int k = 0; k < seq.length; k++) {
+			sha1.update(seq[k].toString().getBytes());
+		}
+		digest = sha1.digest();
+		return this;
+	}
 
 	public static String toHexString(byte[] data, final int offset, final int count) {
 		char[] result = new char[count << 1];
--- a/src/org/tmatesoft/hg/internal/Internals.java	Tue Jan 25 04:14:42 2011 +0100
+++ b/src/org/tmatesoft/hg/internal/Internals.java	Tue Jan 25 22:44:14 2011 +0100
@@ -33,7 +33,7 @@
 	private int revlogVersion = 0;
 	private int requiresFlags = 0;
 
-	void setStorageConfig(int version, int flags) {
+	public/*for tests, otherwise pkg*/ void setStorageConfig(int version, int flags) {
 		revlogVersion = version;
 		requiresFlags = flags;
 	}
--- a/src/org/tmatesoft/hg/internal/StoragePathHelper.java	Tue Jan 25 04:14:42 2011 +0100
+++ b/src/org/tmatesoft/hg/internal/StoragePathHelper.java	Tue Jan 25 22:44:14 2011 +0100
@@ -19,7 +19,6 @@
 import java.util.Arrays;
 import java.util.TreeSet;
 
-import org.tmatesoft.hg.repo.HgRepository;
 import org.tmatesoft.hg.util.PathRewrite;
 
 /**
@@ -49,15 +48,13 @@
 		final String STR_STORE = "store/";
 		final String STR_DATA = "data/";
 		final String STR_DH = "dh/";
+		final String reservedChars = "\\:*?\"<>|";
+		char[] hexByte = new char[2];
 		
 		path = path.replace(".hg/", ".hg.hg/").replace(".i/", ".i.hg/").replace(".d/", ".d.hg/");
 		StringBuilder sb = new StringBuilder(path.length() << 1);
 		if (store || fncache) {
 			// encodefilename
-			final String reservedChars = "\\:*?\"<>|";
-			// in fact, \\ is unlikely to match, ever - we've replaced all of them already, above. Just regards to store.py 
-			int x;
-			char[] hexByte = new char[2];
 			for (int i = 0; i < path.length(); i++) {
 				final char ch = path.charAt(i);
 				if (ch >= 'a' && ch <= 'z') {
@@ -65,15 +62,13 @@
 				} else if (ch >= 'A' && ch <= 'Z') {
 					sb.append('_');
 					sb.append(Character.toLowerCase(ch)); // Perhaps, (char) (((int) ch) + 32)? Even better, |= 0x20? 
-				} else if ( (x = reservedChars.indexOf(ch)) != -1) {
+				} else if (reservedChars.indexOf(ch) != -1) {
 					sb.append('~');
-					sb.append(toHexByte(reservedChars.charAt(x), hexByte));
+					sb.append(toHexByte(ch, hexByte));
 				} else if ((ch >= '~' /*126*/ && ch <= 255) || ch < ' ' /*32*/) {
 					sb.append('~');
 					sb.append(toHexByte(ch, hexByte));
 				} else if (ch == '_') {
-					// note, encoding from store.py:_buildencodefun and :_build_lower_encodefun
-					// differ in the way they process '_' (latter doesn't escape it)
 					sb.append('_');
 					sb.append('_');
 				} else {
@@ -82,49 +77,116 @@
 			}
 			// auxencode
 			if (fncache) {
-				x = 0; // last segment start
-				final TreeSet<String> windowsReservedFilenames = new TreeSet<String>();
-				windowsReservedFilenames.addAll(Arrays.asList("con prn aux nul com1 com2 com3 com4 com5 com6 com7 com8 com9 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9".split(" "))); 
+				encodeWindowsDeviceNames(sb);
+			}
+		}
+		final int MAX_PATH_LEN = 120;
+		if (fncache && (sb.length() + STR_DATA.length() + ".i".length() > MAX_PATH_LEN)) {
+			String digest = new DigestHelper().sha1(STR_DATA, path, ".i").asHexString();
+			final int DIR_PREFIX_LEN = 8;
+			 // not sure why (-4) is here. 120 - 40 = up to 80 for path with ext. dh/ + ext(.i) = 3+2
+			final int MAX_DIR_PREFIX = 8 * (DIR_PREFIX_LEN + 1) - 4;
+			sb = new StringBuilder(MAX_PATH_LEN);
+			for (int i = 0; i < path.length(); i++) {
+				final char ch = path.charAt(i);
+				if (ch >= 'a' && ch <= 'z') {
+					sb.append(ch);
+				} else if (ch >= 'A' && ch <= 'Z') {
+					sb.append((char) (ch | 0x20)); // lowercase 
+				} else if (reservedChars.indexOf(ch) != -1) {
+					sb.append('~');
+					sb.append(toHexByte(ch, hexByte));
+				} else if ((ch >= '~' /*126*/ && ch <= 255) || ch < ' ' /*32*/) {
+					sb.append('~');
+					sb.append(toHexByte(ch, hexByte));
+				} else {
+					sb.append(ch);
+				}
+			}
+			encodeWindowsDeviceNames(sb);
+			int fnameStart = sb.lastIndexOf("/"); // since we rewrite file names, it never ends with slash (for dirs, I'd pass length-2);
+			StringBuilder completeHashName = new StringBuilder(MAX_PATH_LEN);
+			completeHashName.append(STR_STORE);
+			completeHashName.append(STR_DH);
+			if (fnameStart == -1) {
+				// no dirs, just long filename
+				sb.setLength(MAX_PATH_LEN - 40 /*digest.length()*/ - STR_DH.length() - ".i".length());
+				completeHashName.append(sb);
+			} else {
+				StringBuilder sb2 = new StringBuilder(MAX_PATH_LEN);
+				int x = 0;
 				do {
 					int i = sb.indexOf("/", x);
-					if (i == -1) {
-						i = sb.length();
-					}
-					// windows reserved filenames are at least of length 3 
-					if (i - x >= 3) {
-						boolean found = false;
-						if (i-x == 3) {
-							found = windowsReservedFilenames.contains(sb.subSequence(x, i));
-						} else if (sb.charAt(x+3) == '.') { // implicit i-x > 3
-							found = windowsReservedFilenames.contains(sb.subSequence(x, x+3));
-						} else if (i-x > 4 && sb.charAt(x+4) == '.') {
-							found = windowsReservedFilenames.contains(sb.subSequence(x, x+4));
+					final int sb2Len = sb2.length(); 
+					if (i-x <= DIR_PREFIX_LEN) { // a b c d e f g h /
+						sb2.append(sb, x, i + 1); // with slash
+					} else {
+						sb2.append(sb, x, x + DIR_PREFIX_LEN);
+						// may unexpectedly end with bad character
+						final int last = sb2.length()-1;
+						char lastChar = sb2.charAt(last); 
+						assert lastChar == sb.charAt(x + DIR_PREFIX_LEN - 1);
+						if (lastChar == '.' || lastChar == ' ') {
+							sb2.setCharAt(last, '_');
 						}
-						if (found) {
-							sb.setCharAt(x, '~');
-							sb.insert(x+1, toHexByte(sb.charAt(x+2), hexByte));
-							i += 2;
-						}
+						sb2.append('/');
 					}
-					if (dotencode && (sb.charAt(x) == '.' || sb.charAt(x) == ' ')) {
-						sb.insert(x+1, toHexByte(sb.charAt(x), hexByte));
-						sb.setCharAt(x, '~'); // setChar *after* charAt/insert to get ~2e, not ~7e for '.'
-						i += 2;
+					if (sb2.length()-1 > MAX_DIR_PREFIX) {
+						sb2.setLength(sb2Len); // strip off last segment, it's too much
+						break;
 					}
-					x = i+1;
-				} while (x < sb.length());
+					x = i+1; 
+				} while (x < fnameStart);
+				assert sb2.charAt(sb2.length() - 1) == '/';
+				int left = MAX_PATH_LEN - sb2.length() - 40 /*digest.length()*/ - STR_DH.length() - ".i".length();
+				assert left >= 0;
+				fnameStart++; // move from / to actual name
+				sb2.append(sb, fnameStart, fnameStart + left > sb.length() ? sb.length() : fnameStart+left);
+				completeHashName.append(sb2);
 			}
-		}
-		final int MAX_PATH_LEN_IN_HGSTORE = 120;
-		if (fncache && (sb.length() + STR_DATA.length() > MAX_PATH_LEN_IN_HGSTORE)) {
-			throw HgRepository.notImplemented(); // FIXME digest and fncache use
-		}
-		if (store) {
+			completeHashName.append(digest);
+			sb = completeHashName;
+		} else if (store) {
 			sb.insert(0, STR_STORE + STR_DATA);
 		}
 		sb.append(".i");
 		return sb.toString();
 	}
+	
+	// Windows reserved device names (listed in lower case), built once instead of on every call
+	private static final TreeSet<String> WINDOWS_RESERVED_FILENAMES = new TreeSet<String>(Arrays.asList(
+			"con prn aux nul com1 com2 com3 com4 com5 com6 com7 com8 com9 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9".split(" ")));
+
+	/**
+	 * Walks '/'-separated segments of the path in place and escapes those Windows can't deal with:
+	 * <ul>
+	 * <li>segment which base name (part up to the first dot, or whole segment if there's no dot) is one of
+	 * Windows reserved device names gets its third character replaced with ~XX hex escape (aux.bla -> au~78.bla)
+	 * <li>when <code>dotencode</code> is on, leading '.' or ' ' of a segment is replaced with its ~XX hex escape
+	 * </ul>
+	 * Base name check covers segments like "aux." as well (4 characters, dot right after the reserved name),
+	 * these used to be compared as a whole and never matched.
+	 * <p>NOTE(review): auxencode in Mercurial's store.py seems to escape trailing '.' or ' ' of a segment, too;
+	 * nothing like that is done here - confirm if it's needed.
+	 *
+	 * @param sb path to process, modified in place; empty builder is left intact
+	 */
+	private void encodeWindowsDeviceNames(StringBuilder sb) {
+		final char[] hexByte = new char[2];
+		int x = 0; // start of the segment being processed
+		// plain while loop (not do-while) to keep away from sb.charAt(0) when builder is empty
+		while (x < sb.length()) {
+			int i = sb.indexOf("/", x); // end of the segment, exclusive
+			if (i == -1) {
+				i = sb.length();
+			}
+			// base name ends at the first dot of the segment, if any
+			int baseEnd = x;
+			while (baseEnd < i && sb.charAt(baseEnd) != '.') {
+				baseEnd++;
+			}
+			final int baseLen = baseEnd - x;
+			// windows reserved filenames are 3 or 4 characters long
+			if ((baseLen == 3 || baseLen == 4) && WINDOWS_RESERVED_FILENAMES.contains(sb.substring(x, baseEnd))) {
+				// insert hex value of the third char *before* overwriting that char with '~'
+				sb.insert(x+3, toHexByte(sb.charAt(x+2), hexByte));
+				sb.setCharAt(x+2, '~');
+				i += 2; // segment has grown by two characters
+			}
+			if (dotencode && (sb.charAt(x) == '.' || sb.charAt(x) == ' ')) {
+				sb.insert(x+1, toHexByte(sb.charAt(x), hexByte));
+				sb.setCharAt(x, '~'); // setChar *after* charAt/insert to get ~2e, not ~7e for '.'
+				i += 2;
+			}
+			x = i+1; // skip the slash
+		}
+	}
 
 	private static char[] toHexByte(int ch, char[] buf) {
 		assert buf.length > 1;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/org/tmatesoft/hg/test/TestStorePath.java	Tue Jan 25 22:44:14 2011 +0100
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2011 TMate Software Ltd
+ *  
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For information on how to redistribute this software under
+ * the terms of a license other than GNU General Public License
+ * contact TMate Software at support@svnkit.com
+ */
+package org.tmatesoft.hg.test;
+
+import org.tmatesoft.hg.internal.Internals;
+import org.tmatesoft.hg.util.PathRewrite;
+
+/**
+ * Standalone check (run {@link #main(String[])}) of the path rewrite obtained from
+ * {@link Internals#buildDataFilesHelper()}: escaping of Windows reserved file names and hashing of long paths.
+ * Outcome of each check is printed to stdout; process exits with status 1 if any check has failed.
+ *
+ * @author Artem Tikhomirov
+ * @author TMate Software Ltd.
+ */
+public class TestStorePath {
+	
+	private final PathRewrite storePathHelper;
+	// number of checks that didn't produce expected value
+	private int failures = 0;
+
+	public static void main(String[] args) {
+		final TestStorePath test = new TestStorePath();
+		test.testWindowsFilenames();
+		test.testHashLongPath();
+		if (test.failures > 0) {
+			// make failure visible to whoever launched the test, not only in the output
+			System.out.println("Failed checks: " + test.failures);
+			System.exit(1);
+		}
+	}
+	
+	public TestStorePath() {
+		final Internals i = new Internals();
+		// NOTE(review): 0x7 presumably turns on all of store, fncache and dotencode - verify against Internals flags
+		i.setStorageConfig(1, 0x7);
+		storePathHelper = i.buildDataFilesHelper();
+	}
+
+	public void testWindowsFilenames() {
+		// see http://mercurial.selenic.com/wiki/fncacheRepoFormat#Encoding_of_Windows_reserved_names
+		String n1 = "aux.bla/bla.aux/prn/PRN/lpt/com3/nul/coma/foo.NUL/normal.c";
+		String r1 = "store/data/au~78.bla/bla.aux/pr~6e/_p_r_n/lpt/co~6d3/nu~6c/coma/foo._n_u_l/normal.c.i";
+		report("Windows filenames are ", n1, r1);
+	}
+
+	public void testHashLongPath() {
+		String n1 = "AUX/SECOND/X.PRN/FOURTH/FI:FTH/SIXTH/SEVENTH/EIGHTH/NINETH/TENTH/ELEVENTH/LOREMIPSUM.TXT";
+		String r1 = "store/dh/au~78/second/x.prn/fourth/fi~3afth/sixth/seventh/eighth/nineth/tenth/loremia20419e358ddff1bf8751e38288aff1d7c32ec05.i";
+		String n2 = "enterprise/openesbaddons/contrib-imola/corba-bc/netbeansplugin/wsdlExtension/src/main/java/META-INF/services/org.netbeans.modules.xml.wsdl.bindingsupport.spi.ExtensibilityElementTemplateProvider";
+		String r2 = "store/dh/enterpri/openesba/contrib-/corba-bc/netbeans/wsdlexte/src/main/java/org.net7018f27961fdf338a598a40c4683429e7ffb9743.i";
+		String n3 = "AUX.THE-QUICK-BROWN-FOX-JU:MPS-OVER-THE-LAZY-DOG-THE-QUICK-BROWN-FOX-JUMPS-OVER-THE-LAZY-DOG.TXT";
+		String r3 = "store/dh/au~78.the-quick-brown-fox-ju~3amps-over-the-lazy-dog-the-quick-brown-fox-jud4dcadd033000ab2b26eb66bae1906bcb15d4a70.i";
+		// TODO segment[8] == [. ], segment[8] in the middle of windows reserved name or character (to see if ~xx is broken)
+		report("1", n1, r1);
+		report("2", n2, r2);
+		report("3", n3, r3);
+	}
+
+	/**
+	 * Rewrites the name, compares outcome with the expected value and prints the verdict.
+	 * Comparison is explicit (no <code>null</code> as a token of success), so that <code>null</code>
+	 * coming from the rewrite is reported as failure, not as OK.
+	 *
+	 * @param msg prefix for the output line
+	 * @param name repository path to rewrite
+	 * @param expected anticipated storage path
+	 */
+	private void report(String msg, String name, String expected) {
+		final String result = storePathHelper.rewrite(name);
+		if (expected.equals(result)) {
+			System.out.println(msg + "OK");
+		} else {
+			failures++;
+			System.out.println(msg + "WRONG:" + result + ", expected:" + expected);
+		}
+	}
+}