changeset 412:63c5a9d7ca3f smartgit3

Follow-up for Issue 29: unify path translation for manifest and dirstate
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 21 Mar 2012 14:54:02 +0100
parents 464b4404e75d
children bb278ccf9866 d0e5dc3cae6e
files src/org/tmatesoft/hg/internal/EncodingHelper.java src/org/tmatesoft/hg/internal/Internals.java src/org/tmatesoft/hg/repo/HgDirstate.java src/org/tmatesoft/hg/repo/HgInternals.java src/org/tmatesoft/hg/repo/HgManifest.java src/org/tmatesoft/hg/repo/HgRepository.java test/org/tmatesoft/hg/test/TestStorePath.java
diffstat 7 files changed, 88 insertions(+), 41 deletions(-) [+]
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/internal/EncodingHelper.java	Tue Mar 20 17:56:50 2012 +0100
+++ b/src/org/tmatesoft/hg/internal/EncodingHelper.java	Wed Mar 21 14:54:02 2012 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 TMate Software Ltd
+ * Copyright (c) 2011-2012 TMate Software Ltd
  *  
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -16,9 +16,11 @@
  */
 package org.tmatesoft.hg.internal;
 
-import java.io.UnsupportedEncodingException;
-
-import org.tmatesoft.hg.core.HgBadStateException;
+import java.nio.ByteBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
 
 /**
  * Keep all encoding-related issues in the single place
@@ -27,13 +29,33 @@
  */
 public class EncodingHelper {
 	// XXX perhaps, shall not be full of statics, but rather an instance coming from e.g. HgRepository?
+	/*
+	 * To understand what Mercurial thinks of UTF-8 and Unix byte approach to names, see
+	 * http://mercurial.808500.n3.nabble.com/Unicode-support-request-td3430704.html
+	 */
+	
+	private final CharsetEncoder encoder;
+	private final CharsetDecoder decoder;
+	
+	EncodingHelper(Charset fsEncoding) {
+		decoder = fsEncoding.newDecoder();
+		encoder = fsEncoding.newEncoder();
+	}
 
-	public static String fromManifest(byte[] data, int start, int length) {
+	public String fromManifest(byte[] data, int start, int length) {
 		try {
-			return new String(data, start, length, "ISO-8859-1");
-		} catch (UnsupportedEncodingException ex) {
-			// can't happen
-			throw new HgBadStateException(ex);
+			return decoder.decode(ByteBuffer.wrap(data, start, length)).toString();
+		} catch (CharacterCodingException ex) {
+			// resort to system-default
+			return new String(data, start, length);
 		}
 	}
+
+	public String fromDirstate(byte[] data, int start, int length) throws CharacterCodingException {
+		return decoder.decode(ByteBuffer.wrap(data, start, length)).toString();
+	}
+
+	public Charset charset() {
+		return encoder.charset();
+	}
 }
--- a/src/org/tmatesoft/hg/internal/Internals.java	Tue Mar 20 17:56:50 2012 +0100
+++ b/src/org/tmatesoft/hg/internal/Internals.java	Wed Mar 21 14:54:02 2012 +0100
@@ -71,7 +71,7 @@
 	 * and if your project happen to use anything but filesystem default (say, UTF8 on cp1251 system),
 	 * native storage paths won't match
 	 */
-	public static final String CFG_PROPERT_FS_FILENAME_ENCODING = "hg.fs.filename.encoding";
+	public static final String CFG_PROPERTY_FS_FILENAME_ENCODING = "hg.fs.filename.encoding";
 	
 	private int requiresFlags = 0;
 	private List<Filter.Factory> filterFactories;
@@ -109,20 +109,9 @@
 
 	// XXX perhaps, should keep both fields right here, not in the HgRepository
 	public PathRewrite buildDataFilesHelper() {
-		Object altEncoding = sessionContext.getProperty(CFG_PROPERT_FS_FILENAME_ENCODING, null);
-		Charset cs;
-		if (altEncoding == null) {
-			cs = Charset.defaultCharset();
-		} else {
-			try {
-				cs = Charset.forName(altEncoding.toString());
-			} catch (IllegalArgumentException ex) {
-				// both IllegalCharsetNameException and UnsupportedCharsetException are subclasses of IAE, too
-				// not severe enough to throw an exception, imo. Just record the fact it's bad ad we ignore it 
-				sessionContext.getLog().error(getClass(), ex, String.format("Bad configuration value for filename encoding %s", altEncoding));
-				cs = Charset.defaultCharset();
-			}
-		}
+		// Note, tests in TestStorePath depend on the encoding not being cached
+		Charset cs = getFileEncoding();
+		// StoragePathHelper needs fine-grained control over char encoding, hence doesn't use EncodingHelper
 		return new StoragePathHelper((requiresFlags & STORE) != 0, (requiresFlags & FNCACHE) != 0, (requiresFlags & DOTENCODE) != 0, cs);
 	}
 
@@ -178,6 +167,28 @@
 	public boolean isCaseSensitiveFileSystem() {
 		return isCaseSensitiveFileSystem;
 	}
+	
+	public EncodingHelper buildFileNameEncodingHelper() {
+		return new EncodingHelper(getFileEncoding());
+	}
+	
+	private Charset getFileEncoding() {
+		Object altEncoding = sessionContext.getProperty(CFG_PROPERTY_FS_FILENAME_ENCODING, null);
+		Charset cs;
+		if (altEncoding == null) {
+			cs = Charset.defaultCharset();
+		} else {
+			try {
+				cs = Charset.forName(altEncoding.toString());
+			} catch (IllegalArgumentException ex) {
+				// both IllegalCharsetNameException and UnsupportedCharsetException are subclasses of IAE, too
+				// not severe enough to throw an exception, imo. Just record the fact it's bad ad we ignore it 
+				sessionContext.getLog().error(Internals.class, ex, String.format("Bad configuration value for filename encoding %s", altEncoding));
+				cs = Charset.defaultCharset();
+			}
+		}
+		return cs;
+	}
 
 	public static boolean runningOnWindows() {
 		return System.getProperty("os.name").indexOf("Windows") != -1;
--- a/src/org/tmatesoft/hg/repo/HgDirstate.java	Tue Mar 20 17:56:50 2012 +0100
+++ b/src/org/tmatesoft/hg/repo/HgDirstate.java	Wed Mar 21 14:54:02 2012 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2011 TMate Software Ltd
+ * Copyright (c) 2010-2012 TMate Software Ltd
  *  
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -23,6 +23,7 @@
 import java.io.FileNotFoundException;
 import java.io.FileReader;
 import java.io.IOException;
+import java.nio.charset.CharacterCodingException;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
@@ -32,6 +33,7 @@
 import org.tmatesoft.hg.core.HgInvalidControlFileException;
 import org.tmatesoft.hg.core.Nodeid;
 import org.tmatesoft.hg.internal.DataAccess;
+import org.tmatesoft.hg.internal.EncodingHelper;
 import org.tmatesoft.hg.util.Pair;
 import org.tmatesoft.hg.util.Path;
 import org.tmatesoft.hg.util.PathPool;
@@ -74,7 +76,7 @@
 		canonicalPathRewrite = canonicalPath;
 	}
 
-	/*package-local*/ void read() throws HgInvalidControlFileException {
+	/*package-local*/ void read(EncodingHelper encodingHelper) throws HgInvalidControlFileException {
 		normal = added = removed = merged = Collections.<Path, Record>emptyMap();
 		parents = new Pair<Nodeid,Nodeid>(Nodeid.NULL, Nodeid.NULL);
 		if (canonicalPathRewrite != null) {
@@ -108,13 +110,13 @@
 				da.readBytes(name, 0, nameLen);
 				for (int i = 0; i < nameLen; i++) {
 					if (name[i] == 0) {
-						fn1 = new String(name, 0, i, "UTF-8"); // XXX unclear from documentation what encoding is used there
-						fn2 = new String(name, i+1, nameLen - i - 1, "UTF-8"); // need to check with different system codepages
+						fn1 = encodingHelper.fromDirstate(name, 0, i);
+						fn2 = encodingHelper.fromDirstate(name, i+1, nameLen - i - 1);
 						break;
 					}
 				}
 				if (fn1 == null) {
-					fn1 = new String(name);
+					fn1 = encodingHelper.fromDirstate(name, 0, nameLen);
 				}
 				Record r = new Record(fmode, size, time, pathPool.path(fn1), fn2 == null ? null : pathPool.path(fn2));
 				if (canonicalPathRewrite != null) {
@@ -145,6 +147,8 @@
 					repo.getContext().getLog().warn(getClass(), "Dirstate record for file %s (size: %d, tstamp:%d) has unknown state '%c'", r.name1, r.size(), r.time, state);
 				}
 			}
+		} catch (CharacterCodingException ex) {
+			throw new HgInvalidControlFileException(String.format("Failed reading file names from dirstate using encoding %s", encodingHelper.charset().name()), ex, dirstateFile);
 		} catch (IOException ex) {
 			throw new HgInvalidControlFileException("Dirstate read failed", ex, dirstateFile); 
 		} finally {
--- a/src/org/tmatesoft/hg/repo/HgInternals.java	Tue Mar 20 17:56:50 2012 +0100
+++ b/src/org/tmatesoft/hg/repo/HgInternals.java	Wed Mar 21 14:54:02 2012 +0100
@@ -73,7 +73,7 @@
 			};
 		}
 		HgDirstate ds = new HgDirstate(repo, new File(repo.getRepositoryRoot(), "dirstate"), new PathPool(new PathRewrite.Empty()), canonicalPath);
-		ds.read();
+		ds.read(repo.getImplHelper().buildFileNameEncodingHelper());
 		return ds;
 	}
 	
--- a/src/org/tmatesoft/hg/repo/HgManifest.java	Tue Mar 20 17:56:50 2012 +0100
+++ b/src/org/tmatesoft/hg/repo/HgManifest.java	Wed Mar 21 14:54:02 2012 +0100
@@ -51,6 +51,7 @@
  */
 public class HgManifest extends Revlog {
 	private RevisionMapper revisionMap;
+	private EncodingHelper encodingHelper;
 	
 	public enum Flags {
 		Exec, Link;
@@ -95,8 +96,9 @@
 		}
 	}
 
-	/*package-local*/ HgManifest(HgRepository hgRepo, RevlogStream content) {
+	/*package-local*/ HgManifest(HgRepository hgRepo, RevlogStream content, EncodingHelper eh) {
 		super(hgRepo, content);
+		encodingHelper = eh;
 	}
 
 	/**
@@ -171,7 +173,7 @@
 			manifestLast = manifestFirst;
 			manifestFirst = x;
 		}
-		content.iterate(manifestFirst, manifestLast, true, new ManifestParser(inspector));
+		content.iterate(manifestFirst, manifestLast, true, new ManifestParser(inspector, encodingHelper));
 	}
 	
 	/**
@@ -189,7 +191,7 @@
 			throw new IllegalArgumentException();
 		}
 		int[] manifestRevs = toManifestRevisionIndexes(revisionIndexes, inspector);
-		content.iterate(manifestRevs, true, new ManifestParser(inspector));
+		content.iterate(manifestRevs, true, new ManifestParser(inspector, encodingHelper));
 	}
 	
 	// 
@@ -331,11 +333,13 @@
 		private int start; 
 		private final int hash, length;
 		private Path result;
+		private final EncodingHelper encHelper;
 
-		public PathProxy(byte[] data, int start, int length) {
+		public PathProxy(byte[] data, int start, int length, EncodingHelper eh) {
 			this.data = data;
 			this.start = start;
 			this.length = length;
+			this.encHelper = eh;
 
 			// copy from String.hashCode(). In fact, not necessarily match result of String(data).hashCode
 			// just need some nice algorithm here
@@ -373,7 +377,7 @@
 		
 		public Path freeze() {
 			if (result == null) {
-				result = Path.create(EncodingHelper.fromManifest(data, start, length));
+				result = Path.create(encHelper.fromManifest(data, start, length));
 				// release reference to bigger data array, make a copy of relevant part only
 				// use original bytes, not those from String above to avoid cache misses due to different encodings 
 				byte[] d = new byte[length];
@@ -393,11 +397,13 @@
 		private byte[] nodeidLookupBuffer = new byte[20]; // get reassigned each time new Nodeid is added to pool
 		private final ProgressSupport progressHelper;
 		private IterateControlMediator iterateControl;
+		private final EncodingHelper encHelper;
 		
-		public ManifestParser(Inspector delegate) {
+		public ManifestParser(Inspector delegate, EncodingHelper eh) {
 			assert delegate != null;
 			inspector = delegate;
 			inspector2 = delegate instanceof Inspector2 ? (Inspector2) delegate : null;
+			encHelper = eh;
 			nodeidPool = new Pool2<Nodeid>();
 			fnamePool = new Pool2<PathProxy>();
 			thisRevPool = new Pool2<Nodeid>();
@@ -421,7 +427,7 @@
 						int x = i;
 						for( ; data[i] != '\n' && i < actualLen; i++) {
 							if (fname == null && data[i] == 0) {
-								PathProxy px = fnamePool.unify(new PathProxy(data, x, i - x));
+								PathProxy px = fnamePool.unify(new PathProxy(data, x, i - x, encHelper));
 								// if (cached = fnamePool.unify(px))== px then cacheMiss, else cacheHit
 								// cpython 0..10k: hits: 15 989 152, misses: 3020
 								fname = px.freeze();
--- a/src/org/tmatesoft/hg/repo/HgRepository.java	Tue Mar 20 17:56:50 2012 +0100
+++ b/src/org/tmatesoft/hg/repo/HgRepository.java	Wed Mar 21 14:54:02 2012 +0100
@@ -145,7 +145,7 @@
 	public HgManifest getManifest() {
 		if (manifest == null) {
 			RevlogStream content = resolve(Path.create(repoPathHelper.rewrite("00manifest.i")), true);
-			manifest = new HgManifest(this, content);
+			manifest = new HgManifest(this, content, impl.buildFileNameEncodingHelper());
 		}
 		return manifest;
 	}
@@ -308,7 +308,7 @@
 			};
 		}
 		HgDirstate ds = new HgDirstate(this, new File(repoDir, "dirstate"), pathPool, canonicalPath);
-		ds.read();
+		ds.read(impl.buildFileNameEncodingHelper());
 		return ds;
 	}
 
@@ -385,6 +385,10 @@
 	/*package-local*/ SessionContext getContext() {
 		return sessionContext;
 	}
+	
+	/*package-local*/ Internals getImplHelper() {
+		return impl;
+	}
 
 	private List<Filter> instantiateFilters(Path p, Filter.Options opts) {
 		List<Filter.Factory> factories = impl.getFilters(this);
--- a/test/org/tmatesoft/hg/test/TestStorePath.java	Tue Mar 20 17:56:50 2012 +0100
+++ b/test/org/tmatesoft/hg/test/TestStorePath.java	Wed Mar 21 14:54:02 2012 +0100
@@ -119,11 +119,11 @@
 	public void testNationalChars() {
 		String s = "Привет.txt";
 		//
-		propertyOverrides.put(Internals.CFG_PROPERT_FS_FILENAME_ENCODING, "cp1251");
+		propertyOverrides.put(Internals.CFG_PROPERTY_FS_FILENAME_ENCODING, "cp1251");
 		PathRewrite sph = internals.buildDataFilesHelper();
 		errorCollector.checkThat(sph.rewrite(s), CoreMatchers.<CharSequence>equalTo("store/data/~cf~f0~e8~e2~e5~f2.txt.i"));
 		//
-		propertyOverrides.put(Internals.CFG_PROPERT_FS_FILENAME_ENCODING, "UTF8");
+		propertyOverrides.put(Internals.CFG_PROPERTY_FS_FILENAME_ENCODING, "UTF8");
 		sph = internals.buildDataFilesHelper();
 		errorCollector.checkThat(sph.rewrite(s), CoreMatchers.<CharSequence>equalTo("store/data/~d0~9f~d1~80~d0~b8~d0~b2~d0~b5~d1~82.txt.i"));
 	}