comparison src/org/tmatesoft/hg/internal/EncodingHelper.java @ 667:fba85bc1dfb8

Refactoring: move all encoding/decoding operations into single place, EncodingHelper
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Thu, 11 Jul 2013 17:54:08 +0200
parents 47b7bedf0569
children f568330dd9c0
comparison
equal deleted inserted replaced
666:27a3ddedd6cc 667:fba85bc1dfb8
16 */ 16 */
17 package org.tmatesoft.hg.internal; 17 package org.tmatesoft.hg.internal;
18 18
19 import static org.tmatesoft.hg.util.LogFacility.Severity.Error; 19 import static org.tmatesoft.hg.util.LogFacility.Severity.Error;
20 20
21 import java.io.UnsupportedEncodingException;
21 import java.nio.ByteBuffer; 22 import java.nio.ByteBuffer;
22 import java.nio.CharBuffer; 23 import java.nio.CharBuffer;
23 import java.nio.charset.CharacterCodingException; 24 import java.nio.charset.CharacterCodingException;
24 import java.nio.charset.Charset; 25 import java.nio.charset.Charset;
25 import java.nio.charset.CharsetDecoder; 26 import java.nio.charset.CharsetDecoder;
26 import java.nio.charset.CharsetEncoder; 27 import java.nio.charset.CharsetEncoder;
27 28
28 import org.tmatesoft.hg.core.SessionContext; 29 import org.tmatesoft.hg.core.SessionContext;
30 import org.tmatesoft.hg.repo.HgInvalidStateException;
29 31
30 /** 32 /**
31 * Keep all encoding-related issues in a single place 33 * Keep all encoding-related issues in a single place
32 * NOT thread-safe (encoder and decoder require synchronized access) 34 * NOT thread-safe (encoder and decoder require synchronized access)
35 *
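36 * @see <a href="http://mercurial.selenic.com/wiki/EncodingStrategy">EncodingStrategy</a>
37 * @see <a href="http://mercurial.selenic.com/wiki/WindowsUTF8Plan">WindowsUTF8Plan</a>
38 * @see <a href="http://mercurial.selenic.com/wiki/CharacterEncodingOnWindows">CharacterEncodingOnWindows</a>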
33 * @author Artem Tikhomirov 39 * @author Artem Tikhomirov
34 * @author TMate Software Ltd. 40 * @author TMate Software Ltd.
35 */ 41 */
36 public class EncodingHelper { 42 public class EncodingHelper {
37 // XXX perhaps, shall not be full of statics, but rather an instance coming from e.g. HgRepository?
38 /* 43 /*
39 * To understand what Mercurial thinks of UTF-8 and Unix byte approach to names, see 44 * To understand what Mercurial thinks of UTF-8 and Unix byte approach to names, see
40 * http://mercurial.808500.n3.nabble.com/Unicode-support-request-td3430704.html 45 * http://mercurial.808500.n3.nabble.com/Unicode-support-request-td3430704.html
41 */ 46 */
42 47
43 private final SessionContext sessionContext; 48 private final SessionContext sessionContext;
44 private final CharsetEncoder encoder; 49 private final CharsetEncoder encoder;
45 private final CharsetDecoder decoder; 50 private final CharsetDecoder decoder;
46 51 private final CharsetEncoder utfEncoder;
47 EncodingHelper(Charset fsEncoding, SessionContext ctx) { 52 private final CharsetDecoder utfDecoder;
48 sessionContext = ctx; 53
54 EncodingHelper(Charset fsEncoding, SessionContext.Source ctx) {
55 sessionContext = ctx.getSessionContext();
49 decoder = fsEncoding.newDecoder(); 56 decoder = fsEncoding.newDecoder();
50 encoder = fsEncoding.newEncoder(); 57 encoder = fsEncoding.newEncoder();
58 Charset utf8 = getUTF8();
59 if (fsEncoding.equals(utf8)) {
60 utfDecoder = decoder;
61 utfEncoder = encoder;
62 } else {
63 utfDecoder = utf8.newDecoder();
64 utfEncoder = utf8.newEncoder();
65 }
51 } 66 }
52 67
53 /** 68 /**
54 * Translate file names from manifest to amazing Unicode string 69 * Translate file names from manifest to amazing Unicode string
55 */ 70 */
63 public byte[] toManifest(CharSequence s) { 78 public byte[] toManifest(CharSequence s) {
64 if (s == null) { 79 if (s == null) {
65 // perhaps, can return byte[0] in this case? 80 // perhaps, can return byte[0] in this case?
66 throw new IllegalArgumentException(); 81 throw new IllegalArgumentException();
67 } 82 }
68 return encodeWithSystemDefaultFallback(s); 83 return toArray(encodeWithSystemDefaultFallback(s));
69 } 84 }
70 85
71 /** 86 /**
72 * Translate file names from dirstate to amazing Unicode string 87 * Translate file names from dirstate to amazing Unicode string
73 */ 88 */
77 92
78 public byte[] toDirstate(CharSequence fname) { 93 public byte[] toDirstate(CharSequence fname) {
79 if (fname == null) { 94 if (fname == null) {
80 throw new IllegalArgumentException(); 95 throw new IllegalArgumentException();
81 } 96 }
97 return toArray(encodeWithSystemDefaultFallback(fname));
98 }
99
100 /**
101 * prepare filename to be serialized into fncache file
102 */
103 public ByteBuffer toFNCache(CharSequence fname) {
82 return encodeWithSystemDefaultFallback(fname); 104 return encodeWithSystemDefaultFallback(fname);
83 } 105 }
84 106
107 public byte[] toBundle(CharSequence fname) {
108 // yes, mercurial transfers filenames in local encoding
109 // so that if your local encoding doesn't match that on server,
110 // and you use native characters, you'd likely fail
111 return toArray(encodeWithSystemDefaultFallback(fname));
112 }
113 public String fromBundle(byte[] data, int start, int length) {
114 return decodeWithSystemDefaultFallback(data, start, length);
115 }
116
117
118 public String userFromChangeset(byte[] data, int start, int length) {
119 return decodeUnicodeWithFallback(data, start, length);
120 }
121
122 public String commentFromChangeset(byte[] data, int start, int length) {
123 return decodeUnicodeWithFallback(data, start, length);
124 }
125
126 public String fileFromChangeset(byte[] data, int start, int length) {
127 return decodeWithSystemDefaultFallback(data, start, length);
128 }
129
130 public byte[] userToChangeset(CharSequence user) {
131 return toArray(encodeUnicode(user));
132 }
133
134 public byte[] commentToChangeset(CharSequence comment) {
135 return toArray(encodeUnicode(comment));
136 }
137
138 public byte[] fileToChangeset(CharSequence file) {
139 return toArray(encodeWithSystemDefaultFallback(file));
140 }
141
85 private String decodeWithSystemDefaultFallback(byte[] data, int start, int length) { 142 private String decodeWithSystemDefaultFallback(byte[] data, int start, int length) {
86 try { 143 try {
87 return decoder.decode(ByteBuffer.wrap(data, start, length)).toString(); 144 return decoder.decode(ByteBuffer.wrap(data, start, length)).toString();
88 } catch (CharacterCodingException ex) { 145 } catch (CharacterCodingException ex) {
89 sessionContext.getLog().dump(getClass(), Error, ex, String.format("Use of charset %s failed, resort to system default", charset().name())); 146 sessionContext.getLog().dump(getClass(), Error, ex, String.format("Use of charset %s failed, resort to system default", charset().name()));
90 // resort to system-default 147 // resort to system-default
91 return new String(data, start, length); 148 return new String(data, start, length);
92 } 149 }
93 } 150 }
94 151
95 private byte[] encodeWithSystemDefaultFallback(CharSequence s) { 152 private ByteBuffer encodeWithSystemDefaultFallback(CharSequence s) {
96 try { 153 try {
97 // synchronized(encoder) { 154 // synchronized(encoder) {
98 ByteBuffer bb = encoder.encode(CharBuffer.wrap(s)); 155 return encoder.encode(CharBuffer.wrap(s));
99 // } 156 // }
100 byte[] rv = new byte[bb.remaining()];
101 bb.get(rv, 0, rv.length);
102 return rv;
103 } catch (CharacterCodingException ex) { 157 } catch (CharacterCodingException ex) {
104 sessionContext.getLog().dump(getClass(), Error, ex, String.format("Use of charset %s failed, resort to system default", charset().name())); 158 sessionContext.getLog().dump(getClass(), Error, ex, String.format("Use of charset %s failed, resort to system default", charset().name()));
105 // resort to system-default 159 // resort to system-default
106 return s.toString().getBytes(); 160 return ByteBuffer.wrap(s.toString().getBytes());
161 }
162 }
163
164 private byte[] toArray(ByteBuffer bb) {
165 byte[] rv;
166 if (bb.hasArray() && bb.arrayOffset() == 0) {
167 rv = bb.array();
168 if (rv.length == bb.remaining()) {
169 return rv;
170 }
171 // fall through
172 }
173 rv = new byte[bb.remaining()];
174 bb.get(rv, 0, rv.length);
175 return rv;
176 }
177
178 private String decodeUnicodeWithFallback(byte[] data, int start, int length) {
179 try {
180 return utfDecoder.decode(ByteBuffer.wrap(data, start, length)).toString();
181 } catch (CharacterCodingException ex) {
182 // TODO post-1.2 respect ui.fallbackencoding actual setting
183 return new String(data, start, length, Charset.forName("ISO-8859-1"));
184 }
185 }
186
187 private ByteBuffer encodeUnicode(CharSequence s) {
188 //
189 try {
190 return utfEncoder.encode(CharBuffer.wrap(s));
191 } catch (CharacterCodingException ex) {
192 byte[] rv;
193 try {
194 rv = s.toString().getBytes(getUTF8().name()); // XXX Java 1.5
195 } catch (UnsupportedEncodingException e) {
196 throw new HgInvalidStateException("Unexpected error trying to get UTF-8 encoding");
197 }
198 return ByteBuffer.wrap(rv);
107 } 199 }
108 } 200 }
109 201
110 private Charset charset() { 202 private Charset charset() {
111 return encoder.charset(); 203 return encoder.charset();