Mercurial > hg4j
comparison src/org/tmatesoft/hg/internal/EncodingHelper.java @ 667:fba85bc1dfb8
Refactoring: move all encoding/decoding operations into a single place, EncodingHelper
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Thu, 11 Jul 2013 17:54:08 +0200 |
parents | 47b7bedf0569 |
children | f568330dd9c0 |
comparison
equal
deleted
inserted
replaced
666:27a3ddedd6cc | 667:fba85bc1dfb8 |
---|---|
16 */ | 16 */ |
17 package org.tmatesoft.hg.internal; | 17 package org.tmatesoft.hg.internal; |
18 | 18 |
19 import static org.tmatesoft.hg.util.LogFacility.Severity.Error; | 19 import static org.tmatesoft.hg.util.LogFacility.Severity.Error; |
20 | 20 |
21 import java.io.UnsupportedEncodingException; | |
21 import java.nio.ByteBuffer; | 22 import java.nio.ByteBuffer; |
22 import java.nio.CharBuffer; | 23 import java.nio.CharBuffer; |
23 import java.nio.charset.CharacterCodingException; | 24 import java.nio.charset.CharacterCodingException; |
24 import java.nio.charset.Charset; | 25 import java.nio.charset.Charset; |
25 import java.nio.charset.CharsetDecoder; | 26 import java.nio.charset.CharsetDecoder; |
26 import java.nio.charset.CharsetEncoder; | 27 import java.nio.charset.CharsetEncoder; |
27 | 28 |
28 import org.tmatesoft.hg.core.SessionContext; | 29 import org.tmatesoft.hg.core.SessionContext; |
30 import org.tmatesoft.hg.repo.HgInvalidStateException; | |
29 | 31 |
30 /** | 32 /** |
31 * Keep all encoding-related issues in the single place | 33 * Keep all encoding-related issues in the single place |
32 * NOT thread-safe (encoder and decoder requires synchronized access) | 34 * NOT thread-safe (encoder and decoder requires synchronized access) |
35 * | |
36 * @see http://mercurial.selenic.com/wiki/EncodingStrategy | |
37 * @see http://mercurial.selenic.com/wiki/WindowsUTF8Plan | |
38 * @see http://mercurial.selenic.com/wiki/CharacterEncodingOnWindows | |
33 * @author Artem Tikhomirov | 39 * @author Artem Tikhomirov |
34 * @author TMate Software Ltd. | 40 * @author TMate Software Ltd. |
35 */ | 41 */ |
36 public class EncodingHelper { | 42 public class EncodingHelper { |
37 // XXX perhaps, shall not be full of statics, but rather an instance coming from e.g. HgRepository? | |
38 /* | 43 /* |
39 * To understand what Mercurial thinks of UTF-8 and Unix byte approach to names, see | 44 * To understand what Mercurial thinks of UTF-8 and Unix byte approach to names, see |
40 * http://mercurial.808500.n3.nabble.com/Unicode-support-request-td3430704.html | 45 * http://mercurial.808500.n3.nabble.com/Unicode-support-request-td3430704.html |
41 */ | 46 */ |
42 | 47 |
43 private final SessionContext sessionContext; | 48 private final SessionContext sessionContext; |
44 private final CharsetEncoder encoder; | 49 private final CharsetEncoder encoder; |
45 private final CharsetDecoder decoder; | 50 private final CharsetDecoder decoder; |
46 | 51 private final CharsetEncoder utfEncoder; |
47 EncodingHelper(Charset fsEncoding, SessionContext ctx) { | 52 private final CharsetDecoder utfDecoder; |
48 sessionContext = ctx; | 53 |
54 EncodingHelper(Charset fsEncoding, SessionContext.Source ctx) { | |
55 sessionContext = ctx.getSessionContext(); | |
49 decoder = fsEncoding.newDecoder(); | 56 decoder = fsEncoding.newDecoder(); |
50 encoder = fsEncoding.newEncoder(); | 57 encoder = fsEncoding.newEncoder(); |
58 Charset utf8 = getUTF8(); | |
59 if (fsEncoding.equals(utf8)) { | |
60 utfDecoder = decoder; | |
61 utfEncoder = encoder; | |
62 } else { | |
63 utfDecoder = utf8.newDecoder(); | |
64 utfEncoder = utf8.newEncoder(); | |
65 } | |
51 } | 66 } |
52 | 67 |
53 /** | 68 /** |
54 * Translate file names from manifest to amazing Unicode string | 69 * Translate file names from manifest to amazing Unicode string |
55 */ | 70 */ |
63 public byte[] toManifest(CharSequence s) { | 78 public byte[] toManifest(CharSequence s) { |
64 if (s == null) { | 79 if (s == null) { |
65 // perhaps, can return byte[0] in this case? | 80 // perhaps, can return byte[0] in this case? |
66 throw new IllegalArgumentException(); | 81 throw new IllegalArgumentException(); |
67 } | 82 } |
68 return encodeWithSystemDefaultFallback(s); | 83 return toArray(encodeWithSystemDefaultFallback(s)); |
69 } | 84 } |
70 | 85 |
71 /** | 86 /** |
72 * Translate file names from dirstate to amazing Unicode string | 87 * Translate file names from dirstate to amazing Unicode string |
73 */ | 88 */ |
77 | 92 |
78 public byte[] toDirstate(CharSequence fname) { | 93 public byte[] toDirstate(CharSequence fname) { |
79 if (fname == null) { | 94 if (fname == null) { |
80 throw new IllegalArgumentException(); | 95 throw new IllegalArgumentException(); |
81 } | 96 } |
97 return toArray(encodeWithSystemDefaultFallback(fname)); | |
98 } | |
99 | |
100 /** | |
101 * prepare filename to be serialized into fncache file | |
102 */ | |
103 public ByteBuffer toFNCache(CharSequence fname) { | |
82 return encodeWithSystemDefaultFallback(fname); | 104 return encodeWithSystemDefaultFallback(fname); |
83 } | 105 } |
84 | 106 |
107 public byte[] toBundle(CharSequence fname) { | |
108 // yes, mercurial transfers filenames in local encoding | |
109 // so that if your local encoding doesn't match that on server, | |
110 // and you use native characters, you'd likely fail | |
111 return toArray(encodeWithSystemDefaultFallback(fname)); | |
112 } | |
113 public String fromBundle(byte[] data, int start, int length) { | |
114 return decodeWithSystemDefaultFallback(data, start, length); | |
115 } | |
116 | |
117 | |
118 public String userFromChangeset(byte[] data, int start, int length) { | |
119 return decodeUnicodeWithFallback(data, start, length); | |
120 } | |
121 | |
122 public String commentFromChangeset(byte[] data, int start, int length) { | |
123 return decodeUnicodeWithFallback(data, start, length); | |
124 } | |
125 | |
126 public String fileFromChangeset(byte[] data, int start, int length) { | |
127 return decodeWithSystemDefaultFallback(data, start, length); | |
128 } | |
129 | |
130 public byte[] userToChangeset(CharSequence user) { | |
131 return toArray(encodeUnicode(user)); | |
132 } | |
133 | |
134 public byte[] commentToChangeset(CharSequence comment) { | |
135 return toArray(encodeUnicode(comment)); | |
136 } | |
137 | |
138 public byte[] fileToChangeset(CharSequence file) { | |
139 return toArray(encodeWithSystemDefaultFallback(file)); | |
140 } | |
141 | |
85 private String decodeWithSystemDefaultFallback(byte[] data, int start, int length) { | 142 private String decodeWithSystemDefaultFallback(byte[] data, int start, int length) { |
86 try { | 143 try { |
87 return decoder.decode(ByteBuffer.wrap(data, start, length)).toString(); | 144 return decoder.decode(ByteBuffer.wrap(data, start, length)).toString(); |
88 } catch (CharacterCodingException ex) { | 145 } catch (CharacterCodingException ex) { |
89 sessionContext.getLog().dump(getClass(), Error, ex, String.format("Use of charset %s failed, resort to system default", charset().name())); | 146 sessionContext.getLog().dump(getClass(), Error, ex, String.format("Use of charset %s failed, resort to system default", charset().name())); |
90 // resort to system-default | 147 // resort to system-default |
91 return new String(data, start, length); | 148 return new String(data, start, length); |
92 } | 149 } |
93 } | 150 } |
94 | 151 |
95 private byte[] encodeWithSystemDefaultFallback(CharSequence s) { | 152 private ByteBuffer encodeWithSystemDefaultFallback(CharSequence s) { |
96 try { | 153 try { |
97 // synchronized(encoder) { | 154 // synchronized(encoder) { |
98 ByteBuffer bb = encoder.encode(CharBuffer.wrap(s)); | 155 return encoder.encode(CharBuffer.wrap(s)); |
99 // } | 156 // } |
100 byte[] rv = new byte[bb.remaining()]; | |
101 bb.get(rv, 0, rv.length); | |
102 return rv; | |
103 } catch (CharacterCodingException ex) { | 157 } catch (CharacterCodingException ex) { |
104 sessionContext.getLog().dump(getClass(), Error, ex, String.format("Use of charset %s failed, resort to system default", charset().name())); | 158 sessionContext.getLog().dump(getClass(), Error, ex, String.format("Use of charset %s failed, resort to system default", charset().name())); |
105 // resort to system-default | 159 // resort to system-default |
106 return s.toString().getBytes(); | 160 return ByteBuffer.wrap(s.toString().getBytes()); |
161 } | |
162 } | |
163 | |
164 private byte[] toArray(ByteBuffer bb) { | |
165 byte[] rv; | |
166 if (bb.hasArray() && bb.arrayOffset() == 0) { | |
167 rv = bb.array(); | |
168 if (rv.length == bb.remaining()) { | |
169 return rv; | |
170 } | |
171 // fall through | |
172 } | |
173 rv = new byte[bb.remaining()]; | |
174 bb.get(rv, 0, rv.length); | |
175 return rv; | |
176 } | |
177 | |
178 private String decodeUnicodeWithFallback(byte[] data, int start, int length) { | |
179 try { | |
180 return utfDecoder.decode(ByteBuffer.wrap(data, start, length)).toString(); | |
181 } catch (CharacterCodingException ex) { | |
182 // TODO post-1.2 respect ui.fallbackencoding actual setting | |
183 return new String(data, start, length, Charset.forName("ISO-8859-1")); | |
184 } | |
185 } | |
186 | |
187 private ByteBuffer encodeUnicode(CharSequence s) { | |
188 // | |
189 try { | |
190 return utfEncoder.encode(CharBuffer.wrap(s)); | |
191 } catch (CharacterCodingException ex) { | |
192 byte[] rv; | |
193 try { | |
194 rv = s.toString().getBytes(getUTF8().name()); // XXX Java 1.5 | |
195 } catch (UnsupportedEncodingException e) { | |
196 throw new HgInvalidStateException("Unexpected error trying to get UTF-8 encoding"); | |
197 } | |
198 return ByteBuffer.wrap(rv); | |
107 } | 199 } |
108 } | 200 } |
109 | 201 |
110 private Charset charset() { | 202 private Charset charset() { |
111 return encoder.charset(); | 203 return encoder.charset(); |