Mercurial > jhg
comparison src/org/tmatesoft/hg/internal/EncodingHelper.java @ 667:fba85bc1dfb8
Refactoring: move all encoding/decoding operations into a single place, EncodingHelper
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
|---|---|
| date | Thu, 11 Jul 2013 17:54:08 +0200 |
| parents | 47b7bedf0569 |
| children | f568330dd9c0 |
comparison
equal
deleted
inserted
replaced
| 666:27a3ddedd6cc | 667:fba85bc1dfb8 |
|---|---|
| 16 */ | 16 */ |
| 17 package org.tmatesoft.hg.internal; | 17 package org.tmatesoft.hg.internal; |
| 18 | 18 |
| 19 import static org.tmatesoft.hg.util.LogFacility.Severity.Error; | 19 import static org.tmatesoft.hg.util.LogFacility.Severity.Error; |
| 20 | 20 |
| 21 import java.io.UnsupportedEncodingException; | |
| 21 import java.nio.ByteBuffer; | 22 import java.nio.ByteBuffer; |
| 22 import java.nio.CharBuffer; | 23 import java.nio.CharBuffer; |
| 23 import java.nio.charset.CharacterCodingException; | 24 import java.nio.charset.CharacterCodingException; |
| 24 import java.nio.charset.Charset; | 25 import java.nio.charset.Charset; |
| 25 import java.nio.charset.CharsetDecoder; | 26 import java.nio.charset.CharsetDecoder; |
| 26 import java.nio.charset.CharsetEncoder; | 27 import java.nio.charset.CharsetEncoder; |
| 27 | 28 |
| 28 import org.tmatesoft.hg.core.SessionContext; | 29 import org.tmatesoft.hg.core.SessionContext; |
| 30 import org.tmatesoft.hg.repo.HgInvalidStateException; | |
| 29 | 31 |
| 30 /** | 32 /** |
| 31 * Keep all encoding-related issues in a single place | 33 * Keep all encoding-related issues in a single place |
| 32 * NOT thread-safe (encoder and decoder require synchronized access) | 34 * NOT thread-safe (encoder and decoder require synchronized access) |
| 35 * | |
| 36 * @see http://mercurial.selenic.com/wiki/EncodingStrategy | |
| 37 * @see http://mercurial.selenic.com/wiki/WindowsUTF8Plan | |
| 38 * @see http://mercurial.selenic.com/wiki/CharacterEncodingOnWindows | |
| 33 * @author Artem Tikhomirov | 39 * @author Artem Tikhomirov |
| 34 * @author TMate Software Ltd. | 40 * @author TMate Software Ltd. |
| 35 */ | 41 */ |
| 36 public class EncodingHelper { | 42 public class EncodingHelper { |
| 37 // XXX perhaps, shall not be full of statics, but rather an instance coming from e.g. HgRepository? | |
| 38 /* | 43 /* |
| 39 * To understand what Mercurial thinks of UTF-8 and Unix byte approach to names, see | 44 * To understand what Mercurial thinks of UTF-8 and Unix byte approach to names, see |
| 40 * http://mercurial.808500.n3.nabble.com/Unicode-support-request-td3430704.html | 45 * http://mercurial.808500.n3.nabble.com/Unicode-support-request-td3430704.html |
| 41 */ | 46 */ |
| 42 | 47 |
| 43 private final SessionContext sessionContext; | 48 private final SessionContext sessionContext; |
| 44 private final CharsetEncoder encoder; | 49 private final CharsetEncoder encoder; |
| 45 private final CharsetDecoder decoder; | 50 private final CharsetDecoder decoder; |
| 46 | 51 private final CharsetEncoder utfEncoder; |
| 47 EncodingHelper(Charset fsEncoding, SessionContext ctx) { | 52 private final CharsetDecoder utfDecoder; |
| 48 sessionContext = ctx; | 53 |
| 54 EncodingHelper(Charset fsEncoding, SessionContext.Source ctx) { | |
| 55 sessionContext = ctx.getSessionContext(); | |
| 49 decoder = fsEncoding.newDecoder(); | 56 decoder = fsEncoding.newDecoder(); |
| 50 encoder = fsEncoding.newEncoder(); | 57 encoder = fsEncoding.newEncoder(); |
| 58 Charset utf8 = getUTF8(); | |
| 59 if (fsEncoding.equals(utf8)) { | |
| 60 utfDecoder = decoder; | |
| 61 utfEncoder = encoder; | |
| 62 } else { | |
| 63 utfDecoder = utf8.newDecoder(); | |
| 64 utfEncoder = utf8.newEncoder(); | |
| 65 } | |
| 51 } | 66 } |
| 52 | 67 |
| 53 /** | 68 /** |
| 54 * Translate file names from manifest to amazing Unicode string | 69 * Translate file names from manifest to amazing Unicode string |
| 55 */ | 70 */ |
| 63 public byte[] toManifest(CharSequence s) { | 78 public byte[] toManifest(CharSequence s) { |
| 64 if (s == null) { | 79 if (s == null) { |
| 65 // perhaps, can return byte[0] in this case? | 80 // perhaps, can return byte[0] in this case? |
| 66 throw new IllegalArgumentException(); | 81 throw new IllegalArgumentException(); |
| 67 } | 82 } |
| 68 return encodeWithSystemDefaultFallback(s); | 83 return toArray(encodeWithSystemDefaultFallback(s)); |
| 69 } | 84 } |
| 70 | 85 |
| 71 /** | 86 /** |
| 72 * Translate file names from dirstate to amazing Unicode string | 87 * Translate file names from dirstate to amazing Unicode string |
| 73 */ | 88 */ |
| 77 | 92 |
| 78 public byte[] toDirstate(CharSequence fname) { | 93 public byte[] toDirstate(CharSequence fname) { |
| 79 if (fname == null) { | 94 if (fname == null) { |
| 80 throw new IllegalArgumentException(); | 95 throw new IllegalArgumentException(); |
| 81 } | 96 } |
| 97 return toArray(encodeWithSystemDefaultFallback(fname)); | |
| 98 } | |
| 99 | |
| 100 /** | |
| 101 * prepare filename to be serialized into fncache file | |
| 102 */ | |
| 103 public ByteBuffer toFNCache(CharSequence fname) { | |
| 82 return encodeWithSystemDefaultFallback(fname); | 104 return encodeWithSystemDefaultFallback(fname); |
| 83 } | 105 } |
| 84 | 106 |
| 107 public byte[] toBundle(CharSequence fname) { | |
| 108 // Yes, Mercurial transfers filenames in the local encoding, | |
| 109 // so if your local encoding doesn't match that of the server | |
| 110 // and you use native (non-ASCII) characters, you'd likely fail | |
| 111 return toArray(encodeWithSystemDefaultFallback(fname)); | |
| 112 } | |
| 113 public String fromBundle(byte[] data, int start, int length) { | |
| 114 return decodeWithSystemDefaultFallback(data, start, length); | |
| 115 } | |
| 116 | |
| 117 | |
| 118 public String userFromChangeset(byte[] data, int start, int length) { | |
| 119 return decodeUnicodeWithFallback(data, start, length); | |
| 120 } | |
| 121 | |
| 122 public String commentFromChangeset(byte[] data, int start, int length) { | |
| 123 return decodeUnicodeWithFallback(data, start, length); | |
| 124 } | |
| 125 | |
| 126 public String fileFromChangeset(byte[] data, int start, int length) { | |
| 127 return decodeWithSystemDefaultFallback(data, start, length); | |
| 128 } | |
| 129 | |
| 130 public byte[] userToChangeset(CharSequence user) { | |
| 131 return toArray(encodeUnicode(user)); | |
| 132 } | |
| 133 | |
| 134 public byte[] commentToChangeset(CharSequence comment) { | |
| 135 return toArray(encodeUnicode(comment)); | |
| 136 } | |
| 137 | |
| 138 public byte[] fileToChangeset(CharSequence file) { | |
| 139 return toArray(encodeWithSystemDefaultFallback(file)); | |
| 140 } | |
| 141 | |
| 85 private String decodeWithSystemDefaultFallback(byte[] data, int start, int length) { | 142 private String decodeWithSystemDefaultFallback(byte[] data, int start, int length) { |
| 86 try { | 143 try { |
| 87 return decoder.decode(ByteBuffer.wrap(data, start, length)).toString(); | 144 return decoder.decode(ByteBuffer.wrap(data, start, length)).toString(); |
| 88 } catch (CharacterCodingException ex) { | 145 } catch (CharacterCodingException ex) { |
| 89 sessionContext.getLog().dump(getClass(), Error, ex, String.format("Use of charset %s failed, resort to system default", charset().name())); | 146 sessionContext.getLog().dump(getClass(), Error, ex, String.format("Use of charset %s failed, resort to system default", charset().name())); |
| 90 // resort to system-default | 147 // resort to system-default |
| 91 return new String(data, start, length); | 148 return new String(data, start, length); |
| 92 } | 149 } |
| 93 } | 150 } |
| 94 | 151 |
| 95 private byte[] encodeWithSystemDefaultFallback(CharSequence s) { | 152 private ByteBuffer encodeWithSystemDefaultFallback(CharSequence s) { |
| 96 try { | 153 try { |
| 97 // synchronized(encoder) { | 154 // synchronized(encoder) { |
| 98 ByteBuffer bb = encoder.encode(CharBuffer.wrap(s)); | 155 return encoder.encode(CharBuffer.wrap(s)); |
| 99 // } | 156 // } |
| 100 byte[] rv = new byte[bb.remaining()]; | |
| 101 bb.get(rv, 0, rv.length); | |
| 102 return rv; | |
| 103 } catch (CharacterCodingException ex) { | 157 } catch (CharacterCodingException ex) { |
| 104 sessionContext.getLog().dump(getClass(), Error, ex, String.format("Use of charset %s failed, resort to system default", charset().name())); | 158 sessionContext.getLog().dump(getClass(), Error, ex, String.format("Use of charset %s failed, resort to system default", charset().name())); |
| 105 // resort to system-default | 159 // resort to system-default |
| 106 return s.toString().getBytes(); | 160 return ByteBuffer.wrap(s.toString().getBytes()); |
| 161 } | |
| 162 } | |
| 163 | |
| 164 private byte[] toArray(ByteBuffer bb) { | |
| 165 byte[] rv; | |
| 166 if (bb.hasArray() && bb.arrayOffset() == 0) { | |
| 167 rv = bb.array(); | |
| 168 if (rv.length == bb.remaining()) { | |
| 169 return rv; | |
| 170 } | |
| 171 // fall through | |
| 172 } | |
| 173 rv = new byte[bb.remaining()]; | |
| 174 bb.get(rv, 0, rv.length); | |
| 175 return rv; | |
| 176 } | |
| 177 | |
| 178 private String decodeUnicodeWithFallback(byte[] data, int start, int length) { | |
| 179 try { | |
| 180 return utfDecoder.decode(ByteBuffer.wrap(data, start, length)).toString(); | |
| 181 } catch (CharacterCodingException ex) { | |
| 182 // TODO post-1.2 respect ui.fallbackencoding actual setting | |
| 183 return new String(data, start, length, Charset.forName("ISO-8859-1")); | |
| 184 } | |
| 185 } | |
| 186 | |
| 187 private ByteBuffer encodeUnicode(CharSequence s) { | |
| 188 // | |
| 189 try { | |
| 190 return utfEncoder.encode(CharBuffer.wrap(s)); | |
| 191 } catch (CharacterCodingException ex) { | |
| 192 byte[] rv; | |
| 193 try { | |
| 194 rv = s.toString().getBytes(getUTF8().name()); // XXX Java 1.5 | |
| 195 } catch (UnsupportedEncodingException e) { | |
| 196 throw new HgInvalidStateException("Unexpected error trying to get UTF-8 encoding"); | |
| 197 } | |
| 198 return ByteBuffer.wrap(rv); | |
| 107 } | 199 } |
| 108 } | 200 } |
| 109 | 201 |
| 110 private Charset charset() { | 202 private Charset charset() { |
| 111 return encoder.charset(); | 203 return encoder.charset(); |
