tikhomirov@74: /* tikhomirov@616: * Copyright (c) 2011-2013 TMate Software Ltd tikhomirov@74: * tikhomirov@74: * This program is free software; you can redistribute it and/or modify tikhomirov@74: * it under the terms of the GNU General Public License as published by tikhomirov@74: * the Free Software Foundation; version 2 of the License. tikhomirov@74: * tikhomirov@74: * This program is distributed in the hope that it will be useful, tikhomirov@74: * but WITHOUT ANY WARRANTY; without even the implied warranty of tikhomirov@74: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the tikhomirov@74: * GNU General Public License for more details. tikhomirov@74: * tikhomirov@74: * For information on how to redistribute this software under tikhomirov@74: * the terms of a license other than GNU General Public License tikhomirov@102: * contact TMate Software at support@hg4j.com tikhomirov@74: */ tikhomirov@74: package org.tmatesoft.hg.internal; tikhomirov@74: tikhomirov@411: import java.nio.ByteBuffer; tikhomirov@411: import java.nio.CharBuffer; tikhomirov@411: import java.nio.charset.Charset; tikhomirov@411: import java.nio.charset.CharsetEncoder; tikhomirov@74: import java.util.Arrays; tikhomirov@74: import java.util.TreeSet; tikhomirov@74: tikhomirov@74: import org.tmatesoft.hg.util.PathRewrite; tikhomirov@74: tikhomirov@74: /** tikhomirov@74: * @see http://mercurial.selenic.com/wiki/CaseFoldingPlan tikhomirov@80: * @see http://mercurial.selenic.com/wiki/fncacheRepoFormat tikhomirov@411: * @see http://mercurial.selenic.com/wiki/EncodingStrategy tikhomirov@74: * tikhomirov@74: * @author Artem Tikhomirov tikhomirov@74: * @author TMate Software Ltd. tikhomirov@74: */ tikhomirov@74: class StoragePathHelper implements PathRewrite { tikhomirov@616: tikhomirov@616: static final String STR_STORE = "store/"; tikhomirov@616: static final String STR_DATA = "data/"; tikhomirov@616: static final String STR_DH = "dh/"; tikhomirov@616: tikhomirov@74: private final boolean store; tikhomirov@74: private final boolean fncache; tikhomirov@74: private final boolean dotencode; tikhomirov@616: private final EncodeDirPathHelper dirPathRewrite; tikhomirov@411: private final CharsetEncoder csEncoder; tikhomirov@411: private final char[] hexEncodedByte = new char[] {'~', '0', '0'}; tikhomirov@411: private final ByteBuffer byteEncodingBuf; tikhomirov@411: private final CharBuffer charEncodingBuf; tikhomirov@411: tikhomirov@411: public StoragePathHelper(boolean isStore, boolean isFncache, boolean isDotencode) { tikhomirov@411: this(isStore, isFncache, isDotencode, Charset.defaultCharset()); tikhomirov@411: } tikhomirov@74: tikhomirov@411: public StoragePathHelper(boolean isStore, boolean isFncache, boolean isDotencode, Charset fsEncoding) { tikhomirov@411: assert fsEncoding != null; tikhomirov@74: store = isStore; tikhomirov@74: fncache = isFncache; tikhomirov@74: dotencode = isDotencode; tikhomirov@616: dirPathRewrite = new EncodeDirPathHelper(); tikhomirov@425: csEncoder = fsEncoding.newEncoder(); tikhomirov@411: byteEncodingBuf = ByteBuffer.allocate(Math.round(csEncoder.maxBytesPerChar()) + 1/*in fact, need ceil, hence +1*/); tikhomirov@411: charEncodingBuf = CharBuffer.allocate(1); tikhomirov@74: } tikhomirov@74: tikhomirov@418: /** tikhomirov@418: * path argument is repository-relative name of the user's file. tikhomirov@418: * It has to be normalized (slashes) and shall not include extension .i or .d. tikhomirov@418: */ tikhomirov@292: public CharSequence rewrite(CharSequence p) { tikhomirov@83: final String reservedChars = "\\:*?\"<>|"; tikhomirov@74: tikhomirov@616: CharSequence path = dirPathRewrite.rewrite(p); tikhomirov@411: tikhomirov@74: StringBuilder sb = new StringBuilder(path.length() << 1); tikhomirov@74: if (store || fncache) { tikhomirov@74: for (int i = 0; i < path.length(); i++) { tikhomirov@74: final char ch = path.charAt(i); tikhomirov@74: if (ch >= 'a' && ch <= 'z') { tikhomirov@74: sb.append(ch); // POIRAE tikhomirov@74: } else if (ch >= 'A' && ch <= 'Z') { tikhomirov@74: sb.append('_'); tikhomirov@74: sb.append(Character.toLowerCase(ch)); // Perhaps, (char) (((int) ch) + 32)? Even better, |= 0x20? tikhomirov@83: } else if (reservedChars.indexOf(ch) != -1) { tikhomirov@411: sb.append(toHexByte(ch)); tikhomirov@74: } else if ((ch >= '~' /*126*/ && ch <= 255) || ch < ' ' /*32*/) { tikhomirov@411: sb.append(toHexByte(ch)); tikhomirov@74: } else if (ch == '_') { tikhomirov@74: sb.append('_'); tikhomirov@74: sb.append('_'); tikhomirov@74: } else { tikhomirov@411: // either ASCII char that doesn't require special handling, or an Unicode character to get encoded tikhomirov@411: // according to filesystem/native encoding, see http://mercurial.selenic.com/wiki/EncodingStrategy tikhomirov@411: // despite of what the page says, use of native encoding seems worst solution to me (repositories tikhomirov@411: // can't be easily shared between OS'es with different encodings then, e.g. Win1251 and Linux UTF8). tikhomirov@411: // If the ease of sharing was not the point, what's the reason to mangle with names at all then ( tikhomirov@411: // lowercase and exclude reserved device names). tikhomirov@411: if (ch < '~' /*126*/ || !csEncoder.canEncode(ch)) { tikhomirov@411: sb.append(ch); tikhomirov@411: } else { tikhomirov@411: appendEncoded(sb, ch); tikhomirov@411: } tikhomirov@74: } tikhomirov@74: } tikhomirov@74: // auxencode tikhomirov@74: if (fncache) { tikhomirov@83: encodeWindowsDeviceNames(sb); tikhomirov@83: } tikhomirov@83: } tikhomirov@83: final int MAX_PATH_LEN = 120; tikhomirov@83: if (fncache && (sb.length() + STR_DATA.length() + ".i".length() > MAX_PATH_LEN)) { tikhomirov@411: // TODO [post-1.0] Mercurial uses system encoding for paths, hence we need to pass bytes to DigestHelper tikhomirov@411: // to ensure our sha1 value (default encoding of unicode string if one looks into DH impl) match that tikhomirov@411: // produced by Mercurial (based on native string). tikhomirov@83: String digest = new DigestHelper().sha1(STR_DATA, path, ".i").asHexString(); tikhomirov@83: final int DIR_PREFIX_LEN = 8; tikhomirov@83: // not sure why (-4) is here. 120 - 40 = up to 80 for path with ext. dh/ + ext(.i) = 3+2 tikhomirov@83: final int MAX_DIR_PREFIX = 8 * (DIR_PREFIX_LEN + 1) - 4; tikhomirov@83: sb = new StringBuilder(MAX_PATH_LEN); tikhomirov@83: for (int i = 0; i < path.length(); i++) { tikhomirov@83: final char ch = path.charAt(i); tikhomirov@83: if (ch >= 'a' && ch <= 'z') { tikhomirov@83: sb.append(ch); tikhomirov@83: } else if (ch >= 'A' && ch <= 'Z') { tikhomirov@83: sb.append((char) (ch | 0x20)); // lowercase tikhomirov@83: } else if (reservedChars.indexOf(ch) != -1) { tikhomirov@411: sb.append(toHexByte(ch)); tikhomirov@83: } else if ((ch >= '~' /*126*/ && ch <= 255) || ch < ' ' /*32*/) { tikhomirov@411: sb.append(toHexByte(ch)); tikhomirov@83: } else { tikhomirov@411: if (ch < '~' /*126*/ || !csEncoder.canEncode(ch)) { tikhomirov@411: sb.append(ch); tikhomirov@411: } else { tikhomirov@411: appendEncoded(sb, ch); tikhomirov@411: } tikhomirov@83: } tikhomirov@83: } tikhomirov@83: encodeWindowsDeviceNames(sb); tikhomirov@83: int fnameStart = sb.lastIndexOf("/"); // since we rewrite file names, it never ends with slash (for dirs, I'd pass length-2); tikhomirov@83: StringBuilder completeHashName = new StringBuilder(MAX_PATH_LEN); tikhomirov@83: completeHashName.append(STR_STORE); tikhomirov@83: completeHashName.append(STR_DH); tikhomirov@83: if (fnameStart == -1) { tikhomirov@83: // no dirs, just long filename tikhomirov@83: sb.setLength(MAX_PATH_LEN - 40 /*digest.length()*/ - STR_DH.length() - ".i".length()); tikhomirov@83: completeHashName.append(sb); tikhomirov@83: } else { tikhomirov@83: StringBuilder sb2 = new StringBuilder(MAX_PATH_LEN); tikhomirov@83: int x = 0; tikhomirov@74: do { tikhomirov@74: int i = sb.indexOf("/", x); tikhomirov@83: final int sb2Len = sb2.length(); tikhomirov@83: if (i-x <= DIR_PREFIX_LEN) { // a b c d e f g h / tikhomirov@83: sb2.append(sb, x, i + 1); // with slash tikhomirov@83: } else { tikhomirov@83: sb2.append(sb, x, x + DIR_PREFIX_LEN); tikhomirov@83: // may unexpectedly end with bad character tikhomirov@83: final int last = sb2.length()-1; tikhomirov@83: char lastChar = sb2.charAt(last); tikhomirov@83: assert lastChar == sb.charAt(x + DIR_PREFIX_LEN - 1); tikhomirov@83: if (lastChar == '.' || lastChar == ' ') { tikhomirov@83: sb2.setCharAt(last, '_'); tikhomirov@74: } tikhomirov@83: sb2.append('/'); tikhomirov@74: } tikhomirov@83: if (sb2.length()-1 > MAX_DIR_PREFIX) { tikhomirov@83: sb2.setLength(sb2Len); // strip off last segment, it's too much tikhomirov@83: break; tikhomirov@74: } tikhomirov@83: x = i+1; tikhomirov@83: } while (x < fnameStart); tikhomirov@83: assert sb2.charAt(sb2.length() - 1) == '/'; tikhomirov@83: int left = MAX_PATH_LEN - sb2.length() - 40 /*digest.length()*/ - STR_DH.length() - ".i".length(); tikhomirov@83: assert left >= 0; tikhomirov@83: fnameStart++; // move from / to actual name tikhomirov@346: if (fnameStart + left > sb.length()) { tikhomirov@346: // there left less chars in the mangled name that we can fit tikhomirov@346: sb2.append(sb, fnameStart, sb.length()); tikhomirov@346: int stillAvailable = (fnameStart+left) - sb.length(); tikhomirov@346: // stillAvailable > 0; tikhomirov@346: sb2.append(".i", 0, stillAvailable > 2 ? 2 : stillAvailable); tikhomirov@346: } else { tikhomirov@346: // add as much as we can tikhomirov@346: sb2.append(sb, fnameStart, fnameStart+left); tikhomirov@346: } tikhomirov@83: completeHashName.append(sb2); tikhomirov@74: } tikhomirov@83: completeHashName.append(digest); tikhomirov@83: sb = completeHashName; tikhomirov@83: } else if (store) { tikhomirov@74: sb.insert(0, STR_STORE + STR_DATA); tikhomirov@74: } tikhomirov@74: sb.append(".i"); tikhomirov@74: return sb.toString(); tikhomirov@74: } tikhomirov@83: tikhomirov@83: private void encodeWindowsDeviceNames(StringBuilder sb) { tikhomirov@83: int x = 0; // last segment start tikhomirov@83: final TreeSet windowsReservedFilenames = new TreeSet(); tikhomirov@83: windowsReservedFilenames.addAll(Arrays.asList("con prn aux nul com1 com2 com3 com4 com5 com6 com7 com8 com9 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9".split(" "))); tikhomirov@83: do { tikhomirov@83: int i = sb.indexOf("/", x); tikhomirov@83: if (i == -1) { tikhomirov@83: i = sb.length(); tikhomirov@83: } tikhomirov@83: // windows reserved filenames are at least of length 3 tikhomirov@83: if (i - x >= 3) { tikhomirov@83: boolean found = false; tikhomirov@83: if (i-x == 3 || i-x == 4) { tikhomirov@83: found = windowsReservedFilenames.contains(sb.subSequence(x, i)); tikhomirov@83: } else if (sb.charAt(x+3) == '.') { // implicit i-x > 3 tikhomirov@83: found = windowsReservedFilenames.contains(sb.subSequence(x, x+3)); tikhomirov@83: } else if (i-x > 4 && sb.charAt(x+4) == '.') { tikhomirov@83: found = windowsReservedFilenames.contains(sb.subSequence(x, x+4)); tikhomirov@83: } tikhomirov@83: if (found) { tikhomirov@411: // x+2 as we change the third letter in device name tikhomirov@411: replace(sb, x+2, toHexByte(sb.charAt(x+2))); tikhomirov@83: i += 2; tikhomirov@83: } tikhomirov@83: } tikhomirov@83: if (dotencode && (sb.charAt(x) == '.' || sb.charAt(x) == ' ')) { tikhomirov@411: char dotOrSpace = sb.charAt(x); // beware, replace() below changes charAt(x), rather get a copy tikhomirov@411: // not to get ~7e for '.' instead of ~2e, if later refactoring changes the logic tikhomirov@411: replace(sb, x, toHexByte(dotOrSpace)); tikhomirov@83: i += 2; tikhomirov@83: } tikhomirov@83: x = i+1; tikhomirov@83: } while (x < sb.length()); tikhomirov@83: } tikhomirov@411: tikhomirov@411: // shall be synchronized in case of multithreaded use tikhomirov@411: private void appendEncoded(StringBuilder sb, char ch) { tikhomirov@411: charEncodingBuf.clear(); tikhomirov@411: byteEncodingBuf.clear(); tikhomirov@411: charEncodingBuf.put(ch).flip(); tikhomirov@411: csEncoder.encode(charEncodingBuf, byteEncodingBuf, false); tikhomirov@411: byteEncodingBuf.flip(); tikhomirov@411: while (byteEncodingBuf.hasRemaining()) { tikhomirov@411: sb.append(toHexByte(byteEncodingBuf.get())); tikhomirov@411: } tikhomirov@411: } tikhomirov@74: tikhomirov@411: /** tikhomirov@411: * replace char at sb[index] with a sequence tikhomirov@411: */ tikhomirov@411: private static void replace(StringBuilder sb, int index, char[] with) { tikhomirov@411: // there's StringBuilder.replace(int, int+1, String), but with char[] - I don't want to make a string out of hexEncodedByte tikhomirov@411: sb.setCharAt(index, with[0]); tikhomirov@411: sb.insert(index+1, with, 1, with.length - 1); tikhomirov@411: } tikhomirov@411: tikhomirov@411: /** tikhomirov@411: * put hex representation of byte ch into buf from specified offset tikhomirov@411: */ tikhomirov@411: private char[] toHexByte(int ch) { tikhomirov@74: final String hexDigits = "0123456789abcdef"; tikhomirov@411: hexEncodedByte[1] = hexDigits.charAt((ch & 0x00F0) >>> 4); tikhomirov@411: hexEncodedByte[2] = hexDigits.charAt(ch & 0x0F); tikhomirov@411: return hexEncodedByte; tikhomirov@74: } tikhomirov@74: }