Mercurial > jhg
comparison src/org/tmatesoft/hg/internal/StoragePathHelper.java @ 83:a5275143664c
Complete path hash calculation of fncache requirement
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
|---|---|
| date | Tue, 25 Jan 2011 22:44:14 +0100 |
| parents | 4222b04f34ee |
| children | a3a2e5deb320 |
comparison (legend: equal, deleted, inserted, replaced)
| 82:7255c971dd66 | 83:a5275143664c |
|---|---|
| 17 package org.tmatesoft.hg.internal; | 17 package org.tmatesoft.hg.internal; |
| 18 | 18 |
| 19 import java.util.Arrays; | 19 import java.util.Arrays; |
| 20 import java.util.TreeSet; | 20 import java.util.TreeSet; |
| 21 | 21 |
| 22 import org.tmatesoft.hg.repo.HgRepository; | |
| 23 import org.tmatesoft.hg.util.PathRewrite; | 22 import org.tmatesoft.hg.util.PathRewrite; |
| 24 | 23 |
| 25 /** | 24 /** |
| 26 * @see http://mercurial.selenic.com/wiki/CaseFoldingPlan | 25 * @see http://mercurial.selenic.com/wiki/CaseFoldingPlan |
| 27 * @see http://mercurial.selenic.com/wiki/fncacheRepoFormat | 26 * @see http://mercurial.selenic.com/wiki/fncacheRepoFormat |
| 47 // FIXME much more to be done, see store.py:_hybridencode | 46 // FIXME much more to be done, see store.py:_hybridencode |
| 48 public String rewrite(String path) { | 47 public String rewrite(String path) { |
| 49 final String STR_STORE = "store/"; | 48 final String STR_STORE = "store/"; |
| 50 final String STR_DATA = "data/"; | 49 final String STR_DATA = "data/"; |
| 51 final String STR_DH = "dh/"; | 50 final String STR_DH = "dh/"; |
| 51 final String reservedChars = "\\:*?\"<>|"; | |
| 52 char[] hexByte = new char[2]; | |
| 52 | 53 |
| 53 path = path.replace(".hg/", ".hg.hg/").replace(".i/", ".i.hg/").replace(".d/", ".d.hg/"); | 54 path = path.replace(".hg/", ".hg.hg/").replace(".i/", ".i.hg/").replace(".d/", ".d.hg/"); |
| 54 StringBuilder sb = new StringBuilder(path.length() << 1); | 55 StringBuilder sb = new StringBuilder(path.length() << 1); |
| 55 if (store || fncache) { | 56 if (store || fncache) { |
| 56 // encodefilename | 57 // encodefilename |
| 57 final String reservedChars = "\\:*?\"<>|"; | |
| 58 // in fact, \\ is unlikely to match, ever - we've replaced all of them already, above. Just regards to store.py | |
| 59 int x; | |
| 60 char[] hexByte = new char[2]; | |
| 61 for (int i = 0; i < path.length(); i++) { | 58 for (int i = 0; i < path.length(); i++) { |
| 62 final char ch = path.charAt(i); | 59 final char ch = path.charAt(i); |
| 63 if (ch >= 'a' && ch <= 'z') { | 60 if (ch >= 'a' && ch <= 'z') { |
| 64 sb.append(ch); // POIRAE | 61 sb.append(ch); // POIRAE |
| 65 } else if (ch >= 'A' && ch <= 'Z') { | 62 } else if (ch >= 'A' && ch <= 'Z') { |
| 66 sb.append('_'); | 63 sb.append('_'); |
| 67 sb.append(Character.toLowerCase(ch)); // Perhaps, (char) (((int) ch) + 32)? Even better, |= 0x20? | 64 sb.append(Character.toLowerCase(ch)); // Perhaps, (char) (((int) ch) + 32)? Even better, |= 0x20? |
| 68 } else if ( (x = reservedChars.indexOf(ch)) != -1) { | 65 } else if (reservedChars.indexOf(ch) != -1) { |
| 69 sb.append('~'); | 66 sb.append('~'); |
| 70 sb.append(toHexByte(reservedChars.charAt(x), hexByte)); | 67 sb.append(toHexByte(ch, hexByte)); |
| 71 } else if ((ch >= '~' /*126*/ && ch <= 255) || ch < ' ' /*32*/) { | 68 } else if ((ch >= '~' /*126*/ && ch <= 255) || ch < ' ' /*32*/) { |
| 72 sb.append('~'); | 69 sb.append('~'); |
| 73 sb.append(toHexByte(ch, hexByte)); | 70 sb.append(toHexByte(ch, hexByte)); |
| 74 } else if (ch == '_') { | 71 } else if (ch == '_') { |
| 75 // note, encoding from store.py:_buildencodefun and :_build_lower_encodefun | |
| 76 // differ in the way they process '_' (latter doesn't escape it) | |
| 77 sb.append('_'); | 72 sb.append('_'); |
| 78 sb.append('_'); | 73 sb.append('_'); |
| 79 } else { | 74 } else { |
| 80 sb.append(ch); | 75 sb.append(ch); |
| 81 } | 76 } |
| 82 } | 77 } |
| 83 // auxencode | 78 // auxencode |
| 84 if (fncache) { | 79 if (fncache) { |
| 85 x = 0; // last segment start | 80 encodeWindowsDeviceNames(sb); |
| 86 final TreeSet<String> windowsReservedFilenames = new TreeSet<String>(); | 81 } |
| 87 windowsReservedFilenames.addAll(Arrays.asList("con prn aux nul com1 com2 com3 com4 com5 com6 com7 com8 com9 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9".split(" "))); | 82 } |
| 83 final int MAX_PATH_LEN = 120; | |
| 84 if (fncache && (sb.length() + STR_DATA.length() + ".i".length() > MAX_PATH_LEN)) { | |
| 85 String digest = new DigestHelper().sha1(STR_DATA, path, ".i").asHexString(); | |
| 86 final int DIR_PREFIX_LEN = 8; | |
| 87 // not sure why (-4) is here. 120 - 40 = up to 80 for path with ext. dh/ + ext(.i) = 3+2 | |
| 88 final int MAX_DIR_PREFIX = 8 * (DIR_PREFIX_LEN + 1) - 4; | |
| 89 sb = new StringBuilder(MAX_PATH_LEN); | |
| 90 for (int i = 0; i < path.length(); i++) { | |
| 91 final char ch = path.charAt(i); | |
| 92 if (ch >= 'a' && ch <= 'z') { | |
| 93 sb.append(ch); | |
| 94 } else if (ch >= 'A' && ch <= 'Z') { | |
| 95 sb.append((char) (ch | 0x20)); // lowercase | |
| 96 } else if (reservedChars.indexOf(ch) != -1) { | |
| 97 sb.append('~'); | |
| 98 sb.append(toHexByte(ch, hexByte)); | |
| 99 } else if ((ch >= '~' /*126*/ && ch <= 255) || ch < ' ' /*32*/) { | |
| 100 sb.append('~'); | |
| 101 sb.append(toHexByte(ch, hexByte)); | |
| 102 } else { | |
| 103 sb.append(ch); | |
| 104 } | |
| 105 } | |
| 106 encodeWindowsDeviceNames(sb); | |
| 107 int fnameStart = sb.lastIndexOf("/"); // since we rewrite file names, it never ends with slash (for dirs, I'd pass length-2); | |
| 108 StringBuilder completeHashName = new StringBuilder(MAX_PATH_LEN); | |
| 109 completeHashName.append(STR_STORE); | |
| 110 completeHashName.append(STR_DH); | |
| 111 if (fnameStart == -1) { | |
| 112 // no dirs, just long filename | |
| 113 sb.setLength(MAX_PATH_LEN - 40 /*digest.length()*/ - STR_DH.length() - ".i".length()); | |
| 114 completeHashName.append(sb); | |
| 115 } else { | |
| 116 StringBuilder sb2 = new StringBuilder(MAX_PATH_LEN); | |
| 117 int x = 0; | |
| 88 do { | 118 do { |
| 89 int i = sb.indexOf("/", x); | 119 int i = sb.indexOf("/", x); |
| 90 if (i == -1) { | 120 final int sb2Len = sb2.length(); |
| 91 i = sb.length(); | 121 if (i-x <= DIR_PREFIX_LEN) { // a b c d e f g h / |
| 122 sb2.append(sb, x, i + 1); // with slash | |
| 123 } else { | |
| 124 sb2.append(sb, x, x + DIR_PREFIX_LEN); | |
| 125 // may unexpectedly end with bad character | |
| 126 final int last = sb2.length()-1; | |
| 127 char lastChar = sb2.charAt(last); | |
| 128 assert lastChar == sb.charAt(x + DIR_PREFIX_LEN - 1); | |
| 129 if (lastChar == '.' || lastChar == ' ') { | |
| 130 sb2.setCharAt(last, '_'); | |
| 131 } | |
| 132 sb2.append('/'); | |
| 92 } | 133 } |
| 93 // windows reserved filenames are at least of length 3 | 134 if (sb2.length()-1 > MAX_DIR_PREFIX) { |
| 94 if (i - x >= 3) { | 135 sb2.setLength(sb2Len); // strip off last segment, it's too much |
| 95 boolean found = false; | 136 break; |
| 96 if (i-x == 3) { | |
| 97 found = windowsReservedFilenames.contains(sb.subSequence(x, i)); | |
| 98 } else if (sb.charAt(x+3) == '.') { // implicit i-x > 3 | |
| 99 found = windowsReservedFilenames.contains(sb.subSequence(x, x+3)); | |
| 100 } else if (i-x > 4 && sb.charAt(x+4) == '.') { | |
| 101 found = windowsReservedFilenames.contains(sb.subSequence(x, x+4)); | |
| 102 } | |
| 103 if (found) { | |
| 104 sb.setCharAt(x, '~'); | |
| 105 sb.insert(x+1, toHexByte(sb.charAt(x+2), hexByte)); | |
| 106 i += 2; | |
| 107 } | |
| 108 } | 137 } |
| 109 if (dotencode && (sb.charAt(x) == '.' || sb.charAt(x) == ' ')) { | 138 x = i+1; |
| 110 sb.insert(x+1, toHexByte(sb.charAt(x), hexByte)); | 139 } while (x < fnameStart); |
| 111 sb.setCharAt(x, '~'); // setChar *after* charAt/insert to get ~2e, not ~7e for '.' | 140 assert sb2.charAt(sb2.length() - 1) == '/'; |
| 112 i += 2; | 141 int left = MAX_PATH_LEN - sb2.length() - 40 /*digest.length()*/ - STR_DH.length() - ".i".length(); |
| 113 } | 142 assert left >= 0; |
| 114 x = i+1; | 143 fnameStart++; // move from / to actual name |
| 115 } while (x < sb.length()); | 144 sb2.append(sb, fnameStart, fnameStart + left > sb.length() ? sb.length() : fnameStart+left); |
| 145 completeHashName.append(sb2); | |
| 116 } | 146 } |
| 117 } | 147 completeHashName.append(digest); |
| 118 final int MAX_PATH_LEN_IN_HGSTORE = 120; | 148 sb = completeHashName; |
| 119 if (fncache && (sb.length() + STR_DATA.length() > MAX_PATH_LEN_IN_HGSTORE)) { | 149 } else if (store) { |
| 120 throw HgRepository.notImplemented(); // FIXME digest and fncache use | |
| 121 } | |
| 122 if (store) { | |
| 123 sb.insert(0, STR_STORE + STR_DATA); | 150 sb.insert(0, STR_STORE + STR_DATA); |
| 124 } | 151 } |
| 125 sb.append(".i"); | 152 sb.append(".i"); |
| 126 return sb.toString(); | 153 return sb.toString(); |
| 154 } | |
| 155 | |
| 156 private void encodeWindowsDeviceNames(StringBuilder sb) { | |
| 157 char[] hexByte = new char[2]; | |
| 158 int x = 0; // last segment start | |
| 159 final TreeSet<String> windowsReservedFilenames = new TreeSet<String>(); | |
| 160 windowsReservedFilenames.addAll(Arrays.asList("con prn aux nul com1 com2 com3 com4 com5 com6 com7 com8 com9 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9".split(" "))); | |
| 161 do { | |
| 162 int i = sb.indexOf("/", x); | |
| 163 if (i == -1) { | |
| 164 i = sb.length(); | |
| 165 } | |
| 166 // windows reserved filenames are at least of length 3 | |
| 167 if (i - x >= 3) { | |
| 168 boolean found = false; | |
| 169 if (i-x == 3 || i-x == 4) { | |
| 170 found = windowsReservedFilenames.contains(sb.subSequence(x, i)); | |
| 171 } else if (sb.charAt(x+3) == '.') { // implicit i-x > 3 | |
| 172 found = windowsReservedFilenames.contains(sb.subSequence(x, x+3)); | |
| 173 } else if (i-x > 4 && sb.charAt(x+4) == '.') { | |
| 174 found = windowsReservedFilenames.contains(sb.subSequence(x, x+4)); | |
| 175 } | |
| 176 if (found) { | |
| 177 sb.insert(x+3, toHexByte(sb.charAt(x+2), hexByte)); | |
| 178 sb.setCharAt(x+2, '~'); | |
| 179 i += 2; | |
| 180 } | |
| 181 } | |
| 182 if (dotencode && (sb.charAt(x) == '.' || sb.charAt(x) == ' ')) { | |
| 183 sb.insert(x+1, toHexByte(sb.charAt(x), hexByte)); | |
| 184 sb.setCharAt(x, '~'); // setChar *after* charAt/insert to get ~2e, not ~7e for '.' | |
| 185 i += 2; | |
| 186 } | |
| 187 x = i+1; | |
| 188 } while (x < sb.length()); | |
| 127 } | 189 } |
| 128 | 190 |
| 129 private static char[] toHexByte(int ch, char[] buf) { | 191 private static char[] toHexByte(int ch, char[] buf) { |
| 130 assert buf.length > 1; | 192 assert buf.length > 1; |
| 131 final String hexDigits = "0123456789abcdef"; | 193 final String hexDigits = "0123456789abcdef"; |
