comparison src/org/tmatesoft/hg/internal/StoragePathHelper.java @ 83:a5275143664c

Complete path hash calculation of fncache requirement
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Tue, 25 Jan 2011 22:44:14 +0100
parents 4222b04f34ee
children a3a2e5deb320
comparison
equal deleted inserted replaced
82:7255c971dd66 83:a5275143664c
17 package org.tmatesoft.hg.internal; 17 package org.tmatesoft.hg.internal;
18 18
19 import java.util.Arrays; 19 import java.util.Arrays;
20 import java.util.TreeSet; 20 import java.util.TreeSet;
21 21
22 import org.tmatesoft.hg.repo.HgRepository;
23 import org.tmatesoft.hg.util.PathRewrite; 22 import org.tmatesoft.hg.util.PathRewrite;
24 23
25 /** 24 /**
26 * @see http://mercurial.selenic.com/wiki/CaseFoldingPlan 25 * @see http://mercurial.selenic.com/wiki/CaseFoldingPlan
27 * @see http://mercurial.selenic.com/wiki/fncacheRepoFormat 26 * @see http://mercurial.selenic.com/wiki/fncacheRepoFormat
47 // FIXME much more to be done, see store.py:_hybridencode 46 // FIXME much more to be done, see store.py:_hybridencode
48 public String rewrite(String path) { 47 public String rewrite(String path) {
49 final String STR_STORE = "store/"; 48 final String STR_STORE = "store/";
50 final String STR_DATA = "data/"; 49 final String STR_DATA = "data/";
51 final String STR_DH = "dh/"; 50 final String STR_DH = "dh/";
51 final String reservedChars = "\\:*?\"<>|";
52 char[] hexByte = new char[2];
52 53
53 path = path.replace(".hg/", ".hg.hg/").replace(".i/", ".i.hg/").replace(".d/", ".d.hg/"); 54 path = path.replace(".hg/", ".hg.hg/").replace(".i/", ".i.hg/").replace(".d/", ".d.hg/");
54 StringBuilder sb = new StringBuilder(path.length() << 1); 55 StringBuilder sb = new StringBuilder(path.length() << 1);
55 if (store || fncache) { 56 if (store || fncache) {
56 // encodefilename 57 // encodefilename
57 final String reservedChars = "\\:*?\"<>|";
58 // in fact, \\ is unlikely to match, ever - we've replaced all of them already, above. Just regards to store.py
59 int x;
60 char[] hexByte = new char[2];
61 for (int i = 0; i < path.length(); i++) { 58 for (int i = 0; i < path.length(); i++) {
62 final char ch = path.charAt(i); 59 final char ch = path.charAt(i);
63 if (ch >= 'a' && ch <= 'z') { 60 if (ch >= 'a' && ch <= 'z') {
64 sb.append(ch); // POIRAE 61 sb.append(ch); // POIRAE
65 } else if (ch >= 'A' && ch <= 'Z') { 62 } else if (ch >= 'A' && ch <= 'Z') {
66 sb.append('_'); 63 sb.append('_');
67 sb.append(Character.toLowerCase(ch)); // Perhaps, (char) (((int) ch) + 32)? Even better, |= 0x20? 64 sb.append(Character.toLowerCase(ch)); // Perhaps, (char) (((int) ch) + 32)? Even better, |= 0x20?
68 } else if ( (x = reservedChars.indexOf(ch)) != -1) { 65 } else if (reservedChars.indexOf(ch) != -1) {
69 sb.append('~'); 66 sb.append('~');
70 sb.append(toHexByte(reservedChars.charAt(x), hexByte)); 67 sb.append(toHexByte(ch, hexByte));
71 } else if ((ch >= '~' /*126*/ && ch <= 255) || ch < ' ' /*32*/) { 68 } else if ((ch >= '~' /*126*/ && ch <= 255) || ch < ' ' /*32*/) {
72 sb.append('~'); 69 sb.append('~');
73 sb.append(toHexByte(ch, hexByte)); 70 sb.append(toHexByte(ch, hexByte));
74 } else if (ch == '_') { 71 } else if (ch == '_') {
75 // note, encoding from store.py:_buildencodefun and :_build_lower_encodefun
76 // differ in the way they process '_' (latter doesn't escape it)
77 sb.append('_'); 72 sb.append('_');
78 sb.append('_'); 73 sb.append('_');
79 } else { 74 } else {
80 sb.append(ch); 75 sb.append(ch);
81 } 76 }
82 } 77 }
83 // auxencode 78 // auxencode
84 if (fncache) { 79 if (fncache) {
85 x = 0; // last segment start 80 encodeWindowsDeviceNames(sb);
86 final TreeSet<String> windowsReservedFilenames = new TreeSet<String>(); 81 }
87 windowsReservedFilenames.addAll(Arrays.asList("con prn aux nul com1 com2 com3 com4 com5 com6 com7 com8 com9 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9".split(" "))); 82 }
83 final int MAX_PATH_LEN = 120;
84 if (fncache && (sb.length() + STR_DATA.length() + ".i".length() > MAX_PATH_LEN)) {
85 String digest = new DigestHelper().sha1(STR_DATA, path, ".i").asHexString();
86 final int DIR_PREFIX_LEN = 8;
87 // not sure why (-4) is here. 120 - 40 = up to 80 for path with ext. dh/ + ext(.i) = 3+2
88 final int MAX_DIR_PREFIX = 8 * (DIR_PREFIX_LEN + 1) - 4;
89 sb = new StringBuilder(MAX_PATH_LEN);
90 for (int i = 0; i < path.length(); i++) {
91 final char ch = path.charAt(i);
92 if (ch >= 'a' && ch <= 'z') {
93 sb.append(ch);
94 } else if (ch >= 'A' && ch <= 'Z') {
95 sb.append((char) (ch | 0x20)); // lowercase
96 } else if (reservedChars.indexOf(ch) != -1) {
97 sb.append('~');
98 sb.append(toHexByte(ch, hexByte));
99 } else if ((ch >= '~' /*126*/ && ch <= 255) || ch < ' ' /*32*/) {
100 sb.append('~');
101 sb.append(toHexByte(ch, hexByte));
102 } else {
103 sb.append(ch);
104 }
105 }
106 encodeWindowsDeviceNames(sb);
107 int fnameStart = sb.lastIndexOf("/"); // since we rewrite file names, it never ends with slash (for dirs, I'd pass length-2);
108 StringBuilder completeHashName = new StringBuilder(MAX_PATH_LEN);
109 completeHashName.append(STR_STORE);
110 completeHashName.append(STR_DH);
111 if (fnameStart == -1) {
112 // no dirs, just long filename
113 sb.setLength(MAX_PATH_LEN - 40 /*digest.length()*/ - STR_DH.length() - ".i".length());
114 completeHashName.append(sb);
115 } else {
116 StringBuilder sb2 = new StringBuilder(MAX_PATH_LEN);
117 int x = 0;
88 do { 118 do {
89 int i = sb.indexOf("/", x); 119 int i = sb.indexOf("/", x);
90 if (i == -1) { 120 final int sb2Len = sb2.length();
91 i = sb.length(); 121 if (i-x <= DIR_PREFIX_LEN) { // a b c d e f g h /
122 sb2.append(sb, x, i + 1); // with slash
123 } else {
124 sb2.append(sb, x, x + DIR_PREFIX_LEN);
125 // may unexpectedly end with bad character
126 final int last = sb2.length()-1;
127 char lastChar = sb2.charAt(last);
128 assert lastChar == sb.charAt(x + DIR_PREFIX_LEN - 1);
129 if (lastChar == '.' || lastChar == ' ') {
130 sb2.setCharAt(last, '_');
131 }
132 sb2.append('/');
92 } 133 }
93 // windows reserved filenames are at least of length 3 134 if (sb2.length()-1 > MAX_DIR_PREFIX) {
94 if (i - x >= 3) { 135 sb2.setLength(sb2Len); // strip off last segment, it's too much
95 boolean found = false; 136 break;
96 if (i-x == 3) {
97 found = windowsReservedFilenames.contains(sb.subSequence(x, i));
98 } else if (sb.charAt(x+3) == '.') { // implicit i-x > 3
99 found = windowsReservedFilenames.contains(sb.subSequence(x, x+3));
100 } else if (i-x > 4 && sb.charAt(x+4) == '.') {
101 found = windowsReservedFilenames.contains(sb.subSequence(x, x+4));
102 }
103 if (found) {
104 sb.setCharAt(x, '~');
105 sb.insert(x+1, toHexByte(sb.charAt(x+2), hexByte));
106 i += 2;
107 }
108 } 137 }
109 if (dotencode && (sb.charAt(x) == '.' || sb.charAt(x) == ' ')) { 138 x = i+1;
110 sb.insert(x+1, toHexByte(sb.charAt(x), hexByte)); 139 } while (x < fnameStart);
111 sb.setCharAt(x, '~'); // setChar *after* charAt/insert to get ~2e, not ~7e for '.' 140 assert sb2.charAt(sb2.length() - 1) == '/';
112 i += 2; 141 int left = MAX_PATH_LEN - sb2.length() - 40 /*digest.length()*/ - STR_DH.length() - ".i".length();
113 } 142 assert left >= 0;
114 x = i+1; 143 fnameStart++; // move from / to actual name
115 } while (x < sb.length()); 144 sb2.append(sb, fnameStart, fnameStart + left > sb.length() ? sb.length() : fnameStart+left);
145 completeHashName.append(sb2);
116 } 146 }
117 } 147 completeHashName.append(digest);
118 final int MAX_PATH_LEN_IN_HGSTORE = 120; 148 sb = completeHashName;
119 if (fncache && (sb.length() + STR_DATA.length() > MAX_PATH_LEN_IN_HGSTORE)) { 149 } else if (store) {
120 throw HgRepository.notImplemented(); // FIXME digest and fncache use
121 }
122 if (store) {
123 sb.insert(0, STR_STORE + STR_DATA); 150 sb.insert(0, STR_STORE + STR_DATA);
124 } 151 }
125 sb.append(".i"); 152 sb.append(".i");
126 return sb.toString(); 153 return sb.toString();
154 }
155
156 private void encodeWindowsDeviceNames(StringBuilder sb) {
157 char[] hexByte = new char[2];
158 int x = 0; // last segment start
159 final TreeSet<String> windowsReservedFilenames = new TreeSet<String>();
160 windowsReservedFilenames.addAll(Arrays.asList("con prn aux nul com1 com2 com3 com4 com5 com6 com7 com8 com9 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9".split(" ")));
161 do {
162 int i = sb.indexOf("/", x);
163 if (i == -1) {
164 i = sb.length();
165 }
166 // windows reserved filenames are at least of length 3
167 if (i - x >= 3) {
168 boolean found = false;
169 if (i-x == 3 || i-x == 4) {
170 found = windowsReservedFilenames.contains(sb.subSequence(x, i));
171 } else if (sb.charAt(x+3) == '.') { // implicit i-x > 3
172 found = windowsReservedFilenames.contains(sb.subSequence(x, x+3));
173 } else if (i-x > 4 && sb.charAt(x+4) == '.') {
174 found = windowsReservedFilenames.contains(sb.subSequence(x, x+4));
175 }
176 if (found) {
177 sb.insert(x+3, toHexByte(sb.charAt(x+2), hexByte));
178 sb.setCharAt(x+2, '~');
179 i += 2;
180 }
181 }
182 if (dotencode && (sb.charAt(x) == '.' || sb.charAt(x) == ' ')) {
183 sb.insert(x+1, toHexByte(sb.charAt(x), hexByte));
184 sb.setCharAt(x, '~'); // setChar *after* charAt/insert to get ~2e, not ~7e for '.'
185 i += 2;
186 }
187 x = i+1;
188 } while (x < sb.length());
127 } 189 }
128 190
129 private static char[] toHexByte(int ch, char[] buf) { 191 private static char[] toHexByte(int ch, char[] buf) {
130 assert buf.length > 1; 192 assert buf.length > 1;
131 final String hexDigits = "0123456789abcdef"; 193 final String hexDigits = "0123456789abcdef";