comparison src/org/tmatesoft/hg/repo/HgManifest.java @ 285:6dbbc53fc46d

Use Path instead of plain String for manifest file names
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Sat, 03 Sep 2011 21:46:13 +0200
parents c5980f287cc4
children 650b45d290b1
comparison
equal deleted inserted replaced
284:7232b94f2ae3 285:6dbbc53fc46d
39 * @author Artem Tikhomirov 39 * @author Artem Tikhomirov
40 * @author TMate Software Ltd. 40 * @author TMate Software Ltd.
41 */ 41 */
42 public class HgManifest extends Revlog { 42 public class HgManifest extends Revlog {
43 private RevisionMapper revisionMap; 43 private RevisionMapper revisionMap;
44
45 public enum Flags {
46 Exec, Link;
47
48 static Flags parse(String flags) {
49 if ("x".equalsIgnoreCase(flags)) {
50 return Exec;
51 }
52 if ("l".equalsIgnoreCase(flags)) {
53 return Link;
54 }
55 if (flags == null) {
56 return null;
57 }
58 throw new IllegalStateException(flags);
59 }
60
61 static Flags parse(byte[] data, int start, int length) {
62 if (length == 0) {
63 return null;
64 }
65 if (length == 1) {
66 if (data[start] == 'x') {
67 return Exec;
68 }
69 if (data[start] == 'l') {
70 return Link;
71 }
72 // FALL THROUGH
73 }
74 throw new IllegalStateException(new String(data, start, length));
75 }
76
77 String nativeString() {
78 if (this == Exec) {
79 return "x";
80 }
81 if (this == Link) {
82 return "l";
83 }
84 throw new IllegalStateException(toString());
85 }
86 }
44 87
45 /*package-local*/ HgManifest(HgRepository hgRepo, RevlogStream content) { 88 /*package-local*/ HgManifest(HgRepository hgRepo, RevlogStream content) {
46 super(hgRepo, content); 89 super(hgRepo, content);
47 } 90 }
48 91
144 return rv[0]; 187 return rv[0];
145 } 188 }
146 189
147 public interface Inspector { 190 public interface Inspector {
148 boolean begin(int mainfestRevision, Nodeid nid, int changelogRevision); 191 boolean begin(int mainfestRevision, Nodeid nid, int changelogRevision);
192 /**
193 * @deprecated switch to {@link Inspector2#next(Nodeid, Path, Flags)}
194 */
195 @Deprecated
149 boolean next(Nodeid nid, String fname, String flags); 196 boolean next(Nodeid nid, String fname, String flags);
150 boolean end(int manifestRevision); 197 boolean end(int manifestRevision);
151 } 198 }
152 199
153 public interface ElementProxy<T> { 200 @Experimental(reason="Explore Path alternative for filenames and enum for flags")
154 T get(); 201 public interface Inspector2 extends Inspector {
202 boolean next(Nodeid nid, Path fname, Flags flags);
155 } 203 }
156 204
157 /** 205 /**
158 * When Pool uses Strings directly, 206 * When Pool uses Strings directly,
159 * ManifestParser creates new String instance with new char[] value, and does byte->char conversion. 207 * ManifestParser creates new String instance with new char[] value, and does byte->char conversion.
160 * For cpython repo, walk(0..10k), there are over 16 million filenames, of them only 3020 unique. 208 * For cpython repo, walk(0..10k), there are over 16 million filenames, of them only 3020 unique.
161 * This means there are 15.9 million useless char[] instances and byte->char conversions 209 * This means there are 15.9 million useless char[] instances and byte->char conversions
162 * 210 *
163 * When String is wrapped into {@link StringProxy}, there's extra overhead of byte[] representation 211 * When String (Path) is wrapped into {@link PathProxy}, there's extra overhead of byte[] representation
164 * of the String, but these are only for unique Strings (3020 in the example above). Besides, I save 212 * of the String, but these are only for unique Strings (Paths) (3020 in the example above). Besides, I save
165 * useless char[] and byte->char conversions. 213 * useless char[] and byte->char conversions.
166 */ 214 */
167 private static class StringProxy { 215 private static class PathProxy {
168 private byte[] data; 216 private byte[] data;
169 private int start; 217 private int start;
170 private final int hash, length; 218 private final int hash, length;
171 private String result; 219 private Path result;
172 220
173 public StringProxy(byte[] data, int start, int length) { 221 public PathProxy(byte[] data, int start, int length) {
174 this.data = data; 222 this.data = data;
175 this.start = start; 223 this.start = start;
176 this.length = length; 224 this.length = length;
177 225
178 // copy from String.hashCode(). In fact, not necessarily match result of String(data).hashCode 226 // copy from String.hashCode(). In fact, not necessarily match result of String(data).hashCode
185 hash = h; 233 hash = h;
186 } 234 }
187 235
188 @Override 236 @Override
189 public boolean equals(Object obj) { 237 public boolean equals(Object obj) {
190 if (false == obj instanceof StringProxy) { 238 if (false == obj instanceof PathProxy) {
191 return false; 239 return false;
192 } 240 }
193 StringProxy o = (StringProxy) obj; 241 PathProxy o = (PathProxy) obj;
194 if (o.result != null && result != null) { 242 if (o.result != null && result != null) {
195 return result.equals(o.result); 243 return result.equals(o.result);
196 } 244 }
197 if (o.length != length || o.hash != hash) { 245 if (o.length != length || o.hash != hash) {
198 return false; 246 return false;
207 @Override 255 @Override
208 public int hashCode() { 256 public int hashCode() {
209 return hash; 257 return hash;
210 } 258 }
211 259
212 public String freeze() { 260 public Path freeze() {
213 if (result == null) { 261 if (result == null) {
214 result = new String(data, start, length); 262 result = Path.create(new String(data, start, length));
215 // release reference to bigger data array, make a copy of relevant part only 263 // release reference to bigger data array, make a copy of relevant part only
264 // use original bytes, not those from String above to avoid cache misses due to different encodings
216 byte[] d = new byte[length]; 265 byte[] d = new byte[length];
217 System.arraycopy(data, start, d, 0, length); 266 System.arraycopy(data, start, d, 0, length);
218 data = d; 267 data = d;
219 start = 0; 268 start = 0;
220 } 269 }
223 } 272 }
224 273
225 private static class ManifestParser implements RevlogStream.Inspector { 274 private static class ManifestParser implements RevlogStream.Inspector {
226 private boolean gtg = true; // good to go 275 private boolean gtg = true; // good to go
227 private final Inspector inspector; 276 private final Inspector inspector;
277 private final Inspector2 inspector2;
228 private Pool<Nodeid> nodeidPool, thisRevPool; 278 private Pool<Nodeid> nodeidPool, thisRevPool;
229 private final Pool<StringProxy> fnamePool; 279 private final Pool<PathProxy> fnamePool;
230 private final Pool<StringProxy> flagsPool;
231 private byte[] nodeidLookupBuffer = new byte[20]; // get reassigned each time new Nodeid is added to pool 280 private byte[] nodeidLookupBuffer = new byte[20]; // get reassigned each time new Nodeid is added to pool
232 281
233 public ManifestParser(Inspector delegate) { 282 public ManifestParser(Inspector delegate) {
234 assert delegate != null; 283 assert delegate != null;
235 inspector = delegate; 284 inspector = delegate;
285 inspector2 = delegate instanceof Inspector2 ? (Inspector2) delegate : null;
236 nodeidPool = new Pool<Nodeid>(); 286 nodeidPool = new Pool<Nodeid>();
237 fnamePool = new Pool<StringProxy>(); 287 fnamePool = new Pool<PathProxy>();
238 flagsPool = new Pool<StringProxy>();
239 thisRevPool = new Pool<Nodeid>(); 288 thisRevPool = new Pool<Nodeid>();
240 } 289 }
241 290
242 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { 291 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) {
243 if (!gtg) { 292 if (!gtg) {
244 return; 293 return;
245 } 294 }
246 try { 295 try {
247 gtg = gtg && inspector.begin(revisionNumber, new Nodeid(nodeid, true), linkRevision); 296 gtg = gtg && inspector.begin(revisionNumber, new Nodeid(nodeid, true), linkRevision);
248 String fname = null; 297 Path fname = null;
249 String flags = null; 298 Flags flags = null;
250 Nodeid nid = null; 299 Nodeid nid = null;
251 int i; 300 int i;
252 byte[] data = da.byteArray(); 301 byte[] data = da.byteArray();
253 for (i = 0; gtg && i < actualLen; i++) { 302 for (i = 0; gtg && i < actualLen; i++) {
254 int x = i; 303 int x = i;
255 for( ; data[i] != '\n' && i < actualLen; i++) { 304 for( ; data[i] != '\n' && i < actualLen; i++) {
256 if (fname == null && data[i] == 0) { 305 if (fname == null && data[i] == 0) {
257 StringProxy px = fnamePool.unify(new StringProxy(data, x, i - x)); 306 PathProxy px = fnamePool.unify(new PathProxy(data, x, i - x));
258 // if (cached = fnamePool.unify(px))== px then cacheMiss, else cacheHit 307 // if (cached = fnamePool.unify(px))== px then cacheMiss, else cacheHit
259 // cpython 0..10k: hits: 15 989 152, misses: 3020 308 // cpython 0..10k: hits: 15 989 152, misses: 3020
260 fname = px.freeze(); 309 fname = px.freeze();
261 x = i+1; 310 x = i+1;
262 } 311 }
275 } // for cpython 0..10k, cache hits are 15 973 301, vs 18871 misses. 324 } // for cpython 0..10k, cache hits are 15 973 301, vs 18871 misses.
276 thisRevPool.record(nid); // memorize revision for the next iteration. 325 thisRevPool.record(nid); // memorize revision for the next iteration.
277 if (nodeidLen + x < i) { 326 if (nodeidLen + x < i) {
278 // 'x' and 'l' for executable bits and symlinks? 327 // 'x' and 'l' for executable bits and symlinks?
279 // hg --debug manifest shows 644 for each regular file in my repo 328 // hg --debug manifest shows 644 for each regular file in my repo
280 // for cpython 0..10k, there are 4361062 flagPool checks, and there's only 1 unique flag 329 // for cpython 0..10k, there are 4361062 flag checks, and there's only 1 unique flag
281 flags = flagsPool.unify(new StringProxy(data, x + nodeidLen, i-x-nodeidLen)).freeze(); 330 flags = Flags.parse(data, x + nodeidLen, i-x-nodeidLen);
331 } else {
332 flags = null;
282 } 333 }
283 gtg = gtg && inspector.next(nid, fname, flags); 334 if (inspector2 == null) {
335 String flagString = flags == null ? null : flags.nativeString();
336 gtg = inspector.next(nid, fname.toString(), flagString);
337 } else {
338 gtg = inspector2.next(nid, fname, flags);
339 }
284 } 340 }
285 nid = null; 341 nid = null;
286 fname = flags = null; 342 fname = null;
343 flags = null;
287 } 344 }
288 gtg = gtg && inspector.end(revisionNumber); 345 gtg = gtg && inspector.end(revisionNumber);
289 // 346 //
290 // keep only actual file revisions, found at this version 347 // keep only actual file revisions, found at this version
291 // (next manifest is likely to refer to most of them, although in specific cases 348 // (next manifest is likely to refer to most of them, although in specific cases