Mercurial > hg4j
comparison src/org/tmatesoft/hg/repo/HgManifest.java @ 285:6dbbc53fc46d
Use Path instead of plain String for manifest file names
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Sat, 03 Sep 2011 21:46:13 +0200 |
parents | c5980f287cc4 |
children | 650b45d290b1 |
comparison
equal
deleted
inserted
replaced
284:7232b94f2ae3 | 285:6dbbc53fc46d |
---|---|
39 * @author Artem Tikhomirov | 39 * @author Artem Tikhomirov |
40 * @author TMate Software Ltd. | 40 * @author TMate Software Ltd. |
41 */ | 41 */ |
42 public class HgManifest extends Revlog { | 42 public class HgManifest extends Revlog { |
43 private RevisionMapper revisionMap; | 43 private RevisionMapper revisionMap; |
44 | |
45 public enum Flags { | |
46 Exec, Link; | |
47 | |
48 static Flags parse(String flags) { | |
49 if ("x".equalsIgnoreCase(flags)) { | |
50 return Exec; | |
51 } | |
52 if ("l".equalsIgnoreCase(flags)) { | |
53 return Link; | |
54 } | |
55 if (flags == null) { | |
56 return null; | |
57 } | |
58 throw new IllegalStateException(flags); | |
59 } | |
60 | |
61 static Flags parse(byte[] data, int start, int length) { | |
62 if (length == 0) { | |
63 return null; | |
64 } | |
65 if (length == 1) { | |
66 if (data[start] == 'x') { | |
67 return Exec; | |
68 } | |
69 if (data[start] == 'l') { | |
70 return Link; | |
71 } | |
72 // FALL THROUGH | |
73 } | |
74 throw new IllegalStateException(new String(data, start, length)); | |
75 } | |
76 | |
77 String nativeString() { | |
78 if (this == Exec) { | |
79 return "x"; | |
80 } | |
81 if (this == Link) { | |
82 return "l"; | |
83 } | |
84 throw new IllegalStateException(toString()); | |
85 } | |
86 } | |
44 | 87 |
45 /*package-local*/ HgManifest(HgRepository hgRepo, RevlogStream content) { | 88 /*package-local*/ HgManifest(HgRepository hgRepo, RevlogStream content) { |
46 super(hgRepo, content); | 89 super(hgRepo, content); |
47 } | 90 } |
48 | 91 |
144 return rv[0]; | 187 return rv[0]; |
145 } | 188 } |
146 | 189 |
147 public interface Inspector { | 190 public interface Inspector { |
148 boolean begin(int mainfestRevision, Nodeid nid, int changelogRevision); | 191 boolean begin(int mainfestRevision, Nodeid nid, int changelogRevision); |
192 /** | |
193 * @deprecated switch to {@link Inspector2#next(Nodeid, Path, Flags)} | |
194 */ | |
195 @Deprecated | |
149 boolean next(Nodeid nid, String fname, String flags); | 196 boolean next(Nodeid nid, String fname, String flags); |
150 boolean end(int manifestRevision); | 197 boolean end(int manifestRevision); |
151 } | 198 } |
152 | 199 |
153 public interface ElementProxy<T> { | 200 @Experimental(reason="Explore Path alternative for filenames and enum for flags") |
154 T get(); | 201 public interface Inspector2 extends Inspector { |
202 boolean next(Nodeid nid, Path fname, Flags flags); | |
155 } | 203 } |
156 | 204 |
157 /** | 205 /** |
158 * When Pool uses Strings directly, | 206 * When Pool uses Strings directly, |
159 * ManifestParser creates new String instance with new char[] value, and does byte->char conversion. | 207 * ManifestParser creates new String instance with new char[] value, and does byte->char conversion. |
160 * For cpython repo, walk(0..10k), there are over 16 million filenames, of them only 3020 unique. | 208 * For cpython repo, walk(0..10k), there are over 16 million filenames, of them only 3020 unique. |
161 * This means there are 15.9 million useless char[] instances and byte->char conversions | 209 * This means there are 15.9 million useless char[] instances and byte->char conversions |
162 * | 210 * |
163 * When String is wrapped into {@link StringProxy}, there's extra overhead of byte[] representation | 211 * When String (Path) is wrapped into {@link PathProxy}, there's extra overhead of byte[] representation |
164 * of the String, but these are only for unique Strings (3020 in the example above). Besides, I save | 212 * of the String, but these are only for unique Strings (Paths) (3020 in the example above). Besides, I save |
165 * useless char[] and byte->char conversions. | 213 * useless char[] and byte->char conversions. |
166 */ | 214 */ |
167 private static class StringProxy { | 215 private static class PathProxy { |
168 private byte[] data; | 216 private byte[] data; |
169 private int start; | 217 private int start; |
170 private final int hash, length; | 218 private final int hash, length; |
171 private String result; | 219 private Path result; |
172 | 220 |
173 public StringProxy(byte[] data, int start, int length) { | 221 public PathProxy(byte[] data, int start, int length) { |
174 this.data = data; | 222 this.data = data; |
175 this.start = start; | 223 this.start = start; |
176 this.length = length; | 224 this.length = length; |
177 | 225 |
178 // copy from String.hashCode(). In fact, not necessarily match result of String(data).hashCode | 226 // copy from String.hashCode(). In fact, not necessarily match result of String(data).hashCode |
185 hash = h; | 233 hash = h; |
186 } | 234 } |
187 | 235 |
188 @Override | 236 @Override |
189 public boolean equals(Object obj) { | 237 public boolean equals(Object obj) { |
190 if (false == obj instanceof StringProxy) { | 238 if (false == obj instanceof PathProxy) { |
191 return false; | 239 return false; |
192 } | 240 } |
193 StringProxy o = (StringProxy) obj; | 241 PathProxy o = (PathProxy) obj; |
194 if (o.result != null && result != null) { | 242 if (o.result != null && result != null) { |
195 return result.equals(o.result); | 243 return result.equals(o.result); |
196 } | 244 } |
197 if (o.length != length || o.hash != hash) { | 245 if (o.length != length || o.hash != hash) { |
198 return false; | 246 return false; |
207 @Override | 255 @Override |
208 public int hashCode() { | 256 public int hashCode() { |
209 return hash; | 257 return hash; |
210 } | 258 } |
211 | 259 |
212 public String freeze() { | 260 public Path freeze() { |
213 if (result == null) { | 261 if (result == null) { |
214 result = new String(data, start, length); | 262 result = Path.create(new String(data, start, length)); |
215 // release reference to bigger data array, make a copy of relevant part only | 263 // release reference to bigger data array, make a copy of relevant part only |
264 // use original bytes, not those from String above to avoid cache misses due to different encodings | |
216 byte[] d = new byte[length]; | 265 byte[] d = new byte[length]; |
217 System.arraycopy(data, start, d, 0, length); | 266 System.arraycopy(data, start, d, 0, length); |
218 data = d; | 267 data = d; |
219 start = 0; | 268 start = 0; |
220 } | 269 } |
223 } | 272 } |
224 | 273 |
225 private static class ManifestParser implements RevlogStream.Inspector { | 274 private static class ManifestParser implements RevlogStream.Inspector { |
226 private boolean gtg = true; // good to go | 275 private boolean gtg = true; // good to go |
227 private final Inspector inspector; | 276 private final Inspector inspector; |
277 private final Inspector2 inspector2; | |
228 private Pool<Nodeid> nodeidPool, thisRevPool; | 278 private Pool<Nodeid> nodeidPool, thisRevPool; |
229 private final Pool<StringProxy> fnamePool; | 279 private final Pool<PathProxy> fnamePool; |
230 private final Pool<StringProxy> flagsPool; | |
231 private byte[] nodeidLookupBuffer = new byte[20]; // get reassigned each time new Nodeid is added to pool | 280 private byte[] nodeidLookupBuffer = new byte[20]; // get reassigned each time new Nodeid is added to pool |
232 | 281 |
233 public ManifestParser(Inspector delegate) { | 282 public ManifestParser(Inspector delegate) { |
234 assert delegate != null; | 283 assert delegate != null; |
235 inspector = delegate; | 284 inspector = delegate; |
285 inspector2 = delegate instanceof Inspector2 ? (Inspector2) delegate : null; | |
236 nodeidPool = new Pool<Nodeid>(); | 286 nodeidPool = new Pool<Nodeid>(); |
237 fnamePool = new Pool<StringProxy>(); | 287 fnamePool = new Pool<PathProxy>(); |
238 flagsPool = new Pool<StringProxy>(); | |
239 thisRevPool = new Pool<Nodeid>(); | 288 thisRevPool = new Pool<Nodeid>(); |
240 } | 289 } |
241 | 290 |
242 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { | 291 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { |
243 if (!gtg) { | 292 if (!gtg) { |
244 return; | 293 return; |
245 } | 294 } |
246 try { | 295 try { |
247 gtg = gtg && inspector.begin(revisionNumber, new Nodeid(nodeid, true), linkRevision); | 296 gtg = gtg && inspector.begin(revisionNumber, new Nodeid(nodeid, true), linkRevision); |
248 String fname = null; | 297 Path fname = null; |
249 String flags = null; | 298 Flags flags = null; |
250 Nodeid nid = null; | 299 Nodeid nid = null; |
251 int i; | 300 int i; |
252 byte[] data = da.byteArray(); | 301 byte[] data = da.byteArray(); |
253 for (i = 0; gtg && i < actualLen; i++) { | 302 for (i = 0; gtg && i < actualLen; i++) { |
254 int x = i; | 303 int x = i; |
255 for( ; data[i] != '\n' && i < actualLen; i++) { | 304 for( ; data[i] != '\n' && i < actualLen; i++) { |
256 if (fname == null && data[i] == 0) { | 305 if (fname == null && data[i] == 0) { |
257 StringProxy px = fnamePool.unify(new StringProxy(data, x, i - x)); | 306 PathProxy px = fnamePool.unify(new PathProxy(data, x, i - x)); |
258 // if (cached = fnamePool.unify(px))== px then cacheMiss, else cacheHit | 307 // if (cached = fnamePool.unify(px))== px then cacheMiss, else cacheHit |
259 // cpython 0..10k: hits: 15 989 152, misses: 3020 | 308 // cpython 0..10k: hits: 15 989 152, misses: 3020 |
260 fname = px.freeze(); | 309 fname = px.freeze(); |
261 x = i+1; | 310 x = i+1; |
262 } | 311 } |
275 } // for cpython 0..10k, cache hits are 15 973 301, vs 18871 misses. | 324 } // for cpython 0..10k, cache hits are 15 973 301, vs 18871 misses. |
276 thisRevPool.record(nid); // memorize revision for the next iteration. | 325 thisRevPool.record(nid); // memorize revision for the next iteration. |
277 if (nodeidLen + x < i) { | 326 if (nodeidLen + x < i) { |
278 // 'x' and 'l' for executable bits and symlinks? | 327 // 'x' and 'l' for executable bits and symlinks? |
279 // hg --debug manifest shows 644 for each regular file in my repo | 328 // hg --debug manifest shows 644 for each regular file in my repo |
280 // for cpython 0..10k, there are 4361062 flagPool checks, and there's only 1 unique flag | 329 // for cpython 0..10k, there are 4361062 flag checks, and there's only 1 unique flag |
281 flags = flagsPool.unify(new StringProxy(data, x + nodeidLen, i-x-nodeidLen)).freeze(); | 330 flags = Flags.parse(data, x + nodeidLen, i-x-nodeidLen); |
331 } else { | |
332 flags = null; | |
282 } | 333 } |
283 gtg = gtg && inspector.next(nid, fname, flags); | 334 if (inspector2 == null) { |
335 String flagString = flags == null ? null : flags.nativeString(); | |
336 gtg = inspector.next(nid, fname.toString(), flagString); | |
337 } else { | |
338 gtg = inspector2.next(nid, fname, flags); | |
339 } | |
284 } | 340 } |
285 nid = null; | 341 nid = null; |
286 fname = flags = null; | 342 fname = null; |
343 flags = null; | |
287 } | 344 } |
288 gtg = gtg && inspector.end(revisionNumber); | 345 gtg = gtg && inspector.end(revisionNumber); |
289 // | 346 // |
290 // keep only actual file revisions, found at this version | 347 // keep only actual file revisions, found at this version |
291 // (next manifest is likely to refer to most of them, although in specific cases | 348 // (next manifest is likely to refer to most of them, although in specific cases |