Mercurial > hg4j
comparison src/org/tmatesoft/hg/repo/HgManifest.java @ 268:c5980f287cc4
Use StringProxy when parsing manifest to minimize number of useless conversions and array instances
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Tue, 23 Aug 2011 22:30:56 +0200 |
parents | ec921ef0628e |
children | 6dbbc53fc46d |
comparison
equal
deleted
inserted
replaced
267:ec921ef0628e | 268:c5980f287cc4 |
---|---|
152 | 152 |
153 public interface ElementProxy<T> { | 153 public interface ElementProxy<T> { |
154 T get(); | 154 T get(); |
155 } | 155 } |
156 | 156 |
157 private static class PoolStringProxy { | 157 /** |
158 * When Pool uses Strings directly, | |
159 * ManifestParser creates new String instance with new char[] value, and does byte->char conversion. | |
160 * For cpython repo, walk(0..10k), there are over 16 million filenames, of them only 3020 unique. | |
161 * This means there are 15.9 million useless char[] instances and byte->char conversions | |
162 * | |
163 * When String is wrapped into {@link StringProxy}, there's extra overhead of byte[] representation | |
164 * of the String, but these are only for unique Strings (3020 in the example above). Besides, I save | |
165 * useless char[] and byte->char conversions. | |
166 */ | |
167 private static class StringProxy { | |
158 private byte[] data; | 168 private byte[] data; |
159 private int start, length; | 169 private int start; |
160 private int hash; | 170 private final int hash, length; |
161 private String result; | 171 private String result; |
162 | 172 |
163 public void init(byte[] data, int start, int length) { | 173 public StringProxy(byte[] data, int start, int length) { |
164 this.data = data; | 174 this.data = data; |
165 this.start = start; | 175 this.start = start; |
166 this.length = length; | 176 this.length = length; |
167 | 177 |
168 // copy from String.hashCode() | 178 // copy from String.hashCode(). In fact, not necessarily match result of String(data).hashCode |
179 // just need some nice algorithm here | |
169 int h = 0; | 180 int h = 0; |
170 byte[] d = data; | 181 byte[] d = data; |
171 for (int i = 0, off = start, len = length; i < len; i++) { | 182 for (int i = 0, off = start, len = length; i < len; i++) { |
172 h = 31 * h + d[off++]; | 183 h = 31 * h + d[off++]; |
173 } | 184 } |
174 hash = h; | 185 hash = h; |
175 } | 186 } |
176 | 187 |
177 @Override | 188 @Override |
178 public boolean equals(Object obj) { | 189 public boolean equals(Object obj) { |
179 if (false == obj instanceof PoolStringProxy) { | 190 if (false == obj instanceof StringProxy) { |
180 return false; | 191 return false; |
181 } | 192 } |
182 PoolStringProxy o = (PoolStringProxy) obj; | 193 StringProxy o = (StringProxy) obj; |
183 if (o.result != null && result != null) { | 194 if (o.result != null && result != null) { |
184 return result.equals(o.result); | 195 return result.equals(o.result); |
185 } | 196 } |
186 if (o.result == null && result != null || o.result != null && result == null) { | 197 if (o.length != length || o.hash != hash) { |
187 String s; PoolStringProxy noString; | |
188 if (o.result == null) { | |
189 s = result; | |
190 noString = o; | |
191 } else { | |
192 s = o.result; | |
193 noString = this; | |
194 } | |
195 | |
196 } | |
197 // both are null | |
198 if (o.length != length) { | |
199 return false; | 198 return false; |
200 } | 199 } |
201 for (int i = 0, x = o.start, y = start; i < length; i++) { | 200 for (int i = 0, x = o.start, y = start; i < length; i++) { |
202 if (o.data[x++] != data[y++]) { | 201 if (o.data[x++] != data[y++]) { |
203 return false; | 202 return false; |
211 } | 210 } |
212 | 211 |
213 public String freeze() { | 212 public String freeze() { |
214 if (result == null) { | 213 if (result == null) { |
215 result = new String(data, start, length); | 214 result = new String(data, start, length); |
216 data = null; | 215 // release reference to bigger data array, make a copy of relevant part only |
217 start = length = -1; | 216 byte[] d = new byte[length]; |
217 System.arraycopy(data, start, d, 0, length); | |
218 data = d; | |
219 start = 0; | |
218 } | 220 } |
219 return result; | 221 return result; |
220 } | 222 } |
221 } | 223 } |
222 | 224 |
223 private static class ManifestParser implements RevlogStream.Inspector/*, Lifecycle */{ | 225 private static class ManifestParser implements RevlogStream.Inspector { |
224 private boolean gtg = true; // good to go | 226 private boolean gtg = true; // good to go |
225 private final Inspector inspector; | 227 private final Inspector inspector; |
226 private Pool<Nodeid> nodeidPool, thisRevPool; | 228 private Pool<Nodeid> nodeidPool, thisRevPool; |
227 private final Pool<String> fnamePool; | 229 private final Pool<StringProxy> fnamePool; |
228 private final Pool<String> flagsPool; | 230 private final Pool<StringProxy> flagsPool; |
229 private byte[] nodeidLookupBuffer = new byte[20]; // get reassigned each time new Nodeid is added to pool | 231 private byte[] nodeidLookupBuffer = new byte[20]; // get reassigned each time new Nodeid is added to pool |
230 | 232 |
231 public ManifestParser(Inspector delegate) { | 233 public ManifestParser(Inspector delegate) { |
232 assert delegate != null; | 234 assert delegate != null; |
233 inspector = delegate; | 235 inspector = delegate; |
234 nodeidPool = new Pool<Nodeid>(); | 236 nodeidPool = new Pool<Nodeid>(); |
235 fnamePool = new Pool<String>(); | 237 fnamePool = new Pool<StringProxy>(); |
236 flagsPool = new Pool<String>(); | 238 flagsPool = new Pool<StringProxy>(); |
237 thisRevPool = new Pool<Nodeid>(); | 239 thisRevPool = new Pool<Nodeid>(); |
238 } | 240 } |
239 | 241 |
240 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { | 242 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { |
241 if (!gtg) { | 243 if (!gtg) { |
242 return; | 244 return; |
243 } | 245 } |
244 try { | 246 try { |
250 byte[] data = da.byteArray(); | 252 byte[] data = da.byteArray(); |
251 for (i = 0; gtg && i < actualLen; i++) { | 253 for (i = 0; gtg && i < actualLen; i++) { |
252 int x = i; | 254 int x = i; |
253 for( ; data[i] != '\n' && i < actualLen; i++) { | 255 for( ; data[i] != '\n' && i < actualLen; i++) { |
254 if (fname == null && data[i] == 0) { | 256 if (fname == null && data[i] == 0) { |
255 fname = fnamePool.unify(new String(data, x, i - x)); | 257 StringProxy px = fnamePool.unify(new StringProxy(data, x, i - x)); |
258 // if (cached = fnamePool.unify(px))== px then cacheMiss, else cacheHit | |
259 // cpython 0..10k: hits: 15 989 152, misses: 3020 | |
260 fname = px.freeze(); | |
256 x = i+1; | 261 x = i+1; |
257 } | 262 } |
258 } | 263 } |
259 if (i < actualLen) { | 264 if (i < actualLen) { |
260 assert data[i] == '\n'; | 265 assert data[i] == '\n'; |
270 } // for cpython 0..10k, cache hits are 15 973 301, vs 18871 misses. | 275 } // for cpython 0..10k, cache hits are 15 973 301, vs 18871 misses. |
271 thisRevPool.record(nid); // memorize revision for the next iteration. | 276 thisRevPool.record(nid); // memorize revision for the next iteration. |
272 if (nodeidLen + x < i) { | 277 if (nodeidLen + x < i) { |
273 // 'x' and 'l' for executable bits and symlinks? | 278 // 'x' and 'l' for executable bits and symlinks? |
274 // hg --debug manifest shows 644 for each regular file in my repo | 279 // hg --debug manifest shows 644 for each regular file in my repo |
275 flags = flagsPool.unify(new String(data, x + nodeidLen, i-x-nodeidLen)); | 280 // for cpython 0..10k, there are 4361062 flagPool checks, and there's only 1 unique flag |
281 flags = flagsPool.unify(new StringProxy(data, x + nodeidLen, i-x-nodeidLen)).freeze(); | |
276 } | 282 } |
277 gtg = gtg && inspector.next(nid, fname, flags); | 283 gtg = gtg && inspector.next(nid, fname, flags); |
278 } | 284 } |
279 nid = null; | 285 nid = null; |
280 fname = flags = null; | 286 fname = flags = null; |
290 thisRevPool = t; | 296 thisRevPool = t; |
291 } catch (IOException ex) { | 297 } catch (IOException ex) { |
292 throw new HgBadStateException(ex); | 298 throw new HgBadStateException(ex); |
293 } | 299 } |
294 } | 300 } |
295 // | |
296 // public void start(int count, Callback callback, Object token) { | |
297 // } | |
298 // | |
299 // public void finish(Object token) { | |
300 // System.out.println(fnamePool); | |
301 // System.out.println(nodeidPool); | |
302 // System.out.printf("Free mem once parse done: %,d\n", Runtime.getRuntime().freeMemory()); | |
303 // } | |
304 } | 301 } |
305 | 302 |
306 private static class RevisionMapper implements RevlogStream.Inspector, Lifecycle { | 303 private static class RevisionMapper implements RevlogStream.Inspector, Lifecycle { |
307 | 304 |
308 private final int changelogRevisions; | 305 private final int changelogRevisions; |