comparison src/org/tmatesoft/hg/repo/HgManifest.java @ 268:c5980f287cc4

Use StringProxy when parsing manifest to minimize number of useless conversions and array instances
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Tue, 23 Aug 2011 22:30:56 +0200
parents ec921ef0628e
children 6dbbc53fc46d
comparison
equal deleted inserted replaced
267:ec921ef0628e 268:c5980f287cc4
152 152
153 public interface ElementProxy<T> { 153 public interface ElementProxy<T> {
154 T get(); 154 T get();
155 } 155 }
156 156
157 private static class PoolStringProxy { 157 /**
158 * When Pool uses Strings directly,
159 * ManifestParser creates new String instance with new char[] value, and does byte->char conversion.
160 * For cpython repo, walk(0..10k), there are over 16 million filenames, of them only 3020 unique.
161 * This means there are 15.9 million useless char[] instances and byte->char conversions
162 *
163 * When String is wrapped into {@link StringProxy}, there's extra overhead of byte[] representation
164 * of the String, but these are only for unique Strings (3020 in the example above). Besides, I save
165 * useless char[] and byte->char conversions.
166 */
167 private static class StringProxy {
158 private byte[] data; 168 private byte[] data;
159 private int start, length; 169 private int start;
160 private int hash; 170 private final int hash, length;
161 private String result; 171 private String result;
162 172
163 public void init(byte[] data, int start, int length) { 173 public StringProxy(byte[] data, int start, int length) {
164 this.data = data; 174 this.data = data;
165 this.start = start; 175 this.start = start;
166 this.length = length; 176 this.length = length;
167 177
168 // copy from String.hashCode() 178 // copy from String.hashCode(). In fact, not necessarily match result of String(data).hashCode
179 // just need some nice algorithm here
169 int h = 0; 180 int h = 0;
170 byte[] d = data; 181 byte[] d = data;
171 for (int i = 0, off = start, len = length; i < len; i++) { 182 for (int i = 0, off = start, len = length; i < len; i++) {
172 h = 31 * h + d[off++]; 183 h = 31 * h + d[off++];
173 } 184 }
174 hash = h; 185 hash = h;
175 } 186 }
176 187
177 @Override 188 @Override
178 public boolean equals(Object obj) { 189 public boolean equals(Object obj) {
179 if (false == obj instanceof PoolStringProxy) { 190 if (false == obj instanceof StringProxy) {
180 return false; 191 return false;
181 } 192 }
182 PoolStringProxy o = (PoolStringProxy) obj; 193 StringProxy o = (StringProxy) obj;
183 if (o.result != null && result != null) { 194 if (o.result != null && result != null) {
184 return result.equals(o.result); 195 return result.equals(o.result);
185 } 196 }
186 if (o.result == null && result != null || o.result != null && result == null) { 197 if (o.length != length || o.hash != hash) {
187 String s; PoolStringProxy noString;
188 if (o.result == null) {
189 s = result;
190 noString = o;
191 } else {
192 s = o.result;
193 noString = this;
194 }
195
196 }
197 // both are null
198 if (o.length != length) {
199 return false; 198 return false;
200 } 199 }
201 for (int i = 0, x = o.start, y = start; i < length; i++) { 200 for (int i = 0, x = o.start, y = start; i < length; i++) {
202 if (o.data[x++] != data[y++]) { 201 if (o.data[x++] != data[y++]) {
203 return false; 202 return false;
211 } 210 }
212 211
213 public String freeze() { 212 public String freeze() {
214 if (result == null) { 213 if (result == null) {
215 result = new String(data, start, length); 214 result = new String(data, start, length);
216 data = null; 215 // release reference to bigger data array, make a copy of relevant part only
217 start = length = -1; 216 byte[] d = new byte[length];
217 System.arraycopy(data, start, d, 0, length);
218 data = d;
219 start = 0;
218 } 220 }
219 return result; 221 return result;
220 } 222 }
221 } 223 }
222 224
223 private static class ManifestParser implements RevlogStream.Inspector/*, Lifecycle */{ 225 private static class ManifestParser implements RevlogStream.Inspector {
224 private boolean gtg = true; // good to go 226 private boolean gtg = true; // good to go
225 private final Inspector inspector; 227 private final Inspector inspector;
226 private Pool<Nodeid> nodeidPool, thisRevPool; 228 private Pool<Nodeid> nodeidPool, thisRevPool;
227 private final Pool<String> fnamePool; 229 private final Pool<StringProxy> fnamePool;
228 private final Pool<String> flagsPool; 230 private final Pool<StringProxy> flagsPool;
229 private byte[] nodeidLookupBuffer = new byte[20]; // get reassigned each time new Nodeid is added to pool 231 private byte[] nodeidLookupBuffer = new byte[20]; // get reassigned each time new Nodeid is added to pool
230 232
231 public ManifestParser(Inspector delegate) { 233 public ManifestParser(Inspector delegate) {
232 assert delegate != null; 234 assert delegate != null;
233 inspector = delegate; 235 inspector = delegate;
234 nodeidPool = new Pool<Nodeid>(); 236 nodeidPool = new Pool<Nodeid>();
235 fnamePool = new Pool<String>(); 237 fnamePool = new Pool<StringProxy>();
236 flagsPool = new Pool<String>(); 238 flagsPool = new Pool<StringProxy>();
237 thisRevPool = new Pool<Nodeid>(); 239 thisRevPool = new Pool<Nodeid>();
238 } 240 }
239 241
240 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { 242 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) {
241 if (!gtg) { 243 if (!gtg) {
242 return; 244 return;
243 } 245 }
244 try { 246 try {
250 byte[] data = da.byteArray(); 252 byte[] data = da.byteArray();
251 for (i = 0; gtg && i < actualLen; i++) { 253 for (i = 0; gtg && i < actualLen; i++) {
252 int x = i; 254 int x = i;
253 for( ; data[i] != '\n' && i < actualLen; i++) { 255 for( ; data[i] != '\n' && i < actualLen; i++) {
254 if (fname == null && data[i] == 0) { 256 if (fname == null && data[i] == 0) {
255 fname = fnamePool.unify(new String(data, x, i - x)); 257 StringProxy px = fnamePool.unify(new StringProxy(data, x, i - x));
258 // if (cached = fnamePool.unify(px))== px then cacheMiss, else cacheHit
259 // cpython 0..10k: hits: 15 989 152, misses: 3020
260 fname = px.freeze();
256 x = i+1; 261 x = i+1;
257 } 262 }
258 } 263 }
259 if (i < actualLen) { 264 if (i < actualLen) {
260 assert data[i] == '\n'; 265 assert data[i] == '\n';
270 } // for cpython 0..10k, cache hits are 15 973 301, vs 18871 misses. 275 } // for cpython 0..10k, cache hits are 15 973 301, vs 18871 misses.
271 thisRevPool.record(nid); // memorize revision for the next iteration. 276 thisRevPool.record(nid); // memorize revision for the next iteration.
272 if (nodeidLen + x < i) { 277 if (nodeidLen + x < i) {
273 // 'x' and 'l' for executable bits and symlinks? 278 // 'x' and 'l' for executable bits and symlinks?
274 // hg --debug manifest shows 644 for each regular file in my repo 279 // hg --debug manifest shows 644 for each regular file in my repo
275 flags = flagsPool.unify(new String(data, x + nodeidLen, i-x-nodeidLen)); 280 // for cpython 0..10k, there are 4361062 flagPool checks, and there's only 1 unique flag
281 flags = flagsPool.unify(new StringProxy(data, x + nodeidLen, i-x-nodeidLen)).freeze();
276 } 282 }
277 gtg = gtg && inspector.next(nid, fname, flags); 283 gtg = gtg && inspector.next(nid, fname, flags);
278 } 284 }
279 nid = null; 285 nid = null;
280 fname = flags = null; 286 fname = flags = null;
290 thisRevPool = t; 296 thisRevPool = t;
291 } catch (IOException ex) { 297 } catch (IOException ex) {
292 throw new HgBadStateException(ex); 298 throw new HgBadStateException(ex);
293 } 299 }
294 } 300 }
295 //
296 // public void start(int count, Callback callback, Object token) {
297 // }
298 //
299 // public void finish(Object token) {
300 // System.out.println(fnamePool);
301 // System.out.println(nodeidPool);
302 // System.out.printf("Free mem once parse done: %,d\n", Runtime.getRuntime().freeMemory());
303 // }
304 } 301 }
305 302
306 private static class RevisionMapper implements RevlogStream.Inspector, Lifecycle { 303 private static class RevisionMapper implements RevlogStream.Inspector, Lifecycle {
307 304
308 private final int changelogRevisions; 305 private final int changelogRevisions;