Mercurial > hg4j
comparison src/org/tmatesoft/hg/repo/Revlog.java @ 200:114c9fe7b643
Performance optimization: reduce memory ParentWalker hogs
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Wed, 20 Apr 2011 21:14:51 +0200 |
parents | 3a7696fb457c |
children | 047b1dec7a04 |
comparison
equal
deleted
inserted
replaced
199:f4fa4456fa50 | 200:114c9fe7b643 |
---|---|
21 | 21 |
22 import java.io.IOException; | 22 import java.io.IOException; |
23 import java.nio.ByteBuffer; | 23 import java.nio.ByteBuffer; |
24 import java.util.Arrays; | 24 import java.util.Arrays; |
25 import java.util.Collection; | 25 import java.util.Collection; |
26 import java.util.Collections; | |
27 import java.util.HashMap; | |
28 import java.util.HashSet; | 26 import java.util.HashSet; |
29 import java.util.LinkedHashSet; | |
30 import java.util.LinkedList; | 27 import java.util.LinkedList; |
31 import java.util.List; | 28 import java.util.List; |
32 import java.util.Map; | |
33 import java.util.Set; | |
34 | 29 |
35 import org.tmatesoft.hg.core.HgBadStateException; | 30 import org.tmatesoft.hg.core.HgBadStateException; |
36 import org.tmatesoft.hg.core.HgException; | 31 import org.tmatesoft.hg.core.HgException; |
37 import org.tmatesoft.hg.core.Nodeid; | 32 import org.tmatesoft.hg.core.Nodeid; |
38 import org.tmatesoft.hg.internal.DataAccess; | 33 import org.tmatesoft.hg.internal.DataAccess; |
198 * add Revlog#getParentWalker(), static class, make cons() and #init package-local, and keep SoftReference to allow walker reuse. | 193 * add Revlog#getParentWalker(), static class, make cons() and #init package-local, and keep SoftReference to allow walker reuse. |
199 * | 194 * |
200 * and yes, walker is not a proper name | 195 * and yes, walker is not a proper name |
201 */ | 196 */ |
202 public final class ParentWalker { | 197 public final class ParentWalker { |
203 private Map<Nodeid, Nodeid> firstParent; | 198 |
204 private Map<Nodeid, Nodeid> secondParent; | 199 |
205 private final LinkedHashSet<Nodeid> allNodes = new LinkedHashSet<Nodeid>(); // childrenOf rely on ordering | 200 private Nodeid[] sequential; // natural repository order, childrenOf rely on ordering |
201 private Nodeid[] sorted; // for binary search | |
202 private int[] sorted2natural; | |
203 private Nodeid[] firstParent; | |
204 private Nodeid[] secondParent; | |
205 | |
206 // Nodeid instances shall be shared between all arrays | |
206 | 207 |
207 public ParentWalker() { | 208 public ParentWalker() { |
208 firstParent = secondParent = Collections.emptyMap(); | |
209 } | 209 } |
210 | 210 |
211 public HgRepository getRepo() { | 211 public HgRepository getRepo() { |
212 return Revlog.this.getRepo(); | 212 return Revlog.this.getRepo(); |
213 } | 213 } |
214 | 214 |
215 public void init() { | 215 public void init() { |
216 final RevlogStream stream = Revlog.this.content; | 216 final RevlogStream stream = Revlog.this.content; |
217 final int revisionCount = stream.revisionCount(); | 217 final int revisionCount = stream.revisionCount(); |
218 firstParent = new HashMap<Nodeid, Nodeid>(revisionCount); | 218 firstParent = new Nodeid[revisionCount]; |
219 secondParent = new HashMap<Nodeid, Nodeid>(firstParent.size() >> 1); // assume branches/merges are less frequent | 219 // although branches/merges are less frequent, and most of secondParent would be -1/null, some sort of |
220 | 220 // SparseOrderedList might be handy, provided its inner structures do not overweight simplicity of an array |
221 secondParent = new Nodeid[revisionCount]; | |
222 // | |
223 sequential = new Nodeid[revisionCount]; | |
224 sorted = new Nodeid[revisionCount]; | |
225 | |
221 RevlogStream.Inspector insp = new RevlogStream.Inspector() { | 226 RevlogStream.Inspector insp = new RevlogStream.Inspector() { |
222 final Nodeid[] sequentialRevisionNodeids = new Nodeid[revisionCount]; | 227 |
223 int ix = 0; | 228 int ix = 0; |
224 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { | 229 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { |
225 if (ix != revisionNumber) { | 230 if (ix != revisionNumber) { |
226 // XXX temp code, just to make sure I understand what's going on here | 231 // XXX temp code, just to make sure I understand what's going on here |
227 throw new IllegalStateException(); | 232 throw new IllegalStateException(); |
228 } | 233 } |
229 if (parent1Revision >= revisionNumber || parent2Revision >= revisionNumber) { | 234 if (parent1Revision >= revisionNumber || parent2Revision >= revisionNumber) { |
230 throw new IllegalStateException(); // sanity, revisions are sequential | 235 throw new IllegalStateException(); // sanity, revisions are sequential |
231 } | 236 } |
232 final Nodeid nid = new Nodeid(nodeid, true); | 237 final Nodeid nid = new Nodeid(nodeid, true); |
233 sequentialRevisionNodeids[ix++] = nid; | 238 sequential[ix] = sorted[ix] = nid; |
234 allNodes.add(nid); | |
235 if (parent1Revision != -1) { | 239 if (parent1Revision != -1) { |
236 firstParent.put(nid, sequentialRevisionNodeids[parent1Revision]); | 240 assert parent1Revision < ix; |
241 firstParent[ix] = sequential[parent1Revision]; | |
237 } | 242 } |
238 if (parent2Revision != -1) { // revlog of DataAccess.java has p2 set when p1 is -1 | 243 if (parent2Revision != -1) { // revlog of DataAccess.java has p2 set when p1 is -1 |
239 secondParent.put(nid, sequentialRevisionNodeids[parent2Revision]); | 244 assert parent2Revision < ix; |
245 secondParent[ix] = sequential[parent2Revision]; | |
240 } | 246 } |
247 ix++; | |
241 } | 248 } |
242 }; | 249 }; |
243 stream.iterate(0, TIP, false, insp); | 250 stream.iterate(0, TIP, false, insp); |
244 } | 251 Arrays.sort(sorted); |
245 | 252 sorted2natural = new int[revisionCount]; |
246 public Set<Nodeid> allNodes() { | 253 for (int i = 0; i < revisionCount; i++) { |
247 return Collections.unmodifiableSet(allNodes); | 254 Nodeid n = sequential[i]; |
255 int x = Arrays.binarySearch(sorted, n); | |
256 assertSortedIndex(x); | |
257 sorted2natural[x] = i; | |
258 } | |
259 } | |
260 | |
261 private void assertSortedIndex(int x) { | |
262 if (x < 0) { | |
263 throw new HgBadStateException(); | |
264 } | |
248 } | 265 } |
249 | 266 |
250 // FIXME need to decide whether Nodeid(00 * 20) is always known or not | 267 // FIXME need to decide whether Nodeid(00 * 20) is always known or not |
268 // right now Nodeid.NULL is not recognized as known if passed to this method, | |
269 // caller is supposed to make explicit check | |
251 public boolean knownNode(Nodeid nid) { | 270 public boolean knownNode(Nodeid nid) { |
252 return allNodes.contains(nid); | 271 return Arrays.binarySearch(sorted, nid) >= 0; |
253 } | 272 } |
254 | 273 |
255 // null if none | 274 /** |
275 * null if none. only known nodes (as per #knownNode) are accepted as arguments | |
276 */ | |
256 public Nodeid firstParent(Nodeid nid) { | 277 public Nodeid firstParent(Nodeid nid) { |
257 return firstParent.get(nid); | 278 int x = Arrays.binarySearch(sorted, nid); |
279 assertSortedIndex(x); | |
280 int i = sorted2natural[x]; | |
281 return firstParent[i]; | |
258 } | 282 } |
259 | 283 |
260 // never null, Nodeid.NULL if none known | 284 // never null, Nodeid.NULL if none known |
261 public Nodeid safeFirstParent(Nodeid nid) { | 285 public Nodeid safeFirstParent(Nodeid nid) { |
262 Nodeid rv = firstParent(nid); | 286 Nodeid rv = firstParent(nid); |
263 return rv == null ? Nodeid.NULL : rv; | 287 return rv == null ? Nodeid.NULL : rv; |
264 } | 288 } |
265 | 289 |
266 public Nodeid secondParent(Nodeid nid) { | 290 public Nodeid secondParent(Nodeid nid) { |
267 return secondParent.get(nid); | 291 int x = Arrays.binarySearch(sorted, nid); |
292 assertSortedIndex(x); | |
293 int i = sorted2natural[x]; | |
294 return secondParent[i]; | |
268 } | 295 } |
269 | 296 |
270 public Nodeid safeSecondParent(Nodeid nid) { | 297 public Nodeid safeSecondParent(Nodeid nid) { |
271 Nodeid rv = secondParent(nid); | 298 Nodeid rv = secondParent(nid); |
272 return rv == null ? Nodeid.NULL : rv; | 299 return rv == null ? Nodeid.NULL : rv; |
273 } | 300 } |
274 | 301 |
275 public boolean appendParentsOf(Nodeid nid, Collection<Nodeid> c) { | 302 public boolean appendParentsOf(Nodeid nid, Collection<Nodeid> c) { |
276 Nodeid p1 = firstParent(nid); | 303 int x = Arrays.binarySearch(sorted, nid); |
304 assertSortedIndex(x); | |
305 int i = sorted2natural[x]; | |
306 Nodeid p1 = firstParent[i]; | |
277 boolean modified = false; | 307 boolean modified = false; |
278 if (p1 != null) { | 308 if (p1 != null) { |
279 modified = c.add(p1); | 309 modified = c.add(p1); |
280 } | 310 } |
281 Nodeid p2 = secondParent(nid); | 311 Nodeid p2 = secondParent[i]; |
282 if (p2 != null) { | 312 if (p2 != null) { |
283 modified = c.add(p2) || modified; | 313 modified = c.add(p2) || modified; |
284 } | 314 } |
285 return modified; | 315 return modified; |
286 } | 316 } |
287 | 317 |
288 // XXX alternative (and perhaps more reliable) approach would be to make a copy of allNodes and remove | 318 // XXX alternative (and perhaps more reliable) approach would be to make a copy of allNodes and remove |
289 // nodes, their parents and so on. | 319 // nodes, their parents and so on. |
290 | 320 |
291 // @return ordered collection of all children rooted at supplied nodes. Nodes shall not be descendants of each other! | 321 // @return ordered collection of all children rooted at supplied nodes. Nodes shall not be descendants of each other! |
292 public List<Nodeid> childrenOf(List<Nodeid> nodes) { | 322 // Nodeids shall belong to this revlog |
323 public List<Nodeid> childrenOf(List<Nodeid> roots) { | |
293 HashSet<Nodeid> parents = new HashSet<Nodeid>(); | 324 HashSet<Nodeid> parents = new HashSet<Nodeid>(); |
294 LinkedHashSet<Nodeid> result = new LinkedHashSet<Nodeid>(); | 325 LinkedList<Nodeid> result = new LinkedList<Nodeid>(); |
295 LinkedList<Nodeid> orderedResult = new LinkedList<Nodeid>(); | 326 int earliestRevision = Integer.MAX_VALUE; |
296 for(Nodeid next : allNodes) { | 327 assert sequential.length == firstParent.length && firstParent.length == secondParent.length; |
297 // i assume allNodes is sorted, hence do not check any parents unless we hit any common known node first | 328 // first, find earliest index of roots in question, as there's no sense |
298 if (nodes.contains(next)) { | 329 // to check children among nodes prior to branch's root node |
299 parents.add(next); | 330 for (Nodeid r : roots) { |
300 } else { | 331 int x = Arrays.binarySearch(sorted, r); |
301 if (parents.isEmpty()) { | 332 assertSortedIndex(x); |
302 // didn't scroll up to any known yet | 333 int i = sorted2natural[x]; |
303 continue; | 334 if (i < earliestRevision) { |
304 } | 335 earliestRevision = i; |
305 // record each and every node reported after first common known node hit | |
306 orderedResult.addLast(next); | |
307 if (parents.contains(firstParent(next)) || parents.contains(secondParent(next))) { | |
308 result.add(next); | |
309 parents.add(next); // to find next's children | |
310 } | |
311 } | 336 } |
312 } | 337 parents.add(sequential[i]); // add canonical instance in hope equals() is bit faster when can do a == |
313 // leave only those of interest in ordered sequence | 338 } |
314 orderedResult.retainAll(result); | 339 for (int i = earliestRevision + 1; i < sequential.length; i++) { |
315 return orderedResult; | 340 if (parents.contains(firstParent[i]) || parents.contains(secondParent[i])) { |
341 parents.add(sequential[i]); // to find next child | |
342 result.add(sequential[i]); | |
343 } | |
344 } | |
345 return result; | |
316 } | 346 } |
317 | 347 |
318 /** | 348 /** |
319 * @param node possibly parent node | 349 * @param nid possibly parent node, shall be {@link #knownNode(Nodeid) known} in this revlog. |
320 * @return <code>true</code> if there's any node in this revlog that has specified node as one of its parents. | 350 * @return <code>true</code> if there's any node in this revlog that has specified node as one of its parents. |
321 */ | 351 */ |
322 public boolean hasChildren(Nodeid node) { | 352 public boolean hasChildren(Nodeid nid) { |
323 // FIXME containsValue is linear, likely. May want to optimize it with another (Tree|Hash)Set, created on demand | 353 int x = Arrays.binarySearch(sorted, nid); |
324 // on first use | 354 assertSortedIndex(x); |
325 return firstParent.containsValue(node) || secondParent.containsValue(node); | 355 int i = sorted2natural[x]; |
356 assert firstParent.length == secondParent.length; // just in case later I implement sparse array for secondParent | |
357 assert firstParent.length == sequential.length; | |
358 // to use == instead of equals, take the same Nodeid instance we used to fill all the arrays. | |
359 final Nodeid canonicalNode = sequential[i]; | |
360 i++; // no need to check node itself. child nodes may appear in sequential only after revision in question | |
361 for (; i < sequential.length; i++) { | |
362 // FIXME likely, not very effective. | |
363 // May want to optimize it with another (Tree|Hash)Set, created on demand on first use, | |
364 // however, need to be careful with memory usage | |
365 if (firstParent[i] == canonicalNode || secondParent[i] == canonicalNode) { | |
366 return true; | |
367 } | |
368 } | |
369 return false; | |
326 } | 370 } |
327 } | 371 } |
328 | 372 |
329 protected static class ContentPipe implements RevlogStream.Inspector, CancelSupport { | 373 protected static class ContentPipe implements RevlogStream.Inspector, CancelSupport { |
330 private final ByteChannel sink; | 374 private final ByteChannel sink; |