Mercurial > hg4j
comparison src/org/tmatesoft/hg/repo/Revlog.java @ 200:114c9fe7b643
Performance optimization: reduce the memory that ParentWalker hogs
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
|---|---|
| date | Wed, 20 Apr 2011 21:14:51 +0200 |
| parents | 3a7696fb457c |
| children | 047b1dec7a04 |
comparison
equal
deleted
inserted
replaced
| 199:f4fa4456fa50 | 200:114c9fe7b643 |
|---|---|
| 21 | 21 |
| 22 import java.io.IOException; | 22 import java.io.IOException; |
| 23 import java.nio.ByteBuffer; | 23 import java.nio.ByteBuffer; |
| 24 import java.util.Arrays; | 24 import java.util.Arrays; |
| 25 import java.util.Collection; | 25 import java.util.Collection; |
| 26 import java.util.Collections; | |
| 27 import java.util.HashMap; | |
| 28 import java.util.HashSet; | 26 import java.util.HashSet; |
| 29 import java.util.LinkedHashSet; | |
| 30 import java.util.LinkedList; | 27 import java.util.LinkedList; |
| 31 import java.util.List; | 28 import java.util.List; |
| 32 import java.util.Map; | |
| 33 import java.util.Set; | |
| 34 | 29 |
| 35 import org.tmatesoft.hg.core.HgBadStateException; | 30 import org.tmatesoft.hg.core.HgBadStateException; |
| 36 import org.tmatesoft.hg.core.HgException; | 31 import org.tmatesoft.hg.core.HgException; |
| 37 import org.tmatesoft.hg.core.Nodeid; | 32 import org.tmatesoft.hg.core.Nodeid; |
| 38 import org.tmatesoft.hg.internal.DataAccess; | 33 import org.tmatesoft.hg.internal.DataAccess; |
| 198 * add Revlog#getParentWalker(), static class, make cons() and #init package-local, and keep SoftReference to allow walker reuse. | 193 * add Revlog#getParentWalker(), static class, make cons() and #init package-local, and keep SoftReference to allow walker reuse. |
| 199 * | 194 * |
| 200 * and yes, walker is not a proper name | 195 * and yes, walker is not a proper name |
| 201 */ | 196 */ |
| 202 public final class ParentWalker { | 197 public final class ParentWalker { |
| 203 private Map<Nodeid, Nodeid> firstParent; | 198 |
| 204 private Map<Nodeid, Nodeid> secondParent; | 199 |
| 205 private final LinkedHashSet<Nodeid> allNodes = new LinkedHashSet<Nodeid>(); // childrenOf rely on ordering | 200 private Nodeid[] sequential; // natural repository order, childrenOf rely on ordering |
| 201 private Nodeid[] sorted; // for binary search | |
| 202 private int[] sorted2natural; | |
| 203 private Nodeid[] firstParent; | |
| 204 private Nodeid[] secondParent; | |
| 205 | |
| 206 // Nodeid instances shall be shared between all arrays | |
| 206 | 207 |
| 207 public ParentWalker() { | 208 public ParentWalker() { |
| 208 firstParent = secondParent = Collections.emptyMap(); | |
| 209 } | 209 } |
| 210 | 210 |
| 211 public HgRepository getRepo() { | 211 public HgRepository getRepo() { |
| 212 return Revlog.this.getRepo(); | 212 return Revlog.this.getRepo(); |
| 213 } | 213 } |
| 214 | 214 |
| 215 public void init() { | 215 public void init() { |
| 216 final RevlogStream stream = Revlog.this.content; | 216 final RevlogStream stream = Revlog.this.content; |
| 217 final int revisionCount = stream.revisionCount(); | 217 final int revisionCount = stream.revisionCount(); |
| 218 firstParent = new HashMap<Nodeid, Nodeid>(revisionCount); | 218 firstParent = new Nodeid[revisionCount]; |
| 219 secondParent = new HashMap<Nodeid, Nodeid>(firstParent.size() >> 1); // assume branches/merges are less frequent | 219 // although branches/merges are less frequent, and most of secondParent would be -1/null, some sort of |
| 220 | 220 // SparseOrderedList might be handy, provided its inner structures do not overweight simplicity of an array |
| 221 secondParent = new Nodeid[revisionCount]; | |
| 222 // | |
| 223 sequential = new Nodeid[revisionCount]; | |
| 224 sorted = new Nodeid[revisionCount]; | |
| 225 | |
| 221 RevlogStream.Inspector insp = new RevlogStream.Inspector() { | 226 RevlogStream.Inspector insp = new RevlogStream.Inspector() { |
| 222 final Nodeid[] sequentialRevisionNodeids = new Nodeid[revisionCount]; | 227 |
| 223 int ix = 0; | 228 int ix = 0; |
| 224 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { | 229 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { |
| 225 if (ix != revisionNumber) { | 230 if (ix != revisionNumber) { |
| 226 // XXX temp code, just to make sure I understand what's going on here | 231 // XXX temp code, just to make sure I understand what's going on here |
| 227 throw new IllegalStateException(); | 232 throw new IllegalStateException(); |
| 228 } | 233 } |
| 229 if (parent1Revision >= revisionNumber || parent2Revision >= revisionNumber) { | 234 if (parent1Revision >= revisionNumber || parent2Revision >= revisionNumber) { |
| 230 throw new IllegalStateException(); // sanity, revisions are sequential | 235 throw new IllegalStateException(); // sanity, revisions are sequential |
| 231 } | 236 } |
| 232 final Nodeid nid = new Nodeid(nodeid, true); | 237 final Nodeid nid = new Nodeid(nodeid, true); |
| 233 sequentialRevisionNodeids[ix++] = nid; | 238 sequential[ix] = sorted[ix] = nid; |
| 234 allNodes.add(nid); | |
| 235 if (parent1Revision != -1) { | 239 if (parent1Revision != -1) { |
| 236 firstParent.put(nid, sequentialRevisionNodeids[parent1Revision]); | 240 assert parent1Revision < ix; |
| 241 firstParent[ix] = sequential[parent1Revision]; | |
| 237 } | 242 } |
| 238 if (parent2Revision != -1) { // revlog of DataAccess.java has p2 set when p1 is -1 | 243 if (parent2Revision != -1) { // revlog of DataAccess.java has p2 set when p1 is -1 |
| 239 secondParent.put(nid, sequentialRevisionNodeids[parent2Revision]); | 244 assert parent2Revision < ix; |
| 245 secondParent[ix] = sequential[parent2Revision]; | |
| 240 } | 246 } |
| 247 ix++; | |
| 241 } | 248 } |
| 242 }; | 249 }; |
| 243 stream.iterate(0, TIP, false, insp); | 250 stream.iterate(0, TIP, false, insp); |
| 244 } | 251 Arrays.sort(sorted); |
| 245 | 252 sorted2natural = new int[revisionCount]; |
| 246 public Set<Nodeid> allNodes() { | 253 for (int i = 0; i < revisionCount; i++) { |
| 247 return Collections.unmodifiableSet(allNodes); | 254 Nodeid n = sequential[i]; |
| 255 int x = Arrays.binarySearch(sorted, n); | |
| 256 assertSortedIndex(x); | |
| 257 sorted2natural[x] = i; | |
| 258 } | |
| 259 } | |
| 260 | |
| 261 private void assertSortedIndex(int x) { | |
| 262 if (x < 0) { | |
| 263 throw new HgBadStateException(); | |
| 264 } | |
| 248 } | 265 } |
| 249 | 266 |
| 250 // FIXME need to decide whether Nodeid(00 * 20) is always known or not | 267 // FIXME need to decide whether Nodeid(00 * 20) is always known or not |
| 268 // right now Nodeid.NULL is not recognized as known if passed to this method, | |
| 269 // caller is supposed to make explicit check | |
| 251 public boolean knownNode(Nodeid nid) { | 270 public boolean knownNode(Nodeid nid) { |
| 252 return allNodes.contains(nid); | 271 return Arrays.binarySearch(sorted, nid) >= 0; |
| 253 } | 272 } |
| 254 | 273 |
| 255 // null if none | 274 /** |
| 275 * null if none. only known nodes (as per #knownNode) are accepted as arguments | |
| 276 */ | |
| 256 public Nodeid firstParent(Nodeid nid) { | 277 public Nodeid firstParent(Nodeid nid) { |
| 257 return firstParent.get(nid); | 278 int x = Arrays.binarySearch(sorted, nid); |
| 279 assertSortedIndex(x); | |
| 280 int i = sorted2natural[x]; | |
| 281 return firstParent[i]; | |
| 258 } | 282 } |
| 259 | 283 |
| 260 // never null, Nodeid.NULL if none known | 284 // never null, Nodeid.NULL if none known |
| 261 public Nodeid safeFirstParent(Nodeid nid) { | 285 public Nodeid safeFirstParent(Nodeid nid) { |
| 262 Nodeid rv = firstParent(nid); | 286 Nodeid rv = firstParent(nid); |
| 263 return rv == null ? Nodeid.NULL : rv; | 287 return rv == null ? Nodeid.NULL : rv; |
| 264 } | 288 } |
| 265 | 289 |
| 266 public Nodeid secondParent(Nodeid nid) { | 290 public Nodeid secondParent(Nodeid nid) { |
| 267 return secondParent.get(nid); | 291 int x = Arrays.binarySearch(sorted, nid); |
| 292 assertSortedIndex(x); | |
| 293 int i = sorted2natural[x]; | |
| 294 return secondParent[i]; | |
| 268 } | 295 } |
| 269 | 296 |
| 270 public Nodeid safeSecondParent(Nodeid nid) { | 297 public Nodeid safeSecondParent(Nodeid nid) { |
| 271 Nodeid rv = secondParent(nid); | 298 Nodeid rv = secondParent(nid); |
| 272 return rv == null ? Nodeid.NULL : rv; | 299 return rv == null ? Nodeid.NULL : rv; |
| 273 } | 300 } |
| 274 | 301 |
| 275 public boolean appendParentsOf(Nodeid nid, Collection<Nodeid> c) { | 302 public boolean appendParentsOf(Nodeid nid, Collection<Nodeid> c) { |
| 276 Nodeid p1 = firstParent(nid); | 303 int x = Arrays.binarySearch(sorted, nid); |
| 304 assertSortedIndex(x); | |
| 305 int i = sorted2natural[x]; | |
| 306 Nodeid p1 = firstParent[i]; | |
| 277 boolean modified = false; | 307 boolean modified = false; |
| 278 if (p1 != null) { | 308 if (p1 != null) { |
| 279 modified = c.add(p1); | 309 modified = c.add(p1); |
| 280 } | 310 } |
| 281 Nodeid p2 = secondParent(nid); | 311 Nodeid p2 = secondParent[i]; |
| 282 if (p2 != null) { | 312 if (p2 != null) { |
| 283 modified = c.add(p2) || modified; | 313 modified = c.add(p2) || modified; |
| 284 } | 314 } |
| 285 return modified; | 315 return modified; |
| 286 } | 316 } |
| 287 | 317 |
| 288 // XXX alternative (and perhaps more reliable) approach would be to make a copy of allNodes and remove | 318 // XXX alternative (and perhaps more reliable) approach would be to make a copy of allNodes and remove |
| 289 // nodes, their parents and so on. | 319 // nodes, their parents and so on. |
| 290 | 320 |
| 291 // @return ordered collection of all children rooted at supplied nodes. Nodes shall not be descendants of each other! | 321 // @return ordered collection of all children rooted at supplied nodes. Nodes shall not be descendants of each other! |
| 292 public List<Nodeid> childrenOf(List<Nodeid> nodes) { | 322 // Nodeids shall belong to this revlog |
| 323 public List<Nodeid> childrenOf(List<Nodeid> roots) { | |
| 293 HashSet<Nodeid> parents = new HashSet<Nodeid>(); | 324 HashSet<Nodeid> parents = new HashSet<Nodeid>(); |
| 294 LinkedHashSet<Nodeid> result = new LinkedHashSet<Nodeid>(); | 325 LinkedList<Nodeid> result = new LinkedList<Nodeid>(); |
| 295 LinkedList<Nodeid> orderedResult = new LinkedList<Nodeid>(); | 326 int earliestRevision = Integer.MAX_VALUE; |
| 296 for(Nodeid next : allNodes) { | 327 assert sequential.length == firstParent.length && firstParent.length == secondParent.length; |
| 297 // i assume allNodes is sorted, hence do not check any parents unless we hit any common known node first | 328 // first, find earliest index of roots in question, as there's no sense |
| 298 if (nodes.contains(next)) { | 329 // to check children among nodes prior to branch's root node |
| 299 parents.add(next); | 330 for (Nodeid r : roots) { |
| 300 } else { | 331 int x = Arrays.binarySearch(sorted, r); |
| 301 if (parents.isEmpty()) { | 332 assertSortedIndex(x); |
| 302 // didn't scroll up to any known yet | 333 int i = sorted2natural[x]; |
| 303 continue; | 334 if (i < earliestRevision) { |
| 304 } | 335 earliestRevision = i; |
| 305 // record each and every node reported after first common known node hit | |
| 306 orderedResult.addLast(next); | |
| 307 if (parents.contains(firstParent(next)) || parents.contains(secondParent(next))) { | |
| 308 result.add(next); | |
| 309 parents.add(next); // to find next's children | |
| 310 } | |
| 311 } | 336 } |
| 312 } | 337 parents.add(sequential[i]); // add canonical instance in hope equals() is bit faster when can do a == |
| 313 // leave only those of interest in ordered sequence | 338 } |
| 314 orderedResult.retainAll(result); | 339 for (int i = earliestRevision + 1; i < sequential.length; i++) { |
| 315 return orderedResult; | 340 if (parents.contains(firstParent[i]) || parents.contains(secondParent[i])) { |
| 341 parents.add(sequential[i]); // to find next child | |
| 342 result.add(sequential[i]); | |
| 343 } | |
| 344 } | |
| 345 return result; | |
| 316 } | 346 } |
| 317 | 347 |
| 318 /** | 348 /** |
| 319 * @param node possibly parent node | 349 * @param nid possibly parent node, shall be {@link #knownNode(Nodeid) known} in this revlog. |
| 320 * @return <code>true</code> if there's any node in this revlog that has specified node as one of its parents. | 350 * @return <code>true</code> if there's any node in this revlog that has specified node as one of its parents. |
| 321 */ | 351 */ |
| 322 public boolean hasChildren(Nodeid node) { | 352 public boolean hasChildren(Nodeid nid) { |
| 323 // FIXME containsValue is linear, likely. May want to optimize it with another (Tree|Hash)Set, created on demand | 353 int x = Arrays.binarySearch(sorted, nid); |
| 324 // on first use | 354 assertSortedIndex(x); |
| 325 return firstParent.containsValue(node) || secondParent.containsValue(node); | 355 int i = sorted2natural[x]; |
| 356 assert firstParent.length == secondParent.length; // just in case later I implement sparse array for secondParent | |
| 357 assert firstParent.length == sequential.length; | |
| 358 // to use == instead of equals, take the same Nodeid instance we used to fill all the arrays. | |
| 359 final Nodeid canonicalNode = sequential[i]; | |
| 360 i++; // no need to check node itself. child nodes may appear in sequential only after revision in question | |
| 361 for (; i < sequential.length; i++) { | |
| 362 // FIXME likely, not very effective. | |
| 363 // May want to optimize it with another (Tree|Hash)Set, created on demand on first use, | |
| 364 // however, need to be careful with memory usage | |
| 365 if (firstParent[i] == canonicalNode || secondParent[i] == canonicalNode) { | |
| 366 return true; | |
| 367 } | |
| 368 } | |
| 369 return false; | |
| 326 } | 370 } |
| 327 } | 371 } |
| 328 | 372 |
| 329 protected static class ContentPipe implements RevlogStream.Inspector, CancelSupport { | 373 protected static class ContentPipe implements RevlogStream.Inspector, CancelSupport { |
| 330 private final ByteChannel sink; | 374 private final ByteChannel sink; |
