comparison src/org/tmatesoft/hg/repo/Revlog.java @ 200:114c9fe7b643

Performance optimization: reduce memory ParentWalker hogs
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 20 Apr 2011 21:14:51 +0200
parents 3a7696fb457c
children 047b1dec7a04
comparison
equal deleted inserted replaced
199:f4fa4456fa50 200:114c9fe7b643
21 21
22 import java.io.IOException; 22 import java.io.IOException;
23 import java.nio.ByteBuffer; 23 import java.nio.ByteBuffer;
24 import java.util.Arrays; 24 import java.util.Arrays;
25 import java.util.Collection; 25 import java.util.Collection;
26 import java.util.Collections;
27 import java.util.HashMap;
28 import java.util.HashSet; 26 import java.util.HashSet;
29 import java.util.LinkedHashSet;
30 import java.util.LinkedList; 27 import java.util.LinkedList;
31 import java.util.List; 28 import java.util.List;
32 import java.util.Map;
33 import java.util.Set;
34 29
35 import org.tmatesoft.hg.core.HgBadStateException; 30 import org.tmatesoft.hg.core.HgBadStateException;
36 import org.tmatesoft.hg.core.HgException; 31 import org.tmatesoft.hg.core.HgException;
37 import org.tmatesoft.hg.core.Nodeid; 32 import org.tmatesoft.hg.core.Nodeid;
38 import org.tmatesoft.hg.internal.DataAccess; 33 import org.tmatesoft.hg.internal.DataAccess;
198 * add Revlog#getParentWalker(), static class, make cons() and #init package-local, and keep SoftReference to allow walker reuse. 193 * add Revlog#getParentWalker(), static class, make cons() and #init package-local, and keep SoftReference to allow walker reuse.
199 * 194 *
200 * and yes, walker is not a proper name 195 * and yes, walker is not a proper name
201 */ 196 */
202 public final class ParentWalker { 197 public final class ParentWalker {
203 private Map<Nodeid, Nodeid> firstParent; 198
204 private Map<Nodeid, Nodeid> secondParent; 199
205 private final LinkedHashSet<Nodeid> allNodes = new LinkedHashSet<Nodeid>(); // childrenOf rely on ordering 200 private Nodeid[] sequential; // natural repository order, childrenOf rely on ordering
201 private Nodeid[] sorted; // for binary search
202 private int[] sorted2natural;
203 private Nodeid[] firstParent;
204 private Nodeid[] secondParent;
205
206 // Nodeid instances shall be shared between all arrays
206 207
207 public ParentWalker() { 208 public ParentWalker() {
208 firstParent = secondParent = Collections.emptyMap();
209 } 209 }
210 210
211 public HgRepository getRepo() { 211 public HgRepository getRepo() {
212 return Revlog.this.getRepo(); 212 return Revlog.this.getRepo();
213 } 213 }
214 214
215 public void init() { 215 public void init() {
216 final RevlogStream stream = Revlog.this.content; 216 final RevlogStream stream = Revlog.this.content;
217 final int revisionCount = stream.revisionCount(); 217 final int revisionCount = stream.revisionCount();
218 firstParent = new HashMap<Nodeid, Nodeid>(revisionCount); 218 firstParent = new Nodeid[revisionCount];
219 secondParent = new HashMap<Nodeid, Nodeid>(firstParent.size() >> 1); // assume branches/merges are less frequent 219 // although branches/merges are less frequent, and most of secondParent would be -1/null, some sort of
220 220 // SparseOrderedList might be handy, provided its inner structures do not overweight simplicity of an array
221 secondParent = new Nodeid[revisionCount];
222 //
223 sequential = new Nodeid[revisionCount];
224 sorted = new Nodeid[revisionCount];
225
221 RevlogStream.Inspector insp = new RevlogStream.Inspector() { 226 RevlogStream.Inspector insp = new RevlogStream.Inspector() {
222 final Nodeid[] sequentialRevisionNodeids = new Nodeid[revisionCount]; 227
223 int ix = 0; 228 int ix = 0;
224 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { 229 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) {
225 if (ix != revisionNumber) { 230 if (ix != revisionNumber) {
226 // XXX temp code, just to make sure I understand what's going on here 231 // XXX temp code, just to make sure I understand what's going on here
227 throw new IllegalStateException(); 232 throw new IllegalStateException();
228 } 233 }
229 if (parent1Revision >= revisionNumber || parent2Revision >= revisionNumber) { 234 if (parent1Revision >= revisionNumber || parent2Revision >= revisionNumber) {
230 throw new IllegalStateException(); // sanity, revisions are sequential 235 throw new IllegalStateException(); // sanity, revisions are sequential
231 } 236 }
232 final Nodeid nid = new Nodeid(nodeid, true); 237 final Nodeid nid = new Nodeid(nodeid, true);
233 sequentialRevisionNodeids[ix++] = nid; 238 sequential[ix] = sorted[ix] = nid;
234 allNodes.add(nid);
235 if (parent1Revision != -1) { 239 if (parent1Revision != -1) {
236 firstParent.put(nid, sequentialRevisionNodeids[parent1Revision]); 240 assert parent1Revision < ix;
241 firstParent[ix] = sequential[parent1Revision];
237 } 242 }
238 if (parent2Revision != -1) { // revlog of DataAccess.java has p2 set when p1 is -1 243 if (parent2Revision != -1) { // revlog of DataAccess.java has p2 set when p1 is -1
239 secondParent.put(nid, sequentialRevisionNodeids[parent2Revision]); 244 assert parent2Revision < ix;
245 secondParent[ix] = sequential[parent2Revision];
240 } 246 }
247 ix++;
241 } 248 }
242 }; 249 };
243 stream.iterate(0, TIP, false, insp); 250 stream.iterate(0, TIP, false, insp);
244 } 251 Arrays.sort(sorted);
245 252 sorted2natural = new int[revisionCount];
246 public Set<Nodeid> allNodes() { 253 for (int i = 0; i < revisionCount; i++) {
247 return Collections.unmodifiableSet(allNodes); 254 Nodeid n = sequential[i];
255 int x = Arrays.binarySearch(sorted, n);
256 assertSortedIndex(x);
257 sorted2natural[x] = i;
258 }
259 }
260
261 private void assertSortedIndex(int x) {
262 if (x < 0) {
263 throw new HgBadStateException();
264 }
248 } 265 }
249 266
250 // FIXME need to decide whether Nodeid(00 * 20) is always known or not 267 // FIXME need to decide whether Nodeid(00 * 20) is always known or not
268 // right now Nodeid.NULL is not recognized as known if passed to this method,
269 // caller is supposed to make explicit check
251 public boolean knownNode(Nodeid nid) { 270 public boolean knownNode(Nodeid nid) {
252 return allNodes.contains(nid); 271 return Arrays.binarySearch(sorted, nid) >= 0;
253 } 272 }
254 273
255 // null if none 274 /**
275 * null if none. only known nodes (as per #knownNode) are accepted as arguments
276 */
256 public Nodeid firstParent(Nodeid nid) { 277 public Nodeid firstParent(Nodeid nid) {
257 return firstParent.get(nid); 278 int x = Arrays.binarySearch(sorted, nid);
279 assertSortedIndex(x);
280 int i = sorted2natural[x];
281 return firstParent[i];
258 } 282 }
259 283
260 // never null, Nodeid.NULL if none known 284 // never null, Nodeid.NULL if none known
261 public Nodeid safeFirstParent(Nodeid nid) { 285 public Nodeid safeFirstParent(Nodeid nid) {
262 Nodeid rv = firstParent(nid); 286 Nodeid rv = firstParent(nid);
263 return rv == null ? Nodeid.NULL : rv; 287 return rv == null ? Nodeid.NULL : rv;
264 } 288 }
265 289
266 public Nodeid secondParent(Nodeid nid) { 290 public Nodeid secondParent(Nodeid nid) {
267 return secondParent.get(nid); 291 int x = Arrays.binarySearch(sorted, nid);
292 assertSortedIndex(x);
293 int i = sorted2natural[x];
294 return secondParent[i];
268 } 295 }
269 296
270 public Nodeid safeSecondParent(Nodeid nid) { 297 public Nodeid safeSecondParent(Nodeid nid) {
271 Nodeid rv = secondParent(nid); 298 Nodeid rv = secondParent(nid);
272 return rv == null ? Nodeid.NULL : rv; 299 return rv == null ? Nodeid.NULL : rv;
273 } 300 }
274 301
275 public boolean appendParentsOf(Nodeid nid, Collection<Nodeid> c) { 302 public boolean appendParentsOf(Nodeid nid, Collection<Nodeid> c) {
276 Nodeid p1 = firstParent(nid); 303 int x = Arrays.binarySearch(sorted, nid);
304 assertSortedIndex(x);
305 int i = sorted2natural[x];
306 Nodeid p1 = firstParent[i];
277 boolean modified = false; 307 boolean modified = false;
278 if (p1 != null) { 308 if (p1 != null) {
279 modified = c.add(p1); 309 modified = c.add(p1);
280 } 310 }
281 Nodeid p2 = secondParent(nid); 311 Nodeid p2 = secondParent[i];
282 if (p2 != null) { 312 if (p2 != null) {
283 modified = c.add(p2) || modified; 313 modified = c.add(p2) || modified;
284 } 314 }
285 return modified; 315 return modified;
286 } 316 }
287 317
288 // XXX alternative (and perhaps more reliable) approach would be to make a copy of allNodes and remove 318 // XXX alternative (and perhaps more reliable) approach would be to make a copy of allNodes and remove
289 // nodes, their parents and so on. 319 // nodes, their parents and so on.
290 320
291 // @return ordered collection of all children rooted at supplied nodes. Nodes shall not be descendants of each other! 321 // @return ordered collection of all children rooted at supplied nodes. Nodes shall not be descendants of each other!
292 public List<Nodeid> childrenOf(List<Nodeid> nodes) { 322 // Nodeids shall belong to this revlog
323 public List<Nodeid> childrenOf(List<Nodeid> roots) {
293 HashSet<Nodeid> parents = new HashSet<Nodeid>(); 324 HashSet<Nodeid> parents = new HashSet<Nodeid>();
294 LinkedHashSet<Nodeid> result = new LinkedHashSet<Nodeid>(); 325 LinkedList<Nodeid> result = new LinkedList<Nodeid>();
295 LinkedList<Nodeid> orderedResult = new LinkedList<Nodeid>(); 326 int earliestRevision = Integer.MAX_VALUE;
296 for(Nodeid next : allNodes) { 327 assert sequential.length == firstParent.length && firstParent.length == secondParent.length;
297 // i assume allNodes is sorted, hence do not check any parents unless we hit any common known node first 328 // first, find earliest index of roots in question, as there's no sense
298 if (nodes.contains(next)) { 329 // to check children among nodes prior to branch's root node
299 parents.add(next); 330 for (Nodeid r : roots) {
300 } else { 331 int x = Arrays.binarySearch(sorted, r);
301 if (parents.isEmpty()) { 332 assertSortedIndex(x);
302 // didn't scroll up to any known yet 333 int i = sorted2natural[x];
303 continue; 334 if (i < earliestRevision) {
304 } 335 earliestRevision = i;
305 // record each and every node reported after first common known node hit
306 orderedResult.addLast(next);
307 if (parents.contains(firstParent(next)) || parents.contains(secondParent(next))) {
308 result.add(next);
309 parents.add(next); // to find next's children
310 }
311 } 336 }
312 } 337 parents.add(sequential[i]); // add canonical instance in hope equals() is bit faster when can do a ==
313 // leave only those of interest in ordered sequence 338 }
314 orderedResult.retainAll(result); 339 for (int i = earliestRevision + 1; i < sequential.length; i++) {
315 return orderedResult; 340 if (parents.contains(firstParent[i]) || parents.contains(secondParent[i])) {
341 parents.add(sequential[i]); // to find next child
342 result.add(sequential[i]);
343 }
344 }
345 return result;
316 } 346 }
317 347
318 /** 348 /**
319 * @param node possibly parent node 349 * @param nid possibly parent node, shall be {@link #knownNode(Nodeid) known} in this revlog.
320 * @return <code>true</code> if there's any node in this revlog that has specified node as one of its parents. 350 * @return <code>true</code> if there's any node in this revlog that has specified node as one of its parents.
321 */ 351 */
322 public boolean hasChildren(Nodeid node) { 352 public boolean hasChildren(Nodeid nid) {
323 // FIXME containsValue is linear, likely. May want to optimize it with another (Tree|Hash)Set, created on demand 353 int x = Arrays.binarySearch(sorted, nid);
324 // on first use 354 assertSortedIndex(x);
325 return firstParent.containsValue(node) || secondParent.containsValue(node); 355 int i = sorted2natural[x];
356 assert firstParent.length == secondParent.length; // just in case later I implement sparse array for secondParent
357 assert firstParent.length == sequential.length;
358 // to use == instead of equals, take the same Nodeid instance we used to fill all the arrays.
359 final Nodeid canonicalNode = sequential[i];
360 i++; // no need to check node itself. child nodes may appear in sequential only after revision in question
361 for (; i < sequential.length; i++) {
362 // FIXME likely, not very effective.
363 // May want to optimize it with another (Tree|Hash)Set, created on demand on first use,
364 // however, need to be careful with memory usage
365 if (firstParent[i] == canonicalNode || secondParent[i] == canonicalNode) {
366 return true;
367 }
368 }
369 return false;
326 } 370 }
327 } 371 }
328 372
329 protected static class ContentPipe implements RevlogStream.Inspector, CancelSupport { 373 protected static class ContentPipe implements RevlogStream.Inspector, CancelSupport {
330 private final ByteChannel sink; 374 private final ByteChannel sink;