comparison src/org/tmatesoft/hg/repo/HgStatusCollector.java @ 248:3fbfce107f94

Issue 8: Means to find out information about given file at specific changeset. Inner ManifestRevisionInspector got promoted to ManifestRevision
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Fri, 12 Aug 2011 18:48:57 +0200
parents 1ec6b327a6ac
children 81e9a3c9bafe
comparison
equal deleted inserted replaced
247:f052f40839ec 248:3fbfce107f94
29 import java.util.TreeMap; 29 import java.util.TreeMap;
30 import java.util.TreeSet; 30 import java.util.TreeSet;
31 31
32 import org.tmatesoft.hg.core.HgDataStreamException; 32 import org.tmatesoft.hg.core.HgDataStreamException;
33 import org.tmatesoft.hg.core.Nodeid; 33 import org.tmatesoft.hg.core.Nodeid;
34 import org.tmatesoft.hg.internal.ManifestRevision;
34 import org.tmatesoft.hg.internal.Pool; 35 import org.tmatesoft.hg.internal.Pool;
35 import org.tmatesoft.hg.util.Path; 36 import org.tmatesoft.hg.util.Path;
36 import org.tmatesoft.hg.util.PathPool; 37 import org.tmatesoft.hg.util.PathPool;
37 import org.tmatesoft.hg.util.PathRewrite; 38 import org.tmatesoft.hg.util.PathRewrite;
38 39
44 * @author TMate Software Ltd. 45 * @author TMate Software Ltd.
45 */ 46 */
46 public class HgStatusCollector { 47 public class HgStatusCollector {
47 48
48 private final HgRepository repo; 49 private final HgRepository repo;
49 private final SortedMap<Integer, ManifestRevisionInspector> cache; // sparse array, in fact 50 private final SortedMap<Integer, ManifestRevision> cache; // sparse array, in fact
50 // with cpython repository, ~70 000 changes, complete Log (direct out, no reverse) output 51 // with cpython repository, ~70 000 changes, complete Log (direct out, no reverse) output
51 // no cache limit, no nodeids and fname caching - OOME on changeset 1035 52 // no cache limit, no nodeids and fname caching - OOME on changeset 1035
52 // no cache limit, but with cached nodeids and filenames - 1730+ 53 // no cache limit, but with cached nodeids and filenames - 1730+
53 // cache limit 100 - 19+ minutes to process 10000, and still working (too long, stopped) 54 // cache limit 100 - 19+ minutes to process 10000, and still working (too long, stopped)
54 private final int cacheMaxSize = 50; // do not keep too much manifest revisions 55 private final int cacheMaxSize = 50; // do not keep too much manifest revisions
55 private PathPool pathPool; 56 private PathPool pathPool;
56 private final Pool<Nodeid> cacheNodes; 57 private final Pool<Nodeid> cacheNodes;
57 private final Pool<String> cacheFilenames; // XXX in fact, need to think if use of PathPool directly instead is better solution 58 private final Pool<String> cacheFilenames; // XXX in fact, need to think if use of PathPool directly instead is better solution
58 private final ManifestRevisionInspector emptyFakeState; 59 private final ManifestRevision emptyFakeState;
59 private Path.Matcher scope = new Path.Matcher.Any(); 60 private Path.Matcher scope = new Path.Matcher.Any();
60 61
61 62
62 public HgStatusCollector(HgRepository hgRepo) { 63 public HgStatusCollector(HgRepository hgRepo) {
63 this.repo = hgRepo; 64 this.repo = hgRepo;
64 cache = new TreeMap<Integer, ManifestRevisionInspector>(); 65 cache = new TreeMap<Integer, ManifestRevision>();
65 cacheNodes = new Pool<Nodeid>(); 66 cacheNodes = new Pool<Nodeid>();
66 cacheFilenames = new Pool<String>(); 67 cacheFilenames = new Pool<String>();
67 68
68 emptyFakeState = new ManifestRevisionInspector(null, null); 69 emptyFakeState = new ManifestRevision(null, null);
69 emptyFakeState.begin(-1, null, -1); 70 emptyFakeState.begin(-1, null, -1);
70 emptyFakeState.end(-1); 71 emptyFakeState.end(-1);
71 } 72 }
72 73
73 public HgRepository getRepo() { 74 public HgRepository getRepo() {
74 return repo; 75 return repo;
75 } 76 }
76 77
77 private ManifestRevisionInspector get(int rev) { 78 private ManifestRevision get(int rev) {
78 ManifestRevisionInspector i = cache.get(rev); 79 ManifestRevision i = cache.get(rev);
79 if (i == null) { 80 if (i == null) {
80 if (rev == -1) { 81 if (rev == -1) {
81 return emptyFakeState; 82 return emptyFakeState;
82 } 83 }
83 while (cache.size() > cacheMaxSize) { 84 while (cache.size() > cacheMaxSize) {
84 // assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary 85 // assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary
85 cache.remove(cache.firstKey()); 86 cache.remove(cache.firstKey());
86 } 87 }
87 i = new ManifestRevisionInspector(cacheNodes, cacheFilenames); 88 i = new ManifestRevision(cacheNodes, cacheFilenames);
88 cache.put(rev, i); 89 cache.put(rev, i);
89 repo.getManifest().walk(rev, rev, i); 90 repo.getManifest().walk(rev, rev, i);
90 } 91 }
91 return i; 92 return i;
92 } 93 }
99 while (cache.size() > cacheMaxSize) { 100 while (cache.size() > cacheMaxSize) {
100 // assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary 101 // assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary
101 cache.remove(cache.firstKey()); 102 cache.remove(cache.firstKey());
102 } 103 }
103 repo.getManifest().walk(minRev, maxRev, new HgManifest.Inspector() { 104 repo.getManifest().walk(minRev, maxRev, new HgManifest.Inspector() {
104 private ManifestRevisionInspector delegate; 105 private ManifestRevision delegate;
105 private boolean cacheHit; // range may include revisions we already know about, do not re-create them 106 private boolean cacheHit; // range may include revisions we already know about, do not re-create them
106 107
107 public boolean begin(int manifestRevision, Nodeid nid, int changelogRevision) { 108 public boolean begin(int manifestRevision, Nodeid nid, int changelogRevision) {
108 assert delegate == null; 109 assert delegate == null;
109 if (cache.containsKey(changelogRevision)) { // don't need to check emptyFakeState hit as revision never -1 here 110 if (cache.containsKey(changelogRevision)) { // don't need to check emptyFakeState hit as revision never -1 here
110 cacheHit = true; 111 cacheHit = true;
111 } else { 112 } else {
112 cache.put(changelogRevision, delegate = new ManifestRevisionInspector(cacheNodes, cacheFilenames)); 113 cache.put(changelogRevision, delegate = new ManifestRevision(cacheNodes, cacheFilenames));
113 // cache may grow bigger than max size here, but it's ok as present simplistic cache clearing mechanism may 114 // cache may grow bigger than max size here, but it's ok as present simplistic cache clearing mechanism may
114 // otherwise remove entries we just added 115 // otherwise remove entries we just added
115 delegate.begin(manifestRevision, nid, changelogRevision); 116 delegate.begin(manifestRevision, nid, changelogRevision);
116 cacheHit = false; 117 cacheHit = false;
117 } 118 }
134 return true; 135 return true;
135 } 136 }
136 }); 137 });
137 } 138 }
138 139
139 /*package-local*/ ManifestRevisionInspector raw(int rev) { 140 /*package-local*/ ManifestRevision raw(int rev) {
140 return get(rev); 141 return get(rev);
141 } 142 }
142 /*package-local*/ PathPool getPathPool() { 143 /*package-local*/ PathPool getPathPool() {
143 if (pathPool == null) { 144 if (pathPool == null) {
144 pathPool = new PathPool(new PathRewrite.Empty()); 145 pathPool = new PathPool(new PathRewrite.Empty());
191 if (rev2 == TIP) { 192 if (rev2 == TIP) {
192 rev2 = lastManifestRevision; 193 rev2 = lastManifestRevision;
193 } 194 }
194 // in fact, rev1 and rev2 are often next (or close) to each other, 195 // in fact, rev1 and rev2 are often next (or close) to each other,
195 // thus, we can optimize Manifest reads here (manifest.walk(rev1, rev2)) 196 // thus, we can optimize Manifest reads here (manifest.walk(rev1, rev2))
196 ManifestRevisionInspector r1, r2 ; 197 ManifestRevision r1, r2 ;
197 boolean need1 = !cached(rev1), need2 = !cached(rev2); 198 boolean need1 = !cached(rev1), need2 = !cached(rev2);
198 if (need1 || need2) { 199 if (need1 || need2) {
199 int minRev, maxRev; 200 int minRev, maxRev;
200 if (need1 && need2 && Math.abs(rev1 - rev2) < 5 /*subjective equivalent of 'close enough'*/) { 201 if (need1 && need2 && Math.abs(rev1 - rev2) < 5 /*subjective equivalent of 'close enough'*/) {
201 minRev = rev1 < rev2 ? rev1 : rev2; 202 minRev = rev1 < rev2 ? rev1 : rev2;
420 } 421 }
421 l.add(p); 422 l.add(p);
422 return l; 423 return l;
423 } 424 }
424 } 425 }
425
426 /*package-local*/ static final class ManifestRevisionInspector implements HgManifest.Inspector {
427 private final TreeMap<String, Nodeid> idsMap;
428 private final TreeMap<String, String> flagsMap;
429 private final Pool<Nodeid> idsPool;
430 private final Pool<String> namesPool;
431
432 // optional pools for effective management of nodeids and filenames (they are likely
433 // to be duplicated among different manifest revisions
434 public ManifestRevisionInspector(Pool<Nodeid> nodeidPool, Pool<String> filenamePool) {
435 idsPool = nodeidPool;
436 namesPool = filenamePool;
437 idsMap = new TreeMap<String, Nodeid>();
438 flagsMap = new TreeMap<String, String>();
439 }
440
441 public Collection<String> files() {
442 return idsMap.keySet();
443 }
444
445 public Nodeid nodeid(String fname) {
446 return idsMap.get(fname);
447 }
448
449 public String flags(String fname) {
450 return flagsMap.get(fname);
451 }
452
453 //
454
455 public boolean next(Nodeid nid, String fname, String flags) {
456 if (namesPool != null) {
457 fname = namesPool.unify(fname);
458 }
459 if (idsPool != null) {
460 nid = idsPool.unify(nid);
461 }
462 idsMap.put(fname, nid);
463 if (flags != null) {
464 // TreeMap$Entry takes 32 bytes. No reason to keep null for such price
465 // Perhaps, Map<String, Pair<Nodeid, String>> might be better solution
466 flagsMap.put(fname, flags);
467 }
468 return true;
469 }
470
471 public boolean end(int revision) {
472 // in fact, this class cares about single revision
473 return false;
474 }
475
476 public boolean begin(int revision, Nodeid nid, int changelogRevision) {
477 return true;
478 }
479 }
480 426
481 } 427 }