comparison src/org/tmatesoft/hg/repo/HgStatusCollector.java @ 197:3a7696fb457c

Investigate optimization options to allow fast processing of huge repositories. Fix defect in StatusCollector that lead to wrong result comparing first revision to empty repo (-1 to 0), due to same TIP constant value
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Tue, 19 Apr 2011 03:49:29 +0200
parents c9b305df0b89
children 047b1dec7a04
comparison
equal deleted inserted replaced
196:e2115da4cf6a 197:3a7696fb457c
49 private final SortedMap<Integer, ManifestRevisionInspector> cache; // sparse array, in fact 49 private final SortedMap<Integer, ManifestRevisionInspector> cache; // sparse array, in fact
50 // with cpython repository, ~70 000 changes, complete Log (direct out, no reverse) output 50 // with cpython repository, ~70 000 changes, complete Log (direct out, no reverse) output
51 // no cache limit, no nodeids and fname caching - OOME on changeset 1035 51 // no cache limit, no nodeids and fname caching - OOME on changeset 1035
52 // no cache limit, but with cached nodeids and filenames - 1730+ 52 // no cache limit, but with cached nodeids and filenames - 1730+
53 // cache limit 100 - 19+ minutes to process 10000, and still working (too long, stopped) 53 // cache limit 100 - 19+ minutes to process 10000, and still working (too long, stopped)
54 private final int cacheMaxSize = 100; // do not keep too much manifest revisions 54 private final int cacheMaxSize = 50; // do not keep too much manifest revisions
55 private PathPool pathPool; 55 private PathPool pathPool;
56 private final Pool<Nodeid> cacheNodes; 56 private final Pool<Nodeid> cacheNodes;
57 private final Pool<String> cacheFilenames; // XXX in fact, need to think if use of PathPool directly instead is better solution 57 private final Pool<String> cacheFilenames; // XXX in fact, need to think if use of PathPool directly instead is better solution
58 private final ManifestRevisionInspector emptyFakeState;
58 59
59 60
60 public HgStatusCollector(HgRepository hgRepo) { 61 public HgStatusCollector(HgRepository hgRepo) {
61 this.repo = hgRepo; 62 this.repo = hgRepo;
62 cache = new TreeMap<Integer, ManifestRevisionInspector>(); 63 cache = new TreeMap<Integer, ManifestRevisionInspector>();
63 cacheNodes = new Pool<Nodeid>(); 64 cacheNodes = new Pool<Nodeid>();
64 cacheFilenames = new Pool<String>(); 65 cacheFilenames = new Pool<String>();
65 ManifestRevisionInspector emptyFakeState = new ManifestRevisionInspector(null, null); 66
67 emptyFakeState = new ManifestRevisionInspector(null, null);
66 emptyFakeState.begin(-1, null); 68 emptyFakeState.begin(-1, null);
67 emptyFakeState.end(-1); // FIXME HgRepo.TIP == -1 as well, need to distinguish fake "prior to first" revision from "the very last" 69 emptyFakeState.end(-1);
68 cache.put(-1, emptyFakeState);
69 } 70 }
70 71
71 public HgRepository getRepo() { 72 public HgRepository getRepo() {
72 return repo; 73 return repo;
73 } 74 }
74 75
75 private ManifestRevisionInspector get(int rev) { 76 private ManifestRevisionInspector get(int rev) {
76 ManifestRevisionInspector i = cache.get(rev); 77 ManifestRevisionInspector i = cache.get(rev);
77 if (i == null) { 78 if (i == null) {
78 if (cache.size() > cacheMaxSize) { 79 if (rev == -1) {
80 return emptyFakeState;
81 }
82 while (cache.size() > cacheMaxSize) {
79 // assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary 83 // assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary
80 cache.remove(cache.firstKey()); 84 cache.remove(cache.firstKey());
81 } 85 }
82 i = new ManifestRevisionInspector(cacheNodes, cacheFilenames); 86 i = new ManifestRevisionInspector(cacheNodes, cacheFilenames);
83 cache.put(rev, i); 87 cache.put(rev, i);
84 repo.getManifest().walk(rev, rev, i); 88 repo.getManifest().walk(rev, rev, i);
85 } 89 }
86 return i; 90 return i;
91 }
92
93 private boolean cached(int revision) {
94 return cache.containsKey(revision) || revision == -1;
95 }
96
97 private void initCacheRange(int minRev, int maxRev) {
98 while (cache.size() > cacheMaxSize) {
99 // assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary
100 cache.remove(cache.firstKey());
101 }
102 repo.getManifest().walk(minRev, maxRev, new HgManifest.Inspector() {
103 private ManifestRevisionInspector delegate;
104 private boolean cacheHit; // range may include revisions we already know about, do not re-create them
105
106 public boolean begin(int revision, Nodeid nid) {
107 assert delegate == null;
108 if (cache.containsKey(revision)) { // don't need to check emptyFakeState hit as revision never -1 here
109 cacheHit = true;
110 } else {
111 cache.put(revision, delegate = new ManifestRevisionInspector(cacheNodes, cacheFilenames));
112 // cache may grow bigger than max size here, but it's ok as present simplistic cache clearing mechanism may
113 // otherwise remove entries we just added
114 delegate.begin(revision, nid);
115 cacheHit = false;
116 }
117 return true;
118 }
119
120 public boolean next(Nodeid nid, String fname, String flags) {
121 if (!cacheHit) {
122 delegate.next(nid, fname, flags);
123 }
124 return true;
125 }
126
127 public boolean end(int revision) {
128 if (!cacheHit) {
129 delegate.end(revision);
130 }
131 cacheHit = false;
132 delegate = null;
133 return true;
134 }
135 });
87 } 136 }
88 137
89 /*package-local*/ ManifestRevisionInspector raw(int rev) { 138 /*package-local*/ ManifestRevisionInspector raw(int rev) {
90 return get(rev); 139 return get(rev);
91 } 140 }
108 public void change(int rev, HgStatusInspector inspector) { 157 public void change(int rev, HgStatusInspector inspector) {
109 int[] parents = new int[2]; 158 int[] parents = new int[2];
110 repo.getChangelog().parents(rev, parents, null, null); 159 repo.getChangelog().parents(rev, parents, null, null);
111 walk(parents[0], rev, inspector); 160 walk(parents[0], rev, inspector);
112 } 161 }
113 162
114 // I assume revision numbers are the same for changelog and manifest - here 163 // I assume revision numbers are the same for changelog and manifest - here
115 // user would like to pass changelog revision numbers, and I use them directly to walk manifest. 164 // user would like to pass changelog revision numbers, and I use them directly to walk manifest.
116 // if this assumption is wrong, fix this (lookup manifest revisions from changeset). 165 // if this assumption is wrong, fix this (lookup manifest revisions from changeset).
166 // rev1 and rev2 may be -1 to indicate comparison to empty repository
167 // argument order matters
117 public void walk(int rev1, int rev2, HgStatusInspector inspector) { 168 public void walk(int rev1, int rev2, HgStatusInspector inspector) {
118 if (rev1 == rev2) { 169 if (rev1 == rev2) {
119 throw new IllegalArgumentException(); 170 throw new IllegalArgumentException();
120 } 171 }
121 if (inspector == null) { 172 if (inspector == null) {
122 throw new IllegalArgumentException(); 173 throw new IllegalArgumentException();
123 } 174 }
124 if (inspector instanceof Record) { 175 if (inspector instanceof Record) {
125 ((Record) inspector).init(rev1, rev2, this); 176 ((Record) inspector).init(rev1, rev2, this);
126 } 177 }
178 final int lastManifestRevision = repo.getManifest().getLastRevision();
127 if (rev1 == TIP) { 179 if (rev1 == TIP) {
128 rev1 = repo.getManifest().getLastRevision(); 180 rev1 = lastManifestRevision;
129 } 181 }
130 if (rev2 == TIP) { 182 if (rev2 == TIP) {
131 rev2 = repo.getManifest().getLastRevision(); 183 rev2 = lastManifestRevision;
132 } 184 }
133 // in fact, rev1 and rev2 are often next (or close) to each other, 185 // in fact, rev1 and rev2 are often next (or close) to each other,
134 // thus, we can optimize Manifest reads here (manifest.walk(rev1, rev2)) 186 // thus, we can optimize Manifest reads here (manifest.walk(rev1, rev2))
135 ManifestRevisionInspector r1, r2 ; 187 ManifestRevisionInspector r1, r2 ;
136 if (!cache.containsKey(rev1) && !cache.containsKey(rev2) && Math.abs(rev1 - rev2) < 5 /*subjective equivalent of 'close enough'*/) { 188 boolean need1 = !cached(rev1), need2 = !cached(rev2);
137 int minRev = rev1 < rev2 ? rev1 : rev2; 189 if (need1 || need2) {
138 int maxRev = minRev == rev1 ? rev2 : rev1; 190 int minRev, maxRev;
139 if (minRev > 0) { 191 if (need1 && need2 && Math.abs(rev1 - rev2) < 5 /*subjective equivalent of 'close enough'*/) {
140 minRev--; // expand range a bit 192 minRev = rev1 < rev2 ? rev1 : rev2;
141 // XXX perhaps, if revlog.baseRevision is cheap, shall expand minRev up to baseRevision 193 maxRev = minRev == rev1 ? rev2 : rev1;
142 // which gonna be read anyway 194 if (minRev > 0) {
143 } 195 minRev--; // expand range a bit
144 196 }
145 repo.getManifest().walk(minRev, maxRev, new HgManifest.Inspector() { 197 initCacheRange(minRev, maxRev);
146 private ManifestRevisionInspector delegate; 198 need1 = need2 = false;
147 199 }
148 public boolean begin(int revision, Nodeid nid) { 200 // either both unknown and far from each other, or just one of them.
149 cache.put(revision, delegate = new ManifestRevisionInspector(cacheNodes, cacheFilenames)); 201 // read with neighbors to save potential subsequent calls for neighboring elements
150 delegate.begin(revision, nid); 202 // XXX perhaps, if revlog.baseRevision is cheap, shall expand minRev up to baseRevision
151 return true; 203 // which going to be read anyway
152 } 204 if (need1) {
153 205 minRev = rev1;
154 public boolean next(Nodeid nid, String fname, String flags) { 206 maxRev = rev1 < lastManifestRevision-5 ? rev1+5 : lastManifestRevision;
155 delegate.next(nid, fname, flags); 207 initCacheRange(minRev, maxRev);
156 return true; 208 }
157 } 209 if (need2) {
158 210 minRev = rev2;
159 public boolean end(int revision) { 211 maxRev = rev2 < lastManifestRevision-5 ? rev2+5 : lastManifestRevision;
160 delegate.end(revision); 212 initCacheRange(minRev, maxRev);
161 delegate = null; 213 }
162 return true;
163 }
164 });
165 } 214 }
166 r1 = get(rev1); 215 r1 = get(rev1);
167 r2 = get(rev2); 216 r2 = get(rev2);
168 217
169 PathPool pp = getPathPool(); 218 PathPool pp = getPathPool();
394 } 443 }
395 if (idsPool != null) { 444 if (idsPool != null) {
396 nid = idsPool.unify(nid); 445 nid = idsPool.unify(nid);
397 } 446 }
398 idsMap.put(fname, nid); 447 idsMap.put(fname, nid);
399 flagsMap.put(fname, flags); 448 if (flags != null) {
449 // TreeMap$Entry takes 32 bytes. No reason to keep null for such price
450 // Perhaps, Map<String, Pair<Nodeid, String>> might be better solution
451 flagsMap.put(fname, flags);
452 }
400 return true; 453 return true;
401 } 454 }
402 455
403 public boolean end(int revision) { 456 public boolean end(int revision) {
404 // in fact, this class cares about single revision 457 // in fact, this class cares about single revision