Mercurial > hg4j
comparison src/org/tmatesoft/hg/repo/HgStatusCollector.java @ 197:3a7696fb457c
Investigate optimization options to allow fast processing of huge repositories. Fix defect in StatusCollector that lead to wrong result comparing first revision to empty repo (-1 to 0), due to same TIP constant value
author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
---|---|
date | Tue, 19 Apr 2011 03:49:29 +0200 |
parents | c9b305df0b89 |
children | 047b1dec7a04 |
comparison
equal
deleted
inserted
replaced
196:e2115da4cf6a | 197:3a7696fb457c |
---|---|
49 private final SortedMap<Integer, ManifestRevisionInspector> cache; // sparse array, in fact | 49 private final SortedMap<Integer, ManifestRevisionInspector> cache; // sparse array, in fact |
50 // with cpython repository, ~70 000 changes, complete Log (direct out, no reverse) output | 50 // with cpython repository, ~70 000 changes, complete Log (direct out, no reverse) output |
51 // no cache limit, no nodeids and fname caching - OOME on changeset 1035 | 51 // no cache limit, no nodeids and fname caching - OOME on changeset 1035 |
52 // no cache limit, but with cached nodeids and filenames - 1730+ | 52 // no cache limit, but with cached nodeids and filenames - 1730+ |
53 // cache limit 100 - 19+ minutes to process 10000, and still working (too long, stopped) | 53 // cache limit 100 - 19+ minutes to process 10000, and still working (too long, stopped) |
54 private final int cacheMaxSize = 100; // do not keep too much manifest revisions | 54 private final int cacheMaxSize = 50; // do not keep too much manifest revisions |
55 private PathPool pathPool; | 55 private PathPool pathPool; |
56 private final Pool<Nodeid> cacheNodes; | 56 private final Pool<Nodeid> cacheNodes; |
57 private final Pool<String> cacheFilenames; // XXX in fact, need to think if use of PathPool directly instead is better solution | 57 private final Pool<String> cacheFilenames; // XXX in fact, need to think if use of PathPool directly instead is better solution |
58 private final ManifestRevisionInspector emptyFakeState; | |
58 | 59 |
59 | 60 |
60 public HgStatusCollector(HgRepository hgRepo) { | 61 public HgStatusCollector(HgRepository hgRepo) { |
61 this.repo = hgRepo; | 62 this.repo = hgRepo; |
62 cache = new TreeMap<Integer, ManifestRevisionInspector>(); | 63 cache = new TreeMap<Integer, ManifestRevisionInspector>(); |
63 cacheNodes = new Pool<Nodeid>(); | 64 cacheNodes = new Pool<Nodeid>(); |
64 cacheFilenames = new Pool<String>(); | 65 cacheFilenames = new Pool<String>(); |
65 ManifestRevisionInspector emptyFakeState = new ManifestRevisionInspector(null, null); | 66 |
67 emptyFakeState = new ManifestRevisionInspector(null, null); | |
66 emptyFakeState.begin(-1, null); | 68 emptyFakeState.begin(-1, null); |
67 emptyFakeState.end(-1); // FIXME HgRepo.TIP == -1 as well, need to distinguish fake "prior to first" revision from "the very last" | 69 emptyFakeState.end(-1); |
68 cache.put(-1, emptyFakeState); | |
69 } | 70 } |
70 | 71 |
71 public HgRepository getRepo() { | 72 public HgRepository getRepo() { |
72 return repo; | 73 return repo; |
73 } | 74 } |
74 | 75 |
75 private ManifestRevisionInspector get(int rev) { | 76 private ManifestRevisionInspector get(int rev) { |
76 ManifestRevisionInspector i = cache.get(rev); | 77 ManifestRevisionInspector i = cache.get(rev); |
77 if (i == null) { | 78 if (i == null) { |
78 if (cache.size() > cacheMaxSize) { | 79 if (rev == -1) { |
80 return emptyFakeState; | |
81 } | |
82 while (cache.size() > cacheMaxSize) { | |
79 // assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary | 83 // assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary |
80 cache.remove(cache.firstKey()); | 84 cache.remove(cache.firstKey()); |
81 } | 85 } |
82 i = new ManifestRevisionInspector(cacheNodes, cacheFilenames); | 86 i = new ManifestRevisionInspector(cacheNodes, cacheFilenames); |
83 cache.put(rev, i); | 87 cache.put(rev, i); |
84 repo.getManifest().walk(rev, rev, i); | 88 repo.getManifest().walk(rev, rev, i); |
85 } | 89 } |
86 return i; | 90 return i; |
91 } | |
92 | |
93 private boolean cached(int revision) { | |
94 return cache.containsKey(revision) || revision == -1; | |
95 } | |
96 | |
97 private void initCacheRange(int minRev, int maxRev) { | |
98 while (cache.size() > cacheMaxSize) { | |
99 // assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary | |
100 cache.remove(cache.firstKey()); | |
101 } | |
102 repo.getManifest().walk(minRev, maxRev, new HgManifest.Inspector() { | |
103 private ManifestRevisionInspector delegate; | |
104 private boolean cacheHit; // range may include revisions we already know about, do not re-create them | |
105 | |
106 public boolean begin(int revision, Nodeid nid) { | |
107 assert delegate == null; | |
108 if (cache.containsKey(revision)) { // don't need to check emptyFakeState hit as revision never -1 here | |
109 cacheHit = true; | |
110 } else { | |
111 cache.put(revision, delegate = new ManifestRevisionInspector(cacheNodes, cacheFilenames)); | |
112 // cache may grow bigger than max size here, but it's ok as present simplistic cache clearing mechanism may | |
113 // otherwise remove entries we just added | |
114 delegate.begin(revision, nid); | |
115 cacheHit = false; | |
116 } | |
117 return true; | |
118 } | |
119 | |
120 public boolean next(Nodeid nid, String fname, String flags) { | |
121 if (!cacheHit) { | |
122 delegate.next(nid, fname, flags); | |
123 } | |
124 return true; | |
125 } | |
126 | |
127 public boolean end(int revision) { | |
128 if (!cacheHit) { | |
129 delegate.end(revision); | |
130 } | |
131 cacheHit = false; | |
132 delegate = null; | |
133 return true; | |
134 } | |
135 }); | |
87 } | 136 } |
88 | 137 |
89 /*package-local*/ ManifestRevisionInspector raw(int rev) { | 138 /*package-local*/ ManifestRevisionInspector raw(int rev) { |
90 return get(rev); | 139 return get(rev); |
91 } | 140 } |
108 public void change(int rev, HgStatusInspector inspector) { | 157 public void change(int rev, HgStatusInspector inspector) { |
109 int[] parents = new int[2]; | 158 int[] parents = new int[2]; |
110 repo.getChangelog().parents(rev, parents, null, null); | 159 repo.getChangelog().parents(rev, parents, null, null); |
111 walk(parents[0], rev, inspector); | 160 walk(parents[0], rev, inspector); |
112 } | 161 } |
113 | 162 |
114 // I assume revision numbers are the same for changelog and manifest - here | 163 // I assume revision numbers are the same for changelog and manifest - here |
115 // user would like to pass changelog revision numbers, and I use them directly to walk manifest. | 164 // user would like to pass changelog revision numbers, and I use them directly to walk manifest. |
116 // if this assumption is wrong, fix this (lookup manifest revisions from changeset). | 165 // if this assumption is wrong, fix this (lookup manifest revisions from changeset). |
166 // rev1 and rev2 may be -1 to indicate comparison to empty repository | |
167 // argument order matters | |
117 public void walk(int rev1, int rev2, HgStatusInspector inspector) { | 168 public void walk(int rev1, int rev2, HgStatusInspector inspector) { |
118 if (rev1 == rev2) { | 169 if (rev1 == rev2) { |
119 throw new IllegalArgumentException(); | 170 throw new IllegalArgumentException(); |
120 } | 171 } |
121 if (inspector == null) { | 172 if (inspector == null) { |
122 throw new IllegalArgumentException(); | 173 throw new IllegalArgumentException(); |
123 } | 174 } |
124 if (inspector instanceof Record) { | 175 if (inspector instanceof Record) { |
125 ((Record) inspector).init(rev1, rev2, this); | 176 ((Record) inspector).init(rev1, rev2, this); |
126 } | 177 } |
178 final int lastManifestRevision = repo.getManifest().getLastRevision(); | |
127 if (rev1 == TIP) { | 179 if (rev1 == TIP) { |
128 rev1 = repo.getManifest().getLastRevision(); | 180 rev1 = lastManifestRevision; |
129 } | 181 } |
130 if (rev2 == TIP) { | 182 if (rev2 == TIP) { |
131 rev2 = repo.getManifest().getLastRevision(); | 183 rev2 = lastManifestRevision; |
132 } | 184 } |
133 // in fact, rev1 and rev2 are often next (or close) to each other, | 185 // in fact, rev1 and rev2 are often next (or close) to each other, |
134 // thus, we can optimize Manifest reads here (manifest.walk(rev1, rev2)) | 186 // thus, we can optimize Manifest reads here (manifest.walk(rev1, rev2)) |
135 ManifestRevisionInspector r1, r2 ; | 187 ManifestRevisionInspector r1, r2 ; |
136 if (!cache.containsKey(rev1) && !cache.containsKey(rev2) && Math.abs(rev1 - rev2) < 5 /*subjective equivalent of 'close enough'*/) { | 188 boolean need1 = !cached(rev1), need2 = !cached(rev2); |
137 int minRev = rev1 < rev2 ? rev1 : rev2; | 189 if (need1 || need2) { |
138 int maxRev = minRev == rev1 ? rev2 : rev1; | 190 int minRev, maxRev; |
139 if (minRev > 0) { | 191 if (need1 && need2 && Math.abs(rev1 - rev2) < 5 /*subjective equivalent of 'close enough'*/) { |
140 minRev--; // expand range a bit | 192 minRev = rev1 < rev2 ? rev1 : rev2; |
141 // XXX perhaps, if revlog.baseRevision is cheap, shall expand minRev up to baseRevision | 193 maxRev = minRev == rev1 ? rev2 : rev1; |
142 // which gonna be read anyway | 194 if (minRev > 0) { |
143 } | 195 minRev--; // expand range a bit |
144 | 196 } |
145 repo.getManifest().walk(minRev, maxRev, new HgManifest.Inspector() { | 197 initCacheRange(minRev, maxRev); |
146 private ManifestRevisionInspector delegate; | 198 need1 = need2 = false; |
147 | 199 } |
148 public boolean begin(int revision, Nodeid nid) { | 200 // either both unknown and far from each other, or just one of them. |
149 cache.put(revision, delegate = new ManifestRevisionInspector(cacheNodes, cacheFilenames)); | 201 // read with neighbors to save potential subsequent calls for neighboring elements |
150 delegate.begin(revision, nid); | 202 // XXX perhaps, if revlog.baseRevision is cheap, shall expand minRev up to baseRevision |
151 return true; | 203 // which going to be read anyway |
152 } | 204 if (need1) { |
153 | 205 minRev = rev1; |
154 public boolean next(Nodeid nid, String fname, String flags) { | 206 maxRev = rev1 < lastManifestRevision-5 ? rev1+5 : lastManifestRevision; |
155 delegate.next(nid, fname, flags); | 207 initCacheRange(minRev, maxRev); |
156 return true; | 208 } |
157 } | 209 if (need2) { |
158 | 210 minRev = rev2; |
159 public boolean end(int revision) { | 211 maxRev = rev2 < lastManifestRevision-5 ? rev2+5 : lastManifestRevision; |
160 delegate.end(revision); | 212 initCacheRange(minRev, maxRev); |
161 delegate = null; | 213 } |
162 return true; | |
163 } | |
164 }); | |
165 } | 214 } |
166 r1 = get(rev1); | 215 r1 = get(rev1); |
167 r2 = get(rev2); | 216 r2 = get(rev2); |
168 | 217 |
169 PathPool pp = getPathPool(); | 218 PathPool pp = getPathPool(); |
394 } | 443 } |
395 if (idsPool != null) { | 444 if (idsPool != null) { |
396 nid = idsPool.unify(nid); | 445 nid = idsPool.unify(nid); |
397 } | 446 } |
398 idsMap.put(fname, nid); | 447 idsMap.put(fname, nid); |
399 flagsMap.put(fname, flags); | 448 if (flags != null) { |
449 // TreeMap$Entry takes 32 bytes. No reason to keep null for such price | |
450 // Perhaps, Map<String, Pair<Nodeid, String>> might be better solution | |
451 flagsMap.put(fname, flags); | |
452 } | |
400 return true; | 453 return true; |
401 } | 454 } |
402 | 455 |
403 public boolean end(int revision) { | 456 public boolean end(int revision) { |
404 // in fact, this class cares about single revision | 457 // in fact, this class cares about single revision |