comparison src/org/tmatesoft/hg/internal/RepositoryComparator.java @ 181:cd3371670f0b

Refactor incoming and outgoing code to be shared with RepositoryComparator. Placeholders for in/out commands. Refactor common remote lookup code
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Tue, 12 Apr 2011 19:10:38 +0200
parents
children ec1820f64d2b
comparison
equal deleted inserted replaced
180:42fe9a94b9d0 181:cd3371670f0b
1 /*
2 * Copyright (c) 2011 TMate Software Ltd
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; version 2 of the License.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * For information on how to redistribute this software under
14 * the terms of a license other than GNU General Public License
15 * contact TMate Software at support@hg4j.com
16 */
17 package org.tmatesoft.hg.internal;
18
19 import static org.tmatesoft.hg.core.Nodeid.NULL;
20
21 import java.util.Collections;
22 import java.util.HashMap;
23 import java.util.HashSet;
24 import java.util.LinkedList;
25 import java.util.List;
26 import java.util.Map;
27 import java.util.Map.Entry;
28
29 import org.tmatesoft.hg.core.HgBadStateException;
30 import org.tmatesoft.hg.core.HgException;
31 import org.tmatesoft.hg.core.Nodeid;
32 import org.tmatesoft.hg.repo.HgChangelog;
33 import org.tmatesoft.hg.repo.HgRemoteRepository;
34 import org.tmatesoft.hg.repo.HgRemoteRepository.Range;
35 import org.tmatesoft.hg.repo.HgRemoteRepository.RemoteBranch;
36 import org.tmatesoft.hg.util.CancelSupport;
37 import org.tmatesoft.hg.util.CancelledException;
38 import org.tmatesoft.hg.util.ProgressSupport;
39
40 /**
41 *
42 * @author Artem Tikhomirov
43 * @author TMate Software Ltd.
44 */
45 public class RepositoryComparator {
46
47 private final HgChangelog.ParentWalker localRepo;
48 private final HgRemoteRepository remoteRepo;
49 private List<Nodeid> common;
50
51 public RepositoryComparator(HgChangelog.ParentWalker pwLocal, HgRemoteRepository hgRemote) {
52 localRepo = pwLocal;
53 remoteRepo = hgRemote;
54 }
55
56 public RepositoryComparator compare(Object context) throws HgException, CancelledException {
57 ProgressSupport progressSupport = ProgressSupport.Factory.get(context);
58 CancelSupport cancelSupport = CancelSupport.Factory.get(context);
59 cancelSupport.checkCancelled();
60 progressSupport.start(10);
61 common = Collections.unmodifiableList(findCommonWithRemote());
62 progressSupport.done();
63 return this;
64 }
65
66 public List<Nodeid> getCommon() {
67 if (common == null) {
68 throw new HgBadStateException("Call #compare(Object) first");
69 }
70 return common;
71 }
72
73 private List<Nodeid> findCommonWithRemote() throws HgException {
74 List<Nodeid> remoteHeads = remoteRepo.heads();
75 LinkedList<Nodeid> resultCommon = new LinkedList<Nodeid>(); // these remotes are known in local
76 LinkedList<Nodeid> toQuery = new LinkedList<Nodeid>(); // these need further queries to find common
77 for (Nodeid rh : remoteHeads) {
78 if (localRepo.knownNode(rh)) {
79 resultCommon.add(rh);
80 } else {
81 toQuery.add(rh);
82 }
83 }
84 if (toQuery.isEmpty()) {
85 return resultCommon;
86 }
87 LinkedList<RemoteBranch> checkUp2Head = new LinkedList<RemoteBranch>(); // branch.root and branch.head are of interest only.
88 // these are branches with unknown head but known root, which might not be the last common known,
89 // i.e. there might be children changeset that are also available at remote, [..?..common-head..remote-head] - need to
90 // scroll up to common head.
91 while (!toQuery.isEmpty()) {
92 List<RemoteBranch> remoteBranches = remoteRepo.branches(toQuery); //head, root, first parent, second parent
93 toQuery.clear();
94 while(!remoteBranches.isEmpty()) {
95 RemoteBranch rb = remoteBranches.remove(0);
96 // I assume branches remote call gives branches with head equal to what I pass there, i.e.
97 // that I don't need to check whether rb.head is unknown.
98 if (localRepo.knownNode(rb.root)) {
99 // we known branch start, common head is somewhere in its descendants line
100 checkUp2Head.add(rb);
101 } else {
102 // dig deeper in the history, if necessary
103 if (!NULL.equals(rb.p1) && !localRepo.knownNode(rb.p1)) {
104 toQuery.add(rb.p1);
105 }
106 if (!NULL.equals(rb.p2) && !localRepo.knownNode(rb.p2)) {
107 toQuery.add(rb.p2);
108 }
109 }
110 }
111 }
112 // can't check nodes between checkUp2Head element and local heads, remote might have distinct descendants sequence
113 for (RemoteBranch rb : checkUp2Head) {
114 // rb.root is known locally
115 List<Nodeid> remoteRevisions = remoteRepo.between(rb.head, rb.root);
116 if (remoteRevisions.isEmpty()) {
117 // head is immediate child
118 resultCommon.add(rb.root);
119 } else {
120 // between gives result from head to root, I'd like to go in reverse direction
121 Collections.reverse(remoteRevisions);
122 Nodeid root = rb.root;
123 while(!remoteRevisions.isEmpty()) {
124 Nodeid n = remoteRevisions.remove(0);
125 if (localRepo.knownNode(n)) {
126 if (remoteRevisions.isEmpty()) {
127 // this is the last known node before an unknown
128 resultCommon.add(n);
129 break;
130 }
131 if (remoteRevisions.size() == 1) {
132 // there's only one left between known n and unknown head
133 // this check is to save extra between query, not really essential
134 Nodeid last = remoteRevisions.remove(0);
135 resultCommon.add(localRepo.knownNode(last) ? last : n);
136 break;
137 }
138 // might get handy for next between query, to narrow search down
139 root = n;
140 } else {
141 remoteRevisions = remoteRepo.between(n, root);
142 Collections.reverse(remoteRevisions);
143 if (remoteRevisions.isEmpty()) {
144 resultCommon.add(root);
145 }
146 }
147 }
148 }
149 }
150 // TODO ensure unique elements in the list
151 return resultCommon;
152 }
153
154 // somewhat similar to Outgoing.findCommonWithRemote()
155 public List<BranchChain> calculateMissingBranches() throws HgException {
156 List<Nodeid> remoteHeads = remoteRepo.heads();
157 LinkedList<Nodeid> common = new LinkedList<Nodeid>(); // these remotes are known in local
158 LinkedList<Nodeid> toQuery = new LinkedList<Nodeid>(); // these need further queries to find common
159 for (Nodeid rh : remoteHeads) {
160 if (localRepo.knownNode(rh)) {
161 common.add(rh);
162 } else {
163 toQuery.add(rh);
164 }
165 }
166 if (toQuery.isEmpty()) {
167 return Collections.emptyList(); // no incoming changes
168 }
169 LinkedList<BranchChain> branches2load = new LinkedList<BranchChain>(); // return value
170 // detailed comments are in Outgoing.findCommonWithRemote
171 LinkedList<RemoteBranch> checkUp2Head = new LinkedList<RemoteBranch>();
172 // records relation between branch head and its parent branch, if any
173 HashMap<Nodeid, BranchChain> head2chain = new HashMap<Nodeid, BranchChain>();
174 while (!toQuery.isEmpty()) {
175 List<RemoteBranch> remoteBranches = remoteRepo.branches(toQuery); //head, root, first parent, second parent
176 toQuery.clear();
177 while(!remoteBranches.isEmpty()) {
178 RemoteBranch rb = remoteBranches.remove(0);
179 BranchChain chainElement = head2chain.get(rb.head);
180 if (chainElement == null) {
181 chainElement = new BranchChain(rb.head);
182 // record this unknown branch to download later
183 branches2load.add(chainElement);
184 }
185 if (localRepo.knownNode(rb.root)) {
186 // we known branch start, common head is somewhere in its descendants line
187 checkUp2Head.add(rb);
188 } else {
189 chainElement.branchRoot = rb.root;
190 // dig deeper in the history, if necessary
191 if (!NULL.equals(rb.p1) && !localRepo.knownNode(rb.p1)) {
192 toQuery.add(rb.p1);
193 head2chain.put(rb.p1, chainElement.p1 = new BranchChain(rb.p1));
194 }
195 if (!NULL.equals(rb.p2) && !localRepo.knownNode(rb.p2)) {
196 toQuery.add(rb.p2);
197 head2chain.put(rb.p2, chainElement.p2 = new BranchChain(rb.p2));
198 }
199 }
200 }
201 }
202 for (RemoteBranch rb : checkUp2Head) {
203 Nodeid h = rb.head;
204 Nodeid r = rb.root;
205 int watchdog = 1000;
206 BranchChain bc = head2chain.get(h);
207 assert bc != null;
208 // if we know branch root locally, there could be no parent branch chain elements.
209 assert bc.p1 == null;
210 assert bc.p2 == null;
211 do {
212 List<Nodeid> between = remoteRepo.between(h, r);
213 if (between.isEmpty()) {
214 bc.branchRoot = r;
215 break;
216 } else {
217 Collections.reverse(between);
218 for (Nodeid n : between) {
219 if (localRepo.knownNode(n)) {
220 r = n;
221 } else {
222 h = n;
223 break;
224 }
225 }
226 Nodeid lastInBetween = between.get(between.size() - 1);
227 if (r.equals(lastInBetween)) {
228 bc.branchRoot = r;
229 break;
230 } else if (h.equals(lastInBetween)) { // the only chance for current head pointer to point to the sequence tail
231 // is when r is second from the between list end (iow, head,1,[2],4,8...,root)
232 bc.branchRoot = r;
233 break;
234 }
235 }
236 } while(--watchdog > 0);
237 if (watchdog == 0) {
238 throw new HgBadStateException(String.format("Can't narrow down branch [%s, %s]", rb.head.shortNotation(), rb.root.shortNotation()));
239 }
240 }
241 return branches2load;
242 }
243
244 public static final class BranchChain {
245 // when we construct a chain, we know head which is missing locally, hence init it right away.
246 // as for root (branch unknown start), we might happen to have one locally, and need further digging to find out right branch start
247 public final Nodeid branchHead;
248 public Nodeid branchRoot;
249 // either of these can be null, or both.
250 // although RemoteBranch has either both parents null, or both non-null, when we construct a chain
251 // we might encounter that we locally know one of branch's parent, hence in the chain corresponding field will be blank.
252 public BranchChain p1;
253 public BranchChain p2;
254
255 public BranchChain(Nodeid head) {
256 assert head != null;
257 branchHead = head;
258 }
259 public boolean isTerminal() {
260 return p1 == null || p2 == null;
261 }
262
263 @Override
264 public String toString() {
265 return String.format("BranchChain [%s, %s]", branchRoot, branchHead);
266 }
267
268 public void dump() {
269 System.out.println(toString());
270 internalDump(" ");
271 }
272
273 private void internalDump(String prefix) {
274 if (p1 != null) {
275 System.out.println(prefix + p1.toString());
276 }
277 if (p2 != null) {
278 System.out.println(prefix + p2.toString());
279 }
280 prefix += " ";
281 if (p1 != null) {
282 p1.internalDump(prefix);
283 }
284 if (p2 != null) {
285 p2.internalDump(prefix);
286 }
287 }
288 }
289
290 /**
291 * @return list of nodeids from branchRoot to branchHead, inclusive. IOW, first element of the list is always root of the branch
292 */
293 public List<Nodeid> completeBranch(final Nodeid branchRoot, final Nodeid branchHead) throws HgException {
294 class DataEntry {
295 public final Nodeid queryHead;
296 public final int headIndex;
297 public List<Nodeid> entries;
298
299 public DataEntry(Nodeid head, int index, List<Nodeid> data) {
300 queryHead = head;
301 headIndex = index;
302 entries = data;
303 }
304 };
305
306 List<Nodeid> initial = remoteRepo.between(branchHead, branchRoot);
307 Nodeid[] result = new Nodeid[1 + (1 << initial.size())];
308 result[0] = branchHead;
309 int rootIndex = -1; // index in the result, where to place branche's root.
310 if (initial.isEmpty()) {
311 rootIndex = 1;
312 } else if (initial.size() == 1) {
313 rootIndex = 2;
314 }
315 LinkedList<DataEntry> datas = new LinkedList<DataEntry>();
316 // DataEntry in datas has entries list filled with 'between' data, whereas
317 // DataEntry in toQuery keeps only nodeid and its index, with entries to be initialized before
318 // moving to datas.
319 LinkedList<DataEntry> toQuery = new LinkedList<DataEntry>();
320 //
321 datas.add(new DataEntry(branchHead, 0, initial));
322 int totalQueries = 1;
323 HashSet<Nodeid> queried = new HashSet<Nodeid>();
324 while(!datas.isEmpty()) {
325 // keep record of those planned to be queried next time we call between()
326 // although may keep these in queried, if really don't want separate collection
327 HashSet<Nodeid> scheduled = new HashSet<Nodeid>();
328 do {
329 DataEntry de = datas.removeFirst();
330 // populate result with discovered elements between de.qiueryRoot and branch's head
331 for (int i = 1, j = 0; j < de.entries.size(); i = i << 1, j++) {
332 int idx = de.headIndex + i;
333 result[idx] = de.entries.get(j);
334 }
335 // form next query entries from new unknown elements
336 if (de.entries.size() > 1) {
337 /* when entries has only one element, it means de.queryRoot was at head-2 position, and thus
338 * no new information can be obtained. E.g. when it's 2, it might be case of [0..4] query with
339 * [1,2] result, and we need one more query to get element 3.
340 */
341 for (int i =1, j = 0; j < de.entries.size(); i = i<<1, j++) {
342 int idx = de.headIndex + i;
343 Nodeid x = de.entries.get(j);
344 if (!queried.contains(x) && !scheduled.contains(x) && (rootIndex == -1 || rootIndex - de.headIndex > 1)) {
345 /*queries for elements right before head is senseless, but unless we know head's index, do it anyway*/
346 toQuery.add(new DataEntry(x, idx, null));
347 scheduled.add(x);
348 }
349 }
350 }
351 } while (!datas.isEmpty());
352 if (!toQuery.isEmpty()) {
353 totalQueries++;
354 }
355 // for each query, create an between request range, keep record Range->DataEntry to know range's start index
356 LinkedList<HgRemoteRepository.Range> betweenBatch = new LinkedList<HgRemoteRepository.Range>();
357 HashMap<HgRemoteRepository.Range, DataEntry> rangeToEntry = new HashMap<HgRemoteRepository.Range, DataEntry>();
358 for (DataEntry de : toQuery) {
359 queried.add(de.queryHead);
360 HgRemoteRepository.Range r = new HgRemoteRepository.Range(branchRoot, de.queryHead);
361 betweenBatch.add(r);
362 rangeToEntry.put(r, de);
363 }
364 if (!betweenBatch.isEmpty()) {
365 Map<Range, List<Nodeid>> between = remoteRepo.between(betweenBatch);
366 for (Entry<Range, List<Nodeid>> e : between.entrySet()) {
367 DataEntry de = rangeToEntry.get(e.getKey());
368 assert de != null;
369 de.entries = e.getValue();
370 if (rootIndex == -1 && de.entries.size() == 1) {
371 // returned sequence of length 1 means we used element from [head-2] as root
372 int numberOfElementsExcludingRootAndHead = de.headIndex + 1;
373 rootIndex = numberOfElementsExcludingRootAndHead + 1;
374 System.out.printf("On query %d found out exact number of missing elements: %d\n", totalQueries, numberOfElementsExcludingRootAndHead);
375 }
376 datas.add(de); // queue up to record result and construct further requests
377 }
378 betweenBatch.clear();
379 rangeToEntry.clear();
380 }
381 toQuery.clear();
382 }
383 if (rootIndex == -1) {
384 throw new HgBadStateException("Shall not happen, provided between output is correct"); // FIXME
385 }
386 result[rootIndex] = branchRoot;
387 boolean resultOk = true;
388 LinkedList<Nodeid> fromRootToHead = new LinkedList<Nodeid>();
389 for (int i = 0; i <= rootIndex; i++) {
390 Nodeid n = result[i];
391 if (n == null) {
392 System.out.printf("ERROR: element %d wasn't found\n",i);
393 resultOk = false;
394 }
395 fromRootToHead.addFirst(n); // reverse order
396 }
397 System.out.println("Total queries:" + totalQueries);
398 if (!resultOk) {
399 throw new HgBadStateException("See console for details"); // FIXME
400 }
401 return fromRootToHead;
402 }
403 }