comparison hg4j/src/main/java/org/tmatesoft/hg/internal/RepositoryComparator.java @ 213:6ec4af642ba8 gradle

Project uses Gradle for build - actual changes
author Alexander Kitaev <kitaev@gmail.com>
date Tue, 10 May 2011 10:52:53 +0200
parents
children
comparison
equal deleted inserted replaced
212:edb2e2829352 213:6ec4af642ba8
1 /*
2 * Copyright (c) 2011 TMate Software Ltd
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; version 2 of the License.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * For information on how to redistribute this software under
14 * the terms of a license other than GNU General Public License
15 * contact TMate Software at support@hg4j.com
16 */
17 package org.tmatesoft.hg.internal;
18
19 import static org.tmatesoft.hg.core.Nodeid.NULL;
20
21 import java.util.ArrayList;
22 import java.util.Collections;
23 import java.util.HashMap;
24 import java.util.HashSet;
25 import java.util.LinkedList;
26 import java.util.List;
27 import java.util.ListIterator;
28 import java.util.Map;
29 import java.util.Map.Entry;
30 import java.util.Set;
31
32 import org.tmatesoft.hg.core.HgBadStateException;
33 import org.tmatesoft.hg.core.HgException;
34 import org.tmatesoft.hg.core.Nodeid;
35 import org.tmatesoft.hg.repo.HgChangelog;
36 import org.tmatesoft.hg.repo.HgRemoteRepository;
37 import org.tmatesoft.hg.repo.HgRemoteRepository.Range;
38 import org.tmatesoft.hg.repo.HgRemoteRepository.RemoteBranch;
39 import org.tmatesoft.hg.util.CancelSupport;
40 import org.tmatesoft.hg.util.CancelledException;
41 import org.tmatesoft.hg.util.ProgressSupport;
42
43 /**
44 *
45 * @author Artem Tikhomirov
46 * @author TMate Software Ltd.
47 */
48 public class RepositoryComparator {
49
50 private final boolean debug = Boolean.parseBoolean(System.getProperty("hg4j.remote.debug"));
51 private final HgChangelog.ParentWalker localRepo;
52 private final HgRemoteRepository remoteRepo;
53 private List<Nodeid> common;
54
55 public RepositoryComparator(HgChangelog.ParentWalker pwLocal, HgRemoteRepository hgRemote) {
56 localRepo = pwLocal;
57 remoteRepo = hgRemote;
58 }
59
60 public RepositoryComparator compare(Object context) throws HgException, CancelledException {
61 ProgressSupport progressSupport = ProgressSupport.Factory.get(context);
62 CancelSupport cancelSupport = CancelSupport.Factory.get(context);
63 cancelSupport.checkCancelled();
64 progressSupport.start(10);
65 common = Collections.unmodifiableList(findCommonWithRemote());
66 // sanity check
67 for (Nodeid n : common) {
68 if (!localRepo.knownNode(n)) {
69 throw new HgBadStateException("Unknown node reported as common:" + n);
70 }
71 }
72 progressSupport.done();
73 return this;
74 }
75
76 public List<Nodeid> getCommon() {
77 if (common == null) {
78 throw new HgBadStateException("Call #compare(Object) first");
79 }
80 return common;
81 }
82
83 /**
84 * @return revisions that are children of common entries, i.e. revisions that are present on the local server and not on remote.
85 */
86 public List<Nodeid> getLocalOnlyRevisions() {
87 return localRepo.childrenOf(getCommon());
88 }
89
90 /**
91 * Similar to @link {@link #getLocalOnlyRevisions()}, use this one if you need access to changelog entry content, not
92 * only its revision number.
93 * @param inspector delegate to analyze changesets, shall not be <code>null</code>
94 */
95 public void visitLocalOnlyRevisions(HgChangelog.Inspector inspector) {
96 if (inspector == null) {
97 throw new IllegalArgumentException();
98 }
99 // one can use localRepo.childrenOf(getCommon()) and then iterate over nodeids, but there seems to be
100 // another approach to get all changes after common:
101 // find index of earliest revision, and report all that were later
102 final HgChangelog changelog = localRepo.getRepo().getChangelog();
103 int earliestRevision = Integer.MAX_VALUE;
104 List<Nodeid> commonKnown = getCommon();
105 for (Nodeid n : commonKnown) {
106 if (!localRepo.hasChildren(n)) {
107 // there might be (old) nodes, known both locally and remotely, with no children
108 // hence, we don't need to consider their local revision number
109 continue;
110 }
111 int lr = changelog.getLocalRevision(n);
112 if (lr < earliestRevision) {
113 earliestRevision = lr;
114 }
115 }
116 if (earliestRevision == Integer.MAX_VALUE) {
117 // either there are no common nodes (known locally and at remote)
118 // or no local children found (local is up to date). In former case, perhaps I shall bit return silently,
119 // but check for possible wrong repo comparison (hs says 'repository is unrelated' if I try to
120 // check in/out for a repo that has no common nodes.
121 return;
122 }
123 if (earliestRevision < 0 || earliestRevision >= changelog.getLastRevision()) {
124 throw new HgBadStateException(String.format("Invalid index of common known revision: %d in total of %d", earliestRevision, 1+changelog.getLastRevision()));
125 }
126 changelog.range(earliestRevision+1, changelog.getLastRevision(), inspector);
127 }
128
129 private List<Nodeid> findCommonWithRemote() throws HgException {
130 List<Nodeid> remoteHeads = remoteRepo.heads();
131 LinkedList<Nodeid> resultCommon = new LinkedList<Nodeid>(); // these remotes are known in local
132 LinkedList<Nodeid> toQuery = new LinkedList<Nodeid>(); // these need further queries to find common
133 for (Nodeid rh : remoteHeads) {
134 if (localRepo.knownNode(rh)) {
135 resultCommon.add(rh);
136 } else {
137 toQuery.add(rh);
138 }
139 }
140 if (toQuery.isEmpty()) {
141 return resultCommon;
142 }
143 LinkedList<RemoteBranch> checkUp2Head = new LinkedList<RemoteBranch>(); // branch.root and branch.head are of interest only.
144 // these are branches with unknown head but known root, which might not be the last common known,
145 // i.e. there might be children changeset that are also available at remote, [..?..common-head..remote-head] - need to
146 // scroll up to common head.
147 while (!toQuery.isEmpty()) {
148 List<RemoteBranch> remoteBranches = remoteRepo.branches(toQuery); //head, root, first parent, second parent
149 toQuery.clear();
150 while(!remoteBranches.isEmpty()) {
151 RemoteBranch rb = remoteBranches.remove(0);
152 // I assume branches remote call gives branches with head equal to what I pass there, i.e.
153 // that I don't need to check whether rb.head is unknown.
154 if (localRepo.knownNode(rb.root)) {
155 // we known branch start, common head is somewhere in its descendants line
156 checkUp2Head.add(rb);
157 } else {
158 // dig deeper in the history, if necessary
159 if (!NULL.equals(rb.p1) && !localRepo.knownNode(rb.p1)) {
160 toQuery.add(rb.p1);
161 }
162 if (!NULL.equals(rb.p2) && !localRepo.knownNode(rb.p2)) {
163 toQuery.add(rb.p2);
164 }
165 }
166 }
167 }
168 // can't check nodes between checkUp2Head element and local heads, remote might have distinct descendants sequence
169 for (RemoteBranch rb : checkUp2Head) {
170 // rb.root is known locally
171 List<Nodeid> remoteRevisions = remoteRepo.between(rb.head, rb.root);
172 if (remoteRevisions.isEmpty()) {
173 // head is immediate child
174 resultCommon.add(rb.root);
175 } else {
176 // between gives result from head to root, I'd like to go in reverse direction
177 Collections.reverse(remoteRevisions);
178 Nodeid root = rb.root;
179 while(!remoteRevisions.isEmpty()) {
180 Nodeid n = remoteRevisions.remove(0);
181 if (localRepo.knownNode(n)) {
182 if (remoteRevisions.isEmpty()) {
183 // this is the last known node before an unknown
184 resultCommon.add(n);
185 break;
186 }
187 if (remoteRevisions.size() == 1) {
188 // there's only one left between known n and unknown head
189 // this check is to save extra between query, not really essential
190 Nodeid last = remoteRevisions.remove(0);
191 resultCommon.add(localRepo.knownNode(last) ? last : n);
192 break;
193 }
194 // might get handy for next between query, to narrow search down
195 root = n;
196 } else {
197 remoteRevisions = remoteRepo.between(n, root);
198 Collections.reverse(remoteRevisions);
199 if (remoteRevisions.isEmpty()) {
200 resultCommon.add(root);
201 }
202 }
203 }
204 }
205 }
206 // TODO ensure unique elements in the list
207 return resultCommon;
208 }
209
210 // somewhat similar to Outgoing.findCommonWithRemote()
211 public List<BranchChain> calculateMissingBranches() throws HgException {
212 List<Nodeid> remoteHeads = remoteRepo.heads();
213 LinkedList<Nodeid> common = new LinkedList<Nodeid>(); // these remotes are known in local
214 LinkedList<Nodeid> toQuery = new LinkedList<Nodeid>(); // these need further queries to find common
215 for (Nodeid rh : remoteHeads) {
216 if (localRepo.knownNode(rh)) {
217 common.add(rh);
218 } else {
219 toQuery.add(rh);
220 }
221 }
222 if (toQuery.isEmpty()) {
223 return Collections.emptyList(); // no incoming changes
224 }
225 LinkedList<BranchChain> branches2load = new LinkedList<BranchChain>(); // return value
226 // detailed comments are in Outgoing.findCommonWithRemote
227 LinkedList<RemoteBranch> checkUp2Head = new LinkedList<RemoteBranch>();
228 // records relation between branch head and its parent branch, if any
229 HashMap<Nodeid, BranchChain> head2chain = new HashMap<Nodeid, BranchChain>();
230 while (!toQuery.isEmpty()) {
231 List<RemoteBranch> remoteBranches = remoteRepo.branches(toQuery); //head, root, first parent, second parent
232 toQuery.clear();
233 while(!remoteBranches.isEmpty()) {
234 RemoteBranch rb = remoteBranches.remove(0);
235 BranchChain chainElement = head2chain.get(rb.head);
236 if (chainElement == null) {
237 chainElement = new BranchChain(rb.head);
238 // record this unknown branch to download later
239 branches2load.add(chainElement);
240 // the only chance we'll need chainElement in the head2chain is when we know this branch's root
241 head2chain.put(rb.head, chainElement);
242 }
243 if (localRepo.knownNode(rb.root)) {
244 // we known branch start, common head is somewhere in its descendants line
245 checkUp2Head.add(rb);
246 } else {
247 chainElement.branchRoot = rb.root;
248 // dig deeper in the history, if necessary
249 boolean hasP1 = !NULL.equals(rb.p1), hasP2 = !NULL.equals(rb.p2);
250 if (hasP1 && !localRepo.knownNode(rb.p1)) {
251 toQuery.add(rb.p1);
252 // we might have seen parent node already, and recorded it as a branch chain
253 // we shall reuse existing BC to get it completely initializer (head2chain map
254 // on second put with the same key would leave first BC uninitialized.
255
256 // It seems there's no reason to be affraid (XXX although shall double-check)
257 // that BC's chain would get corrupt (its p1 and p2 fields assigned twice with different values)
258 // as parents are always the same (and likely, BC that is common would be the last unknown)
259 BranchChain bc = head2chain.get(rb.p1);
260 if (bc == null) {
261 head2chain.put(rb.p1, bc = new BranchChain(rb.p1));
262 }
263 chainElement.p1 = bc;
264 }
265 if (hasP2 && !localRepo.knownNode(rb.p2)) {
266 toQuery.add(rb.p2);
267 BranchChain bc = head2chain.get(rb.p2);
268 if (bc == null) {
269 head2chain.put(rb.p2, bc = new BranchChain(rb.p2));
270 }
271 chainElement.p2 = bc;
272 }
273 if (!hasP1 && !hasP2) {
274 // special case, when we do incoming against blank repository, chainElement.branchRoot
275 // is first unknown element (revision 0). We need to add another fake BranchChain
276 // to fill the promise that terminal BranchChain has branchRoot that is known both locally and remotely
277 BranchChain fake = new BranchChain(NULL);
278 fake.branchRoot = NULL;
279 chainElement.p1 = chainElement.p2 = fake;
280 }
281 }
282 }
283 }
284 for (RemoteBranch rb : checkUp2Head) {
285 Nodeid h = rb.head;
286 Nodeid r = rb.root;
287 int watchdog = 1000;
288 assert head2chain.containsKey(h);
289 BranchChain bc = head2chain.get(h);
290 assert bc != null : h.toString();
291 // if we know branch root locally, there could be no parent branch chain elements.
292 assert bc.p1 == null;
293 assert bc.p2 == null;
294 do {
295 List<Nodeid> between = remoteRepo.between(h, r);
296 if (between.isEmpty()) {
297 bc.branchRoot = r;
298 break;
299 } else {
300 Collections.reverse(between);
301 for (Nodeid n : between) {
302 if (localRepo.knownNode(n)) {
303 r = n;
304 } else {
305 h = n;
306 break;
307 }
308 }
309 Nodeid lastInBetween = between.get(between.size() - 1);
310 if (r.equals(lastInBetween)) {
311 bc.branchRoot = r;
312 break;
313 } else if (h.equals(lastInBetween)) { // the only chance for current head pointer to point to the sequence tail
314 // is when r is second from the between list end (iow, head,1,[2],4,8...,root)
315 bc.branchRoot = r;
316 break;
317 }
318 }
319 } while(--watchdog > 0);
320 if (watchdog == 0) {
321 throw new HgBadStateException(String.format("Can't narrow down branch [%s, %s]", rb.head.shortNotation(), rb.root.shortNotation()));
322 }
323 }
324 if (debug) {
325 System.out.println("calculateMissingBranches:");
326 for (BranchChain bc : branches2load) {
327 bc.dump();
328 }
329 }
330 return branches2load;
331 }
332
333 // root and head (and all between) are unknown for each chain element but last (terminal), which has known root (revision
334 // known to be locally and at remote server
335 // alternative would be to keep only unknown elements (so that promise of calculateMissingBranches would be 100% true), but that
336 // seems to complicate the method, while being useful only for the case when we ask incoming for an empty repository (i.e.
337 // where branch chain return all nodes, -1..tip.
338 public static final class BranchChain {
339 // when we construct a chain, we know head which is missing locally, hence init it right away.
340 // as for root (branch unknown start), we might happen to have one locally, and need further digging to find out right branch start
341 public final Nodeid branchHead;
342 public Nodeid branchRoot;
343 // either of these can be null, or both.
344 // although RemoteBranch has either both parents null, or both non-null, when we construct a chain
345 // we might encounter that we locally know one of branch's parent, hence in the chain corresponding field will be blank.
346 public BranchChain p1;
347 public BranchChain p2;
348
349 public BranchChain(Nodeid head) {
350 assert head != null;
351 branchHead = head;
352 }
353 public boolean isTerminal() {
354 return p1 == null && p2 == null; // either can be null, see comment above. Terminal is only when no way to descent
355 }
356
357 // true when this BranchChain is a branch that spans up to very start of the repository
358 // Thus, the only common revision is NULL, recorded in a fake BranchChain object shared between p1 and p2
359 /*package-local*/ boolean isRepoStart() {
360 return p1 == p2 && p1 != null && p1.branchHead == p1.branchRoot && NULL.equals(p1.branchHead);
361 }
362
363 @Override
364 public String toString() {
365 return String.format("BranchChain [%s, %s]", branchRoot, branchHead);
366 }
367
368 void dump() {
369 System.out.println(toString());
370 internalDump(" ");
371 }
372
373 private void internalDump(String prefix) {
374 if (p1 != null) {
375 System.out.println(prefix + p1.toString());
376 } else if (p2 != null) {
377 System.out.println(prefix + "NONE?!");
378 }
379 if (p2 != null) {
380 System.out.println(prefix + p2.toString());
381 } else if (p1 != null) {
382 System.out.println(prefix + "NONE?!");
383 }
384 prefix += " ";
385 if (p1 != null) {
386 p1.internalDump(prefix);
387 }
388 if (p2 != null) {
389 p2.internalDump(prefix);
390 }
391 }
392 }
393
394 /**
395 * @return list of nodeids from branchRoot to branchHead, inclusive. IOW, first element of the list is always root of the branch
396 */
397 public List<Nodeid> completeBranch(final Nodeid branchRoot, final Nodeid branchHead) throws HgException {
398 class DataEntry {
399 public final Nodeid queryHead;
400 public final int headIndex;
401 public List<Nodeid> entries;
402
403 public DataEntry(Nodeid head, int index, List<Nodeid> data) {
404 queryHead = head;
405 headIndex = index;
406 entries = data;
407 }
408 };
409
410 List<Nodeid> initial = remoteRepo.between(branchHead, branchRoot);
411 Nodeid[] result = new Nodeid[1 + (1 << initial.size())];
412 result[0] = branchHead;
413 int rootIndex = -1; // index in the result, where to place branche's root.
414 if (initial.isEmpty()) {
415 rootIndex = 1;
416 } else if (initial.size() == 1) {
417 rootIndex = 2;
418 }
419 LinkedList<DataEntry> datas = new LinkedList<DataEntry>();
420 // DataEntry in datas has entries list filled with 'between' data, whereas
421 // DataEntry in toQuery keeps only nodeid and its index, with entries to be initialized before
422 // moving to datas.
423 LinkedList<DataEntry> toQuery = new LinkedList<DataEntry>();
424 //
425 datas.add(new DataEntry(branchHead, 0, initial));
426 int totalQueries = 1;
427 HashSet<Nodeid> queried = new HashSet<Nodeid>();
428 while(!datas.isEmpty()) {
429 // keep record of those planned to be queried next time we call between()
430 // although may keep these in queried, if really don't want separate collection
431 HashSet<Nodeid> scheduled = new HashSet<Nodeid>();
432 do {
433 DataEntry de = datas.removeFirst();
434 // populate result with discovered elements between de.qiueryRoot and branch's head
435 for (int i = 1, j = 0; j < de.entries.size(); i = i << 1, j++) {
436 int idx = de.headIndex + i;
437 result[idx] = de.entries.get(j);
438 }
439 // form next query entries from new unknown elements
440 if (de.entries.size() > 1) {
441 /* when entries has only one element, it means de.queryRoot was at head-2 position, and thus
442 * no new information can be obtained. E.g. when it's 2, it might be case of [0..4] query with
443 * [1,2] result, and we need one more query to get element 3.
444 */
445 for (int i =1, j = 0; j < de.entries.size(); i = i<<1, j++) {
446 int idx = de.headIndex + i;
447 Nodeid x = de.entries.get(j);
448 if (!queried.contains(x) && !scheduled.contains(x) && (rootIndex == -1 || rootIndex - de.headIndex > 1)) {
449 /*queries for elements right before head is senseless, but unless we know head's index, do it anyway*/
450 toQuery.add(new DataEntry(x, idx, null));
451 scheduled.add(x);
452 }
453 }
454 }
455 } while (!datas.isEmpty());
456 if (!toQuery.isEmpty()) {
457 totalQueries++;
458 }
459 // for each query, create an between request range, keep record Range->DataEntry to know range's start index
460 LinkedList<HgRemoteRepository.Range> betweenBatch = new LinkedList<HgRemoteRepository.Range>();
461 HashMap<HgRemoteRepository.Range, DataEntry> rangeToEntry = new HashMap<HgRemoteRepository.Range, DataEntry>();
462 for (DataEntry de : toQuery) {
463 queried.add(de.queryHead);
464 HgRemoteRepository.Range r = new HgRemoteRepository.Range(branchRoot, de.queryHead);
465 betweenBatch.add(r);
466 rangeToEntry.put(r, de);
467 }
468 if (!betweenBatch.isEmpty()) {
469 Map<Range, List<Nodeid>> between = remoteRepo.between(betweenBatch);
470 for (Entry<Range, List<Nodeid>> e : between.entrySet()) {
471 DataEntry de = rangeToEntry.get(e.getKey());
472 assert de != null;
473 de.entries = e.getValue();
474 if (rootIndex == -1 && de.entries.size() == 1) {
475 // returned sequence of length 1 means we used element from [head-2] as root
476 int numberOfElementsExcludingRootAndHead = de.headIndex + 1;
477 rootIndex = numberOfElementsExcludingRootAndHead + 1;
478 if (debug) {
479 System.out.printf("On query %d found out exact number of missing elements: %d\n", totalQueries, numberOfElementsExcludingRootAndHead);
480 }
481 }
482 datas.add(de); // queue up to record result and construct further requests
483 }
484 betweenBatch.clear();
485 rangeToEntry.clear();
486 }
487 toQuery.clear();
488 }
489 if (rootIndex == -1) {
490 throw new HgBadStateException("Shall not happen, provided between output is correct"); // FIXME
491 }
492 result[rootIndex] = branchRoot;
493 boolean resultOk = true;
494 LinkedList<Nodeid> fromRootToHead = new LinkedList<Nodeid>();
495 for (int i = 0; i <= rootIndex; i++) {
496 Nodeid n = result[i];
497 if (n == null) {
498 System.out.printf("ERROR: element %d wasn't found\n",i);
499 resultOk = false;
500 }
501 fromRootToHead.addFirst(n); // reverse order
502 }
503 if (debug) {
504 System.out.println("Total queries:" + totalQueries);
505 }
506 if (!resultOk) {
507 throw new HgBadStateException("See console for details"); // FIXME
508 }
509 return fromRootToHead;
510 }
511
512 /**
513 * returns in order from branch root to head
514 * for a non-empty BranchChain, shall return modifiable list
515 */
516 public List<Nodeid> visitBranches(BranchChain bc) throws HgException {
517 if (bc == null) {
518 return Collections.emptyList();
519 }
520 List<Nodeid> mine = completeBranch(bc.branchRoot, bc.branchHead);
521 if (bc.isTerminal() || bc.isRepoStart()) {
522 return mine;
523 }
524 List<Nodeid> parentBranch1 = visitBranches(bc.p1);
525 List<Nodeid> parentBranch2 = visitBranches(bc.p2);
526 // merge
527 LinkedList<Nodeid> merged = new LinkedList<Nodeid>();
528 ListIterator<Nodeid> i1 = parentBranch1.listIterator(), i2 = parentBranch2.listIterator();
529 while (i1.hasNext() && i2.hasNext()) {
530 Nodeid n1 = i1.next();
531 Nodeid n2 = i2.next();
532 if (n1.equals(n2)) {
533 merged.addLast(n1);
534 } else {
535 // first different => add both, and continue adding both tails sequentially
536 merged.add(n2);
537 merged.add(n1);
538 break;
539 }
540 }
541 // copy rest of second parent branch
542 while (i2.hasNext()) {
543 merged.add(i2.next());
544 }
545 // copy rest of first parent branch
546 while (i1.hasNext()) {
547 merged.add(i1.next());
548 }
549 //
550 ArrayList<Nodeid> rv = new ArrayList<Nodeid>(mine.size() + merged.size());
551 rv.addAll(merged);
552 rv.addAll(mine);
553 return rv;
554 }
555
556 public void collectKnownRoots(BranchChain bc, Set<Nodeid> result) {
557 if (bc == null) {
558 return;
559 }
560 if (bc.isTerminal()) {
561 result.add(bc.branchRoot);
562 return;
563 }
564 if (bc.isRepoStart()) {
565 return;
566 }
567 collectKnownRoots(bc.p1, result);
568 collectKnownRoots(bc.p2, result);
569 }
570 }