comparison src/org/tmatesoft/hg/repo/HgBundle.java @ 169:8c8e3f372fa1

Towards initial clone: refactor HgBundle to provide slightly higher-level structure of the bundle
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Wed, 23 Mar 2011 14:13:11 +0100
parents d5268ca7715b
children 71ddbf8603e8
comparison
equal deleted inserted replaced
168:dd525ca65de8 169:8c8e3f372fa1
14 * the terms of a license other than GNU General Public License 14 * the terms of a license other than GNU General Public License
15 * contact TMate Software at support@hg4j.com 15 * contact TMate Software at support@hg4j.com
16 */ 16 */
17 package org.tmatesoft.hg.repo; 17 package org.tmatesoft.hg.repo;
18 18
19 import static org.tmatesoft.hg.core.Nodeid.NULL;
20
19 import java.io.File; 21 import java.io.File;
20 import java.io.IOException; 22 import java.io.IOException;
21 import java.util.LinkedList; 23 import java.util.LinkedList;
22 import java.util.List; 24 import java.util.List;
23 25
26 import org.tmatesoft.hg.core.HgBadStateException;
24 import org.tmatesoft.hg.core.HgException; 27 import org.tmatesoft.hg.core.HgException;
25 import org.tmatesoft.hg.core.Nodeid; 28 import org.tmatesoft.hg.core.Nodeid;
26 import org.tmatesoft.hg.internal.ByteArrayChannel; 29 import org.tmatesoft.hg.internal.ByteArrayChannel;
27 import org.tmatesoft.hg.internal.ByteArrayDataAccess; 30 import org.tmatesoft.hg.internal.ByteArrayDataAccess;
28 import org.tmatesoft.hg.internal.DataAccess; 31 import org.tmatesoft.hg.internal.DataAccess;
29 import org.tmatesoft.hg.internal.DataAccessProvider; 32 import org.tmatesoft.hg.internal.DataAccessProvider;
30 import org.tmatesoft.hg.internal.DigestHelper; 33 import org.tmatesoft.hg.internal.DigestHelper;
34 import org.tmatesoft.hg.internal.InflaterDataAccess;
31 import org.tmatesoft.hg.internal.RevlogStream; 35 import org.tmatesoft.hg.internal.RevlogStream;
32 import org.tmatesoft.hg.repo.HgChangelog.RawChangeset; 36 import org.tmatesoft.hg.repo.HgChangelog.RawChangeset;
33 import org.tmatesoft.hg.util.CancelledException; 37 import org.tmatesoft.hg.util.CancelledException;
34 38
35
36 /** 39 /**
37 * @see http://mercurial.selenic.com/wiki/BundleFormat 40 * @see http://mercurial.selenic.com/wiki/BundleFormat
38 * 41 *
39 * @author Artem Tikhomirov 42 * @author Artem Tikhomirov
40 * @author TMate Software Ltd. 43 * @author TMate Software Ltd.
41 */ 44 */
42 public class HgBundle { 45 public class HgBundle {
43 46
44 private final File bundleFile; 47 private final File bundleFile;
45 private final DataAccessProvider accessProvider; 48 private final DataAccessProvider accessProvider;
46 49
47 public HgBundle(DataAccessProvider dap, File bundle) { 50 HgBundle(DataAccessProvider dap, File bundle) {
48 accessProvider = dap; 51 accessProvider = dap;
49 bundleFile = bundle; 52 bundleFile = bundle;
50 } 53 }
51 54
52 public void changes(HgRepository hgRepo) throws HgException, IOException { 55 private DataAccess getDataStream() throws IOException {
53 DataAccess da = accessProvider.create(bundleFile); 56 DataAccess da = accessProvider.create(bundleFile);
54 DigestHelper dh = new DigestHelper(); 57 byte[] signature = new byte[6];
58 if (da.length() > 6) {
59 da.readBytes(signature, 0, 6);
60 if (signature[0] == 'H' && signature[1] == 'G' && signature[2] == '1' && signature[3] == '0') {
61 if (signature[4] == 'G' && signature[5] == 'Z') {
62 return new InflaterDataAccess(da, 6, da.length() - 6);
63 }
64 if (signature[4] == 'B' && signature[5] == 'Z') {
65 throw HgRepository.notImplemented();
66 }
67 if (signature[4] != 'U' || signature[5] != 'N') {
68 throw new HgBadStateException("Bad bundle signature:" + new String(signature));
69 }
70 // "...UN", fall-through
71 } else {
72 da.reset();
73 }
74 }
75 return da;
76 }
77
78 // shows changes recorded in the bundle that are missing from the supplied repository
79 public void changes(final HgRepository hgRepo) throws HgException, IOException {
80 Inspector insp = new Inspector() {
81 DigestHelper dh = new DigestHelper();
82 boolean emptyChangelog = true;
83 private DataAccess prevRevContent;
84
85 public void changelogStart() {
86 emptyChangelog = true;
87
88 }
89
90 public void changelogEnd() {
91 if (emptyChangelog) {
92 throw new IllegalStateException("No changelog group in the bundle"); // XXX perhaps, just be silent and/or log?
93 }
94 }
95
96 /*
97 * Although the BundleFormat wiki says: "Each Changelog entry patches the result of all previous patches
98 * (the previous, or parent patch of a given patch p is the patch that has a node equal to p's p1 field)",
99 * this does not seem to hold true. Instead, each entry patches the previous one, regardless of whether the one
100 * before it is its parent (i.e. ge.firstParent()) or not.
101 *
102 Actual state in the changelog.i
103 Index Offset Flags Packed Actual Base Rev Link Rev Parent1 Parent2 nodeid
104 50: 9212 0 209 329 48 50 49 -1 f1db8610da62a3e0beb8d360556ee1fd6eb9885e
105 51: 9421 0 278 688 48 51 50 -1 9429c7bd1920fab164a9d2b621d38d57bcb49ae0
106 52: 9699 0 154 179 52 52 50 -1 30bd389788464287cee22ccff54c330a4b715de5
107 53: 9853 0 133 204 52 53 51 52 a6f39e595b2b54f56304470269a936ead77f5725
108 54: 9986 0 156 182 54 54 52 -1 fd4f2c98995beb051070630c272a9be87bef617d
109
110 Excerpt from bundle (nodeid, p1, p2, cs):
111 f1db8610da62a3e0beb8d360556ee1fd6eb9885e 26e3eeaa39623de552b45ee1f55c14f36460f220 0000000000000000000000000000000000000000 f1db8610da62a3e0beb8d360556ee1fd6eb9885e; patches:4
112 9429c7bd1920fab164a9d2b621d38d57bcb49ae0 f1db8610da62a3e0beb8d360556ee1fd6eb9885e 0000000000000000000000000000000000000000 9429c7bd1920fab164a9d2b621d38d57bcb49ae0; patches:3
113 > 30bd389788464287cee22ccff54c330a4b715de5 f1db8610da62a3e0beb8d360556ee1fd6eb9885e 0000000000000000000000000000000000000000 30bd389788464287cee22ccff54c330a4b715de5; patches:3
114 a6f39e595b2b54f56304470269a936ead77f5725 9429c7bd1920fab164a9d2b621d38d57bcb49ae0 30bd389788464287cee22ccff54c330a4b715de5 a6f39e595b2b54f56304470269a936ead77f5725; patches:3
115 fd4f2c98995beb051070630c272a9be87bef617d 30bd389788464287cee22ccff54c330a4b715de5 0000000000000000000000000000000000000000 fd4f2c98995beb051070630c272a9be87bef617d; patches:3
116
117 To recreate 30bd..e5, one has to take the content of 9429..e0, not that of its p1 f1db..5e
118 */
119 public boolean element(GroupElement ge) {
120 emptyChangelog = false;
121 HgChangelog changelog = hgRepo.getChangelog();
122 try {
123 if (prevRevContent == null) {
124 if (NULL.equals(ge.firstParent()) && NULL.equals(ge.secondParent())) {
125 prevRevContent = new ByteArrayDataAccess(new byte[0]);
126 } else {
127 final Nodeid base = ge.firstParent();
128 if (!changelog.isKnown(base) /*only first parent, that's Bundle contract*/) {
129 throw new IllegalStateException(String.format("Revision %s needs a parent %s, which is missing in the supplied repo %s", ge.node().shortNotation(), base.shortNotation(), hgRepo.toString()));
130 }
131 ByteArrayChannel bac = new ByteArrayChannel();
132 changelog.rawContent(base, bac); // FIXME get DataAccess directly, to avoid
133 // extra byte[] (inside ByteArrayChannel) duplication just for the sake of the subsequent ByteArrayDataAccess wrap.
134 prevRevContent = new ByteArrayDataAccess(bac.toArray());
135 }
136 }
137 //
138 byte[] csetContent = ge.apply(prevRevContent);
139 dh = dh.sha1(ge.firstParent(), ge.secondParent(), csetContent); // XXX ge may give me access to byte[] content of nodeid directly, perhaps, I don't need DH to be friend of Nodeid?
140 if (!ge.node().equalsTo(dh.asBinary())) {
141 throw new IllegalStateException("Integrity check failed on " + bundleFile + ", node:" + ge.node());
142 }
143 ByteArrayDataAccess csetDataAccess = new ByteArrayDataAccess(csetContent);
144 if (changelog.isKnown(ge.node())) {
145 System.out.print("+");
146 } else {
147 System.out.print("-");
148 }
149 RawChangeset cs = RawChangeset.parse(csetDataAccess);
150 System.out.println(cs.toString());
151 prevRevContent = csetDataAccess.reset();
152 } catch (CancelledException ex) {
153 return false;
154 } catch (Exception ex) {
155 throw new HgBadStateException(ex); // FIXME
156 }
157 return true;
158 }
159
160 public void manifestStart() {}
161 public void manifestEnd() {}
162 public void fileStart(String name) {}
163 public void fileEnd(String name) {}
164
165 };
166 inspectChangelog(insp);
167 }
168
169 public void dump() throws IOException {
170 Dump dump = new Dump();
171 inspectAll(dump);
172 System.out.println("Total files:" + dump.names.size());
173 for (String s : dump.names) {
174 System.out.println(s);
175 }
176 }
177
178 // callback to minimize amount of Strings and Nodeids instantiated
179 public interface Inspector {
180 void changelogStart();
181
182 void changelogEnd();
183
184 void manifestStart();
185
186 void manifestEnd();
187
188 void fileStart(String name);
189
190 void fileEnd(String name);
191
192 /**
193 * @param element
194 * data element, instance might be reused
195 * @return <code>true</code> to continue
196 */
197 boolean element(GroupElement element);
198 }
199
200 public static class Dump implements Inspector {
201 public final LinkedList<String> names = new LinkedList<String>();
202
203 public void changelogStart() {
204 System.out.println("Changelog group");
205 }
206
207 public void changelogEnd() {
208 }
209
210 public void manifestStart() {
211 System.out.println("Manifest group");
212 }
213
214 public void manifestEnd() {
215 }
216
217 public void fileStart(String name) {
218 names.add(name);
219 System.out.println(name);
220 }
221
222 public void fileEnd(String name) {
223 }
224
225 public boolean element(GroupElement ge) {
226 try {
227 System.out.printf(" %s %s %s %s; patches:%d\n", ge.node(), ge.firstParent(), ge.secondParent(), ge.cset(), ge.patches().size());
228 } catch (Exception ex) {
229 ex.printStackTrace(); // FIXME
230 }
231 return true;
232 }
233 }
234
235 public void inspectChangelog(Inspector inspector) throws IOException {
236 if (inspector == null) {
237 throw new IllegalArgumentException();
238 }
239 DataAccess da = getDataStream();
55 try { 240 try {
56 List<GroupElement> changelogGroup = readGroup(da); 241 if (da.isEmpty()) {
57 if (changelogGroup.isEmpty()) { 242 return;
58 throw new IllegalStateException("No changelog group in the bundle"); // XXX perhaps, just be silent and/or log? 243 }
59 } 244 inspector.changelogStart();
60 // XXX in fact, bundle not necessarily starts with the first revision missing in hgRepo 245 readGroup(da, inspector);
61 // need to 'scroll' till the last one common. 246 inspector.changelogEnd();
62 final Nodeid base = changelogGroup.get(0).firstParent();
63 if (!hgRepo.getChangelog().isKnown(base)) {
64 throw new IllegalArgumentException("unknown parent");
65 }
66 // BundleFormat wiki says:
67 // Each Changelog entry patches the result of all previous patches
68 // (the previous, or parent patch of a given patch p is the patch that has a node equal to p's p1 field)
69 ByteArrayChannel bac = new ByteArrayChannel();
70 hgRepo.getChangelog().rawContent(base, bac); // FIXME get DataAccess directly, to avoid
71 // extra byte[] (inside ByteArrayChannel) duplication just for the sake of subsequent ByteArrayDataChannel wrap.
72 ByteArrayDataAccess baseRevContent = new ByteArrayDataAccess(bac.toArray());
73 for (GroupElement ge : changelogGroup) {
74 byte[] csetContent = RevlogStream.apply(baseRevContent, -1, ge.patches);
75 dh = dh.sha1(ge.firstParent(), ge.secondParent(), csetContent); // XXX ge may give me access to byte[] content of nodeid directly, perhaps, I don't need DH to be friend of Nodeid?
76 if (!ge.node().equalsTo(dh.asBinary())) {
77 throw new IllegalStateException("Integrity check failed on " + bundleFile + ", node:" + ge.node());
78 }
79 ByteArrayDataAccess csetDataAccess = new ByteArrayDataAccess(csetContent);
80 RawChangeset cs = RawChangeset.parse(csetDataAccess);
81 System.out.println(cs.toString());
82 baseRevContent = csetDataAccess.reset();
83 }
84 } catch (CancelledException ex) {
85 System.out.println("Operation cancelled");
86 } finally { 247 } finally {
87 da.done(); 248 da.done();
88 } 249 }
89 } 250 }
90 251
91 public void dump() throws IOException { 252 public void inspectManifest(Inspector inspector) throws IOException {
92 DataAccess da = accessProvider.create(bundleFile); 253 if (inspector == null) {
254 throw new IllegalArgumentException();
255 }
256 DataAccess da = getDataStream();
93 try { 257 try {
94 LinkedList<String> names = new LinkedList<String>(); 258 if (da.isEmpty()) {
259 return;
260 }
261 skipGroup(da); // changelog
95 if (!da.isEmpty()) { 262 if (!da.isEmpty()) {
96 System.out.println("Changelog group"); 263 inspector.manifestStart();
97 List<GroupElement> changelogGroup = readGroup(da); 264 readGroup(da, inspector);
98 for (GroupElement ge : changelogGroup) { 265 inspector.manifestEnd();
99 System.out.printf(" %s %s %s %s; patches:%d\n", ge.node(), ge.firstParent(), ge.secondParent(), ge.cset(), ge.patches.size());
100 }
101 System.out.println("Manifest group");
102 List<GroupElement> manifestGroup = readGroup(da);
103 for (GroupElement ge : manifestGroup) {
104 System.out.printf(" %s %s %s %s; patches:%d\n", ge.node(), ge.firstParent(), ge.secondParent(), ge.cset(), ge.patches.size());
105 }
106 while (!da.isEmpty()) {
107 int fnameLen = da.readInt();
108 if (fnameLen <= 4) {
109 break; // null chunk, the last one.
110 }
111 byte[] fname = new byte[fnameLen - 4];
112 da.readBytes(fname, 0, fname.length);
113 names.add(new String(fname));
114 List<GroupElement> fileGroup = readGroup(da);
115 System.out.println(names.getLast());
116 for (GroupElement ge : fileGroup) {
117 System.out.printf(" %s %s %s %s; patches:%d\n", ge.node(), ge.firstParent(), ge.secondParent(), ge.cset(), ge.patches.size());
118 }
119 }
120 }
121 System.out.println(names.size());
122 for (String s : names) {
123 System.out.println(s);
124 } 266 }
125 } finally { 267 } finally {
126 da.done(); 268 da.done();
127 } 269 }
128 } 270 }
129 271
130 private static List<GroupElement> readGroup(DataAccess da) throws IOException { 272 public void inspectFiles(Inspector inspector) throws IOException {
273 if (inspector == null) {
274 throw new IllegalArgumentException();
275 }
276 DataAccess da = getDataStream();
277 try {
278 if (!da.isEmpty()) {
279 skipGroup(da); // changelog
280 }
281 if (!da.isEmpty()) {
282 skipGroup(da); // manifest
283 }
284 while (!da.isEmpty()) {
285 int fnameLen = da.readInt();
286 if (fnameLen <= 4) {
287 break; // null chunk, the last one.
288 }
289 byte[] nameBuf = new byte[fnameLen - 4];
290 da.readBytes(nameBuf, 0, nameBuf.length);
291 String fname = new String(nameBuf);
292 inspector.fileStart(fname);
293 readGroup(da, inspector);
294 inspector.fileEnd(fname);
295 }
296 } finally {
297 da.done();
298 }
299 }
300
301 public void inspectAll(Inspector inspector) throws IOException {
302 if (inspector == null) {
303 throw new IllegalArgumentException();
304 }
305 DataAccess da = getDataStream();
306 try {
307 if (da.isEmpty()) {
308 return;
309 }
310 inspector.changelogStart();
311 readGroup(da, inspector);
312 inspector.changelogEnd();
313 //
314 if (da.isEmpty()) {
315 return;
316 }
317 inspector.manifestStart();
318 readGroup(da, inspector);
319 inspector.manifestEnd();
320 //
321 while (!da.isEmpty()) {
322 int fnameLen = da.readInt();
323 if (fnameLen <= 4) {
324 break; // null chunk, the last one.
325 }
326 byte[] fnameBuf = new byte[fnameLen - 4];
327 da.readBytes(fnameBuf, 0, fnameBuf.length);
328 String name = new String(fnameBuf);
329 inspector.fileStart(name);
330 readGroup(da, inspector);
331 inspector.fileEnd(name);
332 }
333 } finally {
334 da.done();
335 }
336 }
337
338 private static void readGroup(DataAccess da, Inspector inspector) throws IOException {
131 int len = da.readInt(); 339 int len = da.readInt();
132 LinkedList<GroupElement> rv = new LinkedList<HgBundle.GroupElement>(); 340 boolean good2go = true;
133 while (len > 4 && !da.isEmpty()) { 341 while (len > 4 && !da.isEmpty() && good2go) {
134 byte[] nb = new byte[80]; 342 byte[] nb = new byte[80];
135 da.readBytes(nb, 0, 80); 343 da.readBytes(nb, 0, 80);
136 int dataLength = len-84; 344 int dataLength = len - 84 /* length field + 4 nodeids */;
137 LinkedList<RevlogStream.PatchRecord> patches = new LinkedList<RevlogStream.PatchRecord>(); 345 byte[] data = new byte[dataLength];
138 while (dataLength > 0) { 346 da.readBytes(data, 0, dataLength);
139 RevlogStream.PatchRecord pr = RevlogStream.PatchRecord.read(da); 347 DataAccess slice = new ByteArrayDataAccess(data); // XXX in fact, may pass a slicing DataAccess.
140 patches.add(pr); 348 // Just need to make sure that we seek to the proper location afterwards (where the next GroupElement starts),
141 dataLength -= pr.len + 12; 349 // regardless of whether that slice was read to its end or not.
142 } 350 GroupElement ge = new GroupElement(nb, slice);
143 rv.add(new GroupElement(nb, patches)); 351 good2go = inspector.element(ge);
144 len = da.isEmpty() ? 0 : da.readInt(); 352 len = da.isEmpty() ? 0 : da.readInt();
145 } 353 }
146 return rv; 354 // need to skip up to the group end if the inspector told us it does not want to continue with the group,
147 } 355 // because the outer code may try to read the next group immediately after we return.
148 356 while (len > 4 && !da.isEmpty()) {
149 static class GroupElement { 357 da.skip(len - 4 /* length field */);
150 private byte[] header; // byte[80] takes 120 bytes, 4 Nodeids - 192 358 len = da.isEmpty() ? 0 : da.readInt();
359 }
360 }
361
362 private static void skipGroup(DataAccess da) throws IOException {
363 int len = da.readInt();
364 while (len > 4 && !da.isEmpty()) {
365 da.skip(len - 4); // sizeof(int)
366 len = da.isEmpty() ? 0 : da.readInt();
367 }
368 }
369
370 public static class GroupElement {
371 private final byte[] header; // byte[80] takes 120 bytes, 4 Nodeids - 192
372 private final DataAccess dataAccess;
151 private List<RevlogStream.PatchRecord> patches; 373 private List<RevlogStream.PatchRecord> patches;
152 374
153 GroupElement(byte[] fourNodeids, List<RevlogStream.PatchRecord> patchList) { 375 GroupElement(byte[] fourNodeids, DataAccess rawDataAccess) {
154 assert fourNodeids != null && fourNodeids.length == 80; 376 assert fourNodeids != null && fourNodeids.length == 80;
155 // patchList.size() > 0
156 header = fourNodeids; 377 header = fourNodeids;
157 patches = patchList; 378 dataAccess = rawDataAccess;
158 } 379 }
380
159 public Nodeid node() { 381 public Nodeid node() {
160 return Nodeid.fromBinary(header, 0); 382 return Nodeid.fromBinary(header, 0);
161 } 383 }
384
162 public Nodeid firstParent() { 385 public Nodeid firstParent() {
163 return Nodeid.fromBinary(header, 20); 386 return Nodeid.fromBinary(header, 20);
164 } 387 }
388
165 public Nodeid secondParent() { 389 public Nodeid secondParent() {
166 return Nodeid.fromBinary(header, 40); 390 return Nodeid.fromBinary(header, 40);
167 } 391 }
392
168 public Nodeid cset() { // cs seems to be changeset 393 public Nodeid cset() { // cs seems to be changeset
169 return Nodeid.fromBinary(header, 60); 394 return Nodeid.fromBinary(header, 60);
170 } 395 }
396
397 public DataAccess rawData() {
398 return dataAccess;
399 }
400
401 public List<RevlogStream.PatchRecord> patches() throws IOException {
402 if (patches == null) {
403 dataAccess.reset();
404 LinkedList<RevlogStream.PatchRecord> p = new LinkedList<RevlogStream.PatchRecord>();
405 while (!dataAccess.isEmpty()) {
406 RevlogStream.PatchRecord pr = RevlogStream.PatchRecord.read(dataAccess);
407 p.add(pr);
408 }
409 patches = p;
410 }
411 return patches;
412 }
413
414 public byte[] apply(DataAccess baseContent) throws IOException {
415 return RevlogStream.apply(baseContent, -1, patches());
416 }
171 } 417 }
172 } 418 }