comparison src/com/tmate/hgkit/ll/RevlogStream.java @ 3:24bb4f365164

Rudimentary log functionality with basic infrastructure is in place
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Mon, 20 Dec 2010 02:50:36 +0100
parents 08db726a0fb7
children aa1912c70b36
comparison
equal deleted inserted replaced
2:08db726a0fb7 3:24bb4f365164
1 /** 1 /**
2 * Copyright (c) 2010 Artem Tikhomirov 2 * Copyright (c) 2010 Artem Tikhomirov
3 */ 3 */
4 package com.tmate.hgkit.ll; 4 package com.tmate.hgkit.ll;
5 5
6 import java.io.BufferedInputStream;
6 import java.io.DataInput; 7 import java.io.DataInput;
8 import java.io.DataInputStream;
7 import java.io.EOFException; 9 import java.io.EOFException;
10 import java.io.File;
11 import java.io.FileInputStream;
12 import java.io.FileNotFoundException;
8 import java.io.IOException; 13 import java.io.IOException;
9 import java.util.ArrayList; 14 import java.util.ArrayList;
10 import java.util.Collections; 15 import java.util.Collections;
16 import java.util.LinkedList;
11 import java.util.List; 17 import java.util.List;
18 import java.util.zip.DataFormatException;
12 import java.util.zip.Inflater; 19 import java.util.zip.Inflater;
13 20
14 /** 21 /**
15 * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations), 22 * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations),
16 * or numerous RevlogStream with separate representation of the underlaying data (cached, lazy ChunkStream)? 23 * or numerous RevlogStream with separate representation of the underlaying data (cached, lazy ChunkStream)?
20 */ 27 */
21 public class RevlogStream { 28 public class RevlogStream {
22 29
23 private List<IndexEntry> index; // indexed access highly needed 30 private List<IndexEntry> index; // indexed access highly needed
24 private boolean inline = false; 31 private boolean inline = false;
32 private final File indexFile;
33
34 RevlogStream(File indexFile) {
35 this.indexFile = indexFile;
36 }
25 37
26 private void detectVersion() { 38 private void detectVersion() {
27 39
28 } 40 }
29 41
30 /*package*/ DataInput getIndexStream() { 42 /*package*/ DataInput getIndexStream() {
31 // TODO Auto-generated method stub 43 DataInputStream dis = null;
32 return null; 44 try {
45 dis = new DataInputStream(new BufferedInputStream(new FileInputStream(indexFile)));
46 } catch (FileNotFoundException ex) {
47 ex.printStackTrace();
48 // should not happen, we checked for existence
49 }
50 return dis;
33 } 51 }
34 52
35 /*package*/ DataInput getDataStream() { 53 /*package*/ DataInput getDataStream() {
36 // TODO Auto-generated method stub 54 final String indexName = indexFile.getName();
37 return null; 55 File dataFile = new File(indexFile.getParentFile(), indexName.substring(0, indexName.length() - 1) + "d");
38 } 56 try {
57 return new DataInputStream(new BufferedInputStream(new FileInputStream(dataFile)));
58 } catch (FileNotFoundException ex) {
59 ex.printStackTrace();
60 return null;
61 }
62 }
63
39 public int revisionCount() { 64 public int revisionCount() {
40 initOutline(); 65 initOutline();
41 return index.size(); 66 return index.size();
42 } 67 }
43 68
44 // should be possible to use TIP, ALL 69 // should be possible to use TIP, ALL, or -1, -2, -n notation of Hg
70 // ? boolean needsNodeid
45 public void iterate(int start, int end, boolean needData, Revlog.Inspector inspector) { 71 public void iterate(int start, int end, boolean needData, Revlog.Inspector inspector) {
46 initOutline(); 72 initOutline();
47 final int indexSize = index.size(); 73 final int indexSize = index.size();
74 if (indexSize == 0) {
75 return;
76 }
77 if (end == -1 /*FIXME TIP*/) {
78 end = indexSize - 1;
79 }
48 if (start < 0 || start >= indexSize) { 80 if (start < 0 || start >= indexSize) {
49 throw new IllegalArgumentException("Bad left range boundary " + start); 81 throw new IllegalArgumentException("Bad left range boundary " + start);
50 } 82 }
51 if (end < start || end >= indexSize) { 83 if (end < start || end >= indexSize) {
52 throw new IllegalArgumentException("Bad right range boundary " + end); 84 throw new IllegalArgumentException("Bad right range boundary " + end);
53 } 85 }
54 // XXX may cache [start .. end] from index with a single read (pre-read) 86 // XXX may cache [start .. end] from index with a single read (pre-read)
55 87
56 DataInput diIndex = null, diData = null; 88 DataInput diIndex = null, diData = null;
57 diIndex = getIndexStream(); 89 diIndex = getIndexStream();
58 if (needData) { 90 if (needData && !inline) {
59 diData = getDataStream(); 91 diData = getDataStream();
60 } 92 }
61 try { 93 try {
62 diIndex.skipBytes(inline ? (int) index.get(start).offset : start * 64); 94 diIndex.skipBytes(inline ? (int) index.get(start).offset : start * 64);
63 for (int i = start; i <= end && i < indexSize; i++ ) { 95 byte[] lastData = null;
96 for (int i = start; i <= end; i++ ) {
64 IndexEntry ie = index.get(i); 97 IndexEntry ie = index.get(i);
65 long l = diIndex.readLong(); 98 long l = diIndex.readLong();
66 long offset = l >>> 16; 99 long offset = l >>> 16;
67 int flags = (int) (l & 0X0FFFF); 100 int flags = (int) (l & 0X0FFFF);
68 int compressedLen = diIndex.readInt(); 101 int compressedLen = diIndex.readInt();
79 if (needData) { 112 if (needData) {
80 byte[] dataBuf = new byte[compressedLen]; 113 byte[] dataBuf = new byte[compressedLen];
81 if (inline) { 114 if (inline) {
82 diIndex.readFully(dataBuf); 115 diIndex.readFully(dataBuf);
83 } else { 116 } else {
84 diData.skipBytes((int) ie.offset); // FIXME not skip but seek!!! 117 diData.skipBytes((int) ie.offset); // FIXME not skip but seek!!! (skip would work only for the first time)
85 diData.readFully(dataBuf); 118 diData.readFully(dataBuf);
86 } 119 }
87 if (dataBuf[0] == 0x78 /* 'x' */) { 120 if (dataBuf[0] == 0x78 /* 'x' */) {
88 Inflater zlib = new Inflater(); 121 try {
89 zlib.setInput(dataBuf, 0, compressedLen); 122 Inflater zlib = new Inflater();
90 byte[] result = new byte[actualLen*2]; // FIXME need to use zlib.finished() instead 123 zlib.setInput(dataBuf, 0, compressedLen);
91 int resultLen = zlib.inflate(result); 124 byte[] result = new byte[actualLen*2]; // FIXME need to use zlib.finished() instead
92 zlib.end(); 125 int resultLen = zlib.inflate(result);
93 data = new byte[resultLen]; 126 zlib.end();
94 System.arraycopy(result, 0, data, 0, resultLen); 127 data = new byte[resultLen];
128 System.arraycopy(result, 0, data, 0, resultLen);
129 } catch (DataFormatException ex) {
130 ex.printStackTrace();
131 data = new byte[0]; // FIXME need better failure strategy
132 }
95 } else if (dataBuf[0] == 0x75 /* 'u' */) { 133 } else if (dataBuf[0] == 0x75 /* 'u' */) {
96 data = new byte[dataBuf.length - 1]; 134 data = new byte[dataBuf.length - 1];
97 System.arraycopy(dataBuf, 1, data, 0, data.length); 135 System.arraycopy(dataBuf, 1, data, 0, data.length);
98 } else { 136 } else {
99 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' 137 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0'
100 // but I don't see reason not to just return data as is 138 // but I don't see reason not to return data as is
101 data = dataBuf; 139 data = dataBuf;
102 } 140 }
103 // FIXME if patch data (based on baseRevision), then apply patches to get true content 141 // XXX
142 if (baseRevision != i) { // XXX not sure if this is the right way to detect a patch
143 // this is a patch
144 LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>();
145 int patchElementIndex = 0;
146 do {
147 final int x = patchElementIndex; // shorthand
148 int p1 = (data[x] << 24) | (data[x+1] << 16) | (data[x+2] << 8) | data[x+3];
149 int p2 = (data[x+4] << 24) | (data[x+5] << 16) | (data[x+6] << 8) | data[x+7];
150 int len = (data[x+8] << 24) | (data[x+9] << 16) | (data[x+10] << 8) | data[x+11];
151 patchElementIndex += 12 + len;
152 patches.add(new PatchRecord(p1, p2, len, data, x+12));
153 } while (patchElementIndex < data.length);
154 //
155 byte[] baseRevContent;
156 if (baseRevision == i - 1) {
157 baseRevContent = lastData;
158 } else {
159 // FIXME implement delta collection from few revisions
160 // read baseRevision plus all deltas between this revision and base. Need to do this effectively.
161 throw HgRepository.notImplemented();
162 }
163
164 // FIXME need to collect all patches between baseRevision and current version
165 data = apply(baseRevContent, patches);
166 }
167 } else {
168 if (inline) {
169 diIndex.skipBytes(compressedLen);
170 }
104 } 171 }
105 inspector.next(compressedLen, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, buf, data); 172 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, buf, data);
173 lastData = data;
106 } 174 }
107 } catch (EOFException ex) { 175 } catch (EOFException ex) {
108 // should not happen as long as we read inside known boundaries 176 // should not happen as long as we read inside known boundaries
109 throw new IllegalStateException(ex); 177 throw new IllegalStateException(ex);
110 } catch (IOException ex) { 178 } catch (IOException ex) {
111 FIXME 179 throw new IllegalStateException(ex); // FIXME need better handling
180 } finally {
181 hackCloseFileStreams(diIndex, diData); // FIXME HACK!!!
112 } 182 }
113 } 183 }
114 184
115 private void initOutline() { 185 private void initOutline() {
116 if (index != null && !index.isEmpty()) { 186 if (index != null && !index.isEmpty()) {
118 } 188 }
119 ArrayList<IndexEntry> res = new ArrayList<IndexEntry>(); 189 ArrayList<IndexEntry> res = new ArrayList<IndexEntry>();
120 DataInput di = getIndexStream(); 190 DataInput di = getIndexStream();
121 try { 191 try {
122 int versionField = di.readInt(); 192 int versionField = di.readInt();
123 // di.undreadInt(); 193 di.readInt(); // just to skip next 2 bytes of offset + flags
124 final int INLINEDATA = 1 << 16; 194 final int INLINEDATA = 1 << 16;
125 inline = (versionField & INLINEDATA) != 0; 195 inline = (versionField & INLINEDATA) != 0;
126 long offset = 0; // first offset is always 0, thus Hg uses it for other purposes 196 long offset = 0; // first offset is always 0, thus Hg uses it for other purposes
127 while(true) { // EOFExcepiton should get us outta here. FIXME Out inputstream should has explicit no-more-data indicator 197 while(true) { // EOFExcepiton should get us outta here. FIXME Our inputstream should has explicit no-more-data indicator
128 int compressedLen = di.readInt(); 198 int compressedLen = di.readInt();
129 // 8+4 = 12 bytes total read 199 // 8+4 = 12 bytes total read
130 // int actualLen = di.readInt(); 200 // int actualLen = di.readInt();
131 // int baseRevision = di.readInt(); 201 // int baseRevision = di.readInt();
132 // int linkRevision = di.readInt(); 202 // int linkRevision = di.readInt();
133 // int parent1Revision = di.readInt(); 203 // int parent1Revision = di.readInt();
134 // int parent2Revision = di.readInt(); 204 // int parent2Revision = di.readInt();
135 // byte[] nodeid = new byte[32]; 205 // byte[] nodeid = new byte[32];
136 res.add(new IndexEntry(offset, compressedLen)); 206 res.add(new IndexEntry(offset, compressedLen));
137 if (inline) { 207 if (inline) {
138 di.skipBytes(6*4 + 32 + compressedLen); // Check: 56 (skip) + 12 (read) = 64 (total RevlogNG record size) 208 di.skipBytes(5*4 + 32 + compressedLen); // Check: 52 (skip) + 12 (read) = 64 (total RevlogNG record size)
139 } else { 209 } else {
140 di.skipBytes(6*4 + 32); 210 di.skipBytes(5*4 + 32);
141 } 211 }
142 long l = di.readLong(); 212 long l = di.readLong();
143 offset = l >>> 16; 213 offset = l >>> 16;
144 } 214 }
145 } catch (EOFException ex) { 215 } catch (EOFException ex) {
148 } catch (IOException ex) { 218 } catch (IOException ex) {
149 ex.printStackTrace(); 219 ex.printStackTrace();
150 // too bad, no outline then 220 // too bad, no outline then
151 index = Collections.emptyList(); 221 index = Collections.emptyList();
152 } 222 }
223 hackCloseFileStreams(di, null); // FIXME HACK!!!
224 }
225
226 // FIXME HACK to deal with File/FileStream nature of out data source. Won't need this once implement
227 // own DataInput based on bytearray chunks or RandomAccessFile
228 private void hackCloseFileStreams(DataInput index, DataInput data) {
229 try {
230 if (index != null) {
231 ((DataInputStream) index).close();
232 }
233 if (data != null) {
234 ((DataInputStream) data).close();
235 }
236 } catch (IOException ex) {
237 ex.printStackTrace();
238 }
153 } 239 }
154 240
155 241
156 // perhaps, package-local or protected, if anyone else from low-level needs them 242 // perhaps, package-local or protected, if anyone else from low-level needs them
157 private static class IndexEntry { 243 private static class IndexEntry {
161 public IndexEntry(long o, int l) { 247 public IndexEntry(long o, int l) {
162 offset = o; 248 offset = o;
163 length = l; 249 length = l;
164 } 250 }
165 } 251 }
252
253 // mpatch.c : apply()
254 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF
255 private static byte[] apply(byte[] baseRevisionContent, List<PatchRecord> patch) {
256 byte[] tempBuf = new byte[512]; // XXX
257 int last = 0, destIndex = 0;
258 for (PatchRecord pr : patch) {
259 System.arraycopy(baseRevisionContent, last, tempBuf, destIndex, pr.start-last);
260 destIndex += pr.start - last;
261 System.arraycopy(pr.data, 0, tempBuf, destIndex, pr.data.length);
262 destIndex += pr.data.length;
263 last = pr.end;
264 }
265 System.arraycopy(baseRevisionContent, last, tempBuf, destIndex, baseRevisionContent.length - last);
266 destIndex += baseRevisionContent.length - last; // total length
267 byte[] rv = new byte[destIndex];
268 System.arraycopy(tempBuf, 0, rv, 0, destIndex);
269 return rv;
270 }
271
272 static class PatchRecord { // copy of struct frag from mpatch.c
273 int start, end, len;
274 byte[] data;
275
276 public PatchRecord(int p1, int p2, int len, byte[] src, int srcOffset) {
277 start = p1;
278 end = p2;
279 this.len = len;
280 data = new byte[len];
281 System.arraycopy(src, srcOffset, data, 0, len);
282 }
283 }
284
166 } 285 }