comparison src/com/tmate/hgkit/ll/RevlogStream.java @ 51:9429c7bd1920 wrap-data-access

Try DataAccess to reach revision data instead of plain byte arrays
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Sun, 16 Jan 2011 01:20:26 +0100
parents 26e3eeaa3962
children a6f39e595b2b
comparison
equal deleted inserted replaced
50:f1db8610da62 51:9429c7bd1920
9 import java.io.IOException; 9 import java.io.IOException;
10 import java.util.ArrayList; 10 import java.util.ArrayList;
11 import java.util.Collections; 11 import java.util.Collections;
12 import java.util.LinkedList; 12 import java.util.LinkedList;
13 import java.util.List; 13 import java.util.List;
14 import java.util.zip.DataFormatException; 14
15 import java.util.zip.Inflater; 15 import com.tmate.hgkit.fs.ByteArrayDataAccess;
16
17 import com.tmate.hgkit.fs.DataAccess; 16 import com.tmate.hgkit.fs.DataAccess;
18 import com.tmate.hgkit.fs.DataAccessProvider; 17 import com.tmate.hgkit.fs.DataAccessProvider;
18 import com.tmate.hgkit.fs.FilterDataAccess;
19 import com.tmate.hgkit.fs.InflaterDataAccess;
19 20
20 /** 21 /**
21 * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations), 22 * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations),
22 * or numerous RevlogStream with separate representation of the underlying data (cached, lazy ChunkStream)? 23 * or numerous RevlogStream with separate representation of the underlying data (cached, lazy ChunkStream)?
23 * @author artem 24 * @author artem
133 if (needData && !inline) { 134 if (needData && !inline) {
134 daData = getDataStream(); 135 daData = getDataStream();
135 } 136 }
136 try { 137 try {
137 byte[] nodeidBuf = new byte[20]; 138 byte[] nodeidBuf = new byte[20];
138 byte[] lastData = null; 139 DataAccess lastUserData = null;
139 int i; 140 int i;
140 boolean extraReadsToBaseRev = false; 141 boolean extraReadsToBaseRev = false;
141 if (needData && index.get(start).baseRevision < start) { 142 if (needData && index.get(start).baseRevision < start) {
142 i = index.get(start).baseRevision; 143 i = index.get(start).baseRevision;
143 extraReadsToBaseRev = true; 144 extraReadsToBaseRev = true;
144 } else { 145 } else {
145 i = start; 146 i = start;
146 } 147 }
147 148
148 daIndex.seek(inline ? (int) index.get(i).offset : i * REVLOGV1_RECORD_SIZE); 149 daIndex.seek(inline ? index.get(i).offset : i * REVLOGV1_RECORD_SIZE);
149 for (; i <= end; i++ ) { 150 for (; i <= end; i++ ) {
151 if (inline && needData) {
152 // inspector reading data (through FilterDataAccess) may have affected index position
153 daIndex.seek(index.get(i).offset);
154 }
150 long l = daIndex.readLong(); 155 long l = daIndex.readLong();
151 @SuppressWarnings("unused") 156 @SuppressWarnings("unused")
152 long offset = l >>> 16; 157 long offset = l >>> 16;
153 @SuppressWarnings("unused") 158 @SuppressWarnings("unused")
154 int flags = (int) (l & 0X0FFFF); 159 int flags = (int) (l & 0X0FFFF);
159 int parent1Revision = daIndex.readInt(); 164 int parent1Revision = daIndex.readInt();
160 int parent2Revision = daIndex.readInt(); 165 int parent2Revision = daIndex.readInt();
161 // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty 166 // Hg has 32 bytes here, uses 20 for nodeid, and keeps 12 last bytes empty
162 daIndex.readBytes(nodeidBuf, 0, 20); 167 daIndex.readBytes(nodeidBuf, 0, 20);
163 daIndex.skip(12); 168 daIndex.skip(12);
164 byte[] data = null; 169 DataAccess userDataAccess = null;
165 if (needData) { 170 if (needData) {
166 byte[] dataBuf = new byte[compressedLen]; 171 final byte firstByte;
172 long streamOffset = index.get(i).offset;
173 DataAccess streamDataAccess;
167 if (inline) { 174 if (inline) {
168 daIndex.readBytes(dataBuf, 0, compressedLen); 175 streamDataAccess = daIndex;
176 streamOffset += REVLOGV1_RECORD_SIZE; // don't need to do seek as it's actual position in the index stream
169 } else { 177 } else {
170 daData.seek(index.get(i).offset); 178 streamDataAccess = daData;
171 daData.readBytes(dataBuf, 0, compressedLen); 179 daData.seek(streamOffset);
172 } 180 }
173 if (dataBuf[0] == 0x78 /* 'x' */) { 181 firstByte = streamDataAccess.readByte();
174 try { 182 if (firstByte == 0x78 /* 'x' */) {
175 Inflater zlib = new Inflater(); // XXX Consider reuse of Inflater, and/or stream alternative 183 userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen);
176 zlib.setInput(dataBuf, 0, compressedLen); 184 } else if (firstByte == 0x75 /* 'u' */) {
177 byte[] result = new byte[actualLen*2]; // FIXME need to use zlib.finished() instead 185 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1);
178 int resultLen = zlib.inflate(result);
179 zlib.end();
180 data = new byte[resultLen];
181 System.arraycopy(result, 0, data, 0, resultLen);
182 } catch (DataFormatException ex) {
183 ex.printStackTrace();
184 data = new byte[0]; // FIXME need better failure strategy
185 }
186 } else if (dataBuf[0] == 0x75 /* 'u' */) {
187 data = new byte[dataBuf.length - 1];
188 System.arraycopy(dataBuf, 1, data, 0, data.length);
189 } else { 186 } else {
190 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' 187 // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0'
191 // but I don't see reason not to return data as is 188 // but I don't see reason not to return data as is
192 data = dataBuf; 189 userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen);
193 } 190 }
194 // XXX 191 // XXX
195 if (baseRevision != i) { // XXX not sure if this is the right way to detect a patch 192 if (baseRevision != i) { // XXX not sure if this is the right way to detect a patch
196 // this is a patch 193 // this is a patch
197 LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>(); 194 LinkedList<PatchRecord> patches = new LinkedList<PatchRecord>();
198 int patchElementIndex = 0; 195 while (!userDataAccess.isEmpty()) {
199 do { 196 PatchRecord pr = PatchRecord.read(userDataAccess);
200 PatchRecord pr = PatchRecord.read(data, patchElementIndex); 197 System.out.printf("PatchRecord:%d %d %d\n", pr.start, pr.end, pr.len);
201 patches.add(pr); 198 patches.add(pr);
202 patchElementIndex += 12 + pr.len; 199 }
203 } while (patchElementIndex < data.length); 200 userDataAccess.done();
204 // 201 //
205 byte[] baseRevContent = lastData; 202 byte[] userData = apply(lastUserData, actualLen, patches);
206 data = apply(baseRevContent, actualLen, patches); 203 userDataAccess = new ByteArrayDataAccess(userData);
207 } 204 }
208 } else { 205 } else {
209 if (inline) { 206 if (inline) {
210 daIndex.skip(compressedLen); 207 daIndex.skip(compressedLen);
211 } 208 }
212 } 209 }
213 if (!extraReadsToBaseRev || i >= start) { 210 if (!extraReadsToBaseRev || i >= start) {
214 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, data); 211 inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess);
215 } 212 }
216 lastData = data; 213 if (userDataAccess != null) {
214 userDataAccess.reset();
215 if (lastUserData != null) {
216 lastUserData.done();
217 }
218 lastUserData = userDataAccess;
219 }
217 } 220 }
218 } catch (IOException ex) { 221 } catch (IOException ex) {
219 throw new IllegalStateException(ex); // FIXME need better handling 222 throw new IllegalStateException(ex); // FIXME need better handling
220 } finally { 223 } finally {
221 daIndex.done(); 224 daIndex.done();
290 } 293 }
291 } 294 }
292 295
293 // mpatch.c : apply() 296 // mpatch.c : apply()
294 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF 297 // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF
295 /*package-local for HgBundle; until moved to better place*/static byte[] apply(byte[] baseRevisionContent, int outcomeLen, List<PatchRecord> patch) { 298 /*package-local for HgBundle; until moved to better place*/static byte[] apply(DataAccess baseRevisionContent, int outcomeLen, List<PatchRecord> patch) throws IOException {
296 int last = 0, destIndex = 0; 299 int last = 0, destIndex = 0;
297 if (outcomeLen == -1) { 300 if (outcomeLen == -1) {
298 outcomeLen = baseRevisionContent.length; 301 outcomeLen = (int) baseRevisionContent.length();
299 for (PatchRecord pr : patch) { 302 for (PatchRecord pr : patch) {
300 outcomeLen += pr.start - last + pr.len; 303 outcomeLen += pr.start - last + pr.len;
301 last = pr.end; 304 last = pr.end;
302 } 305 }
303 outcomeLen -= last; 306 outcomeLen -= last;
304 last = 0; 307 last = 0;
305 } 308 }
309 System.out.println(baseRevisionContent.length());
306 byte[] rv = new byte[outcomeLen]; 310 byte[] rv = new byte[outcomeLen];
307 for (PatchRecord pr : patch) { 311 for (PatchRecord pr : patch) {
308 System.arraycopy(baseRevisionContent, last, rv, destIndex, pr.start-last); 312 baseRevisionContent.seek(last);
313 baseRevisionContent.readBytes(rv, destIndex, pr.start-last);
309 destIndex += pr.start - last; 314 destIndex += pr.start - last;
310 System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length); 315 System.arraycopy(pr.data, 0, rv, destIndex, pr.data.length);
311 destIndex += pr.data.length; 316 destIndex += pr.data.length;
312 last = pr.end; 317 last = pr.end;
313 } 318 }
314 System.arraycopy(baseRevisionContent, last, rv, destIndex, baseRevisionContent.length - last); 319 baseRevisionContent.seek(last);
320 baseRevisionContent.readBytes(rv, destIndex, (int) (baseRevisionContent.length() - last));
315 return rv; 321 return rv;
316 } 322 }
317 323
318 // @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description 324 // @see http://mercurial.selenic.com/wiki/BundleFormat, in Changelog group description
319 /*package-local*/ static class PatchRecord { // copy of struct frag from mpatch.c 325 /*package-local*/ static class PatchRecord { // copy of struct frag from mpatch.c