tikhomirov@0: /** tikhomirov@0: * Copyright (c) 2010 Artem Tikhomirov tikhomirov@0: */ tikhomirov@0: package com.tmate.hgkit.ll; tikhomirov@0: tikhomirov@3: import java.io.BufferedInputStream; tikhomirov@0: import java.io.DataInput; tikhomirov@3: import java.io.DataInputStream; tikhomirov@2: import java.io.EOFException; tikhomirov@3: import java.io.File; tikhomirov@3: import java.io.FileInputStream; tikhomirov@3: import java.io.FileNotFoundException; tikhomirov@2: import java.io.IOException; tikhomirov@2: import java.util.ArrayList; tikhomirov@2: import java.util.Collections; tikhomirov@3: import java.util.LinkedList; tikhomirov@2: import java.util.List; tikhomirov@3: import java.util.zip.DataFormatException; tikhomirov@2: import java.util.zip.Inflater; tikhomirov@0: tikhomirov@0: /** tikhomirov@0: * ? Single RevlogStream per file per repository with accessor to record access session (e.g. with back/forward operations), tikhomirov@0: * or numerous RevlogStream with separate representation of the underlaying data (cached, lazy ChunkStream)? tikhomirov@0: * @author artem tikhomirov@0: * @see http://mercurial.selenic.com/wiki/Revlog tikhomirov@0: * @see http://mercurial.selenic.com/wiki/RevlogNG tikhomirov@0: */ tikhomirov@0: public class RevlogStream { tikhomirov@2: tikhomirov@2: private List index; // indexed access highly needed tikhomirov@2: private boolean inline = false; tikhomirov@3: private final File indexFile; tikhomirov@3: tikhomirov@3: RevlogStream(File indexFile) { tikhomirov@3: this.indexFile = indexFile; tikhomirov@3: } tikhomirov@2: tikhomirov@0: private void detectVersion() { tikhomirov@0: tikhomirov@0: } tikhomirov@0: tikhomirov@0: /*package*/ DataInput getIndexStream() { tikhomirov@3: DataInputStream dis = null; tikhomirov@3: try { tikhomirov@3: dis = new DataInputStream(new BufferedInputStream(new FileInputStream(indexFile))); tikhomirov@3: } catch (FileNotFoundException ex) { tikhomirov@3: ex.printStackTrace(); tikhomirov@3: // should not happen, we checked for existence tikhomirov@3: } tikhomirov@3: return dis; tikhomirov@0: } tikhomirov@0: tikhomirov@0: /*package*/ DataInput getDataStream() { tikhomirov@3: final String indexName = indexFile.getName(); tikhomirov@3: File dataFile = new File(indexFile.getParentFile(), indexName.substring(0, indexName.length() - 1) + "d"); tikhomirov@3: try { tikhomirov@3: return new DataInputStream(new BufferedInputStream(new FileInputStream(dataFile))); tikhomirov@3: } catch (FileNotFoundException ex) { tikhomirov@3: ex.printStackTrace(); tikhomirov@3: return null; tikhomirov@3: } tikhomirov@0: } tikhomirov@3: tikhomirov@2: public int revisionCount() { tikhomirov@2: initOutline(); tikhomirov@2: return index.size(); tikhomirov@2: } tikhomirov@2: tikhomirov@3: // should be possible to use TIP, ALL, or -1, -2, -n notation of Hg tikhomirov@3: // ? boolean needsNodeid tikhomirov@2: public void iterate(int start, int end, boolean needData, Revlog.Inspector inspector) { tikhomirov@2: initOutline(); tikhomirov@2: final int indexSize = index.size(); tikhomirov@3: if (indexSize == 0) { tikhomirov@3: return; tikhomirov@3: } tikhomirov@3: if (end == -1 /*FIXME TIP*/) { tikhomirov@3: end = indexSize - 1; tikhomirov@3: } tikhomirov@2: if (start < 0 || start >= indexSize) { tikhomirov@2: throw new IllegalArgumentException("Bad left range boundary " + start); tikhomirov@2: } tikhomirov@2: if (end < start || end >= indexSize) { tikhomirov@2: throw new IllegalArgumentException("Bad right range boundary " + end); tikhomirov@2: } tikhomirov@2: // XXX may cache [start .. end] from index with a single read (pre-read) tikhomirov@2: tikhomirov@2: DataInput diIndex = null, diData = null; tikhomirov@2: diIndex = getIndexStream(); tikhomirov@3: if (needData && !inline) { tikhomirov@2: diData = getDataStream(); tikhomirov@2: } tikhomirov@2: try { tikhomirov@4: int skipped = diIndex.skipBytes(inline ? (int) index.get(start).offset : start * 64); tikhomirov@3: byte[] lastData = null; tikhomirov@3: for (int i = start; i <= end; i++ ) { tikhomirov@2: IndexEntry ie = index.get(i); tikhomirov@2: long l = diIndex.readLong(); tikhomirov@2: long offset = l >>> 16; tikhomirov@2: int flags = (int) (l & 0X0FFFF); tikhomirov@2: int compressedLen = diIndex.readInt(); tikhomirov@2: int actualLen = diIndex.readInt(); tikhomirov@2: int baseRevision = diIndex.readInt(); tikhomirov@2: int linkRevision = diIndex.readInt(); tikhomirov@2: int parent1Revision = diIndex.readInt(); tikhomirov@2: int parent2Revision = diIndex.readInt(); tikhomirov@2: byte[] buf = new byte[32]; tikhomirov@2: // XXX Hg keeps 12 last bytes empty, we move them into front here tikhomirov@2: diIndex.readFully(buf, 12, 20); tikhomirov@2: diIndex.skipBytes(12); tikhomirov@2: byte[] data = null; tikhomirov@2: if (needData) { tikhomirov@2: byte[] dataBuf = new byte[compressedLen]; tikhomirov@2: if (inline) { tikhomirov@2: diIndex.readFully(dataBuf); tikhomirov@2: } else { tikhomirov@3: diData.skipBytes((int) ie.offset); // FIXME not skip but seek!!! (skip would work only for the first time) tikhomirov@2: diData.readFully(dataBuf); tikhomirov@2: } tikhomirov@2: if (dataBuf[0] == 0x78 /* 'x' */) { tikhomirov@3: try { tikhomirov@3: Inflater zlib = new Inflater(); tikhomirov@3: zlib.setInput(dataBuf, 0, compressedLen); tikhomirov@3: byte[] result = new byte[actualLen*2]; // FIXME need to use zlib.finished() instead tikhomirov@3: int resultLen = zlib.inflate(result); tikhomirov@3: zlib.end(); tikhomirov@3: data = new byte[resultLen]; tikhomirov@3: System.arraycopy(result, 0, data, 0, resultLen); tikhomirov@3: } catch (DataFormatException ex) { tikhomirov@3: ex.printStackTrace(); tikhomirov@3: data = new byte[0]; // FIXME need better failure strategy tikhomirov@3: } tikhomirov@2: } else if (dataBuf[0] == 0x75 /* 'u' */) { tikhomirov@2: data = new byte[dataBuf.length - 1]; tikhomirov@2: System.arraycopy(dataBuf, 1, data, 0, data.length); tikhomirov@2: } else { tikhomirov@2: // XXX Python impl in fact throws exception when there's not 'x', 'u' or '0' tikhomirov@3: // but I don't see reason not to return data as is tikhomirov@2: data = dataBuf; tikhomirov@2: } tikhomirov@3: // XXX tikhomirov@3: if (baseRevision != i) { // XXX not sure if this is the right way to detect a patch tikhomirov@3: // this is a patch tikhomirov@3: LinkedList patches = new LinkedList(); tikhomirov@3: int patchElementIndex = 0; tikhomirov@3: do { tikhomirov@3: final int x = patchElementIndex; // shorthand tikhomirov@3: int p1 = (data[x] << 24) | (data[x+1] << 16) | (data[x+2] << 8) | data[x+3]; tikhomirov@3: int p2 = (data[x+4] << 24) | (data[x+5] << 16) | (data[x+6] << 8) | data[x+7]; tikhomirov@3: int len = (data[x+8] << 24) | (data[x+9] << 16) | (data[x+10] << 8) | data[x+11]; tikhomirov@3: patchElementIndex += 12 + len; tikhomirov@3: patches.add(new PatchRecord(p1, p2, len, data, x+12)); tikhomirov@3: } while (patchElementIndex < data.length); tikhomirov@3: // tikhomirov@3: byte[] baseRevContent; tikhomirov@3: if (baseRevision == i - 1) { tikhomirov@3: baseRevContent = lastData; tikhomirov@3: } else { tikhomirov@3: // FIXME implement delta collection from few revisions tikhomirov@3: // read baseRevision plus all deltas between this revision and base. Need to do this effectively. tikhomirov@3: throw HgRepository.notImplemented(); tikhomirov@3: } tikhomirov@3: tikhomirov@3: // FIXME need to collect all patches between baseRevision and current version tikhomirov@3: data = apply(baseRevContent, patches); tikhomirov@3: } tikhomirov@3: } else { tikhomirov@3: if (inline) { tikhomirov@3: diIndex.skipBytes(compressedLen); tikhomirov@3: } tikhomirov@2: } tikhomirov@3: inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, buf, data); tikhomirov@3: lastData = data; tikhomirov@2: } tikhomirov@2: } catch (EOFException ex) { tikhomirov@2: // should not happen as long as we read inside known boundaries tikhomirov@2: throw new IllegalStateException(ex); tikhomirov@2: } catch (IOException ex) { tikhomirov@3: throw new IllegalStateException(ex); // FIXME need better handling tikhomirov@3: } finally { tikhomirov@3: hackCloseFileStreams(diIndex, diData); // FIXME HACK!!! tikhomirov@2: } tikhomirov@2: } tikhomirov@0: tikhomirov@2: private void initOutline() { tikhomirov@2: if (index != null && !index.isEmpty()) { tikhomirov@2: return; tikhomirov@2: } tikhomirov@2: ArrayList res = new ArrayList(); tikhomirov@2: DataInput di = getIndexStream(); tikhomirov@2: try { tikhomirov@2: int versionField = di.readInt(); tikhomirov@3: di.readInt(); // just to skip next 2 bytes of offset + flags tikhomirov@2: final int INLINEDATA = 1 << 16; tikhomirov@2: inline = (versionField & INLINEDATA) != 0; tikhomirov@2: long offset = 0; // first offset is always 0, thus Hg uses it for other purposes tikhomirov@3: while(true) { // EOFExcepiton should get us outta here. FIXME Our inputstream should has explicit no-more-data indicator tikhomirov@2: int compressedLen = di.readInt(); tikhomirov@2: // 8+4 = 12 bytes total read tikhomirov@2: // int actualLen = di.readInt(); tikhomirov@2: // int baseRevision = di.readInt(); tikhomirov@2: // int linkRevision = di.readInt(); tikhomirov@2: // int parent1Revision = di.readInt(); tikhomirov@2: // int parent2Revision = di.readInt(); tikhomirov@2: // byte[] nodeid = new byte[32]; tikhomirov@2: if (inline) { tikhomirov@4: res.add(new IndexEntry(offset + 64*res.size(), compressedLen)); tikhomirov@3: di.skipBytes(5*4 + 32 + compressedLen); // Check: 52 (skip) + 12 (read) = 64 (total RevlogNG record size) tikhomirov@2: } else { tikhomirov@4: res.add(new IndexEntry(offset, compressedLen)); tikhomirov@3: di.skipBytes(5*4 + 32); tikhomirov@2: } tikhomirov@2: long l = di.readLong(); tikhomirov@2: offset = l >>> 16; tikhomirov@2: } tikhomirov@2: } catch (EOFException ex) { tikhomirov@2: // fine, done then tikhomirov@2: index = res; tikhomirov@2: } catch (IOException ex) { tikhomirov@2: ex.printStackTrace(); tikhomirov@2: // too bad, no outline then tikhomirov@2: index = Collections.emptyList(); tikhomirov@2: } tikhomirov@3: hackCloseFileStreams(di, null); // FIXME HACK!!! tikhomirov@3: } tikhomirov@3: tikhomirov@3: // FIXME HACK to deal with File/FileStream nature of out data source. Won't need this once implement tikhomirov@3: // own DataInput based on bytearray chunks or RandomAccessFile tikhomirov@3: private void hackCloseFileStreams(DataInput index, DataInput data) { tikhomirov@3: try { tikhomirov@3: if (index != null) { tikhomirov@3: ((DataInputStream) index).close(); tikhomirov@3: } tikhomirov@3: if (data != null) { tikhomirov@3: ((DataInputStream) data).close(); tikhomirov@3: } tikhomirov@3: } catch (IOException ex) { tikhomirov@3: ex.printStackTrace(); tikhomirov@3: } tikhomirov@2: } tikhomirov@2: tikhomirov@2: tikhomirov@2: // perhaps, package-local or protected, if anyone else from low-level needs them tikhomirov@2: private static class IndexEntry { tikhomirov@4: public final long offset; // for separate .i and .d - copy of index record entry, for inline index - actual offset of the record in the .i file (record entry + revision * record size)) tikhomirov@2: public final int length; // data past fixed record (need to decide whether including header size or not), and whether length is of compressed data or not tikhomirov@2: tikhomirov@2: public IndexEntry(long o, int l) { tikhomirov@2: offset = o; tikhomirov@2: length = l; tikhomirov@2: } tikhomirov@2: } tikhomirov@3: tikhomirov@3: // mpatch.c : apply() tikhomirov@3: // FIXME need to implement patch merge (fold, combine, gather and discard from aforementioned mpatch.[c|py]), also see Revlog and Mercurial PDF tikhomirov@3: private static byte[] apply(byte[] baseRevisionContent, List patch) { tikhomirov@3: byte[] tempBuf = new byte[512]; // XXX tikhomirov@3: int last = 0, destIndex = 0; tikhomirov@3: for (PatchRecord pr : patch) { tikhomirov@3: System.arraycopy(baseRevisionContent, last, tempBuf, destIndex, pr.start-last); tikhomirov@3: destIndex += pr.start - last; tikhomirov@3: System.arraycopy(pr.data, 0, tempBuf, destIndex, pr.data.length); tikhomirov@3: destIndex += pr.data.length; tikhomirov@3: last = pr.end; tikhomirov@3: } tikhomirov@3: System.arraycopy(baseRevisionContent, last, tempBuf, destIndex, baseRevisionContent.length - last); tikhomirov@3: destIndex += baseRevisionContent.length - last; // total length tikhomirov@3: byte[] rv = new byte[destIndex]; tikhomirov@3: System.arraycopy(tempBuf, 0, rv, 0, destIndex); tikhomirov@3: return rv; tikhomirov@3: } tikhomirov@3: tikhomirov@3: static class PatchRecord { // copy of struct frag from mpatch.c tikhomirov@3: int start, end, len; tikhomirov@3: byte[] data; tikhomirov@3: tikhomirov@3: public PatchRecord(int p1, int p2, int len, byte[] src, int srcOffset) { tikhomirov@3: start = p1; tikhomirov@3: end = p2; tikhomirov@3: this.len = len; tikhomirov@3: data = new byte[len]; tikhomirov@3: System.arraycopy(src, srcOffset, data, 0, len); tikhomirov@3: } tikhomirov@3: } tikhomirov@3: tikhomirov@0: }