# HG changeset patch # User Artem Tikhomirov # Date 1313631996 -7200 # Node ID 3dcd3dd90c77b056b79e21e02465fa820915f515 # Parent 436bb5f65ce13e1a3a012fa999b19e65fca6a528 Improve manifest parsing: decode bytes to chars once, minimize arraycopy on String instantiation, keep set of file revisions from previous manifest only diff -r 436bb5f65ce1 -r 3dcd3dd90c77 src/org/tmatesoft/hg/core/Nodeid.java --- a/src/org/tmatesoft/hg/core/Nodeid.java Wed Aug 17 20:30:06 2011 +0200 +++ b/src/org/tmatesoft/hg/core/Nodeid.java Thu Aug 18 03:46:36 2011 +0200 @@ -163,7 +163,7 @@ throw new IllegalArgumentException(); } // XXX is better impl for String possible? - return fromAscii(asciiRepresentation.getBytes(), 0, 40); + return fromAscii(asciiRepresentation.toCharArray(), 0, 40); } /** @@ -183,7 +183,8 @@ if (hiNibble >= 16 || lowNibble >= 16) { throw new IllegalArgumentException(String.format("Characters '%c%c' (%1$d and %2$d) at index %d are not valid hex digits", asciiRepresentation[j-2], asciiRepresentation[j-1], j-2)); } - data[i] = (byte) (((hiNibble << 4) | lowNibble) & 0xFF); + b = (((hiNibble << 4) | lowNibble) & 0xFF); + data[i] = (byte) b; zeroBytes = zeroBytes && b == 0; } if (zeroBytes) { @@ -191,4 +192,12 @@ } return new Nodeid(data, false); } + + public static Nodeid fromAscii(char[] asciiRepresentation, int offset, int length) { + byte[] b = new byte[length]; + for (int i = 0; i < b.length; i++) { + b[i] = (byte) asciiRepresentation[offset+i]; + } + return fromAscii(b, 0, b.length); + } } diff -r 436bb5f65ce1 -r 3dcd3dd90c77 src/org/tmatesoft/hg/internal/Pool.java --- a/src/org/tmatesoft/hg/internal/Pool.java Wed Aug 17 20:30:06 2011 +0200 +++ b/src/org/tmatesoft/hg/internal/Pool.java Thu Aug 18 03:46:36 2011 +0200 @@ -18,6 +18,8 @@ import java.util.HashMap; +import org.tmatesoft.hg.util.SparseSet; + /** * Instance pooling. * @@ -25,7 +27,20 @@ * @author TMate Software Ltd. */ public class Pool { - private final HashMap unify = new HashMap(); + private final HashMap unify; +// private final SparseSet unify = new SparseSet(); + + public Pool() { + unify = new HashMap(); + } + + public Pool(int sizeHint) { + if (sizeHint <= 0) { + unify = new HashMap(); + } else { + unify = new HashMap(sizeHint * 4 / 3, 0.75f); + } + } public T unify(T t) { T rv = unify.get(t); @@ -37,14 +52,30 @@ return rv; } + public boolean contains(T t) { + return unify.containsKey(t); + } + + public void record(T t) { + unify.put(t, t); + } + + public void clear() { + unify.clear(); + } + + public int size() { + return unify.size(); + } + @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append(Pool.class.getSimpleName()); sb.append('<'); - if (!unify.isEmpty()) { - sb.append(unify.keySet().iterator().next().getClass().getName()); - } +// if (!unify.isEmpty()) { +// sb.append(unify.keySet().iterator().next().getClass().getName()); +// } sb.append('>'); sb.append(':'); sb.append(unify.size()); diff -r 436bb5f65ce1 -r 3dcd3dd90c77 src/org/tmatesoft/hg/repo/HgManifest.java --- a/src/org/tmatesoft/hg/repo/HgManifest.java Wed Aug 17 20:30:06 2011 +0200 +++ b/src/org/tmatesoft/hg/repo/HgManifest.java Thu Aug 18 03:46:36 2011 +0200 @@ -152,10 +152,10 @@ private static class ManifestParser implements RevlogStream.Inspector { private boolean gtg = true; // good to go private final Inspector inspector; - private final Pool nodeidPool; + private Pool nodeidPool; private final Pool fnamePool; private final Pool flagsPool; - + public ManifestParser(Inspector delegate) { assert delegate != null; inspector = delegate; @@ -163,41 +163,60 @@ fnamePool = new Pool(); flagsPool = new Pool(); } - + public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { if (!gtg) { return; } try { gtg = gtg && inspector.begin(revisionNumber, new Nodeid(nodeid, true), linkRevision); - int i; + Pool thisRevPool = new Pool(nodeidPool.size()); // supply hint to minimize map resize/rehash String fname = null; String flags = null; Nodeid nid = null; - byte[] data = da.byteArray(); - for (i = 0; gtg && i < actualLen; i++) { - int x = i; - for( ; data[i] != '\n' && i < actualLen; i++) { - if (fname == null && data[i] == 0) { - fname = fnamePool.unify(new String(data, x, i - x)); - x = i+1; + final char[] nodeidConvertCache = new char[40]; + String data = new String(da.byteArray()); + final int dataLen = data.length(); // due to byte->char conversion, may be different + for (int x = 0; gtg && x < dataLen; x++) { + int start = x; + x = data.indexOf('\n', x+1); + assert x != -1; + int z = data.indexOf('\0', start+1); + assert z!= -1; + assert z < x; + fname = data.substring(start, z); + if (fnamePool.contains(fname)) { + fname = fnamePool.unify(fname); + } else { + fnamePool.record(fname = new String(fname)); + } + z++; // cursor at first char of nodeid + int nodeidLen = x-z < 40 ? x-z : 40; // if x-z > 40, there are flags + data.getChars(z, z+nodeidLen, nodeidConvertCache, 0); + nid = nodeidPool.unify(Nodeid.fromAscii(nodeidConvertCache, 0, nodeidLen)); + thisRevPool.record(nid); // memorize revision for the next iteration. + if (x-z > 40) { + // 'x' and 'l' for executable bits and symlinks? + // hg --debug manifest shows 644 for each regular file in my repo + // for cpython repo, there are 755 in hg --debug output when 'x' flag is present + flags = data.substring(z + nodeidLen, x); + if (flagsPool.contains(flags)) { + flags = flagsPool.unify(flags); + } else { + flagsPool.record(flags = new String(flags)); } } - if (i < actualLen) { - assert data[i] == '\n'; - int nodeidLen = i - x < 40 ? i-x : 40; - nid = nodeidPool.unify(Nodeid.fromAscii(data, x, nodeidLen)); - if (nodeidLen + x < i) { - // 'x' and 'l' for executable bits and symlinks? - // hg --debug manifest shows 644 for each regular file in my repo - flags = flagsPool.unify(new String(data, x + nodeidLen, i-x-nodeidLen)); - } - gtg = gtg && inspector.next(nid, fname, flags); - } + gtg = gtg && inspector.next(nid, fname, flags); nid = null; fname = flags = null; } gtg = gtg && inspector.end(revisionNumber); + // + // keep only actual file revisions, found at this version + // (next manifest is likely to refer to most of them, although in specific cases + // like commit in another branch a lot may be useless) + nodeidPool.clear(); + nodeidPool = thisRevPool; } catch (IOException ex) { throw new HgBadStateException(ex); }