changeset 196:e2115da4cf6a

Pool objects to avoid memory pollution with duplicates
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Mon, 18 Apr 2011 18:04:24 +0200 (2011-04-18)
parents c9b305df0b89
children 3a7696fb457c
files src/org/tmatesoft/hg/core/HgChangeset.java src/org/tmatesoft/hg/internal/Pool.java src/org/tmatesoft/hg/repo/HgChangelog.java src/org/tmatesoft/hg/repo/HgManifest.java
diffstat 4 files changed, 129 insertions(+), 70 deletions(-) [+]
line wrap: on
line diff
--- a/src/org/tmatesoft/hg/core/HgChangeset.java	Fri Apr 15 05:17:44 2011 +0200
+++ b/src/org/tmatesoft/hg/core/HgChangeset.java	Mon Apr 18 18:04:24 2011 +0200
@@ -64,7 +64,7 @@
 	/*package-local*/ void init(int localRevNumber, Nodeid nid, RawChangeset rawChangeset) {
 		revNumber = localRevNumber;
 		nodeid = nid;
-		changeset = rawChangeset;
+		changeset = rawChangeset.clone();
 		modifiedFiles = addedFiles = null;
 		deletedFiles = null;
 		parent1 = parent2 = null;
@@ -169,7 +169,7 @@
 	public HgChangeset clone() {
 		try {
 			HgChangeset copy = (HgChangeset) super.clone();
-			copy.changeset = changeset.clone();
+			// copy.changeset references this.changeset, doesn't need own copy
 			return copy;
 		} catch (CloneNotSupportedException ex) {
 			throw new InternalError(ex.toString());
--- a/src/org/tmatesoft/hg/internal/Pool.java	Fri Apr 15 05:17:44 2011 +0200
+++ b/src/org/tmatesoft/hg/internal/Pool.java	Mon Apr 18 18:04:24 2011 +0200
@@ -36,4 +36,18 @@
 		}
 		return rv;
 	}
+	
+	@Override
+	public String toString() {
+		StringBuilder sb = new StringBuilder();
+		sb.append(Pool.class.getSimpleName());
+		sb.append('<');
+		if (!unify.isEmpty()) {
+			sb.append(unify.keySet().iterator().next().getClass().getName());
+		}
+		sb.append('>');
+		sb.append(':');
+		sb.append(unify.size());
+		return sb.toString();
+	}
 }
\ No newline at end of file
--- a/src/org/tmatesoft/hg/repo/HgChangelog.java	Fri Apr 15 05:17:44 2011 +0200
+++ b/src/org/tmatesoft/hg/repo/HgChangelog.java	Mon Apr 18 18:04:24 2011 +0200
@@ -30,8 +30,10 @@
 import java.util.Map;
 import java.util.TimeZone;
 
+import org.tmatesoft.hg.core.HgBadStateException;
 import org.tmatesoft.hg.core.Nodeid;
 import org.tmatesoft.hg.internal.DataAccess;
+import org.tmatesoft.hg.internal.Pool;
 import org.tmatesoft.hg.internal.RevlogStream;
 
 /**
@@ -51,28 +53,16 @@
 	}
 
 	public void range(int start, int end, final HgChangelog.Inspector inspector) {
-		RevlogStream.Inspector i = new RevlogStream.Inspector() {
-
-			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) {
-				RawChangeset cset = RawChangeset.parse(da);
-				// XXX there's no guarantee for Changeset.Callback that distinct instance comes each time, consider instance reuse
-				inspector.next(revisionNumber, Nodeid.fromBinary(nodeid, 0), cset);
-			}
-		};
-		content.iterate(start, end, true, i);
+		if (inspector == null) {
+			throw new IllegalArgumentException();
+		}
+		content.iterate(start, end, true, new RawCsetParser(inspector));
 	}
 
 	public List<RawChangeset> range(int start, int end) {
-		final ArrayList<RawChangeset> rv = new ArrayList<RawChangeset>(end - start + 1);
-		RevlogStream.Inspector i = new RevlogStream.Inspector() {
-
-			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) {
-				RawChangeset cset = RawChangeset.parse(da);
-				rv.add(cset);
-			}
-		};
-		content.iterate(start, end, true, i);
-		return rv;
+		final RawCsetCollector c = new RawCsetCollector(end - start + 1);
+		range(start, end, c);
+		return c.result;
 	}
 
 	public void range(final HgChangelog.Inspector inspector, final int... revisions) {
@@ -80,11 +70,11 @@
 			return;
 		}
 		RevlogStream.Inspector i = new RevlogStream.Inspector() {
+			private final RawCsetParser delegate = new RawCsetParser(inspector);
 
 			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) {
 				if (Arrays.binarySearch(revisions, revisionNumber) >= 0) {
-					RawChangeset cset = RawChangeset.parse(da);
-					inspector.next(revisionNumber, Nodeid.fromBinary(nodeid, 0), cset);
+					delegate.next(revisionNumber, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeid, da);
 				}
 			}
 		};
@@ -206,14 +196,15 @@
 			try {
 				byte[] data = da.byteArray();
 				RawChangeset rv = new RawChangeset();
-				rv.init(data, 0, data.length);
+				rv.init(data, 0, data.length, null);
 				return rv;
 			} catch (IOException ex) {
-				throw new IllegalArgumentException(ex); // FIXME better handling of IOExc
+				throw new HgBadStateException(ex); // FIXME "Error reading changeset data"
 			}
 		}
 
-		/* package-local */void init(byte[] data, int offset, int length) {
+		// @param usersPool - it's likely user names get repeated again and again throughout repository. can be null
+		/* package-local */void init(byte[] data, int offset, int length, Pool<String> usersPool) {
 			final int bufferEndIndex = offset + length;
 			final byte lineBreak = (byte) '\n';
 			int breakIndex1 = indexOf(data, lineBreak, offset, bufferEndIndex);
@@ -226,6 +217,9 @@
 				throw new IllegalArgumentException("Bad Changeset data");
 			}
 			String _user = new String(data, breakIndex1 + 1, breakIndex2 - breakIndex1 - 1);
+			if (usersPool != null) {
+				_user = usersPool.unify(_user);
+			}
 			int breakIndex3 = indexOf(data, lineBreak, breakIndex2 + 1, bufferEndIndex);
 			if (breakIndex3 == -1) {
 				throw new IllegalArgumentException("Bad Changeset data");
@@ -312,4 +306,39 @@
 		}
 	}
 
+	private static class RawCsetCollector implements Inspector {
+		final ArrayList<RawChangeset> result;
+		
+		public RawCsetCollector(int count) {
+			result = new ArrayList<RawChangeset>(count > 0 ? count : 5);
+		}
+
+		public void next(int revisionNumber, Nodeid nodeid, RawChangeset cset) {
+			result.add(cset.clone());
+		}
+	}
+
+	private static class RawCsetParser implements RevlogStream.Inspector {
+		
+		private final Inspector inspector;
+		private final Pool<String> usersPool;
+		private final RawChangeset cset = new RawChangeset();
+
+		public RawCsetParser(HgChangelog.Inspector delegate) {
+			assert delegate != null;
+			inspector = delegate;
+			usersPool = new Pool<String>();
+		}
+
+		public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) {
+			try {
+				byte[] data = da.byteArray();
+				cset.init(data, 0, data.length, usersPool);
+				// XXX there's no guarantee for Changeset.Callback that distinct instance comes each time, consider instance reuse
+				inspector.next(revisionNumber, Nodeid.fromBinary(nodeid, 0), cset);
+			} catch (Exception ex) {
+				throw new HgBadStateException(ex); // FIXME exception handling
+			}
+		}
+	}
 }
--- a/src/org/tmatesoft/hg/repo/HgManifest.java	Fri Apr 15 05:17:44 2011 +0200
+++ b/src/org/tmatesoft/hg/repo/HgManifest.java	Mon Apr 18 18:04:24 2011 +0200
@@ -21,6 +21,7 @@
 import org.tmatesoft.hg.core.HgBadStateException;
 import org.tmatesoft.hg.core.Nodeid;
 import org.tmatesoft.hg.internal.DataAccess;
+import org.tmatesoft.hg.internal.Pool;
 import org.tmatesoft.hg.internal.RevlogStream;
 
 
@@ -36,50 +37,10 @@
 	}
 
 	public void walk(int start, int end, final Inspector inspector) {
-		RevlogStream.Inspector insp = new RevlogStream.Inspector() {
-
-			private boolean gtg = true; // good to go
-
-			public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) {
-				if (!gtg) {
-					return;
-				}
-				try {
-					gtg = gtg && inspector.begin(revisionNumber, new Nodeid(nodeid, true));
-					int i;
-					String fname = null;
-					String flags = null;
-					Nodeid nid = null;
-					byte[] data = da.byteArray();
-					for (i = 0; gtg && i < actualLen; i++) {
-						int x = i;
-						for( ; data[i] != '\n' && i < actualLen; i++) {
-							if (fname == null && data[i] == 0) {
-								fname = new String(data, x, i - x);
-								x = i+1;
-							}
-						}
-						if (i < actualLen) {
-							assert data[i] == '\n'; 
-							int nodeidLen = i - x < 40 ? i-x : 40;
-							nid = Nodeid.fromAscii(data, x, nodeidLen);
-							if (nodeidLen + x < i) {
-								// 'x' and 'l' for executable bits and symlinks?
-								// hg --debug manifest shows 644 for each regular file in my repo
-								flags = new String(data, x + nodeidLen, i-x-nodeidLen);
-							}
-							gtg = gtg && inspector.next(nid, fname, flags);
-						}
-						nid = null;
-						fname = flags = null;
-					}
-					gtg = gtg && inspector.end(revisionNumber);
-				} catch (IOException ex) {
-					throw new HgBadStateException(ex);
-				}
-			}
-		};
-		content.iterate(start, end, true, insp);
+		if (inspector == null) {
+			throw new IllegalArgumentException();
+		}
+		content.iterate(start, end, true, new ManifestParser(inspector));
 	}
 
 	public interface Inspector {
@@ -87,4 +48,59 @@
 		boolean next(Nodeid nid, String fname, String flags);
 		boolean end(int revision);
 	}
+
+	private static class ManifestParser implements RevlogStream.Inspector {
+		private boolean gtg = true; // good to go
+		private final Inspector inspector;
+		private final Pool<Nodeid> nodeidPool;
+		private final Pool<String> fnamePool;
+		private final Pool<String> flagsPool;
+
+		public ManifestParser(Inspector delegate) {
+			assert delegate != null;
+			inspector = delegate;
+			nodeidPool = new Pool<Nodeid>();
+			fnamePool = new Pool<String>();
+			flagsPool = new Pool<String>();
+		}
+
+		public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) {
+			if (!gtg) {
+				return;
+			}
+			try {
+				gtg = gtg && inspector.begin(revisionNumber, new Nodeid(nodeid, true));
+				int i;
+				String fname = null;
+				String flags = null;
+				Nodeid nid = null;
+				byte[] data = da.byteArray();
+				for (i = 0; gtg && i < actualLen; i++) {
+					int x = i;
+					for( ; data[i] != '\n' && i < actualLen; i++) {
+						if (fname == null && data[i] == 0) {
+							fname = fnamePool.unify(new String(data, x, i - x));
+							x = i+1;
+						}
+					}
+					if (i < actualLen) {
+						assert data[i] == '\n'; 
+						int nodeidLen = i - x < 40 ? i-x : 40;
+						nid = nodeidPool.unify(Nodeid.fromAscii(data, x, nodeidLen));
+						if (nodeidLen + x < i) {
+							// 'x' and 'l' for executable bits and symlinks?
+							// hg --debug manifest shows 644 for each regular file in my repo
+							flags = flagsPool.unify(new String(data, x + nodeidLen, i-x-nodeidLen));
+						}
+						gtg = gtg && inspector.next(nid, fname, flags);
+					}
+					nid = null;
+					fname = flags = null;
+				}
+				gtg = gtg && inspector.end(revisionNumber);
+			} catch (IOException ex) {
+				throw new HgBadStateException(ex);
+			}
+		}
+	}
 }