comparison src/org/tmatesoft/hg/repo/HgManifest.java @ 262:3dcd3dd90c77

Improve manifest parsing: decode bytes to chars once, minimize arraycopy on String instantiation, keep only the set of file revisions from the previous manifest
author Artem Tikhomirov <tikhomirov.artem@gmail.com>
date Thu, 18 Aug 2011 03:46:36 +0200
parents f39fb6b3cc76
children 6bb5e7ed051a
comparison
equal deleted inserted replaced
261:436bb5f65ce1 262:3dcd3dd90c77
150 } 150 }
151 151
152 private static class ManifestParser implements RevlogStream.Inspector { 152 private static class ManifestParser implements RevlogStream.Inspector {
153 private boolean gtg = true; // good to go 153 private boolean gtg = true; // good to go
154 private final Inspector inspector; 154 private final Inspector inspector;
155 private final Pool<Nodeid> nodeidPool; 155 private Pool<Nodeid> nodeidPool;
156 private final Pool<String> fnamePool; 156 private final Pool<String> fnamePool;
157 private final Pool<String> flagsPool; 157 private final Pool<String> flagsPool;
158 158
159 public ManifestParser(Inspector delegate) { 159 public ManifestParser(Inspector delegate) {
160 assert delegate != null; 160 assert delegate != null;
161 inspector = delegate; 161 inspector = delegate;
162 nodeidPool = new Pool<Nodeid>(); 162 nodeidPool = new Pool<Nodeid>();
163 fnamePool = new Pool<String>(); 163 fnamePool = new Pool<String>();
164 flagsPool = new Pool<String>(); 164 flagsPool = new Pool<String>();
165 } 165 }
166 166
167 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { 167 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) {
168 if (!gtg) { 168 if (!gtg) {
169 return; 169 return;
170 } 170 }
171 try { 171 try {
172 gtg = gtg && inspector.begin(revisionNumber, new Nodeid(nodeid, true), linkRevision); 172 gtg = gtg && inspector.begin(revisionNumber, new Nodeid(nodeid, true), linkRevision);
173 int i; 173 Pool<Nodeid> thisRevPool = new Pool<Nodeid>(nodeidPool.size()); // supply hint to minimize map resize/rehash
174 String fname = null; 174 String fname = null;
175 String flags = null; 175 String flags = null;
176 Nodeid nid = null; 176 Nodeid nid = null;
177 byte[] data = da.byteArray(); 177 final char[] nodeidConvertCache = new char[40];
178 for (i = 0; gtg && i < actualLen; i++) { 178 String data = new String(da.byteArray());
179 int x = i; 179 final int dataLen = data.length(); // due to byte->char conversion, may be different
180 for( ; data[i] != '\n' && i < actualLen; i++) { 180 for (int x = 0; gtg && x < dataLen; x++) {
181 if (fname == null && data[i] == 0) { 181 int start = x;
182 fname = fnamePool.unify(new String(data, x, i - x)); 182 x = data.indexOf('\n', x+1);
183 x = i+1; 183 assert x != -1;
184 int z = data.indexOf('\0', start+1);
185 assert z!= -1;
186 assert z < x;
187 fname = data.substring(start, z);
188 if (fnamePool.contains(fname)) {
189 fname = fnamePool.unify(fname);
190 } else {
191 fnamePool.record(fname = new String(fname));
192 }
193 z++; // cursor at first char of nodeid
194 int nodeidLen = x-z < 40 ? x-z : 40; // if x-z > 40, there are flags
195 data.getChars(z, z+nodeidLen, nodeidConvertCache, 0);
196 nid = nodeidPool.unify(Nodeid.fromAscii(nodeidConvertCache, 0, nodeidLen));
197 thisRevPool.record(nid); // memorize revision for the next iteration.
198 if (x-z > 40) {
199 // 'x' and 'l' for executable bits and symlinks?
200 // hg --debug manifest shows 644 for each regular file in my repo
201 // for cpython repo, there are 755 in hg --debug output when 'x' flag is present
202 flags = data.substring(z + nodeidLen, x);
203 if (flagsPool.contains(flags)) {
204 flags = flagsPool.unify(flags);
205 } else {
206 flagsPool.record(flags = new String(flags));
184 } 207 }
185 } 208 }
186 if (i < actualLen) { 209 gtg = gtg && inspector.next(nid, fname, flags);
187 assert data[i] == '\n';
188 int nodeidLen = i - x < 40 ? i-x : 40;
189 nid = nodeidPool.unify(Nodeid.fromAscii(data, x, nodeidLen));
190 if (nodeidLen + x < i) {
191 // 'x' and 'l' for executable bits and symlinks?
192 // hg --debug manifest shows 644 for each regular file in my repo
193 flags = flagsPool.unify(new String(data, x + nodeidLen, i-x-nodeidLen));
194 }
195 gtg = gtg && inspector.next(nid, fname, flags);
196 }
197 nid = null; 210 nid = null;
198 fname = flags = null; 211 fname = flags = null;
199 } 212 }
200 gtg = gtg && inspector.end(revisionNumber); 213 gtg = gtg && inspector.end(revisionNumber);
214 //
215 // keep only actual file revisions, found at this version
216 // (next manifest is likely to refer to most of them, although in specific cases
217 // like commit in another branch a lot may be useless)
218 nodeidPool.clear();
219 nodeidPool = thisRevPool;
201 } catch (IOException ex) { 220 } catch (IOException ex) {
202 throw new HgBadStateException(ex); 221 throw new HgBadStateException(ex);
203 } 222 }
204 } 223 }
205 } 224 }