Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/util.py @ 25406:be930f16a52a
util: introduce a bufferedinputpipe utility
To restore real-time server output through ssh, we need to use a polling feature
(like select) on the pipes used to communicate with the ssh client. However,
we cannot use select alongside Python-level buffering of these pipes (because we
need to know if the buffer is non-empty before calling select).
However, unbuffered performance is terrible, presumably because the 'readline'
call issues 'read(1)' calls until it finds a '\n'. To work around that, we
introduce our own overlay that does buffering by hand, exposing the state of the
buffer to the outside world.
The usage of polling IO will be introduced later in the 'sshpeer' module. All
its logic will be very specific to the way Mercurial communicates over ssh and
does not belong in the generic 'util' module.
author | Pierre-Yves David <pierre-yves.david@fb.com> |
---|---|
date | Sat, 30 May 2015 23:55:24 -0700 |
parents | 504ef9c49f4a |
children | c2ec81891502 |
comparison
equal
deleted
inserted
replaced
25405:220a220ed088 | 25406:be930f16a52a |
---|---|
230 return memoryview(sliceable)[offset:] | 230 return memoryview(sliceable)[offset:] |
231 | 231 |
232 import subprocess | 232 import subprocess |
233 closefds = os.name == 'posix' | 233 closefds = os.name == 'posix' |
234 | 234 |
# maximum number of bytes requested from the OS by one buffer refill
_chunksize = 4096

class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the outside (allowing collaboration of the buffer with polling).

    All data are handled as bytes: they come straight from 'os.read', so the
    separators and empty values used internally are bytes literals too. (On
    Python 2 these are plain 'str' objects.)

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __init__(self, input):
        """wrap 'input', an object exposing fileno(), close() and closed"""
        self._input = input
        # list of chunks read from the descriptor and not yet consumed
        self._buffer = []
        # set once os.read returned no data
        self._eof = False

    @property
    def hasbuffer(self):
        """True if any data is currently buffered

        This will be used externally as a pre-step for polling IO. If there is
        already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        """whether the wrapped input object reports itself as closed"""
        return self._input.closed

    def fileno(self):
        """return the file descriptor of the wrapped input object"""
        return self._input.fileno()

    def close(self):
        """close the wrapped input object (buffered data are left as is)"""
        return self._input.close()

    def read(self, size):
        """return up to 'size' bytes

        Fewer bytes are returned only if end of file is reached first."""
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        """return data up to and including the next line feed

        If end of file is reached before a line feed is found, everything
        still buffered is returned. Arguments are accepted for signature
        compatibility with file objects but are ignored."""
        if 1 < len(self._buffer):
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapses it.
            self._buffer = [b''.join(self._buffer)]
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find(b'\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                # earlier chunks are known to hold no line feed, so only the
                # most recent chunk needs to be searched
                lfi = self._buffer[-1].find(b'\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif 1 < len(self._buffer):
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    @property
    def _lenbuf(self):
        """return the current length of buffered data"""
        return sum(len(d) for d in self._buffer)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer. Whatever remains is collapsed
        into a single chunk."""
        if size == 0 or not self._buffer:
            return b''
        buf = self._buffer[0]
        if 1 < len(self._buffer):
            buf = b''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            self._buffer = [buf]
        else:
            self._buffer = []
        return data

    def _fillbuffer(self):
        """read data to the buffer

        A single os.read call is issued; an empty result marks end of file."""
        data = os.read(self._input.fileno(), _chunksize)
        if not data:
            self._eof = True
        else:
            # inefficient add
            self._buffer.append(data)
331 | |
235 def popen2(cmd, env=None, newlines=False): | 332 def popen2(cmd, env=None, newlines=False): |
236 # Setting bufsize to -1 lets the system decide the buffer size. | 333 # Setting bufsize to -1 lets the system decide the buffer size. |
237 # The default for bufsize is 0, meaning unbuffered. This leads to | 334 # The default for bufsize is 0, meaning unbuffered. This leads to |
238 # poor performance on Mac OS X: http://bugs.python.org/issue4194 | 335 # poor performance on Mac OS X: http://bugs.python.org/issue4194 |
239 p = subprocess.Popen(cmd, shell=True, bufsize=-1, | 336 p = subprocess.Popen(cmd, shell=True, bufsize=-1, |