comparison mercurial/util.py @ 25406:be930f16a52a

util: introduce a bufferedinputpipe utility To restore real-time server output through ssh, we need to use a polling feature (like select) on the pipes used to communicate with the ssh client. However, we cannot use select alongside Python-level buffering of these pipes (because we need to know whether the buffer is non-empty before calling select). Unbuffered performance, on the other hand, is terrible, presumably because the 'readline' call issues 'read(1)' calls until it finds a '\n'. To work around that, we introduce our own overlay that does the buffering by hand, exposing the state of the buffer to the outside world. The usage of polling IO will be introduced later in the 'sshpeer' module. All its logic will be very specific to the way Mercurial communicates over ssh and does not belong in the generic 'util' module.
author Pierre-Yves David <pierre-yves.david@fb.com>
date Sat, 30 May 2015 23:55:24 -0700
parents 504ef9c49f4a
children c2ec81891502
comparison
equal deleted inserted replaced
25405:220a220ed088 25406:be930f16a52a
230 return memoryview(sliceable)[offset:] 230 return memoryview(sliceable)[offset:]
231 231
232 import subprocess 232 import subprocess
233 closefds = os.name == 'posix' 233 closefds = os.name == 'posix'
234 234
# Maximum number of bytes requested from the OS by a single low-level read.
_chunksize = 4096

class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer
    is empty from the outside (allowing collaboration of the buffer with
    polling).

    All data is handled as byte strings, exactly as returned by 'os.read'.

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __init__(self, input):
        """wrap 'input', an object exposing fileno(), close() and closed"""
        self._input = input
        # list of byte-string chunks that were read but not yet consumed
        self._buffer = []
        # set once os.read returned no data; never reset afterwards
        self._eof = False
        # total number of bytes held in self._buffer. Tracked incrementally
        # so 'read' does not have to re-sum every chunk on each iteration.
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True if any data is currently buffered

        This will be used externally as a pre-step for polling IO. If there
        is already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        """the 'closed' attribute of the wrapped input"""
        return self._input.closed

    def fileno(self):
        """return the file descriptor of the wrapped input"""
        return self._input.fileno()

    def close(self):
        """close the wrapped input (buffered data is left untouched)"""
        return self._input.close()

    def read(self, size):
        """return up to 'size' bytes, reading from the input as needed

        Fewer than 'size' bytes are returned only when end of file is
        reached first. 'size' is expected to be a non-negative integer."""
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        """return the next line, including its trailing newline if any

        Extra arguments are accepted for signature compatibility with file
        objects but are ignored. At end of file, whatever remains in the
        buffer (possibly an empty byte string) is returned."""
        if 1 < len(self._buffer):
            # this should not happen because both read and readline end with
            # a _frombuffer call that collapses it.
            self._buffer = [b''.join(self._buffer)]
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find(b'\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                # earlier chunks are known to hold no newline, so only the
                # most recent one needs to be searched
                lfi = self._buffer[-1].find(b'\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif 1 < len(self._buffer):
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer. Whatever is left over is
        collapsed into a single chunk."""
        if size == 0 or not self._buffer:
            return b''
        buf = self._buffer[0]
        if 1 < len(self._buffer):
            buf = b''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            self._buffer = [buf]
        else:
            self._buffer = []
        self._lenbuf = len(buf)
        return data

    def _fillbuffer(self):
        """read data to the buffer

        Performs one os.read of at most '_chunksize' bytes on the input's
        file descriptor. An empty result marks end of file."""
        data = os.read(self._input.fileno(), _chunksize)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)
331
235 def popen2(cmd, env=None, newlines=False): 332 def popen2(cmd, env=None, newlines=False):
236 # Setting bufsize to -1 lets the system decide the buffer size. 333 # Setting bufsize to -1 lets the system decide the buffer size.
237 # The default for bufsize is 0, meaning unbuffered. This leads to 334 # The default for bufsize is 0, meaning unbuffered. This leads to
238 # poor performance on Mac OS X: http://bugs.python.org/issue4194 335 # poor performance on Mac OS X: http://bugs.python.org/issue4194
239 p = subprocess.Popen(cmd, shell=True, bufsize=-1, 336 p = subprocess.Popen(cmd, shell=True, bufsize=-1,