comparison mercurial/util.py @ 11758:a79214972da2 stable

chunkbuffer: use += rather than cStringIO to reduce memory footprint This significantly refactors the read() loop to use a queue of chunks. The queue is alternately filled to at least 256k and then emptied by concatenating onto the output buffer. For very large read sizes, += uses less memory because it can resize the target string in place.
author Matt Mackall <mpm@selenic.com>
date Fri, 06 Aug 2010 12:18:33 -0500
parents c37f35d7f2f5
children 05deba16c5d5 ff5cec76b1c5
comparison
equal deleted inserted replaced
11757:65bd4b8e48bd 11758:a79214972da2
13 hide platform-specific details from the core. 13 hide platform-specific details from the core.
14 """ 14 """
15 15
16 from i18n import _ 16 from i18n import _
17 import error, osutil, encoding 17 import error, osutil, encoding
18 import cStringIO, errno, re, shutil, sys, tempfile, traceback 18 import errno, re, shutil, sys, tempfile, traceback
19 import os, stat, time, calendar, textwrap, unicodedata, signal 19 import os, stat, time, calendar, textwrap, unicodedata, signal
20 import imp 20 import imp
21 21
22 # Python compatibility 22 # Python compatibility
23 23
907 907
def __init__(self, in_iter):
    """in_iter is the iterator that's iterating over the input chunks."""
    # Normalize to an iterator so read() can pull from it lazily,
    # whether the caller passed an iterator or any other iterable.
    self.iter = iter(in_iter)
    # Chunks already fetched from self.iter but not yet consumed by
    # read(); read() refills this in ~256k batches and drains it.
    self._queue = []
def read(self, l):
    """Read L bytes of data from the iterator of chunks of data.
    Returns less than L bytes if the iterator runs dry."""
    pending = self._queue
    out = ''
    remaining = l
    while remaining > 0:
        # Refill the queue when it runs dry: buffer roughly 256k of
        # chunks at a time before draining them into the output.
        if not pending:
            want = 2 ** 18
            for piece in self.iter:
                pending.append(piece)
                want -= len(piece)
                if want <= 0:
                    break
            if not pending:
                # Iterator exhausted; return whatever we collected.
                break

        piece = pending.pop(0)
        remaining -= len(piece)
        if remaining < 0:
            # Chunk overshoots the request: push the unread tail back
            # onto the queue and take only the part we need.
            pending.insert(0, piece[remaining:])
            out += piece[:remaining]
        else:
            # += resizes the target string in place for large reads,
            # keeping the memory footprint low.
            out += piece

    return out
937 942
938 def filechunkiter(f, size=65536, limit=None): 943 def filechunkiter(f, size=65536, limit=None):
939 """Create a generator that produces the data in the file size 944 """Create a generator that produces the data in the file size
940 (default 65536) bytes at a time, up to optional limit (default is 945 (default 65536) bytes at a time, up to optional limit (default is
941 to read all data). Chunks may be less than size bytes if the 946 to read all data). Chunks may be less than size bytes if the