comparison: mercurial/util.py @ 11758:a79214972da2 (stable)
chunkbuffer: use += rather than cStringIO to reduce memory footprint
This significantly refactors the read() loop to use a queue of chunks.
The queue is alternately filled to at least 256k and then emptied by
concatenating onto the output buffer.
For very large read sizes, += uses less memory because it can resize
the target string in place.
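
To illustrate the memory argument, here is a standalone sketch, not code from this changeset: the helper names are made up, it uses text strings and io.StringIO so it runs on current Pythons, and the 2010 code operated on Python 2 byte strings where the same += behaviour applies. A StringIO-style collector holds every chunk in its internal buffer and getvalue() then produces a second full-size copy, whereas CPython can usually grow the target of += in place when nothing else references it.

import io

def collect_stringio(chunks):
    # old pattern: gather chunks in a collector, then copy the whole
    # thing out with getvalue() -- a second full-size copy at the end
    collector = io.StringIO()
    for chunk in chunks:
        collector.write(chunk)
    return collector.getvalue()

def collect_concat(chunks):
    # new pattern: concatenate onto the output buffer; when buf holds the
    # only reference to its string, CPython can resize it in place
    # instead of allocating a fresh object for every chunk
    buf = ''
    for chunk in chunks:
        buf += chunk
    return buf

# both produce the same result for an iterator of chunks
data = collect_concat('x' * 65536 for _ in range(16))
assert len(data) == 16 * 65536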
author    Matt Mackall <mpm@selenic.com>
date      Fri, 06 Aug 2010 12:18:33 -0500
parents   c37f35d7f2f5
children  05deba16c5d5 ff5cec76b1c5
comparing 11757:65bd4b8e48bd with 11758:a79214972da2

diff -r 65bd4b8e48bd -r a79214972da2 mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -13,11 +13,11 @@
 hide platform-specific details from the core.
 """
 
 from i18n import _
 import error, osutil, encoding
-import cStringIO, errno, re, shutil, sys, tempfile, traceback
+import errno, re, shutil, sys, tempfile, traceback
 import os, stat, time, calendar, textwrap, unicodedata, signal
 import imp
 
 # Python compatibility
 
@@ -907,35 +907,40 @@
 
     def __init__(self, in_iter):
         """in_iter is the iterator that's iterating over the input chunks.
         targetsize is how big a buffer to try to maintain."""
         self.iter = iter(in_iter)
-        self.buf = ''
-        self.targetsize = 2**16
+        self._queue = []
 
     def read(self, l):
         """Read L bytes of data from the iterator of chunks of data.
         Returns less than L bytes if the iterator runs dry."""
-        if l > len(self.buf) and self.iter:
-            # Clamp to a multiple of self.targetsize
-            targetsize = max(l, self.targetsize)
-            collector = cStringIO.StringIO()
-            collector.write(self.buf)
-            collected = len(self.buf)
-            for chunk in self.iter:
-                collector.write(chunk)
-                collected += len(chunk)
-                if collected >= targetsize:
-                    break
-            if collected < targetsize:
-                self.iter = False
-            self.buf = collector.getvalue()
-        if len(self.buf) == l:
-            s, self.buf = str(self.buf), ''
-        else:
-            s, self.buf = self.buf[:l], buffer(self.buf, l)
-        return s
+        left = l
+        buf = ''
+        queue = self._queue
+        while left > 0:
+            # refill the queue
+            if not queue:
+                target = 2**18
+                for chunk in self.iter:
+                    queue.append(chunk)
+                    target -= len(chunk)
+                    if target <= 0:
+                        break
+                if not queue:
+                    break
+
+            chunk = queue.pop(0)
+            left -= len(chunk)
+            if left < 0:
+                queue.insert(0, chunk[left:])
+                buf += chunk[:left]
+            else:
+                buf += chunk
+
+        return buf
+
 
 def filechunkiter(f, size=65536, limit=None):
     """Create a generator that produces the data in the file size
     (default 65536) bytes at a time, up to optional limit (default is
     to read all data). Chunks may be less than size bytes if the
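
For context, a hedged usage sketch of how the two helpers in this hunk fit together (assuming a Python 2-era environment with mercurial on the path; the file name and the handle() consumer are hypothetical): filechunkiter yields a file in 64k pieces, and chunkbuffer re-blocks that stream into whatever read sizes the caller asks for, refilling its internal queue roughly 256k at a time.

from mercurial import util

f = open('data.bin', 'rb')                    # hypothetical input file
cb = util.chunkbuffer(util.filechunkiter(f))  # wrap the 64k chunk iterator
while True:
    block = cb.read(4096)     # may return fewer bytes once the source runs dry
    if not block:
        break
    handle(block)             # hypothetical consumer of each block
f.close()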