Mercurial > public > mercurial-scm > hg
comparison mercurial/appendfile.py @ 2089:cb99c711c59f
make appendfile simpler so it does not break with revlogng on windows.
it used to cache open files. this made revlogng break because it wants
to rename files when splitting .i into .i/.d, but open files cannot be
renamed or unlinked on windows.
new code is a bit slower, but safe on linux and windows. a proper fix for
too many opens/closes of changelog/manifest belongs in a different place;
it can get the 10% speed improvement back.
author | Vadim Gelfer <vadim.gelfer@gmail.com> |
---|---|
date | Wed, 19 Apr 2006 08:33:46 -0700 |
parents | 856f0ba200bc |
children | c62112815801 |
comparison
equal
deleted
inserted
replaced
2088:f16435b45780 | 2089:cb99c711c59f |
---|---|
4 # | 4 # |
5 # This software may be used and distributed according to the terms | 5 # This software may be used and distributed according to the terms |
6 # of the GNU General Public License, incorporated herein by reference. | 6 # of the GNU General Public License, incorporated herein by reference. |
7 | 7 |
8 from demandload import * | 8 from demandload import * |
9 demandload(globals(), "cStringIO changelog manifest os tempfile") | 9 demandload(globals(), "cStringIO changelog errno manifest os tempfile") |
10 | 10 |
11 # writes to metadata files are ordered. reads: changelog, manifest, | 11 # writes to metadata files are ordered. reads: changelog, manifest, |
12 # normal files. writes: normal files, manifest, changelog. | 12 # normal files. writes: normal files, manifest, changelog. |
13 | 13 |
14 # manifest contains pointers to offsets in normal files. changelog | 14 # manifest contains pointers to offsets in normal files. changelog |
31 '''implement enough of file protocol to append to revlog file. | 31 '''implement enough of file protocol to append to revlog file. |
32 appended data is written to temp file. reads and seeks span real | 32 appended data is written to temp file. reads and seeks span real |
33 file and temp file. readers cannot see appended data until | 33 file and temp file. readers cannot see appended data until |
34 writedata called.''' | 34 writedata called.''' |
35 | 35 |
36 def __init__(self, fp): | 36 def __init__(self, fp, tmpname): |
37 fd, self.tmpname = tempfile.mkstemp() | 37 if tmpname: |
38 self.tmpfp = os.fdopen(fd, 'ab+') | 38 self.tmpname = tmpname |
39 self.tmpfp = open(self.tmpname, 'ab+') | |
40 else: | |
41 fd, self.tmpname = tempfile.mkstemp() | |
42 self.tmpfp = os.fdopen(fd, 'ab+') | |
39 self.realfp = fp | 43 self.realfp = fp |
40 self.offset = fp.tell() | 44 self.offset = fp.tell() |
41 # real file is not written by anyone else. cache its size so | 45 # real file is not written by anyone else. cache its size so |
42 # seek and read can be fast. | 46 # seek and read can be fast. |
43 self.fpsize = os.fstat(fp.fileno()).st_size | 47 self.realsize = os.fstat(fp.fileno()).st_size |
44 | 48 |
45 def end(self): | 49 def end(self): |
46 self.tmpfp.flush() # make sure the stat is correct | 50 self.tmpfp.flush() # make sure the stat is correct |
47 return self.fpsize + os.fstat(self.tmpfp.fileno()).st_size | 51 return self.realsize + os.fstat(self.tmpfp.fileno()).st_size |
52 | |
53 def tell(self): | |
54 return self.offset | |
55 | |
56 def flush(self): | |
57 self.tmpfp.flush() | |
58 | |
59 def close(self): | |
60 self.realfp.close() | |
61 self.tmpfp.close() | |
48 | 62 |
49 def seek(self, offset, whence=0): | 63 def seek(self, offset, whence=0): |
50 '''virtual file offset spans real file and temp file.''' | 64 '''virtual file offset spans real file and temp file.''' |
51 if whence == 0: | 65 if whence == 0: |
52 self.offset = offset | 66 self.offset = offset |
53 elif whence == 1: | 67 elif whence == 1: |
54 self.offset += offset | 68 self.offset += offset |
55 elif whence == 2: | 69 elif whence == 2: |
56 self.offset = self.end() + offset | 70 self.offset = self.end() + offset |
57 | 71 |
58 if self.offset < self.fpsize: | 72 if self.offset < self.realsize: |
59 self.realfp.seek(self.offset) | 73 self.realfp.seek(self.offset) |
60 else: | 74 else: |
61 self.tmpfp.seek(self.offset - self.fpsize) | 75 self.tmpfp.seek(self.offset - self.realsize) |
62 | 76 |
63 def read(self, count=-1): | 77 def read(self, count=-1): |
64 '''only trick here is reads that span real file and temp file.''' | 78 '''only trick here is reads that span real file and temp file.''' |
65 fp = cStringIO.StringIO() | 79 fp = cStringIO.StringIO() |
66 old_offset = self.offset | 80 old_offset = self.offset |
67 if self.offset < self.fpsize: | 81 if self.offset < self.realsize: |
68 s = self.realfp.read(count) | 82 s = self.realfp.read(count) |
69 fp.write(s) | 83 fp.write(s) |
70 self.offset += len(s) | 84 self.offset += len(s) |
71 if count > 0: | 85 if count > 0: |
72 count -= len(s) | 86 count -= len(s) |
73 if count != 0: | 87 if count != 0: |
74 if old_offset != self.offset: | 88 if old_offset != self.offset: |
75 self.tmpfp.seek(self.offset - self.fpsize) | 89 self.tmpfp.seek(self.offset - self.realsize) |
76 s = self.tmpfp.read(count) | 90 s = self.tmpfp.read(count) |
77 fp.write(s) | 91 fp.write(s) |
78 self.offset += len(s) | 92 self.offset += len(s) |
79 return fp.getvalue() | 93 return fp.getvalue() |
80 | 94 |
81 def write(self, s): | 95 def write(self, s): |
82 '''append to temp file.''' | 96 '''append to temp file.''' |
83 self.tmpfp.seek(0, 2) | 97 self.tmpfp.seek(0, 2) |
84 self.tmpfp.write(s) | 98 self.tmpfp.write(s) |
85 # all writes are appends, so offset must go to end of file. | 99 # all writes are appends, so offset must go to end of file. |
86 self.offset = self.fpsize + self.tmpfp.tell() | 100 self.offset = self.realsize + self.tmpfp.tell() |
87 | |
88 def writedata(self): | |
89 '''copy data from temp file to real file.''' | |
90 self.tmpfp.seek(0) | |
91 s = self.tmpfp.read() | |
92 self.tmpfp.close() | |
93 self.realfp.seek(0, 2) | |
94 # small race here. we write all new data in one call, but | |
95 # reader can see partial update due to python or os. file | |
96 # locking no help: slow, not portable, not reliable over nfs. | |
97 # only safe thing is write to temp file every time and rename, | |
98 # but performance bad when manifest or changelog gets big. | |
99 self.realfp.write(s) | |
100 self.realfp.close() | |
101 | |
102 def __del__(self): | |
103 '''delete temp file even if exception raised.''' | |
104 try: os.unlink(self.tmpname) | |
105 except: pass | |
106 | |
107 class sharedfile(object): | |
108 '''let file objects share a single appendfile safely. each | |
109 sharedfile has own offset, syncs up with appendfile offset before | |
110 read and after read and write.''' | |
111 | |
112 def __init__(self, fp): | |
113 self.fp = fp | |
114 self.offset = 0 | |
115 | |
116 def tell(self): | |
117 return self.offset | |
118 | |
119 def seek(self, offset, whence=0): | |
120 if whence == 0: | |
121 self.offset = offset | |
122 elif whence == 1: | |
123 self.offset += offset | |
124 elif whence == 2: | |
125 self.offset = self.fp.end() + offset | |
126 | |
127 def read(self, count=-1): | |
128 try: | |
129 if self.offset != self.fp.offset: | |
130 self.fp.seek(self.offset) | |
131 return self.fp.read(count) | |
132 finally: | |
133 self.offset = self.fp.offset | |
134 | |
135 def write(self, s): | |
136 try: | |
137 return self.fp.write(s) | |
138 finally: | |
139 self.offset = self.fp.offset | |
140 | |
141 def close(self): | |
142 # revlog wants this. | |
143 pass | |
144 | |
145 def flush(self): | |
146 # revlog wants this. | |
147 pass | |
148 | |
149 def writedata(self): | |
150 self.fp.writedata() | |
151 | 101 |
152 class appendopener(object): | 102 class appendopener(object): |
153 '''special opener for files that only read or append.''' | 103 '''special opener for files that only read or append.''' |
154 | 104 |
155 def __init__(self, opener): | 105 def __init__(self, opener): |
156 self.realopener = opener | 106 self.realopener = opener |
157 # key: file name, value: appendfile object | 107 # key: file name, value: appendfile name |
158 self.fps = {} | 108 self.tmpnames = {} |
159 | 109 |
160 def __call__(self, name, mode='r'): | 110 def __call__(self, name, mode='r'): |
161 '''open file. return same cached appendfile object for every | 111 '''open file.''' |
162 later call.''' | |
163 | 112 |
164 assert mode in 'ra+' | 113 assert mode in 'ra+' |
165 fp = self.fps.get(name) | 114 try: |
166 if fp is None: | 115 realfp = self.realopener(name, 'r') |
167 fp = appendfile(self.realopener(name, 'a+')) | 116 except IOError, err: |
168 self.fps[name] = fp | 117 if err.errno != errno.ENOENT: raise |
169 return sharedfile(fp) | 118 realfp = self.realopener(name, 'w+') |
119 tmpname = self.tmpnames.get(name) | |
120 fp = appendfile(realfp, tmpname) | |
121 if tmpname is None: | |
122 self.tmpnames[name] = fp.tmpname | |
123 return fp | |
170 | 124 |
171 def writedata(self): | 125 def writedata(self): |
172 '''copy data from temp files to real files.''' | 126 '''copy data from temp files to real files.''' |
173 # write .d file before .i file. | 127 # write .d file before .i file. |
174 fps = self.fps.items() | 128 tmpnames = self.tmpnames.items() |
175 fps.sort() | 129 tmpnames.sort() |
176 for name, fp in fps: | 130 for name, tmpname in tmpnames: |
177 fp.writedata() | 131 fp = open(tmpname, 'rb') |
132 s = fp.read() | |
133 fp.close() | |
134 fp = self.realopener(name, 'a') | |
135 fp.write(s) | |
136 fp.close() | |
137 | |
138 def __del__(self): | |
139 for tmpname in self.tmpnames.itervalues(): | |
140 os.unlink(tmpname) | |
178 | 141 |
179 # files for changelog and manifest are in different appendopeners, so | 142 # files for changelog and manifest are in different appendopeners, so |
180 # not mixed up together. | 143 # not mixed up together. |
181 | 144 |
182 class appendchangelog(changelog.changelog, appendopener): | 145 class appendchangelog(changelog.changelog, appendopener): |