comparison mercurial/mdiff.py @ 43077:687b865b95ad

formatting: byteify all mercurial/ and hgext/ string literals

Done with

python3.7 contrib/byteify-strings.py -i $(hg files 'set:mercurial/**.py - mercurial/thirdparty/** + hgext/**.py - hgext/fsmonitor/pywatchman/** - mercurial/__init__.py')
black -l 80 -t py33 -S $(hg files 'set:**.py - mercurial/thirdparty/** - "contrib/python-zstandard/**" - hgext/fsmonitor/pywatchman/**')

# skip-blame mass-reformatting only

Differential Revision: https://phab.mercurial-scm.org/D6972
author Augie Fackler <augie@google.com>
date Sun, 06 Oct 2019 09:48:39 -0400
parents 2372284d9457
children 66f2cc210a29
comparison
equal deleted inserted replaced
43076:2372284d9457 43077:687b865b95ad
19 pycompat, 19 pycompat,
20 util, 20 util,
21 ) 21 )
22 from .utils import dateutil 22 from .utils import dateutil
23 23
24 _missing_newline_marker = "\\ No newline at end of file\n" 24 _missing_newline_marker = b"\\ No newline at end of file\n"
25 25
26 bdiff = policy.importmod(r'bdiff') 26 bdiff = policy.importmod(r'bdiff')
27 mpatch = policy.importmod(r'mpatch') 27 mpatch = policy.importmod(r'mpatch')
28 28
29 blocks = bdiff.blocks 29 blocks = bdiff.blocks
47 ignoreblanklines ignores changes whose lines are all blank 47 ignoreblanklines ignores changes whose lines are all blank
48 upgrade generates git diffs to avoid data loss 48 upgrade generates git diffs to avoid data loss
49 ''' 49 '''
50 50
51 defaults = { 51 defaults = {
52 'context': 3, 52 b'context': 3,
53 'text': False, 53 b'text': False,
54 'showfunc': False, 54 b'showfunc': False,
55 'git': False, 55 b'git': False,
56 'nodates': False, 56 b'nodates': False,
57 'nobinary': False, 57 b'nobinary': False,
58 'noprefix': False, 58 b'noprefix': False,
59 'index': 0, 59 b'index': 0,
60 'ignorews': False, 60 b'ignorews': False,
61 'ignorewsamount': False, 61 b'ignorewsamount': False,
62 'ignorewseol': False, 62 b'ignorewseol': False,
63 'ignoreblanklines': False, 63 b'ignoreblanklines': False,
64 'upgrade': False, 64 b'upgrade': False,
65 'showsimilarity': False, 65 b'showsimilarity': False,
66 'worddiff': False, 66 b'worddiff': False,
67 'xdiff': False, 67 b'xdiff': False,
68 } 68 }
69 69
70 def __init__(self, **opts): 70 def __init__(self, **opts):
71 opts = pycompat.byteskwargs(opts) 71 opts = pycompat.byteskwargs(opts)
72 for k in self.defaults.keys(): 72 for k in self.defaults.keys():
77 77
78 try: 78 try:
79 self.context = int(self.context) 79 self.context = int(self.context)
80 except ValueError: 80 except ValueError:
81 raise error.Abort( 81 raise error.Abort(
82 _('diff context lines count must be ' 'an integer, not %r') 82 _(b'diff context lines count must be ' b'an integer, not %r')
83 % pycompat.bytestr(self.context) 83 % pycompat.bytestr(self.context)
84 ) 84 )
85 85
86 def copy(self, **kwargs): 86 def copy(self, **kwargs):
87 opts = dict((k, getattr(self, k)) for k in self.defaults) 87 opts = dict((k, getattr(self, k)) for k in self.defaults)
97 if opts.ignorews: 97 if opts.ignorews:
98 text = bdiff.fixws(text, 1) 98 text = bdiff.fixws(text, 1)
99 elif opts.ignorewsamount: 99 elif opts.ignorewsamount:
100 text = bdiff.fixws(text, 0) 100 text = bdiff.fixws(text, 0)
101 if blank and opts.ignoreblanklines: 101 if blank and opts.ignoreblanklines:
102 text = re.sub('\n+', '\n', text).strip('\n') 102 text = re.sub(b'\n+', b'\n', text).strip(b'\n')
103 if opts.ignorewseol: 103 if opts.ignorewseol:
104 text = re.sub(br'[ \t\r\f]+\n', br'\n', text) 104 text = re.sub(br'[ \t\r\f]+\n', br'\n', text)
105 return text 105 return text
106 106
107 107
111 lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1] 111 lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]
112 lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2] 112 lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]
113 s1, e1 = 0, len(lines1) 113 s1, e1 = 0, len(lines1)
114 s2, e2 = 0, len(lines2) 114 s2, e2 = 0, len(lines2)
115 while s1 < e1 or s2 < e2: 115 while s1 < e1 or s2 < e2:
116 i1, i2, btype = s1, s2, '=' 116 i1, i2, btype = s1, s2, b'='
117 if i1 >= e1 or lines1[i1] == 0 or i2 >= e2 or lines2[i2] == 0: 117 if i1 >= e1 or lines1[i1] == 0 or i2 >= e2 or lines2[i2] == 0:
118 # Consume the block of blank lines 118 # Consume the block of blank lines
119 btype = '~' 119 btype = b'~'
120 while i1 < e1 and lines1[i1] == 0: 120 while i1 < e1 and lines1[i1] == 0:
121 i1 += 1 121 i1 += 1
122 while i2 < e2 and lines2[i2] == 0: 122 while i2 < e2 and lines2[i2] == 0:
123 i2 += 1 123 i2 += 1
124 else: 124 else:
172 lbb, ubb = rangeb 172 lbb, ubb = rangeb
173 lba, uba = None, None 173 lba, uba = None, None
174 filteredblocks = [] 174 filteredblocks = []
175 for block in blocks: 175 for block in blocks:
176 (a1, a2, b1, b2), stype = block 176 (a1, a2, b1, b2), stype = block
177 if lbb >= b1 and ubb <= b2 and stype == '=': 177 if lbb >= b1 and ubb <= b2 and stype == b'=':
178 # rangeb is within a single "=" hunk, restrict back linerange1 178 # rangeb is within a single "=" hunk, restrict back linerange1
179 # by offsetting rangeb 179 # by offsetting rangeb
180 lba = lbb - b1 + a1 180 lba = lbb - b1 + a1
181 uba = ubb - b1 + a1 181 uba = ubb - b1 + a1
182 else: 182 else:
183 if b1 <= lbb < b2: 183 if b1 <= lbb < b2:
184 if stype == '=': 184 if stype == b'=':
185 lba = a2 - (b2 - lbb) 185 lba = a2 - (b2 - lbb)
186 else: 186 else:
187 lba = a1 187 lba = a1
188 if b1 < ubb <= b2: 188 if b1 < ubb <= b2:
189 if stype == '=': 189 if stype == b'=':
190 uba = a1 + (ubb - b1) 190 uba = a1 + (ubb - b1)
191 else: 191 else:
192 uba = a2 192 uba = a2
193 if hunkinrange((b1, (b2 - b1)), rangeb): 193 if hunkinrange((b1, (b2 - b1)), rangeb):
194 filteredblocks.append(block) 194 filteredblocks.append(block)
195 if lba is None or uba is None or uba < lba: 195 if lba is None or uba is None or uba < lba:
196 raise error.Abort(_('line range exceeds file size')) 196 raise error.Abort(_(b'line range exceeds file size'))
197 return filteredblocks, (lba, uba) 197 return filteredblocks, (lba, uba)
198 198
199 199
200 def chooseblocksfunc(opts=None): 200 def chooseblocksfunc(opts=None):
201 if ( 201 if (
202 opts is None 202 opts is None
203 or not opts.xdiff 203 or not opts.xdiff
204 or not util.safehasattr(bdiff, 'xdiffblocks') 204 or not util.safehasattr(bdiff, b'xdiffblocks')
205 ): 205 ):
206 return bdiff.blocks 206 return bdiff.blocks
207 else: 207 else:
208 return bdiff.xdiffblocks 208 return bdiff.xdiffblocks
209 209
234 s = [s[1], s1[0], s[3], s1[2]] 234 s = [s[1], s1[0], s[3], s1[2]]
235 235
236 # bdiff sometimes gives huge matches past eof, this check eats them, 236 # bdiff sometimes gives huge matches past eof, this check eats them,
237 # and deals with the special first match case described above 237 # and deals with the special first match case described above
238 if s[0] != s[1] or s[2] != s[3]: 238 if s[0] != s[1] or s[2] != s[3]:
239 type = '!' 239 type = b'!'
240 if opts.ignoreblanklines: 240 if opts.ignoreblanklines:
241 if lines1 is None: 241 if lines1 is None:
242 lines1 = splitnewlines(text1) 242 lines1 = splitnewlines(text1)
243 if lines2 is None: 243 if lines2 is None:
244 lines2 = splitnewlines(text2) 244 lines2 = splitnewlines(text2)
245 old = wsclean(opts, "".join(lines1[s[0] : s[1]])) 245 old = wsclean(opts, b"".join(lines1[s[0] : s[1]]))
246 new = wsclean(opts, "".join(lines2[s[2] : s[3]])) 246 new = wsclean(opts, b"".join(lines2[s[2] : s[3]]))
247 if old == new: 247 if old == new:
248 type = '~' 248 type = b'~'
249 yield s, type 249 yield s, type
250 yield s1, '=' 250 yield s1, b'='
251 251
252 252
253 def unidiff(a, ad, b, bd, fn1, fn2, binary, opts=defaultopts): 253 def unidiff(a, ad, b, bd, fn1, fn2, binary, opts=defaultopts):
254 """Return a unified diff as a (headers, hunks) tuple. 254 """Return a unified diff as a (headers, hunks) tuple.
255 255
261 Set binary=True if either a or b should be taken as a binary file. 261 Set binary=True if either a or b should be taken as a binary file.
262 """ 262 """
263 263
264 def datetag(date, fn=None): 264 def datetag(date, fn=None):
265 if not opts.git and not opts.nodates: 265 if not opts.git and not opts.nodates:
266 return '\t%s' % date 266 return b'\t%s' % date
267 if fn and ' ' in fn: 267 if fn and b' ' in fn:
268 return '\t' 268 return b'\t'
269 return '' 269 return b''
270 270
271 sentinel = [], () 271 sentinel = [], ()
272 if not a and not b: 272 if not a and not b:
273 return sentinel 273 return sentinel
274 274
275 if opts.noprefix: 275 if opts.noprefix:
276 aprefix = bprefix = '' 276 aprefix = bprefix = b''
277 else: 277 else:
278 aprefix = 'a/' 278 aprefix = b'a/'
279 bprefix = 'b/' 279 bprefix = b'b/'
280 280
281 epoch = dateutil.datestr((0, 0)) 281 epoch = dateutil.datestr((0, 0))
282 282
283 fn1 = util.pconvert(fn1) 283 fn1 = util.pconvert(fn1)
284 fn2 = util.pconvert(fn2) 284 fn2 = util.pconvert(fn2)
285 285
286 if binary: 286 if binary:
287 if a and b and len(a) == len(b) and a == b: 287 if a and b and len(a) == len(b) and a == b:
288 return sentinel 288 return sentinel
289 headerlines = [] 289 headerlines = []
290 hunks = ((None, ['Binary file %s has changed\n' % fn1]),) 290 hunks = ((None, [b'Binary file %s has changed\n' % fn1]),)
291 elif not a: 291 elif not a:
292 without_newline = not b.endswith('\n') 292 without_newline = not b.endswith(b'\n')
293 b = splitnewlines(b) 293 b = splitnewlines(b)
294 if a is None: 294 if a is None:
295 l1 = '--- /dev/null%s' % datetag(epoch) 295 l1 = b'--- /dev/null%s' % datetag(epoch)
296 else: 296 else:
297 l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)) 297 l1 = b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
298 l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2)) 298 l2 = b"+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
299 headerlines = [l1, l2] 299 headerlines = [l1, l2]
300 size = len(b) 300 size = len(b)
301 hunkrange = (0, 0, 1, size) 301 hunkrange = (0, 0, 1, size)
302 hunklines = ["@@ -0,0 +1,%d @@\n" % size] + ["+" + e for e in b] 302 hunklines = [b"@@ -0,0 +1,%d @@\n" % size] + [b"+" + e for e in b]
303 if without_newline: 303 if without_newline:
304 hunklines[-1] += '\n' 304 hunklines[-1] += b'\n'
305 hunklines.append(_missing_newline_marker) 305 hunklines.append(_missing_newline_marker)
306 hunks = ((hunkrange, hunklines),) 306 hunks = ((hunkrange, hunklines),)
307 elif not b: 307 elif not b:
308 without_newline = not a.endswith('\n') 308 without_newline = not a.endswith(b'\n')
309 a = splitnewlines(a) 309 a = splitnewlines(a)
310 l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)) 310 l1 = b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
311 if b is None: 311 if b is None:
312 l2 = '+++ /dev/null%s' % datetag(epoch) 312 l2 = b'+++ /dev/null%s' % datetag(epoch)
313 else: 313 else:
314 l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)) 314 l2 = b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
315 headerlines = [l1, l2] 315 headerlines = [l1, l2]
316 size = len(a) 316 size = len(a)
317 hunkrange = (1, size, 0, 0) 317 hunkrange = (1, size, 0, 0)
318 hunklines = ["@@ -1,%d +0,0 @@\n" % size] + ["-" + e for e in a] 318 hunklines = [b"@@ -1,%d +0,0 @@\n" % size] + [b"-" + e for e in a]
319 if without_newline: 319 if without_newline:
320 hunklines[-1] += '\n' 320 hunklines[-1] += b'\n'
321 hunklines.append(_missing_newline_marker) 321 hunklines.append(_missing_newline_marker)
322 hunks = ((hunkrange, hunklines),) 322 hunks = ((hunkrange, hunklines),)
323 else: 323 else:
324 hunks = _unidiff(a, b, opts=opts) 324 hunks = _unidiff(a, b, opts=opts)
325 if not next(hunks): 325 if not next(hunks):
326 return sentinel 326 return sentinel
327 327
328 headerlines = [ 328 headerlines = [
329 "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)), 329 b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
330 "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)), 330 b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
331 ] 331 ]
332 332
333 return headerlines, hunks 333 return headerlines, hunks
334 334
335 335
357 ret = l - opts.context 357 ret = l - opts.context
358 if ret < 0: 358 if ret < 0:
359 return 0 359 return 0
360 return ret 360 return ret
361 361
362 lastfunc = [0, ''] 362 lastfunc = [0, b'']
363 363
364 def yieldhunk(hunk): 364 def yieldhunk(hunk):
365 (astart, a2, bstart, b2, delta) = hunk 365 (astart, a2, bstart, b2, delta) = hunk
366 aend = contextend(a2, len(l1)) 366 aend = contextend(a2, len(l1))
367 alen = aend - astart 367 alen = aend - astart
368 blen = b2 - bstart + aend - a2 368 blen = b2 - bstart + aend - a2
369 369
370 func = "" 370 func = b""
371 if opts.showfunc: 371 if opts.showfunc:
372 lastpos, func = lastfunc 372 lastpos, func = lastfunc
373 # walk backwards from the start of the context up to the start of 373 # walk backwards from the start of the context up to the start of
374 # the previous hunk context until we find a line starting with an 374 # the previous hunk context until we find a line starting with an
375 # alphanumeric char. 375 # alphanumeric char.
393 if blen: 393 if blen:
394 bstart += 1 394 bstart += 1
395 395
396 hunkrange = astart, alen, bstart, blen 396 hunkrange = astart, alen, bstart, blen
397 hunklines = ( 397 hunklines = (
398 ["@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))] 398 [b"@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
399 + delta 399 + delta
400 + [' ' + l1[x] for x in pycompat.xrange(a2, aend)] 400 + [b' ' + l1[x] for x in pycompat.xrange(a2, aend)]
401 ) 401 )
402 # If either file ends without a newline and the last line of 402 # If either file ends without a newline and the last line of
403 # that file is part of a hunk, a marker is printed. If the 403 # that file is part of a hunk, a marker is printed. If the
404 # last line of both files is identical and neither ends in 404 # last line of both files is identical and neither ends in
405 # a newline, print only one marker. That's the only case in 405 # a newline, print only one marker. That's the only case in
406 # which the hunk can end in a shared line without a newline. 406 # which the hunk can end in a shared line without a newline.
407 skip = False 407 skip = False
408 if not t1.endswith('\n') and astart + alen == len(l1) + 1: 408 if not t1.endswith(b'\n') and astart + alen == len(l1) + 1:
409 for i in pycompat.xrange(len(hunklines) - 1, -1, -1): 409 for i in pycompat.xrange(len(hunklines) - 1, -1, -1):
410 if hunklines[i].startswith(('-', ' ')): 410 if hunklines[i].startswith((b'-', b' ')):
411 if hunklines[i].startswith(' '): 411 if hunklines[i].startswith(b' '):
412 skip = True 412 skip = True
413 hunklines[i] += '\n' 413 hunklines[i] += b'\n'
414 hunklines.insert(i + 1, _missing_newline_marker) 414 hunklines.insert(i + 1, _missing_newline_marker)
415 break 415 break
416 if not skip and not t2.endswith('\n') and bstart + blen == len(l2) + 1: 416 if not skip and not t2.endswith(b'\n') and bstart + blen == len(l2) + 1:
417 for i in pycompat.xrange(len(hunklines) - 1, -1, -1): 417 for i in pycompat.xrange(len(hunklines) - 1, -1, -1):
418 if hunklines[i].startswith('+'): 418 if hunklines[i].startswith(b'+'):
419 hunklines[i] += '\n' 419 hunklines[i] += b'\n'
420 hunklines.insert(i + 1, _missing_newline_marker) 420 hunklines.insert(i + 1, _missing_newline_marker)
421 break 421 break
422 yield hunkrange, hunklines 422 yield hunkrange, hunklines
423 423
424 # bdiff.blocks gives us the matching sequences in the files. The loop 424 # bdiff.blocks gives us the matching sequences in the files. The loop
428 hunk = None 428 hunk = None
429 ignoredlines = 0 429 ignoredlines = 0
430 has_hunks = False 430 has_hunks = False
431 for s, stype in allblocks(t1, t2, opts, l1, l2): 431 for s, stype in allblocks(t1, t2, opts, l1, l2):
432 a1, a2, b1, b2 = s 432 a1, a2, b1, b2 = s
433 if stype != '!': 433 if stype != b'!':
434 if stype == '~': 434 if stype == b'~':
435 # The diff context lines are based on t1 content. When 435 # The diff context lines are based on t1 content. When
436 # blank lines are ignored, the new lines offsets must 436 # blank lines are ignored, the new lines offsets must
437 # be adjusted as if equivalent blocks ('~') had the 437 # be adjusted as if equivalent blocks ('~') had the
438 # same sizes on both sides. 438 # same sizes on both sides.
439 ignoredlines += (b2 - b1) - (a2 - a1) 439 ignoredlines += (b2 - b1) - (a2 - a1)
466 delta = hunk[4] 466 delta = hunk[4]
467 else: 467 else:
468 # create a new hunk 468 # create a new hunk
469 hunk = [astart, a2, bstart, b2, delta] 469 hunk = [astart, a2, bstart, b2, delta]
470 470
471 delta[len(delta) :] = [' ' + x for x in l1[astart:a1]] 471 delta[len(delta) :] = [b' ' + x for x in l1[astart:a1]]
472 delta[len(delta) :] = ['-' + x for x in old] 472 delta[len(delta) :] = [b'-' + x for x in old]
473 delta[len(delta) :] = ['+' + x for x in new] 473 delta[len(delta) :] = [b'+' + x for x in new]
474 474
475 if hunk: 475 if hunk:
476 if not has_hunks: 476 if not has_hunks:
477 has_hunks = True 477 has_hunks = True
478 yield True 478 yield True
486 '''print base85-encoded binary diff''' 486 '''print base85-encoded binary diff'''
487 487
488 def fmtline(line): 488 def fmtline(line):
489 l = len(line) 489 l = len(line)
490 if l <= 26: 490 if l <= 26:
491 l = pycompat.bytechr(ord('A') + l - 1) 491 l = pycompat.bytechr(ord(b'A') + l - 1)
492 else: 492 else:
493 l = pycompat.bytechr(l - 26 + ord('a') - 1) 493 l = pycompat.bytechr(l - 26 + ord(b'a') - 1)
494 return '%c%s\n' % (l, util.b85encode(line, True)) 494 return b'%c%s\n' % (l, util.b85encode(line, True))
495 495
496 def chunk(text, csize=52): 496 def chunk(text, csize=52):
497 l = len(text) 497 l = len(text)
498 i = 0 498 i = 0
499 while i < l: 499 while i < l:
500 yield text[i : i + csize] 500 yield text[i : i + csize]
501 i += csize 501 i += csize
502 502
503 if to is None: 503 if to is None:
504 to = '' 504 to = b''
505 if tn is None: 505 if tn is None:
506 tn = '' 506 tn = b''
507 507
508 if to == tn: 508 if to == tn:
509 return '' 509 return b''
510 510
511 # TODO: deltas 511 # TODO: deltas
512 ret = [] 512 ret = []
513 ret.append('GIT binary patch\n') 513 ret.append(b'GIT binary patch\n')
514 ret.append('literal %d\n' % len(tn)) 514 ret.append(b'literal %d\n' % len(tn))
515 for l in chunk(zlib.compress(tn)): 515 for l in chunk(zlib.compress(tn)):
516 ret.append(fmtline(l)) 516 ret.append(fmtline(l))
517 ret.append('\n') 517 ret.append(b'\n')
518 518
519 return ''.join(ret) 519 return b''.join(ret)
520 520
521 521
522 def patchtext(bin): 522 def patchtext(bin):
523 pos = 0 523 pos = 0
524 t = [] 524 t = []
525 while pos < len(bin): 525 while pos < len(bin):
526 p1, p2, l = struct.unpack(">lll", bin[pos : pos + 12]) 526 p1, p2, l = struct.unpack(b">lll", bin[pos : pos + 12])
527 pos += 12 527 pos += 12
528 t.append(bin[pos : pos + l]) 528 t.append(bin[pos : pos + l])
529 pos += l 529 pos += l
530 return "".join(t) 530 return b"".join(t)
531 531
532 532
533 def patch(a, bin): 533 def patch(a, bin):
534 if len(a) == 0: 534 if len(a) == 0:
535 # skip over trivial delta header 535 # skip over trivial delta header
541 def get_matching_blocks(a, b): 541 def get_matching_blocks(a, b):
542 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)] 542 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
543 543
544 544
545 def trivialdiffheader(length): 545 def trivialdiffheader(length):
546 return struct.pack(">lll", 0, 0, length) if length else '' 546 return struct.pack(b">lll", 0, 0, length) if length else b''
547 547
548 548
549 def replacediffheader(oldlen, newlen): 549 def replacediffheader(oldlen, newlen):
550 return struct.pack(">lll", 0, oldlen, newlen) 550 return struct.pack(b">lll", 0, oldlen, newlen)