Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/patch.py @ 37731:5471348921c1
patch: buffer lines for the same hunk
Instead of yielding tokens directly, buffer them if they belong to the same
hunk. This makes it easier for the upcoming new worddiff algorithm to focus
only on the diff hunk, instead of having to worry about other content.
This breaks how the existing experimental worddiff algorithm works, so the
algorithm was removed, and related tests are disabled for now. The next patch
will add a new worddiff algorithm.
Differential Revision: https://phab.mercurial-scm.org/D3211
author | Jun Wu <quark@fb.com> |
---|---|
date | Mon, 19 Mar 2018 04:28:30 -0700 |
parents | 8d730f96e792 |
children | 35632d392279 |
comparison
equal
deleted
inserted
replaced
37730:8d730f96e792 | 37731:5471348921c1 |
---|---|
9 from __future__ import absolute_import, print_function | 9 from __future__ import absolute_import, print_function |
10 | 10 |
11 import collections | 11 import collections |
12 import contextlib | 12 import contextlib |
13 import copy | 13 import copy |
14 import difflib | |
15 import email | 14 import email |
16 import errno | 15 import errno |
17 import hashlib | 16 import hashlib |
18 import os | 17 import os |
19 import posixpath | 18 import posixpath |
2479 except GitDiffRequired: | 2478 except GitDiffRequired: |
2480 return difffn(opts.copy(git=True), None) | 2479 return difffn(opts.copy(git=True), None) |
2481 else: | 2480 else: |
2482 return difffn(opts, None) | 2481 return difffn(opts, None) |
2483 | 2482 |
2483 def diffsinglehunk(hunklines): | |
2484 """yield tokens for a list of lines in a single hunk""" | |
2485 for line in hunklines: | |
2486 # chomp | |
2487 chompline = line.rstrip('\n') | |
2488 # highlight tabs and trailing whitespace | |
2489 stripline = chompline.rstrip() | |
2490 if line[0] == '-': | |
2491 label = 'diff.deleted' | |
2492 elif line[0] == '+': | |
2493 label = 'diff.inserted' | |
2494 else: | |
2495 raise error.ProgrammingError('unexpected hunk line: %s' % line) | |
2496 for token in tabsplitter.findall(stripline): | |
2497 if '\t' == token[0]: | |
2498 yield (token, 'diff.tab') | |
2499 else: | |
2500 yield (token, label) | |
2501 | |
2502 if chompline != stripline: | |
2503 yield (chompline[len(stripline):], 'diff.trailingwhitespace') | |
2504 if chompline != line: | |
2505 yield (line[len(chompline):], '') | |
2506 | |
2484 def difflabel(func, *args, **kw): | 2507 def difflabel(func, *args, **kw): |
2485 '''yields 2-tuples of (output, label) based on the output of func()''' | 2508 '''yields 2-tuples of (output, label) based on the output of func()''' |
2486 inlinecolor = False | |
2487 if kw.get(r'opts'): | |
2488 inlinecolor = kw[r'opts'].worddiff | |
2489 headprefixes = [('diff', 'diff.diffline'), | 2509 headprefixes = [('diff', 'diff.diffline'), |
2490 ('copy', 'diff.extended'), | 2510 ('copy', 'diff.extended'), |
2491 ('rename', 'diff.extended'), | 2511 ('rename', 'diff.extended'), |
2492 ('old', 'diff.extended'), | 2512 ('old', 'diff.extended'), |
2493 ('new', 'diff.extended'), | 2513 ('new', 'diff.extended'), |
2495 ('index', 'diff.extended'), | 2515 ('index', 'diff.extended'), |
2496 ('similarity', 'diff.extended'), | 2516 ('similarity', 'diff.extended'), |
2497 ('---', 'diff.file_a'), | 2517 ('---', 'diff.file_a'), |
2498 ('+++', 'diff.file_b')] | 2518 ('+++', 'diff.file_b')] |
2499 textprefixes = [('@', 'diff.hunk'), | 2519 textprefixes = [('@', 'diff.hunk'), |
2500 ('-', 'diff.deleted'), | 2520 # - and + are handled by diffsinglehunk |
2501 ('+', 'diff.inserted')] | 2521 ] |
2502 head = False | 2522 head = False |
2523 | |
2524 # buffers a hunk, i.e. adjacent "-", "+" lines without other changes. | |
2525 hunkbuffer = [] | |
2526 def consumehunkbuffer(): | |
2527 if hunkbuffer: | |
2528 for token in diffsinglehunk(hunkbuffer): | |
2529 yield token | |
2530 hunkbuffer[:] = [] | |
2531 | |
2503 for chunk in func(*args, **kw): | 2532 for chunk in func(*args, **kw): |
2504 lines = chunk.split('\n') | 2533 lines = chunk.split('\n') |
2505 matches = {} | |
2506 if inlinecolor: | |
2507 matches = _findmatches(lines) | |
2508 linecount = len(lines) | 2534 linecount = len(lines) |
2509 for i, line in enumerate(lines): | 2535 for i, line in enumerate(lines): |
2510 if head: | 2536 if head: |
2511 if line.startswith('@'): | 2537 if line.startswith('@'): |
2512 head = False | 2538 head = False |
2513 else: | 2539 else: |
2514 if line and not line.startswith((' ', '+', '-', '@', '\\')): | 2540 if line and not line.startswith((' ', '+', '-', '@', '\\')): |
2515 head = True | 2541 head = True |
2516 stripline = line | |
2517 diffline = False | 2542 diffline = False |
2518 if not head and line and line.startswith(('+', '-')): | 2543 if not head and line and line.startswith(('+', '-')): |
2519 # highlight tabs and trailing whitespace, but only in | |
2520 # changed lines | |
2521 stripline = line.rstrip() | |
2522 diffline = True | 2544 diffline = True |
2523 | 2545 |
2524 prefixes = textprefixes | 2546 prefixes = textprefixes |
2525 if head: | 2547 if head: |
2526 prefixes = headprefixes | 2548 prefixes = headprefixes |
2527 for prefix, label in prefixes: | 2549 if diffline: |
2528 if stripline.startswith(prefix): | 2550 # buffered |
2529 if diffline: | 2551 bufferedline = line |
2530 if i in matches: | 2552 if i + 1 < linecount: |
2531 for t, l in _inlinediff(lines[i].rstrip(), | 2553 bufferedline += "\n" |
2532 lines[matches[i]].rstrip(), | 2554 hunkbuffer.append(bufferedline) |
2533 label): | 2555 else: |
2534 yield (t, l) | 2556 # unbuffered |
2535 else: | 2557 for token in consumehunkbuffer(): |
2536 for token in tabsplitter.findall(stripline): | 2558 yield token |
2537 if token.startswith('\t'): | 2559 stripline = line.rstrip() |
2538 yield (token, 'diff.tab') | 2560 for prefix, label in prefixes: |
2539 else: | 2561 if stripline.startswith(prefix): |
2540 yield (token, label) | |
2541 else: | |
2542 yield (stripline, label) | 2562 yield (stripline, label) |
2543 break | 2563 if line != stripline: |
2544 else: | 2564 yield (line[len(stripline):], |
2545 yield (line, '') | 2565 'diff.trailingwhitespace') |
2546 if line != stripline: | |
2547 yield (line[len(stripline):], 'diff.trailingwhitespace') | |
2548 if i + 1 < linecount: | |
2549 yield ('\n', '') | |
2550 | |
2551 def _findmatches(slist): | |
2552 '''Look for insertion matches to deletion and returns a dict of | |
2553 correspondences. | |
2554 ''' | |
2555 lastmatch = 0 | |
2556 matches = {} | |
2557 for i, line in enumerate(slist): | |
2558 if line == '': | |
2559 continue | |
2560 if line.startswith('-'): | |
2561 lastmatch = max(lastmatch, i) | |
2562 newgroup = False | |
2563 for j, newline in enumerate(slist[lastmatch + 1:]): | |
2564 if newline == '': | |
2565 continue | |
2566 if newline.startswith('-') and newgroup: # too far, no match | |
2567 break | |
2568 if newline.startswith('+'): # potential match | |
2569 newgroup = True | |
2570 sim = difflib.SequenceMatcher(None, line, newline).ratio() | |
2571 if sim > 0.7: | |
2572 lastmatch = lastmatch + 1 + j | |
2573 matches[i] = lastmatch | |
2574 matches[lastmatch] = i | |
2575 break | 2566 break |
2576 return matches | 2567 else: |
2577 | 2568 yield (line, '') |
2578 def _inlinediff(s1, s2, operation): | 2569 if i + 1 < linecount: |
2579 '''Perform string diff to highlight specific changes.''' | 2570 yield ('\n', '') |
2580 operation_skip = ('+', '?') if operation == 'diff.deleted' else ('-', '?') | 2571 for token in consumehunkbuffer(): |
2581 if operation == 'diff.deleted': | 2572 yield token |
2582 s2, s1 = s1, s2 | |
2583 | |
2584 buff = [] | |
2585 # we never want to higlight the leading +- | |
2586 if operation == 'diff.deleted' and s2.startswith('-'): | |
2587 label = operation | |
2588 token = '-' | |
2589 s2 = s2[1:] | |
2590 s1 = s1[1:] | |
2591 elif operation == 'diff.inserted' and s1.startswith('+'): | |
2592 label = operation | |
2593 token = '+' | |
2594 s2 = s2[1:] | |
2595 s1 = s1[1:] | |
2596 else: | |
2597 raise error.ProgrammingError("Case not expected, operation = %s" % | |
2598 operation) | |
2599 | |
2600 s = difflib.ndiff(_nonwordre.split(s2), _nonwordre.split(s1)) | |
2601 for part in s: | |
2602 if part.startswith(operation_skip) or len(part) == 2: | |
2603 continue | |
2604 l = operation + '.highlight' | |
2605 if part.startswith(' '): | |
2606 l = operation | |
2607 if part[2:] == '\t': | |
2608 l = 'diff.tab' | |
2609 if l == label: # contiguous token with same label | |
2610 token += part[2:] | |
2611 continue | |
2612 else: | |
2613 buff.append((token, label)) | |
2614 label = l | |
2615 token = part[2:] | |
2616 buff.append((token, label)) | |
2617 | |
2618 return buff | |
2619 | 2573 |
2620 def diffui(*args, **kw): | 2574 def diffui(*args, **kw): |
2621 '''like diff(), but yields 2-tuples of (output, label) for ui.write()''' | 2575 '''like diff(), but yields 2-tuples of (output, label) for ui.write()''' |
2622 return difflabel(diff, *args, **kw) | 2576 return difflabel(diff, *args, **kw) |
2623 | 2577 |