Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/patch.py @ 14451:c78d41db6f88
patch: refactor file creation/removal detection
The patcher has to know if a file is being created or removed to check if the
target already exists, or to actually unlink the file when a hunk emptying it
is applied. This was done by embedding the creation/removal information in the
first (and only) hunk attached to the file.
There are two problems with this approach:
- creation/removal is really a property of the file being patched and not its
hunk.
- for regular patches, file creation cannot be deduced at parsing time: there
are cases where the *stripped* file paths must be compared. Modifying hunks
after their creation is clumsy and prevents further refactorings related to
copy handling.
Instead, we delegate this job to selectfile() which has all the relevant
information, and remove the hunk createfile() and rmfile() methods.
author | Patrick Mezard <pmezard@gmail.com> |
---|---|
date | Fri, 27 May 2011 21:50:09 +0200 |
parents | cbe13e6bdc34 |
children | ee574cfd0c32 |
comparison
equal
deleted
inserted
replaced
14450:d1a1578c5f78 | 14451:c78d41db6f88 |
---|---|
502 unidesc = re.compile('@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@') | 502 unidesc = re.compile('@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@') |
503 contextdesc = re.compile('(---|\*\*\*) (\d+)(,(\d+))? (---|\*\*\*)') | 503 contextdesc = re.compile('(---|\*\*\*) (\d+)(,(\d+))? (---|\*\*\*)') |
504 eolmodes = ['strict', 'crlf', 'lf', 'auto'] | 504 eolmodes = ['strict', 'crlf', 'lf', 'auto'] |
505 | 505 |
506 class patchfile(object): | 506 class patchfile(object): |
507 def __init__(self, ui, fname, backend, mode, missing=False, | 507 def __init__(self, ui, fname, backend, mode, create, remove, missing=False, |
508 eolmode='strict'): | 508 eolmode='strict'): |
509 self.fname = fname | 509 self.fname = fname |
510 self.eolmode = eolmode | 510 self.eolmode = eolmode |
511 self.eol = None | 511 self.eol = None |
512 self.backend = backend | 512 self.backend = backend |
513 self.ui = ui | 513 self.ui = ui |
514 self.lines = [] | 514 self.lines = [] |
515 self.exists = False | 515 self.exists = False |
516 self.missing = missing | 516 self.missing = missing |
517 self.mode = mode | 517 self.mode = mode |
518 self.create = create | |
519 self.remove = remove | |
518 if not missing: | 520 if not missing: |
519 try: | 521 try: |
520 data, mode = self.backend.getfile(fname) | 522 data, mode = self.backend.getfile(fname) |
521 if data: | 523 if data: |
522 self.lines = data.splitlines(True) | 524 self.lines = data.splitlines(True) |
618 | 620 |
619 if self.missing: | 621 if self.missing: |
620 self.rej.append(h) | 622 self.rej.append(h) |
621 return -1 | 623 return -1 |
622 | 624 |
623 if self.exists and h.createfile(): | 625 if self.exists and self.create: |
624 self.ui.warn(_("file %s already exists\n") % self.fname) | 626 self.ui.warn(_("file %s already exists\n") % self.fname) |
625 self.rej.append(h) | 627 self.rej.append(h) |
626 return -1 | 628 return -1 |
627 | 629 |
628 if isinstance(h, binhunk): | 630 if isinstance(h, binhunk): |
629 if h.rmfile(): | 631 if self.remove: |
630 self.backend.unlink(self.fname) | 632 self.backend.unlink(self.fname) |
631 else: | 633 else: |
632 self.lines[:] = h.new() | 634 self.lines[:] = h.new() |
633 self.offset += len(h.new()) | 635 self.offset += len(h.new()) |
634 self.dirty = True | 636 self.dirty = True |
652 orig_start = start | 654 orig_start = start |
653 # if there's skew we want to emit the "(offset %d lines)" even | 655 # if there's skew we want to emit the "(offset %d lines)" even |
654 # when the hunk cleanly applies at start + skew, so skip the | 656 # when the hunk cleanly applies at start + skew, so skip the |
655 # fast case code | 657 # fast case code |
656 if self.skew == 0 and diffhelpers.testhunk(old, self.lines, start) == 0: | 658 if self.skew == 0 and diffhelpers.testhunk(old, self.lines, start) == 0: |
657 if h.rmfile(): | 659 if self.remove: |
658 self.backend.unlink(self.fname) | 660 self.backend.unlink(self.fname) |
659 else: | 661 else: |
660 self.lines[start : start + h.lena] = h.new() | 662 self.lines[start : start + h.lena] = h.new() |
661 self.offset += h.lenb - h.lena | 663 self.offset += h.lenb - h.lena |
662 self.dirty = True | 664 self.dirty = True |
708 self.writelines(self.fname, self.lines, self.mode) | 710 self.writelines(self.fname, self.lines, self.mode) |
709 self.write_rej() | 711 self.write_rej() |
710 return len(self.rej) | 712 return len(self.rej) |
711 | 713 |
712 class hunk(object): | 714 class hunk(object): |
713 def __init__(self, desc, num, lr, context, create=False, remove=False): | 715 def __init__(self, desc, num, lr, context): |
714 self.number = num | 716 self.number = num |
715 self.desc = desc | 717 self.desc = desc |
716 self.hunk = [desc] | 718 self.hunk = [desc] |
717 self.a = [] | 719 self.a = [] |
718 self.b = [] | 720 self.b = [] |
721 if lr is not None: | 723 if lr is not None: |
722 if context: | 724 if context: |
723 self.read_context_hunk(lr) | 725 self.read_context_hunk(lr) |
724 else: | 726 else: |
725 self.read_unified_hunk(lr) | 727 self.read_unified_hunk(lr) |
726 self.create = create | |
727 self.remove = remove and not create | |
728 | 728 |
729 def getnormalized(self): | 729 def getnormalized(self): |
730 """Return a copy with line endings normalized to LF.""" | 730 """Return a copy with line endings normalized to LF.""" |
731 | 731 |
732 def normalize(lines): | 732 def normalize(lines): |
736 line = line[:-2] + '\n' | 736 line = line[:-2] + '\n' |
737 nlines.append(line) | 737 nlines.append(line) |
738 return nlines | 738 return nlines |
739 | 739 |
740 # Dummy object, it is rebuilt manually | 740 # Dummy object, it is rebuilt manually |
741 nh = hunk(self.desc, self.number, None, None, False, False) | 741 nh = hunk(self.desc, self.number, None, None) |
742 nh.number = self.number | 742 nh.number = self.number |
743 nh.desc = self.desc | 743 nh.desc = self.desc |
744 nh.hunk = self.hunk | 744 nh.hunk = self.hunk |
745 nh.a = normalize(self.a) | 745 nh.a = normalize(self.a) |
746 nh.b = normalize(self.b) | 746 nh.b = normalize(self.b) |
747 nh.starta = self.starta | 747 nh.starta = self.starta |
748 nh.startb = self.startb | 748 nh.startb = self.startb |
749 nh.lena = self.lena | 749 nh.lena = self.lena |
750 nh.lenb = self.lenb | 750 nh.lenb = self.lenb |
751 nh.create = self.create | |
752 nh.remove = self.remove | |
753 return nh | 751 return nh |
754 | 752 |
755 def read_unified_hunk(self, lr): | 753 def read_unified_hunk(self, lr): |
756 m = unidesc.match(self.desc) | 754 m = unidesc.match(self.desc) |
757 if not m: | 755 if not m: |
889 lr.push(l) | 887 lr.push(l) |
890 | 888 |
891 def complete(self): | 889 def complete(self): |
892 return len(self.a) == self.lena and len(self.b) == self.lenb | 890 return len(self.a) == self.lena and len(self.b) == self.lenb |
893 | 891 |
894 def createfile(self): | |
895 return self.starta == 0 and self.lena == 0 and self.create | |
896 | |
897 def rmfile(self): | |
898 return self.startb == 0 and self.lenb == 0 and self.remove | |
899 | |
900 def fuzzit(self, l, fuzz, toponly): | 892 def fuzzit(self, l, fuzz, toponly): |
901 # this removes context lines from the top and bottom of list 'l'. It | 893 # this removes context lines from the top and bottom of list 'l'. It |
902 # checks the hunk to make sure only context lines are removed, and then | 894 # checks the hunk to make sure only context lines are removed, and then |
903 # returns a new shortened list of lines. | 895 # returns a new shortened list of lines. |
904 fuzz = min(fuzz, len(l)-1) | 896 fuzz = min(fuzz, len(l)-1) |
940 def new(self, fuzz=0, toponly=False): | 932 def new(self, fuzz=0, toponly=False): |
941 return self.fuzzit(self.b, fuzz, toponly) | 933 return self.fuzzit(self.b, fuzz, toponly) |
942 | 934 |
943 class binhunk: | 935 class binhunk: |
944 'A binary patch file. Only understands literals so far.' | 936 'A binary patch file. Only understands literals so far.' |
945 def __init__(self, gitpatch, lr): | 937 def __init__(self, lr): |
946 self.gitpatch = gitpatch | |
947 self.text = None | 938 self.text = None |
948 self.hunk = ['GIT binary patch\n'] | 939 self.hunk = ['GIT binary patch\n'] |
949 self._read(lr) | 940 self._read(lr) |
950 | |
951 def createfile(self): | |
952 return self.gitpatch.op == 'ADD' | |
953 | |
954 def rmfile(self): | |
955 return self.gitpatch.op == 'DELETE' | |
956 | 941 |
957 def complete(self): | 942 def complete(self): |
958 return self.text is not None | 943 return self.text is not None |
959 | 944 |
960 def new(self): | 945 def new(self): |
1018 def selectfile(backend, afile_orig, bfile_orig, hunk, strip, gp): | 1003 def selectfile(backend, afile_orig, bfile_orig, hunk, strip, gp): |
1019 if gp: | 1004 if gp: |
1020 # Git patches do not play games. Excluding copies from the | 1005 # Git patches do not play games. Excluding copies from the |
1021 # following heuristic avoids a lot of confusion | 1006 # following heuristic avoids a lot of confusion |
1022 fname = pathstrip(gp.path, strip - 1)[1] | 1007 fname = pathstrip(gp.path, strip - 1)[1] |
1023 missing = not hunk.createfile() and not backend.exists(fname) | 1008 create = gp.op == 'ADD' |
1024 return fname, missing | 1009 remove = gp.op == 'DELETE' |
1010 missing = not create and not backend.exists(fname) | |
1011 return fname, missing, create, remove | |
1025 nulla = afile_orig == "/dev/null" | 1012 nulla = afile_orig == "/dev/null" |
1026 nullb = bfile_orig == "/dev/null" | 1013 nullb = bfile_orig == "/dev/null" |
1014 create = nulla and hunk.starta == 0 and hunk.lena == 0 | |
1015 remove = nullb and hunk.startb == 0 and hunk.lenb == 0 | |
1027 abase, afile = pathstrip(afile_orig, strip) | 1016 abase, afile = pathstrip(afile_orig, strip) |
1028 gooda = not nulla and backend.exists(afile) | 1017 gooda = not nulla and backend.exists(afile) |
1029 bbase, bfile = pathstrip(bfile_orig, strip) | 1018 bbase, bfile = pathstrip(bfile_orig, strip) |
1030 if afile == bfile: | 1019 if afile == bfile: |
1031 goodb = gooda | 1020 goodb = gooda |
1032 else: | 1021 else: |
1033 goodb = not nullb and backend.exists(bfile) | 1022 goodb = not nullb and backend.exists(bfile) |
1034 createfunc = hunk.createfile | 1023 missing = not goodb and not gooda and not create |
1035 missing = not goodb and not gooda and not createfunc() | |
1036 | 1024 |
1037 # some diff programs apparently produce patches where the afile is | 1025 # some diff programs apparently produce patches where the afile is |
1038 # not /dev/null, but afile starts with bfile | 1026 # not /dev/null, but afile starts with bfile |
1039 abasedir = afile[:afile.rfind('/') + 1] | 1027 abasedir = afile[:afile.rfind('/') + 1] |
1040 bbasedir = bfile[:bfile.rfind('/') + 1] | 1028 bbasedir = bfile[:bfile.rfind('/') + 1] |
1041 if missing and abasedir == bbasedir and afile.startswith(bfile): | 1029 if (missing and abasedir == bbasedir and afile.startswith(bfile) |
1042 # this isn't very pretty | 1030 and hunk.starta == 0 and hunk.lena == 0): |
1043 hunk.create = True | 1031 create = True |
1044 if createfunc(): | 1032 missing = False |
1045 missing = False | |
1046 else: | |
1047 hunk.create = False | |
1048 | 1033 |
1049 # If afile is "a/b/foo" and bfile is "a/b/foo.orig" we assume the | 1034 # If afile is "a/b/foo" and bfile is "a/b/foo.orig" we assume the |
1050 # diff is between a file and its backup. In this case, the original | 1035 # diff is between a file and its backup. In this case, the original |
1051 # file should be patched (see original mpatch code). | 1036 # file should be patched (see original mpatch code). |
1052 isbackup = (abase == bbase and bfile.startswith(afile)) | 1037 isbackup = (abase == bbase and bfile.startswith(afile)) |
1063 elif not nulla: | 1048 elif not nulla: |
1064 fname = afile | 1049 fname = afile |
1065 else: | 1050 else: |
1066 raise PatchError(_("undefined source and destination files")) | 1051 raise PatchError(_("undefined source and destination files")) |
1067 | 1052 |
1068 return fname, missing | 1053 return fname, missing, create, remove |
1069 | 1054 |
1070 def scangitpatch(lr, firstline): | 1055 def scangitpatch(lr, firstline): |
1071 """ | 1056 """ |
1072 Git patches can emit: | 1057 Git patches can emit: |
1073 - rename a to b | 1058 - rename a to b |
1123 or x.startswith('GIT binary patch')): | 1108 or x.startswith('GIT binary patch')): |
1124 gp = None | 1109 gp = None |
1125 if gitpatches and gitpatches[-1][0] == bfile: | 1110 if gitpatches and gitpatches[-1][0] == bfile: |
1126 gp = gitpatches.pop()[1] | 1111 gp = gitpatches.pop()[1] |
1127 if x.startswith('GIT binary patch'): | 1112 if x.startswith('GIT binary patch'): |
1128 h = binhunk(gp, lr) | 1113 h = binhunk(lr) |
1129 else: | 1114 else: |
1130 if context is None and x.startswith('***************'): | 1115 if context is None and x.startswith('***************'): |
1131 context = True | 1116 context = True |
1132 create = afile == '/dev/null' or gp and gp.op == 'ADD' | 1117 h = hunk(x, hunknum + 1, lr, context) |
1133 remove = bfile == '/dev/null' or gp and gp.op == 'DELETE' | |
1134 h = hunk(x, hunknum + 1, lr, context, create, remove) | |
1135 hunknum += 1 | 1118 hunknum += 1 |
1136 if emitfile: | 1119 if emitfile: |
1137 emitfile = False | 1120 emitfile = False |
1138 yield 'file', (afile, bfile, h, gp) | 1121 yield 'file', (afile, bfile, h, gp) |
1139 yield 'hunk', h | 1122 yield 'hunk', h |
1248 backend.setfile(path, data, gp.mode) | 1231 backend.setfile(path, data, gp.mode) |
1249 if not first_hunk: | 1232 if not first_hunk: |
1250 continue | 1233 continue |
1251 try: | 1234 try: |
1252 mode = gp and gp.mode or None | 1235 mode = gp and gp.mode or None |
1253 current_file, missing = selectfile(backend, afile, bfile, | 1236 current_file, missing, create, remove = selectfile( |
1254 first_hunk, strip, gp) | 1237 backend, afile, bfile, first_hunk, strip, gp) |
1255 current_file = patcher(ui, current_file, backend, mode, | 1238 current_file = patcher(ui, current_file, backend, mode, |
1256 missing=missing, eolmode=eolmode) | 1239 create, remove, missing=missing, |
1240 eolmode=eolmode) | |
1257 except PatchError, inst: | 1241 except PatchError, inst: |
1258 ui.warn(str(inst) + '\n') | 1242 ui.warn(str(inst) + '\n') |
1259 current_file = None | 1243 current_file = None |
1260 rejects += 1 | 1244 rejects += 1 |
1261 continue | 1245 continue |
1384 changed.add(pathstrip(gp.path, strip - 1)[1]) | 1368 changed.add(pathstrip(gp.path, strip - 1)[1]) |
1385 if gp.op == 'RENAME': | 1369 if gp.op == 'RENAME': |
1386 changed.add(pathstrip(gp.oldpath, strip - 1)[1]) | 1370 changed.add(pathstrip(gp.oldpath, strip - 1)[1]) |
1387 if not first_hunk: | 1371 if not first_hunk: |
1388 continue | 1372 continue |
1389 current_file, missing = selectfile(backend, afile, bfile, | 1373 current_file, missing, create, remove = selectfile( |
1390 first_hunk, strip, gp) | 1374 backend, afile, bfile, first_hunk, strip, gp) |
1391 changed.add(current_file) | 1375 changed.add(current_file) |
1392 elif state not in ('hunk', 'git'): | 1376 elif state not in ('hunk', 'git'): |
1393 raise util.Abort(_('unsupported parser state: %s') % state) | 1377 raise util.Abort(_('unsupported parser state: %s') % state) |
1394 return changed | 1378 return changed |
1395 finally: | 1379 finally: |