Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/utils/stringutil.py @ 43076:2372284d9457
formatting: blacken the codebase
This is using my patch to black
(https://github.com/psf/black/pull/826) so we don't un-wrap collection
literals.
Done with:
hg files 'set:**.py - mercurial/thirdparty/** - "contrib/python-zstandard/**"' | xargs black -S
# skip-blame mass-reformatting only
# no-check-commit reformats foo_bar functions
Differential Revision: https://phab.mercurial-scm.org/D6971
author | Augie Fackler <augie@google.com> |
---|---|
date | Sun, 06 Oct 2019 09:45:02 -0400 |
parents | e6c9ef5e11a0 |
children | 687b865b95ad |
comparison
equal
deleted
inserted
replaced
43075:57875cf423c9 | 43076:2372284d9457 |
---|---|
27 # regex special chars pulled from https://bugs.python.org/issue29995 | 27 # regex special chars pulled from https://bugs.python.org/issue29995 |
28 # which was part of Python 3.7. | 28 # which was part of Python 3.7. |
29 _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f') | 29 _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f') |
30 _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial} | 30 _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial} |
31 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial} | 31 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial} |
32 | |
32 | 33 |
33 def reescape(pat): | 34 def reescape(pat): |
34 """Drop-in replacement for re.escape.""" | 35 """Drop-in replacement for re.escape.""" |
35 # NOTE: it is intentional that this works on unicodes and not | 36 # NOTE: it is intentional that this works on unicodes and not |
36 # bytes, as it's only possible to do the escaping with | 37 # bytes, as it's only possible to do the escaping with |
42 pat = pat.translate(_regexescapemap) | 43 pat = pat.translate(_regexescapemap) |
43 if wantuni: | 44 if wantuni: |
44 return pat | 45 return pat |
45 return pat.encode('latin1') | 46 return pat.encode('latin1') |
46 | 47 |
48 | |
47 def pprint(o, bprefix=False, indent=0, level=0): | 49 def pprint(o, bprefix=False, indent=0, level=0): |
48 """Pretty print an object.""" | 50 """Pretty print an object.""" |
49 return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level)) | 51 return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level)) |
52 | |
50 | 53 |
51 def pprintgen(o, bprefix=False, indent=0, level=0): | 54 def pprintgen(o, bprefix=False, indent=0, level=0): |
52 """Pretty print an object to a generator of atoms. | 55 """Pretty print an object to a generator of atoms. |
53 | 56 |
54 ``bprefix`` is a flag influencing whether bytestrings are preferred with | 57 ``bprefix`` is a flag influencing whether bytestrings are preferred with |
81 level += 1 | 84 level += 1 |
82 yield '\n' | 85 yield '\n' |
83 yield ' ' * (level * indent) | 86 yield ' ' * (level * indent) |
84 | 87 |
85 for i, a in enumerate(o): | 88 for i, a in enumerate(o): |
86 for chunk in pprintgen(a, bprefix=bprefix, indent=indent, | 89 for chunk in pprintgen( |
87 level=level): | 90 a, bprefix=bprefix, indent=indent, level=level |
91 ): | |
88 yield chunk | 92 yield chunk |
89 | 93 |
90 if i + 1 < len(o): | 94 if i + 1 < len(o): |
91 if indent: | 95 if indent: |
92 yield ',\n' | 96 yield ',\n' |
111 level += 1 | 115 level += 1 |
112 yield '\n' | 116 yield '\n' |
113 yield ' ' * (level * indent) | 117 yield ' ' * (level * indent) |
114 | 118 |
115 for i, (k, v) in enumerate(sorted(o.items())): | 119 for i, (k, v) in enumerate(sorted(o.items())): |
116 for chunk in pprintgen(k, bprefix=bprefix, indent=indent, | 120 for chunk in pprintgen( |
117 level=level): | 121 k, bprefix=bprefix, indent=indent, level=level |
122 ): | |
118 yield chunk | 123 yield chunk |
119 | 124 |
120 yield ': ' | 125 yield ': ' |
121 | 126 |
122 for chunk in pprintgen(v, bprefix=bprefix, indent=indent, | 127 for chunk in pprintgen( |
123 level=level): | 128 v, bprefix=bprefix, indent=indent, level=level |
129 ): | |
124 yield chunk | 130 yield chunk |
125 | 131 |
126 if i + 1 < len(o): | 132 if i + 1 < len(o): |
127 if indent: | 133 if indent: |
128 yield ',\n' | 134 yield ',\n' |
147 level += 1 | 153 level += 1 |
148 yield '\n' | 154 yield '\n' |
149 yield ' ' * (level * indent) | 155 yield ' ' * (level * indent) |
150 | 156 |
151 for i, k in enumerate(sorted(o)): | 157 for i, k in enumerate(sorted(o)): |
152 for chunk in pprintgen(k, bprefix=bprefix, indent=indent, | 158 for chunk in pprintgen( |
153 level=level): | 159 k, bprefix=bprefix, indent=indent, level=level |
160 ): | |
154 yield chunk | 161 yield chunk |
155 | 162 |
156 if i + 1 < len(o): | 163 if i + 1 < len(o): |
157 if indent: | 164 if indent: |
158 yield ',\n' | 165 yield ',\n' |
177 level += 1 | 184 level += 1 |
178 yield '\n' | 185 yield '\n' |
179 yield ' ' * (level * indent) | 186 yield ' ' * (level * indent) |
180 | 187 |
181 for i, a in enumerate(o): | 188 for i, a in enumerate(o): |
182 for chunk in pprintgen(a, bprefix=bprefix, indent=indent, | 189 for chunk in pprintgen( |
183 level=level): | 190 a, bprefix=bprefix, indent=indent, level=level |
191 ): | |
184 yield chunk | 192 yield chunk |
185 | 193 |
186 if i + 1 < len(o): | 194 if i + 1 < len(o): |
187 if indent: | 195 if indent: |
188 yield ',\n' | 196 yield ',\n' |
219 try: | 227 try: |
220 nextitem = next(o) | 228 nextitem = next(o) |
221 except StopIteration: | 229 except StopIteration: |
222 last = True | 230 last = True |
223 | 231 |
224 for chunk in pprintgen(current, bprefix=bprefix, indent=indent, | 232 for chunk in pprintgen( |
225 level=level): | 233 current, bprefix=bprefix, indent=indent, level=level |
234 ): | |
226 yield chunk | 235 yield chunk |
227 | 236 |
228 if not last: | 237 if not last: |
229 if indent: | 238 if indent: |
230 yield ',\n' | 239 yield ',\n' |
238 yield ' ' * (level * indent) | 247 yield ' ' * (level * indent) |
239 | 248 |
240 yield ']' | 249 yield ']' |
241 else: | 250 else: |
242 yield pycompat.byterepr(o) | 251 yield pycompat.byterepr(o) |
252 | |
243 | 253 |
244 def prettyrepr(o): | 254 def prettyrepr(o): |
245 """Pretty print a representation of a possibly-nested object""" | 255 """Pretty print a representation of a possibly-nested object""" |
246 lines = [] | 256 lines = [] |
247 rs = pycompat.byterepr(o) | 257 rs = pycompat.byterepr(o) |
265 assert l >= 0 | 275 assert l >= 0 |
266 lines.append((l, rs[p0:q0].rstrip())) | 276 lines.append((l, rs[p0:q0].rstrip())) |
267 p0, p1 = q0, q1 | 277 p0, p1 = q0, q1 |
268 return '\n'.join(' ' * l + s for l, s in lines) | 278 return '\n'.join(' ' * l + s for l, s in lines) |
269 | 279 |
280 | |
270 def buildrepr(r): | 281 def buildrepr(r): |
271 """Format an optional printable representation from unexpanded bits | 282 """Format an optional printable representation from unexpanded bits |
272 | 283 |
273 ======== ================================= | 284 ======== ================================= |
274 type(r) example | 285 type(r) example |
288 elif callable(r): | 299 elif callable(r): |
289 return r() | 300 return r() |
290 else: | 301 else: |
291 return pprint(r) | 302 return pprint(r) |
292 | 303 |
304 | |
293 def binary(s): | 305 def binary(s): |
294 """return true if a string is binary data""" | 306 """return true if a string is binary data""" |
295 return bool(s and '\0' in s) | 307 return bool(s and '\0' in s) |
308 | |
296 | 309 |
297 def stringmatcher(pattern, casesensitive=True): | 310 def stringmatcher(pattern, casesensitive=True): |
298 """ | 311 """ |
299 accepts a string, possibly starting with 're:' or 'literal:' prefix. | 312 accepts a string, possibly starting with 're:' or 'literal:' prefix. |
300 returns the matcher name, pattern, and matcher function. | 313 returns the matcher name, pattern, and matcher function. |
338 flags = 0 | 351 flags = 0 |
339 if not casesensitive: | 352 if not casesensitive: |
340 flags = remod.I | 353 flags = remod.I |
341 regex = remod.compile(pattern, flags) | 354 regex = remod.compile(pattern, flags) |
342 except remod.error as e: | 355 except remod.error as e: |
343 raise error.ParseError(_('invalid regular expression: %s') | 356 raise error.ParseError(_('invalid regular expression: %s') % e) |
344 % e) | |
345 return 're', pattern, regex.search | 357 return 're', pattern, regex.search |
346 elif pattern.startswith('literal:'): | 358 elif pattern.startswith('literal:'): |
347 pattern = pattern[8:] | 359 pattern = pattern[8:] |
348 | 360 |
349 match = pattern.__eq__ | 361 match = pattern.__eq__ |
350 | 362 |
351 if not casesensitive: | 363 if not casesensitive: |
352 ipat = encoding.lower(pattern) | 364 ipat = encoding.lower(pattern) |
353 match = lambda s: ipat == encoding.lower(s) | 365 match = lambda s: ipat == encoding.lower(s) |
354 return 'literal', pattern, match | 366 return 'literal', pattern, match |
367 | |
355 | 368 |
356 def shortuser(user): | 369 def shortuser(user): |
357 """Return a short representation of a user name or email address.""" | 370 """Return a short representation of a user name or email address.""" |
358 f = user.find('@') | 371 f = user.find('@') |
359 if f >= 0: | 372 if f >= 0: |
360 user = user[:f] | 373 user = user[:f] |
361 f = user.find('<') | 374 f = user.find('<') |
362 if f >= 0: | 375 if f >= 0: |
363 user = user[f + 1:] | 376 user = user[f + 1 :] |
364 f = user.find(' ') | 377 f = user.find(' ') |
365 if f >= 0: | 378 if f >= 0: |
366 user = user[:f] | 379 user = user[:f] |
367 f = user.find('.') | 380 f = user.find('.') |
368 if f >= 0: | 381 if f >= 0: |
369 user = user[:f] | 382 user = user[:f] |
370 return user | 383 return user |
371 | 384 |
385 | |
372 def emailuser(user): | 386 def emailuser(user): |
373 """Return the user portion of an email address.""" | 387 """Return the user portion of an email address.""" |
374 f = user.find('@') | 388 f = user.find('@') |
375 if f >= 0: | 389 if f >= 0: |
376 user = user[:f] | 390 user = user[:f] |
377 f = user.find('<') | 391 f = user.find('<') |
378 if f >= 0: | 392 if f >= 0: |
379 user = user[f + 1:] | 393 user = user[f + 1 :] |
380 return user | 394 return user |
395 | |
381 | 396 |
382 def email(author): | 397 def email(author): |
383 '''get email of author.''' | 398 '''get email of author.''' |
384 r = author.find('>') | 399 r = author.find('>') |
385 if r == -1: | 400 if r == -1: |
386 r = None | 401 r = None |
387 return author[author.find('<') + 1:r] | 402 return author[author.find('<') + 1 : r] |
403 | |
388 | 404 |
389 def person(author): | 405 def person(author): |
390 """Returns the name before an email address, | 406 """Returns the name before an email address, |
391 interpreting it as per RFC 5322 | 407 interpreting it as per RFC 5322 |
392 | 408 |
411 if f != -1: | 427 if f != -1: |
412 return author[:f].strip(' "').replace('\\"', '"') | 428 return author[:f].strip(' "').replace('\\"', '"') |
413 f = author.find('@') | 429 f = author.find('@') |
414 return author[:f].replace('.', ' ') | 430 return author[:f].replace('.', ' ') |
415 | 431 |
432 | |
416 @attr.s(hash=True) | 433 @attr.s(hash=True) |
417 class mailmapping(object): | 434 class mailmapping(object): |
418 '''Represents a username/email key or value in | 435 '''Represents a username/email key or value in |
419 a mailmap file''' | 436 a mailmap file''' |
437 | |
420 email = attr.ib() | 438 email = attr.ib() |
421 name = attr.ib(default=None) | 439 name = attr.ib(default=None) |
440 | |
422 | 441 |
423 def _ismailmaplineinvalid(names, emails): | 442 def _ismailmaplineinvalid(names, emails): |
424 '''Returns True if the parsed names and emails | 443 '''Returns True if the parsed names and emails |
425 in a mailmap entry are invalid. | 444 in a mailmap entry are invalid. |
426 | 445 |
441 >>> emails = [b'proper@email.com', b'commit@email.com'] | 460 >>> emails = [b'proper@email.com', b'commit@email.com'] |
442 >>> _ismailmaplineinvalid(names, emails) | 461 >>> _ismailmaplineinvalid(names, emails) |
443 False | 462 False |
444 ''' | 463 ''' |
445 return not emails or not names and len(emails) < 2 | 464 return not emails or not names and len(emails) < 2 |
465 | |
446 | 466 |
447 def parsemailmap(mailmapcontent): | 467 def parsemailmap(mailmapcontent): |
448 """Parses data in the .mailmap format | 468 """Parses data in the .mailmap format |
449 | 469 |
450 >>> mmdata = b"\\n".join([ | 470 >>> mmdata = b"\\n".join([ |
513 # name or a second email | 533 # name or a second email |
514 if _ismailmaplineinvalid(names, emails): | 534 if _ismailmaplineinvalid(names, emails): |
515 continue | 535 continue |
516 | 536 |
517 mailmapkey = mailmapping( | 537 mailmapkey = mailmapping( |
518 email=emails[-1], | 538 email=emails[-1], name=names[-1] if len(names) == 2 else None, |
519 name=names[-1] if len(names) == 2 else None, | |
520 ) | 539 ) |
521 | 540 |
522 mailmap[mailmapkey] = mailmapping( | 541 mailmap[mailmapkey] = mailmapping( |
523 email=emails[0], | 542 email=emails[0], name=names[0] if names else None, |
524 name=names[0] if names else None, | |
525 ) | 543 ) |
526 | 544 |
527 return mailmap | 545 return mailmap |
546 | |
528 | 547 |
529 def mapname(mailmap, author): | 548 def mapname(mailmap, author): |
530 """Returns the author field according to the mailmap cache, or | 549 """Returns the author field according to the mailmap cache, or |
531 the original author field. | 550 the original author field. |
532 | 551 |
571 return '%s <%s>' % ( | 590 return '%s <%s>' % ( |
572 proper.name if proper.name else commit.name, | 591 proper.name if proper.name else commit.name, |
573 proper.email if proper.email else commit.email, | 592 proper.email if proper.email else commit.email, |
574 ) | 593 ) |
575 | 594 |
595 | |
576 _correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>$') | 596 _correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>$') |
597 | |
577 | 598 |
578 def isauthorwellformed(author): | 599 def isauthorwellformed(author): |
579 '''Return True if the author field is well formed | 600 '''Return True if the author field is well formed |
580 (i.e. "Contributor Name <contrib@email.dom>") | 601 (i.e. "Contributor Name <contrib@email.dom>") |
581 | 602 |
594 >>> isauthorwellformed(b'Bad Author <author>') | 615 >>> isauthorwellformed(b'Bad Author <author>') |
595 False | 616 False |
596 ''' | 617 ''' |
597 return _correctauthorformat.match(author) is not None | 618 return _correctauthorformat.match(author) is not None |
598 | 619 |
620 | |
599 def ellipsis(text, maxlength=400): | 621 def ellipsis(text, maxlength=400): |
600 """Trim string to at most maxlength (default: 400) columns in display.""" | 622 """Trim string to at most maxlength (default: 400) columns in display.""" |
601 return encoding.trim(text, maxlength, ellipsis='...') | 623 return encoding.trim(text, maxlength, ellipsis='...') |
624 | |
602 | 625 |
603 def escapestr(s): | 626 def escapestr(s): |
604 if isinstance(s, memoryview): | 627 if isinstance(s, memoryview): |
605 s = bytes(s) | 628 s = bytes(s) |
606 # call underlying function of s.encode('string_escape') directly for | 629 # call underlying function of s.encode('string_escape') directly for |
607 # Python 3 compatibility | 630 # Python 3 compatibility |
608 return codecs.escape_encode(s)[0] | 631 return codecs.escape_encode(s)[0] |
609 | 632 |
633 | |
610 def unescapestr(s): | 634 def unescapestr(s): |
611 return codecs.escape_decode(s)[0] | 635 return codecs.escape_decode(s)[0] |
636 | |
612 | 637 |
613 def forcebytestr(obj): | 638 def forcebytestr(obj): |
614 """Portably format an arbitrary object (e.g. exception) into a byte | 639 """Portably format an arbitrary object (e.g. exception) into a byte |
615 string.""" | 640 string.""" |
616 try: | 641 try: |
617 return pycompat.bytestr(obj) | 642 return pycompat.bytestr(obj) |
618 except UnicodeEncodeError: | 643 except UnicodeEncodeError: |
619 # non-ascii string, may be lossy | 644 # non-ascii string, may be lossy |
620 return pycompat.bytestr(encoding.strtolocal(str(obj))) | 645 return pycompat.bytestr(encoding.strtolocal(str(obj))) |
621 | 646 |
647 | |
622 def uirepr(s): | 648 def uirepr(s): |
623 # Avoid double backslash in Windows path repr() | 649 # Avoid double backslash in Windows path repr() |
624 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\') | 650 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\') |
651 | |
625 | 652 |
626 # delay import of textwrap | 653 # delay import of textwrap |
627 def _MBTextWrapper(**kwargs): | 654 def _MBTextWrapper(**kwargs): |
628 class tw(textwrap.TextWrapper): | 655 class tw(textwrap.TextWrapper): |
629 """ | 656 """ |
638 In addition, characters classified into 'ambiguous' width are | 665 In addition, characters classified into 'ambiguous' width are |
639 treated as wide in East Asian area, but as narrow in other. | 666 treated as wide in East Asian area, but as narrow in other. |
640 | 667 |
641 This requires user decision to determine width of such characters. | 668 This requires user decision to determine width of such characters. |
642 """ | 669 """ |
670 | |
643 def _cutdown(self, ucstr, space_left): | 671 def _cutdown(self, ucstr, space_left): |
644 l = 0 | 672 l = 0 |
645 colwidth = encoding.ucolwidth | 673 colwidth = encoding.ucolwidth |
646 for i in pycompat.xrange(len(ucstr)): | 674 for i in pycompat.xrange(len(ucstr)): |
647 l += colwidth(ucstr[i]) | 675 l += colwidth(ucstr[i]) |
710 # fit on *any* line (not just this one). | 738 # fit on *any* line (not just this one). |
711 if chunks and colwidth(chunks[-1]) > width: | 739 if chunks and colwidth(chunks[-1]) > width: |
712 self._handle_long_word(chunks, cur_line, cur_len, width) | 740 self._handle_long_word(chunks, cur_line, cur_len, width) |
713 | 741 |
714 # If the last chunk on this line is all whitespace, drop it. | 742 # If the last chunk on this line is all whitespace, drop it. |
715 if (self.drop_whitespace and | 743 if ( |
716 cur_line and cur_line[-1].strip() == r''): | 744 self.drop_whitespace |
745 and cur_line | |
746 and cur_line[-1].strip() == r'' | |
747 ): | |
717 del cur_line[-1] | 748 del cur_line[-1] |
718 | 749 |
719 # Convert current line back to a string and store it in list | 750 # Convert current line back to a string and store it in list |
720 # of all lines (return value). | 751 # of all lines (return value). |
721 if cur_line: | 752 if cur_line: |
724 return lines | 755 return lines |
725 | 756 |
726 global _MBTextWrapper | 757 global _MBTextWrapper |
727 _MBTextWrapper = tw | 758 _MBTextWrapper = tw |
728 return tw(**kwargs) | 759 return tw(**kwargs) |
760 | |
729 | 761 |
730 def wrap(line, width, initindent='', hangindent=''): | 762 def wrap(line, width, initindent='', hangindent=''): |
731 maxindent = max(len(hangindent), len(initindent)) | 763 maxindent = max(len(hangindent), len(initindent)) |
732 if width <= maxindent: | 764 if width <= maxindent: |
733 # adjust for weird terminal size | 765 # adjust for weird terminal size |
734 width = max(78, maxindent + 1) | 766 width = max(78, maxindent + 1) |
735 line = line.decode(pycompat.sysstr(encoding.encoding), | 767 line = line.decode( |
736 pycompat.sysstr(encoding.encodingmode)) | 768 pycompat.sysstr(encoding.encoding), |
737 initindent = initindent.decode(pycompat.sysstr(encoding.encoding), | 769 pycompat.sysstr(encoding.encodingmode), |
738 pycompat.sysstr(encoding.encodingmode)) | 770 ) |
739 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding), | 771 initindent = initindent.decode( |
740 pycompat.sysstr(encoding.encodingmode)) | 772 pycompat.sysstr(encoding.encoding), |
741 wrapper = _MBTextWrapper(width=width, | 773 pycompat.sysstr(encoding.encodingmode), |
742 initial_indent=initindent, | 774 ) |
743 subsequent_indent=hangindent) | 775 hangindent = hangindent.decode( |
776 pycompat.sysstr(encoding.encoding), | |
777 pycompat.sysstr(encoding.encodingmode), | |
778 ) | |
779 wrapper = _MBTextWrapper( | |
780 width=width, initial_indent=initindent, subsequent_indent=hangindent | |
781 ) | |
744 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding)) | 782 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding)) |
745 | 783 |
746 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True, | 784 |
747 '0': False, 'no': False, 'false': False, 'off': False, | 785 _booleans = { |
748 'never': False} | 786 '1': True, |
787 'yes': True, | |
788 'true': True, | |
789 'on': True, | |
790 'always': True, | |
791 '0': False, | |
792 'no': False, | |
793 'false': False, | |
794 'off': False, | |
795 'never': False, | |
796 } | |
797 | |
749 | 798 |
750 def parsebool(s): | 799 def parsebool(s): |
751 """Parse s into a boolean. | 800 """Parse s into a boolean. |
752 | 801 |
753 If s is not a valid boolean, returns None. | 802 If s is not a valid boolean, returns None. |
754 """ | 803 """ |
755 return _booleans.get(s.lower(), None) | 804 return _booleans.get(s.lower(), None) |
805 | |
756 | 806 |
757 def evalpythonliteral(s): | 807 def evalpythonliteral(s): |
758 """Evaluate a string containing a Python literal expression""" | 808 """Evaluate a string containing a Python literal expression""" |
759 # We could backport our tokenizer hack to rewrite '' to u'' if we want | 809 # We could backport our tokenizer hack to rewrite '' to u'' if we want |
760 if pycompat.ispy3: | 810 if pycompat.ispy3: |