comparison mercurial/utils/stringutil.py @ 43076:2372284d9457

formatting: blacken the codebase

This is using my patch to black (https://github.com/psf/black/pull/826) so we don't un-wrap collection literals.

Done with:

  hg files 'set:**.py - mercurial/thirdparty/** - "contrib/python-zstandard/**"' | xargs black -S

# skip-blame mass-reformatting only

# no-check-commit reformats foo_bar functions

Differential Revision: https://phab.mercurial-scm.org/D6971
author Augie Fackler <augie@google.com>
date Sun, 06 Oct 2019 09:45:02 -0400
parents e6c9ef5e11a0
children 687b865b95ad
comparison
equal deleted inserted replaced
43075:57875cf423c9 43076:2372284d9457
27 # regex special chars pulled from https://bugs.python.org/issue29995 27 # regex special chars pulled from https://bugs.python.org/issue29995
28 # which was part of Python 3.7. 28 # which was part of Python 3.7.
29 _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f') 29 _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
30 _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial} 30 _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
31 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial} 31 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
32
32 33
33 def reescape(pat): 34 def reescape(pat):
34 """Drop-in replacement for re.escape.""" 35 """Drop-in replacement for re.escape."""
35 # NOTE: it is intentional that this works on unicodes and not 36 # NOTE: it is intentional that this works on unicodes and not
36 # bytes, as it's only possible to do the escaping with 37 # bytes, as it's only possible to do the escaping with
42 pat = pat.translate(_regexescapemap) 43 pat = pat.translate(_regexescapemap)
43 if wantuni: 44 if wantuni:
44 return pat 45 return pat
45 return pat.encode('latin1') 46 return pat.encode('latin1')
46 47
48
47 def pprint(o, bprefix=False, indent=0, level=0): 49 def pprint(o, bprefix=False, indent=0, level=0):
48 """Pretty print an object.""" 50 """Pretty print an object."""
49 return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level)) 51 return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level))
52
50 53
51 def pprintgen(o, bprefix=False, indent=0, level=0): 54 def pprintgen(o, bprefix=False, indent=0, level=0):
52 """Pretty print an object to a generator of atoms. 55 """Pretty print an object to a generator of atoms.
53 56
54 ``bprefix`` is a flag influencing whether bytestrings are preferred with 57 ``bprefix`` is a flag influencing whether bytestrings are preferred with
81 level += 1 84 level += 1
82 yield '\n' 85 yield '\n'
83 yield ' ' * (level * indent) 86 yield ' ' * (level * indent)
84 87
85 for i, a in enumerate(o): 88 for i, a in enumerate(o):
86 for chunk in pprintgen(a, bprefix=bprefix, indent=indent, 89 for chunk in pprintgen(
87 level=level): 90 a, bprefix=bprefix, indent=indent, level=level
91 ):
88 yield chunk 92 yield chunk
89 93
90 if i + 1 < len(o): 94 if i + 1 < len(o):
91 if indent: 95 if indent:
92 yield ',\n' 96 yield ',\n'
111 level += 1 115 level += 1
112 yield '\n' 116 yield '\n'
113 yield ' ' * (level * indent) 117 yield ' ' * (level * indent)
114 118
115 for i, (k, v) in enumerate(sorted(o.items())): 119 for i, (k, v) in enumerate(sorted(o.items())):
116 for chunk in pprintgen(k, bprefix=bprefix, indent=indent, 120 for chunk in pprintgen(
117 level=level): 121 k, bprefix=bprefix, indent=indent, level=level
122 ):
118 yield chunk 123 yield chunk
119 124
120 yield ': ' 125 yield ': '
121 126
122 for chunk in pprintgen(v, bprefix=bprefix, indent=indent, 127 for chunk in pprintgen(
123 level=level): 128 v, bprefix=bprefix, indent=indent, level=level
129 ):
124 yield chunk 130 yield chunk
125 131
126 if i + 1 < len(o): 132 if i + 1 < len(o):
127 if indent: 133 if indent:
128 yield ',\n' 134 yield ',\n'
147 level += 1 153 level += 1
148 yield '\n' 154 yield '\n'
149 yield ' ' * (level * indent) 155 yield ' ' * (level * indent)
150 156
151 for i, k in enumerate(sorted(o)): 157 for i, k in enumerate(sorted(o)):
152 for chunk in pprintgen(k, bprefix=bprefix, indent=indent, 158 for chunk in pprintgen(
153 level=level): 159 k, bprefix=bprefix, indent=indent, level=level
160 ):
154 yield chunk 161 yield chunk
155 162
156 if i + 1 < len(o): 163 if i + 1 < len(o):
157 if indent: 164 if indent:
158 yield ',\n' 165 yield ',\n'
177 level += 1 184 level += 1
178 yield '\n' 185 yield '\n'
179 yield ' ' * (level * indent) 186 yield ' ' * (level * indent)
180 187
181 for i, a in enumerate(o): 188 for i, a in enumerate(o):
182 for chunk in pprintgen(a, bprefix=bprefix, indent=indent, 189 for chunk in pprintgen(
183 level=level): 190 a, bprefix=bprefix, indent=indent, level=level
191 ):
184 yield chunk 192 yield chunk
185 193
186 if i + 1 < len(o): 194 if i + 1 < len(o):
187 if indent: 195 if indent:
188 yield ',\n' 196 yield ',\n'
219 try: 227 try:
220 nextitem = next(o) 228 nextitem = next(o)
221 except StopIteration: 229 except StopIteration:
222 last = True 230 last = True
223 231
224 for chunk in pprintgen(current, bprefix=bprefix, indent=indent, 232 for chunk in pprintgen(
225 level=level): 233 current, bprefix=bprefix, indent=indent, level=level
234 ):
226 yield chunk 235 yield chunk
227 236
228 if not last: 237 if not last:
229 if indent: 238 if indent:
230 yield ',\n' 239 yield ',\n'
238 yield ' ' * (level * indent) 247 yield ' ' * (level * indent)
239 248
240 yield ']' 249 yield ']'
241 else: 250 else:
242 yield pycompat.byterepr(o) 251 yield pycompat.byterepr(o)
252
243 253
244 def prettyrepr(o): 254 def prettyrepr(o):
245 """Pretty print a representation of a possibly-nested object""" 255 """Pretty print a representation of a possibly-nested object"""
246 lines = [] 256 lines = []
247 rs = pycompat.byterepr(o) 257 rs = pycompat.byterepr(o)
265 assert l >= 0 275 assert l >= 0
266 lines.append((l, rs[p0:q0].rstrip())) 276 lines.append((l, rs[p0:q0].rstrip()))
267 p0, p1 = q0, q1 277 p0, p1 = q0, q1
268 return '\n'.join(' ' * l + s for l, s in lines) 278 return '\n'.join(' ' * l + s for l, s in lines)
269 279
280
270 def buildrepr(r): 281 def buildrepr(r):
271 """Format an optional printable representation from unexpanded bits 282 """Format an optional printable representation from unexpanded bits
272 283
273 ======== ================================= 284 ======== =================================
274 type(r) example 285 type(r) example
288 elif callable(r): 299 elif callable(r):
289 return r() 300 return r()
290 else: 301 else:
291 return pprint(r) 302 return pprint(r)
292 303
304
293 def binary(s): 305 def binary(s):
294 """return true if a string is binary data""" 306 """return true if a string is binary data"""
295 return bool(s and '\0' in s) 307 return bool(s and '\0' in s)
308
296 309
297 def stringmatcher(pattern, casesensitive=True): 310 def stringmatcher(pattern, casesensitive=True):
298 """ 311 """
299 accepts a string, possibly starting with 're:' or 'literal:' prefix. 312 accepts a string, possibly starting with 're:' or 'literal:' prefix.
300 returns the matcher name, pattern, and matcher function. 313 returns the matcher name, pattern, and matcher function.
338 flags = 0 351 flags = 0
339 if not casesensitive: 352 if not casesensitive:
340 flags = remod.I 353 flags = remod.I
341 regex = remod.compile(pattern, flags) 354 regex = remod.compile(pattern, flags)
342 except remod.error as e: 355 except remod.error as e:
343 raise error.ParseError(_('invalid regular expression: %s') 356 raise error.ParseError(_('invalid regular expression: %s') % e)
344 % e)
345 return 're', pattern, regex.search 357 return 're', pattern, regex.search
346 elif pattern.startswith('literal:'): 358 elif pattern.startswith('literal:'):
347 pattern = pattern[8:] 359 pattern = pattern[8:]
348 360
349 match = pattern.__eq__ 361 match = pattern.__eq__
350 362
351 if not casesensitive: 363 if not casesensitive:
352 ipat = encoding.lower(pattern) 364 ipat = encoding.lower(pattern)
353 match = lambda s: ipat == encoding.lower(s) 365 match = lambda s: ipat == encoding.lower(s)
354 return 'literal', pattern, match 366 return 'literal', pattern, match
367
355 368
356 def shortuser(user): 369 def shortuser(user):
357 """Return a short representation of a user name or email address.""" 370 """Return a short representation of a user name or email address."""
358 f = user.find('@') 371 f = user.find('@')
359 if f >= 0: 372 if f >= 0:
360 user = user[:f] 373 user = user[:f]
361 f = user.find('<') 374 f = user.find('<')
362 if f >= 0: 375 if f >= 0:
363 user = user[f + 1:] 376 user = user[f + 1 :]
364 f = user.find(' ') 377 f = user.find(' ')
365 if f >= 0: 378 if f >= 0:
366 user = user[:f] 379 user = user[:f]
367 f = user.find('.') 380 f = user.find('.')
368 if f >= 0: 381 if f >= 0:
369 user = user[:f] 382 user = user[:f]
370 return user 383 return user
371 384
385
372 def emailuser(user): 386 def emailuser(user):
373 """Return the user portion of an email address.""" 387 """Return the user portion of an email address."""
374 f = user.find('@') 388 f = user.find('@')
375 if f >= 0: 389 if f >= 0:
376 user = user[:f] 390 user = user[:f]
377 f = user.find('<') 391 f = user.find('<')
378 if f >= 0: 392 if f >= 0:
379 user = user[f + 1:] 393 user = user[f + 1 :]
380 return user 394 return user
395
381 396
382 def email(author): 397 def email(author):
383 '''get email of author.''' 398 '''get email of author.'''
384 r = author.find('>') 399 r = author.find('>')
385 if r == -1: 400 if r == -1:
386 r = None 401 r = None
387 return author[author.find('<') + 1:r] 402 return author[author.find('<') + 1 : r]
403
388 404
389 def person(author): 405 def person(author):
390 """Returns the name before an email address, 406 """Returns the name before an email address,
391 interpreting it as per RFC 5322 407 interpreting it as per RFC 5322
392 408
411 if f != -1: 427 if f != -1:
412 return author[:f].strip(' "').replace('\\"', '"') 428 return author[:f].strip(' "').replace('\\"', '"')
413 f = author.find('@') 429 f = author.find('@')
414 return author[:f].replace('.', ' ') 430 return author[:f].replace('.', ' ')
415 431
432
416 @attr.s(hash=True) 433 @attr.s(hash=True)
417 class mailmapping(object): 434 class mailmapping(object):
418 '''Represents a username/email key or value in 435 '''Represents a username/email key or value in
419 a mailmap file''' 436 a mailmap file'''
437
420 email = attr.ib() 438 email = attr.ib()
421 name = attr.ib(default=None) 439 name = attr.ib(default=None)
440
422 441
423 def _ismailmaplineinvalid(names, emails): 442 def _ismailmaplineinvalid(names, emails):
424 '''Returns True if the parsed names and emails 443 '''Returns True if the parsed names and emails
425 in a mailmap entry are invalid. 444 in a mailmap entry are invalid.
426 445
441 >>> emails = [b'proper@email.com', b'commit@email.com'] 460 >>> emails = [b'proper@email.com', b'commit@email.com']
442 >>> _ismailmaplineinvalid(names, emails) 461 >>> _ismailmaplineinvalid(names, emails)
443 False 462 False
444 ''' 463 '''
445 return not emails or not names and len(emails) < 2 464 return not emails or not names and len(emails) < 2
465
446 466
447 def parsemailmap(mailmapcontent): 467 def parsemailmap(mailmapcontent):
448 """Parses data in the .mailmap format 468 """Parses data in the .mailmap format
449 469
450 >>> mmdata = b"\\n".join([ 470 >>> mmdata = b"\\n".join([
513 # name or a second email 533 # name or a second email
514 if _ismailmaplineinvalid(names, emails): 534 if _ismailmaplineinvalid(names, emails):
515 continue 535 continue
516 536
517 mailmapkey = mailmapping( 537 mailmapkey = mailmapping(
518 email=emails[-1], 538 email=emails[-1], name=names[-1] if len(names) == 2 else None,
519 name=names[-1] if len(names) == 2 else None,
520 ) 539 )
521 540
522 mailmap[mailmapkey] = mailmapping( 541 mailmap[mailmapkey] = mailmapping(
523 email=emails[0], 542 email=emails[0], name=names[0] if names else None,
524 name=names[0] if names else None,
525 ) 543 )
526 544
527 return mailmap 545 return mailmap
546
528 547
529 def mapname(mailmap, author): 548 def mapname(mailmap, author):
530 """Returns the author field according to the mailmap cache, or 549 """Returns the author field according to the mailmap cache, or
531 the original author field. 550 the original author field.
532 551
571 return '%s <%s>' % ( 590 return '%s <%s>' % (
572 proper.name if proper.name else commit.name, 591 proper.name if proper.name else commit.name,
573 proper.email if proper.email else commit.email, 592 proper.email if proper.email else commit.email,
574 ) 593 )
575 594
595
576 _correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>$') 596 _correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>$')
597
577 598
578 def isauthorwellformed(author): 599 def isauthorwellformed(author):
579 '''Return True if the author field is well formed 600 '''Return True if the author field is well formed
580 (ie "Contributor Name <contrib@email.dom>") 601 (ie "Contributor Name <contrib@email.dom>")
581 602
594 >>> isauthorwellformed(b'Bad Author <author>') 615 >>> isauthorwellformed(b'Bad Author <author>')
595 False 616 False
596 ''' 617 '''
597 return _correctauthorformat.match(author) is not None 618 return _correctauthorformat.match(author) is not None
598 619
620
599 def ellipsis(text, maxlength=400): 621 def ellipsis(text, maxlength=400):
600 """Trim string to at most maxlength (default: 400) columns in display.""" 622 """Trim string to at most maxlength (default: 400) columns in display."""
601 return encoding.trim(text, maxlength, ellipsis='...') 623 return encoding.trim(text, maxlength, ellipsis='...')
624
602 625
603 def escapestr(s): 626 def escapestr(s):
604 if isinstance(s, memoryview): 627 if isinstance(s, memoryview):
605 s = bytes(s) 628 s = bytes(s)
606 # call underlying function of s.encode('string_escape') directly for 629 # call underlying function of s.encode('string_escape') directly for
607 # Python 3 compatibility 630 # Python 3 compatibility
608 return codecs.escape_encode(s)[0] 631 return codecs.escape_encode(s)[0]
609 632
633
610 def unescapestr(s): 634 def unescapestr(s):
611 return codecs.escape_decode(s)[0] 635 return codecs.escape_decode(s)[0]
636
612 637
613 def forcebytestr(obj): 638 def forcebytestr(obj):
614 """Portably format an arbitrary object (e.g. exception) into a byte 639 """Portably format an arbitrary object (e.g. exception) into a byte
615 string.""" 640 string."""
616 try: 641 try:
617 return pycompat.bytestr(obj) 642 return pycompat.bytestr(obj)
618 except UnicodeEncodeError: 643 except UnicodeEncodeError:
619 # non-ascii string, may be lossy 644 # non-ascii string, may be lossy
620 return pycompat.bytestr(encoding.strtolocal(str(obj))) 645 return pycompat.bytestr(encoding.strtolocal(str(obj)))
621 646
647
622 def uirepr(s): 648 def uirepr(s):
623 # Avoid double backslash in Windows path repr() 649 # Avoid double backslash in Windows path repr()
624 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\') 650 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
651
625 652
626 # delay import of textwrap 653 # delay import of textwrap
627 def _MBTextWrapper(**kwargs): 654 def _MBTextWrapper(**kwargs):
628 class tw(textwrap.TextWrapper): 655 class tw(textwrap.TextWrapper):
629 """ 656 """
638 In addition, characters classified into 'ambiguous' width are 665 In addition, characters classified into 'ambiguous' width are
639 treated as wide in East Asian area, but as narrow in other. 666 treated as wide in East Asian area, but as narrow in other.
640 667
641 This requires use decision to determine width of such characters. 668 This requires use decision to determine width of such characters.
642 """ 669 """
670
643 def _cutdown(self, ucstr, space_left): 671 def _cutdown(self, ucstr, space_left):
644 l = 0 672 l = 0
645 colwidth = encoding.ucolwidth 673 colwidth = encoding.ucolwidth
646 for i in pycompat.xrange(len(ucstr)): 674 for i in pycompat.xrange(len(ucstr)):
647 l += colwidth(ucstr[i]) 675 l += colwidth(ucstr[i])
710 # fit on *any* line (not just this one). 738 # fit on *any* line (not just this one).
711 if chunks and colwidth(chunks[-1]) > width: 739 if chunks and colwidth(chunks[-1]) > width:
712 self._handle_long_word(chunks, cur_line, cur_len, width) 740 self._handle_long_word(chunks, cur_line, cur_len, width)
713 741
714 # If the last chunk on this line is all whitespace, drop it. 742 # If the last chunk on this line is all whitespace, drop it.
715 if (self.drop_whitespace and 743 if (
716 cur_line and cur_line[-1].strip() == r''): 744 self.drop_whitespace
745 and cur_line
746 and cur_line[-1].strip() == r''
747 ):
717 del cur_line[-1] 748 del cur_line[-1]
718 749
719 # Convert current line back to a string and store it in list 750 # Convert current line back to a string and store it in list
720 # of all lines (return value). 751 # of all lines (return value).
721 if cur_line: 752 if cur_line:
724 return lines 755 return lines
725 756
726 global _MBTextWrapper 757 global _MBTextWrapper
727 _MBTextWrapper = tw 758 _MBTextWrapper = tw
728 return tw(**kwargs) 759 return tw(**kwargs)
760
729 761
730 def wrap(line, width, initindent='', hangindent=''): 762 def wrap(line, width, initindent='', hangindent=''):
731 maxindent = max(len(hangindent), len(initindent)) 763 maxindent = max(len(hangindent), len(initindent))
732 if width <= maxindent: 764 if width <= maxindent:
733 # adjust for weird terminal size 765 # adjust for weird terminal size
734 width = max(78, maxindent + 1) 766 width = max(78, maxindent + 1)
735 line = line.decode(pycompat.sysstr(encoding.encoding), 767 line = line.decode(
736 pycompat.sysstr(encoding.encodingmode)) 768 pycompat.sysstr(encoding.encoding),
737 initindent = initindent.decode(pycompat.sysstr(encoding.encoding), 769 pycompat.sysstr(encoding.encodingmode),
738 pycompat.sysstr(encoding.encodingmode)) 770 )
739 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding), 771 initindent = initindent.decode(
740 pycompat.sysstr(encoding.encodingmode)) 772 pycompat.sysstr(encoding.encoding),
741 wrapper = _MBTextWrapper(width=width, 773 pycompat.sysstr(encoding.encodingmode),
742 initial_indent=initindent, 774 )
743 subsequent_indent=hangindent) 775 hangindent = hangindent.decode(
776 pycompat.sysstr(encoding.encoding),
777 pycompat.sysstr(encoding.encodingmode),
778 )
779 wrapper = _MBTextWrapper(
780 width=width, initial_indent=initindent, subsequent_indent=hangindent
781 )
744 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding)) 782 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
745 783
746 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True, 784
747 '0': False, 'no': False, 'false': False, 'off': False, 785 _booleans = {
748 'never': False} 786 '1': True,
787 'yes': True,
788 'true': True,
789 'on': True,
790 'always': True,
791 '0': False,
792 'no': False,
793 'false': False,
794 'off': False,
795 'never': False,
796 }
797
749 798
750 def parsebool(s): 799 def parsebool(s):
751 """Parse s into a boolean. 800 """Parse s into a boolean.
752 801
753 If s is not a valid boolean, returns None. 802 If s is not a valid boolean, returns None.
754 """ 803 """
755 return _booleans.get(s.lower(), None) 804 return _booleans.get(s.lower(), None)
805
756 806
757 def evalpythonliteral(s): 807 def evalpythonliteral(s):
758 """Evaluate a string containing a Python literal expression""" 808 """Evaluate a string containing a Python literal expression"""
759 # We could backport our tokenizer hack to rewrite '' to u'' if we want 809 # We could backport our tokenizer hack to rewrite '' to u'' if we want
760 if pycompat.ispy3: 810 if pycompat.ispy3: