comparison mercurial/utils/stringutil.py @ 49724:bbbb5213d043

typing: add basic type hints to stringutil.py
author Matt Harbison <matt_harbison@yahoo.com>
date Fri, 04 Nov 2022 22:59:16 -0400
parents d44e3c45f0e4
children 9be765b82a90
comparison
equal deleted inserted replaced
49723:2506c3ac73f4 49724:bbbb5213d043
12 import codecs 12 import codecs
13 import re as remod 13 import re as remod
14 import textwrap 14 import textwrap
15 import types 15 import types
16 16
17 from typing import (
18 Optional,
19 overload,
20 )
21
17 from ..i18n import _ 22 from ..i18n import _
18 from ..thirdparty import attr 23 from ..thirdparty import attr
19 24
20 from .. import ( 25 from .. import (
21 encoding, 26 encoding,
26 # regex special chars pulled from https://bugs.python.org/issue29995 31 # regex special chars pulled from https://bugs.python.org/issue29995
27 # which was part of Python 3.7. 32 # which was part of Python 3.7.
28 _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f') 33 _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
29 _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial} 34 _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
30 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial} 35 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
36
37
38 @overload
39 def reescape(pat: bytes) -> bytes:
40 ...
41
42
43 @overload
44 def reescape(pat: str) -> str:
45 ...
31 46
32 47
33 def reescape(pat): 48 def reescape(pat):
34 """Drop-in replacement for re.escape.""" 49 """Drop-in replacement for re.escape."""
35 # NOTE: it is intentional that this works on unicodes and not 50 # NOTE: it is intentional that this works on unicodes and not
43 if wantuni: 58 if wantuni:
44 return pat 59 return pat
45 return pat.encode('latin1') 60 return pat.encode('latin1')
46 61
47 62
48 def pprint(o, bprefix=False, indent=0, level=0): 63 def pprint(o, bprefix: bool = False, indent: int = 0, level: int = 0) -> bytes:
49 """Pretty print an object.""" 64 """Pretty print an object."""
50 return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level)) 65 return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level))
51 66
52 67
53 def pprintgen(o, bprefix=False, indent=0, level=0): 68 def pprintgen(o, bprefix: bool = False, indent: int = 0, level: int = 0):
54 """Pretty print an object to a generator of atoms. 69 """Pretty print an object to a generator of atoms.
55 70
56 ``bprefix`` is a flag influencing whether bytestrings are preferred with 71 ``bprefix`` is a flag influencing whether bytestrings are preferred with
57 a ``b''`` prefix. 72 a ``b''`` prefix.
58 73
248 yield b']' 263 yield b']'
249 else: 264 else:
250 yield pycompat.byterepr(o) 265 yield pycompat.byterepr(o)
251 266
252 267
253 def prettyrepr(o): 268 def prettyrepr(o) -> bytes:
254 """Pretty print a representation of a possibly-nested object""" 269 """Pretty print a representation of a possibly-nested object"""
255 lines = [] 270 lines = []
256 rs = pycompat.byterepr(o) 271 rs = pycompat.byterepr(o)
257 p0 = p1 = 0 272 p0 = p1 = 0
258 while p0 < len(rs): 273 while p0 < len(rs):
279 lines.append((l, rs[p0:q0].rstrip())) 294 lines.append((l, rs[p0:q0].rstrip()))
280 p0, p1 = q0, q1 295 p0, p1 = q0, q1
281 return b'\n'.join(b' ' * l + s for l, s in lines) 296 return b'\n'.join(b' ' * l + s for l, s in lines)
282 297
283 298
284 def buildrepr(r): 299 def buildrepr(r) -> bytes:
285 """Format an optional printable representation from unexpanded bits 300 """Format an optional printable representation from unexpanded bits
286 301
287 ======== ================================= 302 ======== =================================
288 type(r) example 303 type(r) example
289 ======== ================================= 304 ======== =================================
303 return r() 318 return r()
304 else: 319 else:
305 return pprint(r) 320 return pprint(r)
306 321
307 322
308 def binary(s): 323 def binary(s: bytes) -> bool:
309 """return true if a string is binary data""" 324 """return true if a string is binary data"""
310 return bool(s and b'\0' in s) 325 return bool(s and b'\0' in s)
311 326
312 327
313 def _splitpattern(pattern): 328 def _splitpattern(pattern: bytes):
314 if pattern.startswith(b're:'): 329 if pattern.startswith(b're:'):
315 return b're', pattern[3:] 330 return b're', pattern[3:]
316 elif pattern.startswith(b'literal:'): 331 elif pattern.startswith(b'literal:'):
317 return b'literal', pattern[8:] 332 return b'literal', pattern[8:]
318 return b'literal', pattern 333 return b'literal', pattern
319 334
320 335
321 def stringmatcher(pattern, casesensitive=True): 336 def stringmatcher(pattern: bytes, casesensitive: bool = True):
322 """ 337 """
323 accepts a string, possibly starting with 're:' or 'literal:' prefix. 338 accepts a string, possibly starting with 're:' or 'literal:' prefix.
324 returns the matcher name, pattern, and matcher function. 339 returns the matcher name, pattern, and matcher function.
325 missing or unknown prefixes are treated as literal matches. 340 missing or unknown prefixes are treated as literal matches.
326 341
377 return kind, pattern, match 392 return kind, pattern, match
378 393
379 raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind) 394 raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
380 395
381 396
382 def substringregexp(pattern, flags=0): 397 def substringregexp(pattern: bytes, flags: int = 0):
383 """Build a regexp object from a string pattern possibly starting with 398 """Build a regexp object from a string pattern possibly starting with
384 're:' or 'literal:' prefix. 399 're:' or 'literal:' prefix.
385 400
386 helper for tests: 401 helper for tests:
387 >>> def test(pattern, *tests): 402 >>> def test(pattern, *tests):
429 return remod.compile(remod.escape(pattern), flags) 444 return remod.compile(remod.escape(pattern), flags)
430 445
431 raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind) 446 raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
432 447
433 448
434 def shortuser(user): 449 def shortuser(user: bytes) -> bytes:
435 """Return a short representation of a user name or email address.""" 450 """Return a short representation of a user name or email address."""
436 f = user.find(b'@') 451 f = user.find(b'@')
437 if f >= 0: 452 if f >= 0:
438 user = user[:f] 453 user = user[:f]
439 f = user.find(b'<') 454 f = user.find(b'<')
446 if f >= 0: 461 if f >= 0:
447 user = user[:f] 462 user = user[:f]
448 return user 463 return user
449 464
450 465
451 def emailuser(user): 466 def emailuser(user: bytes) -> bytes:
452 """Return the user portion of an email address.""" 467 """Return the user portion of an email address."""
453 f = user.find(b'@') 468 f = user.find(b'@')
454 if f >= 0: 469 if f >= 0:
455 user = user[:f] 470 user = user[:f]
456 f = user.find(b'<') 471 f = user.find(b'<')
457 if f >= 0: 472 if f >= 0:
458 user = user[f + 1 :] 473 user = user[f + 1 :]
459 return user 474 return user
460 475
461 476
462 def email(author): 477 def email(author: bytes) -> bytes:
463 '''get email of author.''' 478 '''get email of author.'''
464 r = author.find(b'>') 479 r = author.find(b'>')
465 if r == -1: 480 if r == -1:
466 r = None 481 r = None
467 return author[author.find(b'<') + 1 : r] 482 return author[author.find(b'<') + 1 : r]
468 483
469 484
470 def person(author): 485 def person(author: bytes) -> bytes:
471 """Returns the name before an email address, 486 """Returns the name before an email address,
472 interpreting it as per RFC 5322 487 interpreting it as per RFC 5322
473 488
474 >>> person(b'foo@bar') 489 >>> person(b'foo@bar')
475 'foo' 490 'foo'
610 ) 625 )
611 626
612 return mailmap 627 return mailmap
613 628
614 629
615 def mapname(mailmap, author): 630 def mapname(mailmap, author: bytes) -> bytes:
616 """Returns the author field according to the mailmap cache, or 631 """Returns the author field according to the mailmap cache, or
617 the original author field. 632 the original author field.
618 633
619 >>> mmdata = b"\\n".join([ 634 >>> mmdata = b"\\n".join([
620 ... b'# Comment', 635 ... b'# Comment',
661 676
662 677
663 _correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$') 678 _correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')
664 679
665 680
666 def isauthorwellformed(author): 681 def isauthorwellformed(author: bytes) -> bool:
667 """Return True if the author field is well formed 682 """Return True if the author field is well formed
668 (ie "Contributor Name <contrib@email.dom>") 683 (ie "Contributor Name <contrib@email.dom>")
669 684
670 >>> isauthorwellformed(b'Good Author <good@author.com>') 685 >>> isauthorwellformed(b'Good Author <good@author.com>')
671 True 686 True
683 False 698 False
684 """ 699 """
685 return _correctauthorformat.match(author) is not None 700 return _correctauthorformat.match(author) is not None
686 701
687 702
688 def firstline(text): 703 def firstline(text: bytes) -> bytes:
689 """Return the first line of the input""" 704 """Return the first line of the input"""
690 # Try to avoid running splitlines() on the whole string 705 # Try to avoid running splitlines() on the whole string
691 i = text.find(b'\n') 706 i = text.find(b'\n')
692 if i != -1: 707 if i != -1:
693 text = text[:i] 708 text = text[:i]
695 return text.splitlines()[0] 710 return text.splitlines()[0]
696 except IndexError: 711 except IndexError:
697 return b'' 712 return b''
698 713
699 714
700 def ellipsis(text, maxlength=400): 715 def ellipsis(text: bytes, maxlength: int = 400) -> bytes:
701 """Trim string to at most maxlength (default: 400) columns in display.""" 716 """Trim string to at most maxlength (default: 400) columns in display."""
702 return encoding.trim(text, maxlength, ellipsis=b'...') 717 return encoding.trim(text, maxlength, ellipsis=b'...')
703 718
704 719
705 def escapestr(s): 720 def escapestr(s: bytes) -> bytes:
721 # "bytes" is also a typing shortcut for bytes, bytearray, and memoryview
706 if isinstance(s, memoryview): 722 if isinstance(s, memoryview):
707 s = bytes(s) 723 s = bytes(s)
708 # call underlying function of s.encode('string_escape') directly for 724 # call underlying function of s.encode('string_escape') directly for
709 # Python 3 compatibility 725 # Python 3 compatibility
710 return codecs.escape_encode(s)[0] # pytype: disable=module-attr 726 return codecs.escape_encode(s)[0] # pytype: disable=module-attr
711 727
712 728
713 def unescapestr(s): 729 def unescapestr(s: bytes) -> bytes:
714 return codecs.escape_decode(s)[0] # pytype: disable=module-attr 730 return codecs.escape_decode(s)[0] # pytype: disable=module-attr
715 731
716 732
717 def forcebytestr(obj): 733 def forcebytestr(obj):
718 """Portably format an arbitrary object (e.g. exception) into a byte 734 """Portably format an arbitrary object (e.g. exception) into a byte
722 except UnicodeEncodeError: 738 except UnicodeEncodeError:
723 # non-ascii string, may be lossy 739 # non-ascii string, may be lossy
724 return pycompat.bytestr(encoding.strtolocal(str(obj))) 740 return pycompat.bytestr(encoding.strtolocal(str(obj)))
725 741
726 742
727 def uirepr(s): 743 def uirepr(s: bytes) -> bytes:
728 # Avoid double backslash in Windows path repr() 744 # Avoid double backslash in Windows path repr()
729 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\') 745 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
730 746
731 747
732 # delay import of textwrap 748 # delay import of textwrap
836 global _MBTextWrapper 852 global _MBTextWrapper
837 _MBTextWrapper = tw 853 _MBTextWrapper = tw
838 return tw(**kwargs) 854 return tw(**kwargs)
839 855
840 856
841 def wrap(line, width, initindent=b'', hangindent=b''): 857 def wrap(
858 line: bytes, width: int, initindent: bytes = b'', hangindent: bytes = b''
859 ) -> bytes:
842 maxindent = max(len(hangindent), len(initindent)) 860 maxindent = max(len(hangindent), len(initindent))
843 if width <= maxindent: 861 if width <= maxindent:
844 # adjust for weird terminal size 862 # adjust for weird terminal size
845 width = max(78, maxindent + 1) 863 width = max(78, maxindent + 1)
846 line = line.decode( 864 line = line.decode(
873 b'off': False, 891 b'off': False,
874 b'never': False, 892 b'never': False,
875 } 893 }
876 894
877 895
878 def parsebool(s): 896 def parsebool(s: bytes) -> Optional[bool]:
879 """Parse s into a boolean. 897 """Parse s into a boolean.
880 898
881 If s is not a valid boolean, returns None. 899 If s is not a valid boolean, returns None.
882 """ 900 """
883 return _booleans.get(s.lower(), None) 901 return _booleans.get(s.lower(), None)
884 902
885 903
886 def parselist(value): 904 # TODO: make arg mandatory (and fix code below?)
905 def parselist(value: Optional[bytes]):
887 """parse a configuration value as a list of comma/space separated strings 906 """parse a configuration value as a list of comma/space separated strings
888 907
889 >>> parselist(b'this,is "a small" ,test') 908 >>> parselist(b'this,is "a small" ,test')
890 ['this', 'is', 'a small', 'test'] 909 ['this', 'is', 'a small', 'test']
891 """ 910 """
971 else: 990 else:
972 result = value 991 result = value
973 return result or [] 992 return result or []
974 993
975 994
976 def evalpythonliteral(s): 995 def evalpythonliteral(s: bytes):
977 """Evaluate a string containing a Python literal expression""" 996 """Evaluate a string containing a Python literal expression"""
978 # We could backport our tokenizer hack to rewrite '' to u'' if we want 997 # We could backport our tokenizer hack to rewrite '' to u'' if we want
979 return ast.literal_eval(s.decode('latin1')) 998 return ast.literal_eval(s.decode('latin1'))