mercurial-scm/hg: comparison mercurial/utils/stringutil.py

equal deleted inserted replaced

-:57875cf423c9
+:2372284d9457
 # regex special chars pulled from https://bugs.python.org/issue29995
 # which was part of Python 3.7.
 _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
 # Table keyed by the ordinal of each special char; every value is that
 # char preceded by a backslash, decoded as latin1 text.
 _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
 # Bytes counterpart: each special char maps to its backslash-escaped form.
 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
 def reescape(pat):
 """Drop-in replacement for re.escape."""
 # NOTE: it is intentional that this works on unicodes and not
 # bytes, as it's only possible to do the escaping with
 pat = pat.translate(_regexescapemap)
 if wantuni:
 return pat
 return pat.encode('latin1')
 def pprint(o, bprefix=False, indent=0, level=0):
     """Pretty print an object.
     Every argument is forwarded unchanged to ``pprintgen``; the atoms it
     yields are concatenated into one bytes value, which is returned.
     """
     atoms = pprintgen(o, bprefix=bprefix, indent=indent, level=level)
     return b''.join(atoms)
 def pprintgen(o, bprefix=False, indent=0, level=0):
 """Pretty print an object to a generator of atoms.
 ``bprefix`` is a flag influencing whether bytestrings are preferred with
 level += 1
 yield '\n'
 yield ' ' * (level * indent)
 for i, a in enumerate(o):
-for chunk in pprintgen(a, bprefix=bprefix, indent=indent,
+for chunk in pprintgen(
-level=level):
+a, bprefix=bprefix, indent=indent, level=level
+):
 yield chunk
 if i + 1 < len(o):
 if indent:
 yield ',\n'
 level += 1
 yield '\n'
 yield ' ' * (level * indent)
 for i, (k, v) in enumerate(sorted(o.items())):
-for chunk in pprintgen(k, bprefix=bprefix, indent=indent,
+for chunk in pprintgen(
-level=level):
+k, bprefix=bprefix, indent=indent, level=level
+):
 yield chunk
 yield ': '
-for chunk in pprintgen(v, bprefix=bprefix, indent=indent,
+for chunk in pprintgen(
-level=level):
+v, bprefix=bprefix, indent=indent, level=level
+):
 yield chunk
 if i + 1 < len(o):
 if indent:
 yield ',\n'
 level += 1
 yield '\n'
 yield ' ' * (level * indent)
 for i, k in enumerate(sorted(o)):
-for chunk in pprintgen(k, bprefix=bprefix, indent=indent,
+for chunk in pprintgen(
-level=level):
+k, bprefix=bprefix, indent=indent, level=level
+):
 yield chunk
 if i + 1 < len(o):
 if indent:
 yield ',\n'
 level += 1
 yield '\n'
 yield ' ' * (level * indent)
 for i, a in enumerate(o):
-for chunk in pprintgen(a, bprefix=bprefix, indent=indent,
+for chunk in pprintgen(
-level=level):
+a, bprefix=bprefix, indent=indent, level=level
+):
 yield chunk
 if i + 1 < len(o):
 if indent:
 yield ',\n'
 try:
 nextitem = next(o)
 except StopIteration:
 last = True
-for chunk in pprintgen(current, bprefix=bprefix, indent=indent,
+for chunk in pprintgen(
-level=level):
+current, bprefix=bprefix, indent=indent, level=level
+):
 yield chunk
 if not last:
 if indent:
 yield ',\n'
 yield ' ' * (level * indent)
 yield ']'
 else:
 yield pycompat.byterepr(o)
 def prettyrepr(o):
 """Pretty print a representation of a possibly-nested object"""
 lines = []
 rs = pycompat.byterepr(o)
 assert l >= 0
 lines.append((l, rs[p0:q0].rstrip()))
 p0, p1 = q0, q1
 return '\n'.join('  ' * l + s for l, s in lines)
 def buildrepr(r):
 """Format an optional printable representation from unexpanded bits
 ========  =================================
 type(r)   example
 elif callable(r):
 return r()
 else:
 return pprint(r)
 def binary(s):
     """return true if a string is binary data
     A value is reported as binary when it is non-empty and contains a NUL
     byte. Empty values and ``None`` are reported as not binary.
     """
     # An explicit bytes literal keeps the NUL test valid for bytes input
     # even when string literals are not rewritten to bytes at load time.
     return bool(s and b'\0' in s)
 def stringmatcher(pattern, casesensitive=True):
 """
 accepts a string, possibly starting with 're:' or 'literal:' prefix.
 returns the matcher name, pattern, and matcher function.
 flags = 0
 if not casesensitive:
 flags = remod.I
 regex = remod.compile(pattern, flags)
 except remod.error as e:
-raise error.ParseError(_('invalid regular expression: %s')
+raise error.ParseError(_('invalid regular expression: %s') % e)
-% e)
 return 're', pattern, regex.search
 elif pattern.startswith('literal:'):
 pattern = pattern[8:]
 match = pattern.__eq__
 if not casesensitive:
 ipat = encoding.lower(pattern)
 match = lambda s: ipat == encoding.lower(s)
 return 'literal', pattern, match
 def shortuser(user):
 """Return a short representation of a user name or email address."""
 # keep only the text before the first '@'
 f = user.find('@')
 if f >= 0:
 user = user[:f]
 # keep only the text after the first '<'
 f = user.find('<')
 if f >= 0:
-user = user[f + 1:]
+user = user[f + 1 :]
 # keep only the text before the first space
 f = user.find(' ')
 if f >= 0:
 user = user[:f]
 # keep only the text before the first '.'
 f = user.find('.')
 if f >= 0:
 user = user[:f]
 return user
 def emailuser(user):
 """Return the user portion of an email address."""
 # keep only the text before the first '@'
 f = user.find('@')
 if f >= 0:
 user = user[:f]
 # keep only the text after the first '<'
 f = user.find('<')
 if f >= 0:
-user = user[f + 1:]
+user = user[f + 1 :]
 return user
 def email(author):
 '''get email of author.'''
 # slice end: index of the first '>', or None (end of string) when absent
 r = author.find('>')
 if r == -1:
 r = None
 # find('<') is -1 when there is no '<', so the slice then starts at 0
-return author[author.find('<') + 1:r]
+return author[author.find('<') + 1 : r]
 def person(author):
 """Returns the name before an email address,
 interpreting it as per RFC 5322
 if f != -1:
 return author[:f].strip(' "').replace('\\"', '"')
 f = author.find('@')
 return author[:f].replace('.', ' ')
 @attr.s(hash=True)
 class mailmapping(object):
     """A username/email pair used as a key or a value in a mailmap file."""
     # email part of the entry; no default, so it must be supplied
     email = attr.ib()
     # name part of the entry; None when not supplied
     name = attr.ib(default=None)
 def _ismailmaplineinvalid(names, emails):
 '''Returns True if the parsed names and emails
 in a mailmap entry are invalid.
 >>> emails = [b'proper@email.com', b'commit@email.com']
 >>> _ismailmaplineinvalid(names, emails)
 False
 '''
 return not emails or not names and len(emails) < 2
 def parsemailmap(mailmapcontent):
 """Parses data in the .mailmap format
 >>> mmdata = b"\\n".join([
 # name or a second email
 if _ismailmaplineinvalid(names, emails):
 continue
 mailmapkey = mailmapping(
-email=emails[-1],
+email=emails[-1], name=names[-1] if len(names) == 2 else None,
-name=names[-1] if len(names) == 2 else None,
 )
 mailmap[mailmapkey] = mailmapping(
-email=emails[0],
+email=emails[0], name=names[0] if names else None,
-name=names[0] if names else None,
 )
 return mailmap
 def mapname(mailmap, author):
 """Returns the author field according to the mailmap cache, or
 the original author field.
 return '%s <%s>' % (
 proper.name if proper.name else commit.name,
 proper.email if proper.email else commit.email,
 )
 # Pattern for "name <local@domain>": one or more non-'<' characters, one
 # whitespace character, then an angle-bracketed part holding an '@' with at
 # least one non-bracket character on each side.
 _correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>$')
 def isauthorwellformed(author):
     '''Return True if the author field is well formed
     (ie "Contributor Name <contrib@email.dom>")
     The check is a match of ``author`` against ``_correctauthorformat``.
     >>> isauthorwellformed(b'Bad Author <author>')
     False
     '''
     found = _correctauthorformat.match(author)
     return found is not None
 def ellipsis(text, maxlength=400):
     """Trim string to at most maxlength (default: 400) columns in display.
     The work is delegated to ``encoding.trim`` with ``'...'`` as the
     ellipsis marker.
     """
     trimmed = encoding.trim(text, maxlength, ellipsis='...')
     return trimmed
 def escapestr(s):
     """Return the backslash-escaped form of the byte string ``s``.
     ``memoryview`` and ``bytearray`` inputs are copied into ``bytes`` first,
     because the underlying codec function only accepts real ``bytes``.
     """
     if isinstance(s, (memoryview, bytearray)):
         s = bytes(s)
     # call underlying function of s.encode('string_escape') directly for
     # Python 3 compatibility
     return codecs.escape_encode(s)[0]
 def unescapestr(s):
     """Return ``s`` with its backslash escape sequences decoded.
     Uses ``codecs.escape_decode``, the counterpart of the function used by
     ``escapestr``.
     """
     # escape_decode returns a (decoded, consumed-length) pair; only the
     # decoded bytes are of interest here.
     decoded, _consumed = codecs.escape_decode(s)
     return decoded
 def forcebytestr(obj):
     """Portably format an arbitrary object (e.g. exception) into a byte
     string."""
     try:
         converted = pycompat.bytestr(obj)
     except UnicodeEncodeError:
         # non-ascii string, may be lossy
         localized = encoding.strtolocal(str(obj))
         converted = pycompat.bytestr(localized)
     return converted
 def uirepr(s):
     """Return the byte repr of ``s`` with doubled backslashes collapsed."""
     rendered = pycompat.byterepr(pycompat.bytestr(s))
     # Avoid double backslash in Windows path repr()
     return rendered.replace(b'\\\\', b'\\')
 # delay import of textwrap
 def _MBTextWrapper(**kwargs):
 class tw(textwrap.TextWrapper):
 """
 In addition, characters classified into 'ambiguous' width are
 treated as wide in East Asian area, but as narrow in other.
 This requires use decision to determine width of such characters.
 """
 def _cutdown(self, ucstr, space_left):
 l = 0
 colwidth = encoding.ucolwidth
 for i in pycompat.xrange(len(ucstr)):
 l += colwidth(ucstr[i])
 # fit on *any* line (not just this one).
 if chunks and colwidth(chunks[-1]) > width:
 self._handle_long_word(chunks, cur_line, cur_len, width)
 # If the last chunk on this line is all whitespace, drop it.
-if (self.drop_whitespace and
+if (
-cur_line and cur_line[-1].strip() == r''):
+self.drop_whitespace
+and cur_line
+and cur_line[-1].strip() == r''
+):
 del cur_line[-1]
 # Convert current line back to a string and store it in list
 # of all lines (return value).
 if cur_line:
 return lines
 global _MBTextWrapper
 _MBTextWrapper = tw
 return tw(**kwargs)
 def wrap(line, width, initindent='', hangindent=''):
 # Fill ``line`` to ``width`` columns. ``line`` and both indents are decoded
 # with encoding.encoding / encoding.encodingmode, handed to an
 # _MBTextWrapper (initindent for the first line, hangindent for the
 # following ones), and the filled text is encoded back with
 # encoding.encoding.
 maxindent = max(len(hangindent), len(initindent))
 if width <= maxindent:
 # adjust for weird terminal size
 width = max(78, maxindent + 1)
-line = line.decode(pycompat.sysstr(encoding.encoding),
+line = line.decode(
-pycompat.sysstr(encoding.encodingmode))
+pycompat.sysstr(encoding.encoding),
-initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
+pycompat.sysstr(encoding.encodingmode),
-pycompat.sysstr(encoding.encodingmode))
+)
-hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
+initindent = initindent.decode(
-pycompat.sysstr(encoding.encodingmode))
+pycompat.sysstr(encoding.encoding),
-wrapper = _MBTextWrapper(width=width,
+pycompat.sysstr(encoding.encodingmode),
-initial_indent=initindent,
+)
-subsequent_indent=hangindent)
+hangindent = hangindent.decode(
+pycompat.sysstr(encoding.encoding),
+pycompat.sysstr(encoding.encodingmode),
+)
+wrapper = _MBTextWrapper(
+width=width, initial_indent=initindent, subsequent_indent=hangindent
+)
 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
 # Lower-case spellings accepted as booleans, mapped to their value;
 # parsebool() looks its lower-cased argument up in this table.
-_booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
-'0': False, 'no': False, 'false': False, 'off': False,
+_booleans = {
-'never': False}
+'1': True,
+'yes': True,
+'true': True,
+'on': True,
+'always': True,
+'0': False,
+'no': False,
+'false': False,
+'off': False,
+'never': False,
+}
 def parsebool(s):
     """Parse s into a boolean.
     If s is not a valid boolean, returns None.
     """
     # The lookup is case-insensitive: the table holds lower-case keys only.
     key = s.lower()
     return _booleans.get(key)
 def evalpythonliteral(s):
 """Evaluate a string containing a Python literal expression"""
 # We could backport our tokenizer hack to rewrite '' to u'' if we want
 if pycompat.ispy3:

changeset 43076	2372284d9457
parent 40684	e6c9ef5e11a0
child 43077	687b865b95ad