mercurial/utils/stringutil.py
changeset 43076 2372284d9457
parent 40684 e6c9ef5e11a0
child 43077 687b865b95ad
equal deleted inserted replaced
43075:57875cf423c9 43076:2372284d9457
    27 # regex special chars pulled from https://bugs.python.org/issue29995
    27 # regex special chars pulled from https://bugs.python.org/issue29995
    28 # which was part of Python 3.7.
    28 # which was part of Python 3.7.
    29 _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
    29 _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
    30 _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
    30 _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
    31 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
    31 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
       
    32 
    32 
    33 
    33 def reescape(pat):
    34 def reescape(pat):
    34     """Drop-in replacement for re.escape."""
    35     """Drop-in replacement for re.escape."""
    35     # NOTE: it is intentional that this works on unicodes and not
    36     # NOTE: it is intentional that this works on unicodes and not
    36     # bytes, as it's only possible to do the escaping with
    37     # bytes, as it's only possible to do the escaping with
    42     pat = pat.translate(_regexescapemap)
    43     pat = pat.translate(_regexescapemap)
    43     if wantuni:
    44     if wantuni:
    44         return pat
    45         return pat
    45     return pat.encode('latin1')
    46     return pat.encode('latin1')
    46 
    47 
       
    48 
    47 def pprint(o, bprefix=False, indent=0, level=0):
    49 def pprint(o, bprefix=False, indent=0, level=0):
    48     """Pretty print an object."""
    50     """Pretty print an object."""
    49     return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level))
    51     return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level))
       
    52 
    50 
    53 
    51 def pprintgen(o, bprefix=False, indent=0, level=0):
    54 def pprintgen(o, bprefix=False, indent=0, level=0):
    52     """Pretty print an object to a generator of atoms.
    55     """Pretty print an object to a generator of atoms.
    53 
    56 
    54     ``bprefix`` is a flag influencing whether bytestrings are preferred with
    57     ``bprefix`` is a flag influencing whether bytestrings are preferred with
    81             level += 1
    84             level += 1
    82             yield '\n'
    85             yield '\n'
    83             yield ' ' * (level * indent)
    86             yield ' ' * (level * indent)
    84 
    87 
    85         for i, a in enumerate(o):
    88         for i, a in enumerate(o):
    86             for chunk in pprintgen(a, bprefix=bprefix, indent=indent,
    89             for chunk in pprintgen(
    87                                    level=level):
    90                 a, bprefix=bprefix, indent=indent, level=level
       
    91             ):
    88                 yield chunk
    92                 yield chunk
    89 
    93 
    90             if i + 1 < len(o):
    94             if i + 1 < len(o):
    91                 if indent:
    95                 if indent:
    92                     yield ',\n'
    96                     yield ',\n'
   111             level += 1
   115             level += 1
   112             yield '\n'
   116             yield '\n'
   113             yield ' ' * (level * indent)
   117             yield ' ' * (level * indent)
   114 
   118 
   115         for i, (k, v) in enumerate(sorted(o.items())):
   119         for i, (k, v) in enumerate(sorted(o.items())):
   116             for chunk in pprintgen(k, bprefix=bprefix, indent=indent,
   120             for chunk in pprintgen(
   117                                    level=level):
   121                 k, bprefix=bprefix, indent=indent, level=level
       
   122             ):
   118                 yield chunk
   123                 yield chunk
   119 
   124 
   120             yield ': '
   125             yield ': '
   121 
   126 
   122             for chunk in pprintgen(v, bprefix=bprefix, indent=indent,
   127             for chunk in pprintgen(
   123                                    level=level):
   128                 v, bprefix=bprefix, indent=indent, level=level
       
   129             ):
   124                 yield chunk
   130                 yield chunk
   125 
   131 
   126             if i + 1 < len(o):
   132             if i + 1 < len(o):
   127                 if indent:
   133                 if indent:
   128                     yield ',\n'
   134                     yield ',\n'
   147             level += 1
   153             level += 1
   148             yield '\n'
   154             yield '\n'
   149             yield ' ' * (level * indent)
   155             yield ' ' * (level * indent)
   150 
   156 
   151         for i, k in enumerate(sorted(o)):
   157         for i, k in enumerate(sorted(o)):
   152             for chunk in pprintgen(k, bprefix=bprefix, indent=indent,
   158             for chunk in pprintgen(
   153                                    level=level):
   159                 k, bprefix=bprefix, indent=indent, level=level
       
   160             ):
   154                 yield chunk
   161                 yield chunk
   155 
   162 
   156             if i + 1 < len(o):
   163             if i + 1 < len(o):
   157                 if indent:
   164                 if indent:
   158                     yield ',\n'
   165                     yield ',\n'
   177             level += 1
   184             level += 1
   178             yield '\n'
   185             yield '\n'
   179             yield ' ' * (level * indent)
   186             yield ' ' * (level * indent)
   180 
   187 
   181         for i, a in enumerate(o):
   188         for i, a in enumerate(o):
   182             for chunk in pprintgen(a, bprefix=bprefix, indent=indent,
   189             for chunk in pprintgen(
   183                                    level=level):
   190                 a, bprefix=bprefix, indent=indent, level=level
       
   191             ):
   184                 yield chunk
   192                 yield chunk
   185 
   193 
   186             if i + 1 < len(o):
   194             if i + 1 < len(o):
   187                 if indent:
   195                 if indent:
   188                     yield ',\n'
   196                     yield ',\n'
   219             try:
   227             try:
   220                 nextitem = next(o)
   228                 nextitem = next(o)
   221             except StopIteration:
   229             except StopIteration:
   222                 last = True
   230                 last = True
   223 
   231 
   224             for chunk in pprintgen(current, bprefix=bprefix, indent=indent,
   232             for chunk in pprintgen(
   225                                    level=level):
   233                 current, bprefix=bprefix, indent=indent, level=level
       
   234             ):
   226                 yield chunk
   235                 yield chunk
   227 
   236 
   228             if not last:
   237             if not last:
   229                 if indent:
   238                 if indent:
   230                     yield ',\n'
   239                     yield ',\n'
   238             yield ' ' * (level * indent)
   247             yield ' ' * (level * indent)
   239 
   248 
   240         yield ']'
   249         yield ']'
   241     else:
   250     else:
   242         yield pycompat.byterepr(o)
   251         yield pycompat.byterepr(o)
       
   252 
   243 
   253 
   244 def prettyrepr(o):
   254 def prettyrepr(o):
   245     """Pretty print a representation of a possibly-nested object"""
   255     """Pretty print a representation of a possibly-nested object"""
   246     lines = []
   256     lines = []
   247     rs = pycompat.byterepr(o)
   257     rs = pycompat.byterepr(o)
   265         assert l >= 0
   275         assert l >= 0
   266         lines.append((l, rs[p0:q0].rstrip()))
   276         lines.append((l, rs[p0:q0].rstrip()))
   267         p0, p1 = q0, q1
   277         p0, p1 = q0, q1
   268     return '\n'.join('  ' * l + s for l, s in lines)
   278     return '\n'.join('  ' * l + s for l, s in lines)
   269 
   279 
       
   280 
   270 def buildrepr(r):
   281 def buildrepr(r):
   271     """Format an optional printable representation from unexpanded bits
   282     """Format an optional printable representation from unexpanded bits
   272 
   283 
   273     ========  =================================
   284     ========  =================================
   274     type(r)   example
   285     type(r)   example
   288     elif callable(r):
   299     elif callable(r):
   289         return r()
   300         return r()
   290     else:
   301     else:
   291         return pprint(r)
   302         return pprint(r)
   292 
   303 
       
   304 
   293 def binary(s):
   305 def binary(s):
   294     """return true if a string is binary data"""
   306     """return true if a string is binary data"""
   295     return bool(s and '\0' in s)
   307     return bool(s and '\0' in s)
       
   308 
   296 
   309 
   297 def stringmatcher(pattern, casesensitive=True):
   310 def stringmatcher(pattern, casesensitive=True):
   298     """
   311     """
   299     accepts a string, possibly starting with 're:' or 'literal:' prefix.
   312     accepts a string, possibly starting with 're:' or 'literal:' prefix.
   300     returns the matcher name, pattern, and matcher function.
   313     returns the matcher name, pattern, and matcher function.
   338             flags = 0
   351             flags = 0
   339             if not casesensitive:
   352             if not casesensitive:
   340                 flags = remod.I
   353                 flags = remod.I
   341             regex = remod.compile(pattern, flags)
   354             regex = remod.compile(pattern, flags)
   342         except remod.error as e:
   355         except remod.error as e:
   343             raise error.ParseError(_('invalid regular expression: %s')
   356             raise error.ParseError(_('invalid regular expression: %s') % e)
   344                                    % e)
       
   345         return 're', pattern, regex.search
   357         return 're', pattern, regex.search
   346     elif pattern.startswith('literal:'):
   358     elif pattern.startswith('literal:'):
   347         pattern = pattern[8:]
   359         pattern = pattern[8:]
   348 
   360 
   349     match = pattern.__eq__
   361     match = pattern.__eq__
   350 
   362 
   351     if not casesensitive:
   363     if not casesensitive:
   352         ipat = encoding.lower(pattern)
   364         ipat = encoding.lower(pattern)
   353         match = lambda s: ipat == encoding.lower(s)
   365         match = lambda s: ipat == encoding.lower(s)
   354     return 'literal', pattern, match
   366     return 'literal', pattern, match
       
   367 
   355 
   368 
   356 def shortuser(user):
   369 def shortuser(user):
   357     """Return a short representation of a user name or email address."""
   370     """Return a short representation of a user name or email address."""
   358     f = user.find('@')
   371     f = user.find('@')
   359     if f >= 0:
   372     if f >= 0:
   360         user = user[:f]
   373         user = user[:f]
   361     f = user.find('<')
   374     f = user.find('<')
   362     if f >= 0:
   375     if f >= 0:
   363         user = user[f + 1:]
   376         user = user[f + 1 :]
   364     f = user.find(' ')
   377     f = user.find(' ')
   365     if f >= 0:
   378     if f >= 0:
   366         user = user[:f]
   379         user = user[:f]
   367     f = user.find('.')
   380     f = user.find('.')
   368     if f >= 0:
   381     if f >= 0:
   369         user = user[:f]
   382         user = user[:f]
   370     return user
   383     return user
   371 
   384 
       
   385 
   372 def emailuser(user):
   386 def emailuser(user):
   373     """Return the user portion of an email address."""
   387     """Return the user portion of an email address."""
   374     f = user.find('@')
   388     f = user.find('@')
   375     if f >= 0:
   389     if f >= 0:
   376         user = user[:f]
   390         user = user[:f]
   377     f = user.find('<')
   391     f = user.find('<')
   378     if f >= 0:
   392     if f >= 0:
   379         user = user[f + 1:]
   393         user = user[f + 1 :]
   380     return user
   394     return user
       
   395 
   381 
   396 
   382 def email(author):
   397 def email(author):
   383     '''get email of author.'''
   398     '''get email of author.'''
   384     r = author.find('>')
   399     r = author.find('>')
   385     if r == -1:
   400     if r == -1:
   386         r = None
   401         r = None
   387     return author[author.find('<') + 1:r]
   402     return author[author.find('<') + 1 : r]
       
   403 
   388 
   404 
   389 def person(author):
   405 def person(author):
   390     """Returns the name before an email address,
   406     """Returns the name before an email address,
   391     interpreting it as per RFC 5322
   407     interpreting it as per RFC 5322
   392 
   408 
   411     if f != -1:
   427     if f != -1:
   412         return author[:f].strip(' "').replace('\\"', '"')
   428         return author[:f].strip(' "').replace('\\"', '"')
   413     f = author.find('@')
   429     f = author.find('@')
   414     return author[:f].replace('.', ' ')
   430     return author[:f].replace('.', ' ')
   415 
   431 
       
   432 
   416 @attr.s(hash=True)
   433 @attr.s(hash=True)
   417 class mailmapping(object):
   434 class mailmapping(object):
   418     '''Represents a username/email key or value in
   435     '''Represents a username/email key or value in
   419     a mailmap file'''
   436     a mailmap file'''
       
   437 
   420     email = attr.ib()
   438     email = attr.ib()
   421     name = attr.ib(default=None)
   439     name = attr.ib(default=None)
       
   440 
   422 
   441 
   423 def _ismailmaplineinvalid(names, emails):
   442 def _ismailmaplineinvalid(names, emails):
   424     '''Returns True if the parsed names and emails
   443     '''Returns True if the parsed names and emails
   425     in a mailmap entry are invalid.
   444     in a mailmap entry are invalid.
   426 
   445 
   441     >>> emails = [b'proper@email.com', b'commit@email.com']
   460     >>> emails = [b'proper@email.com', b'commit@email.com']
   442     >>> _ismailmaplineinvalid(names, emails)
   461     >>> _ismailmaplineinvalid(names, emails)
   443     False
   462     False
   444     '''
   463     '''
   445     return not emails or not names and len(emails) < 2
   464     return not emails or not names and len(emails) < 2
       
   465 
   446 
   466 
   447 def parsemailmap(mailmapcontent):
   467 def parsemailmap(mailmapcontent):
   448     """Parses data in the .mailmap format
   468     """Parses data in the .mailmap format
   449 
   469 
   450     >>> mmdata = b"\\n".join([
   470     >>> mmdata = b"\\n".join([
   513         # name or a second email
   533         # name or a second email
   514         if _ismailmaplineinvalid(names, emails):
   534         if _ismailmaplineinvalid(names, emails):
   515             continue
   535             continue
   516 
   536 
   517         mailmapkey = mailmapping(
   537         mailmapkey = mailmapping(
   518             email=emails[-1],
   538             email=emails[-1], name=names[-1] if len(names) == 2 else None,
   519             name=names[-1] if len(names) == 2 else None,
       
   520         )
   539         )
   521 
   540 
   522         mailmap[mailmapkey] = mailmapping(
   541         mailmap[mailmapkey] = mailmapping(
   523             email=emails[0],
   542             email=emails[0], name=names[0] if names else None,
   524             name=names[0] if names else None,
       
   525         )
   543         )
   526 
   544 
   527     return mailmap
   545     return mailmap
       
   546 
   528 
   547 
   529 def mapname(mailmap, author):
   548 def mapname(mailmap, author):
   530     """Returns the author field according to the mailmap cache, or
   549     """Returns the author field according to the mailmap cache, or
   531     the original author field.
   550     the original author field.
   532 
   551 
   571     return '%s <%s>' % (
   590     return '%s <%s>' % (
   572         proper.name if proper.name else commit.name,
   591         proper.name if proper.name else commit.name,
   573         proper.email if proper.email else commit.email,
   592         proper.email if proper.email else commit.email,
   574     )
   593     )
   575 
   594 
       
   595 
   576 _correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>$')
   596 _correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>$')
       
   597 
   577 
   598 
   578 def isauthorwellformed(author):
   599 def isauthorwellformed(author):
   579     '''Return True if the author field is well formed
   600     '''Return True if the author field is well formed
   580     (ie "Contributor Name <contrib@email.dom>")
   601     (ie "Contributor Name <contrib@email.dom>")
   581 
   602 
   594     >>> isauthorwellformed(b'Bad Author <author>')
   615     >>> isauthorwellformed(b'Bad Author <author>')
   595     False
   616     False
   596     '''
   617     '''
   597     return _correctauthorformat.match(author) is not None
   618     return _correctauthorformat.match(author) is not None
   598 
   619 
       
   620 
   599 def ellipsis(text, maxlength=400):
   621 def ellipsis(text, maxlength=400):
   600     """Trim string to at most maxlength (default: 400) columns in display."""
   622     """Trim string to at most maxlength (default: 400) columns in display."""
   601     return encoding.trim(text, maxlength, ellipsis='...')
   623     return encoding.trim(text, maxlength, ellipsis='...')
       
   624 
   602 
   625 
   603 def escapestr(s):
   626 def escapestr(s):
   604     if isinstance(s, memoryview):
   627     if isinstance(s, memoryview):
   605         s = bytes(s)
   628         s = bytes(s)
   606     # call underlying function of s.encode('string_escape') directly for
   629     # call underlying function of s.encode('string_escape') directly for
   607     # Python 3 compatibility
   630     # Python 3 compatibility
   608     return codecs.escape_encode(s)[0]
   631     return codecs.escape_encode(s)[0]
   609 
   632 
       
   633 
   610 def unescapestr(s):
   634 def unescapestr(s):
   611     return codecs.escape_decode(s)[0]
   635     return codecs.escape_decode(s)[0]
       
   636 
   612 
   637 
   613 def forcebytestr(obj):
   638 def forcebytestr(obj):
   614     """Portably format an arbitrary object (e.g. exception) into a byte
   639     """Portably format an arbitrary object (e.g. exception) into a byte
   615     string."""
   640     string."""
   616     try:
   641     try:
   617         return pycompat.bytestr(obj)
   642         return pycompat.bytestr(obj)
   618     except UnicodeEncodeError:
   643     except UnicodeEncodeError:
   619         # non-ascii string, may be lossy
   644         # non-ascii string, may be lossy
   620         return pycompat.bytestr(encoding.strtolocal(str(obj)))
   645         return pycompat.bytestr(encoding.strtolocal(str(obj)))
   621 
   646 
       
   647 
   622 def uirepr(s):
   648 def uirepr(s):
   623     # Avoid double backslash in Windows path repr()
   649     # Avoid double backslash in Windows path repr()
   624     return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
   650     return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
       
   651 
   625 
   652 
   626 # delay import of textwrap
   653 # delay import of textwrap
   627 def _MBTextWrapper(**kwargs):
   654 def _MBTextWrapper(**kwargs):
   628     class tw(textwrap.TextWrapper):
   655     class tw(textwrap.TextWrapper):
   629         """
   656         """
   638         In addition, characters classified into 'ambiguous' width are
   665         In addition, characters classified into 'ambiguous' width are
   639         treated as wide in East Asian area, but as narrow in other.
   666         treated as wide in East Asian area, but as narrow in other.
   640 
   667 
   641         This requires use decision to determine width of such characters.
   668         This requires use decision to determine width of such characters.
   642         """
   669         """
       
   670 
   643         def _cutdown(self, ucstr, space_left):
   671         def _cutdown(self, ucstr, space_left):
   644             l = 0
   672             l = 0
   645             colwidth = encoding.ucolwidth
   673             colwidth = encoding.ucolwidth
   646             for i in pycompat.xrange(len(ucstr)):
   674             for i in pycompat.xrange(len(ucstr)):
   647                 l += colwidth(ucstr[i])
   675                 l += colwidth(ucstr[i])
   710                 # fit on *any* line (not just this one).
   738                 # fit on *any* line (not just this one).
   711                 if chunks and colwidth(chunks[-1]) > width:
   739                 if chunks and colwidth(chunks[-1]) > width:
   712                     self._handle_long_word(chunks, cur_line, cur_len, width)
   740                     self._handle_long_word(chunks, cur_line, cur_len, width)
   713 
   741 
   714                 # If the last chunk on this line is all whitespace, drop it.
   742                 # If the last chunk on this line is all whitespace, drop it.
   715                 if (self.drop_whitespace and
   743                 if (
   716                     cur_line and cur_line[-1].strip() == r''):
   744                     self.drop_whitespace
       
   745                     and cur_line
       
   746                     and cur_line[-1].strip() == r''
       
   747                 ):
   717                     del cur_line[-1]
   748                     del cur_line[-1]
   718 
   749 
   719                 # Convert current line back to a string and store it in list
   750                 # Convert current line back to a string and store it in list
   720                 # of all lines (return value).
   751                 # of all lines (return value).
   721                 if cur_line:
   752                 if cur_line:
   724             return lines
   755             return lines
   725 
   756 
   726     global _MBTextWrapper
   757     global _MBTextWrapper
   727     _MBTextWrapper = tw
   758     _MBTextWrapper = tw
   728     return tw(**kwargs)
   759     return tw(**kwargs)
       
   760 
   729 
   761 
   730 def wrap(line, width, initindent='', hangindent=''):
   762 def wrap(line, width, initindent='', hangindent=''):
   731     maxindent = max(len(hangindent), len(initindent))
   763     maxindent = max(len(hangindent), len(initindent))
   732     if width <= maxindent:
   764     if width <= maxindent:
   733         # adjust for weird terminal size
   765         # adjust for weird terminal size
   734         width = max(78, maxindent + 1)
   766         width = max(78, maxindent + 1)
   735     line = line.decode(pycompat.sysstr(encoding.encoding),
   767     line = line.decode(
   736                        pycompat.sysstr(encoding.encodingmode))
   768         pycompat.sysstr(encoding.encoding),
   737     initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
   769         pycompat.sysstr(encoding.encodingmode),
   738                                    pycompat.sysstr(encoding.encodingmode))
   770     )
   739     hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
   771     initindent = initindent.decode(
   740                                    pycompat.sysstr(encoding.encodingmode))
   772         pycompat.sysstr(encoding.encoding),
   741     wrapper = _MBTextWrapper(width=width,
   773         pycompat.sysstr(encoding.encodingmode),
   742                              initial_indent=initindent,
   774     )
   743                              subsequent_indent=hangindent)
   775     hangindent = hangindent.decode(
       
   776         pycompat.sysstr(encoding.encoding),
       
   777         pycompat.sysstr(encoding.encodingmode),
       
   778     )
       
   779     wrapper = _MBTextWrapper(
       
   780         width=width, initial_indent=initindent, subsequent_indent=hangindent
       
   781     )
   744     return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
   782     return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
   745 
   783 
   746 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
   784 
   747              '0': False, 'no': False, 'false': False, 'off': False,
   785 _booleans = {
   748              'never': False}
   786     '1': True,
       
   787     'yes': True,
       
   788     'true': True,
       
   789     'on': True,
       
   790     'always': True,
       
   791     '0': False,
       
   792     'no': False,
       
   793     'false': False,
       
   794     'off': False,
       
   795     'never': False,
       
   796 }
       
   797 
   749 
   798 
   750 def parsebool(s):
   799 def parsebool(s):
   751     """Parse s into a boolean.
   800     """Parse s into a boolean.
   752 
   801 
   753     If s is not a valid boolean, returns None.
   802     If s is not a valid boolean, returns None.
   754     """
   803     """
   755     return _booleans.get(s.lower(), None)
   804     return _booleans.get(s.lower(), None)
       
   805 
   756 
   806 
   757 def evalpythonliteral(s):
   807 def evalpythonliteral(s):
   758     """Evaluate a string containing a Python literal expression"""
   808     """Evaluate a string containing a Python literal expression"""
   759     # We could backport our tokenizer hack to rewrite '' to u'' if we want
   809     # We could backport our tokenizer hack to rewrite '' to u'' if we want
   760     if pycompat.ispy3:
   810     if pycompat.ispy3: