mercurial/utils/stringutil.py
author Matt Harbison <matt_harbison@yahoo.com>
Sun, 05 Jan 2025 22:26:16 -0500
changeset 52644 e627cc25b6f3
parent 51859 f4733654f144
child 52691 279e217d6041
permissions -rw-r--r--
pyupgrade: rewrite `yield` statements in a loop to `yield from` This is the `legacy` fixer in `pyupgrade`, with the `yield` statement yielding loop commented back in. This seems to help pytype in some cases, and hurt it in others. But that can be manually fixed later. Note that it's possibly buggy in that it aggressively changed `import-checker.py` to `yield from 'fcntl', 'grp', 'pwd', 'select', 'termios': # Unix only`, which is invalid syntax. Possibly it needed help from the token fixer that I've disabled locally (because that wants to make a bunch of unrelated changes). Just change those few places to yield from a list, to avoid having to constantly revert that.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
37083
f99d64e8a4e4 stringutil: move generic string helpers to new module
Yuya Nishihara <yuya@tcha.org>
parents: 37082
diff changeset
     1
# stringutil.py - utility for generic string formatting, parsing, etc.
8226
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
     2
#
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
     3
#  Copyright 2005 K. Thananchayan <thananck@yahoo.com>
46819
d4ba4d51f85f contributor: change mentions of mpm to olivia
Rapha?l Gom?s <rgomes@octobus.net>
parents: 45942
diff changeset
     4
#  Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
8226
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
     5
#  Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
     6
#
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
     7
# This software may be used and distributed according to the terms of the
10263
25e572394f5c Update license to GPLv2+
Matt Mackall <mpm@selenic.com>
parents: 9996
diff changeset
     8
# GNU General Public License version 2 or any later version.
1082
ce96e316278a Update util.py docstrings, fix walk test
mpm@selenic.com
parents: 1081
diff changeset
     9
51859
f4733654f144 typing: add `from __future__ import annotations` to most files
Matt Harbison <matt_harbison@yahoo.com>
parents: 51828
diff changeset
    10
from __future__ import annotations
1082
ce96e316278a Update util.py docstrings, fix walk test
mpm@selenic.com
parents: 1081
diff changeset
    11
37476
e9dea82ea1f3 wireproto: convert python literal to object without using unsafe eval()
Yuya Nishihara <yuya@tcha.org>
parents: 37322
diff changeset
    12
import ast
31453
3b7a6941a6ef py3: call codecs.escape_encode() directly
Yuya Nishihara <yuya@tcha.org>
parents: 31451
diff changeset
    13
import codecs
21907
7e5dfa00e3c2 util: rename 're' to 'remod'
Siddharth Agarwal <sid0@fb.com>
parents: 21857
diff changeset
    14
import re as remod
27358
ac839ee45b6a util: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 27357
diff changeset
    15
import textwrap
39296
ce145f8eface stringutil: teach pprint() to recognize generators
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39063
diff changeset
    16
import types
51725
278af66e6595 typing: induce pytype to use the standard `attr` instead of the vendored copy
Matt Harbison <matt_harbison@yahoo.com>
parents: 51699
diff changeset
    17
import typing
3769
96095d9ff1f8 Add encoding detection
Matt Mackall <mpm@selenic.com>
parents: 3767
diff changeset
    18
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    19
from typing import (
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    20
    Optional,
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    21
    overload,
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    22
)
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    23
37083
f99d64e8a4e4 stringutil: move generic string helpers to new module
Yuya Nishihara <yuya@tcha.org>
parents: 37082
diff changeset
    24
from ..i18n import _
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
    25
from ..thirdparty import attr
37083
f99d64e8a4e4 stringutil: move generic string helpers to new module
Yuya Nishihara <yuya@tcha.org>
parents: 37082
diff changeset
    26
51725
278af66e6595 typing: induce pytype to use the standard `attr` instead of the vendored copy
Matt Harbison <matt_harbison@yahoo.com>
parents: 51699
diff changeset
    27
# Force pytype to use the non-vendored package
278af66e6595 typing: induce pytype to use the standard `attr` instead of the vendored copy
Matt Harbison <matt_harbison@yahoo.com>
parents: 51699
diff changeset
    28
if typing.TYPE_CHECKING:
278af66e6595 typing: induce pytype to use the standard `attr` instead of the vendored copy
Matt Harbison <matt_harbison@yahoo.com>
parents: 51699
diff changeset
    29
    # noinspection PyPackageRequirements
278af66e6595 typing: induce pytype to use the standard `attr` instead of the vendored copy
Matt Harbison <matt_harbison@yahoo.com>
parents: 51699
diff changeset
    30
    import attr
278af66e6595 typing: induce pytype to use the standard `attr` instead of the vendored copy
Matt Harbison <matt_harbison@yahoo.com>
parents: 51699
diff changeset
    31
37083
f99d64e8a4e4 stringutil: move generic string helpers to new module
Yuya Nishihara <yuya@tcha.org>
parents: 37082
diff changeset
    32
from .. import (
27358
ac839ee45b6a util: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 27357
diff changeset
    33
    encoding,
ac839ee45b6a util: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 27357
diff changeset
    34
    error,
28818
6041fb8f2da8 pycompat: add empty and queue to handle py3 divergence
timeless <timeless@mozdev.org>
parents: 28497
diff changeset
    35
    pycompat,
27358
ac839ee45b6a util: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 27357
diff changeset
    36
)
37010
8453699a1f21 util: observable proxy objects for sockets
Gregory Szorc <gregory.szorc@gmail.com>
parents: 36991
diff changeset
    37
38474
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    38
# regex special chars pulled from https://bugs.python.org/issue29995
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    39
# which was part of Python 3.7.
38477
de275ab362cb stringutil: update list of re-special characters to include &~
Augie Fackler <augie@google.com>
parents: 38474
diff changeset
    40
_respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
38474
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    41
_regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
40684
e6c9ef5e11a0 match: provide and use a quick way to escape a single byte
Boris Feld <boris.feld@octobus.net>
parents: 40276
diff changeset
    42
regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
38474
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    43
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
    44
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    45
@overload
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    46
def reescape(pat: bytes) -> bytes:
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    47
    ...
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    48
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    49
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    50
@overload
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    51
def reescape(pat: str) -> str:
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    52
    ...
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    53
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    54
38474
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    55
def reescape(pat):
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    56
    """Drop-in replacement for re.escape."""
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    57
    # NOTE: it is intentional that this works on unicodes and not
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    58
    # bytes, as it's only possible to do the escaping with
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    59
    # unicode.translate, not bytes.translate. Sigh.
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    60
    wantuni = True
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    61
    if isinstance(pat, bytes):
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    62
        wantuni = False
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    63
        pat = pat.decode('latin1')
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    64
    pat = pat.translate(_regexescapemap)
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    65
    if wantuni:
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    66
        return pat
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    67
    return pat.encode('latin1')
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    68
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
    69
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    70
def pprint(o, bprefix: bool = False, indent: int = 0, level: int = 0) -> bytes:
37300
2f859ad7ed8c stringutil: add function to pretty print an object
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37290
diff changeset
    71
    """Pretty print an object."""
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
    72
    return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level))
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
    73
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
    74
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    75
def pprintgen(o, bprefix: bool = False, indent: int = 0, level: int = 0):
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
    76
    """Pretty print an object to a generator of atoms.
39353
0d21b1f1722c stringutil: refactor core of pprint so it emits chunks
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39296
diff changeset
    77
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
    78
    ``bprefix`` is a flag influencing whether bytestrings are preferred with
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
    79
    a ``b''`` prefix.
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
    80
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
    81
    ``indent`` controls whether collections and nested data structures
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
    82
    span multiple lines via the indentation amount in spaces. By default,
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
    83
    no newlines are emitted.
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
    84
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
    85
    ``level`` specifies the initial indent level. Used if ``indent > 0``.
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
    86
    """
39353
0d21b1f1722c stringutil: refactor core of pprint so it emits chunks
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39296
diff changeset
    87
37619
68132a95df31 stringutil: support more types with pprint()
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37476
diff changeset
    88
    if isinstance(o, bytes):
37750
f7194c925003 stringutil: make b prefixes on string output optional
Augie Fackler <augie@google.com>
parents: 37749
diff changeset
    89
        if bprefix:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    90
            yield b"b'%s'" % escapestr(o)
39353
0d21b1f1722c stringutil: refactor core of pprint so it emits chunks
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39296
diff changeset
    91
        else:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    92
            yield b"'%s'" % escapestr(o)
37619
68132a95df31 stringutil: support more types with pprint()
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37476
diff changeset
    93
    elif isinstance(o, bytearray):
68132a95df31 stringutil: support more types with pprint()
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37476
diff changeset
    94
        # codecs.escape_encode() can't handle bytearray, so escapestr fails
68132a95df31 stringutil: support more types with pprint()
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37476
diff changeset
    95
        # without coercion.
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    96
        yield b"bytearray['%s']" % escapestr(bytes(o))
37300
2f859ad7ed8c stringutil: add function to pretty print an object
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37290
diff changeset
    97
    elif isinstance(o, list):
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
    98
        if not o:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    99
            yield b'[]'
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   100
            return
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   101
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   102
        yield b'['
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   103
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   104
        if indent:
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
   105
            level += 1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   106
            yield b'\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   107
            yield b' ' * (level * indent)
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   108
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   109
        for i, a in enumerate(o):
52644
e627cc25b6f3 pyupgrade: rewrite `yield` statements in a loop to `yield from`
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
   110
            yield from pprintgen(a, bprefix=bprefix, indent=indent, level=level)
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   111
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   112
            if i + 1 < len(o):
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   113
                if indent:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   114
                    yield b',\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   115
                    yield b' ' * (level * indent)
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   116
                else:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   117
                    yield b', '
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   118
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   119
        if indent:
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
   120
            level -= 1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   121
            yield b'\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   122
            yield b' ' * (level * indent)
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   123
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   124
        yield b']'
37300
2f859ad7ed8c stringutil: add function to pretty print an object
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37290
diff changeset
   125
    elif isinstance(o, dict):
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   126
        if not o:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   127
            yield b'{}'
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   128
            return
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   129
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   130
        yield b'{'
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   131
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   132
        if indent:
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
   133
            level += 1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   134
            yield b'\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   135
            yield b' ' * (level * indent)
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   136
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   137
        for i, (k, v) in enumerate(sorted(o.items())):
52644
e627cc25b6f3 pyupgrade: rewrite `yield` statements in a loop to `yield from`
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
   138
            yield from pprintgen(k, bprefix=bprefix, indent=indent, level=level)
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   139
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   140
            yield b': '
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   141
52644
e627cc25b6f3 pyupgrade: rewrite `yield` statements in a loop to `yield from`
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
   142
            yield from pprintgen(v, bprefix=bprefix, indent=indent, level=level)
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   143
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   144
            if i + 1 < len(o):
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   145
                if indent:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   146
                    yield b',\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   147
                    yield b' ' * (level * indent)
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   148
                else:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   149
                    yield b', '
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   150
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   151
        if indent:
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
   152
            level -= 1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   153
            yield b'\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   154
            yield b' ' * (level * indent)
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   155
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   156
        yield b'}'
39051
2aebe138ef6e stringutil: teach pprint about sets
Augie Fackler <augie@google.com>
parents: 38783
diff changeset
   157
    elif isinstance(o, set):
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   158
        if not o:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   159
            yield b'set([])'
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   160
            return
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   161
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   162
        yield b'set(['
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   163
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   164
        if indent:
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
   165
            level += 1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   166
            yield b'\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   167
            yield b' ' * (level * indent)
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   168
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   169
        for i, k in enumerate(sorted(o)):
52644
e627cc25b6f3 pyupgrade: rewrite `yield` statements in a loop to `yield from`
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
   170
            yield from pprintgen(k, bprefix=bprefix, indent=indent, level=level)
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   171
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   172
            if i + 1 < len(o):
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   173
                if indent:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   174
                    yield b',\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   175
                    yield b' ' * (level * indent)
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   176
                else:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   177
                    yield b', '
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   178
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   179
        if indent:
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
   180
            level -= 1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   181
            yield b'\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   182
            yield b' ' * (level * indent)
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   183
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   184
        yield b'])'
37932
bf6bb710b40f stringutil: teach pprint about tuples
Augie Fackler <augie@google.com>
parents: 37750
diff changeset
   185
    elif isinstance(o, tuple):
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   186
        if not o:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   187
            yield b'()'
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   188
            return
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   189
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   190
        yield b'('
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   191
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   192
        if indent:
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
   193
            level += 1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   194
            yield b'\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   195
            yield b' ' * (level * indent)
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   196
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   197
        for i, a in enumerate(o):
52644
e627cc25b6f3 pyupgrade: rewrite `yield` statements in a loop to `yield from`
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
   198
            yield from pprintgen(a, bprefix=bprefix, indent=indent, level=level)
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   199
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   200
            if i + 1 < len(o):
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   201
                if indent:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   202
                    yield b',\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   203
                    yield b' ' * (level * indent)
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   204
                else:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   205
                    yield b', '
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   206
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   207
        if indent:
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
   208
            level -= 1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   209
            yield b'\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   210
            yield b' ' * (level * indent)
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   211
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   212
        yield b')'
39296
ce145f8eface stringutil: teach pprint() to recognize generators
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39063
diff changeset
   213
    elif isinstance(o, types.GeneratorType):
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   214
        # Special case of empty generator.
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   215
        try:
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   216
            nextitem = next(o)
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   217
        except StopIteration:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   218
            yield b'gen[]'
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   219
            return
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   220
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   221
        yield b'gen['
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   222
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   223
        if indent:
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
   224
            level += 1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   225
            yield b'\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   226
            yield b' ' * (level * indent)
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   227
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   228
        last = False
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   229
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   230
        while not last:
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   231
            current = nextitem
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   232
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   233
            try:
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   234
                nextitem = next(o)
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   235
            except StopIteration:
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   236
                last = True
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   237
52644
e627cc25b6f3 pyupgrade: rewrite `yield` statements in a loop to `yield from`
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
   238
            yield from pprintgen(
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   239
                current, bprefix=bprefix, indent=indent, level=level
52644
e627cc25b6f3 pyupgrade: rewrite `yield` statements in a loop to `yield from`
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
   240
            )
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   241
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   242
            if not last:
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   243
                if indent:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   244
                    yield b',\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   245
                    yield b' ' * (level * indent)
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   246
                else:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   247
                    yield b', '
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   248
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   249
        if indent:
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
   250
            level -= 1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   251
            yield b'\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   252
            yield b' ' * (level * indent)
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   253
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   254
        yield b']'
37300
2f859ad7ed8c stringutil: add function to pretty print an object
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37290
diff changeset
   255
    else:
39353
0d21b1f1722c stringutil: refactor core of pprint so it emits chunks
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39296
diff changeset
   256
        yield pycompat.byterepr(o)
37300
2f859ad7ed8c stringutil: add function to pretty print an object
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37290
diff changeset
   257
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   258
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   259
def prettyrepr(o) -> bytes:
38261
f3033692ccef stringutil: promote smartset.prettyformat() to utility function
Yuya Nishihara <yuya@tcha.org>
parents: 37942
diff changeset
   260
    """Pretty print a representation of a possibly-nested object"""
f3033692ccef stringutil: promote smartset.prettyformat() to utility function
Yuya Nishihara <yuya@tcha.org>
parents: 37942
diff changeset
   261
    lines = []
f3033692ccef stringutil: promote smartset.prettyformat() to utility function
Yuya Nishihara <yuya@tcha.org>
parents: 37942
diff changeset
   262
    rs = pycompat.byterepr(o)
38264
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   263
    p0 = p1 = 0
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   264
    while p0 < len(rs):
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   265
        # '... field=<type ... field=<type ...'
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   266
        #      ~~~~~~~~~~~~~~~~
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   267
        #      p0    p1        q0    q1
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   268
        q0 = -1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   269
        q1 = rs.find(b'<', p1 + 1)
38264
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   270
        if q1 < 0:
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   271
            q1 = len(rs)
48479
dcdecec401ca pytype: stop excluding stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 47189
diff changeset
   272
            # pytype: disable=wrong-arg-count
dcdecec401ca pytype: stop excluding stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 47189
diff changeset
   273
            # TODO: figure out why pytype doesn't recognize the optional start
dcdecec401ca pytype: stop excluding stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 47189
diff changeset
   274
            #  arg
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   275
        elif q1 > p1 + 1 and rs.startswith(b'=', q1 - 1):
48479
dcdecec401ca pytype: stop excluding stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 47189
diff changeset
   276
            # pytype: enable=wrong-arg-count
38264
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   277
            # backtrack for ' field=<'
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   278
            q0 = rs.rfind(b' ', p1 + 1, q1 - 1)
38264
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   279
        if q0 < 0:
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   280
            q0 = q1
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   281
        else:
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   282
            q0 += 1  # skip ' '
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   283
        l = rs.count(b'<', 0, p0) - rs.count(b'>', 0, p0)
38261
f3033692ccef stringutil: promote smartset.prettyformat() to utility function
Yuya Nishihara <yuya@tcha.org>
parents: 37942
diff changeset
   284
        assert l >= 0
38264
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   285
        lines.append((l, rs[p0:q0].rstrip()))
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   286
        p0, p1 = q0, q1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   287
    return b'\n'.join(b'  ' * l + s for l, s in lines)
38261
f3033692ccef stringutil: promote smartset.prettyformat() to utility function
Yuya Nishihara <yuya@tcha.org>
parents: 37942
diff changeset
   288
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   289
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   290
def buildrepr(r) -> bytes:
38576
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   291
    """Format an optional printable representation from unexpanded bits
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   292
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   293
    ========  =================================
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   294
    type(r)   example
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   295
    ========  =================================
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   296
    tuple     ('<not %r>', other)
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   297
    bytes     '<branch closed>'
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   298
    callable  lambda: '<branch %r>' % sorted(b)
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   299
    object    other
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   300
    ========  =================================
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   301
    """
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   302
    if r is None:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   303
        return b''
38576
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   304
    elif isinstance(r, tuple):
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   305
        return r[0] % pycompat.rapply(pycompat.maybebytestr, r[1:])
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   306
    elif isinstance(r, bytes):
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   307
        return r
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   308
    elif callable(r):
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   309
        return r()
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   310
    else:
39052
38409be2f521 stringutil: have buildrepr delegate to pprint for unknown types
Augie Fackler <augie@google.com>
parents: 39051
diff changeset
   311
        return pprint(r)
38576
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   312
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   313
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   314
def binary(s: bytes) -> bool:
6507
9699864de219 Let util.binary check entire data for \0 (issue1066, issue1079)
Christian Ebert <blacktrash@gmx.net>
parents: 6501
diff changeset
   315
    """return true if a string is binary data"""
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   316
    return bool(s and b'\0' in s)
6762
f67d1468ac50 util: add sort helper
Matt Mackall <mpm@selenic.com>
parents: 6746
diff changeset
   317
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   318
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   319
def _splitpattern(pattern: bytes):
45722
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   320
    if pattern.startswith(b're:'):
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   321
        return b're', pattern[3:]
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   322
    elif pattern.startswith(b'literal:'):
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   323
        return b'literal', pattern[8:]
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   324
    return b'literal', pattern
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   325
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   326
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   327
def stringmatcher(pattern: bytes, casesensitive: bool = True):
26481
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   328
    """
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   329
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   330
    returns the matcher name, pattern, and matcher function.
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   331
    missing or unknown prefixes are treated as literal matches.
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   332
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   333
    helper for tests:
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   334
    >>> def test(pattern, *tests):
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   335
    ...     kind, pattern, matcher = stringmatcher(pattern)
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   336
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
30773
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   337
    >>> def itest(pattern, *tests):
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   338
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   339
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
26481
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   340
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   341
    exact matching (no prefix):
34131
0fa781320203 doctest: bulk-replace string literals with b'' for Python 3
Yuya Nishihara <yuya@tcha.org>
parents: 34084
diff changeset
   342
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
26481
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   343
    ('literal', 'abcdefg', [False, False, True])
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   344
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   345
    regex matching ('re:' prefix)
34131
0fa781320203 doctest: bulk-replace string literals with b'' for Python 3
Yuya Nishihara <yuya@tcha.org>
parents: 34084
diff changeset
   346
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
26481
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   347
    ('re', 'a.+b', [False, False, True])
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   348
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   349
    force exact matches ('literal:' prefix)
34131
0fa781320203 doctest: bulk-replace string literals with b'' for Python 3
Yuya Nishihara <yuya@tcha.org>
parents: 34084
diff changeset
   350
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
26481
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   351
    ('literal', 're:foobar', [False, True])
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   352
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   353
    unknown prefixes are ignored and treated as literals
34131
0fa781320203 doctest: bulk-replace string literals with b'' for Python 3
Yuya Nishihara <yuya@tcha.org>
parents: 34084
diff changeset
   354
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
26481
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   355
    ('literal', 'foo:bar', [False, False, True])
30773
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   356
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   357
    case insensitive regex matches
34131
0fa781320203 doctest: bulk-replace string literals with b'' for Python 3
Yuya Nishihara <yuya@tcha.org>
parents: 34084
diff changeset
   358
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
30773
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   359
    ('re', 'A.+b', [False, False, True])
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   360
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   361
    case insensitive literal matches
34131
0fa781320203 doctest: bulk-replace string literals with b'' for Python 3
Yuya Nishihara <yuya@tcha.org>
parents: 34084
diff changeset
   362
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
30773
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   363
    ('literal', 'ABCDEFG', [False, False, True])
26481
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   364
    """
45722
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   365
    kind, pattern = _splitpattern(pattern)
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   366
    if kind == b're':
26481
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   367
        try:
30773
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   368
            flags = 0
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   369
            if not casesensitive:
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   370
                flags = remod.I
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   371
            regex = remod.compile(pattern, flags)
26481
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   372
        except remod.error as e:
45723
edfc5820aae7 py3: fix stringmatcher() to byte-stringify exception message
Yuya Nishihara <yuya@tcha.org>
parents: 45722
diff changeset
   373
            raise error.ParseError(
edfc5820aae7 py3: fix stringmatcher() to byte-stringify exception message
Yuya Nishihara <yuya@tcha.org>
parents: 45722
diff changeset
   374
                _(b'invalid regular expression: %s') % forcebytestr(e)
edfc5820aae7 py3: fix stringmatcher() to byte-stringify exception message
Yuya Nishihara <yuya@tcha.org>
parents: 45722
diff changeset
   375
            )
45722
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   376
        return kind, pattern, regex.search
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   377
    elif kind == b'literal':
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   378
        if casesensitive:
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   379
            match = pattern.__eq__
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   380
        else:
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   381
            ipat = encoding.lower(pattern)
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   382
            match = lambda s: ipat == encoding.lower(s)
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   383
        return kind, pattern, match
30773
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   384
45722
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   385
    raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
26481
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   386
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   387
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   388
def substringregexp(pattern: bytes, flags: int = 0):
45724
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   389
    """Build a regexp object from a string pattern possibly starting with
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   390
    're:' or 'literal:' prefix.
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   391
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   392
    helper for tests:
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   393
    >>> def test(pattern, *tests):
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   394
    ...     regexp = substringregexp(pattern)
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   395
    ...     return [bool(regexp.search(t)) for t in tests]
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   396
    >>> def itest(pattern, *tests):
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   397
    ...     regexp = substringregexp(pattern, remod.I)
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   398
    ...     return [bool(regexp.search(t)) for t in tests]
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   399
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   400
    substring matching (no prefix):
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   401
    >>> test(b'bcde', b'abc', b'def', b'abcdefg')
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   402
    [False, False, True]
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   403
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   404
    substring pattern should be escaped:
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   405
    >>> substringregexp(b'.bc').pattern
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   406
    '\\\\.bc'
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   407
    >>> test(b'.bc', b'abc', b'def', b'abcdefg')
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   408
    [False, False, False]
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   409
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   410
    regex matching ('re:' prefix)
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   411
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   412
    [False, False, True]
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   413
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   414
    force substring matches ('literal:' prefix)
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   415
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   416
    [False, True]
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   417
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   418
    case insensitive literal matches
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   419
    >>> itest(b'BCDE', b'abc', b'def', b'abcdefg')
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   420
    [False, False, True]
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   421
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   422
    case insensitive regex matches
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   423
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   424
    [False, False, True]
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   425
    """
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   426
    kind, pattern = _splitpattern(pattern)
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   427
    if kind == b're':
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   428
        try:
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   429
            return remod.compile(pattern, flags)
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   430
        except remod.error as e:
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   431
            raise error.ParseError(
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   432
                _(b'invalid regular expression: %s') % forcebytestr(e)
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   433
            )
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   434
    elif kind == b'literal':
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   435
        return remod.compile(remod.escape(pattern), flags)
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   436
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   437
    raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   438
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   439
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   440
def shortuser(user: bytes) -> bytes:
1903
e4abeafd6eb1 move shortuser into util module.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 1635
diff changeset
   441
    """Return a short representation of a user name or email address."""
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   442
    f = user.find(b'@')
1903
e4abeafd6eb1 move shortuser into util module.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 1635
diff changeset
   443
    if f >= 0:
e4abeafd6eb1 move shortuser into util module.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 1635
diff changeset
   444
        user = user[:f]
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   445
    f = user.find(b'<')
1903
e4abeafd6eb1 move shortuser into util module.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 1635
diff changeset
   446
    if f >= 0:
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   447
        user = user[f + 1 :]
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   448
    f = user.find(b' ')
3176
7492b33bdd9f shortuser should stop before the first space character.
Thomas Arendsen Hein <thomas@intevation.de>
parents: 3147
diff changeset
   449
    if f >= 0:
7492b33bdd9f shortuser should stop before the first space character.
Thomas Arendsen Hein <thomas@intevation.de>
parents: 3147
diff changeset
   450
        user = user[:f]
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   451
    f = user.find(b'.')
3533
bb44489b901f shortname: truncate at '.' too
Matt Mackall <mpm@selenic.com>
parents: 3466
diff changeset
   452
    if f >= 0:
bb44489b901f shortname: truncate at '.' too
Matt Mackall <mpm@selenic.com>
parents: 3466
diff changeset
   453
        user = user[:f]
1903
e4abeafd6eb1 move shortuser into util module.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 1635
diff changeset
   454
    return user
1920
b7cc0f323a4c merge with crew.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 1903 1882
diff changeset
   455
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   456
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   457
def emailuser(user: bytes) -> bytes:
16360
e5788269741a templates/filters: extracting the user portion of an email address
Matteo Capobianco <m.capobianco@gmail.com>
parents: 15720
diff changeset
   458
    """Return the user portion of an email address."""
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   459
    f = user.find(b'@')
16360
e5788269741a templates/filters: extracting the user portion of an email address
Matteo Capobianco <m.capobianco@gmail.com>
parents: 15720
diff changeset
   460
    if f >= 0:
e5788269741a templates/filters: extracting the user portion of an email address
Matteo Capobianco <m.capobianco@gmail.com>
parents: 15720
diff changeset
   461
        user = user[:f]
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   462
    f = user.find(b'<')
16360
e5788269741a templates/filters: extracting the user portion of an email address
Matteo Capobianco <m.capobianco@gmail.com>
parents: 15720
diff changeset
   463
    if f >= 0:
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   464
        user = user[f + 1 :]
16360
e5788269741a templates/filters: extracting the user portion of an email address
Matteo Capobianco <m.capobianco@gmail.com>
parents: 15720
diff changeset
   465
    return user
e5788269741a templates/filters: extracting the user portion of an email address
Matteo Capobianco <m.capobianco@gmail.com>
parents: 15720
diff changeset
   466
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   467
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   468
def email(author: bytes) -> bytes:
5975
75d9fe70c654 templater: move email function to util
Matt Mackall <mpm@selenic.com>
parents: 5949
diff changeset
   469
    '''get email of author.'''
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   470
    r = author.find(b'>')
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10264
diff changeset
   471
    if r == -1:
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10264
diff changeset
   472
        r = None
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   473
    return author[author.find(b'<') + 1 : r]
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   474
5975
75d9fe70c654 templater: move email function to util
Matt Mackall <mpm@selenic.com>
parents: 5949
diff changeset
   475
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   476
def person(author: bytes) -> bytes:
37155
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   477
    """Returns the name before an email address,
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   478
    interpreting it as per RFC 5322
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   479
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   480
    >>> person(b'foo@bar')
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   481
    'foo'
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   482
    >>> person(b'Foo Bar <foo@bar>')
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   483
    'Foo Bar'
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   484
    >>> person(b'"Foo Bar" <foo@bar>')
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   485
    'Foo Bar'
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   486
    >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   487
    'Foo "buz" Bar'
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   488
    >>> # The following are invalid, but do exist in real-life
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   489
    ...
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   490
    >>> person(b'Foo "buz" Bar <foo@bar>')
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   491
    'Foo "buz" Bar'
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   492
    >>> person(b'"Foo Bar <foo@bar>')
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   493
    'Foo Bar'
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   494
    """
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   495
    if b'@' not in author:
37155
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   496
        return author
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   497
    f = author.find(b'<')
37155
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   498
    if f != -1:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   499
        return author[:f].strip(b' "').replace(b'\\"', b'"')
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   500
    f = author.find(b'@')
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   501
    return author[:f].replace(b'.', b' ')
37155
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   502
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   503
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   504
@attr.s(hash=True)
48946
642e31cb55f0 py3: use class X: instead of class X(object):
Gregory Szorc <gregory.szorc@gmail.com>
parents: 48911
diff changeset
   505
class mailmapping:
45942
89a2afe31e82 formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents: 45724
diff changeset
   506
    """Represents a username/email key or value in
89a2afe31e82 formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents: 45724
diff changeset
   507
    a mailmap file"""
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   508
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   509
    email = attr.ib()
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   510
    name = attr.ib(default=None)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   511
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   512
37246
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   513
def _ismailmaplineinvalid(names, emails):
45942
89a2afe31e82 formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents: 45724
diff changeset
   514
    """Returns True if the parsed names and emails
37246
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   515
    in a mailmap entry are invalid.
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   516
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   517
    >>> # No names or emails fails
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   518
    >>> names, emails = [], []
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   519
    >>> _ismailmaplineinvalid(names, emails)
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   520
    True
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   521
    >>> # Only one email fails
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   522
    >>> emails = [b'email@email.com']
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   523
    >>> _ismailmaplineinvalid(names, emails)
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   524
    True
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   525
    >>> # One email and one name passes
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   526
    >>> names = [b'Test Name']
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   527
    >>> _ismailmaplineinvalid(names, emails)
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   528
    False
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   529
    >>> # No names but two emails passes
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   530
    >>> names = []
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   531
    >>> emails = [b'proper@email.com', b'commit@email.com']
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   532
    >>> _ismailmaplineinvalid(names, emails)
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   533
    False
45942
89a2afe31e82 formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents: 45724
diff changeset
   534
    """
37246
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   535
    return not emails or not names and len(emails) < 2
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   536
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   537
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   538
def parsemailmap(mailmapcontent):
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   539
    """Parses data in the .mailmap format
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   540
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   541
    >>> mmdata = b"\\n".join([
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   542
    ... b'# Comment',
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   543
    ... b'Name <commit1@email.xx>',
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   544
    ... b'<name@email.xx> <commit2@email.xx>',
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   545
    ... b'Name <proper@email.xx> <commit3@email.xx>',
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   546
    ... b'Name <proper@email.xx> Commit <commit4@email.xx>',
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   547
    ... ])
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   548
    >>> mm = parsemailmap(mmdata)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   549
    >>> for key in sorted(mm.keys()):
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   550
    ...     print(key)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   551
    mailmapping(email='commit1@email.xx', name=None)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   552
    mailmapping(email='commit2@email.xx', name=None)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   553
    mailmapping(email='commit3@email.xx', name=None)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   554
    mailmapping(email='commit4@email.xx', name='Commit')
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   555
    >>> for val in sorted(mm.values()):
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   556
    ...     print(val)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   557
    mailmapping(email='commit1@email.xx', name='Name')
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   558
    mailmapping(email='name@email.xx', name=None)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   559
    mailmapping(email='proper@email.xx', name='Name')
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   560
    mailmapping(email='proper@email.xx', name='Name')
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   561
    """
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   562
    mailmap = {}
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   563
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   564
    if mailmapcontent is None:
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   565
        return mailmap
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   566
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   567
    for line in mailmapcontent.splitlines():
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   568
        # Don't bother checking the line if it is a comment or
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   569
        # is an improperly formed author field
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   570
        if line.lstrip().startswith(b'#'):
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   571
            continue
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   572
37245
54b896f195d1 stringutil: rename local email/names variables to their plural forms
Connor Sheehan <sheehan@mozilla.com>
parents: 37210
diff changeset
   573
        # names, emails hold the parsed emails and names for each line
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   574
        # name_builder holds the words in a persons name
37245
54b896f195d1 stringutil: rename local email/names variables to their plural forms
Connor Sheehan <sheehan@mozilla.com>
parents: 37210
diff changeset
   575
        names, emails = [], []
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   576
        namebuilder = []
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   577
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   578
        for element in line.split():
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   579
            if element.startswith(b'#'):
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   580
                # If we reach a comment in the mailmap file, move on
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   581
                break
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   582
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   583
            elif element.startswith(b'<') and element.endswith(b'>'):
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   584
                # We have found an email.
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   585
                # Parse it, and finalize any names from earlier
37245
54b896f195d1 stringutil: rename local email/names variables to their plural forms
Connor Sheehan <sheehan@mozilla.com>
parents: 37210
diff changeset
   586
                emails.append(element[1:-1])  # Slice off the "<>"
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   587
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   588
                if namebuilder:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   589
                    names.append(b' '.join(namebuilder))
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   590
                    namebuilder = []
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   591
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   592
                # Break if we have found a second email, any other
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   593
                # data does not fit the spec for .mailmap
37245
54b896f195d1 stringutil: rename local email/names variables to their plural forms
Connor Sheehan <sheehan@mozilla.com>
parents: 37210
diff changeset
   594
                if len(emails) > 1:
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   595
                    break
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   596
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   597
            else:
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   598
                # We have found another word in the committers name
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   599
                namebuilder.append(element)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   600
37246
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   601
        # Check to see if we have parsed the line into a valid form
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   602
        # We require at least one email, and either at least one
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   603
        # name or a second email
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   604
        if _ismailmaplineinvalid(names, emails):
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   605
            continue
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   606
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   607
        mailmapkey = mailmapping(
45942
89a2afe31e82 formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents: 45724
diff changeset
   608
            email=emails[-1],
89a2afe31e82 formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents: 45724
diff changeset
   609
            name=names[-1] if len(names) == 2 else None,
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   610
        )
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   611
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   612
        mailmap[mailmapkey] = mailmapping(
45942
89a2afe31e82 formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents: 45724
diff changeset
   613
            email=emails[0],
89a2afe31e82 formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents: 45724
diff changeset
   614
            name=names[0] if names else None,
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   615
        )
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   616
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   617
    return mailmap
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   618
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   619
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   620
def mapname(mailmap, author: bytes) -> bytes:
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   621
    """Returns the author field according to the mailmap cache, or
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   622
    the original author field.
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   623
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   624
    >>> mmdata = b"\\n".join([
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   625
    ...     b'# Comment',
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   626
    ...     b'Name <commit1@email.xx>',
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   627
    ...     b'<name@email.xx> <commit2@email.xx>',
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   628
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   629
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   630
    ... ])
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   631
    >>> m = parsemailmap(mmdata)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   632
    >>> mapname(m, b'Commit <commit1@email.xx>')
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   633
    'Name <commit1@email.xx>'
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   634
    >>> mapname(m, b'Name <commit2@email.xx>')
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   635
    'Name <name@email.xx>'
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   636
    >>> mapname(m, b'Commit <commit3@email.xx>')
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   637
    'Name <proper@email.xx>'
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   638
    >>> mapname(m, b'Commit <commit4@email.xx>')
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   639
    'Name <proper@email.xx>'
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   640
    >>> mapname(m, b'Unknown Name <unknown@email.com>')
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   641
    'Unknown Name <unknown@email.com>'
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   642
    """
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   643
    # If the author field coming in isn't in the correct format,
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   644
    # or the mailmap is empty just return the original author field
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   645
    if not isauthorwellformed(author) or not mailmap:
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   646
        return author
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   647
37247
2ed180117f76 stringutil: edit comment to reflect actual data type name
Connor Sheehan <sheehan@mozilla.com>
parents: 37246
diff changeset
   648
    # Turn the user name into a mailmapping
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   649
    commit = mailmapping(name=person(author), email=email(author))
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   650
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   651
    try:
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   652
        # Try and use both the commit email and name as the key
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   653
        proper = mailmap[commit]
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   654
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   655
    except KeyError:
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   656
        # If the lookup fails, use just the email as the key instead
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   657
        # We call this commit2 as not to erase original commit fields
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   658
        commit2 = mailmapping(email=commit.email)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   659
        proper = mailmap.get(commit2, mailmapping(None, None))
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   660
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   661
    # Return the author field with proper values filled in
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   662
    return b'%s <%s>' % (
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   663
        proper.name if proper.name else commit.name,
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   664
        proper.email if proper.email else commit.email,
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   665
    )
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   666
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   667
44022
c1ccefb513e4 cleanup: drop redundant character escapes outside of `[]`
Matt Harbison <matt_harbison@yahoo.com>
parents: 43506
diff changeset
   668
_correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')
37154
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   669
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   670
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   671
def isauthorwellformed(author: bytes) -> bool:
45942
89a2afe31e82 formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents: 45724
diff changeset
   672
    """Return True if the author field is well formed
37154
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   673
    (ie "Contributor Name <contrib@email.dom>")
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   674
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   675
    >>> isauthorwellformed(b'Good Author <good@author.com>')
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   676
    True
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   677
    >>> isauthorwellformed(b'Author <good@author.com>')
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   678
    True
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   679
    >>> isauthorwellformed(b'Bad Author')
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   680
    False
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   681
    >>> isauthorwellformed(b'Bad Author <author@author.com')
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   682
    False
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   683
    >>> isauthorwellformed(b'Bad Author author@author.com')
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   684
    False
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   685
    >>> isauthorwellformed(b'<author@author.com>')
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   686
    False
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   687
    >>> isauthorwellformed(b'Bad Author <author>')
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   688
    False
45942
89a2afe31e82 formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents: 45724
diff changeset
   689
    """
37154
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   690
    return _correctauthorformat.match(author) is not None
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   691
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   692
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   693
def firstline(text: bytes) -> bytes:
49021
51aed118f9dc templates: extract function to `stringutil` for getting first line of text
Martin von Zweigbergk <martinvonz@google.com>
parents: 48946
diff changeset
   694
    """Return the first line of the input"""
49030
75794847ef62 stringutil: try to avoid running `splitlines()` only to get first line
Martin von Zweigbergk <martinvonz@google.com>
parents: 49021
diff changeset
   695
    # Try to avoid running splitlines() on the whole string
75794847ef62 stringutil: try to avoid running `splitlines()` only to get first line
Martin von Zweigbergk <martinvonz@google.com>
parents: 49021
diff changeset
   696
    i = text.find(b'\n')
75794847ef62 stringutil: try to avoid running `splitlines()` only to get first line
Martin von Zweigbergk <martinvonz@google.com>
parents: 49021
diff changeset
   697
    if i != -1:
75794847ef62 stringutil: try to avoid running `splitlines()` only to get first line
Martin von Zweigbergk <martinvonz@google.com>
parents: 49021
diff changeset
   698
        text = text[:i]
49021
51aed118f9dc templates: extract function to `stringutil` for getting first line of text
Martin von Zweigbergk <martinvonz@google.com>
parents: 48946
diff changeset
   699
    try:
51aed118f9dc templates: extract function to `stringutil` for getting first line of text
Martin von Zweigbergk <martinvonz@google.com>
parents: 48946
diff changeset
   700
        return text.splitlines()[0]
51aed118f9dc templates: extract function to `stringutil` for getting first line of text
Martin von Zweigbergk <martinvonz@google.com>
parents: 48946
diff changeset
   701
    except IndexError:
51aed118f9dc templates: extract function to `stringutil` for getting first line of text
Martin von Zweigbergk <martinvonz@google.com>
parents: 48946
diff changeset
   702
        return b''
51aed118f9dc templates: extract function to `stringutil` for getting first line of text
Martin von Zweigbergk <martinvonz@google.com>
parents: 48946
diff changeset
   703
51aed118f9dc templates: extract function to `stringutil` for getting first line of text
Martin von Zweigbergk <martinvonz@google.com>
parents: 48946
diff changeset
   704
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   705
def ellipsis(text: bytes, maxlength: int = 400) -> bytes:
21857
86c2d792a4b7 util: replace 'ellipsis' implementation by 'encoding.trim'
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 21813
diff changeset
   706
    """Trim string to at most maxlength (default: 400) columns in display."""
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   707
    return encoding.trim(text, maxlength, ellipsis=b'...')
3767
1861fa38a6a7 Move ellipsis code to util.ellipsis() and improve maxlength handling.
Thomas Arendsen Hein <thomas@intevation.de>
parents: 3721
diff changeset
   708
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   709
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   710
def escapestr(s: bytes) -> bytes:
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   711
    # "bytes" is also a typing shortcut for bytes, bytearray, and memoryview
51828
4eccb65e444f utils: accept bytearray arguments for escapestr
Joerg Sonnenberger <joerg@bec.de>
parents: 51725
diff changeset
   712
    if isinstance(s, (memoryview, bytearray)):
39063
1419ba5e3b56 stringutil: if we get a memoryview in escapestr, coerce it to bytes
Augie Fackler <augie@google.com>
parents: 39052
diff changeset
   713
        s = bytes(s)
31453
3b7a6941a6ef py3: call codecs.escape_encode() directly
Yuya Nishihara <yuya@tcha.org>
parents: 31451
diff changeset
   714
    # call underlying function of s.encode('string_escape') directly for
3b7a6941a6ef py3: call codecs.escape_encode() directly
Yuya Nishihara <yuya@tcha.org>
parents: 31451
diff changeset
   715
    # Python 3 compatibility
49648
9be765b82a90 typing: minor tweaks to allow updating to pytype 2022.11.18
Matt Harbison <matt_harbison@yahoo.com>
parents: 49575
diff changeset
   716
    # pytype: disable=bad-return-type
48479
dcdecec401ca pytype: stop excluding stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 47189
diff changeset
   717
    return codecs.escape_encode(s)[0]  # pytype: disable=module-attr
49648
9be765b82a90 typing: minor tweaks to allow updating to pytype 2022.11.18
Matt Harbison <matt_harbison@yahoo.com>
parents: 49575
diff changeset
   718
    # pytype: enable=bad-return-type
31451
53865692a354 util: wrap s.encode('string_escape') call for future py3 compatibility
Yuya Nishihara <yuya@tcha.org>
parents: 31449
diff changeset
   719
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   720
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   721
def unescapestr(s: bytes) -> bytes:
49648
9be765b82a90 typing: minor tweaks to allow updating to pytype 2022.11.18
Matt Harbison <matt_harbison@yahoo.com>
parents: 49575
diff changeset
   722
    # pytype: disable=bad-return-type
48479
dcdecec401ca pytype: stop excluding stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 47189
diff changeset
   723
    return codecs.escape_decode(s)[0]  # pytype: disable=module-attr
49648
9be765b82a90 typing: minor tweaks to allow updating to pytype 2022.11.18
Matt Harbison <matt_harbison@yahoo.com>
parents: 49575
diff changeset
   724
    # pytype: enable=bad-return-type
31484
afb335353d28 util: wrap s.decode('string_escape') calls for future py3 compatibility
Yuya Nishihara <yuya@tcha.org>
parents: 31465
diff changeset
   725
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   726
33682
1d5e497c08b3 py3: convert arbitrary exception object to byte string more reliably
Yuya Nishihara <yuya@tcha.org>
parents: 33619
diff changeset
   727
def forcebytestr(obj):
1d5e497c08b3 py3: convert arbitrary exception object to byte string more reliably
Yuya Nishihara <yuya@tcha.org>
parents: 33619
diff changeset
   728
    """Portably format an arbitrary object (e.g. exception) into a byte
1d5e497c08b3 py3: convert arbitrary exception object to byte string more reliably
Yuya Nishihara <yuya@tcha.org>
parents: 33619
diff changeset
   729
    string."""
1d5e497c08b3 py3: convert arbitrary exception object to byte string more reliably
Yuya Nishihara <yuya@tcha.org>
parents: 33619
diff changeset
   730
    try:
1d5e497c08b3 py3: convert arbitrary exception object to byte string more reliably
Yuya Nishihara <yuya@tcha.org>
parents: 33619
diff changeset
   731
        return pycompat.bytestr(obj)
1d5e497c08b3 py3: convert arbitrary exception object to byte string more reliably
Yuya Nishihara <yuya@tcha.org>
parents: 33619
diff changeset
   732
    except UnicodeEncodeError:
1d5e497c08b3 py3: convert arbitrary exception object to byte string more reliably
Yuya Nishihara <yuya@tcha.org>
parents: 33619
diff changeset
   733
        # non-ascii string, may be lossy
1d5e497c08b3 py3: convert arbitrary exception object to byte string more reliably
Yuya Nishihara <yuya@tcha.org>
parents: 33619
diff changeset
   734
        return pycompat.bytestr(encoding.strtolocal(str(obj)))
1d5e497c08b3 py3: convert arbitrary exception object to byte string more reliably
Yuya Nishihara <yuya@tcha.org>
parents: 33619
diff changeset
   735
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   736
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   737
def uirepr(s: bytes) -> bytes:
5291
23651848d638 extdiff: avoid repr() doubling paths backslashes under Windows
Patrick Mezard <pmezard@gmail.com>
parents: 5213
diff changeset
   738
    # Avoid double backslash in Windows path repr()
36266
1fa33bd848ee py3: fix bytes-unicode dance while building docstring of extdiff
Yuya Nishihara <yuya@tcha.org>
parents: 36235
diff changeset
   739
    return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
7547
4949729ee9ee python implementation of diffstat
Alexander Solovyov <piranha@piranha.org.ua>
parents: 7537
diff changeset
   740
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   741
13316
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   742
# delay import of textwrap
37079
736024df4498 util: mark MBTextWrapper as private
Yuya Nishihara <yuya@tcha.org>
parents: 37078
diff changeset
   743
def _MBTextWrapper(**kwargs):
13316
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   744
    class tw(textwrap.TextWrapper):
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   745
        """
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   746
        Extend TextWrapper for width-awareness.
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   747
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   748
        Neither number of 'bytes' in any encoding nor 'characters' is
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   749
        appropriate to calculate terminal columns for specified string.
12957
9f2ac318b92e util: clarify purpose of MBTextWrapper class
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents: 12938
diff changeset
   750
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   751
        Original TextWrapper implementation uses built-in 'len()' directly,
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   752
        so overriding is needed to use width information of each characters.
12957
9f2ac318b92e util: clarify purpose of MBTextWrapper class
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents: 12938
diff changeset
   753
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   754
        In addition, characters classified into 'ambiguous' width are
17424
e7cfe3587ea4 fix trivial spelling errors
Mads Kiilerich <mads@kiilerich.com>
parents: 17391
diff changeset
   755
        treated as wide in East Asian area, but as narrow in other.
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   756
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   757
        This requires use decision to determine width of such characters.
13316
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   758
        """
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   759
15065
24a6c3f903bb util: wrap lines with multi-byte characters correctly (issue2943)
Mads Kiilerich <mads@kiilerich.com>
parents: 15024
diff changeset
   760
        def _cutdown(self, ucstr, space_left):
13316
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   761
            l = 0
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   762
            colwidth = encoding.ucolwidth
49284
d44e3c45f0e4 py3: replace `pycompat.xrange` by `range`
Manuel Jacob <me@manueljacob.de>
parents: 49030
diff changeset
   763
            for i in range(len(ucstr)):
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   764
                l += colwidth(ucstr[i])
13316
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   765
                if space_left < l:
15065
24a6c3f903bb util: wrap lines with multi-byte characters correctly (issue2943)
Mads Kiilerich <mads@kiilerich.com>
parents: 15024
diff changeset
   766
                    return (ucstr[:i], ucstr[i:])
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   767
            return ucstr, b''
11297
d320e70442a5 replace Python standard textwrap by MBCS sensitive one for i18n text
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 11256
diff changeset
   768
13316
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   769
        # overriding of base class
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   770
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   771
            space_left = max(width - cur_len, 1)
11297
d320e70442a5 replace Python standard textwrap by MBCS sensitive one for i18n text
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 11256
diff changeset
   772
13316
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   773
            if self.break_long_words:
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   774
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   775
                cur_line.append(cut)
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   776
                reversed_chunks[-1] = res
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   777
            elif not cur_line:
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   778
                cur_line.append(reversed_chunks.pop())
11297
d320e70442a5 replace Python standard textwrap by MBCS sensitive one for i18n text
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 11256
diff changeset
   779
26201
c5b2074ae8c0 util: capitalize Python in MBTextWrapper._wrap_chunks comment
timeless@mozdev.org
parents: 26126
diff changeset
   780
        # this overriding code is imported from TextWrapper of Python 2.6
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   781
        # to calculate columns of string by 'encoding.ucolwidth()'
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   782
        def _wrap_chunks(self, chunks):
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   783
            colwidth = encoding.ucolwidth
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   784
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   785
            lines = []
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   786
            if self.width <= 0:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   787
                raise ValueError(b"invalid width %r (must be > 0)" % self.width)
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   788
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   789
            # Arrange in reverse order so items can be efficiently popped
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   790
            # from a stack of chucks.
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   791
            chunks.reverse()
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   792
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   793
            while chunks:
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   794
                # Start the list of chunks that will make up the current line.
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   795
                # cur_len is just the length of all the chunks in cur_line.
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   796
                cur_line = []
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   797
                cur_len = 0
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   798
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   799
                # Figure out which static string will prefix this line.
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   800
                if lines:
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   801
                    indent = self.subsequent_indent
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   802
                else:
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   803
                    indent = self.initial_indent
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   804
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   805
                # Maximum width for this line.
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   806
                width = self.width - len(indent)
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   807
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   808
                # First chunk on line is whitespace -- drop it, unless this
17424
e7cfe3587ea4 fix trivial spelling errors
Mads Kiilerich <mads@kiilerich.com>
parents: 17391
diff changeset
   809
                # is the very beginning of the text (i.e. no lines started yet).
43506
9f70512ae2cf cleanup: remove pointless r-prefixes on single-quoted strings
Augie Fackler <augie@google.com>
parents: 43077
diff changeset
   810
                if self.drop_whitespace and chunks[-1].strip() == '' and lines:
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   811
                    del chunks[-1]
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   812
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   813
                while chunks:
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   814
                    l = colwidth(chunks[-1])
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   815
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   816
                    # Can at least squeeze this chunk onto the current line.
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   817
                    if cur_len + l <= width:
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   818
                        cur_line.append(chunks.pop())
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   819
                        cur_len += l
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   820
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   821
                    # Nope, this line is full.
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   822
                    else:
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   823
                        break
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   824
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   825
                # The current line is full, and the next chunk is too big to
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   826
                # fit on *any* line (not just this one).
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   827
                if chunks and colwidth(chunks[-1]) > width:
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   828
                    self._handle_long_word(chunks, cur_line, cur_len, width)
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   829
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   830
                # If the last chunk on this line is all whitespace, drop it.
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   831
                if (
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   832
                    self.drop_whitespace
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   833
                    and cur_line
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   834
                    and cur_line[-1].strip() == r''
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   835
                ):
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   836
                    del cur_line[-1]
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   837
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   838
                # Convert current line back to a string and store it in list
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   839
                # of all lines (return value).
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   840
                if cur_line:
43506
9f70512ae2cf cleanup: remove pointless r-prefixes on single-quoted strings
Augie Fackler <augie@google.com>
parents: 43077
diff changeset
   841
                    lines.append(indent + ''.join(cur_line))
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   842
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   843
            return lines
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   844
37079
736024df4498 util: mark MBTextWrapper as private
Yuya Nishihara <yuya@tcha.org>
parents: 37078
diff changeset
   845
    global _MBTextWrapper
736024df4498 util: mark MBTextWrapper as private
Yuya Nishihara <yuya@tcha.org>
parents: 37078
diff changeset
   846
    _MBTextWrapper = tw
13316
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   847
    return tw(**kwargs)
11297
d320e70442a5 replace Python standard textwrap by MBCS sensitive one for i18n text
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 11256
diff changeset
   848
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   849
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   850
def wrap(
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   851
    line: bytes, width: int, initindent: bytes = b'', hangindent: bytes = b''
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   852
) -> bytes:
11297
d320e70442a5 replace Python standard textwrap by MBCS sensitive one for i18n text
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 11256
diff changeset
   853
    maxindent = max(len(hangindent), len(initindent))
d320e70442a5 replace Python standard textwrap by MBCS sensitive one for i18n text
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 11256
diff changeset
   854
    if width <= maxindent:
9417
4c3fb45123e5 util, minirst: do not crash with COLUMNS=0
Martin Geisler <mg@lazybytes.net>
parents: 9397
diff changeset
   855
        # adjust for weird terminal size
11297
d320e70442a5 replace Python standard textwrap by MBCS sensitive one for i18n text
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 11256
diff changeset
   856
        width = max(78, maxindent + 1)
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   857
    line = line.decode(
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   858
        pycompat.sysstr(encoding.encoding),
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   859
        pycompat.sysstr(encoding.encodingmode),
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   860
    )
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   861
    initindent = initindent.decode(
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   862
        pycompat.sysstr(encoding.encoding),
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   863
        pycompat.sysstr(encoding.encodingmode),
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   864
    )
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   865
    hangindent = hangindent.decode(
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   866
        pycompat.sysstr(encoding.encoding),
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   867
        pycompat.sysstr(encoding.encodingmode),
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   868
    )
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   869
    wrapper = _MBTextWrapper(
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   870
        width=width, initial_indent=initindent, subsequent_indent=hangindent
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   871
    )
31338
a9a28ca17615 util: pass encoding.[encoding|encodingmode] as unicodes
Pulkit Goyal <7895pulkit@gmail.com>
parents: 31315
diff changeset
   872
    return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
8938
9b8c9266c59d commands: wrap short descriptions in 'hg help'
Martin Geisler <mg@lazybytes.net>
parents: 8785
diff changeset
   873
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   874
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   875
_booleans = {
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   876
    b'1': True,
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   877
    b'yes': True,
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   878
    b'true': True,
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   879
    b'on': True,
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   880
    b'always': True,
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   881
    b'0': False,
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   882
    b'no': False,
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   883
    b'false': False,
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   884
    b'off': False,
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   885
    b'never': False,
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   886
}
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   887
12087
a88a4720c2f0 parsebool: create new function and use it for config parsing
Augie Fackler <durin42@gmail.com>
parents: 12086
diff changeset
   888
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   889
def parsebool(s: bytes) -> Optional[bool]:
12087
a88a4720c2f0 parsebool: create new function and use it for config parsing
Augie Fackler <durin42@gmail.com>
parents: 12086
diff changeset
   890
    """Parse s into a boolean.
a88a4720c2f0 parsebool: create new function and use it for config parsing
Augie Fackler <durin42@gmail.com>
parents: 12086
diff changeset
   891
a88a4720c2f0 parsebool: create new function and use it for config parsing
Augie Fackler <durin42@gmail.com>
parents: 12086
diff changeset
   892
    If s is not a valid boolean, returns None.
a88a4720c2f0 parsebool: create new function and use it for config parsing
Augie Fackler <durin42@gmail.com>
parents: 12086
diff changeset
   893
    """
a88a4720c2f0 parsebool: create new function and use it for config parsing
Augie Fackler <durin42@gmail.com>
parents: 12086
diff changeset
   894
    return _booleans.get(s.lower(), None)
37290
cc5a040fe150 wireproto: syntax for encoding CBOR into frames
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37247
diff changeset
   895
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   896
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   897
# TODO: make arg mandatory (and fix code below?)
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   898
def parselist(value: Optional[bytes]):
47189
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   899
    """parse a configuration value as a list of comma/space separated strings
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   900
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   901
    >>> parselist(b'this,is "a small" ,test')
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   902
    ['this', 'is', 'a small', 'test']
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   903
    """
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   904
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   905
    def _parse_plain(parts, s, offset):
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   906
        whitespace = False
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   907
        while offset < len(s) and (
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   908
            s[offset : offset + 1].isspace() or s[offset : offset + 1] == b','
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   909
        ):
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   910
            whitespace = True
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   911
            offset += 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   912
        if offset >= len(s):
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   913
            return None, parts, offset
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   914
        if whitespace:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   915
            parts.append(b'')
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   916
        if s[offset : offset + 1] == b'"' and not parts[-1]:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   917
            return _parse_quote, parts, offset + 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   918
        elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\':
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   919
            parts[-1] = parts[-1][:-1] + s[offset : offset + 1]
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   920
            return _parse_plain, parts, offset + 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   921
        parts[-1] += s[offset : offset + 1]
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   922
        return _parse_plain, parts, offset + 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   923
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   924
    def _parse_quote(parts, s, offset):
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   925
        if offset < len(s) and s[offset : offset + 1] == b'"':  # ""
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   926
            parts.append(b'')
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   927
            offset += 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   928
            while offset < len(s) and (
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   929
                s[offset : offset + 1].isspace()
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   930
                or s[offset : offset + 1] == b','
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   931
            ):
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   932
                offset += 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   933
            return _parse_plain, parts, offset
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   934
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   935
        while offset < len(s) and s[offset : offset + 1] != b'"':
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   936
            if (
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   937
                s[offset : offset + 1] == b'\\'
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   938
                and offset + 1 < len(s)
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   939
                and s[offset + 1 : offset + 2] == b'"'
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   940
            ):
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   941
                offset += 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   942
                parts[-1] += b'"'
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   943
            else:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   944
                parts[-1] += s[offset : offset + 1]
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   945
            offset += 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   946
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   947
        if offset >= len(s):
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   948
            real_parts = _configlist(parts[-1])
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   949
            if not real_parts:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   950
                parts[-1] = b'"'
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   951
            else:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   952
                real_parts[0] = b'"' + real_parts[0]
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   953
                parts = parts[:-1]
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   954
                parts.extend(real_parts)
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   955
            return None, parts, offset
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   956
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   957
        offset += 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   958
        while offset < len(s) and s[offset : offset + 1] in [b' ', b',']:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   959
            offset += 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   960
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   961
        if offset < len(s):
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   962
            if offset + 1 == len(s) and s[offset : offset + 1] == b'"':
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   963
                parts[-1] += b'"'
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   964
                offset += 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   965
            else:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   966
                parts.append(b'')
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   967
        else:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   968
            return None, parts, offset
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   969
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   970
        return _parse_plain, parts, offset
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   971
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   972
    def _configlist(s):
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   973
        s = s.rstrip(b' ,')
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   974
        if not s:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   975
            return []
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   976
        parser, parts, offset = _parse_plain, [b''], 0
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   977
        while parser:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   978
            parser, parts, offset = parser(parts, s, offset)
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   979
        return parts
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   980
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   981
    if value is not None and isinstance(value, bytes):
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   982
        result = _configlist(value.lstrip(b' ,\n'))
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   983
    else:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   984
        result = value
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   985
    return result or []
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   986
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   987
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   988
def evalpythonliteral(s: bytes):
37476
e9dea82ea1f3 wireproto: convert python literal to object without using unsafe eval()
Yuya Nishihara <yuya@tcha.org>
parents: 37322
diff changeset
   989
    """Evaluate a string containing a Python literal expression"""
e9dea82ea1f3 wireproto: convert python literal to object without using unsafe eval()
Yuya Nishihara <yuya@tcha.org>
parents: 37322
diff changeset
   990
    # We could backport our tokenizer hack to rewrite '' to u'' if we want
48911
46b3ecfb16e2 stringutil: remove Python 2 support code
Gregory Szorc <gregory.szorc@gmail.com>
parents: 48875
diff changeset
   991
    return ast.literal_eval(s.decode('latin1'))