Mercurial > public > mercurial-scm > hg-stable
diff mercurial/utils/stringutil.py @ 47200:b0e92313107e
parselist: move the function from config to stringutil
We move the function in a lower level module to avoid cycle. It moves next to
`parsebool` who had to migrate for the same reasons.
Differential Revision: https://phab.mercurial-scm.org/D10449
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Fri, 16 Apr 2021 01:18:28 +0200 |
parents | d4ba4d51f85f |
children | dcdecec401ca |
line wrap: on
line diff
--- a/mercurial/utils/stringutil.py Fri Apr 16 02:14:21 2021 +0200 +++ b/mercurial/utils/stringutil.py Fri Apr 16 01:18:28 2021 +0200 @@ -868,6 +868,96 @@ return _booleans.get(s.lower(), None) +def parselist(value): + """parse a configuration value as a list of comma/space separated strings + + >>> parselist(b'this,is "a small" ,test') + ['this', 'is', 'a small', 'test'] + """ + + def _parse_plain(parts, s, offset): + whitespace = False + while offset < len(s) and ( + s[offset : offset + 1].isspace() or s[offset : offset + 1] == b',' + ): + whitespace = True + offset += 1 + if offset >= len(s): + return None, parts, offset + if whitespace: + parts.append(b'') + if s[offset : offset + 1] == b'"' and not parts[-1]: + return _parse_quote, parts, offset + 1 + elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\': + parts[-1] = parts[-1][:-1] + s[offset : offset + 1] + return _parse_plain, parts, offset + 1 + parts[-1] += s[offset : offset + 1] + return _parse_plain, parts, offset + 1 + + def _parse_quote(parts, s, offset): + if offset < len(s) and s[offset : offset + 1] == b'"': # "" + parts.append(b'') + offset += 1 + while offset < len(s) and ( + s[offset : offset + 1].isspace() + or s[offset : offset + 1] == b',' + ): + offset += 1 + return _parse_plain, parts, offset + + while offset < len(s) and s[offset : offset + 1] != b'"': + if ( + s[offset : offset + 1] == b'\\' + and offset + 1 < len(s) + and s[offset + 1 : offset + 2] == b'"' + ): + offset += 1 + parts[-1] += b'"' + else: + parts[-1] += s[offset : offset + 1] + offset += 1 + + if offset >= len(s): + real_parts = _configlist(parts[-1]) + if not real_parts: + parts[-1] = b'"' + else: + real_parts[0] = b'"' + real_parts[0] + parts = parts[:-1] + parts.extend(real_parts) + return None, parts, offset + + offset += 1 + while offset < len(s) and s[offset : offset + 1] in [b' ', b',']: + offset += 1 + + if offset < len(s): + if offset + 1 == len(s) and s[offset : offset + 1] == b'"': + parts[-1] += b'"' + offset += 1 + else: + parts.append(b'') + else: + return None, parts, offset + + return _parse_plain, parts, offset + + def _configlist(s): + s = s.rstrip(b' ,') + if not s: + return [] + parser, parts, offset = _parse_plain, [b''], 0 + while parser: + parser, parts, offset = parser(parts, s, offset) + return parts + + if value is not None and isinstance(value, bytes): + result = _configlist(value.lstrip(b' ,\n')) + else: + result = value + return result or [] + + def evalpythonliteral(s): """Evaluate a string containing a Python literal expression""" # We could backport our tokenizer hack to rewrite '' to u'' if we want