Mercurial > public > mercurial-scm > hg-stable
diff mercurial/util.py @ 37086:f99d64e8a4e4
stringutil: move generic string helpers to new module
Per https://phab.mercurial-scm.org/D2903#46738
URL and file path functions are left in place since they are big enough to
warrant separate modules of their own.
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Thu, 22 Mar 2018 21:19:31 +0900 |
parents | 1a1d1c44b570 |
children | f0b6fbea00cf |
line wrap: on
line diff
--- a/mercurial/util.py Thu Mar 22 21:32:19 2018 +0900 +++ b/mercurial/util.py Thu Mar 22 21:19:31 2018 +0900 @@ -17,7 +17,6 @@ import abc import bz2 -import codecs import collections import contextlib import errno @@ -37,7 +36,6 @@ import subprocess import sys import tempfile -import textwrap import time import traceback import warnings @@ -52,7 +50,10 @@ pycompat, urllibcompat, ) -from .utils import dateutil +from .utils import ( + dateutil, + stringutil, +) base85 = policy.importmod(r'base85') osutil = policy.importmod(r'osutil') @@ -808,20 +809,6 @@ return object.__getattribute__(self, r'_observedcall')( r'setsockopt', *args, **kwargs) -_DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)} -_DATA_ESCAPE_MAP.update({ - b'\\': b'\\\\', - b'\r': br'\r', - b'\n': br'\n', -}) -_DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]') - -def escapedata(s): - if isinstance(s, bytearray): - s = bytes(s) - - return _DATA_ESCAPE_RE.sub(lambda m: _DATA_ESCAPE_MAP[m.group(0)], s) - class baseproxyobserver(object): def _writedata(self, data): if not self.logdata: @@ -1567,10 +1554,6 @@ return fn(s, cmd[len(name):].lstrip()) return pipefilter(s, cmd) -def binary(s): - """return true if a string is binary data""" - return bool(s and '\0' in s) - def increasingchunks(source, min=1024, max=65536): '''return no less than min bytes per chunk while data remains, doubling min after each chunk until it reaches max''' @@ -2571,102 +2554,6 @@ b[0:len(res)] = res return len(res) -def stringmatcher(pattern, casesensitive=True): - """ - accepts a string, possibly starting with 're:' or 'literal:' prefix. - returns the matcher name, pattern, and matcher function. - missing or unknown prefixes are treated as literal matches. - - helper for tests: - >>> def test(pattern, *tests): - ... kind, pattern, matcher = stringmatcher(pattern) - ... return (kind, pattern, [bool(matcher(t)) for t in tests]) - >>> def itest(pattern, *tests): - ... 
kind, pattern, matcher = stringmatcher(pattern, casesensitive=False) - ... return (kind, pattern, [bool(matcher(t)) for t in tests]) - - exact matching (no prefix): - >>> test(b'abcdefg', b'abc', b'def', b'abcdefg') - ('literal', 'abcdefg', [False, False, True]) - - regex matching ('re:' prefix) - >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar') - ('re', 'a.+b', [False, False, True]) - - force exact matches ('literal:' prefix) - >>> test(b'literal:re:foobar', b'foobar', b're:foobar') - ('literal', 're:foobar', [False, True]) - - unknown prefixes are ignored and treated as literals - >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar') - ('literal', 'foo:bar', [False, False, True]) - - case insensitive regex matches - >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar') - ('re', 'A.+b', [False, False, True]) - - case insensitive literal matches - >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg') - ('literal', 'ABCDEFG', [False, False, True]) - """ - if pattern.startswith('re:'): - pattern = pattern[3:] - try: - flags = 0 - if not casesensitive: - flags = remod.I - regex = remod.compile(pattern, flags) - except remod.error as e: - raise error.ParseError(_('invalid regular expression: %s') - % e) - return 're', pattern, regex.search - elif pattern.startswith('literal:'): - pattern = pattern[8:] - - match = pattern.__eq__ - - if not casesensitive: - ipat = encoding.lower(pattern) - match = lambda s: ipat == encoding.lower(s) - return 'literal', pattern, match - -def shortuser(user): - """Return a short representation of a user name or email address.""" - f = user.find('@') - if f >= 0: - user = user[:f] - f = user.find('<') - if f >= 0: - user = user[f + 1:] - f = user.find(' ') - if f >= 0: - user = user[:f] - f = user.find('.') - if f >= 0: - user = user[:f] - return user - -def emailuser(user): - """Return the user portion of an email address.""" - f = user.find('@') - if f >= 0: - user = user[:f] - f = user.find('<') - if f >= 0: - user = user[f + 
1:] - return user - -def email(author): - '''get email of author.''' - r = author.find('>') - if r == -1: - r = None - return author[author.find('<') + 1:r] - -def ellipsis(text, maxlength=400): - """Trim string to at most maxlength (default: 400) columns in display.""" - return encoding.trim(text, maxlength, ellipsis='...') - def unitcountfn(*unittable): '''return a function that renders a readable count of some quantity''' @@ -2751,147 +2638,6 @@ fromnativeeol = pycompat.identity nativeeolwriter = pycompat.identity -def escapestr(s): - # call underlying function of s.encode('string_escape') directly for - # Python 3 compatibility - return codecs.escape_encode(s)[0] - -def unescapestr(s): - return codecs.escape_decode(s)[0] - -def forcebytestr(obj): - """Portably format an arbitrary object (e.g. exception) into a byte - string.""" - try: - return pycompat.bytestr(obj) - except UnicodeEncodeError: - # non-ascii string, may be lossy - return pycompat.bytestr(encoding.strtolocal(str(obj))) - -def uirepr(s): - # Avoid double backslash in Windows path repr() - return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\') - -# delay import of textwrap -def _MBTextWrapper(**kwargs): - class tw(textwrap.TextWrapper): - """ - Extend TextWrapper for width-awareness. - - Neither number of 'bytes' in any encoding nor 'characters' is - appropriate to calculate terminal columns for specified string. - - Original TextWrapper implementation uses built-in 'len()' directly, - so overriding is needed to use width information of each characters. - - In addition, characters classified into 'ambiguous' width are - treated as wide in East Asian area, but as narrow in other. - - This requires use decision to determine width of such characters. 
- """ - def _cutdown(self, ucstr, space_left): - l = 0 - colwidth = encoding.ucolwidth - for i in xrange(len(ucstr)): - l += colwidth(ucstr[i]) - if space_left < l: - return (ucstr[:i], ucstr[i:]) - return ucstr, '' - - # overriding of base class - def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): - space_left = max(width - cur_len, 1) - - if self.break_long_words: - cut, res = self._cutdown(reversed_chunks[-1], space_left) - cur_line.append(cut) - reversed_chunks[-1] = res - elif not cur_line: - cur_line.append(reversed_chunks.pop()) - - # this overriding code is imported from TextWrapper of Python 2.6 - # to calculate columns of string by 'encoding.ucolwidth()' - def _wrap_chunks(self, chunks): - colwidth = encoding.ucolwidth - - lines = [] - if self.width <= 0: - raise ValueError("invalid width %r (must be > 0)" % self.width) - - # Arrange in reverse order so items can be efficiently popped - # from a stack of chucks. - chunks.reverse() - - while chunks: - - # Start the list of chunks that will make up the current line. - # cur_len is just the length of all the chunks in cur_line. - cur_line = [] - cur_len = 0 - - # Figure out which static string will prefix this line. - if lines: - indent = self.subsequent_indent - else: - indent = self.initial_indent - - # Maximum width for this line. - width = self.width - len(indent) - - # First chunk on line is whitespace -- drop it, unless this - # is the very beginning of the text (i.e. no lines started yet). - if self.drop_whitespace and chunks[-1].strip() == r'' and lines: - del chunks[-1] - - while chunks: - l = colwidth(chunks[-1]) - - # Can at least squeeze this chunk onto the current line. - if cur_len + l <= width: - cur_line.append(chunks.pop()) - cur_len += l - - # Nope, this line is full. - else: - break - - # The current line is full, and the next chunk is too big to - # fit on *any* line (not just this one). 
- if chunks and colwidth(chunks[-1]) > width: - self._handle_long_word(chunks, cur_line, cur_len, width) - - # If the last chunk on this line is all whitespace, drop it. - if (self.drop_whitespace and - cur_line and cur_line[-1].strip() == r''): - del cur_line[-1] - - # Convert current line back to a string and store it in list - # of all lines (return value). - if cur_line: - lines.append(indent + r''.join(cur_line)) - - return lines - - global _MBTextWrapper - _MBTextWrapper = tw - return tw(**kwargs) - -def wrap(line, width, initindent='', hangindent=''): - maxindent = max(len(hangindent), len(initindent)) - if width <= maxindent: - # adjust for weird terminal size - width = max(78, maxindent + 1) - line = line.decode(pycompat.sysstr(encoding.encoding), - pycompat.sysstr(encoding.encodingmode)) - initindent = initindent.decode(pycompat.sysstr(encoding.encoding), - pycompat.sysstr(encoding.encodingmode)) - hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding), - pycompat.sysstr(encoding.encodingmode)) - wrapper = _MBTextWrapper(width=width, - initial_indent=initindent, - subsequent_indent=hangindent) - return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding)) - if (pyplatform.python_implementation() == 'CPython' and sys.version_info < (3, 0)): # There is an issue in CPython that some IO methods do not handle EINTR @@ -3064,17 +2810,6 @@ except socket.error: raise Abort(_("no port number associated with service '%s'") % port) -_booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True, - '0': False, 'no': False, 'false': False, 'off': False, - 'never': False} - -def parsebool(s): - """Parse s into a boolean. - - If s is not a valid boolean, returns None. - """ - return _booleans.get(s.lower(), None) - class url(object): r"""Reliable URL parser. 
@@ -4341,3 +4076,19 @@ strdate = _deprecatedfunc(dateutil.strdate, '4.6') parsedate = _deprecatedfunc(dateutil.parsedate, '4.6') matchdate = _deprecatedfunc(dateutil.matchdate, '4.6') + +def _deprecatedfunc(func, version): # TODO + return func +escapedata = _deprecatedfunc(stringutil.escapedata, '4.6') +binary = _deprecatedfunc(stringutil.binary, '4.6') +stringmatcher = _deprecatedfunc(stringutil.stringmatcher, '4.6') +shortuser = _deprecatedfunc(stringutil.shortuser, '4.6') +emailuser = _deprecatedfunc(stringutil.emailuser, '4.6') +email = _deprecatedfunc(stringutil.email, '4.6') +ellipsis = _deprecatedfunc(stringutil.ellipsis, '4.6') +escapestr = _deprecatedfunc(stringutil.escapestr, '4.6') +unescapestr = _deprecatedfunc(stringutil.unescapestr, '4.6') +forcebytestr = _deprecatedfunc(stringutil.forcebytestr, '4.6') +uirepr = _deprecatedfunc(stringutil.uirepr, '4.6') +wrap = _deprecatedfunc(stringutil.wrap, '4.6') +parsebool = _deprecatedfunc(stringutil.parsebool, '4.6')