--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/utils/stringutil.py Thu Mar 22 21:19:31 2018 +0900
@@ -0,0 +1,288 @@
+# stringutil.py - utility for generic string formatting, parsing, etc.
+#
+# Copyright 2005 K. Thananchayan <thananck@yahoo.com>
+# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
+# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import codecs
+import re as remod
+import textwrap
+
+from ..i18n import _
+
+from .. import (
+ encoding,
+ error,
+ pycompat,
+)
+
+# Escape table used by escapedata(): by default every byte value maps to a
+# ``\xNN`` sequence; backslash, CR and LF get their short forms instead.
+_DATA_ESCAPE_MAP = dict(
+    (pycompat.bytechr(code), br'\x%02x' % code) for code in range(256))
+_DATA_ESCAPE_MAP[b'\\'] = b'\\\\'
+_DATA_ESCAPE_MAP[b'\r'] = br'\r'
+_DATA_ESCAPE_MAP[b'\n'] = br'\n'
+# Bytes that get escaped: 0x00-0x08, 0x0a-0x1f, backslash and 0x7f-0xff.
+# Tab (0x09) is deliberately outside the set.
+_DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]')
+
+def escapedata(s):
+    """Return ``s`` with every byte matched by _DATA_ESCAPE_RE escaped.
+
+    Each matched byte is replaced by its entry in _DATA_ESCAPE_MAP. A
+    ``bytearray`` argument is converted to ``bytes`` before substitution.
+    """
+    data = bytes(s) if isinstance(s, bytearray) else s
+
+    def _escape(match):
+        return _DATA_ESCAPE_MAP[match.group(0)]
+
+    return _DATA_ESCAPE_RE.sub(_escape, data)
+
+def binary(s):
+    """Tell whether a string is binary data.
+
+    Returns True when ``s`` is non-empty and contains a NUL character,
+    False otherwise (including for empty or None input).
+    """
+    if not s:
+        return False
+    return '\0' in s
+
+def stringmatcher(pattern, casesensitive=True):
+    """
+    accepts a string, possibly starting with 're:' or 'literal:' prefix.
+    returns the matcher name, pattern, and matcher function.
+    missing or unknown prefixes are treated as literal matches.
+
+    The return value is a 3-tuple ``(kind, pattern, matcher)``: ``kind`` is
+    're' or 'literal', ``pattern`` is the input with any recognised prefix
+    removed, and ``matcher`` is a one-argument callable. For 're' it is the
+    compiled regex's ``search`` method; for 'literal' it returns True or
+    False for an (optionally case-insensitive) equality test.
+
+    Raises error.ParseError if an 're:' pattern is not a valid regular
+    expression.
+
+    helper for tests:
+    >>> def test(pattern, *tests):
+    ...     kind, pattern, matcher = stringmatcher(pattern)
+    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
+    >>> def itest(pattern, *tests):
+    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
+    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
+
+    exact matching (no prefix):
+    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
+    ('literal', 'abcdefg', [False, False, True])
+
+    regex matching ('re:' prefix)
+    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
+    ('re', 'a.+b', [False, False, True])
+
+    force exact matches ('literal:' prefix)
+    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
+    ('literal', 're:foobar', [False, True])
+
+    unknown prefixes are ignored and treated as literals
+    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
+    ('literal', 'foo:bar', [False, False, True])
+
+    case insensitive regex matches
+    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
+    ('re', 'A.+b', [False, False, True])
+
+    case insensitive literal matches
+    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
+    ('literal', 'ABCDEFG', [False, False, True])
+    """
+    if pattern.startswith('re:'):
+        pattern = pattern[3:]
+        flags = 0 if casesensitive else remod.I
+        try:
+            regex = remod.compile(pattern, flags)
+        except remod.error as e:
+            # Convert the exception with forcebytestr() before formatting:
+            # a byte-string format cannot interpolate an exception object
+            # directly on Python 3.
+            raise error.ParseError(_('invalid regular expression: %s')
+                                   % forcebytestr(e))
+        return 're', pattern, regex.search
+    elif pattern.startswith('literal:'):
+        pattern = pattern[8:]
+
+    if casesensitive:
+        # Compare with ``==`` instead of handing out ``pattern.__eq__``:
+        # the bound method returns NotImplemented (which is not False) when
+        # given a value of an unrelated type.
+        match = lambda s: pattern == s
+    else:
+        ipat = encoding.lower(pattern)
+        match = lambda s: ipat == encoding.lower(s)
+    return 'literal', pattern, match
+
+def shortuser(user):
+    """Return a short representation of a user name or email address.
+
+    The input is narrowed in four steps, each applied only when its
+    separator is present: keep what precedes the first '@', keep what
+    follows the first '<', keep what precedes the first ' ', then keep what
+    precedes the first '.'.
+    """
+    # (separator, keep_head): keep_head selects the text before the
+    # separator, otherwise the text after it is kept.
+    steps = (('@', True), ('<', False), (' ', True), ('.', True))
+    for sep, keephead in steps:
+        pos = user.find(sep)
+        if pos < 0:
+            continue
+        user = user[:pos] if keephead else user[pos + 1:]
+    return user
+
+def emailuser(user):
+    """Return the user portion of an email address.
+
+    Everything from the first '@' onwards is dropped, then everything up
+    to and including the first '<' is dropped. Either step is skipped when
+    its separator is absent.
+    """
+    at = user.find('@')
+    if at != -1:
+        user = user[:at]
+    bracket = user.find('<')
+    if bracket != -1:
+        user = user[bracket + 1:]
+    return user
+
+def email(author):
+    '''get email of author.
+
+    Returns the text between the first '<' and the first '>'. Without a
+    '<' the result starts at the beginning of ``author``; without a '>' it
+    runs to the end.
+    '''
+    # find() yields -1 when '<' is missing, so ``start`` becomes 0.
+    start = author.find('<') + 1
+    end = author.find('>')
+    if end < 0:
+        return author[start:]
+    return author[start:end]
+
+def ellipsis(text, maxlength=400):
+    """Trim ``text`` to at most ``maxlength`` display columns.
+
+    Delegates to encoding.trim() with '...' as the ellipsis marker;
+    ``maxlength`` defaults to 400.
+    """
+    marker = '...'
+    return encoding.trim(text, maxlength, ellipsis=marker)
+
+def escapestr(s):
+    """Return ``s`` with backslash escapes applied, 'string_escape' style."""
+    # codecs.escape_encode() is the function underneath
+    # s.encode('string_escape'); calling it directly keeps this working on
+    # Python 3. It returns a pair, of which only the first item is needed.
+    escaped, _length = codecs.escape_encode(s)
+    return escaped
+
+def unescapestr(s):
+    """Return ``s`` with backslash escapes decoded (see escapestr())."""
+    # codecs.escape_decode() returns a pair; only the first item, the
+    # decoded data, is wanted.
+    unescaped, _length = codecs.escape_decode(s)
+    return unescaped
+
+def forcebytestr(obj):
+    """Portably format an arbitrary object (e.g. exception) into a byte
+    string.
+
+    pycompat.bytestr() is tried first. If it raises UnicodeEncodeError, the
+    object is rendered with str() and passed through encoding.strtolocal()
+    before being converted.
+    """
+    try:
+        result = pycompat.bytestr(obj)
+    except UnicodeEncodeError:
+        # non-ascii string, may be lossy
+        text = str(obj)
+        result = pycompat.bytestr(encoding.strtolocal(text))
+    return result
+
+def uirepr(s):
+    """Return the byte repr of ``s`` with doubled backslashes collapsed."""
+    quoted = pycompat.byterepr(pycompat.bytestr(s))
+    # Avoid double backslash in Windows path repr()
+    return quoted.replace(b'\\\\', b'\\')
+
+# Delay definition of the wrapper class: the class statement below only runs
+# the first time a wrapper is requested.
+def _MBTextWrapper(**kwargs):
+    """Create a width-aware TextWrapper, defining its class on first use.
+
+    The first call defines the ``tw`` class, rebinds the module-level name
+    ``_MBTextWrapper`` to that class and returns an instance built from
+    ``kwargs``. Later calls through the module-level name therefore
+    instantiate the class directly.
+    """
+    class tw(textwrap.TextWrapper):
+        """
+        Extend TextWrapper for width-awareness.
+
+        Neither number of 'bytes' in any encoding nor 'characters' is
+        appropriate to calculate terminal columns for specified string.
+
+        Original TextWrapper implementation uses built-in 'len()' directly,
+        so overriding is needed to use width information of each characters.
+
+        In addition, characters classified into 'ambiguous' width are
+        treated as wide in East Asian area, but as narrow in other.
+
+        This requires user decision to determine width of such characters.
+        """
+        def _cutdown(self, ucstr, space_left):
+            """Split ``ucstr`` into ``(head, tail)`` by column width.
+
+            ``head`` is the longest prefix whose accumulated
+            encoding.ucolwidth() does not exceed ``space_left``; ``tail`` is
+            the remainder (empty when the whole string fits).
+            """
+            l = 0
+            colwidth = encoding.ucolwidth
+            for i, ch in enumerate(ucstr):
+                l += colwidth(ch)
+                if space_left < l:
+                    return (ucstr[:i], ucstr[i:])
+            # Use an empty slice so the remainder has the same string type
+            # as the chunk it was cut from.
+            return ucstr, ucstr[:0]
+
+        # overriding of base class
+        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
+            """Place a chunk that is wider than ``width`` onto ``cur_line``.
+
+            With ``break_long_words`` set, the chunk on top of
+            ``reversed_chunks`` is cut to the remaining space and the rest
+            is left on the stack. Otherwise the chunk is moved whole, but
+            only when ``cur_line`` is still empty.
+            """
+            # Always allow at least one column so progress is made.
+            space_left = max(width - cur_len, 1)
+
+            if self.break_long_words:
+                cut, res = self._cutdown(reversed_chunks[-1], space_left)
+                cur_line.append(cut)
+                reversed_chunks[-1] = res
+            elif not cur_line:
+                cur_line.append(reversed_chunks.pop())
+
+        # this overriding code is imported from TextWrapper of Python 2.6
+        # to calculate columns of string by 'encoding.ucolwidth()'
+        def _wrap_chunks(self, chunks):
+            """Arrange ``chunks`` into a list of indented output lines.
+
+            ``chunks`` is reversed in place and consumed. Raises ValueError
+            when ``self.width`` is not positive.
+            """
+            colwidth = encoding.ucolwidth
+
+            lines = []
+            if self.width <= 0:
+                raise ValueError("invalid width %r (must be > 0)" % self.width)
+
+            # Arrange in reverse order so items can be efficiently popped
+            # from a stack of chunks.
+            chunks.reverse()
+
+            while chunks:
+
+                # Start the list of chunks that will make up the current line.
+                # cur_len is just the length of all the chunks in cur_line.
+                cur_line = []
+                cur_len = 0
+
+                # Figure out which static string will prefix this line.
+                if lines:
+                    indent = self.subsequent_indent
+                else:
+                    indent = self.initial_indent
+
+                # Maximum width for this line.
+                width = self.width - len(indent)
+
+                # First chunk on line is whitespace -- drop it, unless this
+                # is the very beginning of the text (i.e. no lines started yet).
+                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
+                    del chunks[-1]
+
+                while chunks:
+                    l = colwidth(chunks[-1])
+
+                    # Can at least squeeze this chunk onto the current line.
+                    if cur_len + l <= width:
+                        cur_line.append(chunks.pop())
+                        cur_len += l
+
+                    # Nope, this line is full.
+                    else:
+                        break
+
+                # The current line is full, and the next chunk is too big to
+                # fit on *any* line (not just this one).
+                if chunks and colwidth(chunks[-1]) > width:
+                    self._handle_long_word(chunks, cur_line, cur_len, width)
+
+                # If the last chunk on this line is all whitespace, drop it.
+                if (self.drop_whitespace and
+                    cur_line and cur_line[-1].strip() == r''):
+                    del cur_line[-1]
+
+                # Convert current line back to a string and store it in list
+                # of all lines (return value).
+                if cur_line:
+                    lines.append(indent + r''.join(cur_line))
+
+            return lines
+
+    # Replace this factory with the class itself so the class statement is
+    # not executed again on later calls.
+    global _MBTextWrapper
+    _MBTextWrapper = tw
+    return tw(**kwargs)
+
+def wrap(line, width, initindent='', hangindent=''):
+    """Wrap ``line`` to ``width`` columns and return the encoded result.
+
+    ``initindent`` prefixes the first output line and ``hangindent`` the
+    following ones. All three strings are decoded with encoding.encoding /
+    encoding.encodingmode, filled by an _MBTextWrapper, and the result is
+    encoded back with encoding.encoding.
+
+    When ``width`` does not exceed the longer indent, it is replaced by
+    max(78, longer indent + 1).
+    """
+    widestindent = max(len(hangindent), len(initindent))
+    if width <= widestindent:
+        # adjust for weird terminal size
+        width = max(78, widestindent + 1)
+
+    def _tounicode(s):
+        return s.decode(pycompat.sysstr(encoding.encoding),
+                        pycompat.sysstr(encoding.encodingmode))
+
+    line = _tounicode(line)
+    initindent = _tounicode(initindent)
+    hangindent = _tounicode(hangindent)
+    wrapper = _MBTextWrapper(width=width,
+                             initial_indent=initindent,
+                             subsequent_indent=hangindent)
+    filled = wrapper.fill(line)
+    return filled.encode(pycompat.sysstr(encoding.encoding))
+
+# Spellings accepted by parsebool(), keyed by their lower-case form.
+_booleans = dict.fromkeys(('1', 'yes', 'true', 'on', 'always'), True)
+_booleans.update(dict.fromkeys(('0', 'no', 'false', 'off', 'never'), False))
+
+def parsebool(s):
+    """Parse s into a boolean.
+
+    The lookup is case-insensitive. If s is not a valid boolean, returns
+    None.
+    """
+    return _booleans.get(s.lower())