mercurial-scm/hg: contrib/testparseutil.py comparison

comparison contrib/testparseutil.py @ 42330:5364ba1f796f

py3: make contrib/testparseutil.py work on str (unicode). contrib/check-code works on unicode strings and calls functions from testparseutil.py, which before this patch worked on bytes. This patch removes that inconsistency and makes testparseutil.py work on unicode strings. This makes test-check-code.t and test-contrib-check-code.t work on Python 3 again. Differential Revision: https://phab.mercurial-scm.org/D6391

author	Pulkit Goyal <7895pulkit@gmail.com>
date	Fri, 17 May 2019 00:04:29 +0530
parents	99b4c6d73a72
children	37f38e1dea44

comparison

equal deleted inserted replaced

-:c7652f7440d9
+:5364ba1f796f
 if isinstance(s, builtins.str):
 return s
 return s.decode(u'latin-1')
 def opentext(f):
-return open(f, 'rb')
+return open(f, 'r')
 else:
 stdin = sys.stdin
 stdout = sys.stdout
 stderr = sys.stderr
 ...     def __init__(self, desc, matchfunc):
 ...         self.desc = desc
 ...         self.matchfunc = matchfunc
 ...     def startsat(self, line):
 ...         return self.matchfunc(line)
->>> ambig1 = ambigmatcher(b'ambiguous #1',
+>>> ambig1 = ambigmatcher('ambiguous #1',
-...                       lambda l: l.startswith(b'  $ cat '))
+...                       lambda l: l.startswith('  $ cat '))
->>> ambig2 = ambigmatcher(b'ambiguous #2',
+>>> ambig2 = ambigmatcher('ambiguous #2',
-...                       lambda l: l.endswith(b'<< EOF\\n'))
+...                       lambda l: l.endswith('<< EOF\\n'))
->>> lines = [b'  $ cat > foo.py << EOF\\n']
+>>> lines = ['  $ cat > foo.py << EOF\\n']
 >>> errors = []
 >>> matchers = [ambig1, ambig2]
->>> list(t for t in embedded(b'<dummy>', lines, errors, matchers))
+>>> list(t for t in embedded('<dummy>', lines, errors, matchers))
 []
 >>> b2s(errors)
 ['<dummy>:1: ambiguous line for "ambiguous #1", "ambiguous #2"']
 """
 matcher = None
 ctx = filename = code = startline = None # for pyflakes
 for lineno, line in enumerate(lines, 1):
-if not line.endswith(b'\n'):
+if not line.endswith('\n'):
-line += b'\n' # to normalize EOF line
+line += '\n' # to normalize EOF line
 if matcher: # now, inside embedded code
 if matcher.endsat(ctx, line):
 codeatend = matcher.codeatend(ctx, line)
 if codeatend is not None:
 code.append(codeatend)
 if not matcher.ignores(ctx):
-yield (filename, startline, lineno, b''.join(code))
+yield (filename, startline, lineno, ''.join(code))
 matcher = None
 # DO NOT "continue", because line might start next fragment
 elif not matcher.isinside(ctx, line):
 # this is an error of basefile
 # (if matchers are implemented correctly)
-errors.append(b'%s:%d: unexpected line for "%s"'
+errors.append('%s:%d: unexpected line for "%s"'
 % (basefile, lineno, matcher.desc))
 # stop extracting embedded code by current 'matcher',
 # because appearance of unexpected line might mean
 # that expected end-of-embedded-code line might never
 # appear
 if ctx:
 matched.append((m, ctx))
 if matched:
 if len(matched) > 1:
 # this is an error of matchers, maybe
-errors.append(b'%s:%d: ambiguous line for %s' %
+errors.append('%s:%d: ambiguous line for %s' %
 (basefile, lineno,
-b', '.join([b'"%s"' % m.desc
+', '.join(['"%s"' % m.desc
 for m, c in matched])))
 # omit extracting embedded code, because choosing
 # arbitrary matcher from matched ones might fail to
 # detect the end of embedded code as expected.
 continue
 startline = lineno + 1
 if matcher:
 # examine whether EOF ends embedded code, because embedded
 # code isn't yet ended explicitly
-if matcher.endsat(ctx, b'\n'):
+if matcher.endsat(ctx, '\n'):
-codeatend = matcher.codeatend(ctx, b'\n')
+codeatend = matcher.codeatend(ctx, '\n')
 if codeatend is not None:
 code.append(codeatend)
 if not matcher.ignores(ctx):
-yield (filename, startline, lineno + 1, b''.join(code))
+yield (filename, startline, lineno + 1, ''.join(code))
 else:
 # this is an error of basefile
 # (if matchers are implemented correctly)
-errors.append(b'%s:%d: unexpected end of file for "%s"'
+errors.append('%s:%d: unexpected end of file for "%s"'
 % (basefile, lineno, matcher.desc))
 # heredoc limit mark to ignore embedded code at check-code.py or so
-heredocignorelimit = b'NO_CHECK_EOF'
+heredocignorelimit = 'NO_CHECK_EOF'
 # the pattern to match against cases below, and to return a limit mark
 # string as 'limit' group
 #
 # - << LIMITMARK
 # - << "LIMITMARK"
 # - << 'LIMITMARK'
-heredoclimitpat = br'\s*<<\s*(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)'
+heredoclimitpat = r'\s*<<\s*(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)'
 class fileheredocmatcher(embeddedmatcher):
 """Detect "cat > FILE << LIMIT" style embedded code
 >>> matcher = fileheredocmatcher(b'heredoc .py file', br'[^<]+\\.py')
->>> b2s(matcher.startsat(b'  $ cat > file.py << EOF\\n'))
+>>> b2s(matcher.startsat('  $ cat > file.py << EOF\\n'))
 ('file.py', '  > EOF\\n')
->>> b2s(matcher.startsat(b'  $ cat   >>file.py   <<EOF\\n'))
+>>> b2s(matcher.startsat('  $ cat   >>file.py   <<EOF\\n'))
 ('file.py', '  > EOF\\n')
->>> b2s(matcher.startsat(b'  $ cat>  \\x27any file.py\\x27<<  "EOF"\\n'))
+>>> b2s(matcher.startsat('  $ cat>  \\x27any file.py\\x27<<  "EOF"\\n'))
 ('any file.py', '  > EOF\\n')
->>> b2s(matcher.startsat(b"  $ cat > file.py << 'ANYLIMIT'\\n"))
+>>> b2s(matcher.startsat("  $ cat > file.py << 'ANYLIMIT'\\n"))
 ('file.py', '  > ANYLIMIT\\n')
->>> b2s(matcher.startsat(b'  $ cat<<ANYLIMIT>"file.py"\\n'))
+>>> b2s(matcher.startsat('  $ cat<<ANYLIMIT>"file.py"\\n'))
 ('file.py', '  > ANYLIMIT\\n')
->>> start = b'  $ cat > file.py << EOF\\n'
+>>> start = '  $ cat > file.py << EOF\\n'
 >>> ctx = matcher.startsat(start)
 >>> matcher.codeatstart(ctx, start)
 >>> b2s(matcher.filename(ctx))
 'file.py'
 >>> matcher.ignores(ctx)
 False
->>> inside = b'  > foo = 1\\n'
+>>> inside = '  > foo = 1\\n'
 >>> matcher.endsat(ctx, inside)
 False
 >>> matcher.isinside(ctx, inside)
 True
 >>> b2s(matcher.codeinside(ctx, inside))
 'foo = 1\\n'
->>> end = b'  > EOF\\n'
+>>> end = '  > EOF\\n'
 >>> matcher.endsat(ctx, end)
 True
 >>> matcher.codeatend(ctx, end)
->>> matcher.endsat(ctx, b'  > EOFEOF\\n')
+>>> matcher.endsat(ctx, '  > EOFEOF\\n')
 False
->>> ctx = matcher.startsat(b'  $ cat > file.py << NO_CHECK_EOF\\n')
+>>> ctx = matcher.startsat('  $ cat > file.py << NO_CHECK_EOF\\n')
 >>> matcher.ignores(ctx)
 True
 """
-_prefix = b'  > '
+_prefix = '  > '
 def __init__(self, desc, namepat):
 super(fileheredocmatcher, self).__init__(desc)
 # build the pattern to match against cases below (and ">>"
 # variants), and to return a target filename string as 'name'
 # group
 #
 # - > NAMEPAT
 # - > "NAMEPAT"
 # - > 'NAMEPAT'
-namepat = (br'\s*>>?\s*(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)'
+namepat = (r'\s*>>?\s*(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)'
 % namepat)
 self._fileres = [
 # "cat > NAME << LIMIT" case
-re.compile(br'  \$ \s*cat' + namepat + heredoclimitpat),
+re.compile(r'  \$ \s*cat' + namepat + heredoclimitpat),
 # "cat << LIMIT > NAME" case
-re.compile(br'  \$ \s*cat' + heredoclimitpat + namepat),
+re.compile(r'  \$ \s*cat' + heredoclimitpat + namepat),
 ]
 def startsat(self, line):
 # ctx is (filename, END-LINE-OF-EMBEDDED-CODE) tuple
 for filere in self._fileres:
 matched = filere.match(line)
 if matched:
 return (matched.group('name'),
-b'  > %s\n' % matched.group('limit'))
+'  > %s\n' % matched.group('limit'))
 def endsat(self, ctx, line):
 return ctx[1] == line
 def isinside(self, ctx, line):
 return line.startswith(self._prefix)
 def ignores(self, ctx):
-return b'  > %s\n' % heredocignorelimit == ctx[1]
+return '  > %s\n' % heredocignorelimit == ctx[1]
 def filename(self, ctx):
 return ctx[0]
 def codeatstart(self, ctx, line):
 class pydoctestmatcher(embeddedmatcher):
 """Detect ">>> code" style embedded python code
 >>> matcher = pydoctestmatcher()
->>> startline = b'  >>> foo = 1\\n'
+>>> startline = '  >>> foo = 1\\n'
 >>> matcher.startsat(startline)
 True
->>> matcher.startsat(b'  ... foo = 1\\n')
+>>> matcher.startsat('  ... foo = 1\\n')
 False
 >>> ctx = matcher.startsat(startline)
 >>> matcher.filename(ctx)
 >>> matcher.ignores(ctx)
 False
 >>> b2s(matcher.codeatstart(ctx, startline))
 'foo = 1\\n'
->>> inside = b'  >>> foo = 1\\n'
+>>> inside = '  >>> foo = 1\\n'
 >>> matcher.endsat(ctx, inside)
 False
 >>> matcher.isinside(ctx, inside)
 True
 >>> b2s(matcher.codeinside(ctx, inside))
 'foo = 1\\n'
->>> inside = b'  ... foo = 1\\n'
+>>> inside = '  ... foo = 1\\n'
 >>> matcher.endsat(ctx, inside)
 False
 >>> matcher.isinside(ctx, inside)
 True
 >>> b2s(matcher.codeinside(ctx, inside))
 'foo = 1\\n'
->>> inside = b'  expected output\\n'
+>>> inside = '  expected output\\n'
 >>> matcher.endsat(ctx, inside)
 False
 >>> matcher.isinside(ctx, inside)
 True
 >>> b2s(matcher.codeinside(ctx, inside))
 '\\n'
->>> inside = b'  \\n'
+>>> inside = '  \\n'
 >>> matcher.endsat(ctx, inside)
 False
 >>> matcher.isinside(ctx, inside)
 True
 >>> b2s(matcher.codeinside(ctx, inside))
 '\\n'
->>> end = b'  $ foo bar\\n'
+>>> end = '  $ foo bar\\n'
 >>> matcher.endsat(ctx, end)
 True
 >>> matcher.codeatend(ctx, end)
->>> end = b'\\n'
+>>> end = '\\n'
 >>> matcher.endsat(ctx, end)
 True
 >>> matcher.codeatend(ctx, end)
 """
-_prefix = b'  >>> '
+_prefix = '  >>> '
-_prefixre = re.compile(br'  (>>>|\.\.\.) ')
+_prefixre = re.compile(r'  (>>>|\.\.\.) ')
 # If a line matches against not _prefixre but _outputre, that line
 # is "an expected output line" (= not a part of code fragment).
 #
 # Strictly speaking, a line matching against "(#if|#else|#endif)"
 # is also treated similarly in "inline python code" semantics by
 # run-tests.py. But "directive line inside inline python code"
 # should be rejected by Mercurial reviewers. Therefore, this
 # regexp does not match against such directive lines.
-_outputre = re.compile(br'  $|  [^$]')
+_outputre = re.compile(r'  $|  [^$]')
 def __init__(self):
-super(pydoctestmatcher, self).__init__(b"doctest style python code")
+super(pydoctestmatcher, self).__init__("doctest style python code")
 def startsat(self, line):
 # ctx is "True"
 return line.startswith(self._prefix)
 return None # no embedded code at end line
 def codeinside(self, ctx, line):
 if self._prefixre.match(line):
 return line[len(self._prefix):] # strip prefix '  >>> '/'  ... '
-return b'\n' # an expected output line is treated as an empty line
+return '\n' # an expected output line is treated as an empty line
 class pyheredocmatcher(embeddedmatcher):
 """Detect "python << LIMIT" style embedded python code
 >>> matcher = pyheredocmatcher()
->>> b2s(matcher.startsat(b'  $ python << EOF\\n'))
+>>> b2s(matcher.startsat('  $ python << EOF\\n'))
 '  > EOF\\n'
->>> b2s(matcher.startsat(b'  $ $PYTHON   <<EOF\\n'))
+>>> b2s(matcher.startsat('  $ $PYTHON   <<EOF\\n'))
 '  > EOF\\n'
->>> b2s(matcher.startsat(b'  $ "$PYTHON"<<  "EOF"\\n'))
+>>> b2s(matcher.startsat('  $ "$PYTHON"<<  "EOF"\\n'))
 '  > EOF\\n'
->>> b2s(matcher.startsat(b"  $ $PYTHON << 'ANYLIMIT'\\n"))
+>>> b2s(matcher.startsat("  $ $PYTHON << 'ANYLIMIT'\\n"))
 '  > ANYLIMIT\\n'
->>> matcher.startsat(b'  $ "$PYTHON" < EOF\\n')
+>>> matcher.startsat('  $ "$PYTHON" < EOF\\n')
->>> start = b'  $ python << EOF\\n'
+>>> start = '  $ python << EOF\\n'
 >>> ctx = matcher.startsat(start)
 >>> matcher.codeatstart(ctx, start)
 >>> matcher.filename(ctx)
 >>> matcher.ignores(ctx)
 False
->>> inside = b'  > foo = 1\\n'
+>>> inside = '  > foo = 1\\n'
 >>> matcher.endsat(ctx, inside)
 False
 >>> matcher.isinside(ctx, inside)
 True
 >>> b2s(matcher.codeinside(ctx, inside))
 'foo = 1\\n'
->>> end = b'  > EOF\\n'
+>>> end = '  > EOF\\n'
 >>> matcher.endsat(ctx, end)
 True
 >>> matcher.codeatend(ctx, end)
->>> matcher.endsat(ctx, b'  > EOFEOF\\n')
+>>> matcher.endsat(ctx, '  > EOFEOF\\n')
 False
->>> ctx = matcher.startsat(b'  $ python << NO_CHECK_EOF\\n')
+>>> ctx = matcher.startsat('  $ python << NO_CHECK_EOF\\n')
 >>> matcher.ignores(ctx)
 True
 """
-_prefix = b'  > '
+_prefix = '  > '
-_startre = re.compile(br'  \$ (\$PYTHON|"\$PYTHON"|python).*' +
+_startre = re.compile(r'  \$ (\$PYTHON|"\$PYTHON"|python).*' +
 heredoclimitpat)
 def __init__(self):
-super(pyheredocmatcher, self).__init__(b"heredoc python invocation")
+super(pyheredocmatcher, self).__init__("heredoc python invocation")
 def startsat(self, line):
 # ctx is END-LINE-OF-EMBEDDED-CODE
 matched = self._startre.match(line)
 if matched:
-return b'  > %s\n' % matched.group('limit')
+return '  > %s\n' % matched.group('limit')
 def endsat(self, ctx, line):
 return ctx == line
 def isinside(self, ctx, line):
 return line.startswith(self._prefix)
 def ignores(self, ctx):
-return b'  > %s\n' % heredocignorelimit == ctx
+return '  > %s\n' % heredocignorelimit == ctx
 def filename(self, ctx):
 return None # no filename
 def codeatstart(self, ctx, line):
 _pymatchers = [
 pydoctestmatcher(),
 pyheredocmatcher(),
 # use '[^<]+' instead of '\S+', in order to match against
 # paths including whitespaces
-fileheredocmatcher(b'heredoc .py file', br'[^<]+\.py'),
+fileheredocmatcher('heredoc .py file', r'[^<]+\.py'),
 ]
 def pyembedded(basefile, lines, errors):
 return embedded(basefile, lines, errors, _pymatchers)
 # for embedded shell script
 _shmatchers = [
 # use '[^<]+' instead of '\S+', in order to match against
 # paths including whitespaces
-fileheredocmatcher(b'heredoc .sh file', br'[^<]+\.sh'),
+fileheredocmatcher('heredoc .sh file', r'[^<]+\.sh'),
 ]
 def shembedded(basefile, lines, errors):
 return embedded(basefile, lines, errors, _shmatchers)
 # for embedded hgrc configuration
 _hgrcmatchers = [
 # use '[^<]+' instead of '\S+', in order to match against
 # paths including whitespaces
-fileheredocmatcher(b'heredoc hgrc file',
+fileheredocmatcher('heredoc hgrc file',
-br'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'),
+r'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'),
 ]
 def hgrcembedded(basefile, lines, errors):
 return embedded(basefile, lines, errors, _hgrcmatchers)
 def showembedded(basefile, lines, embeddedfunc, opts):
 errors = []
 for name, starts, ends, code in embeddedfunc(basefile, lines, errors):
 if not name:
-name = b'<anonymous>'
+name = '<anonymous>'
-writeout(b"%s:%d: %s starts\n" % (basefile, starts, name))
+writeout("%s:%d: %s starts\n" % (basefile, starts, name))
 if opts.verbose and code:
-writeout(b"  |%s\n" %
+writeout("  |%s\n" %
-b"\n  |".join(l for l in code.splitlines()))
+"\n  |".join(l for l in code.splitlines()))
-writeout(b"%s:%d: %s ends\n" % (basefile, ends, name))
+writeout("%s:%d: %s ends\n" % (basefile, ends, name))
 for e in errors:
-writeerr(b"%s\n" % e)
+writeerr("%s\n" % e)
 return len(errors)
 def applyembedded(args, embeddedfunc, opts):
 ret = 0
 if args:
 for f in args:
 with opentext(f) as fp:
-if showembedded(bytestr(f), fp, embeddedfunc, opts):
+if showembedded(f, fp, embeddedfunc, opts):
 ret = 1
 else:
 lines = [l for l in stdin.readlines()]
-if showembedded(b'<stdin>', lines, embeddedfunc, opts):
+if showembedded('<stdin>', lines, embeddedfunc, opts):
 ret = 1
 return ret
 commands = {}
 def command(name, desc):

Mercurial > public > mercurial-scm > hg

comparison contrib/testparseutil.py @ 42330:5364ba1f796f