mercurial-scm/hg: comparison contrib/testparseutil.py

equal deleted inserted replaced

-:58786930ea27
+:726cfc47f17a
+# testparseutil.py - utilities to parse test script for check tools
+#
+#  Copyright 2018 FUJIWARA Katsunori <foozy@lares.dti.ne.jp> and others
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+from __future__ import absolute_import, print_function
+import abc
+import re
+import sys
+####################
+# for Python3 compatibility (almost comes from mercurial/pycompat.py)
# True when running on Python 3 or later
ispy3 = sys.version_info >= (3,)
def identity(a):
    """Return "a" itself, without any conversion"""
    return a
def _rapply(f, xs):
    """Apply "f" to every leaf value of "xs" recursively

    list, set, tuple and dict containers are rebuilt with their own
    type (for dict, both keys and values are converted). None is
    returned as is.
    """
    if xs is None:
        # assume None means non-value of optional data
        return None
    container = type(xs)
    if isinstance(xs, dict):
        pairs = ((_rapply(f, key), _rapply(f, value))
                 for key, value in xs.items())
        return container(pairs)
    if isinstance(xs, (list, set, tuple)):
        return container(_rapply(f, item) for item in xs)
    return f(xs)
def rapply(f, xs):
    """Apply "f" to "xs" recursively, or return "xs" as is for identity"""
    # skipping the traversal for identity is a fast path mainly for py2
    return xs if f is identity else _rapply(f, xs)
if not ispy3:
    # on Python 2, std streams and str already work on bytes
    stdin, stdout, stderr = sys.stdin, sys.stdout, sys.stderr

    bytestr = str
    sysstr = identity
    opentext = open
else:
    import builtins

    # TODO: .buffer might not exist if std streams were replaced; we'll need
    # a silly wrapper to make a bytes stream backed by a unicode one.
    stdin = sys.stdin.buffer
    stdout = sys.stdout.buffer
    stderr = sys.stderr.buffer

    def bytestr(s):
        """Encode "s" into bytes (tiny version of pycompat.bytestr)"""
        return s.encode('latin1')

    def sysstr(s):
        """Return "s" as a native str, decoding it if it is not one yet"""
        return s if isinstance(s, builtins.str) else s.decode(u'latin-1')

    def opentext(f):
        """Open file "f" for reading in binary mode"""
        return open(f, 'rb')
def b2s(x):
    """Convert BYTES elements in "x" to SYSSTR recursively"""
    converted = rapply(sysstr, x)
    return converted
def writeout(data):
    """Write "data" in BYTES into stdout"""
    out = stdout
    out.write(data)
def writeerr(data):
    """Write "data" in BYTES into stderr"""
    err = stderr
    err.write(data)
+####################
# NOTE: abc.ABCMeta is applied via a generated base class, because a
# "__metaclass__" class attribute is honored only by Python 2. Python 3
# silently ignores it, which would leave @abc.abstractmethod unenforced.
class embeddedmatcher(abc.ABCMeta('_embeddedmatcherbase', (object,), {})):
    """Base class to detect embedded code fragments in *.t test script

    Subclasses must implement all of the abstract methods below;
    instantiating a class that leaves any of them undefined raises
    TypeError.
    """

    def __init__(self, desc):
        """Remember "desc", the description used in error messages"""
        self.desc = desc

    @abc.abstractmethod
    def startsat(self, line):
        """Examine whether embedded code starts at line

        This can return arbitrary object, and it is used as 'ctx' for
        subsequent method invocations.
        """

    @abc.abstractmethod
    def endsat(self, ctx, line):
        """Examine whether embedded code ends at line"""

    @abc.abstractmethod
    def isinside(self, ctx, line):
        """Examine whether line is inside embedded code, if not yet endsat
        """

    @abc.abstractmethod
    def ignores(self, ctx):
        """Examine whether detected embedded code should be ignored"""

    @abc.abstractmethod
    def filename(self, ctx):
        """Return filename of embedded code

        If filename isn't specified for embedded code explicitly, this
        returns None.
        """

    @abc.abstractmethod
    def codeatstart(self, ctx, line):
        """Return actual code at the start line of embedded code

        This might return None, if the start line doesn't contain
        actual code.
        """

    @abc.abstractmethod
    def codeatend(self, ctx, line):
        """Return actual code at the end line of embedded code

        This might return None, if the end line doesn't contain actual
        code.
        """

    @abc.abstractmethod
    def codeinside(self, ctx, line):
        """Return actual code at line inside embedded code"""
def embedded(basefile, lines, errors, matchers):
    """pick embedded code fragments up from given lines

    This is the parsing logic shared by all kinds of embedded code: it
    drives the specified matchers over the given lines.

    :basefile: a name of a file, from which lines to be parsed come.
    :lines: to be parsed (might be a value returned by "open(basefile)")
    :errors: an array, into which messages for detected error are stored
    :matchers: an array of embeddedmatcher objects

    This function yields '(filename, starts, ends, code)' tuple.

    :filename: a name of embedded code, if it is explicitly specified
               (e.g.  "foobar" of "cat >> foobar <<EOF").
               Otherwise, this is None
    :starts: line number (1-origin), at which embedded code starts (inclusive)
    :ends: line number (1-origin), at which embedded code ends (exclusive)
    :code: extracted embedded code, which is single-stringified

    >>> class ambigmatcher(object):
    ...     # mock matcher class to examine implementation of
    ...     # "ambiguous matching" corner case
    ...     def __init__(self, desc, matchfunc):
    ...         self.desc = desc
    ...         self.matchfunc = matchfunc
    ...     def startsat(self, line):
    ...         return self.matchfunc(line)
    >>> ambig1 = ambigmatcher(b'ambiguous #1',
    ...                       lambda l: l.startswith(b'  $ cat '))
    >>> ambig2 = ambigmatcher(b'ambiguous #2',
    ...                       lambda l: l.endswith(b'<< EOF\\n'))
    >>> lines = [b'  $ cat > foo.py << EOF\\n']
    >>> errors = []
    >>> matchers = [ambig1, ambig2]
    >>> list(t for t in embedded(b'<dummy>', lines, errors, matchers))
    []
    >>> b2s(errors)
    ['<dummy>:1: ambiguous line for "ambiguous #1", "ambiguous #2"']
    """
    active = None  # matcher collecting the current fragment, if any
    ctx = filename = code = startline = None  # for pyflakes

    for lineno, line in enumerate(lines, 1):
        if not line.endswith(b'\n'):
            line += b'\n'  # to normalize EOF line

        if active:  # now, inside embedded code
            ended = active.endsat(ctx, line)
            if not ended and active.isinside(ctx, line):
                code.append(active.codeinside(ctx, line))
                continue

            if ended:
                tail = active.codeatend(ctx, line)
                if tail is not None:
                    code.append(tail)
                if not active.ignores(ctx):
                    yield (filename, startline, lineno, b''.join(code))
            else:
                # this is an error of basefile
                # (if matchers are implemented correctly)
                errors.append(b'%s:%d: unexpected line for "%s"'
                              % (basefile, lineno, active.desc))
                # extraction by the current matcher stops here, because
                # appearance of unexpected line might mean that expected
                # end-of-embedded-code line might never appear

            # in both cases, fall through to the start detection below
            # (no "continue"), because line might start next fragment
            active = None

        # examine whether current line starts embedded code or not
        assert not active

        candidates = []
        for m in matchers:
            found = m.startsat(line)
            if found:
                candidates.append((m, found))

        if not candidates:
            continue

        if len(candidates) > 1:
            # this is an error of matchers, maybe
            descs = b', '.join([b'"%s"' % m.desc for m, c in candidates])
            errors.append(b'%s:%d: ambiguous line for %s' %
                          (basefile, lineno, descs))
            # omit extracting embedded code, because choosing
            # arbitrary matcher from matched ones might fail to
            # detect the end of embedded code as expected.
            continue

        active, ctx = candidates[0]
        filename = active.filename(ctx)
        head = active.codeatstart(ctx, line)
        if head is None:
            # the start line holds no code; the fragment begins right after
            code = []
            startline = lineno + 1
        else:
            code = [head]
            startline = lineno

    if active:
        # examine whether EOF ends embedded code, because embedded
        # code isn't yet ended explicitly
        if not active.endsat(ctx, b'\n'):
            # this is an error of basefile
            # (if matchers are implemented correctly)
            errors.append(b'%s:%d: unexpected end of file for "%s"'
                          % (basefile, lineno, active.desc))
        else:
            tail = active.codeatend(ctx, b'\n')
            if tail is not None:
                code.append(tail)
            if not active.ignores(ctx):
                yield (filename, startline, lineno + 1, b''.join(code))
+# heredoc limit mark to ignore embedded code at check-code.py or so
+heredocignorelimit = b'NO_CHECK_EOF'
+# the pattern to match against cases below, and to return a limit mark
+# string as 'limit' group (an opening quote, if any, is captured as
+# 'lquote' group and has to be repeated right after the limit mark)
+#
+# - << LIMITMARK
+# - << "LIMITMARK"
+# - << 'LIMITMARK'
+heredoclimitpat = br'\s*<<\s*(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)'
class fileheredocmatcher(embeddedmatcher):
    """Detect "cat > FILE << LIMIT" style embedded code

    >>> matcher = fileheredocmatcher(b'heredoc .py file', br'[^<]+\.py')
    >>> b2s(matcher.startsat(b'  $ cat > file.py << EOF\\n'))
    ('file.py', '  > EOF\\n')
    >>> b2s(matcher.startsat(b'  $ cat   >>file.py   <<EOF\\n'))
    ('file.py', '  > EOF\\n')
    >>> b2s(matcher.startsat(b'  $ cat>  \\x27any file.py\\x27<<  "EOF"\\n'))
    ('any file.py', '  > EOF\\n')
    >>> b2s(matcher.startsat(b"  $ cat > file.py << 'ANYLIMIT'\\n"))
    ('file.py', '  > ANYLIMIT\\n')
    >>> b2s(matcher.startsat(b'  $ cat<<ANYLIMIT>"file.py"\\n'))
    ('file.py', '  > ANYLIMIT\\n')
    >>> start = b'  $ cat > file.py << EOF\\n'
    >>> ctx = matcher.startsat(start)
    >>> matcher.codeatstart(ctx, start)
    >>> b2s(matcher.filename(ctx))
    'file.py'
    >>> matcher.ignores(ctx)
    False
    >>> inside = b'  > foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> end = b'  > EOF\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> matcher.endsat(ctx, b'  > EOFEOF\\n')
    False
    >>> ctx = matcher.startsat(b'  $ cat > file.py << NO_CHECK_EOF\\n')
    >>> matcher.ignores(ctx)
    True
    """
    _prefix = b'  > '

    def __init__(self, desc, namepat):
        super(fileheredocmatcher, self).__init__(desc)

        # the redirection part matches against cases below (and ">>"
        # variants), and returns a target filename string as 'name'
        # group
        #
        # - > NAMEPAT
        # - > "NAMEPAT"
        # - > 'NAMEPAT'
        redirect = (br'\s*>>?\s*(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)'
                    % namepat)
        catcmd = br'  \$ \s*cat'
        orderings = (
            (redirect, heredoclimitpat),  # "cat > NAME << LIMIT" case
            (heredoclimitpat, redirect),  # "cat << LIMIT > NAME" case
        )
        self._fileres = [re.compile(catcmd + first + second)
                         for first, second in orderings]

    def startsat(self, line):
        # ctx is (filename, END-LINE-OF-EMBEDDED-CODE) tuple
        attempts = (filere.match(line) for filere in self._fileres)
        found = next((hit for hit in attempts if hit), None)
        if found is None:
            return None
        return (found.group('name'), b'  > %s\n' % found.group('limit'))

    def endsat(self, ctx, line):
        endline = ctx[1]
        return endline == line

    def isinside(self, ctx, line):
        return line.startswith(self._prefix)

    def ignores(self, ctx):
        ignoredend = b'  > %s\n' % heredocignorelimit
        return ignoredend == ctx[1]

    def filename(self, ctx):
        name = ctx[0]
        return name

    def codeatstart(self, ctx, line):
        return None  # the start line never carries embedded code

    def codeatend(self, ctx, line):
        return None  # the end line never carries embedded code

    def codeinside(self, ctx, line):
        skip = len(self._prefix)
        return line[skip:]  # drop the heredoc continuation prefix
+####
+# for embedded python script
class pydoctestmatcher(embeddedmatcher):
    """Detect ">>> code" style embedded python code

    >>> matcher = pydoctestmatcher()
    >>> startline = b'  >>> foo = 1\\n'
    >>> matcher.startsat(startline)
    True
    >>> matcher.startsat(b'  ... foo = 1\\n')
    False
    >>> ctx = matcher.startsat(startline)
    >>> matcher.filename(ctx)
    >>> matcher.ignores(ctx)
    False
    >>> b2s(matcher.codeatstart(ctx, startline))
    'foo = 1\\n'
    >>> inside = b'  >>> foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> inside = b'  ... foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> inside = b'  expected output\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    '\\n'
    >>> inside = b'  \\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    '\\n'
    >>> end = b'  $ foo bar\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> end = b'\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    """
    _prefix = b'  >>> '
    _prefixre = re.compile(br'  (>>>|\.\.\.) ')

    # If a line matches against not _prefixre but _outputre, that line
    # is "an expected output line" (= not a part of code fragment).
    #
    # Strictly speaking, a line matching against "(#if|#else|#endif)"
    # is also treated similarly in "inline python code" semantics by
    # run-tests.py. But "directive line inside inline python code"
    # should be rejected by Mercurial reviewers. Therefore, this
    # regexp does not match against such directive lines.
    _outputre = re.compile(br'  $|  [^$]')

    def __init__(self):
        super(pydoctestmatcher, self).__init__(b"doctest style python code")

    def startsat(self, line):
        # ctx is "True"
        return line.startswith(self._prefix)

    def endsat(self, ctx, line):
        # ended, if line is neither a code line nor an expected output line
        return not self._prefixre.match(line) and not self._outputre.match(line)

    def isinside(self, ctx, line):
        return True  # every line belongs to the fragment until it ends

    def ignores(self, ctx):
        return False  # doctest style code is never exempted from checks

    def filename(self, ctx):
        return None  # doctest style code has no filename

    def codeatstart(self, ctx, line):
        skip = len(self._prefix)
        return line[skip:]  # strip prefix '  >>> '/'  ... '

    def codeatend(self, ctx, line):
        return None  # the end line never carries embedded code

    def codeinside(self, ctx, line):
        if not self._prefixre.match(line):
            # an expected output line is treated as an empty line
            return b'\n'
        skip = len(self._prefix)
        return line[skip:]  # strip prefix '  >>> '/'  ... '
class pyheredocmatcher(embeddedmatcher):
    """Detect "python << LIMIT" style embedded python code

    >>> matcher = pyheredocmatcher()
    >>> b2s(matcher.startsat(b'  $ python << EOF\\n'))
    '  > EOF\\n'
    >>> b2s(matcher.startsat(b'  $ $PYTHON   <<EOF\\n'))
    '  > EOF\\n'
    >>> b2s(matcher.startsat(b'  $ "$PYTHON"<<  "EOF"\\n'))
    '  > EOF\\n'
    >>> b2s(matcher.startsat(b"  $ $PYTHON << 'ANYLIMIT'\\n"))
    '  > ANYLIMIT\\n'
    >>> matcher.startsat(b'  $ "$PYTHON" < EOF\\n')
    >>> start = b'  $ python << EOF\\n'
    >>> ctx = matcher.startsat(start)
    >>> matcher.codeatstart(ctx, start)
    >>> matcher.filename(ctx)
    >>> matcher.ignores(ctx)
    False
    >>> inside = b'  > foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> end = b'  > EOF\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> matcher.endsat(ctx, b'  > EOFEOF\\n')
    False
    >>> ctx = matcher.startsat(b'  $ python << NO_CHECK_EOF\\n')
    >>> matcher.ignores(ctx)
    True
    """
    _prefix = b'  > '
    _startre = re.compile(br'  \$ (\$PYTHON|"\$PYTHON"|python).*' +
                          heredoclimitpat)

    def __init__(self):
        super(pyheredocmatcher, self).__init__(b"heredoc python invocation")

    def startsat(self, line):
        # ctx is END-LINE-OF-EMBEDDED-CODE
        found = self._startre.match(line)
        return b'  > %s\n' % found.group('limit') if found else None

    def endsat(self, ctx, line):
        return line == ctx

    def isinside(self, ctx, line):
        return line.startswith(self._prefix)

    def ignores(self, ctx):
        ignoredend = b'  > %s\n' % heredocignorelimit
        return ignoredend == ctx

    def filename(self, ctx):
        return None  # code piped into python has no filename

    def codeatstart(self, ctx, line):
        return None  # the start line never carries embedded code

    def codeatend(self, ctx, line):
        return None  # the end line never carries embedded code

    def codeinside(self, ctx, line):
        skip = len(self._prefix)
        return line[skip:]  # drop the heredoc continuation prefix
# matchers used to pick embedded python code up
_pymatchers = [
    pydoctestmatcher(),
    pyheredocmatcher(),
    # the filename pattern is '[^<]+' instead of '\S+', in order to
    # match against paths including whitespaces
    fileheredocmatcher(b'heredoc .py file', br'[^<]+\.py'),
]

def pyembedded(basefile, lines, errors):
    """Pick embedded python code fragments up from given lines

    Arguments and yielded tuples are the same as those of embedded().
    """
    return embedded(basefile, lines, errors, matchers=_pymatchers)
+####
+# for embedded shell script
# matchers used to pick embedded shell script up
_shmatchers = [
    # the filename pattern is '[^<]+' instead of '\S+', in order to
    # match against paths including whitespaces
    fileheredocmatcher(b'heredoc .sh file', br'[^<]+\.sh'),
]

def shembedded(basefile, lines, errors):
    """Pick embedded shell script fragments up from given lines

    Arguments and yielded tuples are the same as those of embedded().
    """
    return embedded(basefile, lines, errors, matchers=_shmatchers)
+####
+# for embedded hgrc configuration
# matchers used to pick embedded hgrc configuration up
_hgrcmatchers = [
    # the filename pattern uses '[^/<]+' instead of '\S+', in order to
    # match against paths including whitespaces
    fileheredocmatcher(b'heredoc hgrc file',
                       br'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'),
]

def hgrcembedded(basefile, lines, errors):
    """Pick embedded hgrc configuration fragments up from given lines

    Arguments and yielded tuples are the same as those of embedded().
    """
    return embedded(basefile, lines, errors, matchers=_hgrcmatchers)
+####
if __name__ == "__main__":
    # command line front end: list embedded code fragments of given files
    import optparse

    def showembedded(basefile, lines, embeddedfunc, opts):
        """Write fragments detected in "lines" out; return number of errors

        Start/end of each fragment goes to stdout in "FILENAME:LINENO:"
        format (with actual code, if opts.verbose), and detected errors
        go to stderr.
        """
        errors = []
        for name, starts, ends, code in embeddedfunc(basefile, lines, errors):
            if not name:
                name = b'<anonymous>'
            writeout(b"%s:%d: %s starts\n" % (basefile, starts, name))
            if opts.verbose and code:
                writeout(b"  |%s\n" % b"\n  |".join(code.splitlines()))
            writeout(b"%s:%d: %s ends\n" % (basefile, ends, name))
        for e in errors:
            writeerr(b"%s\n" % e)
        return len(errors)

    def applyembedded(args, embeddedfunc, opts):
        """Examine files in "args" (or stdin, if empty); return exit code

        The result is 1, if an error is detected in any input.
        Otherwise, it is 0.
        """
        ret = 0
        if args:
            for f in args:
                with opentext(f) as fp:
                    if showembedded(bytestr(f), fp, embeddedfunc, opts):
                        ret = 1
        else:
            if showembedded(b'<stdin>', stdin.readlines(), embeddedfunc,
                            opts):
                ret = 1
        return ret

    commands = {}

    def command(name, desc):
        """Return a decorator to register a function as command "name\""""
        def wrap(func):
            commands[name] = (desc, func)
            # hand the function back, to prevent the decorated name
            # from being rebound to None
            return func
        return wrap

    @command("pyembedded", "detect embedded python script")
    def pyembeddedcmd(args, opts):
        return applyembedded(args, pyembedded, opts)

    @command("shembedded", "detect embedded shell script")
    def shembeddedcmd(args, opts):
        return applyembedded(args, shembedded, opts)

    @command("hgrcembedded", "detect embedded hgrc configuration")
    def hgrcembeddedcmd(args, opts):
        return applyembedded(args, hgrcembedded, opts)

    # sorted, to list commands in the same order regardless of dict
    # ordering of running Python
    availablecommands = "\n".join(["  - %s: %s" % (key, value[0])
                                   for key, value in sorted(commands.items())])

    parser = optparse.OptionParser("""%prog COMMAND [file ...]
Pick up embedded code fragments from given file(s) or stdin, and list
up start/end lines of them in standard compiler format
("FILENAME:LINENO:").
Available commands are:
""" + availablecommands + """
""")
    parser.add_option("-v", "--verbose",
                      help="enable additional output (e.g. actual code)",
                      action="store_true")
    (opts, args) = parser.parse_args()

    if not args or args[0] not in commands:
        parser.print_help()
        sys.exit(255)

    sys.exit(commands[args[0]][1](args[1:], opts))

changeset 40093	726cfc47f17a
child 41552	99b4c6d73a72