comparison contrib/check-code.py @ 15281:aeeb2afcdc25 stable

check-code: support multiline matches like try/except/finally - match one pattern at a time against entire file - find line containing match - sort matches by line number
author Matt Mackall <mpm@selenic.com>
date Sun, 16 Oct 2011 20:26:20 -0500
parents 5a0fdc715769
children ebeac9c41456
comparison
equal deleted inserted replaced
15280:52bc0c4daaf9 15281:aeeb2afcdc25
11 import keyword 11 import keyword
12 import optparse 12 import optparse
13 13
14 def repquote(m): 14 def repquote(m):
15 t = re.sub(r"\w", "x", m.group('text')) 15 t = re.sub(r"\w", "x", m.group('text'))
16 t = re.sub(r"[^\sx]", "o", t) 16 t = re.sub(r"[^\s\nx]", "o", t)
17 return m.group('quote') + t + m.group('quote') 17 return m.group('quote') + t + m.group('quote')
18 18
19 def reppython(m): 19 def reppython(m):
20 comment = m.group('comment') 20 comment = m.group('comment')
21 if comment: 21 if comment:
42 42
43 43
44 testpats = [ 44 testpats = [
45 [ 45 [
46 (r'(pushd|popd)', "don't use 'pushd' or 'popd', use 'cd'"), 46 (r'(pushd|popd)', "don't use 'pushd' or 'popd', use 'cd'"),
47 (r'\W\$?\(\([^\)]*\)\)', "don't use (()) or $(()), use 'expr'"), 47 (r'\W\$?\(\([^\)\n]*\)\)', "don't use (()) or $(()), use 'expr'"),
48 (r'^function', "don't use 'function', use old style"), 48 (r'^function', "don't use 'function', use old style"),
49 (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"), 49 (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"),
50 (r'echo.*\\n', "don't use 'echo \\n', use printf"), 50 (r'echo.*\\n', "don't use 'echo \\n', use printf"),
51 (r'echo -n', "don't use 'echo -n', use printf"), 51 (r'echo -n', "don't use 'echo -n', use printf"),
52 (r'^diff.*-\w*N', "don't use 'diff -N'"), 52 (r'^diff.*-\w*N', "don't use 'diff -N'"),
53 (r'(^| )wc[^|]*$', "filter wc output"), 53 (r'(^| )wc[^|\n]*$', "filter wc output"),
54 (r'head -c', "don't use 'head -c', use 'dd'"), 54 (r'head -c', "don't use 'head -c', use 'dd'"),
55 (r'ls.*-\w*R', "don't use 'ls -R', use 'find'"), 55 (r'ls.*-\w*R', "don't use 'ls -R', use 'find'"),
56 (r'printf.*\\\d\d\d', "don't use 'printf \NNN', use Python"), 56 (r'printf.*\\\d\d\d', "don't use 'printf \NNN', use Python"),
57 (r'printf.*\\x', "don't use printf \\x, use Python"), 57 (r'printf.*\\x', "don't use printf \\x, use Python"),
58 (r'\$\(.*\)', "don't use $(expr), use `expr`"), 58 (r'\$\(.*\)', "don't use $(expr), use `expr`"),
59 (r'rm -rf \*', "don't use naked rm -rf, target a directory"), 59 (r'rm -rf \*', "don't use naked rm -rf, target a directory"),
60 (r'(^|\|\s*)grep (-\w\s+)*[^|]*[(|]\w', 60 (r'(^|\|\s*)grep (-\w\s+)*[^|\n]*[(|]\w',
61 "use egrep for extended grep syntax"), 61 "use egrep for extended grep syntax"),
62 (r'/bin/', "don't use explicit paths for tools"), 62 (r'/bin/', "don't use explicit paths for tools"),
63 (r'\$PWD', "don't use $PWD, use `pwd`"), 63 (r'\$PWD', "don't use $PWD, use `pwd`"),
64 (r'[^\n]\Z', "no trailing newline"), 64 (r'[^\n]\Z', "no trailing newline"),
65 (r'export.*=', "don't export and assign at once"), 65 (r'export.*=', "don't export and assign at once"),
66 ('^([^"\']|("[^"]*")|(\'[^\']*\'))*\\^', "^ must be quoted"), 66 ('^([^"\'\n]|("[^"\n]*")|(\'[^\'\n]*\'))*\\^', "^ must be quoted"),
67 (r'^source\b', "don't use 'source', use '.'"), 67 (r'^source\b', "don't use 'source', use '.'"),
68 (r'touch -d', "don't use 'touch -d', use 'touch -t' instead"), 68 (r'touch -d', "don't use 'touch -d', use 'touch -t' instead"),
69 (r'ls\s+[^|-]+\s+-', "options to 'ls' must come before filenames"), 69 (r'ls\s+[^|\n-]+\s+-', "options to 'ls' must come before filenames"),
70 (r'[^>]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"), 70 (r'[^>\n]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"),
71 (r'stop\(\)', "don't use 'stop' as a shell function name"), 71 (r'stop\(\)', "don't use 'stop' as a shell function name"),
72 ], 72 ],
73 # warnings 73 # warnings
74 [] 74 []
75 ] 75 ]
81 81
82 uprefix = r"^ \$ " 82 uprefix = r"^ \$ "
83 uprefixc = r"^ > " 83 uprefixc = r"^ > "
84 utestpats = [ 84 utestpats = [
85 [ 85 [
86 (r'^(\S| $ ).*(\S\s+|^\s+)\n', "trailing whitespace on non-output"), 86 (r'^(\S| $ ).*(\S[ \t]+|^[ \t]+)\n', "trailing whitespace on non-output"),
87 (uprefix + r'.*\|\s*sed', "use regex test output patterns instead of sed"), 87 (uprefix + r'.*\|\s*sed', "use regex test output patterns instead of sed"),
88 (uprefix + r'(true|exit 0)', "explicit zero exit unnecessary"), 88 (uprefix + r'(true|exit 0)', "explicit zero exit unnecessary"),
89 (uprefix + r'.*\$\?', "explicit exit code checks unnecessary"), 89 (uprefix + r'.*\$\?', "explicit exit code checks unnecessary"),
90 (uprefix + r'.*\|\| echo.*(fail|error)', 90 (uprefix + r'.*\|\| echo.*(fail|error)',
91 "explicit exit code checks unnecessary"), 91 "explicit exit code checks unnecessary"),
119 (r'\.has_key\b', "dict.has_key is not available in Python 3+"), 119 (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
120 (r'^\s*\t', "don't use tabs"), 120 (r'^\s*\t', "don't use tabs"),
121 (r'\S;\s*\n', "semicolon"), 121 (r'\S;\s*\n', "semicolon"),
122 (r'\w,\w', "missing whitespace after ,"), 122 (r'\w,\w', "missing whitespace after ,"),
123 (r'\w[+/*\-<>]\w', "missing whitespace in expression"), 123 (r'\w[+/*\-<>]\w', "missing whitespace in expression"),
124 (r'^\s+\w+=\w+[^,)]$', "missing whitespace in assignment"), 124 (r'^\s+\w+=\w+[^,)\n]$', "missing whitespace in assignment"),
125 (r'(?m)(\s+)try:\n((?:\n|\1\s.*\n)+?)\1except.*?:\n'
126 r'((?:\n|\1\s.*\n)+?)\1finally:', 'no try/except/finally in Py2.4'),
125 (r'.{85}', "line too long"), 127 (r'.{85}', "line too long"),
126 (r'[^\n]\Z', "no trailing newline"), 128 (r'[^\n]\Z', "no trailing newline"),
127 (r'(\S\s+|^\s+)\n', "trailing whitespace"), 129 (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
128 # (r'^\s+[^_ ][^_. ]+_[^_]+\s*=', "don't use underbars in identifiers"), 130 # (r'^\s+[^_ \n][^_. \n]+_[^_\n]+\s*=', "don't use underbars in identifiers"),
129 # (r'\w*[a-z][A-Z]\w*\s*=', "don't use camelcase in identifiers"), 131 # (r'\w*[a-z][A-Z]\w*\s*=', "don't use camelcase in identifiers"),
130 (r'^\s*(if|while|def|class|except|try)\s[^[]*:\s*[^\]#\s]+', 132 (r'^\s*(if|while|def|class|except|try)\s[^[\n]*:\s*[^\\n]#\s]+',
131 "linebreak after :"), 133 "linebreak after :"),
132 (r'class\s[^( ]+:', "old-style class, use class foo(object)"), 134 (r'class\s[^( \n]+:', "old-style class, use class foo(object)"),
133 (r'class\s[^( ]+\(\):', 135 (r'class\s[^( \n]+\(\):',
134 "class foo() not available in Python 2.4, use class foo(object)"), 136 "class foo() not available in Python 2.4, use class foo(object)"),
135 (r'\b(%s)\(' % '|'.join(keyword.kwlist), 137 (r'\b(%s)\(' % '|'.join(keyword.kwlist),
136 "Python keyword is not a function"), 138 "Python keyword is not a function"),
137 (r',]', "unneeded trailing ',' in list"), 139 (r',]', "unneeded trailing ',' in list"),
138 # (r'class\s[A-Z][^\(]*\((?!Exception)', 140 # (r'class\s[A-Z][^\(]*\((?!Exception)',
150 (r'(?<!def)\s+(callable)\(', 152 (r'(?<!def)\s+(callable)\(',
151 "callable not available in Python 3, use getattr(f, '__call__', None)"), 153 "callable not available in Python 3, use getattr(f, '__call__', None)"),
152 (r'if\s.*\selse', "if ... else form not available in Python 2.4"), 154 (r'if\s.*\selse', "if ... else form not available in Python 2.4"),
153 (r'^\s*(%s)\s\s' % '|'.join(keyword.kwlist), 155 (r'^\s*(%s)\s\s' % '|'.join(keyword.kwlist),
154 "gratuitous whitespace after Python keyword"), 156 "gratuitous whitespace after Python keyword"),
155 (r'([\(\[]\s\S)|(\S\s[\)\]])', "gratuitous whitespace in () or []"), 157 (r'([\(\[][ \t]\S)|(\S[ \t][\)\]])', "gratuitous whitespace in () or []"),
156 # (r'\s\s=', "gratuitous whitespace before ="), 158 # (r'\s\s=', "gratuitous whitespace before ="),
157 (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=)\S', 159 (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=)\S',
158 "missing whitespace around operator"), 160 "missing whitespace around operator"),
159 (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=)\s', 161 (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=)\s',
160 "missing whitespace around operator"), 162 "missing whitespace around operator"),
204 cpats = [ 206 cpats = [
205 [ 207 [
206 (r'//', "don't use //-style comments"), 208 (r'//', "don't use //-style comments"),
207 (r'^ ', "don't use spaces to indent"), 209 (r'^ ', "don't use spaces to indent"),
208 (r'\S\t', "don't use tabs except for indent"), 210 (r'\S\t', "don't use tabs except for indent"),
209 (r'(\S\s+|^\s+)\n', "trailing whitespace"), 211 (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
210 (r'.{85}', "line too long"), 212 (r'.{85}', "line too long"),
211 (r'(while|if|do|for)\(', "use space after while/if/do/for"), 213 (r'(while|if|do|for)\(', "use space after while/if/do/for"),
212 (r'return\(', "return is not a function"), 214 (r'return\(', "return is not a function"),
213 (r' ;', "no space before ;"), 215 (r' ;', "no space before ;"),
214 (r'\w+\* \w+', "use int *foo, not int* foo"), 216 (r'\w+\* \w+', "use int *foo, not int* foo"),
329 if warnings: 331 if warnings:
330 pats = pats[0] + pats[1] 332 pats = pats[0] + pats[1]
331 else: 333 else:
332 pats = pats[0] 334 pats = pats[0]
333 # print post # uncomment to show filtered version 335 # print post # uncomment to show filtered version
334 z = enumerate(zip(pre.splitlines(), post.splitlines(True))) 336
335 if debug: 337 if debug:
336 print "Checking %s for %s" % (name, f) 338 print "Checking %s for %s" % (name, f)
337 for n, l in z: 339
338 if "check-code" + "-ignore" in l[0]: 340 prelines = None
339 if debug: 341 errors = []
340 print "Skipping %s for %s:%s (check-code -ignore)" % ( 342 for p, msg in pats:
341 name, f, n) 343 pos = 0
342 continue 344 n = 0
343 for p, msg in pats: 345 for m in re.finditer(p, post):
344 if re.search(p, l[1]): 346 if prelines is None:
345 bd = "" 347 prelines = pre.splitlines()
346 if blame: 348 postlines = post.splitlines(True)
347 bd = 'working directory' 349
348 if not blamecache: 350 start = m.start()
349 blamecache = getblame(f) 351 while n < len(postlines):
350 if n < len(blamecache): 352 step = len(postlines[n])
351 bl, bu, br = blamecache[n] 353 if pos + step > start:
352 if bl == l[0]: 354 break
353 bd = '%s@%s' % (bu, br) 355 pos += step
354 logfunc(f, n + 1, l[0], msg, bd) 356 n += 1
355 fc += 1 357 l = prelines[n]
356 result = False 358
359 if "check-code" + "-ignore" in l:
360 if debug:
361 print "Skipping %s for %s:%s (check-code -ignore)" % (
362 name, f, n)
363 continue
364 bd = ""
365 if blame:
366 bd = 'working directory'
367 if not blamecache:
368 blamecache = getblame(f)
369 if n < len(blamecache):
370 bl, bu, br = blamecache[n]
371 if bl == l:
372 bd = '%s@%s' % (bu, br)
373 errors.append((f, n + 1, l, msg, bd))
374 result = False
375
376 errors.sort()
377 for e in errors:
378 logfunc(*e)
379 fc += 1
357 if maxerr is not None and fc >= maxerr: 380 if maxerr is not None and fc >= maxerr:
358 print " (too many errors, giving up)" 381 print " (too many errors, giving up)"
359 break 382 break
383
360 return result 384 return result
361 385
362 if __name__ == "__main__": 386 if __name__ == "__main__":
363 parser = optparse.OptionParser("%prog [options] [files]") 387 parser = optparse.OptionParser("%prog [options] [files]")
364 parser.add_option("-w", "--warnings", action="store_true", 388 parser.add_option("-w", "--warnings", action="store_true",