Mercurial > public > mercurial-scm > hg
annotate contrib/import-checker.py @ 20197:761f2929a6ad
import-checker: refactor sys.path prefix check (issue4129)
This patch refactors the logic in contrib/import-checker.py responsible for
checking the beginnings of the paths in sys.path. In particular, it adds a
variable that defines the set of allowed prefixes.
The primary purpose of this change is to make it easier to add more allowed
prefixes. This will be useful in resolving issue4129, which involves making
the function list_stdlib_modules() work when run from a virtualenv.
author | Chris Jerdonek <chris.jerdonek@gmail.com> |
---|---|
date | Sun, 22 Dec 2013 14:10:26 -0800 |
parents | c65a6937b828 |
children | f5393a9dc4e5 |
rev | line source |
---|---|
20036 | 1 import ast |
2 import os | |
3 import sys | |
4 | |
def dotted_name_of_path(path):
    """Given a relative path to a source file, return its dotted module name.

    The extension of the final component (``.py``, ``.so``, ...) is
    dropped, whatever its length, and every path component becomes one
    component of the dotted name.  Both ``/`` and the platform's native
    separator are accepted between components.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('zlib.so')
    'zlib'
    """
    # Paths built with os.path.join() use the native separator, which is
    # not '/' everywhere; normalize before splitting.
    parts = path.replace(os.sep, '/').split('/')
    # splitext() strips exactly the extension that is present instead of
    # blindly chopping the last three characters.
    parts[-1] = os.path.splitext(parts[-1])[0]
    return '.'.join(parts)
15 | |
16 | |
def list_stdlib_modules():
    """List the modules present in the stdlib.

    This is a generator.  It first yields the interpreter's builtin
    modules and a few hard-coded names, then the dotted name of every
    ``.py``/``.so`` file found under the sys.path entries that start
    with sys.prefix or sys.exec_prefix (``site-packages`` excluded).
    The same name may be yielded more than once.

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    for m in sys.builtin_module_names:
        yield m
    # These modules only exist on windows, but we should always
    # consider them stdlib.
    for m in ['msvcrt', '_winreg']:
        yield m
    # These get missed too
    for m in 'ctypes', 'email':
        yield m
    yield 'builtins' # python3 only
    # We want to walk everything in sys.path that starts with something
    # in stdlib_prefixes.  Add entries here to allow more locations.
    stdlib_prefixes = set([sys.prefix, sys.exec_prefix])
    # str.startswith() takes a tuple, so one call tests every prefix.
    prefixes = tuple(stdlib_prefixes)
    for libpath in sys.path:
        if not libpath.startswith(prefixes):
            continue
        if 'site-packages' in libpath:
            continue
        for top, dirs, files in os.walk(libpath):
            # Prune site-packages directories in place so os.walk() never
            # descends into them; their files would be skipped anyway.
            dirs[:] = [d for d in dirs if 'site-packages' not in d]
            for name in files:
                if name == '__init__.py':
                    continue
                if not name.endswith(('.py', '.so')):
                    continue
                full_path = os.path.join(top, name)
                if 'site-packages' in full_path:
                    continue
                # relpath() stays correct when libpath ends with a path
                # separator; slicing by len(libpath) + 1 would eat the
                # first character of the module name in that case.
                rel_path = os.path.relpath(full_path, libpath)
                # dotted_name_of_path() expects '/'-separated paths.
                yield dotted_name_of_path(rel_path.replace(os.sep, '/'))
74 | |
# Set of stdlib module names, computed once when this file is loaded.
# verify_stdlib_on_own_line() and check_one_mod() test membership in it
# to tell stdlib imports apart from project-local ones.
stdlib_modules = set(list_stdlib_modules())
76 | |
20037
957b43371928
import-checker: ignore nested imports
Augie Fackler <raf@durin42.com>
parents:
20036
diff
changeset
|
def imported_modules(source, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Args:
      source: The python source to examine as a string.
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Yields:
      The module names imported by the given source, one at a time.
      ``import a.b`` yields ``'a.b'``, ``from a import b`` yields
      ``'a.b'`` and ``from . import b`` yields ``'b'``.

    >>> sorted(imported_modules(
    ...         'import foo ; from baz import bar; import foo.qux'))
    ['baz.bar', 'foo', 'foo.qux']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', ignore_nested=True))
    ['foo']
    >>> sorted(imported_modules('from . import foo'))
    ['foo']
    """
    for node in ast.walk(ast.parse(source)):
        # Nodes without a col_offset (e.g. the Module node itself) are
        # treated as starting in column zero.
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue
        if isinstance(node, ast.Import):
            for n in node.names:
                yield n.name
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for "from . import x"; concatenating it
            # with '.' used to raise TypeError.  Yield the bare name then.
            if node.module:
                prefix = node.module + '.'
            else:
                prefix = ''
            for n in node.names:
                yield prefix + n.name
109 | |
def verify_stdlib_on_own_line(source):
    """Given some python source, verify that stdlib imports are done
    in separate statements from relative local module imports.

    Observing this limitation is important as it works around an
    annoying lib2to3 bug in relative import rewrites:
    http://bugs.python.org/issue19510.

    This is a generator: it yields one error message for every
    ``import`` statement that names both stdlib and non-stdlib modules.
    ``from ... import`` statements are not examined.

    >>> list(verify_stdlib_on_own_line('import sys, foo'))
    ['mixed stdlib and relative imports:\\n foo, sys']
    >>> list(verify_stdlib_on_own_line('import sys, os'))
    []
    >>> list(verify_stdlib_on_own_line('import foo, bar'))
    []
    """
    for node in ast.walk(ast.parse(source)):
        if not isinstance(node, ast.Import):
            continue
        # A set is enough: only the distinct names matter, and sorting it
        # works on Python 2 and 3 alike (dict.iterkeys() is Python 2 only).
        names = set(n.name for n in node.names)
        num_std = len([name for name in names if name in stdlib_modules])
        # Either every name is stdlib or none is; anything else is mixed.
        if num_std not in (len(names), 0):
            # NOTE(review): the whitespace after "\n" is reproduced as seen
            # in the reviewed copy, which appears to have collapsed runs of
            # spaces -- confirm the intended indent against the repository.
            yield ('mixed stdlib and relative imports:\n %s' %
                   ', '.join(sorted(names)))
134 | |
class CircularImport(Exception):
    """Raised by check_one_mod() when it finds an import cycle.

    The cycle, a list of module names, is passed as the first argument
    and is read back by find_cycles() through ``args[0]``.
    """
137 | |
138 | |
def cyclekey(names):
    """Return an order-independent key for the cycle ``names``.

    Duplicates are dropped and the remaining names are sorted, so every
    rotation of the same cycle produces the same tuple.
    """
    unique = set(names)
    ordered = sorted(unique)
    return tuple(ordered)
141 | |
def check_one_mod(mod, imports, path=None, ignore=None):
    """Recursively follow the imports of ``mod``, looking for a cycle.

    Args:
      mod: dotted name of the module to start from.
      imports: dict mapping a dotted module name to the names it imports.
      path: list of the modules already visited on the way to ``mod``.
      ignore: container of cyclekey() values; cycles whose key is in it
              are skipped instead of reported.

    Raises:
      CircularImport: for the first cycle found whose key is not in
        ``ignore``.  The cycle is passed as the exception argument, as a
        list that starts and ends with the same module.
    """
    if path is None:
        path = []
    if ignore is None:
        ignore = []
    trail = path + [mod]
    for name in sorted(imports.get(mod, [])):
        if name in stdlib_modules:
            target = name
        else:
            # Anything that is not stdlib is taken to live in the same
            # package as the importing module.
            target = mod.rsplit('.', 1)[0] + '.' + name
        if target not in trail:
            check_one_mod(target, imports, path=trail, ignore=ignore)
            continue
        # ``target`` is already on the trail: we have come full circle.
        start = trail.index(target)
        cycle = trail[start:] + [target]
        if cyclekey(cycle) not in ignore:
            raise CircularImport(cycle)
158 | |
20038
c65a6937b828
import-checker: try a little harder to show fewer cycles
Augie Fackler <raf@durin42.com>
parents:
20037
diff
changeset
|
def rotatecycle(cycle):
    """Arrange a cycle so that the lexicographically first module is
    listed first.

    ``cycle`` is a list of module names.  It may be closed (the first
    module repeated at the end, which is how check_one_mod() reports
    cycles) or open; the result is always closed.

    >>> rotatecycle(['foo', 'bar', 'foo'])
    ['bar', 'foo', 'bar']
    >>> rotatecycle(['bar', 'foo', 'bar'])
    ['bar', 'foo', 'bar']
    >>> rotatecycle(['foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    # Drop the closing repeat before rotating.  Keeping it duplicated the
    # last module whenever the lowest one was already in first position.
    if len(cycle) > 1 and cycle[0] == cycle[-1]:
        ring = cycle[:-1]
    else:
        ring = list(cycle)
    lowest = min(ring)
    idx = ring.index(lowest)
    return ring[idx:] + ring[:idx] + [lowest]

def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    Args:
      imports: dict mapping a dotted module name to the list of names
               that module imports.

    Returns:
      A list of strings, one per distinct cycle found, each of the form
      ``'a -> b -> a'`` and starting with the lexicographically first
      module of the cycle.

    >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
    ...            'top.bar': ['baz', 'sys'],
    ...            'top.baz': ['foo'],
    ...            'top.qux': ['foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    cycles = {}
    for mod in sorted(imports):
        try:
            # Cycles recorded so far are passed as ``ignore`` so that each
            # distinct cycle is reported only once.
            check_one_mod(mod, imports, ignore=cycles)
        except CircularImport as e:
            cycle = e.args[0]
            cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
    # list() keeps the return value a real list on Python 3 as well.
    return list(cycles.values())
188 | |
def _cycle_sortkey(c):
    """Sort key for a cycle string: its length first, then the string."""
    size = len(c)
    return (size, c)
191 | |
def main(argv):
    """Check the given source files for import problems.

    Every file named in ``argv[1:]`` is checked for ``import``
    statements that mix stdlib and local modules, and the import graph
    built from all of them is searched for cycles.  Each problem is
    printed on stdout.

    Args:
      argv: the command line, program name first.

    Returns:
      1 if no file was given; otherwise True if any problem was
      reported and False if the files are clean.  The value is meant to
      be converted with int() and used as the process exit status, so a
      clean run exits with 0.
    """
    if len(argv) < 2:
        if argv:
            prog = argv[0]
        else:
            prog = 'import-checker.py'
        # The program name is now actually substituted into the message;
        # the raw '%s' used to be printed as-is.
        print('Usage: %s file [file] [file] ...' % prog)
        return 1
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        # The with-block closes the file even if reading fails.
        with open(source_path) as f:
            src = f.read()
        modname = dotted_name_of_path(source_path)
        used_imports[modname] = sorted(
            imported_modules(src, ignore_nested=True))
        for error in verify_stdlib_on_own_line(src):
            any_errors = True
            print('%s %s' % (source_path, error))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle: %s' % c)
            firstmods.add(first)
        any_errors = True
    # This used to be "not any_errors", which made a clean run exit with
    # status 1 and a failing run exit with status 0.
    return any_errors

if __name__ == '__main__':
    sys.exit(int(main(sys.argv)))