Mercurial > public > mercurial-scm > hg-stable
annotate contrib/import-checker.py @ 20037:957b43371928
import-checker: ignore nested imports
author | Augie Fackler <raf@durin42.com> |
---|---|
date | Sun, 17 Nov 2013 16:58:18 -0500 |
parents | e5d51fa51aba |
children | c65a6937b828 |
rev | line source |
---|---|
20036 | 1 import ast |
2 import os | |
3 import sys | |
4 | |
def dotted_name_of_path(path):
    """Turn a relative, '/'-separated source path into a dotted module name.

    The last three characters of the final path component (the '.py'
    suffix) are dropped and the components are then joined with dots.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    """
    pieces = path.split('/')
    stem = pieces.pop()[:-3]  # drop the trailing '.py'
    pieces.append(stem)
    return '.'.join(pieces)
15 | |
16 | |
def list_stdlib_modules():
    """Yield the names of the modules that make up the stdlib.

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    # Modules baked into the interpreter never show up on disk.
    for builtin in sys.builtin_module_names:
        yield builtin
    # msvcrt and _winreg only exist on windows, but we should always
    # consider them stdlib.  ctypes and email get missed too, and
    # builtins is python3 only.
    for extra in ('msvcrt', '_winreg', 'ctypes', 'email', 'builtins'):
        yield extra
    # Only walk the sys.path entries that live under sys.prefix or
    # sys.exec_prefix, and never anything inside site-packages.
    roots = (sys.prefix, sys.exec_prefix)
    for libpath in sys.path:
        if not libpath.startswith(roots) or 'site-packages' in libpath:
            continue
        skip = len(libpath) + 1  # length of "<libpath>/"
        for top, dirs, files in os.walk(libpath):
            for name in files:
                if name == '__init__.py':
                    continue
                if not name.endswith(('.py', '.so')):
                    continue
                full_path = os.path.join(top, name)
                if 'site-packages' in full_path:
                    continue
                yield dotted_name_of_path(full_path[skip:])
71 | |
# Set of stdlib module names, computed once when this module is loaded;
# the checks below use it for membership tests.
stdlib_modules = set(list_stdlib_modules())
73 | |
20037
957b43371928
import-checker: ignore nested imports
Augie Fackler <raf@durin42.com>
parents:
20036
diff
changeset
|
def imported_modules(source, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Args:
      source: The python source to examine as a string.
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Yields:
      The name of each module imported by the given source.  For
      "import a.b" that is 'a.b'; for "from a import b" it is 'a.b';
      for a bare relative import such as "from . import b" it is 'b'.

    >>> sorted(imported_modules(
    ...     'import foo ; from baz import bar; import foo.qux'))
    ['baz.bar', 'foo', 'foo.qux']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', ignore_nested=True))
    ['foo']
    >>> sorted(imported_modules('from . import foo'))
    ['foo']
    """
    for node in ast.walk(ast.parse(source)):
        # Nodes without a col_offset (e.g. the Module node itself) are
        # treated as being in column zero.
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue
        if isinstance(node, ast.Import):
            for n in node.names:
                yield n.name
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for "from . import x"; there is no
            # module prefix to prepend in that case.
            if node.module:
                prefix = node.module + '.'
            else:
                prefix = ''
            for n in node.names:
                yield prefix + n.name
106 | |
def verify_stdlib_on_own_line(source):
    """Given some python source, verify that stdlib imports are done
    in separate statements from relative local module imports.

    Observing this limitation is important as it works around an
    annoying lib2to3 bug in relative import rewrites:
    http://bugs.python.org/issue19510.

    Only plain "import a, b" statements are examined.  One message is
    yielded for every such statement that names both stdlib and
    non-stdlib modules.

    >>> list(verify_stdlib_on_own_line('import sys, foo'))
    ['mixed stdlib and relative imports:\\n foo, sys']
    >>> list(verify_stdlib_on_own_line('import sys, os'))
    []
    >>> list(verify_stdlib_on_own_line('import foo, bar'))
    []
    """
    for node in ast.walk(ast.parse(source)):
        if not isinstance(node, ast.Import):
            continue
        # Work on the distinct names so a repeated name counts once.
        names = set(n.name for n in node.names)
        from_std = [name for name in names if name in stdlib_modules]
        # A statement is fine when all or none of its names are stdlib.
        if from_std and len(from_std) != len(names):
            # sorted() over the set works on both python 2 and 3, unlike
            # the python2-only dict.iterkeys().
            yield ('mixed stdlib and relative imports:\n %s' %
                   ', '.join(sorted(names)))
131 | |
class CircularImport(Exception):
    """Signals that an import cycle was found.

    The first argument is the list of module names forming the cycle.
    """
134 | |
135 | |
def cyclekey(names):
    """Return an order-independent key for a cycle.

    The key is a tuple of the distinct names in sorted order, so the
    same cycle entered at different modules gets the same key.
    """
    ordered = list(set(names))
    ordered.sort()
    return tuple(ordered)
138 | |
def check_one_mod(mod, imports, path=None, ignore=None):
    """Recursively follow the imports of mod, looking for a cycle.

    Args:
      mod: dotted name of the module to start from.
      imports: dict mapping a dotted module name to the names it imports.
      path: modules visited on the way to mod (defaults to empty).
      ignore: container of cyclekey() values for cycles that should not
              be reported again (defaults to empty).

    Raises:
      CircularImport: with the list of modules forming the first cycle
      found whose key is not in ignore.
    """
    trail = ([] if path is None else path) + [mod]
    skip = [] if ignore is None else ignore
    for name in sorted(imports.get(mod, [])):
        if name in stdlib_modules:
            target = name
        else:
            # Anything that is not stdlib is taken to be a sibling of
            # mod, so qualify it with mod's package.
            target = mod.rsplit('.', 1)[0] + '.' + name
        if target not in trail:
            check_one_mod(target, imports, path=trail, ignore=skip)
            continue
        # target was already visited: the cycle runs from its first
        # appearance on the trail back to itself.
        loop = trail[trail.index(target):] + [target]
        if cyclekey(loop) not in skip:
            raise CircularImport(loop)
155 | |
156 | |
def _rotatecycle(cycle):
    """Rotate a closed cycle so its lexicographically first module leads.

    cycle is a list whose last element repeats its first; the result
    has the same shape.
    """
    body = cycle[:-1]
    if not body:
        return list(cycle)
    start = body.index(min(body))
    rotated = body[start:] + body[:start]
    # Close the cycle again by repeating the new first module.
    return rotated + rotated[:1]


def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    Args:
      imports: dict mapping a dotted module name to the names it imports.

    Returns:
      A list of strings, one per distinct cycle found, each of the
      form 'a -> b -> a' and starting with the lexicographically first
      module of the cycle.

    >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
    ...            'top.bar': ['baz', 'sys'],
    ...            'top.baz': ['foo'],
    ...            'top.qux': ['foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    cycles = {}
    for mod in sorted(imports):
        try:
            # Cycles already recorded are passed as "ignore" so the same
            # cycle is not raised again from a different starting module.
            check_one_mod(mod, imports, ignore=cycles)
        except CircularImport as e:
            cycle = e.args[0]
            cycles[cyclekey(cycle)] = ' -> '.join(_rotatecycle(cycle))
    return list(cycles.values())
176 | |
177 def _cycle_sortkey(c): | |
178 return len(c), c | |
179 | |
def main(argv):
    """Check the given python source files for import problems.

    Two kinds of problem are reported on stdout: import statements
    that mix stdlib and local modules, and import cycles between the
    given files.

    Args:
      argv: the command line; argv[0] is the program name and each
            later entry is the relative path of a source file.

    Returns:
      1 when no files were given; otherwise True if any problem was
      reported and False if not, so that int() of the result is
      non-zero exactly when something went wrong.
    """
    if len(argv) < 2:
        prog = argv[0] if argv else 'import-checker.py'
        print('Usage: %s file [file] [file] ...' % prog)
        return 1
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        # "with" closes the file even if reading it fails.
        with open(source_path) as f:
            src = f.read()
        modname = dotted_name_of_path(source_path)
        used_imports[modname] = sorted(
            imported_modules(src, ignore_nested=True))
        for error in verify_stdlib_on_own_line(src):
            any_errors = True
            print('%s %s' % (source_path, error))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle: %s' % c)
            firstmods.add(first)
            any_errors = True
    return any_errors
210 | |
# When run as a script, check the files named on the command line and
# use main()'s result, converted to an int, as the process exit status.
if __name__ == '__main__':
    sys.exit(int(main(sys.argv)))