Mercurial > public > mercurial-scm > hg
annotate contrib/import-checker.py @ 22975:461342e1c8aa
import-checker: check modules for pure Python build correctly
Before this patch, "import-checker.py" just replaces "/" in the specified
filenames with ".". This makes modules for the pure Python build belong to
the "mercurial.pure" package, and prevents "import-checker.py" from
correctly checking them for cyclic dependencies.
This patch discards the "pure" component from the fully qualified names of
such modules.
To avoid unexpectedly discarding "pure" from the names of standard library
modules, this patch makes "dotted_name_of_path" discard "pure" only from
Mercurial-specific modules, which are the ones specified via command line
arguments.
author | FUJIWARA Katsunori <foozy@lares.dti.ne.jp> |
---|---|
date | Fri, 17 Oct 2014 02:07:05 +0900 |
parents | 6bd43614d387 |
children | 642d245ff537 |
rev | line source |
---|---|
20036 | 1 import ast |
2 import os | |
3 import sys | |
4 | |
20198
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
5 # Import a minimal set of stdlib modules needed for list_stdlib_modules() |
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
6 # to work when run from a virtualenv. The modules were chosen empirically |
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
7 # so that the return value matches the return value without virtualenv. |
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
8 import BaseHTTPServer |
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
9 import zlib |
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
10 |
22975
461342e1c8aa
import-checker: check modules for pure Python build correctly
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents:
22974
diff
changeset
|
def dotted_name_of_path(path, trimpure=False):
    """Given a relative path to a source file, return its dotted module name.

    Args:
      path: Relative path of a module file. Both '/' and the platform's
          own separator (os.sep) are accepted between components.
      trimpure: If true, every path component named 'pure' is dropped
          from the resulting dotted name.

    Returns:
      The dotted module name as a string.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('mercurial/pure/parsers.py', trimpure=True)
    'mercurial.parsers'
    >>> dotted_name_of_path('zlibmodule.so')
    'zlib'
    """
    # Paths assembled with os.path.join() use os.sep, which is not '/' on
    # every platform; normalize so they are split into components too.
    parts = path.replace(os.sep, '/').split('/')
    parts[-1] = parts[-1].split('.', 1)[0] # remove .py and .so and .ARCH.so
    if parts[-1].endswith('module'):
        # Extension modules such as "zlibmodule.so" are imported as "zlib".
        parts[-1] = parts[-1][:-6]
    if trimpure:
        return '.'.join(p for p in parts if p != 'pure')
    return '.'.join(parts)
28 | |
29 | |
def list_stdlib_modules():
    """Generate the names of the modules that make up the stdlib.

    Names come from three places: the interpreter's built-in module
    table, a short hard-coded list, and a walk over every sys.path entry
    that lives under a known stdlib prefix (skipping site-packages).

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    for builtin_name in sys.builtin_module_names:
        yield builtin_name
    # msvcrt and _winreg only exist on windows, but we should always
    # consider them stdlib; ctypes and email get missed by the walk
    # below; builtins is python3 only.
    for fixed_name in ('msvcrt', '_winreg', 'ctypes', 'email', 'builtins'):
        yield fixed_name

    prefixes = set([sys.prefix, sys.exec_prefix])
    # Inside a virtualenv the two prefixes above are not enough, so add
    # the directories of the stdlib modules imported at the top of this
    # file unless a known prefix already covers them.
    for known in (BaseHTTPServer, zlib):
        try:
            # Not all module objects have a __file__ attribute.
            location = known.__file__
        except AttributeError:
            continue
        folder = os.path.dirname(location)
        if not any(folder.startswith(p) for p in prefixes): # no-py24
            prefixes.add(folder)

    for libpath in sys.path:
        # Only walk sys.path entries that start with a stdlib prefix.
        # check-code is suppressed because the ast module used by this
        # script implies the availability of any().
        in_stdlib = any(libpath.startswith(p) for p in prefixes) # no-py24
        if not in_stdlib or 'site-packages' in libpath:
            continue
        strip = len(libpath) + 1
        for top, dirs, files in os.walk(libpath):
            for name in files:
                if name == '__init__.py':
                    continue
                if not name.endswith(('.py', '.so')):
                    continue
                full_path = os.path.join(top, name)
                if 'site-packages' in full_path:
                    continue
                yield dotted_name_of_path(full_path[strip:])
101 | |
# Computed once at import time; verify_stdlib_on_own_line() and
# check_one_mod() test imported names for membership in this set.
stdlib_modules = set(list_stdlib_modules())
103 | |
20037
957b43371928
import-checker: ignore nested imports
Augie Fackler <raf@durin42.com>
parents:
20036
diff
changeset
|
def imported_modules(source, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Args:
      source: The python source to examine as a string.
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Yields:
      The name of each module imported by the given source. For
      "from X import Y" the name is "X.Y"; for "from . import Y", which
      has no module part, it is just "Y".

    >>> sorted(imported_modules(
    ...     'import foo ; from baz import bar; import foo.qux'))
    ['baz.bar', 'foo', 'foo.qux']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', ignore_nested=True))
    ['foo']
    >>> sorted(imported_modules('from . import foo'))
    ['foo']
    """
    for node in ast.walk(ast.parse(source)):
        # Nodes without a col_offset (e.g. the Module node) count as
        # column zero so that the walk is never cut short by them.
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue
        if isinstance(node, ast.Import):
            for n in node.names:
                yield n.name
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for "from . import name"; concatenating
            # it with '.' would raise TypeError, so use an empty prefix.
            prefix = node.module + '.' if node.module else ''
            for n in node.names:
                yield prefix + n.name
136 | |
def verify_stdlib_on_own_line(source):
    """Yield one message per import statement mixing stdlib and local names.

    Every plain "import a, b, ..." statement in the given python source
    is inspected; a message is produced when some of its names are in
    stdlib_modules and some are not.

    Keeping the two kinds apart matters because it works around an
    annoying lib2to3 bug in relative import rewrites:
    http://bugs.python.org/issue19510.

    >>> list(verify_stdlib_on_own_line('import sys, foo'))
    ['mixed imports\\n stdlib: sys\\n relative: foo']
    >>> list(verify_stdlib_on_own_line('import sys, os'))
    []
    >>> list(verify_stdlib_on_own_line('import foo, bar'))
    []
    """
    for stmt in ast.walk(ast.parse(source)):
        if not isinstance(stmt, ast.Import):
            continue
        names = [alias.name for alias in stmt.names]
        stdlib = sorted(n for n in names if n in stdlib_modules)
        local = sorted(n for n in names if n not in stdlib_modules)
        if stdlib and local:
            yield ('mixed imports\n stdlib: %s\n relative: %s' %
                   (', '.join(stdlib), ', '.join(local)))
20036 | 161 |
class CircularImport(Exception):
    """Signals an import cycle; args[0] holds the list of modules in it."""
164 | |
165 | |
def cyclekey(names):
    """Return a tuple of the distinct names in sorted order.

    The result is hashable and does not depend on the order or the
    repetition of the input names.
    """
    unique = list(set(names))
    unique.sort()
    return tuple(unique)
168 | |
def check_one_mod(mod, imports, path=None, ignore=None):
    """Walk the imports reachable from mod, raising on the first new cycle.

    Args:
      mod: Dotted name of the module to start from.
      imports: Mapping of dotted module name to the names it imports.
      path: Modules already visited on the way to mod (not modified).
      ignore: Container of cyclekey() values for cycles to skip.

    Raises:
      CircularImport: when an import leads back to a module already on
          the current path and the cycle's key is not in ignore. The
          exception argument is the cycle as a list.
    """
    trail = ([] if path is None else path) + [mod]
    known = [] if ignore is None else ignore
    package = mod.rsplit('.', 1)[0]
    for name in sorted(imports.get(mod, [])):
        absolute = name in stdlib_modules or name.startswith('mercurial.')
        # Anything else is taken to be a sibling inside mod's package.
        target = name if absolute else package + '.' + name
        if target not in trail:
            check_one_mod(target, imports, path=trail, ignore=known)
            continue
        loop = trail[trail.index(target):] + [target]
        if cyclekey(loop) not in known:
            raise CircularImport(loop)
185 | |
20038
c65a6937b828
import-checker: try a little harder to show fewer cycles
Augie Fackler <raf@durin42.com>
parents:
20037
diff
changeset
|
def rotatecycle(cycle):
    """arrange a cycle so that the lexicographically first module is
    listed first

    Args:
      cycle: List of module names describing a cycle. The closed form
          built by check_one_mod(), where the first module is repeated
          at the end, is expected; an unclosed list is accepted too.

    Returns:
      A new closed list that starts and ends with the lowest name.

    >>> rotatecycle(['foo', 'bar', 'foo'])
    ['bar', 'foo', 'bar']
    >>> rotatecycle(['bar', 'baz', 'foo', 'bar'])
    ['bar', 'baz', 'foo', 'bar']
    """
    # Drop the repeated closing element before rotating. Otherwise a
    # cycle that already starts with its lowest module keeps that
    # element and gets the lowest module appended a second time.
    if len(cycle) > 1 and cycle[0] == cycle[-1]:
        ring = cycle[:-1]
    else:
        ring = list(cycle)
    lowest = min(ring)
    idx = ring.index(lowest)
    return ring[idx:] + ring[:idx] + [lowest]

def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    Args:
      imports: Mapping of dotted module name to the names it imports.

    Returns:
      The distinct cycles found, each rendered as "a -> b -> a". At most
      one new cycle is recorded per starting module, because the search
      from that module stops at the first cycle not yet recorded.

    >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
    ...            'top.bar': ['baz', 'sys'],
    ...            'top.baz': ['foo'],
    ...            'top.qux': ['foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    cycles = {}
    for mod in sorted(imports):
        try:
            # Cycles already recorded are passed as "ignore" so that the
            # same set of modules is only reported once.
            check_one_mod(mod, imports, ignore=cycles)
        except CircularImport as e:
            cycle = e.args[0]
            cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
    return list(cycles.values())
215 | |
def _cycle_sortkey(c):
    """Sort key ordering cycle strings by length, then by their text."""
    length = len(c)
    return (length, c)
218 | |
def main(argv):
    """Check the given source files for import problems.

    Each file named in argv[1:] is read, its column-zero imports are
    recorded under its dotted module name (with any "pure" component
    dropped), and mixed stdlib/relative import statements are printed.
    Afterwards the collected import graph is searched for cycles.

    Args:
      argv: Command line, program name first, then the files to check.

    Returns:
      The process exit status: 1 if usage was wrong or any problem was
      reported, 0 otherwise.
    """
    # Each print() call below takes a single pre-formatted string, which
    # behaves the same as a print statement and as the print function.
    if len(argv) < 2:
        prog = argv[0] if argv else 'import-checker.py'
        print('Usage: %s file [file] [file] ...' % prog)
        return 1
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        modname = dotted_name_of_path(source_path, trimpure=True)
        # "with" closes the file even if reading it raises.
        with open(source_path) as f:
            src = f.read()
        used_imports[modname] = sorted(
            imported_modules(src, ignore_nested=True))
        for error in verify_stdlib_on_own_line(src):
            any_errors = True
            print('%s %s' % (source_path, error))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle: %s' % c)
            firstmods.add(first)
            any_errors = True
    # Non-zero means failure, matching the usage-error path above.
    return 1 if any_errors else 0
249 | |
if __name__ == '__main__':
    # Coerce main()'s result to an integer process exit status.
    sys.exit(int(main(sys.argv)))