modulefinder.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634
  1. """Find modules used by a script, using introspection."""
  2. import dis
  3. import importlib._bootstrap_external
  4. import importlib.machinery
  5. import marshal
  6. import os
  7. import sys
  8. import types
  9. import struct
  10. import warnings
  11. with warnings.catch_warnings():
  12. warnings.simplefilter('ignore', PendingDeprecationWarning)
  13. import imp
  14. LOAD_CONST = dis.opmap['LOAD_CONST']
  15. IMPORT_NAME = dis.opmap['IMPORT_NAME']
  16. STORE_NAME = dis.opmap['STORE_NAME']
  17. STORE_GLOBAL = dis.opmap['STORE_GLOBAL']
  18. STORE_OPS = STORE_NAME, STORE_GLOBAL
  19. EXTENDED_ARG = dis.EXTENDED_ARG
  20. # Modulefinder does a good job at simulating Python's, but it can not
  21. # handle __path__ modifications packages make at runtime. Therefore there
  22. # is a mechanism whereby you can register extra paths in this map for a
  23. # package, and it will be honored.
  24. # Note this is a mapping is lists of paths.
  25. packagePathMap = {}
  26. # A Public interface
  27. def AddPackagePath(packagename, path):
  28. packagePathMap.setdefault(packagename, []).append(path)
  29. replacePackageMap = {}
  30. # This ReplacePackage mechanism allows modulefinder to work around
  31. # situations in which a package injects itself under the name
  32. # of another package into sys.modules at runtime by calling
  33. # ReplacePackage("real_package_name", "faked_package_name")
  34. # before running ModuleFinder.
  35. def ReplacePackage(oldname, newname):
  36. replacePackageMap[oldname] = newname
  37. class Module:
  38. def __init__(self, name, file=None, path=None):
  39. self.__name__ = name
  40. self.__file__ = file
  41. self.__path__ = path
  42. self.__code__ = None
  43. # The set of global names that are assigned to in the module.
  44. # This includes those names imported through starimports of
  45. # Python modules.
  46. self.globalnames = {}
  47. # The set of starimports this module did that could not be
  48. # resolved, ie. a starimport from a non-Python module.
  49. self.starimports = {}
  50. def __repr__(self):
  51. s = "Module(%r" % (self.__name__,)
  52. if self.__file__ is not None:
  53. s = s + ", %r" % (self.__file__,)
  54. if self.__path__ is not None:
  55. s = s + ", %r" % (self.__path__,)
  56. s = s + ")"
  57. return s
  58. class ModuleFinder:
  59. def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
  60. if path is None:
  61. path = sys.path
  62. self.path = path
  63. self.modules = {}
  64. self.badmodules = {}
  65. self.debug = debug
  66. self.indent = 0
  67. self.excludes = excludes
  68. self.replace_paths = replace_paths
  69. self.processed_paths = [] # Used in debugging only
  70. def msg(self, level, str, *args):
  71. if level <= self.debug:
  72. for i in range(self.indent):
  73. print(" ", end=' ')
  74. print(str, end=' ')
  75. for arg in args:
  76. print(repr(arg), end=' ')
  77. print()
  78. def msgin(self, *args):
  79. level = args[0]
  80. if level <= self.debug:
  81. self.indent = self.indent + 1
  82. self.msg(*args)
  83. def msgout(self, *args):
  84. level = args[0]
  85. if level <= self.debug:
  86. self.indent = self.indent - 1
  87. self.msg(*args)
  88. def run_script(self, pathname):
  89. self.msg(2, "run_script", pathname)
  90. with open(pathname) as fp:
  91. stuff = ("", "r", imp.PY_SOURCE)
  92. self.load_module('__main__', fp, pathname, stuff)
  93. def load_file(self, pathname):
  94. dir, name = os.path.split(pathname)
  95. name, ext = os.path.splitext(name)
  96. with open(pathname) as fp:
  97. stuff = (ext, "r", imp.PY_SOURCE)
  98. self.load_module(name, fp, pathname, stuff)
  99. def import_hook(self, name, caller=None, fromlist=None, level=-1):
  100. self.msg(3, "import_hook", name, caller, fromlist, level)
  101. parent = self.determine_parent(caller, level=level)
  102. q, tail = self.find_head_package(parent, name)
  103. m = self.load_tail(q, tail)
  104. if not fromlist:
  105. return q
  106. if m.__path__:
  107. self.ensure_fromlist(m, fromlist)
  108. return None
  109. def determine_parent(self, caller, level=-1):
  110. self.msgin(4, "determine_parent", caller, level)
  111. if not caller or level == 0:
  112. self.msgout(4, "determine_parent -> None")
  113. return None
  114. pname = caller.__name__
  115. if level >= 1: # relative import
  116. if caller.__path__:
  117. level -= 1
  118. if level == 0:
  119. parent = self.modules[pname]
  120. assert parent is caller
  121. self.msgout(4, "determine_parent ->", parent)
  122. return parent
  123. if pname.count(".") < level:
  124. raise ImportError("relative importpath too deep")
  125. pname = ".".join(pname.split(".")[:-level])
  126. parent = self.modules[pname]
  127. self.msgout(4, "determine_parent ->", parent)
  128. return parent
  129. if caller.__path__:
  130. parent = self.modules[pname]
  131. assert caller is parent
  132. self.msgout(4, "determine_parent ->", parent)
  133. return parent
  134. if '.' in pname:
  135. i = pname.rfind('.')
  136. pname = pname[:i]
  137. parent = self.modules[pname]
  138. assert parent.__name__ == pname
  139. self.msgout(4, "determine_parent ->", parent)
  140. return parent
  141. self.msgout(4, "determine_parent -> None")
  142. return None
  143. def find_head_package(self, parent, name):
  144. self.msgin(4, "find_head_package", parent, name)
  145. if '.' in name:
  146. i = name.find('.')
  147. head = name[:i]
  148. tail = name[i+1:]
  149. else:
  150. head = name
  151. tail = ""
  152. if parent:
  153. qname = "%s.%s" % (parent.__name__, head)
  154. else:
  155. qname = head
  156. q = self.import_module(head, qname, parent)
  157. if q:
  158. self.msgout(4, "find_head_package ->", (q, tail))
  159. return q, tail
  160. if parent:
  161. qname = head
  162. parent = None
  163. q = self.import_module(head, qname, parent)
  164. if q:
  165. self.msgout(4, "find_head_package ->", (q, tail))
  166. return q, tail
  167. self.msgout(4, "raise ImportError: No module named", qname)
  168. raise ImportError("No module named " + qname)
  169. def load_tail(self, q, tail):
  170. self.msgin(4, "load_tail", q, tail)
  171. m = q
  172. while tail:
  173. i = tail.find('.')
  174. if i < 0: i = len(tail)
  175. head, tail = tail[:i], tail[i+1:]
  176. mname = "%s.%s" % (m.__name__, head)
  177. m = self.import_module(head, mname, m)
  178. if not m:
  179. self.msgout(4, "raise ImportError: No module named", mname)
  180. raise ImportError("No module named " + mname)
  181. self.msgout(4, "load_tail ->", m)
  182. return m
  183. def ensure_fromlist(self, m, fromlist, recursive=0):
  184. self.msg(4, "ensure_fromlist", m, fromlist, recursive)
  185. for sub in fromlist:
  186. if sub == "*":
  187. if not recursive:
  188. all = self.find_all_submodules(m)
  189. if all:
  190. self.ensure_fromlist(m, all, 1)
  191. elif not hasattr(m, sub):
  192. subname = "%s.%s" % (m.__name__, sub)
  193. submod = self.import_module(sub, subname, m)
  194. if not submod:
  195. raise ImportError("No module named " + subname)
  196. def find_all_submodules(self, m):
  197. if not m.__path__:
  198. return
  199. modules = {}
  200. # 'suffixes' used to be a list hardcoded to [".py", ".pyc"].
  201. # But we must also collect Python extension modules - although
  202. # we cannot separate normal dlls from Python extensions.
  203. suffixes = []
  204. suffixes += importlib.machinery.EXTENSION_SUFFIXES[:]
  205. suffixes += importlib.machinery.SOURCE_SUFFIXES[:]
  206. suffixes += importlib.machinery.BYTECODE_SUFFIXES[:]
  207. for dir in m.__path__:
  208. try:
  209. names = os.listdir(dir)
  210. except OSError:
  211. self.msg(2, "can't list directory", dir)
  212. continue
  213. for name in names:
  214. mod = None
  215. for suff in suffixes:
  216. n = len(suff)
  217. if name[-n:] == suff:
  218. mod = name[:-n]
  219. break
  220. if mod and mod != "__init__":
  221. modules[mod] = mod
  222. return modules.keys()
  223. def import_module(self, partname, fqname, parent):
  224. self.msgin(3, "import_module", partname, fqname, parent)
  225. try:
  226. m = self.modules[fqname]
  227. except KeyError:
  228. pass
  229. else:
  230. self.msgout(3, "import_module ->", m)
  231. return m
  232. if fqname in self.badmodules:
  233. self.msgout(3, "import_module -> None")
  234. return None
  235. if parent and parent.__path__ is None:
  236. self.msgout(3, "import_module -> None")
  237. return None
  238. try:
  239. fp, pathname, stuff = self.find_module(partname,
  240. parent and parent.__path__, parent)
  241. except ImportError:
  242. self.msgout(3, "import_module ->", None)
  243. return None
  244. try:
  245. m = self.load_module(fqname, fp, pathname, stuff)
  246. finally:
  247. if fp:
  248. fp.close()
  249. if parent:
  250. setattr(parent, partname, m)
  251. self.msgout(3, "import_module ->", m)
  252. return m
  253. def load_module(self, fqname, fp, pathname, file_info):
  254. suffix, mode, type = file_info
  255. self.msgin(2, "load_module", fqname, fp and "fp", pathname)
  256. if type == imp.PKG_DIRECTORY:
  257. m = self.load_package(fqname, pathname)
  258. self.msgout(2, "load_module ->", m)
  259. return m
  260. if type == imp.PY_SOURCE:
  261. co = compile(fp.read()+'\n', pathname, 'exec')
  262. elif type == imp.PY_COMPILED:
  263. try:
  264. marshal_data = importlib._bootstrap_external._validate_bytecode_header(fp.read())
  265. except ImportError as exc:
  266. self.msgout(2, "raise ImportError: " + str(exc), pathname)
  267. raise
  268. co = marshal.loads(marshal_data)
  269. else:
  270. co = None
  271. m = self.add_module(fqname)
  272. m.__file__ = pathname
  273. if co:
  274. if self.replace_paths:
  275. co = self.replace_paths_in_code(co)
  276. m.__code__ = co
  277. self.scan_code(co, m)
  278. self.msgout(2, "load_module ->", m)
  279. return m
  280. def _add_badmodule(self, name, caller):
  281. if name not in self.badmodules:
  282. self.badmodules[name] = {}
  283. if caller:
  284. self.badmodules[name][caller.__name__] = 1
  285. else:
  286. self.badmodules[name]["-"] = 1
  287. def _safe_import_hook(self, name, caller, fromlist, level=-1):
  288. # wrapper for self.import_hook() that won't raise ImportError
  289. if name in self.badmodules:
  290. self._add_badmodule(name, caller)
  291. return
  292. try:
  293. self.import_hook(name, caller, level=level)
  294. except ImportError as msg:
  295. self.msg(2, "ImportError:", str(msg))
  296. self._add_badmodule(name, caller)
  297. else:
  298. if fromlist:
  299. for sub in fromlist:
  300. if sub in self.badmodules:
  301. self._add_badmodule(sub, caller)
  302. continue
  303. try:
  304. self.import_hook(name, caller, [sub], level=level)
  305. except ImportError as msg:
  306. self.msg(2, "ImportError:", str(msg))
  307. fullname = name + "." + sub
  308. self._add_badmodule(fullname, caller)
  309. def scan_opcodes_25(self, co,
  310. unpack = struct.unpack):
  311. # Scan the code, and yield 'interesting' opcode combinations
  312. code = co.co_code
  313. names = co.co_names
  314. consts = co.co_consts
  315. opargs = [(op, arg) for _, op, arg in dis._unpack_opargs(code)
  316. if op != EXTENDED_ARG]
  317. for i, (op, oparg) in enumerate(opargs):
  318. if op in STORE_OPS:
  319. yield "store", (names[oparg],)
  320. continue
  321. if (op == IMPORT_NAME and i >= 2
  322. and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST):
  323. level = consts[opargs[i-2][1]]
  324. fromlist = consts[opargs[i-1][1]]
  325. if level == 0: # absolute import
  326. yield "absolute_import", (fromlist, names[oparg])
  327. else: # relative import
  328. yield "relative_import", (level, fromlist, names[oparg])
  329. continue
    def scan_code(self, co, m):
        """Record the global stores and imports found in code object *co*.

        Stored names are added to ``m.globalnames``.  Imports are followed
        through _safe_import_hook() with *m* as the caller.  Code objects
        nested in ``co.co_consts`` are scanned recursively against the same
        module *m*.
        """
        code = co.co_code
        scanner = self.scan_opcodes_25
        for what, args in scanner(co):
            if what == "store":
                name, = args
                m.globalnames[name] = 1
            elif what == "absolute_import":
                fromlist, name = args
                have_star = 0
                if fromlist is not None:
                    if "*" in fromlist:
                        have_star = 1
                    # The star itself is handled below, not imported by name.
                    fromlist = [f for f in fromlist if f != "*"]
                self._safe_import_hook(name, m, fromlist, level=0)
                if have_star:
                    # We've encountered an "import *". If it is a Python module,
                    # the code has already been parsed and we can suck out the
                    # global names.
                    mm = None
                    if m.__path__:
                        # At this point we don't know whether 'name' is a
                        # submodule of 'm' or a global module. Let's just try
                        # the full name first.
                        mm = self.modules.get(m.__name__ + "." + name)
                    if mm is None:
                        mm = self.modules.get(name)
                    if mm is not None:
                        m.globalnames.update(mm.globalnames)
                        m.starimports.update(mm.starimports)
                        # No code object means the names it exports are
                        # unknown, so record it as an unresolved starimport.
                        if mm.__code__ is None:
                            m.starimports[name] = 1
                    else:
                        m.starimports[name] = 1
            elif what == "relative_import":
                level, fromlist, name = args
                if name:
                    self._safe_import_hook(name, m, fromlist, level=level)
                else:
                    # Empty module name: import the fromlist entries from
                    # the parent package, addressed by its absolute name.
                    parent = self.determine_parent(m, level=level)
                    self._safe_import_hook(parent.__name__, None, fromlist, level=0)
            else:
                # We don't expect anything else from the generator.
                raise RuntimeError(what)

        for c in co.co_consts:
            if isinstance(c, type(co)):
                self.scan_code(c, m)
  377. def load_package(self, fqname, pathname):
  378. self.msgin(2, "load_package", fqname, pathname)
  379. newname = replacePackageMap.get(fqname)
  380. if newname:
  381. fqname = newname
  382. m = self.add_module(fqname)
  383. m.__file__ = pathname
  384. m.__path__ = [pathname]
  385. # As per comment at top of file, simulate runtime __path__ additions.
  386. m.__path__ = m.__path__ + packagePathMap.get(fqname, [])
  387. fp, buf, stuff = self.find_module("__init__", m.__path__)
  388. try:
  389. self.load_module(fqname, fp, buf, stuff)
  390. self.msgout(2, "load_package ->", m)
  391. return m
  392. finally:
  393. if fp:
  394. fp.close()
  395. def add_module(self, fqname):
  396. if fqname in self.modules:
  397. return self.modules[fqname]
  398. self.modules[fqname] = m = Module(fqname)
  399. return m
  400. def find_module(self, name, path, parent=None):
  401. if parent is not None:
  402. # assert path is not None
  403. fullname = parent.__name__+'.'+name
  404. else:
  405. fullname = name
  406. if fullname in self.excludes:
  407. self.msgout(3, "find_module -> Excluded", fullname)
  408. raise ImportError(name)
  409. if path is None:
  410. if name in sys.builtin_module_names:
  411. return (None, None, ("", "", imp.C_BUILTIN))
  412. path = self.path
  413. return imp.find_module(name, path)
  414. def report(self):
  415. """Print a report to stdout, listing the found modules with their
  416. paths, as well as modules that are missing, or seem to be missing.
  417. """
  418. print()
  419. print(" %-25s %s" % ("Name", "File"))
  420. print(" %-25s %s" % ("----", "----"))
  421. # Print modules found
  422. keys = sorted(self.modules.keys())
  423. for key in keys:
  424. m = self.modules[key]
  425. if m.__path__:
  426. print("P", end=' ')
  427. else:
  428. print("m", end=' ')
  429. print("%-25s" % key, m.__file__ or "")
  430. # Print missing modules
  431. missing, maybe = self.any_missing_maybe()
  432. if missing:
  433. print()
  434. print("Missing modules:")
  435. for name in missing:
  436. mods = sorted(self.badmodules[name].keys())
  437. print("?", name, "imported from", ', '.join(mods))
  438. # Print modules that may be missing, but then again, maybe not...
  439. if maybe:
  440. print()
  441. print("Submodules that appear to be missing, but could also be", end=' ')
  442. print("global names in the parent package:")
  443. for name in maybe:
  444. mods = sorted(self.badmodules[name].keys())
  445. print("?", name, "imported from", ', '.join(mods))
  446. def any_missing(self):
  447. """Return a list of modules that appear to be missing. Use
  448. any_missing_maybe() if you want to know which modules are
  449. certain to be missing, and which *may* be missing.
  450. """
  451. missing, maybe = self.any_missing_maybe()
  452. return missing + maybe
  453. def any_missing_maybe(self):
  454. """Return two lists, one with modules that are certainly missing
  455. and one with modules that *may* be missing. The latter names could
  456. either be submodules *or* just global names in the package.
  457. The reason it can't always be determined is that it's impossible to
  458. tell which names are imported when "from module import *" is done
  459. with an extension module, short of actually importing it.
  460. """
  461. missing = []
  462. maybe = []
  463. for name in self.badmodules:
  464. if name in self.excludes:
  465. continue
  466. i = name.rfind(".")
  467. if i < 0:
  468. missing.append(name)
  469. continue
  470. subname = name[i+1:]
  471. pkgname = name[:i]
  472. pkg = self.modules.get(pkgname)
  473. if pkg is not None:
  474. if pkgname in self.badmodules[name]:
  475. # The package tried to import this module itself and
  476. # failed. It's definitely missing.
  477. missing.append(name)
  478. elif subname in pkg.globalnames:
  479. # It's a global in the package: definitely not missing.
  480. pass
  481. elif pkg.starimports:
  482. # It could be missing, but the package did an "import *"
  483. # from a non-Python module, so we simply can't be sure.
  484. maybe.append(name)
  485. else:
  486. # It's not a global in the package, the package didn't
  487. # do funny star imports, it's very likely to be missing.
  488. # The symbol could be inserted into the package from the
  489. # outside, but since that's not good style we simply list
  490. # it missing.
  491. missing.append(name)
  492. else:
  493. missing.append(name)
  494. missing.sort()
  495. maybe.sort()
  496. return missing, maybe
  497. def replace_paths_in_code(self, co):
  498. new_filename = original_filename = os.path.normpath(co.co_filename)
  499. for f, r in self.replace_paths:
  500. if original_filename.startswith(f):
  501. new_filename = r + original_filename[len(f):]
  502. break
  503. if self.debug and original_filename not in self.processed_paths:
  504. if new_filename != original_filename:
  505. self.msgout(2, "co_filename %r changed to %r" \
  506. % (original_filename,new_filename,))
  507. else:
  508. self.msgout(2, "co_filename %r remains unchanged" \
  509. % (original_filename,))
  510. self.processed_paths.append(original_filename)
  511. consts = list(co.co_consts)
  512. for i in range(len(consts)):
  513. if isinstance(consts[i], type(co)):
  514. consts[i] = self.replace_paths_in_code(consts[i])
  515. return types.CodeType(co.co_argcount, co.co_kwonlyargcount,
  516. co.co_nlocals, co.co_stacksize, co.co_flags,
  517. co.co_code, tuple(consts), co.co_names,
  518. co.co_varnames, new_filename, co.co_name,
  519. co.co_firstlineno, co.co_lnotab, co.co_freevars,
  520. co.co_cellvars)
  521. def test():
  522. # Parse command line
  523. import getopt
  524. try:
  525. opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
  526. except getopt.error as msg:
  527. print(msg)
  528. return
  529. # Process options
  530. debug = 1
  531. domods = 0
  532. addpath = []
  533. exclude = []
  534. for o, a in opts:
  535. if o == '-d':
  536. debug = debug + 1
  537. if o == '-m':
  538. domods = 1
  539. if o == '-p':
  540. addpath = addpath + a.split(os.pathsep)
  541. if o == '-q':
  542. debug = 0
  543. if o == '-x':
  544. exclude.append(a)
  545. # Provide default arguments
  546. if not args:
  547. script = "hello.py"
  548. else:
  549. script = args[0]
  550. # Set the path based on sys.path and the script directory
  551. path = sys.path[:]
  552. path[0] = os.path.dirname(script)
  553. path = addpath + path
  554. if debug > 1:
  555. print("path:")
  556. for item in path:
  557. print(" ", repr(item))
  558. # Create the module finder and turn its crank
  559. mf = ModuleFinder(path, debug, exclude)
  560. for arg in args[1:]:
  561. if arg == '-m':
  562. domods = 1
  563. continue
  564. if domods:
  565. if arg[-2:] == '.*':
  566. mf.import_hook(arg[:-2], None, ["*"])
  567. else:
  568. mf.import_hook(arg)
  569. else:
  570. mf.load_file(arg)
  571. mf.run_script(script)
  572. mf.report()
  573. return mf # for -i debugging
  574. if __name__ == '__main__':
  575. try:
  576. mf = test()
  577. except KeyboardInterrupt:
  578. print("\n[interrupted]")