modulefinder.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679
  1. """Find modules used by a script, using introspection."""
  2. from __future__ import generators
  3. import dis
  4. import imp
  5. import marshal
  6. import os
  7. import sys
  8. import types
  9. import struct
  10. if hasattr(sys.__stdout__, "newlines"):
  11. READ_MODE = "U" # universal line endings
  12. else:
  13. # Python < 2.3 compatibility, no longer strictly required
  14. READ_MODE = "r"
  15. LOAD_CONST = dis.opmap['LOAD_CONST']
  16. IMPORT_NAME = dis.opmap['IMPORT_NAME']
  17. STORE_NAME = dis.opmap['STORE_NAME']
  18. STORE_GLOBAL = dis.opmap['STORE_GLOBAL']
  19. STORE_OPS = STORE_NAME, STORE_GLOBAL
  20. HAVE_ARGUMENT = dis.HAVE_ARGUMENT
  21. EXTENDED_ARG = dis.EXTENDED_ARG
  22. def _unpack_opargs(code):
  23. # enumerate() is not an option, since we sometimes process
  24. # multiple elements on a single pass through the loop
  25. extended_arg = 0
  26. n = len(code)
  27. i = 0
  28. while i < n:
  29. op = ord(code[i])
  30. offset = i
  31. i = i+1
  32. arg = None
  33. if op >= HAVE_ARGUMENT:
  34. arg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
  35. extended_arg = 0
  36. i = i+2
  37. if op == EXTENDED_ARG:
  38. extended_arg = arg*65536
  39. yield (offset, op, arg)
# Modulefinder does a good job at simulating Python's import mechanism, but
# it cannot handle __path__ modifications packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and they will be honored.
# Note that this is a mapping from package names to lists of paths.
  45. packagePathMap = {}
  46. # A Public interface
  47. def AddPackagePath(packagename, path):
  48. paths = packagePathMap.get(packagename, [])
  49. paths.append(path)
  50. packagePathMap[packagename] = paths
  51. replacePackageMap = {}
  52. # This ReplacePackage mechanism allows modulefinder to work around the
  53. # way the _xmlplus package injects itself under the name "xml" into
  54. # sys.modules at runtime by calling ReplacePackage("_xmlplus", "xml")
  55. # before running ModuleFinder.
  56. def ReplacePackage(oldname, newname):
  57. replacePackageMap[oldname] = newname
  58. class Module:
  59. def __init__(self, name, file=None, path=None):
  60. self.__name__ = name
  61. self.__file__ = file
  62. self.__path__ = path
  63. self.__code__ = None
  64. # The set of global names that are assigned to in the module.
  65. # This includes those names imported through starimports of
  66. # Python modules.
  67. self.globalnames = {}
  68. # The set of starimports this module did that could not be
  69. # resolved, ie. a starimport from a non-Python module.
  70. self.starimports = {}
  71. def __repr__(self):
  72. s = "Module(%r" % (self.__name__,)
  73. if self.__file__ is not None:
  74. s = s + ", %r" % (self.__file__,)
  75. if self.__path__ is not None:
  76. s = s + ", %r" % (self.__path__,)
  77. s = s + ")"
  78. return s
  79. class ModuleFinder:
  80. def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
  81. if path is None:
  82. path = sys.path
  83. self.path = path
  84. self.modules = {}
  85. self.badmodules = {}
  86. self.debug = debug
  87. self.indent = 0
  88. self.excludes = excludes
  89. self.replace_paths = replace_paths
  90. self.processed_paths = [] # Used in debugging only
  91. def msg(self, level, str, *args):
  92. if level <= self.debug:
  93. for i in range(self.indent):
  94. print " ",
  95. print str,
  96. for arg in args:
  97. print repr(arg),
  98. print
  99. def msgin(self, *args):
  100. level = args[0]
  101. if level <= self.debug:
  102. self.indent = self.indent + 1
  103. self.msg(*args)
  104. def msgout(self, *args):
  105. level = args[0]
  106. if level <= self.debug:
  107. self.indent = self.indent - 1
  108. self.msg(*args)
  109. def run_script(self, pathname):
  110. self.msg(2, "run_script", pathname)
  111. with open(pathname, READ_MODE) as fp:
  112. stuff = ("", "r", imp.PY_SOURCE)
  113. self.load_module('__main__', fp, pathname, stuff)
  114. def load_file(self, pathname):
  115. dir, name = os.path.split(pathname)
  116. name, ext = os.path.splitext(name)
  117. with open(pathname, READ_MODE) as fp:
  118. stuff = (ext, "r", imp.PY_SOURCE)
  119. self.load_module(name, fp, pathname, stuff)
  120. def import_hook(self, name, caller=None, fromlist=None, level=-1):
  121. self.msg(3, "import_hook", name, caller, fromlist, level)
  122. parent = self.determine_parent(caller, level=level)
  123. q, tail = self.find_head_package(parent, name)
  124. m = self.load_tail(q, tail)
  125. if not fromlist:
  126. return q
  127. if m.__path__:
  128. self.ensure_fromlist(m, fromlist)
  129. return None
  130. def determine_parent(self, caller, level=-1):
  131. self.msgin(4, "determine_parent", caller, level)
  132. if not caller or level == 0:
  133. self.msgout(4, "determine_parent -> None")
  134. return None
  135. pname = caller.__name__
  136. if level >= 1: # relative import
  137. if caller.__path__:
  138. level -= 1
  139. if level == 0:
  140. parent = self.modules[pname]
  141. assert parent is caller
  142. self.msgout(4, "determine_parent ->", parent)
  143. return parent
  144. if pname.count(".") < level:
  145. raise ImportError, "relative importpath too deep"
  146. pname = ".".join(pname.split(".")[:-level])
  147. parent = self.modules[pname]
  148. self.msgout(4, "determine_parent ->", parent)
  149. return parent
  150. if caller.__path__:
  151. parent = self.modules[pname]
  152. assert caller is parent
  153. self.msgout(4, "determine_parent ->", parent)
  154. return parent
  155. if '.' in pname:
  156. i = pname.rfind('.')
  157. pname = pname[:i]
  158. parent = self.modules[pname]
  159. assert parent.__name__ == pname
  160. self.msgout(4, "determine_parent ->", parent)
  161. return parent
  162. self.msgout(4, "determine_parent -> None")
  163. return None
  164. def find_head_package(self, parent, name):
  165. self.msgin(4, "find_head_package", parent, name)
  166. if '.' in name:
  167. i = name.find('.')
  168. head = name[:i]
  169. tail = name[i+1:]
  170. else:
  171. head = name
  172. tail = ""
  173. if parent:
  174. qname = "%s.%s" % (parent.__name__, head)
  175. else:
  176. qname = head
  177. q = self.import_module(head, qname, parent)
  178. if q:
  179. self.msgout(4, "find_head_package ->", (q, tail))
  180. return q, tail
  181. if parent:
  182. qname = head
  183. parent = None
  184. q = self.import_module(head, qname, parent)
  185. if q:
  186. self.msgout(4, "find_head_package ->", (q, tail))
  187. return q, tail
  188. self.msgout(4, "raise ImportError: No module named", qname)
  189. raise ImportError, "No module named " + qname
  190. def load_tail(self, q, tail):
  191. self.msgin(4, "load_tail", q, tail)
  192. m = q
  193. while tail:
  194. i = tail.find('.')
  195. if i < 0: i = len(tail)
  196. head, tail = tail[:i], tail[i+1:]
  197. mname = "%s.%s" % (m.__name__, head)
  198. m = self.import_module(head, mname, m)
  199. if not m:
  200. self.msgout(4, "raise ImportError: No module named", mname)
  201. raise ImportError, "No module named " + mname
  202. self.msgout(4, "load_tail ->", m)
  203. return m
  204. def ensure_fromlist(self, m, fromlist, recursive=0):
  205. self.msg(4, "ensure_fromlist", m, fromlist, recursive)
  206. for sub in fromlist:
  207. if sub == "*":
  208. if not recursive:
  209. all = self.find_all_submodules(m)
  210. if all:
  211. self.ensure_fromlist(m, all, 1)
  212. elif not hasattr(m, sub):
  213. subname = "%s.%s" % (m.__name__, sub)
  214. submod = self.import_module(sub, subname, m)
  215. if not submod:
  216. raise ImportError, "No module named " + subname
  217. def find_all_submodules(self, m):
  218. if not m.__path__:
  219. return
  220. modules = {}
  221. # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"].
  222. # But we must also collect Python extension modules - although
  223. # we cannot separate normal dlls from Python extensions.
  224. suffixes = []
  225. for triple in imp.get_suffixes():
  226. suffixes.append(triple[0])
  227. for dir in m.__path__:
  228. try:
  229. names = os.listdir(dir)
  230. except os.error:
  231. self.msg(2, "can't list directory", dir)
  232. continue
  233. for name in names:
  234. mod = None
  235. for suff in suffixes:
  236. n = len(suff)
  237. if name[-n:] == suff:
  238. mod = name[:-n]
  239. break
  240. if mod and mod != "__init__":
  241. modules[mod] = mod
  242. return modules.keys()
  243. def import_module(self, partname, fqname, parent):
  244. self.msgin(3, "import_module", partname, fqname, parent)
  245. try:
  246. m = self.modules[fqname]
  247. except KeyError:
  248. pass
  249. else:
  250. self.msgout(3, "import_module ->", m)
  251. return m
  252. if fqname in self.badmodules:
  253. self.msgout(3, "import_module -> None")
  254. return None
  255. if parent and parent.__path__ is None:
  256. self.msgout(3, "import_module -> None")
  257. return None
  258. try:
  259. fp, pathname, stuff = self.find_module(partname,
  260. parent and parent.__path__, parent)
  261. except ImportError:
  262. self.msgout(3, "import_module ->", None)
  263. return None
  264. try:
  265. m = self.load_module(fqname, fp, pathname, stuff)
  266. finally:
  267. if fp: fp.close()
  268. if parent:
  269. setattr(parent, partname, m)
  270. self.msgout(3, "import_module ->", m)
  271. return m
  272. def load_module(self, fqname, fp, pathname, file_info):
  273. suffix, mode, type = file_info
  274. self.msgin(2, "load_module", fqname, fp and "fp", pathname)
  275. if type == imp.PKG_DIRECTORY:
  276. m = self.load_package(fqname, pathname)
  277. self.msgout(2, "load_module ->", m)
  278. return m
  279. if type == imp.PY_SOURCE:
  280. co = compile(fp.read()+'\n', pathname, 'exec')
  281. elif type == imp.PY_COMPILED:
  282. if fp.read(4) != imp.get_magic():
  283. self.msgout(2, "raise ImportError: Bad magic number", pathname)
  284. raise ImportError, "Bad magic number in %s" % pathname
  285. fp.read(4)
  286. co = marshal.load(fp)
  287. else:
  288. co = None
  289. m = self.add_module(fqname)
  290. m.__file__ = pathname
  291. if co:
  292. if self.replace_paths:
  293. co = self.replace_paths_in_code(co)
  294. m.__code__ = co
  295. self.scan_code(co, m)
  296. self.msgout(2, "load_module ->", m)
  297. return m
  298. def _add_badmodule(self, name, caller):
  299. if name not in self.badmodules:
  300. self.badmodules[name] = {}
  301. if caller:
  302. self.badmodules[name][caller.__name__] = 1
  303. else:
  304. self.badmodules[name]["-"] = 1
  305. def _safe_import_hook(self, name, caller, fromlist, level=-1):
  306. # wrapper for self.import_hook() that won't raise ImportError
  307. if name in self.badmodules:
  308. self._add_badmodule(name, caller)
  309. return
  310. try:
  311. self.import_hook(name, caller, level=level)
  312. except ImportError, msg:
  313. self.msg(2, "ImportError:", str(msg))
  314. self._add_badmodule(name, caller)
  315. else:
  316. if fromlist:
  317. for sub in fromlist:
  318. if sub in self.badmodules:
  319. self._add_badmodule(sub, caller)
  320. continue
  321. try:
  322. self.import_hook(name, caller, [sub], level=level)
  323. except ImportError, msg:
  324. self.msg(2, "ImportError:", str(msg))
  325. fullname = name + "." + sub
  326. self._add_badmodule(fullname, caller)
  327. def scan_opcodes(self, co,
  328. unpack = struct.unpack):
  329. # Scan the code, and yield 'interesting' opcode combinations
  330. # Version for Python 2.4 and older
  331. code = co.co_code
  332. names = co.co_names
  333. consts = co.co_consts
  334. opargs = [(op, arg) for _, op, arg in _unpack_opargs(code)
  335. if op != EXTENDED_ARG]
  336. for i, (op, oparg) in enumerate(opargs):
  337. if c in STORE_OPS:
  338. yield "store", (names[oparg],)
  339. continue
  340. if (op == IMPORT_NAME and i >= 1
  341. and opargs[i-1][0] == LOAD_CONST):
  342. fromlist = consts[opargs[i-1][1]]
  343. yield "import", (fromlist, names[oparg])
  344. continue
  345. def scan_opcodes_25(self, co):
  346. # Scan the code, and yield 'interesting' opcode combinations
  347. code = co.co_code
  348. names = co.co_names
  349. consts = co.co_consts
  350. opargs = [(op, arg) for _, op, arg in _unpack_opargs(code)
  351. if op != EXTENDED_ARG]
  352. for i, (op, oparg) in enumerate(opargs):
  353. if op in STORE_OPS:
  354. yield "store", (names[oparg],)
  355. continue
  356. if (op == IMPORT_NAME and i >= 2
  357. and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST):
  358. level = consts[opargs[i-2][1]]
  359. fromlist = consts[opargs[i-1][1]]
  360. if level == -1: # normal import
  361. yield "import", (fromlist, names[oparg])
  362. elif level == 0: # absolute import
  363. yield "absolute_import", (fromlist, names[oparg])
  364. else: # relative import
  365. yield "relative_import", (level, fromlist, names[oparg])
  366. continue
  367. def scan_code(self, co, m):
  368. code = co.co_code
  369. if sys.version_info >= (2, 5):
  370. scanner = self.scan_opcodes_25
  371. else:
  372. scanner = self.scan_opcodes
  373. for what, args in scanner(co):
  374. if what == "store":
  375. name, = args
  376. m.globalnames[name] = 1
  377. elif what in ("import", "absolute_import"):
  378. fromlist, name = args
  379. have_star = 0
  380. if fromlist is not None:
  381. if "*" in fromlist:
  382. have_star = 1
  383. fromlist = [f for f in fromlist if f != "*"]
  384. if what == "absolute_import": level = 0
  385. else: level = -1
  386. self._safe_import_hook(name, m, fromlist, level=level)
  387. if have_star:
  388. # We've encountered an "import *". If it is a Python module,
  389. # the code has already been parsed and we can suck out the
  390. # global names.
  391. mm = None
  392. if m.__path__:
  393. # At this point we don't know whether 'name' is a
  394. # submodule of 'm' or a global module. Let's just try
  395. # the full name first.
  396. mm = self.modules.get(m.__name__ + "." + name)
  397. if mm is None:
  398. mm = self.modules.get(name)
  399. if mm is not None:
  400. m.globalnames.update(mm.globalnames)
  401. m.starimports.update(mm.starimports)
  402. if mm.__code__ is None:
  403. m.starimports[name] = 1
  404. else:
  405. m.starimports[name] = 1
  406. elif what == "relative_import":
  407. level, fromlist, name = args
  408. if name:
  409. self._safe_import_hook(name, m, fromlist, level=level)
  410. else:
  411. parent = self.determine_parent(m, level=level)
  412. self._safe_import_hook(parent.__name__, None, fromlist, level=0)
  413. else:
  414. # We don't expect anything else from the generator.
  415. raise RuntimeError(what)
  416. for c in co.co_consts:
  417. if isinstance(c, type(co)):
  418. self.scan_code(c, m)
  419. def load_package(self, fqname, pathname):
  420. self.msgin(2, "load_package", fqname, pathname)
  421. newname = replacePackageMap.get(fqname)
  422. if newname:
  423. fqname = newname
  424. m = self.add_module(fqname)
  425. m.__file__ = pathname
  426. m.__path__ = [pathname]
  427. # As per comment at top of file, simulate runtime __path__ additions.
  428. m.__path__ = m.__path__ + packagePathMap.get(fqname, [])
  429. fp, buf, stuff = self.find_module("__init__", m.__path__)
  430. self.load_module(fqname, fp, buf, stuff)
  431. self.msgout(2, "load_package ->", m)
  432. if fp:
  433. fp.close()
  434. return m
  435. def add_module(self, fqname):
  436. if fqname in self.modules:
  437. return self.modules[fqname]
  438. self.modules[fqname] = m = Module(fqname)
  439. return m
  440. def find_module(self, name, path, parent=None):
  441. if parent is not None:
  442. # assert path is not None
  443. fullname = parent.__name__+'.'+name
  444. else:
  445. fullname = name
  446. if fullname in self.excludes:
  447. self.msgout(3, "find_module -> Excluded", fullname)
  448. raise ImportError, name
  449. if path is None:
  450. if name in sys.builtin_module_names:
  451. return (None, None, ("", "", imp.C_BUILTIN))
  452. path = self.path
  453. return imp.find_module(name, path)
  454. def report(self):
  455. """Print a report to stdout, listing the found modules with their
  456. paths, as well as modules that are missing, or seem to be missing.
  457. """
  458. print
  459. print " %-25s %s" % ("Name", "File")
  460. print " %-25s %s" % ("----", "----")
  461. # Print modules found
  462. keys = self.modules.keys()
  463. keys.sort()
  464. for key in keys:
  465. m = self.modules[key]
  466. if m.__path__:
  467. print "P",
  468. else:
  469. print "m",
  470. print "%-25s" % key, m.__file__ or ""
  471. # Print missing modules
  472. missing, maybe = self.any_missing_maybe()
  473. if missing:
  474. print
  475. print "Missing modules:"
  476. for name in missing:
  477. mods = self.badmodules[name].keys()
  478. mods.sort()
  479. print "?", name, "imported from", ', '.join(mods)
  480. # Print modules that may be missing, but then again, maybe not...
  481. if maybe:
  482. print
  483. print "Submodules that appear to be missing, but could also be",
  484. print "global names in the parent package:"
  485. for name in maybe:
  486. mods = self.badmodules[name].keys()
  487. mods.sort()
  488. print "?", name, "imported from", ', '.join(mods)
  489. def any_missing(self):
  490. """Return a list of modules that appear to be missing. Use
  491. any_missing_maybe() if you want to know which modules are
  492. certain to be missing, and which *may* be missing.
  493. """
  494. missing, maybe = self.any_missing_maybe()
  495. return missing + maybe
  496. def any_missing_maybe(self):
  497. """Return two lists, one with modules that are certainly missing
  498. and one with modules that *may* be missing. The latter names could
  499. either be submodules *or* just global names in the package.
  500. The reason it can't always be determined is that it's impossible to
  501. tell which names are imported when "from module import *" is done
  502. with an extension module, short of actually importing it.
  503. """
  504. missing = []
  505. maybe = []
  506. for name in self.badmodules:
  507. if name in self.excludes:
  508. continue
  509. i = name.rfind(".")
  510. if i < 0:
  511. missing.append(name)
  512. continue
  513. subname = name[i+1:]
  514. pkgname = name[:i]
  515. pkg = self.modules.get(pkgname)
  516. if pkg is not None:
  517. if pkgname in self.badmodules[name]:
  518. # The package tried to import this module itself and
  519. # failed. It's definitely missing.
  520. missing.append(name)
  521. elif subname in pkg.globalnames:
  522. # It's a global in the package: definitely not missing.
  523. pass
  524. elif pkg.starimports:
  525. # It could be missing, but the package did an "import *"
  526. # from a non-Python module, so we simply can't be sure.
  527. maybe.append(name)
  528. else:
  529. # It's not a global in the package, the package didn't
  530. # do funny star imports, it's very likely to be missing.
  531. # The symbol could be inserted into the package from the
  532. # outside, but since that's not good style we simply list
  533. # it missing.
  534. missing.append(name)
  535. else:
  536. missing.append(name)
  537. missing.sort()
  538. maybe.sort()
  539. return missing, maybe
  540. def replace_paths_in_code(self, co):
  541. new_filename = original_filename = os.path.normpath(co.co_filename)
  542. for f, r in self.replace_paths:
  543. if original_filename.startswith(f):
  544. new_filename = r + original_filename[len(f):]
  545. break
  546. if self.debug and original_filename not in self.processed_paths:
  547. if new_filename != original_filename:
  548. self.msgout(2, "co_filename %r changed to %r" \
  549. % (original_filename,new_filename,))
  550. else:
  551. self.msgout(2, "co_filename %r remains unchanged" \
  552. % (original_filename,))
  553. self.processed_paths.append(original_filename)
  554. consts = list(co.co_consts)
  555. for i in range(len(consts)):
  556. if isinstance(consts[i], type(co)):
  557. consts[i] = self.replace_paths_in_code(consts[i])
  558. return types.CodeType(co.co_argcount, co.co_nlocals, co.co_stacksize,
  559. co.co_flags, co.co_code, tuple(consts), co.co_names,
  560. co.co_varnames, new_filename, co.co_name,
  561. co.co_firstlineno, co.co_lnotab,
  562. co.co_freevars, co.co_cellvars)
  563. def test():
  564. # Parse command line
  565. import getopt
  566. try:
  567. opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
  568. except getopt.error, msg:
  569. print msg
  570. return
  571. # Process options
  572. debug = 1
  573. domods = 0
  574. addpath = []
  575. exclude = []
  576. for o, a in opts:
  577. if o == '-d':
  578. debug = debug + 1
  579. if o == '-m':
  580. domods = 1
  581. if o == '-p':
  582. addpath = addpath + a.split(os.pathsep)
  583. if o == '-q':
  584. debug = 0
  585. if o == '-x':
  586. exclude.append(a)
  587. # Provide default arguments
  588. if not args:
  589. script = "hello.py"
  590. else:
  591. script = args[0]
  592. # Set the path based on sys.path and the script directory
  593. path = sys.path[:]
  594. path[0] = os.path.dirname(script)
  595. path = addpath + path
  596. if debug > 1:
  597. print "path:"
  598. for item in path:
  599. print " ", repr(item)
  600. # Create the module finder and turn its crank
  601. mf = ModuleFinder(path, debug, exclude)
  602. for arg in args[1:]:
  603. if arg == '-m':
  604. domods = 1
  605. continue
  606. if domods:
  607. if arg[-2:] == '.*':
  608. mf.import_hook(arg[:-2], None, ["*"])
  609. else:
  610. mf.import_hook(arg)
  611. else:
  612. mf.load_file(arg)
  613. mf.run_script(script)
  614. mf.report()
  615. return mf # for -i debugging
  616. if __name__ == '__main__':
  617. try:
  618. mf = test()
  619. except KeyboardInterrupt:
  620. print "\n[interrupt]"