pyclbr.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344
  1. """Parse a Python module and describe its classes and methods.
  2. Parse enough of a Python file to recognize imports and class and
  3. method definitions, and to find out the superclasses of a class.
  4. The interface consists of a single function:
  5. readmodule_ex(module [, path])
  6. where module is the name of a Python module, and path is an optional
  7. list of directories where the module is to be searched. If present,
  8. path is prepended to the system search path sys.path. The return
  9. value is a dictionary. The keys of the dictionary are the names of
  10. the classes and top-level functions defined in the module (including
  11. names made available via the from XXX import YYY construct). The
  12. values are instances of the classes Class and Function defined here.
  12a. One special key/value pair
  13. is present for packages: the key '__path__' has a list as its value
  14. which contains the package search path.
  15. A class is described by the class Class in this module. Instances
  16. of this class have the following instance variables:
  17. module -- the module name
  18. name -- the name of the class
  19. super -- a list of super classes (Class instances)
  20. methods -- a dictionary of methods
  21. file -- the file in which the class was defined
  22. lineno -- the line in the file on which the class statement occurred
  23. The dictionary of methods uses the method names as keys and the line
  24. numbers on which the method was defined as values.
  25. If the name of a super class is not recognized, the corresponding
  26. entry in the list of super classes is not a class instance but a
  27. string giving the name of the super class. Since import statements
  28. are recognized and imported modules are scanned as well, this
  29. shouldn't happen often.
  30. A function is described by the class Function in this module.
  31. Instances of this class have the following instance variables:
  32. module -- the module name
  33. name -- the name of the function
  34. file -- the file in which the function was defined
  35. lineno -- the line in the file on which the def statement occurred
  36. """
  37. import sys
  38. import imp
  39. import tokenize
  40. from token import NAME, DEDENT, OP
  41. from operator import itemgetter
  42. __all__ = ["readmodule", "readmodule_ex", "Class", "Function"]
  43. _modules = {} # cache of modules we've seen
  44. # each Python class is represented by an instance of this class
  45. class Class:
  46. '''Class to represent a Python class.'''
  47. def __init__(self, module, name, super, file, lineno):
  48. self.module = module
  49. self.name = name
  50. if super is None:
  51. super = []
  52. self.super = super
  53. self.methods = {}
  54. self.file = file
  55. self.lineno = lineno
  56. def _addmethod(self, name, lineno):
  57. self.methods[name] = lineno
  58. class Function:
  59. '''Class to represent a top-level Python function'''
  60. def __init__(self, module, name, file, lineno):
  61. self.module = module
  62. self.name = name
  63. self.file = file
  64. self.lineno = lineno
  65. def readmodule(module, path=None):
  66. '''Backwards compatible interface.
  67. Call readmodule_ex() and then only keep Class objects from the
  68. resulting dictionary.'''
  69. res = {}
  70. for key, value in _readmodule(module, path or []).items():
  71. if isinstance(value, Class):
  72. res[key] = value
  73. return res
  74. def readmodule_ex(module, path=None):
  75. '''Read a module file and return a dictionary of classes.
  76. Search for MODULE in PATH and sys.path, read and parse the
  77. module and return a dictionary with one entry for each class
  78. found in the module.
  79. '''
  80. return _readmodule(module, path or [])
  81. def _readmodule(module, path, inpackage=None):
  82. '''Do the hard work for readmodule[_ex].
  83. If INPACKAGE is given, it must be the dotted name of the package in
  84. which we are searching for a submodule, and then PATH must be the
  85. package search path; otherwise, we are searching for a top-level
  86. module, and PATH is combined with sys.path.
  87. '''
  88. # Compute the full module name (prepending inpackage if set)
  89. if inpackage is not None:
  90. fullmodule = "%s.%s" % (inpackage, module)
  91. else:
  92. fullmodule = module
  93. # Check in the cache
  94. if fullmodule in _modules:
  95. return _modules[fullmodule]
  96. # Initialize the dict for this module's contents
  97. dict = {}
  98. # Check if it is a built-in module; we don't do much for these
  99. if module in sys.builtin_module_names and inpackage is None:
  100. _modules[module] = dict
  101. return dict
  102. # Check for a dotted module name
  103. i = module.rfind('.')
  104. if i >= 0:
  105. package = module[:i]
  106. submodule = module[i+1:]
  107. parent = _readmodule(package, path, inpackage)
  108. if inpackage is not None:
  109. package = "%s.%s" % (inpackage, package)
  110. if not '__path__' in parent:
  111. raise ImportError('No package named {}'.format(package))
  112. return _readmodule(submodule, parent['__path__'], package)
  113. # Search the path for the module
  114. f = None
  115. if inpackage is not None:
  116. f, fname, (_s, _m, ty) = imp.find_module(module, path)
  117. else:
  118. f, fname, (_s, _m, ty) = imp.find_module(module, path + sys.path)
  119. if ty == imp.PKG_DIRECTORY:
  120. dict['__path__'] = [fname]
  121. path = [fname] + path
  122. f, fname, (_s, _m, ty) = imp.find_module('__init__', [fname])
  123. _modules[fullmodule] = dict
  124. if ty != imp.PY_SOURCE:
  125. # not Python source, can't do anything with this module
  126. f.close()
  127. return dict
  128. stack = [] # stack of (class, indent) pairs
  129. g = tokenize.generate_tokens(f.readline)
  130. try:
  131. for tokentype, token, start, _end, _line in g:
  132. if tokentype == DEDENT:
  133. lineno, thisindent = start
  134. # close nested classes and defs
  135. while stack and stack[-1][1] >= thisindent:
  136. del stack[-1]
  137. elif token == 'def':
  138. lineno, thisindent = start
  139. # close previous nested classes and defs
  140. while stack and stack[-1][1] >= thisindent:
  141. del stack[-1]
  142. tokentype, meth_name, start = g.next()[0:3]
  143. if tokentype != NAME:
  144. continue # Syntax error
  145. if stack:
  146. cur_class = stack[-1][0]
  147. if isinstance(cur_class, Class):
  148. # it's a method
  149. cur_class._addmethod(meth_name, lineno)
  150. # else it's a nested def
  151. else:
  152. # it's a function
  153. dict[meth_name] = Function(fullmodule, meth_name,
  154. fname, lineno)
  155. stack.append((None, thisindent)) # Marker for nested fns
  156. elif token == 'class':
  157. lineno, thisindent = start
  158. # close previous nested classes and defs
  159. while stack and stack[-1][1] >= thisindent:
  160. del stack[-1]
  161. tokentype, class_name, start = g.next()[0:3]
  162. if tokentype != NAME:
  163. continue # Syntax error
  164. # parse what follows the class name
  165. tokentype, token, start = g.next()[0:3]
  166. inherit = None
  167. if token == '(':
  168. names = [] # List of superclasses
  169. # there's a list of superclasses
  170. level = 1
  171. super = [] # Tokens making up current superclass
  172. while True:
  173. tokentype, token, start = g.next()[0:3]
  174. if token in (')', ',') and level == 1:
  175. n = "".join(super)
  176. if n in dict:
  177. # we know this super class
  178. n = dict[n]
  179. else:
  180. c = n.split('.')
  181. if len(c) > 1:
  182. # super class is of the form
  183. # module.class: look in module for
  184. # class
  185. m = c[-2]
  186. c = c[-1]
  187. if m in _modules:
  188. d = _modules[m]
  189. if c in d:
  190. n = d[c]
  191. names.append(n)
  192. super = []
  193. if token == '(':
  194. level += 1
  195. elif token == ')':
  196. level -= 1
  197. if level == 0:
  198. break
  199. elif token == ',' and level == 1:
  200. pass
  201. # only use NAME and OP (== dot) tokens for type name
  202. elif tokentype in (NAME, OP) and level == 1:
  203. super.append(token)
  204. # expressions in the base list are not supported
  205. inherit = names
  206. cur_class = Class(fullmodule, class_name, inherit,
  207. fname, lineno)
  208. if not stack:
  209. dict[class_name] = cur_class
  210. stack.append((cur_class, thisindent))
  211. elif token == 'import' and start[1] == 0:
  212. modules = _getnamelist(g)
  213. for mod, _mod2 in modules:
  214. try:
  215. # Recursively read the imported module
  216. if inpackage is None:
  217. _readmodule(mod, path)
  218. else:
  219. try:
  220. _readmodule(mod, path, inpackage)
  221. except ImportError:
  222. _readmodule(mod, [])
  223. except:
  224. # If we can't find or parse the imported module,
  225. # too bad -- don't die here.
  226. pass
  227. elif token == 'from' and start[1] == 0:
  228. mod, token = _getname(g)
  229. if not mod or token != "import":
  230. continue
  231. names = _getnamelist(g)
  232. try:
  233. # Recursively read the imported module
  234. d = _readmodule(mod, path, inpackage)
  235. except:
  236. # If we can't find or parse the imported module,
  237. # too bad -- don't die here.
  238. continue
  239. # add any classes that were defined in the imported module
  240. # to our name space if they were mentioned in the list
  241. for n, n2 in names:
  242. if n in d:
  243. dict[n2 or n] = d[n]
  244. elif n == '*':
  245. # don't add names that start with _
  246. for n in d:
  247. if n[0] != '_':
  248. dict[n] = d[n]
  249. except StopIteration:
  250. pass
  251. f.close()
  252. return dict
  253. def _getnamelist(g):
  254. # Helper to get a comma-separated list of dotted names plus 'as'
  255. # clauses. Return a list of pairs (name, name2) where name2 is
  256. # the 'as' name, or None if there is no 'as' clause.
  257. names = []
  258. while True:
  259. name, token = _getname(g)
  260. if not name:
  261. break
  262. if token == 'as':
  263. name2, token = _getname(g)
  264. else:
  265. name2 = None
  266. names.append((name, name2))
  267. while token != "," and "\n" not in token:
  268. token = g.next()[1]
  269. if token != ",":
  270. break
  271. return names
  272. def _getname(g):
  273. # Helper to get a dotted name, return a pair (name, token) where
  274. # name is the dotted name, or None if there was no dotted name,
  275. # and token is the next input token.
  276. parts = []
  277. tokentype, token = g.next()[0:2]
  278. if tokentype != NAME and token != '*':
  279. return (None, token)
  280. parts.append(token)
  281. while True:
  282. tokentype, token = g.next()[0:2]
  283. if token != '.':
  284. break
  285. tokentype, token = g.next()[0:2]
  286. if tokentype != NAME:
  287. break
  288. parts.append(token)
  289. return (".".join(parts), token)
  290. def _main():
  291. # Main program for testing.
  292. import os
  293. mod = sys.argv[1]
  294. if os.path.exists(mod):
  295. path = [os.path.dirname(mod)]
  296. mod = os.path.basename(mod)
  297. if mod.lower().endswith(".py"):
  298. mod = mod[:-3]
  299. else:
  300. path = []
  301. dict = readmodule_ex(mod, path)
  302. objs = dict.values()
  303. objs.sort(lambda a, b: cmp(getattr(a, 'lineno', 0),
  304. getattr(b, 'lineno', 0)))
  305. for obj in objs:
  306. if isinstance(obj, Class):
  307. print "class", obj.name, obj.super, obj.lineno
  308. methods = sorted(obj.methods.iteritems(), key=itemgetter(1))
  309. for name, lineno in methods:
  310. if name != "__path__":
  311. print " def", name, lineno
  312. elif isinstance(obj, Function):
  313. print "def", obj.name, obj.lineno
  314. if __name__ == "__main__":
  315. _main()