# dis.py (17 KB)
  1. """Disassembler of Python byte code into mnemonics."""
  2. import sys
  3. import types
  4. import collections
  5. import io
  6. from opcode import *
  7. from opcode import __all__ as _opcodes_all
  8. __all__ = ["code_info", "dis", "disassemble", "distb", "disco",
  9. "findlinestarts", "findlabels", "show_code",
  10. "get_instructions", "Instruction", "Bytecode"] + _opcodes_all
  11. del _opcodes_all
  12. _have_code = (types.MethodType, types.FunctionType, types.CodeType,
  13. classmethod, staticmethod, type)
  14. def _try_compile(source, name):
  15. """Attempts to compile the given source, first as an expression and
  16. then as a statement if the first approach fails.
  17. Utility function to accept strings in functions that otherwise
  18. expect code objects
  19. """
  20. try:
  21. c = compile(source, name, 'eval')
  22. except SyntaxError:
  23. c = compile(source, name, 'exec')
  24. return c
  25. def dis(x=None, *, file=None):
  26. """Disassemble classes, methods, functions, generators, or code.
  27. With no argument, disassemble the last traceback.
  28. """
  29. if x is None:
  30. distb(file=file)
  31. return
  32. if hasattr(x, '__func__'): # Method
  33. x = x.__func__
  34. if hasattr(x, '__code__'): # Function
  35. x = x.__code__
  36. if hasattr(x, 'gi_code'): # Generator
  37. x = x.gi_code
  38. if hasattr(x, '__dict__'): # Class or module
  39. items = sorted(x.__dict__.items())
  40. for name, x1 in items:
  41. if isinstance(x1, _have_code):
  42. print("Disassembly of %s:" % name, file=file)
  43. try:
  44. dis(x1, file=file)
  45. except TypeError as msg:
  46. print("Sorry:", msg, file=file)
  47. print(file=file)
  48. elif hasattr(x, 'co_code'): # Code object
  49. disassemble(x, file=file)
  50. elif isinstance(x, (bytes, bytearray)): # Raw bytecode
  51. _disassemble_bytes(x, file=file)
  52. elif isinstance(x, str): # Source code
  53. _disassemble_str(x, file=file)
  54. else:
  55. raise TypeError("don't know how to disassemble %s objects" %
  56. type(x).__name__)
  57. def distb(tb=None, *, file=None):
  58. """Disassemble a traceback (default: last traceback)."""
  59. if tb is None:
  60. try:
  61. tb = sys.last_traceback
  62. except AttributeError:
  63. raise RuntimeError("no last traceback to disassemble")
  64. while tb.tb_next: tb = tb.tb_next
  65. disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file)
  66. # The inspect module interrogates this dictionary to build its
  67. # list of CO_* constants. It is also used by pretty_flags to
  68. # turn the co_flags field into a human readable list.
  69. COMPILER_FLAG_NAMES = {
  70. 1: "OPTIMIZED",
  71. 2: "NEWLOCALS",
  72. 4: "VARARGS",
  73. 8: "VARKEYWORDS",
  74. 16: "NESTED",
  75. 32: "GENERATOR",
  76. 64: "NOFREE",
  77. 128: "COROUTINE",
  78. 256: "ITERABLE_COROUTINE",
  79. }
  80. def pretty_flags(flags):
  81. """Return pretty representation of code flags."""
  82. names = []
  83. for i in range(32):
  84. flag = 1<<i
  85. if flags & flag:
  86. names.append(COMPILER_FLAG_NAMES.get(flag, hex(flag)))
  87. flags ^= flag
  88. if not flags:
  89. break
  90. else:
  91. names.append(hex(flags))
  92. return ", ".join(names)
  93. def _get_code_object(x):
  94. """Helper to handle methods, functions, generators, strings and raw code objects"""
  95. if hasattr(x, '__func__'): # Method
  96. x = x.__func__
  97. if hasattr(x, '__code__'): # Function
  98. x = x.__code__
  99. if hasattr(x, 'gi_code'): # Generator
  100. x = x.gi_code
  101. if isinstance(x, str): # Source code
  102. x = _try_compile(x, "<disassembly>")
  103. if hasattr(x, 'co_code'): # Code object
  104. return x
  105. raise TypeError("don't know how to disassemble %s objects" %
  106. type(x).__name__)
  107. def code_info(x):
  108. """Formatted details of methods, functions, or code."""
  109. return _format_code_info(_get_code_object(x))
  110. def _format_code_info(co):
  111. lines = []
  112. lines.append("Name: %s" % co.co_name)
  113. lines.append("Filename: %s" % co.co_filename)
  114. lines.append("Argument count: %s" % co.co_argcount)
  115. lines.append("Kw-only arguments: %s" % co.co_kwonlyargcount)
  116. lines.append("Number of locals: %s" % co.co_nlocals)
  117. lines.append("Stack size: %s" % co.co_stacksize)
  118. lines.append("Flags: %s" % pretty_flags(co.co_flags))
  119. if co.co_consts:
  120. lines.append("Constants:")
  121. for i_c in enumerate(co.co_consts):
  122. lines.append("%4d: %r" % i_c)
  123. if co.co_names:
  124. lines.append("Names:")
  125. for i_n in enumerate(co.co_names):
  126. lines.append("%4d: %s" % i_n)
  127. if co.co_varnames:
  128. lines.append("Variable names:")
  129. for i_n in enumerate(co.co_varnames):
  130. lines.append("%4d: %s" % i_n)
  131. if co.co_freevars:
  132. lines.append("Free variables:")
  133. for i_n in enumerate(co.co_freevars):
  134. lines.append("%4d: %s" % i_n)
  135. if co.co_cellvars:
  136. lines.append("Cell variables:")
  137. for i_n in enumerate(co.co_cellvars):
  138. lines.append("%4d: %s" % i_n)
  139. return "\n".join(lines)
  140. def show_code(co, *, file=None):
  141. """Print details of methods, functions, or code to *file*.
  142. If *file* is not provided, the output is printed on stdout.
  143. """
  144. print(code_info(co), file=file)
  145. _Instruction = collections.namedtuple("_Instruction",
  146. "opname opcode arg argval argrepr offset starts_line is_jump_target")
  147. class Instruction(_Instruction):
  148. """Details for a bytecode operation
  149. Defined fields:
  150. opname - human readable name for operation
  151. opcode - numeric code for operation
  152. arg - numeric argument to operation (if any), otherwise None
  153. argval - resolved arg value (if known), otherwise same as arg
  154. argrepr - human readable description of operation argument
  155. offset - start index of operation within bytecode sequence
  156. starts_line - line started by this opcode (if any), otherwise None
  157. is_jump_target - True if other code jumps to here, otherwise False
  158. """
  159. def _disassemble(self, lineno_width=3, mark_as_current=False):
  160. """Format instruction details for inclusion in disassembly output
  161. *lineno_width* sets the width of the line number field (0 omits it)
  162. *mark_as_current* inserts a '-->' marker arrow as part of the line
  163. """
  164. fields = []
  165. # Column: Source code line number
  166. if lineno_width:
  167. if self.starts_line is not None:
  168. lineno_fmt = "%%%dd" % lineno_width
  169. fields.append(lineno_fmt % self.starts_line)
  170. else:
  171. fields.append(' ' * lineno_width)
  172. # Column: Current instruction indicator
  173. if mark_as_current:
  174. fields.append('-->')
  175. else:
  176. fields.append(' ')
  177. # Column: Jump target marker
  178. if self.is_jump_target:
  179. fields.append('>>')
  180. else:
  181. fields.append(' ')
  182. # Column: Instruction offset from start of code sequence
  183. fields.append(repr(self.offset).rjust(4))
  184. # Column: Opcode name
  185. fields.append(self.opname.ljust(20))
  186. # Column: Opcode argument
  187. if self.arg is not None:
  188. fields.append(repr(self.arg).rjust(5))
  189. # Column: Opcode argument details
  190. if self.argrepr:
  191. fields.append('(' + self.argrepr + ')')
  192. return ' '.join(fields).rstrip()
  193. def get_instructions(x, *, first_line=None):
  194. """Iterator for the opcodes in methods, functions or code
  195. Generates a series of Instruction named tuples giving the details of
  196. each operations in the supplied code.
  197. If *first_line* is not None, it indicates the line number that should
  198. be reported for the first source line in the disassembled code.
  199. Otherwise, the source line information (if any) is taken directly from
  200. the disassembled code object.
  201. """
  202. co = _get_code_object(x)
  203. cell_names = co.co_cellvars + co.co_freevars
  204. linestarts = dict(findlinestarts(co))
  205. if first_line is not None:
  206. line_offset = first_line - co.co_firstlineno
  207. else:
  208. line_offset = 0
  209. return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
  210. co.co_consts, cell_names, linestarts,
  211. line_offset)
  212. def _get_const_info(const_index, const_list):
  213. """Helper to get optional details about const references
  214. Returns the dereferenced constant and its repr if the constant
  215. list is defined.
  216. Otherwise returns the constant index and its repr().
  217. """
  218. argval = const_index
  219. if const_list is not None:
  220. argval = const_list[const_index]
  221. return argval, repr(argval)
  222. def _get_name_info(name_index, name_list):
  223. """Helper to get optional details about named references
  224. Returns the dereferenced name as both value and repr if the name
  225. list is defined.
  226. Otherwise returns the name index and its repr().
  227. """
  228. argval = name_index
  229. if name_list is not None:
  230. argval = name_list[name_index]
  231. argrepr = argval
  232. else:
  233. argrepr = repr(argval)
  234. return argval, argrepr
  235. def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
  236. cells=None, linestarts=None, line_offset=0):
  237. """Iterate over the instructions in a bytecode string.
  238. Generates a sequence of Instruction namedtuples giving the details of each
  239. opcode. Additional information about the code's runtime environment
  240. (e.g. variable names, constants) can be specified using optional
  241. arguments.
  242. """
  243. labels = findlabels(code)
  244. starts_line = None
  245. free = None
  246. for offset, op, arg in _unpack_opargs(code):
  247. if linestarts is not None:
  248. starts_line = linestarts.get(offset, None)
  249. if starts_line is not None:
  250. starts_line += line_offset
  251. is_jump_target = offset in labels
  252. argval = None
  253. argrepr = ''
  254. if arg is not None:
  255. # Set argval to the dereferenced value of the argument when
  256. # available, and argrepr to the string representation of argval.
  257. # _disassemble_bytes needs the string repr of the
  258. # raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
  259. argval = arg
  260. if op in hasconst:
  261. argval, argrepr = _get_const_info(arg, constants)
  262. elif op in hasname:
  263. argval, argrepr = _get_name_info(arg, names)
  264. elif op in hasjrel:
  265. argval = offset + 3 + arg
  266. argrepr = "to " + repr(argval)
  267. elif op in haslocal:
  268. argval, argrepr = _get_name_info(arg, varnames)
  269. elif op in hascompare:
  270. argval = cmp_op[arg]
  271. argrepr = argval
  272. elif op in hasfree:
  273. argval, argrepr = _get_name_info(arg, cells)
  274. elif op in hasnargs:
  275. argrepr = "%d positional, %d keyword pair" % (arg%256, arg//256)
  276. yield Instruction(opname[op], op,
  277. arg, argval, argrepr,
  278. offset, starts_line, is_jump_target)
  279. def disassemble(co, lasti=-1, *, file=None):
  280. """Disassemble a code object."""
  281. cell_names = co.co_cellvars + co.co_freevars
  282. linestarts = dict(findlinestarts(co))
  283. _disassemble_bytes(co.co_code, lasti, co.co_varnames, co.co_names,
  284. co.co_consts, cell_names, linestarts, file=file)
  285. def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
  286. constants=None, cells=None, linestarts=None,
  287. *, file=None, line_offset=0):
  288. # Omit the line number column entirely if we have no line number info
  289. show_lineno = linestarts is not None
  290. # TODO?: Adjust width upwards if max(linestarts.values()) >= 1000?
  291. lineno_width = 3 if show_lineno else 0
  292. for instr in _get_instructions_bytes(code, varnames, names,
  293. constants, cells, linestarts,
  294. line_offset=line_offset):
  295. new_source_line = (show_lineno and
  296. instr.starts_line is not None and
  297. instr.offset > 0)
  298. if new_source_line:
  299. print(file=file)
  300. is_current_instr = instr.offset == lasti
  301. print(instr._disassemble(lineno_width, is_current_instr), file=file)
  302. def _disassemble_str(source, *, file=None):
  303. """Compile the source string, then disassemble the code object."""
  304. disassemble(_try_compile(source, '<dis>'), file=file)
  305. disco = disassemble # XXX For backwards compatibility
  306. def _unpack_opargs(code):
  307. # enumerate() is not an option, since we sometimes process
  308. # multiple elements on a single pass through the loop
  309. extended_arg = 0
  310. n = len(code)
  311. i = 0
  312. while i < n:
  313. op = code[i]
  314. offset = i
  315. i = i+1
  316. arg = None
  317. if op >= HAVE_ARGUMENT:
  318. arg = code[i] + code[i+1]*256 + extended_arg
  319. extended_arg = 0
  320. i = i+2
  321. if op == EXTENDED_ARG:
  322. extended_arg = arg*65536
  323. yield (offset, op, arg)
  324. def findlabels(code):
  325. """Detect all offsets in a byte code which are jump targets.
  326. Return the list of offsets.
  327. """
  328. labels = []
  329. for offset, op, arg in _unpack_opargs(code):
  330. if arg is not None:
  331. label = -1
  332. if op in hasjrel:
  333. label = offset + 3 + arg
  334. elif op in hasjabs:
  335. label = arg
  336. if label >= 0:
  337. if label not in labels:
  338. labels.append(label)
  339. return labels
  340. def findlinestarts(code):
  341. """Find the offsets in a byte code which are start of lines in the source.
  342. Generate pairs (offset, lineno) as described in Python/compile.c.
  343. """
  344. byte_increments = list(code.co_lnotab[0::2])
  345. line_increments = list(code.co_lnotab[1::2])
  346. lastlineno = None
  347. lineno = code.co_firstlineno
  348. addr = 0
  349. for byte_incr, line_incr in zip(byte_increments, line_increments):
  350. if byte_incr:
  351. if lineno != lastlineno:
  352. yield (addr, lineno)
  353. lastlineno = lineno
  354. addr += byte_incr
  355. lineno += line_incr
  356. if lineno != lastlineno:
  357. yield (addr, lineno)
  358. class Bytecode:
  359. """The bytecode operations of a piece of code
  360. Instantiate this with a function, method, string of code, or a code object
  361. (as returned by compile()).
  362. Iterating over this yields the bytecode operations as Instruction instances.
  363. """
  364. def __init__(self, x, *, first_line=None, current_offset=None):
  365. self.codeobj = co = _get_code_object(x)
  366. if first_line is None:
  367. self.first_line = co.co_firstlineno
  368. self._line_offset = 0
  369. else:
  370. self.first_line = first_line
  371. self._line_offset = first_line - co.co_firstlineno
  372. self._cell_names = co.co_cellvars + co.co_freevars
  373. self._linestarts = dict(findlinestarts(co))
  374. self._original_object = x
  375. self.current_offset = current_offset
  376. def __iter__(self):
  377. co = self.codeobj
  378. return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
  379. co.co_consts, self._cell_names,
  380. self._linestarts,
  381. line_offset=self._line_offset)
  382. def __repr__(self):
  383. return "{}({!r})".format(self.__class__.__name__,
  384. self._original_object)
  385. @classmethod
  386. def from_traceback(cls, tb):
  387. """ Construct a Bytecode from the given traceback """
  388. while tb.tb_next:
  389. tb = tb.tb_next
  390. return cls(tb.tb_frame.f_code, current_offset=tb.tb_lasti)
  391. def info(self):
  392. """Return formatted information about the code object."""
  393. return _format_code_info(self.codeobj)
  394. def dis(self):
  395. """Return a formatted view of the bytecode operations."""
  396. co = self.codeobj
  397. if self.current_offset is not None:
  398. offset = self.current_offset
  399. else:
  400. offset = -1
  401. with io.StringIO() as output:
  402. _disassemble_bytes(co.co_code, varnames=co.co_varnames,
  403. names=co.co_names, constants=co.co_consts,
  404. cells=self._cell_names,
  405. linestarts=self._linestarts,
  406. line_offset=self._line_offset,
  407. file=output,
  408. lasti=offset)
  409. return output.getvalue()
  410. def _test():
  411. """Simple test program to disassemble a file."""
  412. import argparse
  413. parser = argparse.ArgumentParser()
  414. parser.add_argument('infile', type=argparse.FileType(), nargs='?', default='-')
  415. args = parser.parse_args()
  416. with args.infile as infile:
  417. source = infile.read()
  418. code = compile(source, args.infile.name, "exec")
  419. dis(code)
  420. if __name__ == "__main__":
  421. _test()