test_tokenize.py

from test import test_support
from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP,
                      STRING, ENDMARKER, tok_name, Untokenizer, tokenize)
from StringIO import StringIO
import os
from unittest import TestCase


class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple. Given a small fragment of source
    # code, print out a table with tokens. The ENDMARKER is omitted for
    # brevity.

    def check_tokenize(self, s, expected):
        # Format the tokens in s in a table format.
        # The ENDMARKER is omitted.
        result = []
        f = StringIO(s)
        for type, token, start, end, line in generate_tokens(f.readline):
            if type == ENDMARKER:
                break
            type = tok_name[type]
            result.append("    %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
                          locals())
        self.assertEqual(result,
                         expected.rstrip().splitlines())
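
    # A note on the table layout above (illustrative only): "%(type)-10.10s"
    # left-justifies the token type in a 10-character column, and
    # "%(token)-13.13r" does the same with repr(token), truncating anything
    # longer than 13 characters -- which is why very long literals in the
    # expected output below lose their closing quote, e.g.
    #     NUMBER     '0xffffffffff (1, 4) (1, 17)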

    def test_basic(self):
        self.check_tokenize("1 + 1", """\
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    """)
        self.check_tokenize("if False:\n"
                            "    # NL\n"
                            "    True = False # NEWLINE\n", """\
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'True'        (3, 4) (3, 8)
    OP         '='           (3, 9) (3, 10)
    NAME       'False'       (3, 11) (3, 16)
    COMMENT    '# NEWLINE'   (3, 17) (3, 26)
    NEWLINE    '\\n'          (3, 26) (3, 27)
    DEDENT     ''            (4, 0) (4, 0)
    """)

        indent_error_file = """\
def k(x):
    x += 2
  x += 5
"""
        with self.assertRaisesRegexp(IndentationError,
                                     "unindent does not match any "
                                     "outer indentation level"):
            for tok in generate_tokens(StringIO(indent_error_file).readline):
                pass
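        # generate_tokens() is lazy: the IndentationError above is only
        # raised once iteration reaches the badly dedented line, which is
        # why the loop body has nothing to do.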

    def test_int(self):
        # Ordinary integers and binary operators
        self.check_tokenize("0xff <= 255", """\
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0b10 <= 255", """\
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0o123 <= 0123", """\
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0123'        (1, 9) (1, 13)
    """)
        self.check_tokenize("01234567 > ~0x15", """\
    NUMBER     '01234567'    (1, 0) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    OP         '~'           (1, 11) (1, 12)
    NUMBER     '0x15'        (1, 12) (1, 16)
    """)
        self.check_tokenize("2134568 != 01231515", """\
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '01231515'    (1, 11) (1, 19)
    """)
        self.check_tokenize("(-124561-1) & 0200000000", """\
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '0200000000'  (1, 14) (1, 24)
    """)
        self.check_tokenize("0xdeadbeef != -1", """\
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    """)
        self.check_tokenize("0xdeadc0de & 012345", """\
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '012345'      (1, 13) (1, 19)
    """)
        self.check_tokenize("0xFF & 0x15 | 1234", """\
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)
    """)

    def test_long(self):
        # Long integers
        self.check_tokenize("x = 0L", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0L'          (1, 4) (1, 6)
    """)
        self.check_tokenize("x = 0xfffffffffff", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xffffffffff (1, 4) (1, 17)
    """)
        self.check_tokenize("x = 123141242151251616110l", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151 (1, 4) (1, 26)
    """)
        self.check_tokenize("x = -15921590215012591L", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '159215902150 (1, 5) (1, 23)
    """)

    def test_float(self):
        # Floating point numbers
        self.check_tokenize("x = 3.14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 314159.", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = .314159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3e14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3E123", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    """)
        self.check_tokenize("x+y = 3e-1230", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    """)
        self.check_tokenize("x = 3.14e159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)
    """)

    def test_string(self):
        # String literals
        self.check_tokenize("x = ''; y = \"\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    """)
        self.check_tokenize("x = '\"'; y = \"'\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    """)
        self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    """)
        self.check_tokenize("x = u'abc' + U'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "u'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "U'ABC'"      (1, 13) (1, 19)
    """)
        self.check_tokenize('y = u"ABC" + U"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'u"ABC"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'U"ABC"'      (1, 13) (1, 19)
    """)
        self.check_tokenize("x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "ur'abc'"     (1, 4) (1, 11)
    OP         '+'           (1, 12) (1, 13)
    STRING     "Ur'ABC'"     (1, 14) (1, 21)
    OP         '+'           (1, 22) (1, 23)
    STRING     "uR'ABC'"     (1, 24) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    STRING     "UR'ABC'"     (1, 34) (1, 41)
    """)
        self.check_tokenize('y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'ur"abc"'     (1, 4) (1, 11)
    OP         '+'           (1, 12) (1, 13)
    STRING     'Ur"ABC"'     (1, 14) (1, 21)
    OP         '+'           (1, 22) (1, 23)
    STRING     'uR"ABC"'     (1, 24) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    STRING     'UR"ABC"'     (1, 34) (1, 41)
    """)
        self.check_tokenize("b'abc' + B'abc'", """\
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('b"abc" + B"abc"', """\
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    """)
        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    """)

    def test_function(self):
        self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    """)
        self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)
    """)

    def test_comparison(self):
        # Comparison
        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)
    """)

    def test_shift(self):
        # Shift
        self.check_tokenize("x = 1 << 1 >> 5", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)
    """)

    def test_additive(self):
        # Additive
        self.check_tokenize("x = 1 - y + 15 - 01 + 0x124 + z + a[5]", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '01'          (1, 17) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    NUMBER     '0x124'       (1, 22) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    NAME       'z'           (1, 30) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    NAME       'a'           (1, 34) (1, 35)
    OP         '['           (1, 35) (1, 36)
    NUMBER     '5'           (1, 36) (1, 37)
    OP         ']'           (1, 37) (1, 38)
    """)

    def test_multiplicative(self):
        # Multiplicative
        self.check_tokenize("x = 1//1*1/5*12%0x12", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)
    """)

    def test_unary(self):
        # Unary
        self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    """)
        self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)
    """)

    def test_selector(self):
        # Selector
        self.check_tokenize("import sys, time\n"
                            "x = sys.modules['time'].time()", """\
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)
    """)

    def test_method(self):
        # Methods
        self.check_tokenize("@staticmethod\n"
                            "def foo(x,y): pass", """\
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)
    """)

    def test_tabs(self):
        # Evil tabs
        self.check_tokenize("def f():\n"
                            "\tif x\n"
                            "        \tpass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)
    """)

    def test_pathological_trailing_whitespace(self):
        # Pathological whitespace (http://bugs.python.org/issue16152)
        self.check_tokenize("@ ", """\
    OP         '@'           (1, 0) (1, 1)
    """)


def decistmt(s):
    result = []
    g = generate_tokens(StringIO(s).readline)   # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result)
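
# Illustrative usage of decistmt(), mirroring TestMisc.test_decistmt below:
#     decistmt('+21.3e-5*-.1234/81.7')
# returns "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')" -- every
# NUMBER token containing a '.' is wrapped in a Decimal() call, while all
# other tokens pass through unchanged.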


class TestMisc(TestCase):

    def test_decistmt(self):
        # Substitute Decimals for floats in a string of statements.
        # This is an example from the docs.
        from decimal import Decimal
        s = '+21.3e-5*-.1234/81.7'
        self.assertEqual(decistmt(s),
                         "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

        # The format of the exponent is inherited from the platform C library.
        # Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
        # we're only showing 12 digits, and the 13th isn't close to 5, the
        # rest of the output should be platform-independent.
        self.assertRegexpMatches(str(eval(s)), '-3.21716034272e-0+7')

        # Output from calculations with Decimal should be identical across
        # all platforms.
        self.assertEqual(eval(decistmt(s)),
                         Decimal('-3.217160342717258261933904529E-7'))


class UntokenizeTest(TestCase):

    def test_bad_input_order(self):
        # raise if previous row
        u = Untokenizer()
        u.prev_row = 2
        u.prev_col = 2
        with self.assertRaises(ValueError) as cm:
            u.add_whitespace((1, 3))
        self.assertEqual(cm.exception.args[0],
                         'start (1,3) precedes previous end (2,2)')
        # raise if previous column in row
        self.assertRaises(ValueError, u.add_whitespace, (2, 1))

    def test_backslash_continuation(self):
        # The problem is that <whitespace>\<newline> leaves no token
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((2, 0))
        self.assertEqual(u.tokens, ['\\\n'])
        u.prev_row = 2
        u.add_whitespace((4, 4))
        self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])

    def test_iter_compat(self):
        u = Untokenizer()
        token = (NAME, 'Hello')
        u.compat(token, iter([]))
        self.assertEqual(u.tokens, ["Hello "])
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter([token])), 'Hello ')
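
    # Both cases above exercise Untokenizer.compat(), the fallback used for
    # 2-tuple tokens that carry no position information; in that mode
    # untokenize() re-spaces the output itself, which is why a lone NAME
    # token comes back as "Hello " with a trailing space.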


class TestRoundtrip(TestCase):

    def check_roundtrip(self, f):
        """
        Test roundtrip for `untokenize`. `f` is an open file or a string.
        The source code in f is tokenized, converted back to source code
        via tokenize.untokenize(), and tokenized again from the latter.
        The test fails if the second tokenization doesn't match the first.
        """
        if isinstance(f, str):
            f = StringIO(f)
        token_list = list(generate_tokens(f.readline))
        f.close()
        tokens1 = [tok[:2] for tok in token_list]
        new_text = untokenize(tokens1)
        readline = iter(new_text.splitlines(1)).next
        tokens2 = [tok[:2] for tok in generate_tokens(readline)]
        self.assertEqual(tokens2, tokens1)
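
    # Note that only the first two fields (type, text) of each token are
    # kept, so untokenize() runs in compatibility mode and must invent its
    # own spacing; the second tokenization verifies that this spacing
    # tokenizes identically.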

    def test_roundtrip(self):
        # There are some standard formatting practices that are easy to get
        # right.
        self.check_roundtrip("if x == 1:\n"
                             "    print(x)\n")
        self.check_roundtrip("if x == 1:\n"
                             "    print x\n")
        self.check_roundtrip("# This is a comment\n"
                             "# This also")

        # Some people use different formatting conventions, which makes
        # untokenize a little trickier. Note that this test involves trailing
        # whitespace after the colon.
        self.check_roundtrip("if x == 1 : \n"
                             "  print x\n")

        fn = test_support.findfile("tokenize_tests" + os.extsep + "txt")
        with open(fn) as f:
            self.check_roundtrip(f)

        self.check_roundtrip("if x == 1:\n"
                             "    # A comment by itself.\n"
                             "    print x # Comment here, too.\n"
                             "    # Another comment.\n"
                             "after_if = True\n")
        self.check_roundtrip("if (x # The comments need to go in the right place\n"
                             "    == 1):\n"
                             "    print 'x==1'\n")
        self.check_roundtrip("class Test: # A comment here\n"
                             "  # A comment with weird indent\n"
                             "  after_com = 5\n"
                             "  def x(m): return m*5 # a one liner\n"
                             "  def y(m): # A whitespace after the colon\n"
                             "     return y*4 # 3-space indent\n")

        # Some error-handling code
        self.check_roundtrip("try: import somemodule\n"
                             "except ImportError: # comment\n"
                             "    print 'Can not import' # comment2\n"
                             "else: print 'Loaded'\n")

    def test_continuation(self):
        # Balancing continuation
        self.check_roundtrip("a = (3,4, \n"
                             "5,6)\n"
                             "y = [3, 4,\n"
                             "5]\n"
                             "z = {'a': 5,\n"
                             "'b':15, 'c':True}\n"
                             "x = len(y) + 5 - a[\n"
                             "3] - a[2]\n"
                             "+ len(z) - z[\n"
                             "'b']\n")

    def test_backslash_continuation(self):
        # Backslash means line continuation, except for comments
        self.check_roundtrip("x=1+\\\n"
                             "1\n"
                             "# This is a comment\\\n"
                             "# This also\n")
        self.check_roundtrip("# Comment \\\n"
                             "x = 0")

    def test_string_concatenation(self):
        # Two string literals on the same line
        self.check_roundtrip("'' ''")

    def test_random_files(self):
        # Test roundtrip on random python modules.
        # pass the '-ucpu' option to process the full directory.
        import glob, random
        fn = test_support.findfile("tokenize_tests" + os.extsep + "txt")
        tempdir = os.path.dirname(fn) or os.curdir
        testfiles = glob.glob(os.path.join(tempdir, "test*.py"))
        if not test_support.is_resource_enabled("cpu"):
            testfiles = random.sample(testfiles, 10)

        for testfile in testfiles:
            try:
                with open(testfile, 'rb') as f:
                    self.check_roundtrip(f)
            except:
                print "Roundtrip failed for file %s" % testfile
                raise

    def roundtrip(self, code):
        if isinstance(code, str):
            code = code.encode('utf-8')
        tokens = generate_tokens(StringIO(code).readline)
        return untokenize(tokens).decode('utf-8')

    def test_indentation_semantics_retained(self):
        """
        Ensure that although whitespace might be mutated in a roundtrip,
        the semantic meaning of the indentation remains consistent.
        """
        code = "if False:\n\tx=3\n\tx=3\n"
        codelines = self.roundtrip(code).split('\n')
        self.assertEqual(codelines[1], codelines[2])
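
    # Whatever whitespace untokenize() chooses to emit for the '\t' indents,
    # the assertion above only requires that the two identically indented
    # lines remain identical to each other after the roundtrip.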


def test_main():
    test_support.run_unittest(TokenizeTest)
    test_support.run_unittest(UntokenizeTest)
    test_support.run_unittest(TestRoundtrip)
    test_support.run_unittest(TestMisc)


if __name__ == "__main__":
    test_main()