HyperParser.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
"""Provide advanced parsing abilities for ParenMatch and other extensions.

HyperParser uses PyParse.  PyParse mostly gives information on the
proper indentation of code.  HyperParser gives additional information on
the structure of code.
"""
  6. import string
  7. import keyword
  8. from idlelib import PyParse
  9. class HyperParser:
  10. def __init__(self, editwin, index):
  11. "To initialize, analyze the surroundings of the given index."
  12. self.editwin = editwin
  13. self.text = text = editwin.text
  14. parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth)
  15. def index2line(index):
  16. return int(float(index))
  17. lno = index2line(text.index(index))
  18. if not editwin.context_use_ps1:
  19. for context in editwin.num_context_lines:
  20. startat = max(lno - context, 1)
  21. startatindex = repr(startat) + ".0"
  22. stopatindex = "%d.end" % lno
  23. # We add the newline because PyParse requires a newline
  24. # at end. We add a space so that index won't be at end
  25. # of line, so that its status will be the same as the
  26. # char before it, if should.
  27. parser.set_str(text.get(startatindex, stopatindex)+' \n')
  28. bod = parser.find_good_parse_start(
  29. editwin._build_char_in_string_func(startatindex))
  30. if bod is not None or startat == 1:
  31. break
  32. parser.set_lo(bod or 0)
  33. else:
  34. r = text.tag_prevrange("console", index)
  35. if r:
  36. startatindex = r[1]
  37. else:
  38. startatindex = "1.0"
  39. stopatindex = "%d.end" % lno
  40. # We add the newline because PyParse requires it. We add a
  41. # space so that index won't be at end of line, so that its
  42. # status will be the same as the char before it, if should.
  43. parser.set_str(text.get(startatindex, stopatindex)+' \n')
  44. parser.set_lo(0)
  45. # We want what the parser has, minus the last newline and space.
  46. self.rawtext = parser.str[:-2]
  47. # Parser.str apparently preserves the statement we are in, so
  48. # that stopatindex can be used to synchronize the string with
  49. # the text box indices.
  50. self.stopatindex = stopatindex
  51. self.bracketing = parser.get_last_stmt_bracketing()
  52. # find which pairs of bracketing are openers. These always
  53. # correspond to a character of rawtext.
  54. self.isopener = [i>0 and self.bracketing[i][1] >
  55. self.bracketing[i-1][1]
  56. for i in range(len(self.bracketing))]
  57. self.set_index(index)
  58. def set_index(self, index):
  59. """Set the index to which the functions relate.
  60. The index must be in the same statement.
  61. """
  62. indexinrawtext = (len(self.rawtext) -
  63. len(self.text.get(index, self.stopatindex)))
  64. if indexinrawtext < 0:
  65. raise ValueError("Index %s precedes the analyzed statement"
  66. % index)
  67. self.indexinrawtext = indexinrawtext
  68. # find the rightmost bracket to which index belongs
  69. self.indexbracket = 0
  70. while (self.indexbracket < len(self.bracketing)-1 and
  71. self.bracketing[self.indexbracket+1][0] < self.indexinrawtext):
  72. self.indexbracket += 1
  73. if (self.indexbracket < len(self.bracketing)-1 and
  74. self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and
  75. not self.isopener[self.indexbracket+1]):
  76. self.indexbracket += 1
  77. def is_in_string(self):
  78. """Is the index given to the HyperParser in a string?"""
  79. # The bracket to which we belong should be an opener.
  80. # If it's an opener, it has to have a character.
  81. return (self.isopener[self.indexbracket] and
  82. self.rawtext[self.bracketing[self.indexbracket][0]]
  83. in ('"', "'"))
  84. def is_in_code(self):
  85. """Is the index given to the HyperParser in normal code?"""
  86. return (not self.isopener[self.indexbracket] or
  87. self.rawtext[self.bracketing[self.indexbracket][0]]
  88. not in ('#', '"', "'"))
  89. def get_surrounding_brackets(self, openers='([{', mustclose=False):
  90. """Return bracket indexes or None.
  91. If the index given to the HyperParser is surrounded by a
  92. bracket defined in openers (or at least has one before it),
  93. return the indices of the opening bracket and the closing
  94. bracket (or the end of line, whichever comes first).
  95. If it is not surrounded by brackets, or the end of line comes
  96. before the closing bracket and mustclose is True, returns None.
  97. """
  98. bracketinglevel = self.bracketing[self.indexbracket][1]
  99. before = self.indexbracket
  100. while (not self.isopener[before] or
  101. self.rawtext[self.bracketing[before][0]] not in openers or
  102. self.bracketing[before][1] > bracketinglevel):
  103. before -= 1
  104. if before < 0:
  105. return None
  106. bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
  107. after = self.indexbracket + 1
  108. while (after < len(self.bracketing) and
  109. self.bracketing[after][1] >= bracketinglevel):
  110. after += 1
  111. beforeindex = self.text.index("%s-%dc" %
  112. (self.stopatindex, len(self.rawtext)-self.bracketing[before][0]))
  113. if (after >= len(self.bracketing) or
  114. self.bracketing[after][0] > len(self.rawtext)):
  115. if mustclose:
  116. return None
  117. afterindex = self.stopatindex
  118. else:
  119. # We are after a real char, so it is a ')' and we give the
  120. # index before it.
  121. afterindex = self.text.index(
  122. "%s-%dc" % (self.stopatindex,
  123. len(self.rawtext)-(self.bracketing[after][0]-1)))
  124. return beforeindex, afterindex
  125. # Ascii chars that may be in a white space
  126. _whitespace_chars = " \t\n\\"
  127. # Ascii chars that may be in an identifier
  128. _id_chars = string.ascii_letters + string.digits + "_"
  129. # Ascii chars that may be the first char of an identifier
  130. _id_first_chars = string.ascii_letters + "_"
  131. # Given a string and pos, return the number of chars in the
  132. # identifier which ends at pos, or 0 if there is no such one. Saved
  133. # words are not identifiers.
  134. def _eat_identifier(self, str, limit, pos):
  135. i = pos
  136. while i > limit and str[i-1] in self._id_chars:
  137. i -= 1
  138. if (i < pos and (str[i] not in self._id_first_chars or
  139. keyword.iskeyword(str[i:pos]))):
  140. i = pos
  141. return pos - i
  142. def get_expression(self):
  143. """Return a string with the Python expression which ends at the
  144. given index, which is empty if there is no real one.
  145. """
  146. if not self.is_in_code():
  147. raise ValueError("get_expression should only be called"
  148. "if index is inside a code.")
  149. rawtext = self.rawtext
  150. bracketing = self.bracketing
  151. brck_index = self.indexbracket
  152. brck_limit = bracketing[brck_index][0]
  153. pos = self.indexinrawtext
  154. last_identifier_pos = pos
  155. postdot_phase = True
  156. while 1:
  157. # Eat whitespaces, comments, and if postdot_phase is False - a dot
  158. while 1:
  159. if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars:
  160. # Eat a whitespace
  161. pos -= 1
  162. elif (not postdot_phase and
  163. pos > brck_limit and rawtext[pos-1] == '.'):
  164. # Eat a dot
  165. pos -= 1
  166. postdot_phase = True
  167. # The next line will fail if we are *inside* a comment,
  168. # but we shouldn't be.
  169. elif (pos == brck_limit and brck_index > 0 and
  170. rawtext[bracketing[brck_index-1][0]] == '#'):
  171. # Eat a comment
  172. brck_index -= 2
  173. brck_limit = bracketing[brck_index][0]
  174. pos = bracketing[brck_index+1][0]
  175. else:
  176. # If we didn't eat anything, quit.
  177. break
  178. if not postdot_phase:
  179. # We didn't find a dot, so the expression end at the
  180. # last identifier pos.
  181. break
  182. ret = self._eat_identifier(rawtext, brck_limit, pos)
  183. if ret:
  184. # There is an identifier to eat
  185. pos = pos - ret
  186. last_identifier_pos = pos
  187. # Now, to continue the search, we must find a dot.
  188. postdot_phase = False
  189. # (the loop continues now)
  190. elif pos == brck_limit:
  191. # We are at a bracketing limit. If it is a closing
  192. # bracket, eat the bracket, otherwise, stop the search.
  193. level = bracketing[brck_index][1]
  194. while brck_index > 0 and bracketing[brck_index-1][1] > level:
  195. brck_index -= 1
  196. if bracketing[brck_index][0] == brck_limit:
  197. # We were not at the end of a closing bracket
  198. break
  199. pos = bracketing[brck_index][0]
  200. brck_index -= 1
  201. brck_limit = bracketing[brck_index][0]
  202. last_identifier_pos = pos
  203. if rawtext[pos] in "([":
  204. # [] and () may be used after an identifier, so we
  205. # continue. postdot_phase is True, so we don't allow a dot.
  206. pass
  207. else:
  208. # We can't continue after other types of brackets
  209. if rawtext[pos] in "'\"":
  210. # Scan a string prefix
  211. while pos > 0 and rawtext[pos - 1] in "rRbBuU":
  212. pos -= 1
  213. last_identifier_pos = pos
  214. break
  215. else:
  216. # We've found an operator or something.
  217. break
  218. return rawtext[last_identifier_pos:self.indexinrawtext]
  219. if __name__ == '__main__':
  220. import unittest
  221. unittest.main('idlelib.idle_test.test_hyperparser', verbosity=2)