# formatter.py
"""Generic output formatting.

Formatter objects transform an abstract flow of formatting events into
specific output events on writer objects.  Formatters manage several stack
structures to allow various properties of a writer object to be changed and
restored; writers need not be able to handle relative changes nor any sort
of "change back" operation.  Specific writer properties which may be
controlled via formatter objects are horizontal alignment, font, and left
margin indentations.  A mechanism is provided which supports providing
arbitrary, non-exclusive style settings to a writer as well.  Additional
interfaces facilitate formatting events which are not reversible, such as
paragraph separation.

Writer objects encapsulate device interfaces.  Abstract devices, such as
file formats, are supported as well as physical devices.  The provided
implementations all work with abstract devices.  The interface makes
available mechanisms for setting the properties which formatter objects
manage and inserting data into the output.
"""
  18. import sys
  19. AS_IS = None
  20. class NullFormatter:
  21. """A formatter which does nothing.
  22. If the writer parameter is omitted, a NullWriter instance is created.
  23. No methods of the writer are called by NullFormatter instances.
  24. Implementations should inherit from this class if implementing a writer
  25. interface but don't need to inherit any implementation.
  26. """
  27. def __init__(self, writer=None):
  28. if writer is None:
  29. writer = NullWriter()
  30. self.writer = writer
  31. def end_paragraph(self, blankline): pass
  32. def add_line_break(self): pass
  33. def add_hor_rule(self, *args, **kw): pass
  34. def add_label_data(self, format, counter, blankline=None): pass
  35. def add_flowing_data(self, data): pass
  36. def add_literal_data(self, data): pass
  37. def flush_softspace(self): pass
  38. def push_alignment(self, align): pass
  39. def pop_alignment(self): pass
  40. def push_font(self, x): pass
  41. def pop_font(self): pass
  42. def push_margin(self, margin): pass
  43. def pop_margin(self): pass
  44. def set_spacing(self, spacing): pass
  45. def push_style(self, *styles): pass
  46. def pop_style(self, n=1): pass
  47. def assert_line_data(self, flag=1): pass
  48. class AbstractFormatter:
  49. """The standard formatter.
  50. This implementation has demonstrated wide applicability to many writers,
  51. and may be used directly in most circumstances. It has been used to
  52. implement a full-featured World Wide Web browser.
  53. """
  54. # Space handling policy: blank spaces at the boundary between elements
  55. # are handled by the outermost context. "Literal" data is not checked
  56. # to determine context, so spaces in literal data are handled directly
  57. # in all circumstances.
  58. def __init__(self, writer):
  59. self.writer = writer # Output device
  60. self.align = None # Current alignment
  61. self.align_stack = [] # Alignment stack
  62. self.font_stack = [] # Font state
  63. self.margin_stack = [] # Margin state
  64. self.spacing = None # Vertical spacing state
  65. self.style_stack = [] # Other state, e.g. color
  66. self.nospace = 1 # Should leading space be suppressed
  67. self.softspace = 0 # Should a space be inserted
  68. self.para_end = 1 # Just ended a paragraph
  69. self.parskip = 0 # Skipped space between paragraphs?
  70. self.hard_break = 1 # Have a hard break
  71. self.have_label = 0
  72. def end_paragraph(self, blankline):
  73. if not self.hard_break:
  74. self.writer.send_line_break()
  75. self.have_label = 0
  76. if self.parskip < blankline and not self.have_label:
  77. self.writer.send_paragraph(blankline - self.parskip)
  78. self.parskip = blankline
  79. self.have_label = 0
  80. self.hard_break = self.nospace = self.para_end = 1
  81. self.softspace = 0
  82. def add_line_break(self):
  83. if not (self.hard_break or self.para_end):
  84. self.writer.send_line_break()
  85. self.have_label = self.parskip = 0
  86. self.hard_break = self.nospace = 1
  87. self.softspace = 0
  88. def add_hor_rule(self, *args, **kw):
  89. if not self.hard_break:
  90. self.writer.send_line_break()
  91. self.writer.send_hor_rule(*args, **kw)
  92. self.hard_break = self.nospace = 1
  93. self.have_label = self.para_end = self.softspace = self.parskip = 0
  94. def add_label_data(self, format, counter, blankline = None):
  95. if self.have_label or not self.hard_break:
  96. self.writer.send_line_break()
  97. if not self.para_end:
  98. self.writer.send_paragraph((blankline and 1) or 0)
  99. if isinstance(format, str):
  100. self.writer.send_label_data(self.format_counter(format, counter))
  101. else:
  102. self.writer.send_label_data(format)
  103. self.nospace = self.have_label = self.hard_break = self.para_end = 1
  104. self.softspace = self.parskip = 0
  105. def format_counter(self, format, counter):
  106. label = ''
  107. for c in format:
  108. if c == '1':
  109. label = label + ('%d' % counter)
  110. elif c in 'aA':
  111. if counter > 0:
  112. label = label + self.format_letter(c, counter)
  113. elif c in 'iI':
  114. if counter > 0:
  115. label = label + self.format_roman(c, counter)
  116. else:
  117. label = label + c
  118. return label
  119. def format_letter(self, case, counter):
  120. label = ''
  121. while counter > 0:
  122. counter, x = divmod(counter-1, 26)
  123. # This makes a strong assumption that lowercase letters
  124. # and uppercase letters form two contiguous blocks, with
  125. # letters in order!
  126. s = chr(ord(case) + x)
  127. label = s + label
  128. return label
  129. def format_roman(self, case, counter):
  130. ones = ['i', 'x', 'c', 'm']
  131. fives = ['v', 'l', 'd']
  132. label, index = '', 0
  133. # This will die of IndexError when counter is too big
  134. while counter > 0:
  135. counter, x = divmod(counter, 10)
  136. if x == 9:
  137. label = ones[index] + ones[index+1] + label
  138. elif x == 4:
  139. label = ones[index] + fives[index] + label
  140. else:
  141. if x >= 5:
  142. s = fives[index]
  143. x = x-5
  144. else:
  145. s = ''
  146. s = s + ones[index]*x
  147. label = s + label
  148. index = index + 1
  149. if case == 'I':
  150. return label.upper()
  151. return label
  152. def add_flowing_data(self, data):
  153. if not data: return
  154. prespace = data[:1].isspace()
  155. postspace = data[-1:].isspace()
  156. data = " ".join(data.split())
  157. if self.nospace and not data:
  158. return
  159. elif prespace or self.softspace:
  160. if not data:
  161. if not self.nospace:
  162. self.softspace = 1
  163. self.parskip = 0
  164. return
  165. if not self.nospace:
  166. data = ' ' + data
  167. self.hard_break = self.nospace = self.para_end = \
  168. self.parskip = self.have_label = 0
  169. self.softspace = postspace
  170. self.writer.send_flowing_data(data)
  171. def add_literal_data(self, data):
  172. if not data: return
  173. if self.softspace:
  174. self.writer.send_flowing_data(" ")
  175. self.hard_break = data[-1:] == '\n'
  176. self.nospace = self.para_end = self.softspace = \
  177. self.parskip = self.have_label = 0
  178. self.writer.send_literal_data(data)
  179. def flush_softspace(self):
  180. if self.softspace:
  181. self.hard_break = self.para_end = self.parskip = \
  182. self.have_label = self.softspace = 0
  183. self.nospace = 1
  184. self.writer.send_flowing_data(' ')
  185. def push_alignment(self, align):
  186. if align and align != self.align:
  187. self.writer.new_alignment(align)
  188. self.align = align
  189. self.align_stack.append(align)
  190. else:
  191. self.align_stack.append(self.align)
  192. def pop_alignment(self):
  193. if self.align_stack:
  194. del self.align_stack[-1]
  195. if self.align_stack:
  196. self.align = align = self.align_stack[-1]
  197. self.writer.new_alignment(align)
  198. else:
  199. self.align = None
  200. self.writer.new_alignment(None)
  201. def push_font(self, font):
  202. size, i, b, tt = font
  203. if self.softspace:
  204. self.hard_break = self.para_end = self.softspace = 0
  205. self.nospace = 1
  206. self.writer.send_flowing_data(' ')
  207. if self.font_stack:
  208. csize, ci, cb, ctt = self.font_stack[-1]
  209. if size is AS_IS: size = csize
  210. if i is AS_IS: i = ci
  211. if b is AS_IS: b = cb
  212. if tt is AS_IS: tt = ctt
  213. font = (size, i, b, tt)
  214. self.font_stack.append(font)
  215. self.writer.new_font(font)
  216. def pop_font(self):
  217. if self.font_stack:
  218. del self.font_stack[-1]
  219. if self.font_stack:
  220. font = self.font_stack[-1]
  221. else:
  222. font = None
  223. self.writer.new_font(font)
  224. def push_margin(self, margin):
  225. self.margin_stack.append(margin)
  226. fstack = filter(None, self.margin_stack)
  227. if not margin and fstack:
  228. margin = fstack[-1]
  229. self.writer.new_margin(margin, len(fstack))
  230. def pop_margin(self):
  231. if self.margin_stack:
  232. del self.margin_stack[-1]
  233. fstack = filter(None, self.margin_stack)
  234. if fstack:
  235. margin = fstack[-1]
  236. else:
  237. margin = None
  238. self.writer.new_margin(margin, len(fstack))
  239. def set_spacing(self, spacing):
  240. self.spacing = spacing
  241. self.writer.new_spacing(spacing)
  242. def push_style(self, *styles):
  243. if self.softspace:
  244. self.hard_break = self.para_end = self.softspace = 0
  245. self.nospace = 1
  246. self.writer.send_flowing_data(' ')
  247. for style in styles:
  248. self.style_stack.append(style)
  249. self.writer.new_styles(tuple(self.style_stack))
  250. def pop_style(self, n=1):
  251. del self.style_stack[-n:]
  252. self.writer.new_styles(tuple(self.style_stack))
  253. def assert_line_data(self, flag=1):
  254. self.nospace = self.hard_break = not flag
  255. self.para_end = self.parskip = self.have_label = 0
  256. class NullWriter:
  257. """Minimal writer interface to use in testing & inheritance.
  258. A writer which only provides the interface definition; no actions are
  259. taken on any methods. This should be the base class for all writers
  260. which do not need to inherit any implementation methods.
  261. """
  262. def __init__(self): pass
  263. def flush(self): pass
  264. def new_alignment(self, align): pass
  265. def new_font(self, font): pass
  266. def new_margin(self, margin, level): pass
  267. def new_spacing(self, spacing): pass
  268. def new_styles(self, styles): pass
  269. def send_paragraph(self, blankline): pass
  270. def send_line_break(self): pass
  271. def send_hor_rule(self, *args, **kw): pass
  272. def send_label_data(self, data): pass
  273. def send_flowing_data(self, data): pass
  274. def send_literal_data(self, data): pass
  275. class AbstractWriter(NullWriter):
  276. """A writer which can be used in debugging formatters, but not much else.
  277. Each method simply announces itself by printing its name and
  278. arguments on standard output.
  279. """
  280. def new_alignment(self, align):
  281. print "new_alignment(%r)" % (align,)
  282. def new_font(self, font):
  283. print "new_font(%r)" % (font,)
  284. def new_margin(self, margin, level):
  285. print "new_margin(%r, %d)" % (margin, level)
  286. def new_spacing(self, spacing):
  287. print "new_spacing(%r)" % (spacing,)
  288. def new_styles(self, styles):
  289. print "new_styles(%r)" % (styles,)
  290. def send_paragraph(self, blankline):
  291. print "send_paragraph(%r)" % (blankline,)
  292. def send_line_break(self):
  293. print "send_line_break()"
  294. def send_hor_rule(self, *args, **kw):
  295. print "send_hor_rule()"
  296. def send_label_data(self, data):
  297. print "send_label_data(%r)" % (data,)
  298. def send_flowing_data(self, data):
  299. print "send_flowing_data(%r)" % (data,)
  300. def send_literal_data(self, data):
  301. print "send_literal_data(%r)" % (data,)
  302. class DumbWriter(NullWriter):
  303. """Simple writer class which writes output on the file object passed in
  304. as the file parameter or, if file is omitted, on standard output. The
  305. output is simply word-wrapped to the number of columns specified by
  306. the maxcol parameter. This class is suitable for reflowing a sequence
  307. of paragraphs.
  308. """
  309. def __init__(self, file=None, maxcol=72):
  310. self.file = file or sys.stdout
  311. self.maxcol = maxcol
  312. NullWriter.__init__(self)
  313. self.reset()
  314. def reset(self):
  315. self.col = 0
  316. self.atbreak = 0
  317. def send_paragraph(self, blankline):
  318. self.file.write('\n'*blankline)
  319. self.col = 0
  320. self.atbreak = 0
  321. def send_line_break(self):
  322. self.file.write('\n')
  323. self.col = 0
  324. self.atbreak = 0
  325. def send_hor_rule(self, *args, **kw):
  326. self.file.write('\n')
  327. self.file.write('-'*self.maxcol)
  328. self.file.write('\n')
  329. self.col = 0
  330. self.atbreak = 0
  331. def send_literal_data(self, data):
  332. self.file.write(data)
  333. i = data.rfind('\n')
  334. if i >= 0:
  335. self.col = 0
  336. data = data[i+1:]
  337. data = data.expandtabs()
  338. self.col = self.col + len(data)
  339. self.atbreak = 0
  340. def send_flowing_data(self, data):
  341. if not data: return
  342. atbreak = self.atbreak or data[0].isspace()
  343. col = self.col
  344. maxcol = self.maxcol
  345. write = self.file.write
  346. for word in data.split():
  347. if atbreak:
  348. if col + len(word) >= maxcol:
  349. write('\n')
  350. col = 0
  351. else:
  352. write(' ')
  353. col = col + 1
  354. write(word)
  355. col = col + len(word)
  356. atbreak = 1
  357. self.col = col
  358. self.atbreak = data[-1].isspace()
  359. def test(file = None):
  360. w = DumbWriter()
  361. f = AbstractFormatter(w)
  362. if file is not None:
  363. fp = open(file)
  364. elif sys.argv[1:]:
  365. fp = open(sys.argv[1])
  366. else:
  367. fp = sys.stdin
  368. for line in fp:
  369. if line == '\n':
  370. f.end_paragraph(1)
  371. else:
  372. f.add_flowing_data(line)
  373. f.end_paragraph(0)
  374. if __name__ == '__main__':
  375. test()