# test_htmllib.py (1.9 KB)

  1. import formatter
  2. import unittest
  3. from test import test_support
  4. htmllib = test_support.import_module('htmllib', deprecated=True)
  5. class AnchorCollector(htmllib.HTMLParser):
  6. def __init__(self, *args, **kw):
  7. self.__anchors = []
  8. htmllib.HTMLParser.__init__(self, *args, **kw)
  9. def get_anchor_info(self):
  10. return self.__anchors
  11. def anchor_bgn(self, *args):
  12. self.__anchors.append(args)
  13. class DeclCollector(htmllib.HTMLParser):
  14. def __init__(self, *args, **kw):
  15. self.__decls = []
  16. htmllib.HTMLParser.__init__(self, *args, **kw)
  17. def get_decl_info(self):
  18. return self.__decls
  19. def unknown_decl(self, data):
  20. self.__decls.append(data)
  21. class HTMLParserTestCase(unittest.TestCase):
  22. def test_anchor_collection(self):
  23. # See SF bug #467059.
  24. parser = AnchorCollector(formatter.NullFormatter(), verbose=1)
  25. parser.feed(
  26. """<a href='http://foo.org/' name='splat'> </a>
  27. <a href='http://www.python.org/'> </a>
  28. <a name='frob'> </a>
  29. """)
  30. parser.close()
  31. self.assertEqual(parser.get_anchor_info(),
  32. [('http://foo.org/', 'splat', ''),
  33. ('http://www.python.org/', '', ''),
  34. ('', 'frob', ''),
  35. ])
  36. def test_decl_collection(self):
  37. # See SF patch #545300
  38. parser = DeclCollector(formatter.NullFormatter(), verbose=1)
  39. parser.feed(
  40. """<html>
  41. <body>
  42. hallo
  43. <![if !supportEmptyParas]>&nbsp;<![endif]>
  44. </body>
  45. </html>
  46. """)
  47. parser.close()
  48. self.assertEqual(parser.get_decl_info(),
  49. ["if !supportEmptyParas",
  50. "endif"
  51. ])
  52. def test_main():
  53. test_support.run_unittest(HTMLParserTestCase)
  54. if __name__ == "__main__":
  55. test_main()