test_sax.py 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071
  1. # regression test for SAX 2.0 -*- coding: utf-8 -*-
  2. # $Id$
  3. from xml.sax import make_parser, ContentHandler, \
  4. SAXException, SAXReaderNotAvailable, SAXParseException
# Probe once, at import time, for a usable SAX driver.
try:
    make_parser()
except SAXReaderNotAvailable:
    # don't try to test this module if we cannot create a parser;
    # the ImportError aborts the import of this test module.
    raise ImportError("no XML parsers available")
  10. from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \
  11. XMLFilterBase, prepare_input_source
  12. from xml.sax.expatreader import create_parser
  13. from xml.sax.handler import feature_namespaces
  14. from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
  15. from cStringIO import StringIO
  16. import io
  17. import gc
  18. import os.path
  19. import shutil
  20. import test.test_support as support
  21. from test.test_support import findfile, run_unittest, TESTFN
  22. import unittest
  23. TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
  24. TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")
  25. supports_unicode_filenames = True
  26. if not os.path.supports_unicode_filenames:
  27. try:
  28. support.TESTFN_UNICODE.encode(support.TESTFN_ENCODING)
  29. except (AttributeError, UnicodeError, TypeError):
  30. # Either the file system encoding is None, or the file name
  31. # cannot be encoded in the file system encoding.
  32. supports_unicode_filenames = False
  33. requires_unicode_filenames = unittest.skipUnless(
  34. supports_unicode_filenames,
  35. 'Requires unicode filenames support')
  36. ns_uri = "http://www.python.org/xml-ns/saxtest/"
  37. class XmlTestBase(unittest.TestCase):
  38. def verify_empty_attrs(self, attrs):
  39. self.assertRaises(KeyError, attrs.getValue, "attr")
  40. self.assertRaises(KeyError, attrs.getValueByQName, "attr")
  41. self.assertRaises(KeyError, attrs.getNameByQName, "attr")
  42. self.assertRaises(KeyError, attrs.getQNameByName, "attr")
  43. self.assertRaises(KeyError, attrs.__getitem__, "attr")
  44. self.assertEqual(attrs.getLength(), 0)
  45. self.assertEqual(attrs.getNames(), [])
  46. self.assertEqual(attrs.getQNames(), [])
  47. self.assertEqual(len(attrs), 0)
  48. self.assertFalse(attrs.has_key("attr"))
  49. self.assertEqual(attrs.keys(), [])
  50. self.assertEqual(attrs.get("attrs"), None)
  51. self.assertEqual(attrs.get("attrs", 25), 25)
  52. self.assertEqual(attrs.items(), [])
  53. self.assertEqual(attrs.values(), [])
  54. def verify_empty_nsattrs(self, attrs):
  55. self.assertRaises(KeyError, attrs.getValue, (ns_uri, "attr"))
  56. self.assertRaises(KeyError, attrs.getValueByQName, "ns:attr")
  57. self.assertRaises(KeyError, attrs.getNameByQName, "ns:attr")
  58. self.assertRaises(KeyError, attrs.getQNameByName, (ns_uri, "attr"))
  59. self.assertRaises(KeyError, attrs.__getitem__, (ns_uri, "attr"))
  60. self.assertEqual(attrs.getLength(), 0)
  61. self.assertEqual(attrs.getNames(), [])
  62. self.assertEqual(attrs.getQNames(), [])
  63. self.assertEqual(len(attrs), 0)
  64. self.assertFalse(attrs.has_key((ns_uri, "attr")))
  65. self.assertEqual(attrs.keys(), [])
  66. self.assertEqual(attrs.get((ns_uri, "attr")), None)
  67. self.assertEqual(attrs.get((ns_uri, "attr"), 25), 25)
  68. self.assertEqual(attrs.items(), [])
  69. self.assertEqual(attrs.values(), [])
  70. def verify_attrs_wattr(self, attrs):
  71. self.assertEqual(attrs.getLength(), 1)
  72. self.assertEqual(attrs.getNames(), ["attr"])
  73. self.assertEqual(attrs.getQNames(), ["attr"])
  74. self.assertEqual(len(attrs), 1)
  75. self.assertTrue(attrs.has_key("attr"))
  76. self.assertEqual(attrs.keys(), ["attr"])
  77. self.assertEqual(attrs.get("attr"), "val")
  78. self.assertEqual(attrs.get("attr", 25), "val")
  79. self.assertEqual(attrs.items(), [("attr", "val")])
  80. self.assertEqual(attrs.values(), ["val"])
  81. self.assertEqual(attrs.getValue("attr"), "val")
  82. self.assertEqual(attrs.getValueByQName("attr"), "val")
  83. self.assertEqual(attrs.getNameByQName("attr"), "attr")
  84. self.assertEqual(attrs["attr"], "val")
  85. self.assertEqual(attrs.getQNameByName("attr"), "attr")
  86. def xml_unicode(doc, encoding=None):
  87. if encoding is None:
  88. return doc
  89. return u'<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc)
  90. def xml_bytes(doc, encoding, decl_encoding=Ellipsis):
  91. if decl_encoding is Ellipsis:
  92. decl_encoding = encoding
  93. return xml_unicode(doc, decl_encoding).encode(encoding, 'xmlcharrefreplace')
  94. def make_xml_file(doc, encoding, decl_encoding=Ellipsis):
  95. if decl_encoding is Ellipsis:
  96. decl_encoding = encoding
  97. with io.open(TESTFN, 'w', encoding=encoding, errors='xmlcharrefreplace') as f:
  98. f.write(xml_unicode(doc, decl_encoding))
  99. class ParseTest(unittest.TestCase):
  100. data = support.u(r'<money value="$\xa3\u20ac\U0001017b">'
  101. r'$\xa3\u20ac\U0001017b</money>')
  102. def tearDown(self):
  103. support.unlink(TESTFN)
  104. def check_parse(self, f):
  105. from xml.sax import parse
  106. result = StringIO()
  107. parse(f, XMLGenerator(result, 'utf-8'))
  108. self.assertEqual(result.getvalue(), xml_bytes(self.data, 'utf-8'))
  109. def test_parse_bytes(self):
  110. # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
  111. # UTF-16 is autodetected
  112. encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
  113. for encoding in encodings:
  114. self.check_parse(io.BytesIO(xml_bytes(self.data, encoding)))
  115. make_xml_file(self.data, encoding)
  116. self.check_parse(TESTFN)
  117. with io.open(TESTFN, 'rb') as f:
  118. self.check_parse(f)
  119. self.check_parse(io.BytesIO(xml_bytes(self.data, encoding, None)))
  120. make_xml_file(self.data, encoding, None)
  121. self.check_parse(TESTFN)
  122. with io.open(TESTFN, 'rb') as f:
  123. self.check_parse(f)
  124. # accept UTF-8 with BOM
  125. self.check_parse(io.BytesIO(xml_bytes(self.data, 'utf-8-sig', 'utf-8')))
  126. make_xml_file(self.data, 'utf-8-sig', 'utf-8')
  127. self.check_parse(TESTFN)
  128. with io.open(TESTFN, 'rb') as f:
  129. self.check_parse(f)
  130. self.check_parse(io.BytesIO(xml_bytes(self.data, 'utf-8-sig', None)))
  131. make_xml_file(self.data, 'utf-8-sig', None)
  132. self.check_parse(TESTFN)
  133. with io.open(TESTFN, 'rb') as f:
  134. self.check_parse(f)
  135. # accept data with declared encoding
  136. self.check_parse(io.BytesIO(xml_bytes(self.data, 'iso-8859-1')))
  137. make_xml_file(self.data, 'iso-8859-1')
  138. self.check_parse(TESTFN)
  139. with io.open(TESTFN, 'rb') as f:
  140. self.check_parse(f)
  141. # fail on non-UTF-8 incompatible data without declared encoding
  142. with self.assertRaises(SAXException):
  143. self.check_parse(io.BytesIO(xml_bytes(self.data, 'iso-8859-1', None)))
  144. make_xml_file(self.data, 'iso-8859-1', None)
  145. with self.assertRaises(SAXException):
  146. self.check_parse(TESTFN)
  147. with io.open(TESTFN, 'rb') as f:
  148. with self.assertRaises(SAXException):
  149. self.check_parse(f)
  150. def test_parse_InputSource(self):
  151. # accept data without declared but with explicitly specified encoding
  152. make_xml_file(self.data, 'iso-8859-1', None)
  153. with io.open(TESTFN, 'rb') as f:
  154. input = InputSource()
  155. input.setByteStream(f)
  156. input.setEncoding('iso-8859-1')
  157. self.check_parse(input)
  158. def check_parseString(self, s):
  159. from xml.sax import parseString
  160. result = StringIO()
  161. parseString(s, XMLGenerator(result, 'utf-8'))
  162. self.assertEqual(result.getvalue(), xml_bytes(self.data, 'utf-8'))
  163. def test_parseString_bytes(self):
  164. # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
  165. # UTF-16 is autodetected
  166. encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
  167. for encoding in encodings:
  168. self.check_parseString(xml_bytes(self.data, encoding))
  169. self.check_parseString(xml_bytes(self.data, encoding, None))
  170. # accept UTF-8 with BOM
  171. self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))
  172. self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None))
  173. # accept data with declared encoding
  174. self.check_parseString(xml_bytes(self.data, 'iso-8859-1'))
  175. # fail on non-UTF-8 incompatible data without declared encoding
  176. with self.assertRaises(SAXException):
  177. self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None))
  178. class MakeParserTest(unittest.TestCase):
  179. def test_make_parser2(self):
  180. # Creating parsers several times in a row should succeed.
  181. # Testing this because there have been failures of this kind
  182. # before.
  183. from xml.sax import make_parser
  184. p = make_parser()
  185. from xml.sax import make_parser
  186. p = make_parser()
  187. from xml.sax import make_parser
  188. p = make_parser()
  189. from xml.sax import make_parser
  190. p = make_parser()
  191. from xml.sax import make_parser
  192. p = make_parser()
  193. from xml.sax import make_parser
  194. p = make_parser()
  195. # ===========================================================================
  196. #
  197. # saxutils tests
  198. #
  199. # ===========================================================================
  200. class SaxutilsTest(unittest.TestCase):
  201. # ===== escape
  202. def test_escape_basic(self):
  203. self.assertEqual(escape("Donald Duck & Co"), "Donald Duck &amp; Co")
  204. def test_escape_all(self):
  205. self.assertEqual(escape("<Donald Duck & Co>"),
  206. "&lt;Donald Duck &amp; Co&gt;")
  207. def test_escape_extra(self):
  208. self.assertEqual(escape("Hei på deg", {"å" : "&aring;"}),
  209. "Hei p&aring; deg")
  210. # ===== unescape
  211. def test_unescape_basic(self):
  212. self.assertEqual(unescape("Donald Duck &amp; Co"), "Donald Duck & Co")
  213. def test_unescape_all(self):
  214. self.assertEqual(unescape("&lt;Donald Duck &amp; Co&gt;"),
  215. "<Donald Duck & Co>")
  216. def test_unescape_extra(self):
  217. self.assertEqual(unescape("Hei på deg", {"å" : "&aring;"}),
  218. "Hei p&aring; deg")
  219. def test_unescape_amp_extra(self):
  220. self.assertEqual(unescape("&amp;foo;", {"&foo;": "splat"}), "&foo;")
  221. # ===== quoteattr
  222. def test_quoteattr_basic(self):
  223. self.assertEqual(quoteattr("Donald Duck & Co"),
  224. '"Donald Duck &amp; Co"')
  225. def test_single_quoteattr(self):
  226. self.assertEqual(quoteattr('Includes "double" quotes'),
  227. '\'Includes "double" quotes\'')
  228. def test_double_quoteattr(self):
  229. self.assertEqual(quoteattr("Includes 'single' quotes"),
  230. "\"Includes 'single' quotes\"")
  231. def test_single_double_quoteattr(self):
  232. self.assertEqual(quoteattr("Includes 'single' and \"double\" quotes"),
  233. "\"Includes 'single' and &quot;double&quot; quotes\"")
  234. # ===== make_parser
  235. def test_make_parser(self):
  236. # Creating a parser should succeed - it should fall back
  237. # to the expatreader
  238. p = make_parser(['xml.parsers.no_such_parser'])
  239. class PrepareInputSourceTest(unittest.TestCase):
  240. def setUp(self):
  241. self.file = support.TESTFN
  242. with open(self.file, "w") as tmp:
  243. tmp.write("This was read from a file.")
  244. def tearDown(self):
  245. support.unlink(self.file)
  246. def make_byte_stream(self):
  247. return io.BytesIO(b"This is a byte stream.")
  248. def checkContent(self, stream, content):
  249. self.assertIsNotNone(stream)
  250. self.assertEqual(stream.read(), content)
  251. stream.close()
  252. def test_byte_stream(self):
  253. # If the source is an InputSource that does not have a character
  254. # stream but does have a byte stream, use the byte stream.
  255. src = InputSource(self.file)
  256. src.setByteStream(self.make_byte_stream())
  257. prep = prepare_input_source(src)
  258. self.assertIsNone(prep.getCharacterStream())
  259. self.checkContent(prep.getByteStream(),
  260. b"This is a byte stream.")
  261. def test_system_id(self):
  262. # If the source is an InputSource that has neither a character
  263. # stream nor a byte stream, open the system ID.
  264. src = InputSource(self.file)
  265. prep = prepare_input_source(src)
  266. self.assertIsNone(prep.getCharacterStream())
  267. self.checkContent(prep.getByteStream(),
  268. b"This was read from a file.")
  269. def test_string(self):
  270. # If the source is a string, use it as a system ID and open it.
  271. prep = prepare_input_source(self.file)
  272. self.assertIsNone(prep.getCharacterStream())
  273. self.checkContent(prep.getByteStream(),
  274. b"This was read from a file.")
  275. def test_binary_file(self):
  276. # If the source is a binary file-like object, use it as a byte
  277. # stream.
  278. prep = prepare_input_source(self.make_byte_stream())
  279. self.assertIsNone(prep.getCharacterStream())
  280. self.checkContent(prep.getByteStream(),
  281. b"This is a byte stream.")
  282. # ===== XMLGenerator
  283. start = '<?xml version="1.0" encoding="iso-8859-1"?>\n'
  284. class XmlgenTest:
  285. def test_xmlgen_basic(self):
  286. result = self.ioclass()
  287. gen = XMLGenerator(result)
  288. gen.startDocument()
  289. gen.startElement("doc", {})
  290. gen.endElement("doc")
  291. gen.endDocument()
  292. self.assertEqual(result.getvalue(), start + "<doc></doc>")
  293. def test_xmlgen_content(self):
  294. result = self.ioclass()
  295. gen = XMLGenerator(result)
  296. gen.startDocument()
  297. gen.startElement("doc", {})
  298. gen.characters("huhei")
  299. gen.endElement("doc")
  300. gen.endDocument()
  301. self.assertEqual(result.getvalue(), start + "<doc>huhei</doc>")
  302. def test_xmlgen_pi(self):
  303. result = self.ioclass()
  304. gen = XMLGenerator(result)
  305. gen.startDocument()
  306. gen.processingInstruction("test", "data")
  307. gen.startElement("doc", {})
  308. gen.endElement("doc")
  309. gen.endDocument()
  310. self.assertEqual(result.getvalue(), start + "<?test data?><doc></doc>")
  311. def test_xmlgen_content_escape(self):
  312. result = self.ioclass()
  313. gen = XMLGenerator(result)
  314. gen.startDocument()
  315. gen.startElement("doc", {})
  316. gen.characters("<huhei&")
  317. gen.endElement("doc")
  318. gen.endDocument()
  319. self.assertEqual(result.getvalue(),
  320. start + "<doc>&lt;huhei&amp;</doc>")
  321. def test_xmlgen_attr_escape(self):
  322. result = self.ioclass()
  323. gen = XMLGenerator(result)
  324. gen.startDocument()
  325. gen.startElement("doc", {"a": '"'})
  326. gen.startElement("e", {"a": "'"})
  327. gen.endElement("e")
  328. gen.startElement("e", {"a": "'\""})
  329. gen.endElement("e")
  330. gen.startElement("e", {"a": "\n\r\t"})
  331. gen.endElement("e")
  332. gen.endElement("doc")
  333. gen.endDocument()
  334. self.assertEqual(result.getvalue(), start +
  335. ("<doc a='\"'><e a=\"'\"></e>"
  336. "<e a=\"'&quot;\"></e>"
  337. "<e a=\"&#10;&#13;&#9;\"></e></doc>"))
  338. def test_xmlgen_encoding(self):
  339. encodings = ('iso-8859-15', 'utf-8',
  340. 'utf-16be', 'utf-16le',
  341. 'utf-32be', 'utf-32le')
  342. for encoding in encodings:
  343. result = self.ioclass()
  344. gen = XMLGenerator(result, encoding=encoding)
  345. gen.startDocument()
  346. gen.startElement("doc", {"a": u'\u20ac'})
  347. gen.characters(u"\u20ac")
  348. gen.endElement("doc")
  349. gen.endDocument()
  350. self.assertEqual(result.getvalue(), (
  351. u'<?xml version="1.0" encoding="%s"?>\n'
  352. u'<doc a="\u20ac">\u20ac</doc>' % encoding
  353. ).encode(encoding, 'xmlcharrefreplace'))
  354. def test_xmlgen_unencodable(self):
  355. result = self.ioclass()
  356. gen = XMLGenerator(result, encoding='ascii')
  357. gen.startDocument()
  358. gen.startElement("doc", {"a": u'\u20ac'})
  359. gen.characters(u"\u20ac")
  360. gen.endElement("doc")
  361. gen.endDocument()
  362. self.assertEqual(result.getvalue(),
  363. '<?xml version="1.0" encoding="ascii"?>\n'
  364. '<doc a="&#8364;">&#8364;</doc>')
  365. def test_xmlgen_ignorable(self):
  366. result = self.ioclass()
  367. gen = XMLGenerator(result)
  368. gen.startDocument()
  369. gen.startElement("doc", {})
  370. gen.ignorableWhitespace(" ")
  371. gen.endElement("doc")
  372. gen.endDocument()
  373. self.assertEqual(result.getvalue(), start + "<doc> </doc>")
  374. def test_xmlgen_encoding_bytes(self):
  375. encodings = ('iso-8859-15', 'utf-8',
  376. 'utf-16be', 'utf-16le',
  377. 'utf-32be', 'utf-32le')
  378. for encoding in encodings:
  379. result = self.ioclass()
  380. gen = XMLGenerator(result, encoding=encoding)
  381. gen.startDocument()
  382. gen.startElement("doc", {"a": u'\u20ac'})
  383. gen.characters(u"\u20ac".encode(encoding))
  384. gen.ignorableWhitespace(" ".encode(encoding))
  385. gen.endElement("doc")
  386. gen.endDocument()
  387. self.assertEqual(result.getvalue(), (
  388. u'<?xml version="1.0" encoding="%s"?>\n'
  389. u'<doc a="\u20ac">\u20ac </doc>' % encoding
  390. ).encode(encoding, 'xmlcharrefreplace'))
  391. def test_xmlgen_ns(self):
  392. result = self.ioclass()
  393. gen = XMLGenerator(result)
  394. gen.startDocument()
  395. gen.startPrefixMapping("ns1", ns_uri)
  396. gen.startElementNS((ns_uri, "doc"), "ns1:doc", {})
  397. # add an unqualified name
  398. gen.startElementNS((None, "udoc"), None, {})
  399. gen.endElementNS((None, "udoc"), None)
  400. gen.endElementNS((ns_uri, "doc"), "ns1:doc")
  401. gen.endPrefixMapping("ns1")
  402. gen.endDocument()
  403. self.assertEqual(result.getvalue(), start + \
  404. ('<ns1:doc xmlns:ns1="%s"><udoc></udoc></ns1:doc>' %
  405. ns_uri))
  406. def test_1463026_1(self):
  407. result = self.ioclass()
  408. gen = XMLGenerator(result)
  409. gen.startDocument()
  410. gen.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'})
  411. gen.endElementNS((None, 'a'), 'a')
  412. gen.endDocument()
  413. self.assertEqual(result.getvalue(), start+'<a b="c"></a>')
  414. def test_1463026_2(self):
  415. result = self.ioclass()
  416. gen = XMLGenerator(result)
  417. gen.startDocument()
  418. gen.startPrefixMapping(None, 'qux')
  419. gen.startElementNS(('qux', 'a'), 'a', {})
  420. gen.endElementNS(('qux', 'a'), 'a')
  421. gen.endPrefixMapping(None)
  422. gen.endDocument()
  423. self.assertEqual(result.getvalue(), start+'<a xmlns="qux"></a>')
  424. def test_1463026_3(self):
  425. result = self.ioclass()
  426. gen = XMLGenerator(result)
  427. gen.startDocument()
  428. gen.startPrefixMapping('my', 'qux')
  429. gen.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'})
  430. gen.endElementNS(('qux', 'a'), 'a')
  431. gen.endPrefixMapping('my')
  432. gen.endDocument()
  433. self.assertEqual(result.getvalue(),
  434. start+'<my:a xmlns:my="qux" b="c"></my:a>')
  435. def test_5027_1(self):
  436. # The xml prefix (as in xml:lang below) is reserved and bound by
  437. # definition to http://www.w3.org/XML/1998/namespace. XMLGenerator had
  438. # a bug whereby a KeyError is raised because this namespace is missing
  439. # from a dictionary.
  440. #
  441. # This test demonstrates the bug by parsing a document.
  442. test_xml = StringIO(
  443. '<?xml version="1.0"?>'
  444. '<a:g1 xmlns:a="http://example.com/ns">'
  445. '<a:g2 xml:lang="en">Hello</a:g2>'
  446. '</a:g1>')
  447. parser = make_parser()
  448. parser.setFeature(feature_namespaces, True)
  449. result = self.ioclass()
  450. gen = XMLGenerator(result)
  451. parser.setContentHandler(gen)
  452. parser.parse(test_xml)
  453. self.assertEqual(result.getvalue(),
  454. start + (
  455. '<a:g1 xmlns:a="http://example.com/ns">'
  456. '<a:g2 xml:lang="en">Hello</a:g2>'
  457. '</a:g1>'))
  458. def test_5027_2(self):
  459. # The xml prefix (as in xml:lang below) is reserved and bound by
  460. # definition to http://www.w3.org/XML/1998/namespace. XMLGenerator had
  461. # a bug whereby a KeyError is raised because this namespace is missing
  462. # from a dictionary.
  463. #
  464. # This test demonstrates the bug by direct manipulation of the
  465. # XMLGenerator.
  466. result = self.ioclass()
  467. gen = XMLGenerator(result)
  468. gen.startDocument()
  469. gen.startPrefixMapping('a', 'http://example.com/ns')
  470. gen.startElementNS(('http://example.com/ns', 'g1'), 'g1', {})
  471. lang_attr = {('http://www.w3.org/XML/1998/namespace', 'lang'): 'en'}
  472. gen.startElementNS(('http://example.com/ns', 'g2'), 'g2', lang_attr)
  473. gen.characters('Hello')
  474. gen.endElementNS(('http://example.com/ns', 'g2'), 'g2')
  475. gen.endElementNS(('http://example.com/ns', 'g1'), 'g1')
  476. gen.endPrefixMapping('a')
  477. gen.endDocument()
  478. self.assertEqual(result.getvalue(),
  479. start + (
  480. '<a:g1 xmlns:a="http://example.com/ns">'
  481. '<a:g2 xml:lang="en">Hello</a:g2>'
  482. '</a:g1>'))
  483. def test_no_close_file(self):
  484. result = self.ioclass()
  485. def func(out):
  486. gen = XMLGenerator(out)
  487. gen.startDocument()
  488. gen.startElement("doc", {})
  489. func(result)
  490. self.assertFalse(result.closed)
  491. def test_xmlgen_fragment(self):
  492. result = self.ioclass()
  493. gen = XMLGenerator(result)
  494. # Don't call gen.startDocument()
  495. gen.startElement("foo", {"a": "1.0"})
  496. gen.characters("Hello")
  497. gen.endElement("foo")
  498. gen.startElement("bar", {"b": "2.0"})
  499. gen.endElement("bar")
  500. # Don't call gen.endDocument()
  501. self.assertEqual(result.getvalue(),
  502. '<foo a="1.0">Hello</foo><bar b="2.0"></bar>')
class StringXmlgenTest(XmlgenTest, unittest.TestCase):
    # Runs the XmlgenTest cases with the StringIO class imported by this
    # module as the output object.
    ioclass = StringIO
class BytesIOXmlgenTest(XmlgenTest, unittest.TestCase):
    # Runs the XmlgenTest cases with io.BytesIO as the output object.
    ioclass = io.BytesIO
class WriterXmlgenTest(XmlgenTest, unittest.TestCase):
    # Runs the XmlgenTest cases against a list subclass that exposes
    # write(), a ``closed`` flag and getvalue() instead of a real file object.
    class ioclass(list):
        # Each write() call appends one chunk to the list.
        write = list.append
        # Read by test_no_close_file.
        closed = False

        def getvalue(self):
            # Concatenate the chunks written so far.
            return b''.join(self)
  513. class XMLFilterBaseTest(unittest.TestCase):
  514. def test_filter_basic(self):
  515. result = StringIO()
  516. gen = XMLGenerator(result)
  517. filter = XMLFilterBase()
  518. filter.setContentHandler(gen)
  519. filter.startDocument()
  520. filter.startElement("doc", {})
  521. filter.characters("content")
  522. filter.ignorableWhitespace(" ")
  523. filter.endElement("doc")
  524. filter.endDocument()
  525. self.assertEqual(result.getvalue(), start + "<doc>content </doc>")
  526. # ===========================================================================
  527. #
  528. # expatreader tests
  529. #
  530. # ===========================================================================
  531. xml_test_out = open(TEST_XMLFILE_OUT).read()
  532. class ExpatReaderTest(XmlTestBase):
  533. # ===== XMLReader support
  534. def test_expat_binary_file(self):
  535. parser = create_parser()
  536. result = StringIO()
  537. xmlgen = XMLGenerator(result)
  538. parser.setContentHandler(xmlgen)
  539. parser.parse(open(TEST_XMLFILE))
  540. self.assertEqual(result.getvalue(), xml_test_out)
  541. @requires_unicode_filenames
  542. def test_expat_file_unicode(self):
  543. fname = support.TESTFN_UNICODE
  544. shutil.copyfile(TEST_XMLFILE, fname)
  545. self.addCleanup(support.unlink, fname)
  546. parser = create_parser()
  547. result = StringIO()
  548. xmlgen = XMLGenerator(result)
  549. parser.setContentHandler(xmlgen)
  550. parser.parse(open(fname))
  551. self.assertEqual(result.getvalue(), xml_test_out)
  552. # ===== DTDHandler support
  553. class TestDTDHandler:
  554. def __init__(self):
  555. self._notations = []
  556. self._entities = []
  557. def notationDecl(self, name, publicId, systemId):
  558. self._notations.append((name, publicId, systemId))
  559. def unparsedEntityDecl(self, name, publicId, systemId, ndata):
  560. self._entities.append((name, publicId, systemId, ndata))
  561. def test_expat_dtdhandler(self):
  562. parser = create_parser()
  563. handler = self.TestDTDHandler()
  564. parser.setDTDHandler(handler)
  565. parser.feed('<!DOCTYPE doc [\n')
  566. parser.feed(' <!ENTITY img SYSTEM "expat.gif" NDATA GIF>\n')
  567. parser.feed(' <!NOTATION GIF PUBLIC "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN">\n')
  568. parser.feed(']>\n')
  569. parser.feed('<doc></doc>')
  570. parser.close()
  571. self.assertEqual(handler._notations,
  572. [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)])
  573. self.assertEqual(handler._entities, [("img", None, "expat.gif", "GIF")])
  574. # ===== EntityResolver support
  575. class TestEntityResolver:
  576. def resolveEntity(self, publicId, systemId):
  577. inpsrc = InputSource()
  578. inpsrc.setByteStream(StringIO("<entity/>"))
  579. return inpsrc
  580. def test_expat_entityresolver(self):
  581. parser = create_parser()
  582. parser.setEntityResolver(self.TestEntityResolver())
  583. result = StringIO()
  584. parser.setContentHandler(XMLGenerator(result))
  585. parser.feed('<!DOCTYPE doc [\n')
  586. parser.feed(' <!ENTITY test SYSTEM "whatever">\n')
  587. parser.feed(']>\n')
  588. parser.feed('<doc>&test;</doc>')
  589. parser.close()
  590. self.assertEqual(result.getvalue(), start +
  591. "<doc><entity></entity></doc>")
  592. # ===== Attributes support
  593. class AttrGatherer(ContentHandler):
  594. def startElement(self, name, attrs):
  595. self._attrs = attrs
  596. def startElementNS(self, name, qname, attrs):
  597. self._attrs = attrs
  598. def test_expat_attrs_empty(self):
  599. parser = create_parser()
  600. gather = self.AttrGatherer()
  601. parser.setContentHandler(gather)
  602. parser.feed("<doc/>")
  603. parser.close()
  604. self.verify_empty_attrs(gather._attrs)
  605. def test_expat_attrs_wattr(self):
  606. parser = create_parser()
  607. gather = self.AttrGatherer()
  608. parser.setContentHandler(gather)
  609. parser.feed("<doc attr='val'/>")
  610. parser.close()
  611. self.verify_attrs_wattr(gather._attrs)
  612. def test_expat_nsattrs_empty(self):
  613. parser = create_parser(1)
  614. gather = self.AttrGatherer()
  615. parser.setContentHandler(gather)
  616. parser.feed("<doc/>")
  617. parser.close()
  618. self.verify_empty_nsattrs(gather._attrs)
  619. def test_expat_nsattrs_wattr(self):
  620. parser = create_parser(1)
  621. gather = self.AttrGatherer()
  622. parser.setContentHandler(gather)
  623. parser.feed("<doc xmlns:ns='%s' ns:attr='val'/>" % ns_uri)
  624. parser.close()
  625. attrs = gather._attrs
  626. self.assertEqual(attrs.getLength(), 1)
  627. self.assertEqual(attrs.getNames(), [(ns_uri, "attr")])
  628. self.assertTrue((attrs.getQNames() == [] or
  629. attrs.getQNames() == ["ns:attr"]))
  630. self.assertEqual(len(attrs), 1)
  631. self.assertTrue(attrs.has_key((ns_uri, "attr")))
  632. self.assertEqual(attrs.get((ns_uri, "attr")), "val")
  633. self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val")
  634. self.assertEqual(attrs.items(), [((ns_uri, "attr"), "val")])
  635. self.assertEqual(attrs.values(), ["val"])
  636. self.assertEqual(attrs.getValue((ns_uri, "attr")), "val")
  637. self.assertEqual(attrs[(ns_uri, "attr")], "val")
  638. # ===== InputSource support
  639. def test_expat_inpsource_filename(self):
  640. parser = create_parser()
  641. result = StringIO()
  642. xmlgen = XMLGenerator(result)
  643. parser.setContentHandler(xmlgen)
  644. parser.parse(TEST_XMLFILE)
  645. self.assertEqual(result.getvalue(), xml_test_out)
  646. def test_expat_inpsource_sysid(self):
  647. parser = create_parser()
  648. result = StringIO()
  649. xmlgen = XMLGenerator(result)
  650. parser.setContentHandler(xmlgen)
  651. parser.parse(InputSource(TEST_XMLFILE))
  652. self.assertEqual(result.getvalue(), xml_test_out)
  653. @requires_unicode_filenames
  654. def test_expat_inpsource_sysid_unicode(self):
  655. fname = support.TESTFN_UNICODE
  656. shutil.copyfile(TEST_XMLFILE, fname)
  657. self.addCleanup(support.unlink, fname)
  658. parser = create_parser()
  659. result = StringIO()
  660. xmlgen = XMLGenerator(result)
  661. parser.setContentHandler(xmlgen)
  662. parser.parse(InputSource(fname))
  663. self.assertEqual(result.getvalue(), xml_test_out)
  664. def test_expat_inpsource_byte_stream(self):
  665. parser = create_parser()
  666. result = StringIO()
  667. xmlgen = XMLGenerator(result)
  668. parser.setContentHandler(xmlgen)
  669. inpsrc = InputSource()
  670. inpsrc.setByteStream(open(TEST_XMLFILE))
  671. parser.parse(inpsrc)
  672. self.assertEqual(result.getvalue(), xml_test_out)
  673. # ===== IncrementalParser support
  def test_expat_incremental(self):
      # Feed the document in two pieces through the incremental interface
      # and compare the generated text with start + "<doc></doc>".
      output = StringIO()
      generator = XMLGenerator(output)
      reader = create_parser()
      reader.setContentHandler(generator)

      for chunk in ("<doc>", "</doc>"):
          reader.feed(chunk)
      reader.close()

      self.assertEqual(output.getvalue(), start + "<doc></doc>")
  def test_expat_incremental_reset(self):
      # Start one document, abandon it with reset(), then feed a complete
      # document; only the second run's output is checked.
      abandoned = StringIO()
      reader = create_parser()
      reader.setContentHandler(XMLGenerator(abandoned))
      for chunk in ("<doc>", "text"):
          reader.feed(chunk)

      # Install a fresh generator first, then reset the parser, exactly in
      # that order.
      output = StringIO()
      reader.setContentHandler(XMLGenerator(output))
      reader.reset()

      for chunk in ("<doc>", "text", "</doc>"):
          reader.feed(chunk)
      reader.close()

      self.assertEqual(output.getvalue(), start + "<doc>text</doc>")
  699. # ===== Locator support
  def test_expat_locator_noinfo(self):
      # Data supplied through feed() carries no system or public id; after
      # close() the parser must report None for both and line number 1.
      output = StringIO()
      reader = create_parser()
      reader.setContentHandler(XMLGenerator(output))

      for chunk in ("<doc>", "</doc>"):
          reader.feed(chunk)
      reader.close()

      self.assertEqual(reader.getSystemId(), None)
      self.assertEqual(reader.getPublicId(), None)
      self.assertEqual(reader.getLineNumber(), 1)
  def test_expat_locator_withinfo(self):
      # After parsing a file by name, the parser must report that name as
      # its system id and None as its public id.
      output = StringIO()
      reader = create_parser()
      reader.setContentHandler(XMLGenerator(output))

      reader.parse(TEST_XMLFILE)

      self.assertEqual(reader.getSystemId(), TEST_XMLFILE)
      self.assertEqual(reader.getPublicId(), None)
  @requires_unicode_filenames
  def test_expat_locator_withinfo_unicode(self):
      # Parse a copy of the sample document stored under
      # support.TESTFN_UNICODE and check that this name comes back as the
      # system id, with None as the public id.
      unicode_path = support.TESTFN_UNICODE
      shutil.copyfile(TEST_XMLFILE, unicode_path)
      # Registered only after the copy succeeded.
      self.addCleanup(support.unlink, unicode_path)

      output = StringIO()
      reader = create_parser()
      reader.setContentHandler(XMLGenerator(output))
      reader.parse(unicode_path)

      self.assertEqual(reader.getSystemId(), unicode_path)
      self.assertEqual(reader.getPublicId(), None)
  731. # ===========================================================================
  732. #
  733. # error reporting
  734. #
  735. # ===========================================================================
  class ErrorReportingTest(unittest.TestCase):
      # Checks the information attached to SAX exceptions: the system id
      # carried by a failed parse, the position reported for an incomplete
      # document, and that str() of a SAXParseException tolerates None
      # line/column numbers.

      def test_expat_inpsource_location(self):
          parser = create_parser()
          parser.setContentHandler(ContentHandler()) # do nothing
          source = InputSource()
          source.setByteStream(StringIO("<foo bar foobar>"))   # ill-formed
          name = "a file name"
          source.setSystemId(name)
          # assertRaises fails the test when nothing is raised, lets any
          # other exception type propagate, and exposes the caught
          # exception for the system-id check below.
          with self.assertRaises(SAXException) as caught:
              parser.parse(source)
          self.assertEqual(caught.exception.getSystemId(), name)

      def test_expat_incomplete(self):
          parser = create_parser()
          parser.setContentHandler(ContentHandler()) # do nothing
          self.assertRaises(SAXParseException, parser.parse, StringIO("<foo>"))
          # The parser must still report where the input ended.
          self.assertEqual(parser.getColumnNumber(), 5)
          self.assertEqual(parser.getLineNumber(), 1)

      def test_sax_parse_exception_str(self):
          # pass various values from a locator to the SAXParseException to
          # make sure that the __str__() doesn't fall apart when None is
          # passed instead of an integer line and column number
          #
          # use "normal" values for the locator:
          str(SAXParseException("message", None,
                                self.DummyLocator(1, 1)))
          # use None for the line number:
          str(SAXParseException("message", None,
                                self.DummyLocator(None, 1)))
          # use None for the column number:
          str(SAXParseException("message", None,
                                self.DummyLocator(1, None)))
          # use None for both:
          str(SAXParseException("message", None,
                                self.DummyLocator(None, None)))

      class DummyLocator:
          # Minimal locator stand-in: fixed public/system ids and the
          # line/column values given to the constructor (which may be None).

          def __init__(self, lineno, colno):
              self._lineno = lineno
              self._colno = colno

          def getPublicId(self):
              return "pubid"

          def getSystemId(self):
              return "sysid"

          def getLineNumber(self):
              return self._lineno

          def getColumnNumber(self):
              return self._colno
  784. # ===========================================================================
  785. #
  786. # xmlreader tests
  787. #
  788. # ===========================================================================
  class XmlReaderTest(XmlTestBase):
      # Tests for the AttributesImpl / AttributesNSImpl containers, followed
      # by two regression tests for historical import problems.

      # ===== AttributesImpl

      def test_attrs_empty(self):
          self.verify_empty_attrs(AttributesImpl({}))

      def test_attrs_wattr(self):
          self.verify_attrs_wattr(AttributesImpl({"attr" : "val"}))

      def test_nsattrs_empty(self):
          self.verify_empty_nsattrs(AttributesNSImpl({}, {}))

      def test_nsattrs_wattr(self):
          # One namespaced attribute: (ns_uri, "attr") -> "val", with the
          # qualified name "ns:attr".
          attrs = AttributesNSImpl({(ns_uri, "attr") : "val"},
                                   {(ns_uri, "attr") : "ns:attr"})

          self.assertEqual(attrs.getLength(), 1)
          self.assertEqual(attrs.getNames(), [(ns_uri, "attr")])
          self.assertEqual(attrs.getQNames(), ["ns:attr"])
          self.assertEqual(len(attrs), 1)
          self.assertTrue(attrs.has_key((ns_uri, "attr")))
          self.assertEqual(attrs.keys(), [(ns_uri, "attr")])
          self.assertEqual(attrs.get((ns_uri, "attr")), "val")
          self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val")
          self.assertEqual(attrs.items(), [((ns_uri, "attr"), "val")])
          self.assertEqual(attrs.values(), ["val"])
          self.assertEqual(attrs.getValue((ns_uri, "attr")), "val")
          self.assertEqual(attrs.getValueByQName("ns:attr"), "val")
          self.assertEqual(attrs.getNameByQName("ns:attr"), (ns_uri, "attr"))
          self.assertEqual(attrs[(ns_uri, "attr")], "val")
          self.assertEqual(attrs.getQNameByName((ns_uri, "attr")), "ns:attr")

      # During the development of Python 2.5, an attempt to move the "xml"
      # package implementation to a new package ("xmlcore") proved painful.
      # The goal of this change was to allow applications to be able to
      # obtain and rely on behavior in the standard library implementation
      # of the XML support without needing to be concerned about the
      # availability of the PyXML implementation.
      #
      # While the existing import hackery in Lib/xml/__init__.py can cause
      # PyXML's _xmlplus package to supplant the "xml" package, that only
      # works because either implementation uses the "xml" package name for
      # imports.
      #
      # The move resulted in a number of problems related to the fact that
      # the import machinery's "package context" is based on the name that's
      # being imported rather than the __name__ of the actual package
      # containment; it wasn't possible for the "xml" package to be replaced
      # by a simple module that indirected imports to the "xmlcore" package.
      #
      # The following two tests exercised bugs that were introduced in that
      # attempt.  Keeping these tests around will help detect problems with
      # other attempts to provide reliable access to the standard library's
      # implementation of the XML support.

      def test_sf_1511497(self):
          # Bug report: http://www.python.org/sf/1511497
          import sys
          old_modules = sys.modules.copy()
          # Iterate over a snapshot of the keys: entries are deleted from
          # sys.modules inside the loop, which is only safe when the
          # iteration does not read the live dictionary.
          for modname in list(sys.modules):
              if modname.startswith("xml."):
                  del sys.modules[modname]
          try:
              import xml.sax.expatreader
              module = xml.sax.expatreader
              self.assertEqual(module.__name__, "xml.sax.expatreader")
          finally:
              # Put back every module entry that was present before the test.
              sys.modules.update(old_modules)

      def test_sf_1513611(self):
          # Bug report: http://www.python.org/sf/1513611
          sio = StringIO("invalid")
          parser = make_parser()
          from xml.sax import SAXParseException
          self.assertRaises(SAXParseException, parser.parse, sio)
  def test_main():
      # Pass every test class of this module to run_unittest, in the order
      # listed here.
      test_classes = (
          MakeParserTest,
          ParseTest,
          SaxutilsTest,
          PrepareInputSourceTest,
          StringXmlgenTest,
          BytesIOXmlgenTest,
          WriterXmlgenTest,
          ExpatReaderTest,
          ErrorReportingTest,
          XmlReaderTest,
      )
      run_unittest(*test_classes)
  # Run the whole suite when this file is executed directly as a script.
  if __name__ == "__main__":
      test_main()