xmlbuilder.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410
  1. """Implementation of the DOM Level 3 'LS-Load' feature."""
  2. import copy
  3. import warnings
  4. import xml.dom
  5. from xml.dom.NodeFilter import NodeFilter
  6. __all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"]
  7. class Options:
  8. """Features object that has variables set for each DOMBuilder feature.
  9. The DOMBuilder class uses an instance of this class to pass settings to
  10. the ExpatBuilder class.
  11. """
  12. # Note that the DOMBuilder class in LoadSave constrains which of these
  13. # values can be set using the DOM Level 3 LoadSave feature.
  14. namespaces = 1
  15. namespace_declarations = True
  16. validation = False
  17. external_parameter_entities = True
  18. external_general_entities = True
  19. external_dtd_subset = True
  20. validate_if_schema = False
  21. validate = False
  22. datatype_normalization = False
  23. create_entity_ref_nodes = True
  24. entities = True
  25. whitespace_in_element_content = True
  26. cdata_sections = True
  27. comments = True
  28. charset_overrides_xml_encoding = True
  29. infoset = False
  30. supported_mediatypes_only = False
  31. errorHandler = None
  32. filter = None
  33. class DOMBuilder:
  34. entityResolver = None
  35. errorHandler = None
  36. filter = None
  37. ACTION_REPLACE = 1
  38. ACTION_APPEND_AS_CHILDREN = 2
  39. ACTION_INSERT_AFTER = 3
  40. ACTION_INSERT_BEFORE = 4
  41. _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN,
  42. ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE)
  43. def __init__(self):
  44. self._options = Options()
  45. def _get_entityResolver(self):
  46. return self.entityResolver
  47. def _set_entityResolver(self, entityResolver):
  48. self.entityResolver = entityResolver
  49. def _get_errorHandler(self):
  50. return self.errorHandler
  51. def _set_errorHandler(self, errorHandler):
  52. self.errorHandler = errorHandler
  53. def _get_filter(self):
  54. return self.filter
  55. def _set_filter(self, filter):
  56. self.filter = filter
  57. def setFeature(self, name, state):
  58. if self.supportsFeature(name):
  59. state = state and 1 or 0
  60. try:
  61. settings = self._settings[(_name_xform(name), state)]
  62. except KeyError:
  63. raise xml.dom.NotSupportedErr(
  64. "unsupported feature: %r" % (name,))
  65. else:
  66. for name, value in settings:
  67. setattr(self._options, name, value)
  68. else:
  69. raise xml.dom.NotFoundErr("unknown feature: " + repr(name))
  70. def supportsFeature(self, name):
  71. return hasattr(self._options, _name_xform(name))
  72. def canSetFeature(self, name, state):
  73. key = (_name_xform(name), state and 1 or 0)
  74. return key in self._settings
  75. # This dictionary maps from (feature,value) to a list of
  76. # (option,value) pairs that should be set on the Options object.
  77. # If a (feature,value) setting is not in this dictionary, it is
  78. # not supported by the DOMBuilder.
  79. #
  80. _settings = {
  81. ("namespace_declarations", 0): [
  82. ("namespace_declarations", 0)],
  83. ("namespace_declarations", 1): [
  84. ("namespace_declarations", 1)],
  85. ("validation", 0): [
  86. ("validation", 0)],
  87. ("external_general_entities", 0): [
  88. ("external_general_entities", 0)],
  89. ("external_general_entities", 1): [
  90. ("external_general_entities", 1)],
  91. ("external_parameter_entities", 0): [
  92. ("external_parameter_entities", 0)],
  93. ("external_parameter_entities", 1): [
  94. ("external_parameter_entities", 1)],
  95. ("validate_if_schema", 0): [
  96. ("validate_if_schema", 0)],
  97. ("create_entity_ref_nodes", 0): [
  98. ("create_entity_ref_nodes", 0)],
  99. ("create_entity_ref_nodes", 1): [
  100. ("create_entity_ref_nodes", 1)],
  101. ("entities", 0): [
  102. ("create_entity_ref_nodes", 0),
  103. ("entities", 0)],
  104. ("entities", 1): [
  105. ("entities", 1)],
  106. ("whitespace_in_element_content", 0): [
  107. ("whitespace_in_element_content", 0)],
  108. ("whitespace_in_element_content", 1): [
  109. ("whitespace_in_element_content", 1)],
  110. ("cdata_sections", 0): [
  111. ("cdata_sections", 0)],
  112. ("cdata_sections", 1): [
  113. ("cdata_sections", 1)],
  114. ("comments", 0): [
  115. ("comments", 0)],
  116. ("comments", 1): [
  117. ("comments", 1)],
  118. ("charset_overrides_xml_encoding", 0): [
  119. ("charset_overrides_xml_encoding", 0)],
  120. ("charset_overrides_xml_encoding", 1): [
  121. ("charset_overrides_xml_encoding", 1)],
  122. ("infoset", 0): [],
  123. ("infoset", 1): [
  124. ("namespace_declarations", 0),
  125. ("validate_if_schema", 0),
  126. ("create_entity_ref_nodes", 0),
  127. ("entities", 0),
  128. ("cdata_sections", 0),
  129. ("datatype_normalization", 1),
  130. ("whitespace_in_element_content", 1),
  131. ("comments", 1),
  132. ("charset_overrides_xml_encoding", 1)],
  133. ("supported_mediatypes_only", 0): [
  134. ("supported_mediatypes_only", 0)],
  135. ("namespaces", 0): [
  136. ("namespaces", 0)],
  137. ("namespaces", 1): [
  138. ("namespaces", 1)],
  139. }
  140. def getFeature(self, name):
  141. xname = _name_xform(name)
  142. try:
  143. return getattr(self._options, xname)
  144. except AttributeError:
  145. if name == "infoset":
  146. options = self._options
  147. return (options.datatype_normalization
  148. and options.whitespace_in_element_content
  149. and options.comments
  150. and options.charset_overrides_xml_encoding
  151. and not (options.namespace_declarations
  152. or options.validate_if_schema
  153. or options.create_entity_ref_nodes
  154. or options.entities
  155. or options.cdata_sections))
  156. raise xml.dom.NotFoundErr("feature %s not known" % repr(name))
  157. def parseURI(self, uri):
  158. if self.entityResolver:
  159. input = self.entityResolver.resolveEntity(None, uri)
  160. else:
  161. input = DOMEntityResolver().resolveEntity(None, uri)
  162. return self.parse(input)
  163. def parse(self, input):
  164. options = copy.copy(self._options)
  165. options.filter = self.filter
  166. options.errorHandler = self.errorHandler
  167. fp = input.byteStream
  168. if fp is None and options.systemId:
  169. import urllib.request
  170. fp = urllib.request.urlopen(input.systemId)
  171. return self._parse_bytestream(fp, options)
  172. def parseWithContext(self, input, cnode, action):
  173. if action not in self._legal_actions:
  174. raise ValueError("not a legal action")
  175. raise NotImplementedError("Haven't written this yet...")
  176. def _parse_bytestream(self, stream, options):
  177. import xml.dom.expatbuilder
  178. builder = xml.dom.expatbuilder.makeBuilder(options)
  179. return builder.parseFile(stream)
  180. def _name_xform(name):
  181. return name.lower().replace('-', '_')
  182. class DOMEntityResolver(object):
  183. __slots__ = '_opener',
  184. def resolveEntity(self, publicId, systemId):
  185. assert systemId is not None
  186. source = DOMInputSource()
  187. source.publicId = publicId
  188. source.systemId = systemId
  189. source.byteStream = self._get_opener().open(systemId)
  190. # determine the encoding if the transport provided it
  191. source.encoding = self._guess_media_encoding(source)
  192. # determine the base URI is we can
  193. import posixpath, urllib.parse
  194. parts = urllib.parse.urlparse(systemId)
  195. scheme, netloc, path, params, query, fragment = parts
  196. # XXX should we check the scheme here as well?
  197. if path and not path.endswith("/"):
  198. path = posixpath.dirname(path) + "/"
  199. parts = scheme, netloc, path, params, query, fragment
  200. source.baseURI = urllib.parse.urlunparse(parts)
  201. return source
  202. def _get_opener(self):
  203. try:
  204. return self._opener
  205. except AttributeError:
  206. self._opener = self._create_opener()
  207. return self._opener
  208. def _create_opener(self):
  209. import urllib.request
  210. return urllib.request.build_opener()
  211. def _guess_media_encoding(self, source):
  212. info = source.byteStream.info()
  213. if "Content-Type" in info:
  214. for param in info.getplist():
  215. if param.startswith("charset="):
  216. return param.split("=", 1)[1].lower()
  217. class DOMInputSource(object):
  218. __slots__ = ('byteStream', 'characterStream', 'stringData',
  219. 'encoding', 'publicId', 'systemId', 'baseURI')
  220. def __init__(self):
  221. self.byteStream = None
  222. self.characterStream = None
  223. self.stringData = None
  224. self.encoding = None
  225. self.publicId = None
  226. self.systemId = None
  227. self.baseURI = None
  228. def _get_byteStream(self):
  229. return self.byteStream
  230. def _set_byteStream(self, byteStream):
  231. self.byteStream = byteStream
  232. def _get_characterStream(self):
  233. return self.characterStream
  234. def _set_characterStream(self, characterStream):
  235. self.characterStream = characterStream
  236. def _get_stringData(self):
  237. return self.stringData
  238. def _set_stringData(self, data):
  239. self.stringData = data
  240. def _get_encoding(self):
  241. return self.encoding
  242. def _set_encoding(self, encoding):
  243. self.encoding = encoding
  244. def _get_publicId(self):
  245. return self.publicId
  246. def _set_publicId(self, publicId):
  247. self.publicId = publicId
  248. def _get_systemId(self):
  249. return self.systemId
  250. def _set_systemId(self, systemId):
  251. self.systemId = systemId
  252. def _get_baseURI(self):
  253. return self.baseURI
  254. def _set_baseURI(self, uri):
  255. self.baseURI = uri
  256. class DOMBuilderFilter:
  257. """Element filter which can be used to tailor construction of
  258. a DOM instance.
  259. """
  260. # There's really no need for this class; concrete implementations
  261. # should just implement the endElement() and startElement()
  262. # methods as appropriate. Using this makes it easy to only
  263. # implement one of them.
  264. FILTER_ACCEPT = 1
  265. FILTER_REJECT = 2
  266. FILTER_SKIP = 3
  267. FILTER_INTERRUPT = 4
  268. whatToShow = NodeFilter.SHOW_ALL
  269. def _get_whatToShow(self):
  270. return self.whatToShow
  271. def acceptNode(self, element):
  272. return self.FILTER_ACCEPT
  273. def startContainer(self, element):
  274. return self.FILTER_ACCEPT
  275. del NodeFilter
  276. class _AsyncDeprecatedProperty:
  277. def warn(self, cls):
  278. clsname = cls.__name__
  279. warnings.warn(
  280. "{cls}.async is deprecated; use {cls}.async_".format(cls=clsname),
  281. DeprecationWarning)
  282. def __get__(self, instance, cls):
  283. self.warn(cls)
  284. if instance is not None:
  285. return instance.async_
  286. return False
  287. def __set__(self, instance, value):
  288. self.warn(type(instance))
  289. setattr(instance, 'async_', value)
  290. class DocumentLS:
  291. """Mixin to create documents that conform to the load/save spec."""
  292. async = _AsyncDeprecatedProperty()
  293. async_ = False
  294. def _get_async(self):
  295. return False
  296. def _set_async(self, async):
  297. if async:
  298. raise xml.dom.NotSupportedErr(
  299. "asynchronous document loading is not supported")
  300. def abort(self):
  301. # What does it mean to "clear" a document? Does the
  302. # documentElement disappear?
  303. raise NotImplementedError(
  304. "haven't figured out what this means yet")
  305. def load(self, uri):
  306. raise NotImplementedError("haven't written this yet")
  307. def loadXML(self, source):
  308. raise NotImplementedError("haven't written this yet")
  309. def saveXML(self, snode):
  310. if snode is None:
  311. snode = self
  312. elif snode.ownerDocument is not self:
  313. raise xml.dom.WrongDocumentErr()
  314. return snode.toxml()
  315. del _AsyncDeprecatedProperty
  316. class DOMImplementationLS:
  317. MODE_SYNCHRONOUS = 1
  318. MODE_ASYNCHRONOUS = 2
  319. def createDOMBuilder(self, mode, schemaType):
  320. if schemaType is not None:
  321. raise xml.dom.NotSupportedErr(
  322. "schemaType not yet supported")
  323. if mode == self.MODE_SYNCHRONOUS:
  324. return DOMBuilder()
  325. if mode == self.MODE_ASYNCHRONOUS:
  326. raise xml.dom.NotSupportedErr(
  327. "asynchronous builders are not supported")
  328. raise ValueError("unknown value for mode")
  329. def createDOMWriter(self):
  330. raise NotImplementedError(
  331. "the writer interface hasn't been written yet!")
  332. def createDOMInputSource(self):
  333. return DOMInputSource()