# minidom.py (65 KB)
"""Simple implementation of the Level 1 DOM.

Namespaces and other minor Level 2 features are also supported.

parse("foo.xml")

parseString("<foo><bar/></foo>")

Todo:
=====
 * convenience methods for getting elements and text.
 * more testing
 * bring some of the writer and linearizer code into conformance with this
   interface
 * SAX 2 namespaces
"""
  13. import xml.dom
  14. from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
  15. from xml.dom.minicompat import *
  16. from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS
  17. # This is used by the ID-cache invalidation checks; the list isn't
  18. # actually complete, since the nodes being checked will never be the
  19. # DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE. (The node being checked is
  20. # the node being added or removed, not the node being modified.)
  21. #
  22. _nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE,
  23. xml.dom.Node.ENTITY_REFERENCE_NODE)
  24. class Node(xml.dom.Node):
  25. namespaceURI = None # this is non-null only for elements and attributes
  26. parentNode = None
  27. ownerDocument = None
  28. nextSibling = None
  29. previousSibling = None
  30. prefix = EMPTY_PREFIX # non-null only for NS elements and attributes
  31. def __nonzero__(self):
  32. return True
  33. def toxml(self, encoding = None):
  34. return self.toprettyxml("", "", encoding)
  35. def toprettyxml(self, indent="\t", newl="\n", encoding = None):
  36. # indent = the indentation string to prepend, per level
  37. # newl = the newline string to append
  38. writer = _get_StringIO()
  39. if encoding is not None:
  40. import codecs
  41. # Can't use codecs.getwriter to preserve 2.0 compatibility
  42. writer = codecs.lookup(encoding)[3](writer)
  43. if self.nodeType == Node.DOCUMENT_NODE:
  44. # Can pass encoding only to document, to put it into XML header
  45. self.writexml(writer, "", indent, newl, encoding)
  46. else:
  47. self.writexml(writer, "", indent, newl)
  48. return writer.getvalue()
  49. def hasChildNodes(self):
  50. if self.childNodes:
  51. return True
  52. else:
  53. return False
  54. def _get_childNodes(self):
  55. return self.childNodes
  56. def _get_firstChild(self):
  57. if self.childNodes:
  58. return self.childNodes[0]
  59. def _get_lastChild(self):
  60. if self.childNodes:
  61. return self.childNodes[-1]
  62. def insertBefore(self, newChild, refChild):
  63. if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
  64. for c in tuple(newChild.childNodes):
  65. self.insertBefore(c, refChild)
  66. ### The DOM does not clearly specify what to return in this case
  67. return newChild
  68. if newChild.nodeType not in self._child_node_types:
  69. raise xml.dom.HierarchyRequestErr(
  70. "%s cannot be child of %s" % (repr(newChild), repr(self)))
  71. if newChild.parentNode is not None:
  72. newChild.parentNode.removeChild(newChild)
  73. if refChild is None:
  74. self.appendChild(newChild)
  75. else:
  76. try:
  77. index = self.childNodes.index(refChild)
  78. except ValueError:
  79. raise xml.dom.NotFoundErr()
  80. if newChild.nodeType in _nodeTypes_with_children:
  81. _clear_id_cache(self)
  82. self.childNodes.insert(index, newChild)
  83. newChild.nextSibling = refChild
  84. refChild.previousSibling = newChild
  85. if index:
  86. node = self.childNodes[index-1]
  87. node.nextSibling = newChild
  88. newChild.previousSibling = node
  89. else:
  90. newChild.previousSibling = None
  91. newChild.parentNode = self
  92. return newChild
  93. def appendChild(self, node):
  94. if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
  95. for c in tuple(node.childNodes):
  96. self.appendChild(c)
  97. ### The DOM does not clearly specify what to return in this case
  98. return node
  99. if node.nodeType not in self._child_node_types:
  100. raise xml.dom.HierarchyRequestErr(
  101. "%s cannot be child of %s" % (repr(node), repr(self)))
  102. elif node.nodeType in _nodeTypes_with_children:
  103. _clear_id_cache(self)
  104. if node.parentNode is not None:
  105. node.parentNode.removeChild(node)
  106. _append_child(self, node)
  107. node.nextSibling = None
  108. return node
  109. def replaceChild(self, newChild, oldChild):
  110. if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
  111. refChild = oldChild.nextSibling
  112. self.removeChild(oldChild)
  113. return self.insertBefore(newChild, refChild)
  114. if newChild.nodeType not in self._child_node_types:
  115. raise xml.dom.HierarchyRequestErr(
  116. "%s cannot be child of %s" % (repr(newChild), repr(self)))
  117. if newChild is oldChild:
  118. return
  119. if newChild.parentNode is not None:
  120. newChild.parentNode.removeChild(newChild)
  121. try:
  122. index = self.childNodes.index(oldChild)
  123. except ValueError:
  124. raise xml.dom.NotFoundErr()
  125. self.childNodes[index] = newChild
  126. newChild.parentNode = self
  127. oldChild.parentNode = None
  128. if (newChild.nodeType in _nodeTypes_with_children
  129. or oldChild.nodeType in _nodeTypes_with_children):
  130. _clear_id_cache(self)
  131. newChild.nextSibling = oldChild.nextSibling
  132. newChild.previousSibling = oldChild.previousSibling
  133. oldChild.nextSibling = None
  134. oldChild.previousSibling = None
  135. if newChild.previousSibling:
  136. newChild.previousSibling.nextSibling = newChild
  137. if newChild.nextSibling:
  138. newChild.nextSibling.previousSibling = newChild
  139. return oldChild
  140. def removeChild(self, oldChild):
  141. try:
  142. self.childNodes.remove(oldChild)
  143. except ValueError:
  144. raise xml.dom.NotFoundErr()
  145. if oldChild.nextSibling is not None:
  146. oldChild.nextSibling.previousSibling = oldChild.previousSibling
  147. if oldChild.previousSibling is not None:
  148. oldChild.previousSibling.nextSibling = oldChild.nextSibling
  149. oldChild.nextSibling = oldChild.previousSibling = None
  150. if oldChild.nodeType in _nodeTypes_with_children:
  151. _clear_id_cache(self)
  152. oldChild.parentNode = None
  153. return oldChild
  154. def normalize(self):
  155. L = []
  156. for child in self.childNodes:
  157. if child.nodeType == Node.TEXT_NODE:
  158. if not child.data:
  159. # empty text node; discard
  160. if L:
  161. L[-1].nextSibling = child.nextSibling
  162. if child.nextSibling:
  163. child.nextSibling.previousSibling = child.previousSibling
  164. child.unlink()
  165. elif L and L[-1].nodeType == child.nodeType:
  166. # collapse text node
  167. node = L[-1]
  168. node.data = node.data + child.data
  169. node.nextSibling = child.nextSibling
  170. if child.nextSibling:
  171. child.nextSibling.previousSibling = node
  172. child.unlink()
  173. else:
  174. L.append(child)
  175. else:
  176. L.append(child)
  177. if child.nodeType == Node.ELEMENT_NODE:
  178. child.normalize()
  179. self.childNodes[:] = L
  180. def cloneNode(self, deep):
  181. return _clone_node(self, deep, self.ownerDocument or self)
  182. def isSupported(self, feature, version):
  183. return self.ownerDocument.implementation.hasFeature(feature, version)
  184. def _get_localName(self):
  185. # Overridden in Element and Attr where localName can be Non-Null
  186. return None
  187. # Node interfaces from Level 3 (WD 9 April 2002)
  188. def isSameNode(self, other):
  189. return self is other
  190. def getInterface(self, feature):
  191. if self.isSupported(feature, None):
  192. return self
  193. else:
  194. return None
  195. # The "user data" functions use a dictionary that is only present
  196. # if some user data has been set, so be careful not to assume it
  197. # exists.
  198. def getUserData(self, key):
  199. try:
  200. return self._user_data[key][0]
  201. except (AttributeError, KeyError):
  202. return None
  203. def setUserData(self, key, data, handler):
  204. old = None
  205. try:
  206. d = self._user_data
  207. except AttributeError:
  208. d = {}
  209. self._user_data = d
  210. if key in d:
  211. old = d[key][0]
  212. if data is None:
  213. # ignore handlers passed for None
  214. handler = None
  215. if old is not None:
  216. del d[key]
  217. else:
  218. d[key] = (data, handler)
  219. return old
  220. def _call_user_data_handler(self, operation, src, dst):
  221. if hasattr(self, "_user_data"):
  222. for key, (data, handler) in self._user_data.items():
  223. if handler is not None:
  224. handler.handle(operation, key, data, src, dst)
  225. # minidom-specific API:
  226. def unlink(self):
  227. self.parentNode = self.ownerDocument = None
  228. if self.childNodes:
  229. for child in self.childNodes:
  230. child.unlink()
  231. self.childNodes = NodeList()
  232. self.previousSibling = None
  233. self.nextSibling = None
  234. defproperty(Node, "firstChild", doc="First child node, or None.")
  235. defproperty(Node, "lastChild", doc="Last child node, or None.")
  236. defproperty(Node, "localName", doc="Namespace-local name of this node.")
  237. def _append_child(self, node):
  238. # fast path with less checks; usable by DOM builders if careful
  239. childNodes = self.childNodes
  240. if childNodes:
  241. last = childNodes[-1]
  242. node.__dict__["previousSibling"] = last
  243. last.__dict__["nextSibling"] = node
  244. childNodes.append(node)
  245. node.__dict__["parentNode"] = self
  246. def _in_document(node):
  247. # return True iff node is part of a document tree
  248. while node is not None:
  249. if node.nodeType == Node.DOCUMENT_NODE:
  250. return True
  251. node = node.parentNode
  252. return False
  253. def _write_data(writer, data):
  254. "Writes datachars to writer."
  255. if data:
  256. data = data.replace("&", "&amp;").replace("<", "&lt;"). \
  257. replace("\"", "&quot;").replace(">", "&gt;")
  258. writer.write(data)
  259. def _get_elements_by_tagName_helper(parent, name, rc):
  260. for node in parent.childNodes:
  261. if node.nodeType == Node.ELEMENT_NODE and \
  262. (name == "*" or node.tagName == name):
  263. rc.append(node)
  264. _get_elements_by_tagName_helper(node, name, rc)
  265. return rc
  266. def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
  267. for node in parent.childNodes:
  268. if node.nodeType == Node.ELEMENT_NODE:
  269. if ((localName == "*" or node.localName == localName) and
  270. (nsURI == "*" or node.namespaceURI == nsURI)):
  271. rc.append(node)
  272. _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc)
  273. return rc
  274. class DocumentFragment(Node):
  275. nodeType = Node.DOCUMENT_FRAGMENT_NODE
  276. nodeName = "#document-fragment"
  277. nodeValue = None
  278. attributes = None
  279. parentNode = None
  280. _child_node_types = (Node.ELEMENT_NODE,
  281. Node.TEXT_NODE,
  282. Node.CDATA_SECTION_NODE,
  283. Node.ENTITY_REFERENCE_NODE,
  284. Node.PROCESSING_INSTRUCTION_NODE,
  285. Node.COMMENT_NODE,
  286. Node.NOTATION_NODE)
  287. def __init__(self):
  288. self.childNodes = NodeList()
  289. class Attr(Node):
  290. nodeType = Node.ATTRIBUTE_NODE
  291. attributes = None
  292. ownerElement = None
  293. specified = False
  294. _is_id = False
  295. _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)
  296. def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
  297. prefix=None):
  298. # skip setattr for performance
  299. d = self.__dict__
  300. d["nodeName"] = d["name"] = qName
  301. d["namespaceURI"] = namespaceURI
  302. d["prefix"] = prefix
  303. d['childNodes'] = NodeList()
  304. # Add the single child node that represents the value of the attr
  305. self.childNodes.append(Text())
  306. # nodeValue and value are set elsewhere
  307. def _get_localName(self):
  308. return self.nodeName.split(":", 1)[-1]
  309. def _get_specified(self):
  310. return self.specified
  311. def __setattr__(self, name, value):
  312. d = self.__dict__
  313. if name in ("value", "nodeValue"):
  314. d["value"] = d["nodeValue"] = value
  315. d2 = self.childNodes[0].__dict__
  316. d2["data"] = d2["nodeValue"] = value
  317. if self.ownerElement is not None:
  318. _clear_id_cache(self.ownerElement)
  319. elif name in ("name", "nodeName"):
  320. d["name"] = d["nodeName"] = value
  321. if self.ownerElement is not None:
  322. _clear_id_cache(self.ownerElement)
  323. else:
  324. d[name] = value
  325. def _set_prefix(self, prefix):
  326. nsuri = self.namespaceURI
  327. if prefix == "xmlns":
  328. if nsuri and nsuri != XMLNS_NAMESPACE:
  329. raise xml.dom.NamespaceErr(
  330. "illegal use of 'xmlns' prefix for the wrong namespace")
  331. d = self.__dict__
  332. d['prefix'] = prefix
  333. if prefix is None:
  334. newName = self.localName
  335. else:
  336. newName = "%s:%s" % (prefix, self.localName)
  337. if self.ownerElement:
  338. _clear_id_cache(self.ownerElement)
  339. d['nodeName'] = d['name'] = newName
  340. def _set_value(self, value):
  341. d = self.__dict__
  342. d['value'] = d['nodeValue'] = value
  343. if self.ownerElement:
  344. _clear_id_cache(self.ownerElement)
  345. self.childNodes[0].data = value
  346. def unlink(self):
  347. # This implementation does not call the base implementation
  348. # since most of that is not needed, and the expense of the
  349. # method call is not warranted. We duplicate the removal of
  350. # children, but that's all we needed from the base class.
  351. elem = self.ownerElement
  352. if elem is not None:
  353. del elem._attrs[self.nodeName]
  354. del elem._attrsNS[(self.namespaceURI, self.localName)]
  355. if self._is_id:
  356. self._is_id = False
  357. elem._magic_id_nodes -= 1
  358. self.ownerDocument._magic_id_count -= 1
  359. for child in self.childNodes:
  360. child.unlink()
  361. del self.childNodes[:]
  362. def _get_isId(self):
  363. if self._is_id:
  364. return True
  365. doc = self.ownerDocument
  366. elem = self.ownerElement
  367. if doc is None or elem is None:
  368. return False
  369. info = doc._get_elem_info(elem)
  370. if info is None:
  371. return False
  372. if self.namespaceURI:
  373. return info.isIdNS(self.namespaceURI, self.localName)
  374. else:
  375. return info.isId(self.nodeName)
  376. def _get_schemaType(self):
  377. doc = self.ownerDocument
  378. elem = self.ownerElement
  379. if doc is None or elem is None:
  380. return _no_type
  381. info = doc._get_elem_info(elem)
  382. if info is None:
  383. return _no_type
  384. if self.namespaceURI:
  385. return info.getAttributeTypeNS(self.namespaceURI, self.localName)
  386. else:
  387. return info.getAttributeType(self.nodeName)
  388. defproperty(Attr, "isId", doc="True if this attribute is an ID.")
  389. defproperty(Attr, "localName", doc="Namespace-local name of this attribute.")
  390. defproperty(Attr, "schemaType", doc="Schema type for this attribute.")
  391. class NamedNodeMap(object):
  392. """The attribute list is a transient interface to the underlying
  393. dictionaries. Mutations here will change the underlying element's
  394. dictionary.
  395. Ordering is imposed artificially and does not reflect the order of
  396. attributes as found in an input document.
  397. """
  398. __slots__ = ('_attrs', '_attrsNS', '_ownerElement')
  399. def __init__(self, attrs, attrsNS, ownerElement):
  400. self._attrs = attrs
  401. self._attrsNS = attrsNS
  402. self._ownerElement = ownerElement
  403. def _get_length(self):
  404. return len(self._attrs)
  405. def item(self, index):
  406. try:
  407. return self[self._attrs.keys()[index]]
  408. except IndexError:
  409. return None
  410. def items(self):
  411. L = []
  412. for node in self._attrs.values():
  413. L.append((node.nodeName, node.value))
  414. return L
  415. def itemsNS(self):
  416. L = []
  417. for node in self._attrs.values():
  418. L.append(((node.namespaceURI, node.localName), node.value))
  419. return L
  420. def has_key(self, key):
  421. if isinstance(key, StringTypes):
  422. return key in self._attrs
  423. else:
  424. return key in self._attrsNS
  425. def keys(self):
  426. return self._attrs.keys()
  427. def keysNS(self):
  428. return self._attrsNS.keys()
  429. def values(self):
  430. return self._attrs.values()
  431. def get(self, name, value=None):
  432. return self._attrs.get(name, value)
  433. __len__ = _get_length
  434. __hash__ = None # Mutable type can't be correctly hashed
  435. def __cmp__(self, other):
  436. if self._attrs is getattr(other, "_attrs", None):
  437. return 0
  438. else:
  439. return cmp(id(self), id(other))
  440. def __getitem__(self, attname_or_tuple):
  441. if isinstance(attname_or_tuple, tuple):
  442. return self._attrsNS[attname_or_tuple]
  443. else:
  444. return self._attrs[attname_or_tuple]
  445. # same as set
  446. def __setitem__(self, attname, value):
  447. if isinstance(value, StringTypes):
  448. try:
  449. node = self._attrs[attname]
  450. except KeyError:
  451. node = Attr(attname)
  452. node.ownerDocument = self._ownerElement.ownerDocument
  453. self.setNamedItem(node)
  454. node.value = value
  455. else:
  456. if not isinstance(value, Attr):
  457. raise TypeError, "value must be a string or Attr object"
  458. node = value
  459. self.setNamedItem(node)
  460. def getNamedItem(self, name):
  461. try:
  462. return self._attrs[name]
  463. except KeyError:
  464. return None
  465. def getNamedItemNS(self, namespaceURI, localName):
  466. try:
  467. return self._attrsNS[(namespaceURI, localName)]
  468. except KeyError:
  469. return None
  470. def removeNamedItem(self, name):
  471. n = self.getNamedItem(name)
  472. if n is not None:
  473. _clear_id_cache(self._ownerElement)
  474. del self._attrs[n.nodeName]
  475. del self._attrsNS[(n.namespaceURI, n.localName)]
  476. if 'ownerElement' in n.__dict__:
  477. n.__dict__['ownerElement'] = None
  478. return n
  479. else:
  480. raise xml.dom.NotFoundErr()
  481. def removeNamedItemNS(self, namespaceURI, localName):
  482. n = self.getNamedItemNS(namespaceURI, localName)
  483. if n is not None:
  484. _clear_id_cache(self._ownerElement)
  485. del self._attrsNS[(n.namespaceURI, n.localName)]
  486. del self._attrs[n.nodeName]
  487. if 'ownerElement' in n.__dict__:
  488. n.__dict__['ownerElement'] = None
  489. return n
  490. else:
  491. raise xml.dom.NotFoundErr()
  492. def setNamedItem(self, node):
  493. if not isinstance(node, Attr):
  494. raise xml.dom.HierarchyRequestErr(
  495. "%s cannot be child of %s" % (repr(node), repr(self)))
  496. old = self._attrs.get(node.name)
  497. if old:
  498. old.unlink()
  499. self._attrs[node.name] = node
  500. self._attrsNS[(node.namespaceURI, node.localName)] = node
  501. node.ownerElement = self._ownerElement
  502. _clear_id_cache(node.ownerElement)
  503. return old
  504. def setNamedItemNS(self, node):
  505. return self.setNamedItem(node)
  506. def __delitem__(self, attname_or_tuple):
  507. node = self[attname_or_tuple]
  508. _clear_id_cache(node.ownerElement)
  509. node.unlink()
  510. def __getstate__(self):
  511. return self._attrs, self._attrsNS, self._ownerElement
  512. def __setstate__(self, state):
  513. self._attrs, self._attrsNS, self._ownerElement = state
  514. defproperty(NamedNodeMap, "length",
  515. doc="Number of nodes in the NamedNodeMap.")
  516. AttributeList = NamedNodeMap
  517. class TypeInfo(object):
  518. __slots__ = 'namespace', 'name'
  519. def __init__(self, namespace, name):
  520. self.namespace = namespace
  521. self.name = name
  522. def __repr__(self):
  523. if self.namespace:
  524. return "<TypeInfo %r (from %r)>" % (self.name, self.namespace)
  525. else:
  526. return "<TypeInfo %r>" % self.name
  527. def _get_name(self):
  528. return self.name
  529. def _get_namespace(self):
  530. return self.namespace
  531. _no_type = TypeInfo(None, None)
  532. class Element(Node):
  533. nodeType = Node.ELEMENT_NODE
  534. nodeValue = None
  535. schemaType = _no_type
  536. _magic_id_nodes = 0
  537. _child_node_types = (Node.ELEMENT_NODE,
  538. Node.PROCESSING_INSTRUCTION_NODE,
  539. Node.COMMENT_NODE,
  540. Node.TEXT_NODE,
  541. Node.CDATA_SECTION_NODE,
  542. Node.ENTITY_REFERENCE_NODE)
  543. def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
  544. localName=None):
  545. self.tagName = self.nodeName = tagName
  546. self.prefix = prefix
  547. self.namespaceURI = namespaceURI
  548. self.childNodes = NodeList()
  549. self._attrs = {} # attributes are double-indexed:
  550. self._attrsNS = {} # tagName -> Attribute
  551. # URI,localName -> Attribute
  552. # in the future: consider lazy generation
  553. # of attribute objects this is too tricky
  554. # for now because of headaches with
  555. # namespaces.
  556. def _get_localName(self):
  557. return self.tagName.split(":", 1)[-1]
  558. def _get_tagName(self):
  559. return self.tagName
  560. def unlink(self):
  561. for attr in self._attrs.values():
  562. attr.unlink()
  563. self._attrs = None
  564. self._attrsNS = None
  565. Node.unlink(self)
  566. def getAttribute(self, attname):
  567. try:
  568. return self._attrs[attname].value
  569. except KeyError:
  570. return ""
  571. def getAttributeNS(self, namespaceURI, localName):
  572. try:
  573. return self._attrsNS[(namespaceURI, localName)].value
  574. except KeyError:
  575. return ""
  576. def setAttribute(self, attname, value):
  577. attr = self.getAttributeNode(attname)
  578. if attr is None:
  579. attr = Attr(attname)
  580. # for performance
  581. d = attr.__dict__
  582. d["value"] = d["nodeValue"] = value
  583. d["ownerDocument"] = self.ownerDocument
  584. self.setAttributeNode(attr)
  585. elif value != attr.value:
  586. d = attr.__dict__
  587. d["value"] = d["nodeValue"] = value
  588. if attr.isId:
  589. _clear_id_cache(self)
  590. def setAttributeNS(self, namespaceURI, qualifiedName, value):
  591. prefix, localname = _nssplit(qualifiedName)
  592. attr = self.getAttributeNodeNS(namespaceURI, localname)
  593. if attr is None:
  594. # for performance
  595. attr = Attr(qualifiedName, namespaceURI, localname, prefix)
  596. d = attr.__dict__
  597. d["prefix"] = prefix
  598. d["nodeName"] = qualifiedName
  599. d["value"] = d["nodeValue"] = value
  600. d["ownerDocument"] = self.ownerDocument
  601. self.setAttributeNode(attr)
  602. else:
  603. d = attr.__dict__
  604. if value != attr.value:
  605. d["value"] = d["nodeValue"] = value
  606. if attr.isId:
  607. _clear_id_cache(self)
  608. if attr.prefix != prefix:
  609. d["prefix"] = prefix
  610. d["nodeName"] = qualifiedName
  611. def getAttributeNode(self, attrname):
  612. return self._attrs.get(attrname)
  613. def getAttributeNodeNS(self, namespaceURI, localName):
  614. return self._attrsNS.get((namespaceURI, localName))
  615. def setAttributeNode(self, attr):
  616. if attr.ownerElement not in (None, self):
  617. raise xml.dom.InuseAttributeErr("attribute node already owned")
  618. old1 = self._attrs.get(attr.name, None)
  619. if old1 is not None:
  620. self.removeAttributeNode(old1)
  621. old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None)
  622. if old2 is not None and old2 is not old1:
  623. self.removeAttributeNode(old2)
  624. _set_attribute_node(self, attr)
  625. if old1 is not attr:
  626. # It might have already been part of this node, in which case
  627. # it doesn't represent a change, and should not be returned.
  628. return old1
  629. if old2 is not attr:
  630. return old2
  631. setAttributeNodeNS = setAttributeNode
  632. def removeAttribute(self, name):
  633. try:
  634. attr = self._attrs[name]
  635. except KeyError:
  636. raise xml.dom.NotFoundErr()
  637. self.removeAttributeNode(attr)
  638. def removeAttributeNS(self, namespaceURI, localName):
  639. try:
  640. attr = self._attrsNS[(namespaceURI, localName)]
  641. except KeyError:
  642. raise xml.dom.NotFoundErr()
  643. self.removeAttributeNode(attr)
  644. def removeAttributeNode(self, node):
  645. if node is None:
  646. raise xml.dom.NotFoundErr()
  647. try:
  648. self._attrs[node.name]
  649. except KeyError:
  650. raise xml.dom.NotFoundErr()
  651. _clear_id_cache(self)
  652. node.unlink()
  653. # Restore this since the node is still useful and otherwise
  654. # unlinked
  655. node.ownerDocument = self.ownerDocument
  656. removeAttributeNodeNS = removeAttributeNode
  657. def hasAttribute(self, name):
  658. return name in self._attrs
  659. def hasAttributeNS(self, namespaceURI, localName):
  660. return (namespaceURI, localName) in self._attrsNS
  661. def getElementsByTagName(self, name):
  662. return _get_elements_by_tagName_helper(self, name, NodeList())
  663. def getElementsByTagNameNS(self, namespaceURI, localName):
  664. return _get_elements_by_tagName_ns_helper(
  665. self, namespaceURI, localName, NodeList())
  666. def __repr__(self):
  667. return "<DOM Element: %s at %#x>" % (self.tagName, id(self))
  668. def writexml(self, writer, indent="", addindent="", newl=""):
  669. # indent = current indentation
  670. # addindent = indentation to add to higher levels
  671. # newl = newline string
  672. writer.write(indent+"<" + self.tagName)
  673. attrs = self._get_attributes()
  674. a_names = attrs.keys()
  675. a_names.sort()
  676. for a_name in a_names:
  677. writer.write(" %s=\"" % a_name)
  678. _write_data(writer, attrs[a_name].value)
  679. writer.write("\"")
  680. if self.childNodes:
  681. writer.write(">")
  682. if (len(self.childNodes) == 1 and
  683. self.childNodes[0].nodeType == Node.TEXT_NODE):
  684. self.childNodes[0].writexml(writer, '', '', '')
  685. else:
  686. writer.write(newl)
  687. for node in self.childNodes:
  688. node.writexml(writer, indent+addindent, addindent, newl)
  689. writer.write(indent)
  690. writer.write("</%s>%s" % (self.tagName, newl))
  691. else:
  692. writer.write("/>%s"%(newl))
  693. def _get_attributes(self):
  694. return NamedNodeMap(self._attrs, self._attrsNS, self)
  695. def hasAttributes(self):
  696. if self._attrs:
  697. return True
  698. else:
  699. return False
  700. # DOM Level 3 attributes, based on the 22 Oct 2002 draft
  701. def setIdAttribute(self, name):
  702. idAttr = self.getAttributeNode(name)
  703. self.setIdAttributeNode(idAttr)
  704. def setIdAttributeNS(self, namespaceURI, localName):
  705. idAttr = self.getAttributeNodeNS(namespaceURI, localName)
  706. self.setIdAttributeNode(idAttr)
  707. def setIdAttributeNode(self, idAttr):
  708. if idAttr is None or not self.isSameNode(idAttr.ownerElement):
  709. raise xml.dom.NotFoundErr()
  710. if _get_containing_entref(self) is not None:
  711. raise xml.dom.NoModificationAllowedErr()
  712. if not idAttr._is_id:
  713. idAttr.__dict__['_is_id'] = True
  714. self._magic_id_nodes += 1
  715. self.ownerDocument._magic_id_count += 1
  716. _clear_id_cache(self)
# Register the "attributes" and "localName" properties on Element through
# the module's defproperty() helper, attaching the documentation strings.
# NOTE(review): presumably backed by Element._get_attributes and
# Element._get_localName -- confirm against defproperty().
defproperty(Element, "attributes",
            doc="NamedNodeMap of attributes on the element.")
defproperty(Element, "localName",
            doc="Namespace-local name of this element.")
  721. def _set_attribute_node(element, attr):
  722. _clear_id_cache(element)
  723. element._attrs[attr.name] = attr
  724. element._attrsNS[(attr.namespaceURI, attr.localName)] = attr
  725. # This creates a circular reference, but Element.unlink()
  726. # breaks the cycle since the references to the attribute
  727. # dictionaries are tossed.
  728. attr.__dict__['ownerElement'] = element
  729. class Childless:
  730. """Mixin that makes childless-ness easy to implement and avoids
  731. the complexity of the Node methods that deal with children.
  732. """
  733. attributes = None
  734. childNodes = EmptyNodeList()
  735. firstChild = None
  736. lastChild = None
  737. def _get_firstChild(self):
  738. return None
  739. def _get_lastChild(self):
  740. return None
  741. def appendChild(self, node):
  742. raise xml.dom.HierarchyRequestErr(
  743. self.nodeName + " nodes cannot have children")
  744. def hasChildNodes(self):
  745. return False
  746. def insertBefore(self, newChild, refChild):
  747. raise xml.dom.HierarchyRequestErr(
  748. self.nodeName + " nodes do not have children")
  749. def removeChild(self, oldChild):
  750. raise xml.dom.NotFoundErr(
  751. self.nodeName + " nodes do not have children")
  752. def normalize(self):
  753. # For childless nodes, normalize() has nothing to do.
  754. pass
  755. def replaceChild(self, newChild, oldChild):
  756. raise xml.dom.HierarchyRequestErr(
  757. self.nodeName + " nodes do not have children")
  758. class ProcessingInstruction(Childless, Node):
  759. nodeType = Node.PROCESSING_INSTRUCTION_NODE
  760. def __init__(self, target, data):
  761. self.target = self.nodeName = target
  762. self.data = self.nodeValue = data
  763. def _get_data(self):
  764. return self.data
  765. def _set_data(self, value):
  766. d = self.__dict__
  767. d['data'] = d['nodeValue'] = value
  768. def _get_target(self):
  769. return self.target
  770. def _set_target(self, value):
  771. d = self.__dict__
  772. d['target'] = d['nodeName'] = value
  773. def __setattr__(self, name, value):
  774. if name == "data" or name == "nodeValue":
  775. self.__dict__['data'] = self.__dict__['nodeValue'] = value
  776. elif name == "target" or name == "nodeName":
  777. self.__dict__['target'] = self.__dict__['nodeName'] = value
  778. else:
  779. self.__dict__[name] = value
  780. def writexml(self, writer, indent="", addindent="", newl=""):
  781. writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl))
  782. class CharacterData(Childless, Node):
  783. def _get_length(self):
  784. return len(self.data)
  785. __len__ = _get_length
  786. def _get_data(self):
  787. return self.__dict__['data']
  788. def _set_data(self, data):
  789. d = self.__dict__
  790. d['data'] = d['nodeValue'] = data
  791. _get_nodeValue = _get_data
  792. _set_nodeValue = _set_data
  793. def __setattr__(self, name, value):
  794. if name == "data" or name == "nodeValue":
  795. self.__dict__['data'] = self.__dict__['nodeValue'] = value
  796. else:
  797. self.__dict__[name] = value
  798. def __repr__(self):
  799. data = self.data
  800. if len(data) > 10:
  801. dotdotdot = "..."
  802. else:
  803. dotdotdot = ""
  804. return '<DOM %s node "%r%s">' % (
  805. self.__class__.__name__, data[0:10], dotdotdot)
  806. def substringData(self, offset, count):
  807. if offset < 0:
  808. raise xml.dom.IndexSizeErr("offset cannot be negative")
  809. if offset >= len(self.data):
  810. raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
  811. if count < 0:
  812. raise xml.dom.IndexSizeErr("count cannot be negative")
  813. return self.data[offset:offset+count]
  814. def appendData(self, arg):
  815. self.data = self.data + arg
  816. def insertData(self, offset, arg):
  817. if offset < 0:
  818. raise xml.dom.IndexSizeErr("offset cannot be negative")
  819. if offset >= len(self.data):
  820. raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
  821. if arg:
  822. self.data = "%s%s%s" % (
  823. self.data[:offset], arg, self.data[offset:])
  824. def deleteData(self, offset, count):
  825. if offset < 0:
  826. raise xml.dom.IndexSizeErr("offset cannot be negative")
  827. if offset >= len(self.data):
  828. raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
  829. if count < 0:
  830. raise xml.dom.IndexSizeErr("count cannot be negative")
  831. if count:
  832. self.data = self.data[:offset] + self.data[offset+count:]
  833. def replaceData(self, offset, count, arg):
  834. if offset < 0:
  835. raise xml.dom.IndexSizeErr("offset cannot be negative")
  836. if offset >= len(self.data):
  837. raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
  838. if count < 0:
  839. raise xml.dom.IndexSizeErr("count cannot be negative")
  840. if count:
  841. self.data = "%s%s%s" % (
  842. self.data[:offset], arg, self.data[offset+count:])
# Register the "length" property on CharacterData through defproperty().
# NOTE(review): presumably backed by CharacterData._get_length -- confirm
# against defproperty().
defproperty(CharacterData, "length", doc="Length of the string data.")
  844. class Text(CharacterData):
  845. # Make sure we don't add an instance __dict__ if we don't already
  846. # have one, at least when that's possible:
  847. # XXX this does not work, CharacterData is an old-style class
  848. # __slots__ = ()
  849. nodeType = Node.TEXT_NODE
  850. nodeName = "#text"
  851. attributes = None
  852. def splitText(self, offset):
  853. if offset < 0 or offset > len(self.data):
  854. raise xml.dom.IndexSizeErr("illegal offset value")
  855. newText = self.__class__()
  856. newText.data = self.data[offset:]
  857. newText.ownerDocument = self.ownerDocument
  858. next = self.nextSibling
  859. if self.parentNode and self in self.parentNode.childNodes:
  860. if next is None:
  861. self.parentNode.appendChild(newText)
  862. else:
  863. self.parentNode.insertBefore(newText, next)
  864. self.data = self.data[:offset]
  865. return newText
  866. def writexml(self, writer, indent="", addindent="", newl=""):
  867. _write_data(writer, "%s%s%s" % (indent, self.data, newl))
  868. # DOM Level 3 (WD 9 April 2002)
  869. def _get_wholeText(self):
  870. L = [self.data]
  871. n = self.previousSibling
  872. while n is not None:
  873. if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
  874. L.insert(0, n.data)
  875. n = n.previousSibling
  876. else:
  877. break
  878. n = self.nextSibling
  879. while n is not None:
  880. if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
  881. L.append(n.data)
  882. n = n.nextSibling
  883. else:
  884. break
  885. return ''.join(L)
  886. def replaceWholeText(self, content):
  887. # XXX This needs to be seriously changed if minidom ever
  888. # supports EntityReference nodes.
  889. parent = self.parentNode
  890. n = self.previousSibling
  891. while n is not None:
  892. if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
  893. next = n.previousSibling
  894. parent.removeChild(n)
  895. n = next
  896. else:
  897. break
  898. n = self.nextSibling
  899. if not content:
  900. parent.removeChild(self)
  901. while n is not None:
  902. if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
  903. next = n.nextSibling
  904. parent.removeChild(n)
  905. n = next
  906. else:
  907. break
  908. if content:
  909. d = self.__dict__
  910. d['data'] = content
  911. d['nodeValue'] = content
  912. return self
  913. else:
  914. return None
  915. def _get_isWhitespaceInElementContent(self):
  916. if self.data.strip():
  917. return False
  918. elem = _get_containing_element(self)
  919. if elem is None:
  920. return False
  921. info = self.ownerDocument._get_elem_info(elem)
  922. if info is None:
  923. return False
  924. else:
  925. return info.isElementContent()
# Register the "isWhitespaceInElementContent" and "wholeText" properties on
# Text through defproperty().
# NOTE(review): presumably backed by the matching Text._get_* methods --
# confirm against defproperty().
defproperty(Text, "isWhitespaceInElementContent",
            doc="True iff this text node contains only whitespace"
                " and is in element content.")
defproperty(Text, "wholeText",
            doc="The text of all logically-adjacent text nodes.")
  931. def _get_containing_element(node):
  932. c = node.parentNode
  933. while c is not None:
  934. if c.nodeType == Node.ELEMENT_NODE:
  935. return c
  936. c = c.parentNode
  937. return None
  938. def _get_containing_entref(node):
  939. c = node.parentNode
  940. while c is not None:
  941. if c.nodeType == Node.ENTITY_REFERENCE_NODE:
  942. return c
  943. c = c.parentNode
  944. return None
  945. class Comment(Childless, CharacterData):
  946. nodeType = Node.COMMENT_NODE
  947. nodeName = "#comment"
  948. def __init__(self, data):
  949. self.data = self.nodeValue = data
  950. def writexml(self, writer, indent="", addindent="", newl=""):
  951. if "--" in self.data:
  952. raise ValueError("'--' is not allowed in a comment node")
  953. writer.write("%s<!--%s-->%s" % (indent, self.data, newl))
  954. class CDATASection(Text):
  955. # Make sure we don't add an instance __dict__ if we don't already
  956. # have one, at least when that's possible:
  957. # XXX this does not work, Text is an old-style class
  958. # __slots__ = ()
  959. nodeType = Node.CDATA_SECTION_NODE
  960. nodeName = "#cdata-section"
  961. def writexml(self, writer, indent="", addindent="", newl=""):
  962. if self.data.find("]]>") >= 0:
  963. raise ValueError("']]>' not allowed in a CDATA section")
  964. writer.write("<![CDATA[%s]]>" % self.data)
  965. class ReadOnlySequentialNamedNodeMap(object):
  966. __slots__ = '_seq',
  967. def __init__(self, seq=()):
  968. # seq should be a list or tuple
  969. self._seq = seq
  970. def __len__(self):
  971. return len(self._seq)
  972. def _get_length(self):
  973. return len(self._seq)
  974. def getNamedItem(self, name):
  975. for n in self._seq:
  976. if n.nodeName == name:
  977. return n
  978. def getNamedItemNS(self, namespaceURI, localName):
  979. for n in self._seq:
  980. if n.namespaceURI == namespaceURI and n.localName == localName:
  981. return n
  982. def __getitem__(self, name_or_tuple):
  983. if isinstance(name_or_tuple, tuple):
  984. node = self.getNamedItemNS(*name_or_tuple)
  985. else:
  986. node = self.getNamedItem(name_or_tuple)
  987. if node is None:
  988. raise KeyError, name_or_tuple
  989. return node
  990. def item(self, index):
  991. if index < 0:
  992. return None
  993. try:
  994. return self._seq[index]
  995. except IndexError:
  996. return None
  997. def removeNamedItem(self, name):
  998. raise xml.dom.NoModificationAllowedErr(
  999. "NamedNodeMap instance is read-only")
  1000. def removeNamedItemNS(self, namespaceURI, localName):
  1001. raise xml.dom.NoModificationAllowedErr(
  1002. "NamedNodeMap instance is read-only")
  1003. def setNamedItem(self, node):
  1004. raise xml.dom.NoModificationAllowedErr(
  1005. "NamedNodeMap instance is read-only")
  1006. def setNamedItemNS(self, node):
  1007. raise xml.dom.NoModificationAllowedErr(
  1008. "NamedNodeMap instance is read-only")
  1009. def __getstate__(self):
  1010. return [self._seq]
  1011. def __setstate__(self, state):
  1012. self._seq = state[0]
# Register the "length" property on the read-only map through defproperty().
# NOTE(review): presumably backed by _get_length -- confirm against
# defproperty().
defproperty(ReadOnlySequentialNamedNodeMap, "length",
            doc="Number of entries in the NamedNodeMap.")
  1015. class Identified:
  1016. """Mix-in class that supports the publicId and systemId attributes."""
  1017. # XXX this does not work, this is an old-style class
  1018. # __slots__ = 'publicId', 'systemId'
  1019. def _identified_mixin_init(self, publicId, systemId):
  1020. self.publicId = publicId
  1021. self.systemId = systemId
  1022. def _get_publicId(self):
  1023. return self.publicId
  1024. def _get_systemId(self):
  1025. return self.systemId
  1026. class DocumentType(Identified, Childless, Node):
  1027. nodeType = Node.DOCUMENT_TYPE_NODE
  1028. nodeValue = None
  1029. name = None
  1030. publicId = None
  1031. systemId = None
  1032. internalSubset = None
  1033. def __init__(self, qualifiedName):
  1034. self.entities = ReadOnlySequentialNamedNodeMap()
  1035. self.notations = ReadOnlySequentialNamedNodeMap()
  1036. if qualifiedName:
  1037. prefix, localname = _nssplit(qualifiedName)
  1038. self.name = localname
  1039. self.nodeName = self.name
  1040. def _get_internalSubset(self):
  1041. return self.internalSubset
  1042. def cloneNode(self, deep):
  1043. if self.ownerDocument is None:
  1044. # it's ok
  1045. clone = DocumentType(None)
  1046. clone.name = self.name
  1047. clone.nodeName = self.name
  1048. operation = xml.dom.UserDataHandler.NODE_CLONED
  1049. if deep:
  1050. clone.entities._seq = []
  1051. clone.notations._seq = []
  1052. for n in self.notations._seq:
  1053. notation = Notation(n.nodeName, n.publicId, n.systemId)
  1054. clone.notations._seq.append(notation)
  1055. n._call_user_data_handler(operation, n, notation)
  1056. for e in self.entities._seq:
  1057. entity = Entity(e.nodeName, e.publicId, e.systemId,
  1058. e.notationName)
  1059. entity.actualEncoding = e.actualEncoding
  1060. entity.encoding = e.encoding
  1061. entity.version = e.version
  1062. clone.entities._seq.append(entity)
  1063. e._call_user_data_handler(operation, n, entity)
  1064. self._call_user_data_handler(operation, self, clone)
  1065. return clone
  1066. else:
  1067. return None
  1068. def writexml(self, writer, indent="", addindent="", newl=""):
  1069. writer.write("<!DOCTYPE ")
  1070. writer.write(self.name)
  1071. if self.publicId:
  1072. writer.write("%s PUBLIC '%s'%s '%s'"
  1073. % (newl, self.publicId, newl, self.systemId))
  1074. elif self.systemId:
  1075. writer.write("%s SYSTEM '%s'" % (newl, self.systemId))
  1076. if self.internalSubset is not None:
  1077. writer.write(" [")
  1078. writer.write(self.internalSubset)
  1079. writer.write("]")
  1080. writer.write(">"+newl)
  1081. class Entity(Identified, Node):
  1082. attributes = None
  1083. nodeType = Node.ENTITY_NODE
  1084. nodeValue = None
  1085. actualEncoding = None
  1086. encoding = None
  1087. version = None
  1088. def __init__(self, name, publicId, systemId, notation):
  1089. self.nodeName = name
  1090. self.notationName = notation
  1091. self.childNodes = NodeList()
  1092. self._identified_mixin_init(publicId, systemId)
  1093. def _get_actualEncoding(self):
  1094. return self.actualEncoding
  1095. def _get_encoding(self):
  1096. return self.encoding
  1097. def _get_version(self):
  1098. return self.version
  1099. def appendChild(self, newChild):
  1100. raise xml.dom.HierarchyRequestErr(
  1101. "cannot append children to an entity node")
  1102. def insertBefore(self, newChild, refChild):
  1103. raise xml.dom.HierarchyRequestErr(
  1104. "cannot insert children below an entity node")
  1105. def removeChild(self, oldChild):
  1106. raise xml.dom.HierarchyRequestErr(
  1107. "cannot remove children from an entity node")
  1108. def replaceChild(self, newChild, oldChild):
  1109. raise xml.dom.HierarchyRequestErr(
  1110. "cannot replace children of an entity node")
  1111. class Notation(Identified, Childless, Node):
  1112. nodeType = Node.NOTATION_NODE
  1113. nodeValue = None
  1114. def __init__(self, name, publicId, systemId):
  1115. self.nodeName = name
  1116. self._identified_mixin_init(publicId, systemId)
  1117. class DOMImplementation(DOMImplementationLS):
  1118. _features = [("core", "1.0"),
  1119. ("core", "2.0"),
  1120. ("core", None),
  1121. ("xml", "1.0"),
  1122. ("xml", "2.0"),
  1123. ("xml", None),
  1124. ("ls-load", "3.0"),
  1125. ("ls-load", None),
  1126. ]
  1127. def hasFeature(self, feature, version):
  1128. if version == "":
  1129. version = None
  1130. return (feature.lower(), version) in self._features
  1131. def createDocument(self, namespaceURI, qualifiedName, doctype):
  1132. if doctype and doctype.parentNode is not None:
  1133. raise xml.dom.WrongDocumentErr(
  1134. "doctype object owned by another DOM tree")
  1135. doc = self._create_document()
  1136. add_root_element = not (namespaceURI is None
  1137. and qualifiedName is None
  1138. and doctype is None)
  1139. if not qualifiedName and add_root_element:
  1140. # The spec is unclear what to raise here; SyntaxErr
  1141. # would be the other obvious candidate. Since Xerces raises
  1142. # InvalidCharacterErr, and since SyntaxErr is not listed
  1143. # for createDocument, that seems to be the better choice.
  1144. # XXX: need to check for illegal characters here and in
  1145. # createElement.
  1146. # DOM Level III clears this up when talking about the return value
  1147. # of this function. If namespaceURI, qName and DocType are
  1148. # Null the document is returned without a document element
  1149. # Otherwise if doctype or namespaceURI are not None
  1150. # Then we go back to the above problem
  1151. raise xml.dom.InvalidCharacterErr("Element with no name")
  1152. if add_root_element:
  1153. prefix, localname = _nssplit(qualifiedName)
  1154. if prefix == "xml" \
  1155. and namespaceURI != "http://www.w3.org/XML/1998/namespace":
  1156. raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
  1157. if prefix and not namespaceURI:
  1158. raise xml.dom.NamespaceErr(
  1159. "illegal use of prefix without namespaces")
  1160. element = doc.createElementNS(namespaceURI, qualifiedName)
  1161. if doctype:
  1162. doc.appendChild(doctype)
  1163. doc.appendChild(element)
  1164. if doctype:
  1165. doctype.parentNode = doctype.ownerDocument = doc
  1166. doc.doctype = doctype
  1167. doc.implementation = self
  1168. return doc
  1169. def createDocumentType(self, qualifiedName, publicId, systemId):
  1170. doctype = DocumentType(qualifiedName)
  1171. doctype.publicId = publicId
  1172. doctype.systemId = systemId
  1173. return doctype
  1174. # DOM Level 3 (WD 9 April 2002)
  1175. def getInterface(self, feature):
  1176. if self.hasFeature(feature, None):
  1177. return self
  1178. else:
  1179. return None
  1180. # internal
  1181. def _create_document(self):
  1182. return Document()
  1183. class ElementInfo(object):
  1184. """Object that represents content-model information for an element.
  1185. This implementation is not expected to be used in practice; DOM
  1186. builders should provide implementations which do the right thing
  1187. using information available to it.
  1188. """
  1189. __slots__ = 'tagName',
  1190. def __init__(self, name):
  1191. self.tagName = name
  1192. def getAttributeType(self, aname):
  1193. return _no_type
  1194. def getAttributeTypeNS(self, namespaceURI, localName):
  1195. return _no_type
  1196. def isElementContent(self):
  1197. return False
  1198. def isEmpty(self):
  1199. """Returns true iff this element is declared to have an EMPTY
  1200. content model."""
  1201. return False
  1202. def isId(self, aname):
  1203. """Returns true iff the named attribute is a DTD-style ID."""
  1204. return False
  1205. def isIdNS(self, namespaceURI, localName):
  1206. """Returns true iff the identified attribute is a DTD-style ID."""
  1207. return False
  1208. def __getstate__(self):
  1209. return self.tagName
  1210. def __setstate__(self, state):
  1211. self.tagName = state
  1212. def _clear_id_cache(node):
  1213. if node.nodeType == Node.DOCUMENT_NODE:
  1214. node._id_cache.clear()
  1215. node._id_search_stack = None
  1216. elif _in_document(node):
  1217. node.ownerDocument._id_cache.clear()
  1218. node.ownerDocument._id_search_stack= None
  1219. class Document(Node, DocumentLS):
  1220. _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
  1221. Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)
  1222. nodeType = Node.DOCUMENT_NODE
  1223. nodeName = "#document"
  1224. nodeValue = None
  1225. attributes = None
  1226. doctype = None
  1227. parentNode = None
  1228. previousSibling = nextSibling = None
  1229. implementation = DOMImplementation()
  1230. # Document attributes from Level 3 (WD 9 April 2002)
  1231. actualEncoding = None
  1232. encoding = None
  1233. standalone = None
  1234. version = None
  1235. strictErrorChecking = False
  1236. errorHandler = None
  1237. documentURI = None
  1238. _magic_id_count = 0
  1239. def __init__(self):
  1240. self.childNodes = NodeList()
  1241. # mapping of (namespaceURI, localName) -> ElementInfo
  1242. # and tagName -> ElementInfo
  1243. self._elem_info = {}
  1244. self._id_cache = {}
  1245. self._id_search_stack = None
  1246. def _get_elem_info(self, element):
  1247. if element.namespaceURI:
  1248. key = element.namespaceURI, element.localName
  1249. else:
  1250. key = element.tagName
  1251. return self._elem_info.get(key)
  1252. def _get_actualEncoding(self):
  1253. return self.actualEncoding
  1254. def _get_doctype(self):
  1255. return self.doctype
  1256. def _get_documentURI(self):
  1257. return self.documentURI
  1258. def _get_encoding(self):
  1259. return self.encoding
  1260. def _get_errorHandler(self):
  1261. return self.errorHandler
  1262. def _get_standalone(self):
  1263. return self.standalone
  1264. def _get_strictErrorChecking(self):
  1265. return self.strictErrorChecking
  1266. def _get_version(self):
  1267. return self.version
  1268. def appendChild(self, node):
  1269. if node.nodeType not in self._child_node_types:
  1270. raise xml.dom.HierarchyRequestErr(
  1271. "%s cannot be child of %s" % (repr(node), repr(self)))
  1272. if node.parentNode is not None:
  1273. # This needs to be done before the next test since this
  1274. # may *be* the document element, in which case it should
  1275. # end up re-ordered to the end.
  1276. node.parentNode.removeChild(node)
  1277. if node.nodeType == Node.ELEMENT_NODE \
  1278. and self._get_documentElement():
  1279. raise xml.dom.HierarchyRequestErr(
  1280. "two document elements disallowed")
  1281. return Node.appendChild(self, node)
  1282. def removeChild(self, oldChild):
  1283. try:
  1284. self.childNodes.remove(oldChild)
  1285. except ValueError:
  1286. raise xml.dom.NotFoundErr()
  1287. oldChild.nextSibling = oldChild.previousSibling = None
  1288. oldChild.parentNode = None
  1289. if self.documentElement is oldChild:
  1290. self.documentElement = None
  1291. return oldChild
  1292. def _get_documentElement(self):
  1293. for node in self.childNodes:
  1294. if node.nodeType == Node.ELEMENT_NODE:
  1295. return node
  1296. def unlink(self):
  1297. if self.doctype is not None:
  1298. self.doctype.unlink()
  1299. self.doctype = None
  1300. Node.unlink(self)
  1301. def cloneNode(self, deep):
  1302. if not deep:
  1303. return None
  1304. clone = self.implementation.createDocument(None, None, None)
  1305. clone.encoding = self.encoding
  1306. clone.standalone = self.standalone
  1307. clone.version = self.version
  1308. for n in self.childNodes:
  1309. childclone = _clone_node(n, deep, clone)
  1310. assert childclone.ownerDocument.isSameNode(clone)
  1311. clone.childNodes.append(childclone)
  1312. if childclone.nodeType == Node.DOCUMENT_NODE:
  1313. assert clone.documentElement is None
  1314. elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE:
  1315. assert clone.doctype is None
  1316. clone.doctype = childclone
  1317. childclone.parentNode = clone
  1318. self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED,
  1319. self, clone)
  1320. return clone
  1321. def createDocumentFragment(self):
  1322. d = DocumentFragment()
  1323. d.ownerDocument = self
  1324. return d
  1325. def createElement(self, tagName):
  1326. e = Element(tagName)
  1327. e.ownerDocument = self
  1328. return e
  1329. def createTextNode(self, data):
  1330. if not isinstance(data, StringTypes):
  1331. raise TypeError, "node contents must be a string"
  1332. t = Text()
  1333. t.data = data
  1334. t.ownerDocument = self
  1335. return t
  1336. def createCDATASection(self, data):
  1337. if not isinstance(data, StringTypes):
  1338. raise TypeError, "node contents must be a string"
  1339. c = CDATASection()
  1340. c.data = data
  1341. c.ownerDocument = self
  1342. return c
  1343. def createComment(self, data):
  1344. c = Comment(data)
  1345. c.ownerDocument = self
  1346. return c
  1347. def createProcessingInstruction(self, target, data):
  1348. p = ProcessingInstruction(target, data)
  1349. p.ownerDocument = self
  1350. return p
  1351. def createAttribute(self, qName):
  1352. a = Attr(qName)
  1353. a.ownerDocument = self
  1354. a.value = ""
  1355. return a
  1356. def createElementNS(self, namespaceURI, qualifiedName):
  1357. prefix, localName = _nssplit(qualifiedName)
  1358. e = Element(qualifiedName, namespaceURI, prefix)
  1359. e.ownerDocument = self
  1360. return e
  1361. def createAttributeNS(self, namespaceURI, qualifiedName):
  1362. prefix, localName = _nssplit(qualifiedName)
  1363. a = Attr(qualifiedName, namespaceURI, localName, prefix)
  1364. a.ownerDocument = self
  1365. a.value = ""
  1366. return a
  1367. # A couple of implementation-specific helpers to create node types
  1368. # not supported by the W3C DOM specs:
  1369. def _create_entity(self, name, publicId, systemId, notationName):
  1370. e = Entity(name, publicId, systemId, notationName)
  1371. e.ownerDocument = self
  1372. return e
  1373. def _create_notation(self, name, publicId, systemId):
  1374. n = Notation(name, publicId, systemId)
  1375. n.ownerDocument = self
  1376. return n
  1377. def getElementById(self, id):
  1378. if id in self._id_cache:
  1379. return self._id_cache[id]
  1380. if not (self._elem_info or self._magic_id_count):
  1381. return None
  1382. stack = self._id_search_stack
  1383. if stack is None:
  1384. # we never searched before, or the cache has been cleared
  1385. stack = [self.documentElement]
  1386. self._id_search_stack = stack
  1387. elif not stack:
  1388. # Previous search was completed and cache is still valid;
  1389. # no matching node.
  1390. return None
  1391. result = None
  1392. while stack:
  1393. node = stack.pop()
  1394. # add child elements to stack for continued searching
  1395. stack.extend([child for child in node.childNodes
  1396. if child.nodeType in _nodeTypes_with_children])
  1397. # check this node
  1398. info = self._get_elem_info(node)
  1399. if info:
  1400. # We have to process all ID attributes before
  1401. # returning in order to get all the attributes set to
  1402. # be IDs using Element.setIdAttribute*().
  1403. for attr in node.attributes.values():
  1404. if attr.namespaceURI:
  1405. if info.isIdNS(attr.namespaceURI, attr.localName):
  1406. self._id_cache[attr.value] = node
  1407. if attr.value == id:
  1408. result = node
  1409. elif not node._magic_id_nodes:
  1410. break
  1411. elif info.isId(attr.name):
  1412. self._id_cache[attr.value] = node
  1413. if attr.value == id:
  1414. result = node
  1415. elif not node._magic_id_nodes:
  1416. break
  1417. elif attr._is_id:
  1418. self._id_cache[attr.value] = node
  1419. if attr.value == id:
  1420. result = node
  1421. elif node._magic_id_nodes == 1:
  1422. break
  1423. elif node._magic_id_nodes:
  1424. for attr in node.attributes.values():
  1425. if attr._is_id:
  1426. self._id_cache[attr.value] = node
  1427. if attr.value == id:
  1428. result = node
  1429. if result is not None:
  1430. break
  1431. return result
  1432. def getElementsByTagName(self, name):
  1433. return _get_elements_by_tagName_helper(self, name, NodeList())
  1434. def getElementsByTagNameNS(self, namespaceURI, localName):
  1435. return _get_elements_by_tagName_ns_helper(
  1436. self, namespaceURI, localName, NodeList())
  1437. def isSupported(self, feature, version):
  1438. return self.implementation.hasFeature(feature, version)
  1439. def importNode(self, node, deep):
  1440. if node.nodeType == Node.DOCUMENT_NODE:
  1441. raise xml.dom.NotSupportedErr("cannot import document nodes")
  1442. elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
  1443. raise xml.dom.NotSupportedErr("cannot import document type nodes")
  1444. return _clone_node(node, deep, self)
  1445. def writexml(self, writer, indent="", addindent="", newl="",
  1446. encoding = None):
  1447. if encoding is None:
  1448. writer.write('<?xml version="1.0" ?>'+newl)
  1449. else:
  1450. writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl))
  1451. for node in self.childNodes:
  1452. node.writexml(writer, indent, addindent, newl)
  1453. # DOM Level 3 (WD 9 April 2002)
  def renameNode(self, n, namespaceURI, name):
      """Rename the element or attribute node n in place and return it.

      n must belong to this document, otherwise xml.dom.WrongDocumentErr
      is raised.  Nodes that are neither elements nor attributes raise
      xml.dom.NotSupportedErr.  When a namespace URI is given, a name
      using the 'xmlns' prefix (or, for attributes, the bare name
      'xmlns') outside of xml.dom.XMLNS_NAMESPACE raises
      xml.dom.NamespaceErr.

      An attribute that is attached to an element is removed from that
      element before its names change and re-added afterwards; if it
      was flagged as an ID attribute, that flag is set again.
      """
      if n.ownerDocument is not self:
          raise xml.dom.WrongDocumentErr(
              "cannot rename nodes from other documents;\n"
              "expected %s,\nfound %s" % (self, n.ownerDocument))
      node_kind = n.nodeType
      if node_kind not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
          raise xml.dom.NotSupportedErr(
              "renameNode() only applies to element and attribute nodes")
      renaming_attribute = node_kind == Node.ATTRIBUTE_NODE

      # Work out the new prefix / local name.  Both stay None when no
      # namespace is supplied.
      new_prefix = None
      new_local = None
      if namespaceURI != EMPTY_NAMESPACE:
          if ':' not in name:
              if (name == "xmlns"
                      and namespaceURI != xml.dom.XMLNS_NAMESPACE
                      and renaming_attribute):
                  raise xml.dom.NamespaceErr(
                      "illegal use of the 'xmlns' attribute")
              new_local = name
          else:
              new_prefix, new_local = name.split(':', 1)
              if (new_prefix == "xmlns"
                      and namespaceURI != xml.dom.XMLNS_NAMESPACE):
                  raise xml.dom.NamespaceErr(
                      "illegal use of 'xmlns' prefix")

      # Detach an owned attribute so the element can re-register it
      # under the new name further down.
      owner = None
      was_id = False
      if renaming_attribute:
          owner = n.ownerElement
          if owner is not None:
              was_id = n._is_id
              owner.removeAttributeNode(n)

      # Write straight into the instance dictionary to avoid __setattr__.
      state = n.__dict__
      for key, value in (('prefix', new_prefix),
                         ('localName', new_local),
                         ('namespaceURI', namespaceURI),
                         ('nodeName', name)):
          state[key] = value

      # It's not clear from a semantic perspective whether we should
      # call the user data handlers for the NODE_RENAMED event since
      # we're re-using the existing node.  The draft spec has been
      # interpreted as meaning "no, don't call the handler unless a
      # new node is created."
      if node_kind == Node.ELEMENT_NODE:
          state['tagName'] = name
          return n

      # Attribute node: update its name and re-attach it to its element.
      state['name'] = name
      if owner is not None:
          owner.setAttributeNode(n)
          if was_id:
              owner.setIdAttributeNode(n)
      return n
  # Attach the doc string below to Document's "documentElement" attribute
  # through the defproperty helper (defined outside this chunk; presumably
  # it also builds the property itself -- verify against its definition).
  defproperty(Document, "documentElement",
              doc="Top-level element of this document.")
  1510. def _clone_node(node, deep, newOwnerDocument):
  1511. """
  1512. Clone a node and give it the new owner document.
  1513. Called by Node.cloneNode and Document.importNode
  1514. """
  1515. if node.ownerDocument.isSameNode(newOwnerDocument):
  1516. operation = xml.dom.UserDataHandler.NODE_CLONED
  1517. else:
  1518. operation = xml.dom.UserDataHandler.NODE_IMPORTED
  1519. if node.nodeType == Node.ELEMENT_NODE:
  1520. clone = newOwnerDocument.createElementNS(node.namespaceURI,
  1521. node.nodeName)
  1522. for attr in node.attributes.values():
  1523. clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value)
  1524. a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName)
  1525. a.specified = attr.specified
  1526. if deep:
  1527. for child in node.childNodes:
  1528. c = _clone_node(child, deep, newOwnerDocument)
  1529. clone.appendChild(c)
  1530. elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
  1531. clone = newOwnerDocument.createDocumentFragment()
  1532. if deep:
  1533. for child in node.childNodes:
  1534. c = _clone_node(child, deep, newOwnerDocument)
  1535. clone.appendChild(c)
  1536. elif node.nodeType == Node.TEXT_NODE:
  1537. clone = newOwnerDocument.createTextNode(node.data)
  1538. elif node.nodeType == Node.CDATA_SECTION_NODE:
  1539. clone = newOwnerDocument.createCDATASection(node.data)
  1540. elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
  1541. clone = newOwnerDocument.createProcessingInstruction(node.target,
  1542. node.data)
  1543. elif node.nodeType == Node.COMMENT_NODE:
  1544. clone = newOwnerDocument.createComment(node.data)
  1545. elif node.nodeType == Node.ATTRIBUTE_NODE:
  1546. clone = newOwnerDocument.createAttributeNS(node.namespaceURI,
  1547. node.nodeName)
  1548. clone.specified = True
  1549. clone.value = node.value
  1550. elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
  1551. assert node.ownerDocument is not newOwnerDocument
  1552. operation = xml.dom.UserDataHandler.NODE_IMPORTED
  1553. clone = newOwnerDocument.implementation.createDocumentType(
  1554. node.name, node.publicId, node.systemId)
  1555. clone.ownerDocument = newOwnerDocument
  1556. if deep:
  1557. clone.entities._seq = []
  1558. clone.notations._seq = []
  1559. for n in node.notations._seq:
  1560. notation = Notation(n.nodeName, n.publicId, n.systemId)
  1561. notation.ownerDocument = newOwnerDocument
  1562. clone.notations._seq.append(notation)
  1563. if hasattr(n, '_call_user_data_handler'):
  1564. n._call_user_data_handler(operation, n, notation)
  1565. for e in node.entities._seq:
  1566. entity = Entity(e.nodeName, e.publicId, e.systemId,
  1567. e.notationName)
  1568. entity.actualEncoding = e.actualEncoding
  1569. entity.encoding = e.encoding
  1570. entity.version = e.version
  1571. entity.ownerDocument = newOwnerDocument
  1572. clone.entities._seq.append(entity)
  1573. if hasattr(e, '_call_user_data_handler'):
  1574. e._call_user_data_handler(operation, n, entity)
  1575. else:
  1576. # Note the cloning of Document and DocumentType nodes is
  1577. # implementation specific. minidom handles those cases
  1578. # directly in the cloneNode() methods.
  1579. raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))
  1580. # Check for _call_user_data_handler() since this could conceivably
  1581. # used with other DOM implementations (one of the FourThought
  1582. # DOMs, perhaps?).
  1583. if hasattr(node, '_call_user_data_handler'):
  1584. node._call_user_data_handler(operation, node, clone)
  1585. return clone
  1586. def _nssplit(qualifiedName):
  1587. fields = qualifiedName.split(':', 1)
  1588. if len(fields) == 2:
  1589. return fields
  1590. else:
  1591. return (None, fields[0])
  1592. def _get_StringIO():
  1593. # we can't use cStringIO since it doesn't support Unicode strings
  1594. from StringIO import StringIO
  1595. return StringIO()
  1596. def _do_pulldom_parse(func, args, kwargs):
  1597. events = func(*args, **kwargs)
  1598. toktype, rootNode = events.getEvent()
  1599. events.expandNode(rootNode)
  1600. events.clear()
  1601. return rootNode
  def parse(file, parser=None, bufsize=None):
      """Build a DOM document from a filename or an open file object.

      Without a custom parser and with a false bufsize, the work is
      delegated to xml.dom.expatbuilder.  Otherwise xml.dom.pulldom is
      used, and parser and bufsize are passed on to it.
      """
      if parser is not None or bufsize:
          from xml.dom import pulldom
          options = {'parser': parser, 'bufsize': bufsize}
          return _do_pulldom_parse(pulldom.parse, (file,), options)
      from xml.dom import expatbuilder
      return expatbuilder.parse(file)
  def parseString(string, parser=None):
      """Build a DOM document from XML held in a string.

      Without a custom parser the work is delegated to
      xml.dom.expatbuilder; otherwise xml.dom.pulldom is used with the
      supplied parser.
      """
      if parser is not None:
          from xml.dom import pulldom
          options = {'parser': parser}
          return _do_pulldom_parse(pulldom.parseString, (string,), options)
      from xml.dom import expatbuilder
      return expatbuilder.parseString(string)
  def getDOMImplementation(features=None):
      """Return Document.implementation if it supports the given features.

      features may be a feature string (parsed with
      domreg._parse_feature_string) or an iterable of (feature, version)
      pairs.  When it is empty or omitted the implementation is returned
      unconditionally; when any requested feature is not supported, None
      is returned.
      """
      if not features:
          return Document.implementation
      requested = features
      if isinstance(requested, StringTypes):
          requested = domreg._parse_feature_string(requested)
      for feature, version in requested:
          if Document.implementation.hasFeature(feature, version):
              continue
          return None
      return Document.implementation