plistlib.py 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025
  1. r"""plistlib.py -- a tool to generate and parse MacOSX .plist files.
  2. The property list (.plist) file format is a simple XML pickle supporting
  3. basic object types, like dictionaries, lists, numbers and strings.
  4. Usually the top level object is a dictionary.
  5. To write out a plist file, use the dump(value, file)
  6. function. 'value' is the top level object, 'file' is
  7. a (writable) file object.
  8. To parse a plist from a file, use the load(file) function,
  9. with a (readable) file object as the only argument. It
  10. returns the top level object (again, usually a dictionary).
  11. To work with plist data in bytes objects, you can use loads()
  12. and dumps().
  13. Values can be strings, integers, floats, booleans, tuples, lists,
  14. dictionaries (but only with string keys), Data, bytes, bytearray, or
  15. datetime.datetime objects.
  16. Generate Plist example:
  17. pl = dict(
  18. aString = "Doodah",
  19. aList = ["A", "B", 12, 32.1, [1, 2, 3]],
  20. aFloat = 0.1,
  21. anInt = 728,
  22. aDict = dict(
  23. anotherString = "<hello & hi there!>",
  24. aUnicodeValue = "M\xe4ssig, Ma\xdf",
  25. aTrueValue = True,
  26. aFalseValue = False,
  27. ),
  28. someData = b"<binary gunk>",
  29. someMoreData = b"<lots of binary gunk>" * 10,
  30. aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())),
  31. )
  32. with open(fileName, 'wb') as fp:
  33. dump(pl, fp)
  34. Parse Plist example:
  35. with open(fileName, 'rb') as fp:
  36. pl = load(fp)
  37. print(pl["aKey"])
  38. """
  39. __all__ = [
  40. "readPlist", "writePlist", "readPlistFromBytes", "writePlistToBytes",
  41. "Plist", "Data", "Dict", "FMT_XML", "FMT_BINARY",
  42. "load", "dump", "loads", "dumps"
  43. ]
  44. import binascii
  45. import codecs
  46. import contextlib
  47. import datetime
  48. import enum
  49. from io import BytesIO
  50. import itertools
  51. import os
  52. import re
  53. import struct
  54. from warnings import warn
  55. from xml.parsers.expat import ParserCreate
  # The two supported serialisation formats.  They are published both as
  # members of the PlistFormat enum and, via the globals() update below, as
  # the module-level names FMT_XML and FMT_BINARY.
  PlistFormat = enum.Enum(
      'PlistFormat',
      'FMT_XML FMT_BINARY',
      module=__name__,
  )
  globals().update(PlistFormat.__members__)
  58. #
  59. #
  60. # Deprecated functionality
  61. #
  62. #
  class _InternalDict(dict):
      """dict that still supports the deprecated attribute-style key access.

      This class exists while Dict is scheduled for deprecation: a warning
      is only wanted when a *user* instantiates Dict or when the "attribute
      notation for dict keys" is actually used, so the library builds
      instances of this class instead of Dict.
      """

      __slots__ = ()

      def __getattr__(self, attr):
          """Return self[attr]; a missing key becomes AttributeError."""
          try:
              found = self[attr]
          except KeyError:
              raise AttributeError(attr)
          else:
              # Only a successful lookup is reported as deprecated usage.
              warn("Attribute access from plist dicts is deprecated, use d[key] "
                   "notation instead", DeprecationWarning, 2)
              return found

      def __setattr__(self, attr, value):
          """Warn, then store value under the key attr."""
          warn("Attribute access from plist dicts is deprecated, use d[key] "
               "notation instead", DeprecationWarning, 2)
          self[attr] = value

      def __delattr__(self, attr):
          """Delete self[attr]; a missing key becomes AttributeError."""
          try:
              del self[attr]
          except KeyError:
              raise AttributeError(attr)
          else:
              warn("Attribute access from plist dicts is deprecated, use d[key] "
                   "notation instead", DeprecationWarning, 2)
  class Dict(_InternalDict):
      """Deprecated dict subclass; instantiating it emits a DeprecationWarning."""

      def __init__(self, **kwargs):
          warn(
              "The plistlib.Dict class is deprecated, use builtin dict instead",
              DeprecationWarning,
              2,
          )
          super().__init__(**kwargs)
  @contextlib.contextmanager
  def _maybe_open(pathOrFile, mode):
      """Context manager yielding a file object for a path or a file.

      A str argument is opened with *mode* and closed when the context
      exits; any other argument is yielded unchanged and is not closed.
      """
      if not isinstance(pathOrFile, str):
          yield pathOrFile
          return
      with open(pathOrFile, mode) as opened:
          yield opened
  class Plist(_InternalDict):
      """This class has been deprecated. Use dump() and load()
      functions instead, together with regular dict objects.
      """

      def __init__(self, **kwargs):
          warn("The Plist class is deprecated, use the load() and "
               "dump() functions instead", DeprecationWarning, 2)
          super().__init__(**kwargs)

      @classmethod
      def fromFile(cls, pathOrFile):
          """Deprecated. Use the load() function instead."""
          with _maybe_open(pathOrFile, 'rb') as source:
              loaded = load(source)
          # cls() goes through __init__, so it emits the deprecation warning.
          instance = cls()
          instance.update(loaded)
          return instance

      def write(self, pathOrFile):
          """Deprecated. Use the dump() function instead."""
          with _maybe_open(pathOrFile, 'wb') as target:
              dump(self, target)
  def readPlist(pathOrFile):
      """
      Read a .plist from a path or file. pathOrFile should either
      be a file name, or a readable binary file object.

      Dictionaries are returned as _InternalDict and binary data as Data
      (use_builtin_types is False).

      This function is deprecated, use load instead.
      """
      warn("The readPlist function is deprecated, use load() instead",
           DeprecationWarning, 2)
      with _maybe_open(pathOrFile, 'rb') as source:
          root = load(
              source,
              fmt=None,
              use_builtin_types=False,
              dict_type=_InternalDict,
          )
      return root
  def writePlist(value, pathOrFile):
      """
      Write 'value' to a .plist file. 'pathOrFile' may either be a
      file name or a (writable) file object.

      The output is always XML with sorted keys.

      This function is deprecated, use dump instead.
      """
      warn("The writePlist function is deprecated, use dump() instead",
           DeprecationWarning, 2)
      with _maybe_open(pathOrFile, 'wb') as target:
          dump(
              value,
              target,
              fmt=FMT_XML,
              sort_keys=True,
              skipkeys=False,
          )
  def readPlistFromBytes(data):
      """
      Read a plist data from a bytes object. Return the root object.

      This function is deprecated, use loads instead.
      """
      warn("The readPlistFromBytes function is deprecated, use loads() instead",
           DeprecationWarning, 2)
      source = BytesIO(data)
      return load(
          source,
          fmt=None,
          use_builtin_types=False,
          dict_type=_InternalDict,
      )
  def writePlistToBytes(value):
      """
      Return 'value' as a plist-formatted bytes object.

      This function is deprecated, use dumps instead.
      """
      warn("The writePlistToBytes function is deprecated, use dumps() instead",
           DeprecationWarning, 2)
      buffer = BytesIO()
      dump(value, buffer, fmt=FMT_XML, sort_keys=True, skipkeys=False)
      return buffer.getvalue()
  class Data:
      """
      Wrapper for binary data.

      This class is deprecated, use a bytes object instead.
      """

      def __init__(self, data):
          # Only real bytes objects are accepted.
          if not isinstance(data, bytes):
              raise TypeError("data must be as bytes")
          self.data = data

      @classmethod
      def fromBase64(cls, data):
          """Build an instance from base64-encoded *data*."""
          # base64.decodebytes just calls binascii.a2b_base64;
          # it seems overkill to use both base64 and binascii.
          decoded = _decode_base64(data)
          return cls(decoded)

      def asBase64(self, maxlinelength=76):
          """Return the payload base64-encoded, wrapped at *maxlinelength*."""
          return _encode_base64(self.data, maxlinelength)

      def __eq__(self, other):
          # Compare against another wrapper of the same class or raw bytes;
          # anything else is left to the other operand.
          if isinstance(other, self.__class__):
              return self.data == other.data
          if isinstance(other, bytes):
              return self.data == other
          return NotImplemented

      def __repr__(self):
          return "%s(%r)" % (self.__class__.__name__, self.data)
  184. #
  185. #
  186. # End of deprecated functionality
  187. #
  188. #
  189. #
  190. # XML support
  191. #
  192. # XML 'header'
  # XML prolog and DOCTYPE written at the start of every XML plist.
  PLISTHEADER = (
      b'<?xml version="1.0" encoding="UTF-8"?>\n'
      b'<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">\n'
  )

  # Regex to find any control chars, except for \t \n and \r
  _controlCharPat = re.compile(
      r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f"
      r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]"
  )
  def _encode_base64(s, maxlinelength=76):
      """Base64-encode *s*, one newline-terminated line per chunk.

      Copied from base64.encodebytes(), with an added maxlinelength
      argument: each output line encodes (maxlinelength // 4) * 3 input
      bytes.
      """
      raw_per_line = (maxlinelength // 4) * 3
      return b''.join(
          binascii.b2a_base64(s[start:start + raw_per_line])
          for start in range(0, len(s), raw_per_line)
      )
  def _decode_base64(s):
      """Decode base64 *s*; a str is first encoded to UTF-8 bytes."""
      payload = s.encode("utf-8") if isinstance(s, str) else s
      return binascii.a2b_base64(payload)
  # Contents should conform to a subset of ISO 8601
  # (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'.  Smaller
  # units may be omitted with a loss of precision)
  _dateParser = re.compile(
      r"(?P<year>\d\d\d\d)"
      r"(?:-(?P<month>\d\d)"
      r"(?:-(?P<day>\d\d)"
      r"(?:T(?P<hour>\d\d)"
      r"(?::(?P<minute>\d\d)"
      r"(?::(?P<second>\d\d))?)?)?)?)?Z",
      re.ASCII,
  )


  def _date_from_string(s):
      """Convert a plist date string into a datetime.datetime.

      The components that matched, up to the first omitted one, are passed
      positionally to datetime.datetime().
      """
      groups = _dateParser.match(s).groupdict()
      fields = []
      for name in ('year', 'month', 'day', 'hour', 'minute', 'second'):
          text = groups[name]
          if text is None:
              # Everything after the first missing unit is absent as well.
              break
          fields.append(int(text))
      return datetime.datetime(*fields)
  def _date_to_string(d):
      """Format *d* as 'YYYY-MM-DDTHH:MM:SSZ' (sub-second part is dropped)."""
      components = (d.year, d.month, d.day, d.hour, d.minute, d.second)
      return '%04d-%02d-%02dT%02d:%02d:%02dZ' % components
  def _escape(text):
      """Return *text* prepared for use as XML character data.

      Raises ValueError if the text contains a control character matched
      by _controlCharPat.  Line endings are normalised to "\\n" and the
      characters &, < and > are replaced by entities.
      """
      if _controlCharPat.search(text) is not None:
          raise ValueError("strings can't contains control characters; "
                           "use bytes instead")
      # Order matters: "\r\n" before "\r", and "&" before the entities
      # that themselves contain "&".
      substitutions = (
          ("\r\n", "\n"),   # convert DOS line endings
          ("\r", "\n"),     # convert Mac line endings
          ("&", "&amp;"),   # escape '&'
          ("<", "&lt;"),    # escape '<'
          (">", "&gt;"),    # escape '>'
      )
      for old, new in substitutions:
          text = text.replace(old, new)
      return text
  class _PlistParser:
      """Expat-driven parser that builds Python objects from an XML plist.

      Element handlers are looked up by name ("begin_<tag>" / "end_<tag>");
      tags without a handler are ignored.
      """

      def __init__(self, use_builtin_types, dict_type):
          self.stack = []          # containers currently being filled
          self.current_key = None  # pending <key> awaiting its value
          self.root = None
          self._use_builtin_types = use_builtin_types
          self._dict_type = dict_type

      def parse(self, fileobj):
          """Parse *fileobj* and return the root object."""
          parser = ParserCreate()
          parser.StartElementHandler = self.handle_begin_element
          parser.EndElementHandler = self.handle_end_element
          parser.CharacterDataHandler = self.handle_data
          self.parser = parser
          parser.ParseFile(fileobj)
          return self.root

      def handle_begin_element(self, element, attrs):
          # Every opening tag starts a fresh character-data buffer.
          self.data = []
          begin = getattr(self, "begin_" + element, None)
          if begin is not None:
              begin(attrs)

      def handle_end_element(self, element):
          end = getattr(self, "end_" + element, None)
          if end is not None:
              end()

      def handle_data(self, data):
          self.data.append(data)

      def add_object(self, value):
          """Attach *value* to the enclosing container, or make it the root."""
          if self.current_key is not None:
              # A key is pending, so the parent must be a dict.
              parent = self.stack[-1]
              if not isinstance(parent, dict):
                  raise ValueError("unexpected element at line %d" %
                                   self.parser.CurrentLineNumber)
              parent[self.current_key] = value
              self.current_key = None
              return
          if not self.stack:
              # this is the root object
              self.root = value
              return
          parent = self.stack[-1]
          if not isinstance(parent, list):
              raise ValueError("unexpected element at line %d" %
                               self.parser.CurrentLineNumber)
          parent.append(value)

      def get_data(self):
          """Return the buffered character data and reset the buffer."""
          text = ''.join(self.data)
          self.data = []
          return text

      # element handlers

      def begin_dict(self, attrs):
          new_dict = self._dict_type()
          self.add_object(new_dict)
          self.stack.append(new_dict)

      def end_dict(self):
          if self.current_key:
              raise ValueError("missing value for key '%s' at line %d" %
                               (self.current_key,
                                self.parser.CurrentLineNumber))
          self.stack.pop()

      def end_key(self):
          if self.current_key or not isinstance(self.stack[-1], dict):
              raise ValueError("unexpected key at line %d" %
                               self.parser.CurrentLineNumber)
          self.current_key = self.get_data()

      def begin_array(self, attrs):
          new_list = []
          self.add_object(new_list)
          self.stack.append(new_list)

      def end_array(self):
          self.stack.pop()

      def end_true(self):
          self.add_object(True)

      def end_false(self):
          self.add_object(False)

      def end_integer(self):
          self.add_object(int(self.get_data()))

      def end_real(self):
          self.add_object(float(self.get_data()))

      def end_string(self):
          self.add_object(self.get_data())

      def end_data(self):
          # Raw bytes when builtin types are requested, else the Data wrapper.
          encoded = self.get_data()
          if self._use_builtin_types:
              self.add_object(_decode_base64(encoded))
          else:
              self.add_object(Data.fromBase64(encoded))

      def end_date(self):
          self.add_object(_date_from_string(self.get_data()))
  class _DumbXMLWriter:
      """Minimal indenting XML writer that emits bytes to a binary file.

      Parameters:
          file: object with a write() method accepting bytes.
          indent_level: initial nesting depth.
          indent: indentation unit written once per nesting level.  It is
              written directly to *file*, so it is stored as bytes; a str
              is encoded to UTF-8.
      """

      def __init__(self, file, indent_level=0, indent=b"\t"):
          # writeln() writes the indentation to the same binary file as the
          # encoded line, so a str indent (the former default) could never
          # be written.  Normalise it to bytes once, here.
          if isinstance(indent, str):
              indent = indent.encode('utf-8')
          self.file = file
          self.stack = []
          self._indent_level = indent_level
          self.indent = indent

      def begin_element(self, element):
          """Write an opening tag and indent what follows."""
          self.stack.append(element)
          self.writeln("<%s>" % element)
          self._indent_level += 1

      def end_element(self, element):
          """Write the closing tag matching the innermost open element."""
          assert self._indent_level > 0
          assert self.stack.pop() == element
          self._indent_level -= 1
          self.writeln("</%s>" % element)

      def simple_element(self, element, value=None):
          """Write <element>value</element>, or <element/> if value is None."""
          if value is not None:
              value = _escape(value)
              self.writeln("<%s>%s</%s>" % (element, value, element))
          else:
              self.writeln("<%s/>" % element)

      def writeln(self, line):
          """Write *line* at the current indentation, followed by a newline.

          An empty line produces only the newline, without indentation.
          """
          if line:
              # plist has fixed encoding of utf-8
              if isinstance(line, str):
                  line = line.encode('utf-8')
              self.file.write(self._indent_level * self.indent)
              self.file.write(line)
          self.file.write(b'\n')
  class _PlistWriter(_DumbXMLWriter):
      """Serialise a Python object tree as an XML property list."""

      def __init__(
              self, file, indent_level=0, indent=b"\t", writeHeader=1,
              sort_keys=True, skipkeys=False):
          # The XML prolog goes out before anything else is written.
          if writeHeader:
              file.write(PLISTHEADER)
          super().__init__(file, indent_level, indent)
          self._sort_keys = sort_keys
          self._skipkeys = skipkeys

      def write(self, value):
          """Write *value* wrapped in the <plist> root element."""
          self.writeln("<plist version=\"1.0\">")
          self.write_value(value)
          self.writeln("</plist>")

      def write_value(self, value):
          """Dispatch on the type of *value*; raise TypeError if unsupported."""
          if isinstance(value, str):
              self.simple_element("string", value)
              return
          # Booleans are tested by identity before the int check, because
          # bool is a subclass of int.
          if value is True:
              self.simple_element("true")
              return
          if value is False:
              self.simple_element("false")
              return
          if isinstance(value, int):
              if not (-1 << 63 <= value < 1 << 64):
                  raise OverflowError(value)
              self.simple_element("integer", "%d" % value)
              return
          if isinstance(value, float):
              self.simple_element("real", repr(value))
              return
          if isinstance(value, dict):
              self.write_dict(value)
              return
          if isinstance(value, Data):
              self.write_data(value)
              return
          if isinstance(value, (bytes, bytearray)):
              self.write_bytes(value)
              return
          if isinstance(value, datetime.datetime):
              self.simple_element("date", _date_to_string(value))
              return
          if isinstance(value, (tuple, list)):
              self.write_array(value)
              return
          raise TypeError("unsupported type: %s" % type(value))

      def write_data(self, data):
          """Write the payload of a Data wrapper."""
          self.write_bytes(data.data)

      def write_bytes(self, data):
          """Write *data* as a base64 <data> element."""
          self.begin_element("data")
          # The base64 lines are written at the same depth as the tags.
          self._indent_level -= 1
          # Width taken by the indentation, counting each tab as 8 columns.
          expanded_indent = self.indent.replace(b"\t", b" " * 8)
          indent_width = len(expanded_indent * self._indent_level)
          maxlinelength = max(16, 76 - indent_width)

          encoded = _encode_base64(data, maxlinelength)
          for line in encoded.split(b"\n"):
              if line:
                  self.writeln(line)
          self._indent_level += 1
          self.end_element("data")

      def write_dict(self, d):
          """Write a <dict>; an empty mapping becomes <dict/>."""
          if not d:
              self.simple_element("dict")
              return

          self.begin_element("dict")
          items = sorted(d.items()) if self._sort_keys else d.items()
          for key, value in items:
              if not isinstance(key, str):
                  if self._skipkeys:
                      continue
                  raise TypeError("keys must be strings")
              self.simple_element("key", key)
              self.write_value(value)
          self.end_element("dict")

      def write_array(self, array):
          """Write an <array>; an empty sequence becomes <array/>."""
          if not array:
              self.simple_element("array")
              return

          self.begin_element("array")
          for item in array:
              self.write_value(item)
          self.end_element("array")
  def _is_fmt_xml(header):
      """Return True if *header* (leading bytes of a file) looks like XML."""
      prefixes = (b'<?xml', b'<plist')

      if any(header.startswith(pfx) for pfx in prefixes):
          return True

      # Also check for alternative XML encodings, this is slightly
      # overkill because the Apple tools (and plistlib) will not
      # generate files with these encodings.
      bom_encodings = (
          (codecs.BOM_UTF8, "utf-8"),
          (codecs.BOM_UTF16_BE, "utf-16-be"),
          (codecs.BOM_UTF16_LE, "utf-16-le"),
          # expat does not support utf-32
          # (codecs.BOM_UTF32_BE, "utf-32-be"),
          # (codecs.BOM_UTF32_LE, "utf-32-le"),
      )
      for bom, encoding in bom_encodings:
          if not header.startswith(bom):
              continue
          for start in prefixes:
              # BOM followed by the prefix re-encoded in that encoding.
              candidate = bom + start.decode('ascii').encode(encoding)
              if header[:len(candidate)] == candidate:
                  return True

      return False
  456. #
  457. # Binary Plist
  458. #
  class InvalidFileException(ValueError):
      """Raised when a file cannot be parsed as a property list."""

      def __init__(self, message="Invalid file"):
          super().__init__(message)
  # struct format character for each unsigned integer width, in bytes.
  _BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}


  class _BinaryPlistParser:
      """
      Read a binary plist file, following the description of the binary
      format.  Raise InvalidFileException in case of error, otherwise return
      the root object.

      see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c
      """

      def __init__(self, use_builtin_types, dict_type):
          self._use_builtin_types = use_builtin_types
          self._dict_type = dict_type

      def parse(self, fp):
          """Parse the seekable binary file *fp* and return its top object."""
          try:
              # The basic file format:
              # HEADER
              # object...
              # refid->offset...
              # TRAILER
              self._fp = fp
              fp.seek(-32, os.SEEK_END)
              trailer = fp.read(32)
              if len(trailer) != 32:
                  raise InvalidFileException()
              (offset_size, self._ref_size, num_objects, top_object,
               offset_table_offset) = struct.unpack('>6xBBQQQ', trailer)
              fp.seek(offset_table_offset)
              self._object_offsets = self._read_ints(num_objects, offset_size)
              return self._read_object(self._object_offsets[top_object])

          except (OSError, IndexError, struct.error):
              raise InvalidFileException()

      def _get_size(self, tokenL):
          """ return the size of the next object."""
          if tokenL != 0xF:
              return tokenL
          # 0xF means the size follows as an integer whose byte width is
          # given by the low two bits of the next byte.
          exponent = self._fp.read(1)[0] & 0x3
          width = 1 << exponent
          fmt = '>' + _BINARY_FORMAT[width]
          return struct.unpack(fmt, self._fp.read(width))[0]

      def _read_ints(self, n, size):
          """Read *n* big-endian unsigned integers of *size* bytes each."""
          raw = self._fp.read(size * n)
          fmt_char = _BINARY_FORMAT.get(size)
          if fmt_char is not None:
              return struct.unpack('>' + fmt_char * n, raw)
          # Widths struct has no code for are decoded one slice at a time.
          return tuple(int.from_bytes(raw[i: i + size], 'big')
                       for i in range(0, size * n, size))

      def _read_refs(self, n):
          """Read *n* object references."""
          return self._read_ints(n, self._ref_size)

      def _read_object(self, offset):
          """
          read the object at offset.

          May recursively read sub-objects (content of an array/dict/set)
          """
          fp = self._fp
          fp.seek(offset)
          token = fp.read(1)[0]
          tokenH, tokenL = token & 0xF0, token & 0x0F

          if token == 0x00:
              return None
          if token == 0x08:
              return False
          if token == 0x09:
              return True

          # The referenced source code also mentions URL (0x0c, 0x0d) and
          # UUID (0x0e), but neither can be generated using the Cocoa libraries.

          if token == 0x0f:
              return b''

          if tokenH == 0x10:  # int
              return int.from_bytes(fp.read(1 << tokenL),
                                    'big', signed=tokenL >= 3)

          if token == 0x22:  # real
              return struct.unpack('>f', fp.read(4))[0]

          if token == 0x23:  # real
              return struct.unpack('>d', fp.read(8))[0]

          if token == 0x33:  # date
              seconds = struct.unpack('>d', fp.read(8))[0]
              # timestamp 0 of binary plists corresponds to 1/1/2001
              # (year of Mac OS X 10.0), instead of 1/1/1970.
              epoch = datetime.datetime(2001, 1, 1)
              return epoch + datetime.timedelta(seconds=seconds)

          if tokenH == 0x40:  # data
              size = self._get_size(tokenL)
              if self._use_builtin_types:
                  return fp.read(size)
              return Data(fp.read(size))

          if tokenH == 0x50:  # ascii string
              size = self._get_size(tokenL)
              return fp.read(size).decode('ascii')

          if tokenH == 0x60:  # unicode string
              size = self._get_size(tokenL)
              return fp.read(size * 2).decode('utf-16be')

          # tokenH == 0x80 is documented as 'UID' and appears to be used for
          # keyed-archiving, not in plists.

          if tokenH == 0xA0:  # array
              size = self._get_size(tokenL)
              obj_refs = self._read_refs(size)
              return [self._read_object(self._object_offsets[ref])
                      for ref in obj_refs]

          # tokenH == 0xB0 is documented as 'ordset', but is not actually
          # implemented in the Apple reference code.

          # tokenH == 0xC0 is documented as 'set', but sets cannot be used in
          # plists.

          if tokenH == 0xD0:  # dict
              size = self._get_size(tokenL)
              key_refs = self._read_refs(size)
              obj_refs = self._read_refs(size)
              result = self._dict_type()
              for key_ref, obj_ref in zip(key_refs, obj_refs):
                  # The value is read before the key, as in a plain
                  # "result[key_expr] = value_expr" assignment.
                  obj = self._read_object(self._object_offsets[obj_ref])
                  key = self._read_object(self._object_offsets[key_ref])
                  result[key] = obj
              return result

          raise InvalidFileException()
  def _count_to_size(count):
      """Return the smallest of 1, 2, 4 or 8 bytes that can hold *count*.

      *count* is treated as an unsigned integer; anything that does not fit
      in 4 bytes is given 8.
      """
      if count < 1 << 8:
          return 1
      elif count < 1 << 16:
          return 2
      # This comparison was previously written "count << 1 << 32", a shift
      # that is always truthy here, so the 8-byte branch was unreachable.
      elif count < 1 << 32:
          return 4
      else:
          return 8
  class _BinaryPlistWriter(object):
      """Serialise a Python object tree as a binary ("bplist00") plist.

      Parameters:
          fp: binary file object supporting write() and tell().
          sort_keys: if true, dictionary items are written in sorted order.
          skipkeys: if true, non-string dictionary keys are silently
              skipped instead of raising TypeError.
      """

      def __init__(self, fp, sort_keys, skipkeys):
          self._fp = fp
          self._sort_keys = sort_keys
          self._skipkeys = skipkeys

      def write(self, value):
          """Write *value* as a complete binary plist to the file."""
          # Flattened object list:
          self._objlist = []

          # Mappings from object->objectid
          # First dict has (type(object), object) as the key,
          # second dict is used when object is not hashable and
          # has id(object) as the key.
          self._objtable = {}
          self._objidtable = {}

          # Create list of all objects in the plist
          self._flatten(value)

          # Size of object references in serialized containers
          # depends on the number of objects in the plist.
          num_objects = len(self._objlist)
          self._object_offsets = [0] * num_objects
          self._ref_size = _count_to_size(num_objects)
          self._ref_format = _BINARY_FORMAT[self._ref_size]

          # Write file header
          self._fp.write(b'bplist00')

          # Write object list
          for obj in self._objlist:
              self._write_object(obj)

          # Write refnum->object offset table
          top_object = self._getrefnum(value)
          offset_table_offset = self._fp.tell()
          offset_size = _count_to_size(offset_table_offset)
          offset_format = '>' + _BINARY_FORMAT[offset_size] * num_objects
          self._fp.write(struct.pack(offset_format, *self._object_offsets))

          # Write trailer
          sort_version = 0
          trailer = (
              sort_version, offset_size, self._ref_size, num_objects,
              top_object, offset_table_offset
          )
          self._fp.write(struct.pack('>5xBBBQQQ', *trailer))

      def _flatten(self, value):
          """Append *value* and, recursively, its contents to the object list."""
          # First check if the object is in the object table, not used for
          # containers to ensure that two subcontainers with the same contents
          # will be serialized as distinct values.
          #
          # bytearray is deliberately not listed here: it is unhashable, so
          # the membership test would raise TypeError.  It is registered by
          # id() through the TypeError fallback below instead.
          if isinstance(value, (
                  str, int, float, datetime.datetime, bytes)):
              if (type(value), value) in self._objtable:
                  return

          elif isinstance(value, Data):
              if (type(value.data), value.data) in self._objtable:
                  return

          # Add to objectreference map
          refnum = len(self._objlist)
          self._objlist.append(value)
          try:
              if isinstance(value, Data):
                  self._objtable[(type(value.data), value.data)] = refnum
              else:
                  self._objtable[(type(value), value)] = refnum
          except TypeError:
              # Unhashable objects are tracked by identity.
              self._objidtable[id(value)] = refnum

          # And finally recurse into containers
          if isinstance(value, dict):
              keys = []
              values = []
              items = value.items()
              if self._sort_keys:
                  items = sorted(items)

              for k, v in items:
                  if not isinstance(k, str):
                      if self._skipkeys:
                          continue
                      raise TypeError("keys must be strings")
                  keys.append(k)
                  values.append(v)

              # All keys are flattened before any value.
              for o in itertools.chain(keys, values):
                  self._flatten(o)

          elif isinstance(value, (list, tuple)):
              for o in value:
                  self._flatten(o)

      def _getrefnum(self, value):
          """Return the reference number assigned to *value* by _flatten()."""
          try:
              if isinstance(value, Data):
                  return self._objtable[(type(value.data), value.data)]
              else:
                  return self._objtable[(type(value), value)]
          except TypeError:
              return self._objidtable[id(value)]

      def _write_size(self, token, size):
          """Write an object marker *token* carrying the count *size*.

          Counts below 15 are stored in the marker's low nibble; larger
          counts set the nibble to 0xF and follow as an integer object.
          """
          if size < 15:
              self._fp.write(struct.pack('>B', token | size))

          elif size < 1 << 8:
              self._fp.write(struct.pack('>BBB', token | 0xF, 0x10, size))

          elif size < 1 << 16:
              self._fp.write(struct.pack('>BBH', token | 0xF, 0x11, size))

          elif size < 1 << 32:
              self._fp.write(struct.pack('>BBL', token | 0xF, 0x12, size))

          else:
              self._fp.write(struct.pack('>BBQ', token | 0xF, 0x13, size))

      def _write_object(self, value):
          """Write one flattened object and record its file offset.

          Raises OverflowError for integers outside the representable
          range and TypeError for unsupported types or non-string keys.
          """
          ref = self._getrefnum(value)
          self._object_offsets[ref] = self._fp.tell()
          if value is None:
              self._fp.write(b'\x00')

          elif value is False:
              self._fp.write(b'\x08')

          elif value is True:
              self._fp.write(b'\x09')

          elif isinstance(value, int):
              if value < 0:
                  try:
                      self._fp.write(struct.pack('>Bq', 0x13, value))
                  except struct.error:
                      raise OverflowError(value) from None
              elif value < 1 << 8:
                  self._fp.write(struct.pack('>BB', 0x10, value))
              elif value < 1 << 16:
                  self._fp.write(struct.pack('>BH', 0x11, value))
              elif value < 1 << 32:
                  self._fp.write(struct.pack('>BL', 0x12, value))
              elif value < 1 << 63:
                  self._fp.write(struct.pack('>BQ', 0x13, value))
              elif value < 1 << 64:
                  self._fp.write(b'\x14' + value.to_bytes(16, 'big', signed=True))
              else:
                  raise OverflowError(value)

          elif isinstance(value, float):
              self._fp.write(struct.pack('>Bd', 0x23, value))

          elif isinstance(value, datetime.datetime):
              # Seconds relative to 2001-01-01.
              f = (value - datetime.datetime(2001, 1, 1)).total_seconds()
              self._fp.write(struct.pack('>Bd', 0x33, f))

          elif isinstance(value, Data):
              self._write_size(0x40, len(value.data))
              self._fp.write(value.data)

          elif isinstance(value, (bytes, bytearray)):
              self._write_size(0x40, len(value))
              self._fp.write(value)

          elif isinstance(value, str):
              try:
                  t = value.encode('ascii')
                  self._write_size(0x50, len(value))
              except UnicodeEncodeError:
                  t = value.encode('utf-16be')
                  # The count is in 16-bit code units, not code points: the
                  # reader consumes count * 2 bytes, and a character outside
                  # the BMP occupies two units.
                  self._write_size(0x60, len(t) // 2)

              self._fp.write(t)

          elif isinstance(value, (list, tuple)):
              refs = [self._getrefnum(o) for o in value]
              s = len(refs)
              self._write_size(0xA0, s)
              self._fp.write(struct.pack('>' + self._ref_format * s, *refs))

          elif isinstance(value, dict):
              keyRefs, valRefs = [], []

              if self._sort_keys:
                  rootItems = sorted(value.items())
              else:
                  rootItems = value.items()

              for k, v in rootItems:
                  if not isinstance(k, str):
                      if self._skipkeys:
                          continue
                      raise TypeError("keys must be strings")
                  keyRefs.append(self._getrefnum(k))
                  valRefs.append(self._getrefnum(v))

              s = len(keyRefs)
              self._write_size(0xD0, s)
              self._fp.write(struct.pack('>' + self._ref_format * s, *keyRefs))
              self._fp.write(struct.pack('>' + self._ref_format * s, *valRefs))

          else:
              raise TypeError(value)
  def _is_fmt_binary(header):
      """Return True if *header* starts with the binary plist magic."""
      magic = b'bplist00'
      return header[:len(magic)] == magic
  755. #
  756. # Generic bits
  757. #
  # Per-format registry: how to detect the format from a file header, and
  # which classes parse and write it.
  _FORMATS = {
      FMT_XML: {
          'detect': _is_fmt_xml,
          'parser': _PlistParser,
          'writer': _PlistWriter,
      },
      FMT_BINARY: {
          'detect': _is_fmt_binary,
          'parser': _BinaryPlistParser,
          'writer': _BinaryPlistWriter,
      },
  }
  def load(fp, *, fmt=None, use_builtin_types=True, dict_type=dict):
      """Read a .plist file. 'fp' should be (readable) file object.
      Return the unpacked root object (which usually is a dictionary).

      When 'fmt' is None the format is detected from the first 32 bytes
      of the file, and InvalidFileException is raised if no known format
      matches.
      """
      if fmt is not None:
          parser_cls = _FORMATS[fmt]['parser']
      else:
          header = fp.read(32)
          fp.seek(0)
          for candidate in _FORMATS.values():
              if candidate['detect'](header):
                  parser_cls = candidate['parser']
                  break
          else:
              raise InvalidFileException()

      parser = parser_cls(
          use_builtin_types=use_builtin_types, dict_type=dict_type)
      return parser.parse(fp)
  def loads(value, *, fmt=None, use_builtin_types=True, dict_type=dict):
      """Read a .plist file from a bytes object.
      Return the unpacked root object (which usually is a dictionary).
      """
      return load(
          BytesIO(value),
          fmt=fmt,
          use_builtin_types=use_builtin_types,
          dict_type=dict_type,
      )
  def dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False):
      """Write 'value' to a .plist file. 'fp' should be a (writable)
      file object.

      Raises ValueError if 'fmt' is not a supported format.
      """
      if fmt not in _FORMATS:
          raise ValueError("Unsupported format: %r"%(fmt,))

      writer_cls = _FORMATS[fmt]["writer"]
      writer_cls(fp, sort_keys=sort_keys, skipkeys=skipkeys).write(value)
  def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True):
      """Return a bytes object with the contents for a .plist file.
      """
      buffer = BytesIO()
      dump(value, buffer, fmt=fmt, skipkeys=skipkeys, sort_keys=sort_keys)
      return buffer.getvalue()