uuid.py 22 KB


r"""UUID objects (universally unique identifiers) according to RFC 4122.

This module provides immutable UUID objects (class UUID) and the functions
uuid1(), uuid3(), uuid4(), uuid5() for generating version 1, 3, 4, and 5
UUIDs as specified in RFC 4122.

If all you want is a unique ID, you should probably call uuid1() or uuid4().
Note that uuid1() may compromise privacy since it creates a UUID containing
the computer's network address.  uuid4() creates a random UUID.

Typical usage:

    >>> import uuid

    # make a UUID based on the host ID and current time
    >>> uuid.uuid1()
    UUID('a8098c1a-f86e-11da-bd1a-00112444be1e')

    # make a UUID using an MD5 hash of a namespace UUID and a name
    >>> uuid.uuid3(uuid.NAMESPACE_DNS, 'python.org')
    UUID('6fa459ea-ee8a-3ca4-894e-db77e160355e')

    # make a random UUID
    >>> uuid.uuid4()
    UUID('16fd2706-8baf-433b-82eb-8c7fada847da')

    # make a UUID using a SHA-1 hash of a namespace UUID and a name
    >>> uuid.uuid5(uuid.NAMESPACE_DNS, 'python.org')
    UUID('886313e1-3b8a-5372-9b90-0c9aee199e5d')

    # make a UUID from a string of hex digits (braces and hyphens ignored)
    >>> x = uuid.UUID('{00010203-0405-0607-0809-0a0b0c0d0e0f}')

    # convert a UUID to a string of hex digits in standard form
    >>> str(x)
    '00010203-0405-0607-0809-0a0b0c0d0e0f'

    # get the raw 16 bytes of the UUID
    >>> x.bytes
    '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f'

    # make a UUID from a 16-byte string
    >>> uuid.UUID(bytes=x.bytes)
    UUID('00010203-0405-0607-0809-0a0b0c0d0e0f')
"""
  34. import os
  35. __author__ = 'Ka-Ping Yee <ping@zesty.ca>'
  36. RESERVED_NCS, RFC_4122, RESERVED_MICROSOFT, RESERVED_FUTURE = [
  37. 'reserved for NCS compatibility', 'specified in RFC 4122',
  38. 'reserved for Microsoft compatibility', 'reserved for future definition']
  39. class UUID(object):
  40. """Instances of the UUID class represent UUIDs as specified in RFC 4122.
  41. UUID objects are immutable, hashable, and usable as dictionary keys.
  42. Converting a UUID to a string with str() yields something in the form
  43. '12345678-1234-1234-1234-123456789abc'. The UUID constructor accepts
  44. five possible forms: a similar string of hexadecimal digits, or a tuple
  45. of six integer fields (with 32-bit, 16-bit, 16-bit, 8-bit, 8-bit, and
  46. 48-bit values respectively) as an argument named 'fields', or a string
  47. of 16 bytes (with all the integer fields in big-endian order) as an
  48. argument named 'bytes', or a string of 16 bytes (with the first three
  49. fields in little-endian order) as an argument named 'bytes_le', or a
  50. single 128-bit integer as an argument named 'int'.
  51. UUIDs have these read-only attributes:
  52. bytes the UUID as a 16-byte string (containing the six
  53. integer fields in big-endian byte order)
  54. bytes_le the UUID as a 16-byte string (with time_low, time_mid,
  55. and time_hi_version in little-endian byte order)
  56. fields a tuple of the six integer fields of the UUID,
  57. which are also available as six individual attributes
  58. and two derived attributes:
  59. time_low the first 32 bits of the UUID
  60. time_mid the next 16 bits of the UUID
  61. time_hi_version the next 16 bits of the UUID
  62. clock_seq_hi_variant the next 8 bits of the UUID
  63. clock_seq_low the next 8 bits of the UUID
  64. node the last 48 bits of the UUID
  65. time the 60-bit timestamp
  66. clock_seq the 14-bit sequence number
  67. hex the UUID as a 32-character hexadecimal string
  68. int the UUID as a 128-bit integer
  69. urn the UUID as a URN as specified in RFC 4122
  70. variant the UUID variant (one of the constants RESERVED_NCS,
  71. RFC_4122, RESERVED_MICROSOFT, or RESERVED_FUTURE)
  72. version the UUID version number (1 through 5, meaningful only
  73. when the variant is RFC_4122)
  74. """
  75. def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None,
  76. int=None, version=None):
  77. r"""Create a UUID from either a string of 32 hexadecimal digits,
  78. a string of 16 bytes as the 'bytes' argument, a string of 16 bytes
  79. in little-endian order as the 'bytes_le' argument, a tuple of six
  80. integers (32-bit time_low, 16-bit time_mid, 16-bit time_hi_version,
  81. 8-bit clock_seq_hi_variant, 8-bit clock_seq_low, 48-bit node) as
  82. the 'fields' argument, or a single 128-bit integer as the 'int'
  83. argument. When a string of hex digits is given, curly braces,
  84. hyphens, and a URN prefix are all optional. For example, these
  85. expressions all yield the same UUID:
  86. UUID('{12345678-1234-5678-1234-567812345678}')
  87. UUID('12345678123456781234567812345678')
  88. UUID('urn:uuid:12345678-1234-5678-1234-567812345678')
  89. UUID(bytes='\x12\x34\x56\x78'*4)
  90. UUID(bytes_le='\x78\x56\x34\x12\x34\x12\x78\x56' +
  91. '\x12\x34\x56\x78\x12\x34\x56\x78')
  92. UUID(fields=(0x12345678, 0x1234, 0x5678, 0x12, 0x34, 0x567812345678))
  93. UUID(int=0x12345678123456781234567812345678)
  94. Exactly one of 'hex', 'bytes', 'bytes_le', 'fields', or 'int' must
  95. be given. The 'version' argument is optional; if given, the resulting
  96. UUID will have its variant and version set according to RFC 4122,
  97. overriding the given 'hex', 'bytes', 'bytes_le', 'fields', or 'int'.
  98. """
  99. if [hex, bytes, bytes_le, fields, int].count(None) != 4:
  100. raise TypeError('need one of hex, bytes, bytes_le, fields, or int')
  101. if hex is not None:
  102. hex = hex.replace('urn:', '').replace('uuid:', '')
  103. hex = hex.strip('{}').replace('-', '')
  104. if len(hex) != 32:
  105. raise ValueError('badly formed hexadecimal UUID string')
  106. int = long(hex, 16)
  107. if bytes_le is not None:
  108. if len(bytes_le) != 16:
  109. raise ValueError('bytes_le is not a 16-char string')
  110. bytes = (bytes_le[3] + bytes_le[2] + bytes_le[1] + bytes_le[0] +
  111. bytes_le[5] + bytes_le[4] + bytes_le[7] + bytes_le[6] +
  112. bytes_le[8:])
  113. if bytes is not None:
  114. if len(bytes) != 16:
  115. raise ValueError('bytes is not a 16-char string')
  116. int = long(('%02x'*16) % tuple(map(ord, bytes)), 16)
  117. if fields is not None:
  118. if len(fields) != 6:
  119. raise ValueError('fields is not a 6-tuple')
  120. (time_low, time_mid, time_hi_version,
  121. clock_seq_hi_variant, clock_seq_low, node) = fields
  122. if not 0 <= time_low < 1<<32L:
  123. raise ValueError('field 1 out of range (need a 32-bit value)')
  124. if not 0 <= time_mid < 1<<16L:
  125. raise ValueError('field 2 out of range (need a 16-bit value)')
  126. if not 0 <= time_hi_version < 1<<16L:
  127. raise ValueError('field 3 out of range (need a 16-bit value)')
  128. if not 0 <= clock_seq_hi_variant < 1<<8L:
  129. raise ValueError('field 4 out of range (need an 8-bit value)')
  130. if not 0 <= clock_seq_low < 1<<8L:
  131. raise ValueError('field 5 out of range (need an 8-bit value)')
  132. if not 0 <= node < 1<<48L:
  133. raise ValueError('field 6 out of range (need a 48-bit value)')
  134. clock_seq = (clock_seq_hi_variant << 8L) | clock_seq_low
  135. int = ((time_low << 96L) | (time_mid << 80L) |
  136. (time_hi_version << 64L) | (clock_seq << 48L) | node)
  137. if int is not None:
  138. if not 0 <= int < 1<<128L:
  139. raise ValueError('int is out of range (need a 128-bit value)')
  140. if version is not None:
  141. if not 1 <= version <= 5:
  142. raise ValueError('illegal version number')
  143. # Set the variant to RFC 4122.
  144. int &= ~(0xc000 << 48L)
  145. int |= 0x8000 << 48L
  146. # Set the version number.
  147. int &= ~(0xf000 << 64L)
  148. int |= version << 76L
  149. self.__dict__['int'] = int
  150. def __cmp__(self, other):
  151. if isinstance(other, UUID):
  152. return cmp(self.int, other.int)
  153. return NotImplemented
  154. def __hash__(self):
  155. return hash(self.int)
  156. def __int__(self):
  157. return self.int
  158. def __repr__(self):
  159. return 'UUID(%r)' % str(self)
  160. def __setattr__(self, name, value):
  161. raise TypeError('UUID objects are immutable')
  162. def __str__(self):
  163. hex = '%032x' % self.int
  164. return '%s-%s-%s-%s-%s' % (
  165. hex[:8], hex[8:12], hex[12:16], hex[16:20], hex[20:])
  166. def get_bytes(self):
  167. bytes = ''
  168. for shift in range(0, 128, 8):
  169. bytes = chr((self.int >> shift) & 0xff) + bytes
  170. return bytes
  171. bytes = property(get_bytes)
  172. def get_bytes_le(self):
  173. bytes = self.bytes
  174. return (bytes[3] + bytes[2] + bytes[1] + bytes[0] +
  175. bytes[5] + bytes[4] + bytes[7] + bytes[6] + bytes[8:])
  176. bytes_le = property(get_bytes_le)
  177. def get_fields(self):
  178. return (self.time_low, self.time_mid, self.time_hi_version,
  179. self.clock_seq_hi_variant, self.clock_seq_low, self.node)
  180. fields = property(get_fields)
  181. def get_time_low(self):
  182. return self.int >> 96L
  183. time_low = property(get_time_low)
  184. def get_time_mid(self):
  185. return (self.int >> 80L) & 0xffff
  186. time_mid = property(get_time_mid)
  187. def get_time_hi_version(self):
  188. return (self.int >> 64L) & 0xffff
  189. time_hi_version = property(get_time_hi_version)
  190. def get_clock_seq_hi_variant(self):
  191. return (self.int >> 56L) & 0xff
  192. clock_seq_hi_variant = property(get_clock_seq_hi_variant)
  193. def get_clock_seq_low(self):
  194. return (self.int >> 48L) & 0xff
  195. clock_seq_low = property(get_clock_seq_low)
  196. def get_time(self):
  197. return (((self.time_hi_version & 0x0fffL) << 48L) |
  198. (self.time_mid << 32L) | self.time_low)
  199. time = property(get_time)
  200. def get_clock_seq(self):
  201. return (((self.clock_seq_hi_variant & 0x3fL) << 8L) |
  202. self.clock_seq_low)
  203. clock_seq = property(get_clock_seq)
  204. def get_node(self):
  205. return self.int & 0xffffffffffff
  206. node = property(get_node)
  207. def get_hex(self):
  208. return '%032x' % self.int
  209. hex = property(get_hex)
  210. def get_urn(self):
  211. return 'urn:uuid:' + str(self)
  212. urn = property(get_urn)
  213. def get_variant(self):
  214. if not self.int & (0x8000 << 48L):
  215. return RESERVED_NCS
  216. elif not self.int & (0x4000 << 48L):
  217. return RFC_4122
  218. elif not self.int & (0x2000 << 48L):
  219. return RESERVED_MICROSOFT
  220. else:
  221. return RESERVED_FUTURE
  222. variant = property(get_variant)
  223. def get_version(self):
  224. # The version bits are only meaningful for RFC 4122 UUIDs.
  225. if self.variant == RFC_4122:
  226. return int((self.int >> 76L) & 0xf)
  227. version = property(get_version)
  228. def _popen(command, args):
  229. import os
  230. path = os.environ.get("PATH", os.defpath).split(os.pathsep)
  231. path.extend(('/sbin', '/usr/sbin'))
  232. for dir in path:
  233. executable = os.path.join(dir, command)
  234. if (os.path.exists(executable) and
  235. os.access(executable, os.F_OK | os.X_OK) and
  236. not os.path.isdir(executable)):
  237. break
  238. else:
  239. return None
  240. # LC_ALL to ensure English output, 2>/dev/null to prevent output on
  241. # stderr (Note: we don't have an example where the words we search for
  242. # are actually localized, but in theory some system could do so.)
  243. cmd = 'LC_ALL=C %s %s 2>/dev/null' % (executable, args)
  244. return os.popen(cmd)
  245. def _find_mac(command, args, hw_identifiers, get_index):
  246. try:
  247. pipe = _popen(command, args)
  248. if not pipe:
  249. return
  250. with pipe:
  251. for line in pipe:
  252. words = line.lower().rstrip().split()
  253. for i in range(len(words)):
  254. if words[i] in hw_identifiers:
  255. try:
  256. word = words[get_index(i)]
  257. mac = int(word.replace(':', ''), 16)
  258. if mac:
  259. return mac
  260. except (ValueError, IndexError):
  261. # Virtual interfaces, such as those provided by
  262. # VPNs, do not have a colon-delimited MAC address
  263. # as expected, but a 16-byte HWAddr separated by
  264. # dashes. These should be ignored in favor of a
  265. # real MAC address
  266. pass
  267. except IOError:
  268. pass
  269. def _ifconfig_getnode():
  270. """Get the hardware address on Unix by running ifconfig."""
  271. # This works on Linux ('' or '-a'), Tru64 ('-av'), but not all Unixes.
  272. for args in ('', '-a', '-av'):
  273. mac = _find_mac('ifconfig', args, ['hwaddr', 'ether'], lambda i: i+1)
  274. if mac:
  275. return mac
  276. def _arp_getnode():
  277. """Get the hardware address on Unix by running arp."""
  278. import os, socket
  279. try:
  280. ip_addr = socket.gethostbyname(socket.gethostname())
  281. except EnvironmentError:
  282. return None
  283. # Try getting the MAC addr from arp based on our IP address (Solaris).
  284. return _find_mac('arp', '-an', [ip_addr], lambda i: -1)
  285. def _lanscan_getnode():
  286. """Get the hardware address on Unix by running lanscan."""
  287. # This might work on HP-UX.
  288. return _find_mac('lanscan', '-ai', ['lan0'], lambda i: 0)
  289. def _netstat_getnode():
  290. """Get the hardware address on Unix by running netstat."""
  291. # This might work on AIX, Tru64 UNIX and presumably on IRIX.
  292. try:
  293. pipe = _popen('netstat', '-ia')
  294. if not pipe:
  295. return
  296. with pipe:
  297. words = pipe.readline().rstrip().split()
  298. try:
  299. i = words.index('Address')
  300. except ValueError:
  301. return
  302. for line in pipe:
  303. try:
  304. words = line.rstrip().split()
  305. word = words[i]
  306. if len(word) == 17 and word.count(':') == 5:
  307. mac = int(word.replace(':', ''), 16)
  308. if mac:
  309. return mac
  310. except (ValueError, IndexError):
  311. pass
  312. except OSError:
  313. pass
  314. def _ipconfig_getnode():
  315. """Get the hardware address on Windows by running ipconfig.exe."""
  316. import os, re
  317. dirs = ['', r'c:\windows\system32', r'c:\winnt\system32']
  318. try:
  319. import ctypes
  320. buffer = ctypes.create_string_buffer(300)
  321. ctypes.windll.kernel32.GetSystemDirectoryA(buffer, 300)
  322. dirs.insert(0, buffer.value.decode('mbcs'))
  323. except:
  324. pass
  325. for dir in dirs:
  326. try:
  327. pipe = os.popen(os.path.join(dir, 'ipconfig') + ' /all')
  328. except IOError:
  329. continue
  330. with pipe:
  331. for line in pipe:
  332. value = line.split(':')[-1].strip().lower()
  333. if re.match('([0-9a-f][0-9a-f]-){5}[0-9a-f][0-9a-f]', value):
  334. return int(value.replace('-', ''), 16)
  335. def _netbios_getnode():
  336. """Get the hardware address on Windows using NetBIOS calls.
  337. See http://support.microsoft.com/kb/118623 for details."""
  338. import win32wnet, netbios
  339. ncb = netbios.NCB()
  340. ncb.Command = netbios.NCBENUM
  341. ncb.Buffer = adapters = netbios.LANA_ENUM()
  342. adapters._pack()
  343. if win32wnet.Netbios(ncb) != 0:
  344. return
  345. adapters._unpack()
  346. for i in range(adapters.length):
  347. ncb.Reset()
  348. ncb.Command = netbios.NCBRESET
  349. ncb.Lana_num = ord(adapters.lana[i])
  350. if win32wnet.Netbios(ncb) != 0:
  351. continue
  352. ncb.Reset()
  353. ncb.Command = netbios.NCBASTAT
  354. ncb.Lana_num = ord(adapters.lana[i])
  355. ncb.Callname = '*'.ljust(16)
  356. ncb.Buffer = status = netbios.ADAPTER_STATUS()
  357. if win32wnet.Netbios(ncb) != 0:
  358. continue
  359. status._unpack()
  360. bytes = map(ord, status.adapter_address)
  361. return ((bytes[0]<<40L) + (bytes[1]<<32L) + (bytes[2]<<24L) +
  362. (bytes[3]<<16L) + (bytes[4]<<8L) + bytes[5])
  363. # Thanks to Thomas Heller for ctypes and for his help with its use here.
  364. # If ctypes is available, use it to find system routines for UUID generation.
  365. _uuid_generate_time = _UuidCreate = None
  366. try:
  367. import ctypes, ctypes.util
  368. import sys
  369. # The uuid_generate_* routines are provided by libuuid on at least
  370. # Linux and FreeBSD, and provided by libc on Mac OS X.
  371. _libnames = ['uuid']
  372. if not sys.platform.startswith('win'):
  373. _libnames.append('c')
  374. for libname in _libnames:
  375. try:
  376. lib = ctypes.CDLL(ctypes.util.find_library(libname))
  377. except:
  378. continue
  379. if hasattr(lib, 'uuid_generate_time'):
  380. _uuid_generate_time = lib.uuid_generate_time
  381. break
  382. del _libnames
  383. # The uuid_generate_* functions are broken on MacOS X 10.5, as noted
  384. # in issue #8621 the function generates the same sequence of values
  385. # in the parent process and all children created using fork (unless
  386. # those children use exec as well).
  387. #
  388. # Assume that the uuid_generate functions are broken from 10.5 onward,
  389. # the test can be adjusted when a later version is fixed.
  390. if sys.platform == 'darwin':
  391. import os
  392. if int(os.uname()[2].split('.')[0]) >= 9:
  393. _uuid_generate_time = None
  394. # On Windows prior to 2000, UuidCreate gives a UUID containing the
  395. # hardware address. On Windows 2000 and later, UuidCreate makes a
  396. # random UUID and UuidCreateSequential gives a UUID containing the
  397. # hardware address. These routines are provided by the RPC runtime.
  398. # NOTE: at least on Tim's WinXP Pro SP2 desktop box, while the last
  399. # 6 bytes returned by UuidCreateSequential are fixed, they don't appear
  400. # to bear any relationship to the MAC address of any network device
  401. # on the box.
  402. try:
  403. lib = ctypes.windll.rpcrt4
  404. except:
  405. lib = None
  406. _UuidCreate = getattr(lib, 'UuidCreateSequential',
  407. getattr(lib, 'UuidCreate', None))
  408. except:
  409. pass
  410. def _unixdll_getnode():
  411. """Get the hardware address on Unix using ctypes."""
  412. _buffer = ctypes.create_string_buffer(16)
  413. _uuid_generate_time(_buffer)
  414. return UUID(bytes=_buffer.raw).node
  415. def _windll_getnode():
  416. """Get the hardware address on Windows using ctypes."""
  417. _buffer = ctypes.create_string_buffer(16)
  418. if _UuidCreate(_buffer) == 0:
  419. return UUID(bytes=_buffer.raw).node
  420. def _random_getnode():
  421. """Get a random node ID, with eighth bit set as suggested by RFC 4122."""
  422. import random
  423. return random.randrange(0, 1<<48L) | 0x010000000000L
  424. _node = None
  425. def getnode():
  426. """Get the hardware address as a 48-bit positive integer.
  427. The first time this runs, it may launch a separate program, which could
  428. be quite slow. If all attempts to obtain the hardware address fail, we
  429. choose a random 48-bit number with its eighth bit set to 1 as recommended
  430. in RFC 4122.
  431. """
  432. global _node
  433. if _node is not None:
  434. return _node
  435. import sys
  436. if sys.platform == 'win32':
  437. getters = [_windll_getnode, _netbios_getnode, _ipconfig_getnode]
  438. else:
  439. getters = [_unixdll_getnode, _ifconfig_getnode, _arp_getnode,
  440. _lanscan_getnode, _netstat_getnode]
  441. for getter in getters + [_random_getnode]:
  442. try:
  443. _node = getter()
  444. except:
  445. continue
  446. if _node is not None:
  447. return _node
  448. _last_timestamp = None
  449. def uuid1(node=None, clock_seq=None):
  450. """Generate a UUID from a host ID, sequence number, and the current time.
  451. If 'node' is not given, getnode() is used to obtain the hardware
  452. address. If 'clock_seq' is given, it is used as the sequence number;
  453. otherwise a random 14-bit sequence number is chosen."""
  454. # When the system provides a version-1 UUID generator, use it (but don't
  455. # use UuidCreate here because its UUIDs don't conform to RFC 4122).
  456. if _uuid_generate_time and node is clock_seq is None:
  457. _buffer = ctypes.create_string_buffer(16)
  458. _uuid_generate_time(_buffer)
  459. return UUID(bytes=_buffer.raw)
  460. global _last_timestamp
  461. import time
  462. nanoseconds = int(time.time() * 1e9)
  463. # 0x01b21dd213814000 is the number of 100-ns intervals between the
  464. # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00.
  465. timestamp = int(nanoseconds//100) + 0x01b21dd213814000L
  466. if _last_timestamp is not None and timestamp <= _last_timestamp:
  467. timestamp = _last_timestamp + 1
  468. _last_timestamp = timestamp
  469. if clock_seq is None:
  470. import random
  471. clock_seq = random.randrange(1<<14L) # instead of stable storage
  472. time_low = timestamp & 0xffffffffL
  473. time_mid = (timestamp >> 32L) & 0xffffL
  474. time_hi_version = (timestamp >> 48L) & 0x0fffL
  475. clock_seq_low = clock_seq & 0xffL
  476. clock_seq_hi_variant = (clock_seq >> 8L) & 0x3fL
  477. if node is None:
  478. node = getnode()
  479. return UUID(fields=(time_low, time_mid, time_hi_version,
  480. clock_seq_hi_variant, clock_seq_low, node), version=1)
  481. def uuid3(namespace, name):
  482. """Generate a UUID from the MD5 hash of a namespace UUID and a name."""
  483. from hashlib import md5
  484. hash = md5(namespace.bytes + name).digest()
  485. return UUID(bytes=hash[:16], version=3)
  486. def uuid4():
  487. """Generate a random UUID."""
  488. return UUID(bytes=os.urandom(16), version=4)
  489. def uuid5(namespace, name):
  490. """Generate a UUID from the SHA-1 hash of a namespace UUID and a name."""
  491. from hashlib import sha1
  492. hash = sha1(namespace.bytes + name).digest()
  493. return UUID(bytes=hash[:16], version=5)
  494. # The following standard UUIDs are for use with uuid3() or uuid5().
  495. NAMESPACE_DNS = UUID('6ba7b810-9dad-11d1-80b4-00c04fd430c8')
  496. NAMESPACE_URL = UUID('6ba7b811-9dad-11d1-80b4-00c04fd430c8')
  497. NAMESPACE_OID = UUID('6ba7b812-9dad-11d1-80b4-00c04fd430c8')
  498. NAMESPACE_X500 = UUID('6ba7b814-9dad-11d1-80b4-00c04fd430c8')