pyhash.h 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. #ifndef Py_HASH_H
  2. #define Py_HASH_H
  3. #ifdef __cplusplus
  4. extern "C" {
  5. #endif
  6. /* Helpers for hash functions */
  7. #ifndef Py_LIMITED_API /* helper prototypes below are omitted when Py_LIMITED_API is defined */
  8. PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double); /* takes a double, returns a Py_hash_t */
  9. PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*); /* takes a pointer, returns a Py_hash_t */
  10. PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t); /* presumably (buffer, length in bytes) -- TODO confirm */
  11. #endif
  12. /* Prime multiplier used in string and various other hashes. */
  13. #define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */
  14. /* Parameters used for the numeric hash implementation. See notes for
  15. _Py_HashDouble in Objects/object.c. Numeric hashes are based on
  16. reduction modulo the prime 2**_PyHASH_BITS - 1. */
  17. #if SIZEOF_VOID_P >= 8 /* choose the numeric-hash width from the pointer size */
  18. # define _PyHASH_BITS 61 /* modulus becomes 2**61 - 1 */
  19. #else
  20. # define _PyHASH_BITS 31 /* modulus becomes 2**31 - 1 */
  21. #endif
  22. #define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1) /* 2**_PyHASH_BITS - 1, computed as a size_t */
  23. #define _PyHASH_INF 314159 /* presumably the hash used for infinities -- TODO confirm */
  24. #define _PyHASH_NAN 0 /* presumably the hash used for NaN -- TODO confirm */
  25. #define _PyHASH_IMAG _PyHASH_MULTIPLIER /* same value as _PyHASH_MULTIPLIER; presumably applied to imaginary parts -- TODO confirm */
  26. /* hash secret
  27. *
  28. * memory layout on 64 bit systems
  29. * cccccccc cccccccc cccccccc uc -- unsigned char[24]
  30. * pppppppp ssssssss ........ fnv -- two Py_hash_t
  31. * k0k0k0k0 k1k1k1k1 ........ siphash -- two PY_UINT64_T
  32. * ........ ........ ssssssss djbx33a -- 16 bytes padding + one Py_hash_t
  33. * ........ ........ eeeeeeee pyexpat XML hash salt
  34. *
  35. * memory layout on 32 bit systems
  36. * cccccccc cccccccc cccccccc uc
  37. * ppppssss ........ ........ fnv -- two Py_hash_t
  38. * k0k0k0k0 k1k1k1k1 ........ siphash -- two PY_UINT64_T (*)
  39. * ........ ........ ssss.... djbx33a -- 16 bytes padding + one Py_hash_t
  40. * ........ ........ eeee.... pyexpat XML hash salt
  41. *
  42. * (*) The siphash member may not be available on 32 bit platforms without
  43. * an unsigned int64 data type.
  44. */
  45. #ifndef Py_LIMITED_API
  46. typedef union { /* overlapping views of one hash-secret buffer, one member per consumer */
  47. /* ensure 24 bytes: this member makes the union at least 24 bytes large */
  48. unsigned char uc[24];
  49. /* two Py_hash_t for FNV, at the start of the buffer */
  50. struct {
  51. Py_hash_t prefix;
  52. Py_hash_t suffix;
  53. } fnv;
  54. #ifdef PY_UINT64_T /* the siphash view exists only when PY_UINT64_T is defined */
  55. /* two uint64 for SipHash24: keys k0 and k1 occupy the first 16 bytes */
  56. struct {
  57. PY_UINT64_T k0;
  58. PY_UINT64_T k1;
  59. } siphash;
  60. #endif
  61. /* a different (!) Py_hash_t for small string optimization: not fnv.suffix */
  62. struct {
  63. unsigned char padding[16]; /* places suffix after the 16 bytes used by the siphash keys */
  64. Py_hash_t suffix;
  65. } djbx33a;
  66. struct { /* pyexpat XML hash salt, stored after the same 16 bytes of padding */
  67. unsigned char padding[16];
  68. Py_hash_t hashsalt;
  69. } expat;
  70. } _Py_HashSecret_t;
  71. PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret; /* the hash secret object, declared via PyAPI_DATA */
  72. #endif
  73. #ifdef Py_DEBUG /* declared only when Py_DEBUG is defined */
  74. PyAPI_DATA(int) _Py_HashSecret_Initialized; /* presumably nonzero once _Py_HashSecret has been set up -- TODO confirm */
  75. #endif
  76. /* hash function definition */
  77. #ifndef Py_LIMITED_API
  78. typedef struct { /* describes one hash function implementation; every field is const */
  79. Py_hash_t (*const hash)(const void *, Py_ssize_t); /* const pointer to the hash routine; arguments presumably (buffer, length) -- TODO confirm */
  80. const char *name; /* presumably the algorithm's name -- TODO confirm */
  81. const int hash_bits; /* presumably the width of the hash output in bits -- TODO confirm */
  82. const int seed_bits; /* presumably the width of the seed in bits -- TODO confirm */
  83. } PyHash_FuncDef;
  84. PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void); /* takes no arguments; returns a pointer to a PyHash_FuncDef */
  85. #endif
  86. /* cutoff for small string DJBX33A optimization in range [1, cutoff).
  87. *
  88. * About 50% of the strings in a typical Python application are smaller than
  89. * 6 to 7 chars. However DJBX33A is vulnerable to hash collision attacks.
  90. * NEVER use DJBX33A for long strings!
  91. *
  92. * A Py_HASH_CUTOFF of 0 disables small string optimization. 32 bit platforms
  93. * should use a smaller cutoff because it is easier to create colliding
  94. * strings. A cutoff of 7 on 64bit platforms and 5 on 32bit platforms should
  95. * provide a decent safety margin.
  96. */
  /* Default Py_HASH_CUTOFF to 0 (small string optimization disabled) when the
   * build does not set it; reject any build-supplied value outside 0...7 at
   * compile time. */
  #ifndef Py_HASH_CUTOFF
  #  define Py_HASH_CUTOFF 0
  #elif (Py_HASH_CUTOFF > 7 || Py_HASH_CUTOFF < 0)
  #  error Py_HASH_CUTOFF must be in range 0...7.
  #endif /* Py_HASH_CUTOFF */
  102. /* hash algorithm selection
  103. *
  104. * The values for Py_HASH_SIPHASH24 and Py_HASH_FNV are hard-coded in the
  105. * configure script.
  106. *
  107. * - FNV is available on all platforms and architectures.
  108. * - SIPHASH24 only works on platforms that provide PY_UINT64_T and don't
  109. * require aligned memory for integers.
  110. * - With EXTERNAL, embedders can provide an alternative implementation with::
  111. *
  112. * PyHash_FuncDef PyHash_Func = {...};
  113. *
  114. * XXX: Figure out __declspec() for extern PyHash_FuncDef.
  115. */
  116. #define Py_HASH_EXTERNAL 0 /* implementation supplied by the embedder */
  117. #define Py_HASH_SIPHASH24 1 /* SipHash24 */
  118. #define Py_HASH_FNV 2 /* FNV */
  119. #ifndef Py_HASH_ALGORITHM /* pick a default only when the build did not choose one */
  120. # if (defined(PY_UINT64_T) && defined(PY_UINT32_T) \
  121. && !defined(HAVE_ALIGNED_REQUIRED))
  122. # define Py_HASH_ALGORITHM Py_HASH_SIPHASH24 /* both integer types exist and HAVE_ALIGNED_REQUIRED is not defined */
  123. # else
  124. # define Py_HASH_ALGORITHM Py_HASH_FNV /* fallback for every other configuration */
  125. # endif /* PY_UINT64_T && PY_UINT32_T && !HAVE_ALIGNED_REQUIRED */
  126. #endif /* Py_HASH_ALGORITHM */
  127. #ifdef __cplusplus
  128. }
  129. #endif
  130. #endif /* !Py_HASH_H */