tuklib_integer.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. /// \file tuklib_integer.h
  4. /// \brief Various integer and bit operations
  5. ///
  6. /// This file provides macros or functions to do some basic integer and bit
  7. /// operations.
  8. ///
  9. /// Endianness related integer operations (XX = 16, 32, or 64; Y = b or l):
  10. /// - Byte swapping: bswapXX(num)
  11. /// - Byte order conversions to/from native: convXXYe(num)
  12. /// - Aligned reads: readXXYe(ptr)
  13. /// - Aligned writes: writeXXYe(ptr, num)
  14. /// - Unaligned reads (16/32-bit only): unaligned_readXXYe(ptr)
  15. /// - Unaligned writes (16/32-bit only): unaligned_writeXXYe(ptr, num)
  16. ///
  17. /// Since they can macros, the arguments should have no side effects since
  18. /// they may be evaluated more than once.
  19. ///
  20. /// \todo PowerPC and possibly some other architectures support
  21. /// byte swapping load and store instructions. This file
  22. /// doesn't take advantage of those instructions.
  23. ///
  24. /// Bit scan operations for non-zero 32-bit integers:
  25. /// - Bit scan reverse (find highest non-zero bit): bsr32(num)
  26. /// - Count leading zeros: clz32(num)
  27. /// - Count trailing zeros: ctz32(num)
  28. /// - Bit scan forward (simply an alias for ctz32()): bsf32(num)
  29. ///
  30. /// The above bit scan operations return 0-31. If num is zero,
  31. /// the result is undefined.
  32. //
  33. // Authors: Lasse Collin
  34. // Joachim Henke
  35. //
  36. // This file has been put into the public domain.
  37. // You can do whatever you want with this file.
  38. //
  39. ///////////////////////////////////////////////////////////////////////////////
  40. #ifndef TUKLIB_INTEGER_H
  41. #define TUKLIB_INTEGER_H
  42. #include "sysdefs.h"
  43. #if defined(__GNUC__) && defined(__GNUC_MINOR__)
  44. # define TUKLIB_GNUC_REQ(major, minor) \
  45. ((__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)) \
  46. || __GNUC__ > (major))
  47. #else
  48. # define TUKLIB_GNUC_REQ(major, minor) 0
  49. #endif
  50. ////////////////////////////////////////
  51. // Operating system specific features //
  52. ////////////////////////////////////////
  53. #if defined(HAVE_BYTESWAP_H)
  54. // glibc, uClibc, dietlibc
  55. # include <byteswap.h>
  56. # ifdef HAVE_BSWAP_16
  57. # define bswap16(num) bswap_16(num)
  58. # endif
  59. # ifdef HAVE_BSWAP_32
  60. # define bswap32(num) bswap_32(num)
  61. # endif
  62. # ifdef HAVE_BSWAP_64
  63. # define bswap64(num) bswap_64(num)
  64. # endif
  65. #elif defined(HAVE_SYS_ENDIAN_H)
  66. // *BSDs and Darwin
  67. # include <sys/endian.h>
  68. #elif defined(HAVE_SYS_BYTEORDER_H)
  69. // Solaris
  70. # include <sys/byteorder.h>
  71. # ifdef BSWAP_16
  72. # define bswap16(num) BSWAP_16(num)
  73. # endif
  74. # ifdef BSWAP_32
  75. # define bswap32(num) BSWAP_32(num)
  76. # endif
  77. # ifdef BSWAP_64
  78. # define bswap64(num) BSWAP_64(num)
  79. # endif
  80. # ifdef BE_16
  81. # define conv16be(num) BE_16(num)
  82. # endif
  83. # ifdef BE_32
  84. # define conv32be(num) BE_32(num)
  85. # endif
  86. # ifdef BE_64
  87. # define conv64be(num) BE_64(num)
  88. # endif
  89. # ifdef LE_16
  90. # define conv16le(num) LE_16(num)
  91. # endif
  92. # ifdef LE_32
  93. # define conv32le(num) LE_32(num)
  94. # endif
  95. # ifdef LE_64
  96. # define conv64le(num) LE_64(num)
  97. # endif
  98. #endif
  99. ///////////////////
  100. // Byte swapping //
  101. ///////////////////
  102. #ifndef bswap16
  103. # define bswap16(num) \
  104. (((uint16_t)(num) << 8) | ((uint16_t)(num) >> 8))
  105. #endif
  106. #ifndef bswap32
  107. # define bswap32(num) \
  108. ( (((uint32_t)(num) << 24) ) \
  109. | (((uint32_t)(num) << 8) & UINT32_C(0x00FF0000)) \
  110. | (((uint32_t)(num) >> 8) & UINT32_C(0x0000FF00)) \
  111. | (((uint32_t)(num) >> 24) ) )
  112. #endif
  113. #ifndef bswap64
  114. # define bswap64(num) \
  115. ( (((uint64_t)(num) << 56) ) \
  116. | (((uint64_t)(num) << 40) & UINT64_C(0x00FF000000000000)) \
  117. | (((uint64_t)(num) << 24) & UINT64_C(0x0000FF0000000000)) \
  118. | (((uint64_t)(num) << 8) & UINT64_C(0x000000FF00000000)) \
  119. | (((uint64_t)(num) >> 8) & UINT64_C(0x00000000FF000000)) \
  120. | (((uint64_t)(num) >> 24) & UINT64_C(0x0000000000FF0000)) \
  121. | (((uint64_t)(num) >> 40) & UINT64_C(0x000000000000FF00)) \
  122. | (((uint64_t)(num) >> 56) ) )
  123. #endif
  124. // Define conversion macros using the basic byte swapping macros.
  125. #ifdef WORDS_BIGENDIAN
  126. # ifndef conv16be
  127. # define conv16be(num) ((uint16_t)(num))
  128. # endif
  129. # ifndef conv32be
  130. # define conv32be(num) ((uint32_t)(num))
  131. # endif
  132. # ifndef conv64be
  133. # define conv64be(num) ((uint64_t)(num))
  134. # endif
  135. # ifndef conv16le
  136. # define conv16le(num) bswap16(num)
  137. # endif
  138. # ifndef conv32le
  139. # define conv32le(num) bswap32(num)
  140. # endif
  141. # ifndef conv64le
  142. # define conv64le(num) bswap64(num)
  143. # endif
  144. #else
  145. # ifndef conv16be
  146. # define conv16be(num) bswap16(num)
  147. # endif
  148. # ifndef conv32be
  149. # define conv32be(num) bswap32(num)
  150. # endif
  151. # ifndef conv64be
  152. # define conv64be(num) bswap64(num)
  153. # endif
  154. # ifndef conv16le
  155. # define conv16le(num) ((uint16_t)(num))
  156. # endif
  157. # ifndef conv32le
  158. # define conv32le(num) ((uint32_t)(num))
  159. # endif
  160. # ifndef conv64le
  161. # define conv64le(num) ((uint64_t)(num))
  162. # endif
  163. #endif
  164. //////////////////////////////
  165. // Aligned reads and writes //
  166. //////////////////////////////
  167. static inline uint16_t
  168. read16be(const uint8_t *buf)
  169. {
  170. uint16_t num = *(const uint16_t *)buf;
  171. return conv16be(num);
  172. }
  173. static inline uint16_t
  174. read16le(const uint8_t *buf)
  175. {
  176. uint16_t num = *(const uint16_t *)buf;
  177. return conv16le(num);
  178. }
  179. static inline uint32_t
  180. read32be(const uint8_t *buf)
  181. {
  182. uint32_t num = *(const uint32_t *)buf;
  183. return conv32be(num);
  184. }
  185. static inline uint32_t
  186. read32le(const uint8_t *buf)
  187. {
  188. uint32_t num = *(const uint32_t *)buf;
  189. return conv32le(num);
  190. }
  191. static inline uint64_t
  192. read64be(const uint8_t *buf)
  193. {
  194. uint64_t num = *(const uint64_t *)buf;
  195. return conv64be(num);
  196. }
  197. static inline uint64_t
  198. read64le(const uint8_t *buf)
  199. {
  200. uint64_t num = *(const uint64_t *)buf;
  201. return conv64le(num);
  202. }
  203. // NOTE: Possible byte swapping must be done in a macro to allow GCC
  204. // to optimize byte swapping of constants when using glibc's or *BSD's
  205. // byte swapping macros. The actual write is done in an inline function
  206. // to make type checking of the buf pointer possible similarly to readXXYe()
  207. // functions.
  208. #define write16be(buf, num) write16ne((buf), conv16be(num))
  209. #define write16le(buf, num) write16ne((buf), conv16le(num))
  210. #define write32be(buf, num) write32ne((buf), conv32be(num))
  211. #define write32le(buf, num) write32ne((buf), conv32le(num))
  212. #define write64be(buf, num) write64ne((buf), conv64be(num))
  213. #define write64le(buf, num) write64ne((buf), conv64le(num))
  214. static inline void
  215. write16ne(uint8_t *buf, uint16_t num)
  216. {
  217. *(uint16_t *)buf = num;
  218. return;
  219. }
  220. static inline void
  221. write32ne(uint8_t *buf, uint32_t num)
  222. {
  223. *(uint32_t *)buf = num;
  224. return;
  225. }
  226. static inline void
  227. write64ne(uint8_t *buf, uint64_t num)
  228. {
  229. *(uint64_t *)buf = num;
  230. return;
  231. }
  232. ////////////////////////////////
  233. // Unaligned reads and writes //
  234. ////////////////////////////////
  235. // NOTE: TUKLIB_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and
  236. // 32-bit unaligned integer loads and stores. It's possible that 64-bit
  237. // unaligned access doesn't work or is slower than byte-by-byte access.
  238. // Since unaligned 64-bit is probably not needed as often as 16-bit or
  239. // 32-bit, we simply don't support 64-bit unaligned access for now.
  240. #ifdef TUKLIB_FAST_UNALIGNED_ACCESS
  241. # define unaligned_read16be read16be
  242. # define unaligned_read16le read16le
  243. # define unaligned_read32be read32be
  244. # define unaligned_read32le read32le
  245. # define unaligned_write16be write16be
  246. # define unaligned_write16le write16le
  247. # define unaligned_write32be write32be
  248. # define unaligned_write32le write32le
  249. #else
  250. static inline uint16_t
  251. unaligned_read16be(const uint8_t *buf)
  252. {
  253. uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
  254. return num;
  255. }
  256. static inline uint16_t
  257. unaligned_read16le(const uint8_t *buf)
  258. {
  259. uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
  260. return num;
  261. }
  262. static inline uint32_t
  263. unaligned_read32be(const uint8_t *buf)
  264. {
  265. uint32_t num = (uint32_t)buf[0] << 24;
  266. num |= (uint32_t)buf[1] << 16;
  267. num |= (uint32_t)buf[2] << 8;
  268. num |= (uint32_t)buf[3];
  269. return num;
  270. }
  271. static inline uint32_t
  272. unaligned_read32le(const uint8_t *buf)
  273. {
  274. uint32_t num = (uint32_t)buf[0];
  275. num |= (uint32_t)buf[1] << 8;
  276. num |= (uint32_t)buf[2] << 16;
  277. num |= (uint32_t)buf[3] << 24;
  278. return num;
  279. }
  280. static inline void
  281. unaligned_write16be(uint8_t *buf, uint16_t num)
  282. {
  283. buf[0] = num >> 8;
  284. buf[1] = num;
  285. return;
  286. }
  287. static inline void
  288. unaligned_write16le(uint8_t *buf, uint16_t num)
  289. {
  290. buf[0] = num;
  291. buf[1] = num >> 8;
  292. return;
  293. }
  294. static inline void
  295. unaligned_write32be(uint8_t *buf, uint32_t num)
  296. {
  297. buf[0] = num >> 24;
  298. buf[1] = num >> 16;
  299. buf[2] = num >> 8;
  300. buf[3] = num;
  301. return;
  302. }
  303. static inline void
  304. unaligned_write32le(uint8_t *buf, uint32_t num)
  305. {
  306. buf[0] = num;
  307. buf[1] = num >> 8;
  308. buf[2] = num >> 16;
  309. buf[3] = num >> 24;
  310. return;
  311. }
  312. #endif
  313. static inline uint32_t
  314. bsr32(uint32_t n)
  315. {
  316. // Check for ICC first, since it tends to define __GNUC__ too.
  317. #if defined(__INTEL_COMPILER)
  318. return _bit_scan_reverse(n);
  319. #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
  320. // GCC >= 3.4 has __builtin_clz(), which gives good results on
  321. // multiple architectures. On x86, __builtin_clz() ^ 31U becomes
  322. // either plain BSR (so the XOR gets optimized away) or LZCNT and
  323. // XOR (if -march indicates that SSE4a instructions are supported).
  324. return __builtin_clz(n) ^ 31U;
  325. #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
  326. uint32_t i;
  327. __asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
  328. return i;
  329. #else
  330. uint32_t i = 31;
  331. if ((n & UINT32_C(0xFFFF0000)) == 0) {
  332. n <<= 16;
  333. i = 15;
  334. }
  335. if ((n & UINT32_C(0xFF000000)) == 0) {
  336. n <<= 8;
  337. i -= 8;
  338. }
  339. if ((n & UINT32_C(0xF0000000)) == 0) {
  340. n <<= 4;
  341. i -= 4;
  342. }
  343. if ((n & UINT32_C(0xC0000000)) == 0) {
  344. n <<= 2;
  345. i -= 2;
  346. }
  347. if ((n & UINT32_C(0x80000000)) == 0)
  348. --i;
  349. return i;
  350. #endif
  351. }
  352. static inline uint32_t
  353. clz32(uint32_t n)
  354. {
  355. #if defined(__INTEL_COMPILER)
  356. return _bit_scan_reverse(n) ^ 31U;
  357. #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
  358. return __builtin_clz(n);
  359. #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
  360. uint32_t i;
  361. __asm__("bsrl %1, %0\n\t"
  362. "xorl $31, %0"
  363. : "=r" (i) : "rm" (n));
  364. return i;
  365. #else
  366. uint32_t i = 0;
  367. if ((n & UINT32_C(0xFFFF0000)) == 0) {
  368. n <<= 16;
  369. i = 16;
  370. }
  371. if ((n & UINT32_C(0xFF000000)) == 0) {
  372. n <<= 8;
  373. i += 8;
  374. }
  375. if ((n & UINT32_C(0xF0000000)) == 0) {
  376. n <<= 4;
  377. i += 4;
  378. }
  379. if ((n & UINT32_C(0xC0000000)) == 0) {
  380. n <<= 2;
  381. i += 2;
  382. }
  383. if ((n & UINT32_C(0x80000000)) == 0)
  384. ++i;
  385. return i;
  386. #endif
  387. }
  388. static inline uint32_t
  389. ctz32(uint32_t n)
  390. {
  391. #if defined(__INTEL_COMPILER)
  392. return _bit_scan_forward(n);
  393. #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX
  394. return __builtin_ctz(n);
  395. #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
  396. uint32_t i;
  397. __asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
  398. return i;
  399. #else
  400. uint32_t i = 0;
  401. if ((n & UINT32_C(0x0000FFFF)) == 0) {
  402. n >>= 16;
  403. i = 16;
  404. }
  405. if ((n & UINT32_C(0x000000FF)) == 0) {
  406. n >>= 8;
  407. i += 8;
  408. }
  409. if ((n & UINT32_C(0x0000000F)) == 0) {
  410. n >>= 4;
  411. i += 4;
  412. }
  413. if ((n & UINT32_C(0x00000003)) == 0) {
  414. n >>= 2;
  415. i += 2;
  416. }
  417. if ((n & UINT32_C(0x00000001)) == 0)
  418. ++i;
  419. return i;
  420. #endif
  421. }
  422. #define bsf32 ctz32
  423. #endif