/* KeccakP-1600-opt64.c (15 KB) */
  1. /*
  2. Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
  3. Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
  4. denoted as "the implementer".
  5. For more information, feedback or questions, please refer to our websites:
  6. http://keccak.noekeon.org/
  7. http://keyak.noekeon.org/
  8. http://ketje.noekeon.org/
  9. To the extent possible under law, the implementer has waived all copyright
  10. and related or neighboring rights to the source code in this file.
  11. http://creativecommons.org/publicdomain/zero/1.0/
  12. */
  #include <stdint.h>
  #include <stdlib.h>
  #include <string.h>
  #include "brg_endian.h"
  #include "KeccakP-1600-opt64-config.h"
  /*
   * ALLOW_MISALIGNED_ACCESS is written in front of the functions in this file
   * that access byte buffers through 64-bit pointers.  When the compiler
   * provides __has_feature and reports the undefined-behavior sanitizer as
   * enabled, the macro expands to an attribute that turns the sanitizer's
   * alignment check off for the annotated function.  In every other case it
   * expands to nothing.
   */
  #if defined(__has_feature)
  #  if __has_feature(undefined_behavior_sanitizer)
  #    define ALLOW_MISALIGNED_ACCESS __attribute__((no_sanitize("alignment")))
  #  endif
  #endif
  #if !defined(ALLOW_MISALIGNED_ACCESS)
  #  define ALLOW_MISALIGNED_ACCESS
  #endif
  /* Byte and 64-bit lane types used throughout this file. */
  typedef unsigned char UINT8;
  typedef unsigned long long int UINT64;

  /*
   * Map the configuration switch onto the name UseBebigokimisa.
   * NOTE(review): presumably this is the name tested by the .macros files
   * included below -- confirm against those files.
   */
  #if defined(KeccakP1600_useLaneComplementing)
  #define UseBebigokimisa
  #endif

  /*
   * ROL64(a, offset): rotate the 64-bit value `a` left by `offset` bits.
   *
   * Three variants are selected at compile time:
   *   - MSVC: the _rotl64 intrinsic;
   *   - KeccakP1600_useSHLD: an inline-assembly "shld" with an immediate
   *     count (uses a GNU statement expression);
   *   - otherwise: portable shifts.
   *
   * In the portable variant `offset` must be in the range 1..63, because the
   * complementary shift count is 64-offset and shifting a 64-bit value by 64
   * is undefined.  Both macro arguments are parenthesized so that compound
   * expressions such as ROL64(x ^ y, n) or ROL64(x, p + q) expand correctly.
   * `a` is evaluated twice in the portable variant, so it must not have side
   * effects.
   */
  #if defined(_MSC_VER)
  #define ROL64(a, offset) _rotl64(a, offset)
  #elif defined(KeccakP1600_useSHLD)
  #define ROL64(x,N) ({ \
      register UINT64 __out; \
      register UINT64 __in = (x); \
      __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \
      __out; \
      })
  #else
  #define ROL64(a, offset) ((((UINT64)(a)) << (offset)) ^ (((UINT64)(a)) >> (64-(offset))))
  #endif
  42. #include "KeccakP-1600-64.macros"
  /*
   * Translate the configuration of this implementation into the unrolling
   * switches: FullUnrolling when KeccakP1600_fullUnrolling is set, otherwise
   * Unrolling with the value of KeccakP1600_unrolling.
   * NOTE(review): presumably read by the unrolling macros included right
   * after this block -- confirm there.
   */
  #if defined(KeccakP1600_fullUnrolling)
  #  define FullUnrolling
  #else
  #  define Unrolling KeccakP1600_unrolling
  #endif
  48. #include "KeccakP-1600-unrolling.macros"
  49. #include "SnP-Relaned.h"
  50. static const UINT64 KeccakF1600RoundConstants[24] = {
  51. 0x0000000000000001ULL,
  52. 0x0000000000008082ULL,
  53. 0x800000000000808aULL,
  54. 0x8000000080008000ULL,
  55. 0x000000000000808bULL,
  56. 0x0000000080000001ULL,
  57. 0x8000000080008081ULL,
  58. 0x8000000000008009ULL,
  59. 0x000000000000008aULL,
  60. 0x0000000000000088ULL,
  61. 0x0000000080008009ULL,
  62. 0x000000008000000aULL,
  63. 0x000000008000808bULL,
  64. 0x800000000000008bULL,
  65. 0x8000000000008089ULL,
  66. 0x8000000000008003ULL,
  67. 0x8000000000008002ULL,
  68. 0x8000000000000080ULL,
  69. 0x000000000000800aULL,
  70. 0x800000008000000aULL,
  71. 0x8000000080008081ULL,
  72. 0x8000000000008080ULL,
  73. 0x0000000080000001ULL,
  74. 0x8000000080008008ULL };
  75. /* ---------------------------------------------------------------- */
  /*
   * Put a state into its initial value.
   *
   * state: buffer of at least 200 bytes; it is cleared to zero.  When
   *        KeccakP1600_useLaneComplementing is defined, lanes 1, 2, 8, 12, 17
   *        and 20 are then set to all-ones, which is how a zero lane is stored
   *        in the complemented representation used by this file.
   */
  void KeccakP1600_Initialize(void *state)
  {
  #ifdef KeccakP1600_useLaneComplementing
      static const unsigned char complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
      UINT64 *lanes = (UINT64*)state;
      unsigned int k;
  #endif

      memset(state, 0, 200);
  #ifdef KeccakP1600_useLaneComplementing
      for(k=0; k<6; k++)
          lanes[complementedLanes[k]] = ~(UINT64)0;
  #endif
  }
  88. /* ---------------------------------------------------------------- */
  89. void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
  90. {
  91. #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
  92. UINT64 lane;
  93. if (length == 0)
  94. return;
  95. if (length == 1)
  96. lane = data[0];
  97. else {
  98. lane = 0;
  99. memcpy(&lane, data, length);
  100. }
  101. lane <<= offset*8;
  102. #else
  103. UINT64 lane = 0;
  104. unsigned int i;
  105. for(i=0; i<length; i++)
  106. lane |= ((UINT64)data[i]) << ((i+offset)*8);
  107. #endif
  108. ((UINT64*)state)[lanePosition] ^= lane;
  109. }
  110. /* ---------------------------------------------------------------- */
  111. ALLOW_MISALIGNED_ACCESS
  112. void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount)
  113. {
  114. #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
  115. unsigned int i = 0;
  116. #ifdef NO_MISALIGNED_ACCESSES
  117. /* If either pointer is misaligned, fall back to byte-wise xor. */
  118. if (((((uintptr_t)state) & 7) != 0) || ((((uintptr_t)data) & 7) != 0)) {
  119. for (i = 0; i < laneCount * 8; i++) {
  120. ((unsigned char*)state)[i] ^= data[i];
  121. }
  122. }
  123. else
  124. #endif
  125. {
  126. /* Otherwise... */
  127. for( ; (i+8)<=laneCount; i+=8) {
  128. ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
  129. ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
  130. ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2];
  131. ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3];
  132. ((UINT64*)state)[i+4] ^= ((UINT64*)data)[i+4];
  133. ((UINT64*)state)[i+5] ^= ((UINT64*)data)[i+5];
  134. ((UINT64*)state)[i+6] ^= ((UINT64*)data)[i+6];
  135. ((UINT64*)state)[i+7] ^= ((UINT64*)data)[i+7];
  136. }
  137. for( ; (i+4)<=laneCount; i+=4) {
  138. ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
  139. ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
  140. ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2];
  141. ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3];
  142. }
  143. for( ; (i+2)<=laneCount; i+=2) {
  144. ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
  145. ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
  146. }
  147. if (i<laneCount) {
  148. ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
  149. }
  150. }
  151. #else
  152. unsigned int i;
  153. UINT8 *curData = data;
  154. for(i=0; i<laneCount; i++, curData+=8) {
  155. UINT64 lane = (UINT64)curData[0]
  156. | ((UINT64)curData[1] << 8)
  157. | ((UINT64)curData[2] << 16)
  158. | ((UINT64)curData[3] << 24)
  159. | ((UINT64)curData[4] <<32)
  160. | ((UINT64)curData[5] << 40)
  161. | ((UINT64)curData[6] << 48)
  162. | ((UINT64)curData[7] << 56);
  163. ((UINT64*)state)[i] ^= lane;
  164. }
  165. #endif
  166. }
  167. /* ---------------------------------------------------------------- */
  #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
  /*
   * XOR one byte into the state at byte position `offset`.
   * Only compiled when the platform is not little-endian.
   *
   * state:  the state, accessed as an array of 64-bit lanes.
   * byte:   value to XOR in.
   * offset: byte position in the state; offset/8 selects the lane and
   *         offset%8 the byte inside it (0 = least significant).
   */
  void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset)
  {
      UINT64 *lanes = (UINT64*)state;
      const unsigned int laneIndex = offset/8;
      const unsigned int bitShift = (offset%8)*8;

      lanes[laneIndex] ^= ((UINT64)byte) << bitShift;
  }
  #endif
  176. /* ---------------------------------------------------------------- */
  177. void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
  178. {
  179. SnP_AddBytes(state, data, offset, length, KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8);
  180. }
  181. /* ---------------------------------------------------------------- */
  /*
   * Replace bytes inside a single lane of the state.
   *
   * state:        the state, accessed here as a byte array.
   * lanePosition: index of the lane to modify.
   * data:         `length` replacement bytes.
   * offset:       first byte inside the lane to overwrite.
   * length:       number of bytes; offset + length must not exceed 8.
   *
   * With lane complementing, lanes 1, 2, 8, 12, 17 and 20 are stored
   * complemented, so the bytes written to them are complemented as well.
   * Only little-endian platforms are supported.
   */
  void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
  {
  #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
      unsigned char *dest = (unsigned char*)state + lanePosition*8 + offset;
  #ifdef KeccakP1600_useLaneComplementing
      switch(lanePosition) {
      case 1: case 2: case 8: case 12: case 17: case 20: {
          unsigned int k;
          for(k=0; k<length; k++)
              dest[k] = ~data[k];
          return;
      }
      default:
          break;
      }
  #endif
      memcpy(dest, data, length);
  #else
  #error "Not yet implemented"
  #endif
  }
  200. /* ---------------------------------------------------------------- */
  /*
   * Replace the first `laneCount` lanes of the state with laneCount*8 bytes
   * taken from `data`.
   *
   * state:     the state; must be suitably aligned for 64-bit access when
   *            lane complementing is enabled.
   * data:      input bytes, any alignment.
   * laneCount: number of lanes to overwrite.
   *
   * The bytes are first copied with memcpy, so `data` is never read through a
   * 64-bit pointer.  With lane complementing, lanes 1, 2, 8, 12, 17 and 20
   * that fall inside the copied range are then complemented in place in the
   * state.  Only little-endian platforms are supported.
   */
  void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount)
  {
  #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
  #ifdef KeccakP1600_useLaneComplementing
      /* Ascending order lets the loop stop at the first index beyond laneCount. */
      static const unsigned char complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
      UINT64 *lanes = (UINT64*)state;
      unsigned int k;
  #endif

      memcpy(state, data, laneCount*8);
  #ifdef KeccakP1600_useLaneComplementing
      for(k=0; (k<6) && (complementedLanes[k]<laneCount); k++)
          lanes[complementedLanes[k]] = ~lanes[complementedLanes[k]];
  #endif
  #else
  #error "Not yet implemented"
  #endif
  }
  218. /* ---------------------------------------------------------------- */
  219. void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
  220. {
  221. SnP_OverwriteBytes(state, data, offset, length, KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8);
  222. }
  223. /* ---------------------------------------------------------------- */
  /*
   * Set the first `byteCount` bytes of the state to the value that represents
   * zero.
   *
   * state:     the state.
   * byteCount: number of leading bytes to reset.
   *
   * Without lane complementing this is a plain memset to 0.  With lane
   * complementing, lanes 1, 2, 8, 12, 17 and 20 are stored complemented, so
   * their bytes are set to 0xFF instead.  A trailing partial lane is filled
   * byte-wise with the value that matches its lane.  Only little-endian
   * platforms are supported.
   */
  void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
  {
  #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
  #ifdef KeccakP1600_useLaneComplementing
      UINT64 *lanes = (UINT64*)state;
      const unsigned int wholeLanes = byteCount/8;
      const unsigned int tailBytes = byteCount%8;
      unsigned int k;

      for(k=0; k<wholeLanes; k++) {
          switch(k) {
          case 1: case 2: case 8: case 12: case 17: case 20:
              lanes[k] = ~(UINT64)0;
              break;
          default:
              lanes[k] = 0;
              break;
          }
      }
      if (tailBytes != 0) {
          int fill;

          /* The partial lane is the one right after the whole lanes. */
          switch(wholeLanes) {
          case 1: case 2: case 8: case 12: case 17: case 20:
              fill = 0xFF;
              break;
          default:
              fill = 0;
              break;
          }
          memset((unsigned char*)state + wholeLanes*8, fill, tailBytes);
      }
  #else
      memset(state, 0, byteCount);
  #endif
  #else
  #error "Not yet implemented"
  #endif
  }
  248. /* ---------------------------------------------------------------- */
  249. void KeccakP1600_Permute_Nrounds(void *state, unsigned int nr)
  250. {
  251. declareABCDE
  252. unsigned int i;
  253. UINT64 *stateAsLanes = (UINT64*)state;
  254. copyFromState(A, stateAsLanes)
  255. roundsN(nr)
  256. copyToState(stateAsLanes, A)
  257. }
  258. /* ---------------------------------------------------------------- */
  259. void KeccakP1600_Permute_24rounds(void *state)
  260. {
  261. declareABCDE
  262. #ifndef KeccakP1600_fullUnrolling
  263. unsigned int i;
  264. #endif
  265. UINT64 *stateAsLanes = (UINT64*)state;
  266. copyFromState(A, stateAsLanes)
  267. rounds24
  268. copyToState(stateAsLanes, A)
  269. }
  270. /* ---------------------------------------------------------------- */
  271. void KeccakP1600_Permute_12rounds(void *state)
  272. {
  273. declareABCDE
  274. #ifndef KeccakP1600_fullUnrolling
  275. unsigned int i;
  276. #endif
  277. UINT64 *stateAsLanes = (UINT64*)state;
  278. copyFromState(A, stateAsLanes)
  279. rounds12
  280. copyToState(stateAsLanes, A)
  281. }
  282. /* ---------------------------------------------------------------- */
  283. void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length)
  284. {
  285. UINT64 lane = ((UINT64*)state)[lanePosition];
  286. #ifdef KeccakP1600_useLaneComplementing
  287. if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
  288. lane = ~lane;
  289. #endif
  290. #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
  291. {
  292. UINT64 lane1[1];
  293. lane1[0] = lane;
  294. memcpy(data, (UINT8*)lane1+offset, length);
  295. }
  296. #else
  297. unsigned int i;
  298. lane >>= offset*8;
  299. for(i=0; i<length; i++) {
  300. data[i] = lane & 0xFF;
  301. lane >>= 8;
  302. }
  303. #endif
  304. }
  305. /* ---------------------------------------------------------------- */
  #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
  /*
   * Store a 64-bit word into 8 bytes, least significant byte first.
   * Only compiled when the platform is not little-endian.
   *
   * bytes: receives 8 bytes.
   * word:  value to serialize.
   */
  void fromWordToBytes(UINT8 *bytes, const UINT64 word)
  {
      unsigned int shift;

      for(shift=0; shift<64; shift+=8)
          *bytes++ = (word >> shift) & 0xFF;
  }
  #endif
  /*
   * Copy the first `laneCount` lanes of the state into `data` as laneCount*8
   * bytes, each lane least significant byte first.
   *
   * state:     the state (read only).
   * data:      output buffer of at least laneCount*8 bytes, any alignment.
   * laneCount: number of lanes to extract.
   *
   * With lane complementing, the bytes of lanes 1, 2, 8, 12, 17 and 20 that
   * were extracted are complemented afterwards so that the caller sees the
   * plain value.  The complement is applied byte by byte: `data` is a byte
   * buffer with no alignment guarantee and must not be accessed through a
   * 64-bit pointer.
   */
  void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount)
  {
  #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
      unsigned int i;
  #endif
  #ifdef KeccakP1600_useLaneComplementing
      /* Ascending order lets the loop stop at the first index beyond laneCount. */
      static const unsigned char complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
      unsigned int k;
      unsigned int b;
  #endif

  #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
      memcpy(data, state, laneCount*8);
  #else
      for(i=0; i<laneCount; i++)
          fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
  #endif
  #ifdef KeccakP1600_useLaneComplementing
      for(k=0; (k<6) && (complementedLanes[k]<laneCount); k++) {
          unsigned char *laneBytes = data + 8*(unsigned int)complementedLanes[k];

          for(b=0; b<8; b++)
              laneBytes[b] = (unsigned char)~laneBytes[b];
      }
  #endif
  }
  344. /* ---------------------------------------------------------------- */
  345. void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
  346. {
  347. SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8);
  348. }
  349. /* ---------------------------------------------------------------- */
  350. void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
  351. {
  352. UINT64 lane = ((UINT64*)state)[lanePosition];
  353. #ifdef KeccakP1600_useLaneComplementing
  354. if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
  355. lane = ~lane;
  356. #endif
  357. #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
  358. {
  359. unsigned int i;
  360. UINT64 lane1[1];
  361. lane1[0] = lane;
  362. for(i=0; i<length; i++)
  363. output[i] = input[i] ^ ((UINT8*)lane1)[offset+i];
  364. }
  365. #else
  366. unsigned int i;
  367. lane >>= offset*8;
  368. for(i=0; i<length; i++) {
  369. output[i] = input[i] ^ (lane & 0xFF);
  370. lane >>= 8;
  371. }
  372. #endif
  373. }
  374. /* ---------------------------------------------------------------- */
  /*
   * XOR the first `laneCount` lanes of the state with `input` and store the
   * laneCount*8 resulting bytes in `output`; the state is not modified.
   *
   * state:     the state (read only).
   * input:     laneCount*8 input bytes, any alignment.
   * output:    receives laneCount*8 bytes, any alignment; may be the same
   *            buffer as `input`.
   * laneCount: number of lanes to process.
   *
   * `input` and `output` are byte buffers with no alignment guarantee, so
   * they are only ever accessed byte by byte.  On little-endian platforms the
   * in-memory bytes of the state are already in output order and are XORed
   * directly; otherwise each lane is first serialized least significant byte
   * first.  With lane complementing, the output bytes of lanes 1, 2, 8, 12,
   * 17 and 20 are complemented afterwards, which equals XORing with the plain
   * lane value.
   */
  void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount)
  {
  #ifdef KeccakP1600_useLaneComplementing
      /* Ascending order lets the loop stop at the first index beyond laneCount. */
      static const unsigned char complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
      unsigned int k;
      unsigned int b;
  #endif
  #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
      const unsigned char *stateBytes = (const unsigned char*)state;
      const unsigned int byteCount = laneCount*8;
      unsigned int i;

      for(i=0; i<byteCount; i++)
          output[i] = input[i] ^ stateBytes[i];
  #else
      unsigned char temp[8];
      unsigned int i;
      unsigned int j;

      for(i=0; i<laneCount; i++) {
          fromWordToBytes(temp, ((const UINT64*)state)[i]);
          for(j=0; j<8; j++)
              output[i*8+j] = input[i*8+j] ^ temp[j];
      }
  #endif
  #ifdef KeccakP1600_useLaneComplementing
      for(k=0; (k<6) && (complementedLanes[k]<laneCount); k++) {
          unsigned char *laneBytes = output + 8*(unsigned int)complementedLanes[k];

          for(b=0; b<8; b++)
              laneBytes[b] = (unsigned char)~laneBytes[b];
      }
  #endif
  }
  412. /* ---------------------------------------------------------------- */
  413. void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
  414. {
  415. SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8);
  416. }
  417. /* ---------------------------------------------------------------- */
  418. ALLOW_MISALIGNED_ACCESS
  419. size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen)
  420. {
  421. size_t originalDataByteLen = dataByteLen;
  422. declareABCDE
  423. #ifndef KeccakP1600_fullUnrolling
  424. unsigned int i;
  425. #endif
  426. UINT64 *stateAsLanes = (UINT64*)state;
  427. UINT64 *inDataAsLanes = (UINT64*)data;
  428. copyFromState(A, stateAsLanes)
  429. while(dataByteLen >= laneCount*8) {
  430. addInput(A, inDataAsLanes, laneCount)
  431. rounds24
  432. inDataAsLanes += laneCount;
  433. dataByteLen -= laneCount*8;
  434. }
  435. copyToState(stateAsLanes, A)
  436. return originalDataByteLen - dataByteLen;
  437. }