KeccakP-1600-64.macros 69 KB


  1. /*
  2. Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
  3. Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
  4. denoted as "the implementer".
  5. For more information, feedback or questions, please refer to our websites:
  6. http://keccak.noekeon.org/
  7. http://keyak.noekeon.org/
  8. http://ketje.noekeon.org/
  9. To the extent possible under law, the implementer has waived all copyright
  10. and related or neighboring rights to the source code in this file.
  11. http://creativecommons.org/publicdomain/zero/1.0/
  12. */
  13. #define declareABCDE \
  14. UINT64 Aba, Abe, Abi, Abo, Abu; \
  15. UINT64 Aga, Age, Agi, Ago, Agu; \
  16. UINT64 Aka, Ake, Aki, Ako, Aku; \
  17. UINT64 Ama, Ame, Ami, Amo, Amu; \
  18. UINT64 Asa, Ase, Asi, Aso, Asu; \
  19. UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \
  20. UINT64 Bga, Bge, Bgi, Bgo, Bgu; \
  21. UINT64 Bka, Bke, Bki, Bko, Bku; \
  22. UINT64 Bma, Bme, Bmi, Bmo, Bmu; \
  23. UINT64 Bsa, Bse, Bsi, Bso, Bsu; \
  24. UINT64 Ca, Ce, Ci, Co, Cu; \
  25. UINT64 Da, De, Di, Do, Du; \
  26. UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \
  27. UINT64 Ega, Ege, Egi, Ego, Egu; \
  28. UINT64 Eka, Eke, Eki, Eko, Eku; \
  29. UINT64 Ema, Eme, Emi, Emo, Emu; \
  30. UINT64 Esa, Ese, Esi, Eso, Esu; \
  31. #define prepareTheta \
  32. Ca = Aba^Aga^Aka^Ama^Asa; \
  33. Ce = Abe^Age^Ake^Ame^Ase; \
  34. Ci = Abi^Agi^Aki^Ami^Asi; \
  35. Co = Abo^Ago^Ako^Amo^Aso; \
  36. Cu = Abu^Agu^Aku^Amu^Asu; \
  37. #ifdef UseBebigokimisa
  38. /* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') */
  39. /* --- 64-bit lanes mapped to 64-bit words */
  40. #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
  41. Da = Cu^ROL64(Ce, 1); \
  42. De = Ca^ROL64(Ci, 1); \
  43. Di = Ce^ROL64(Co, 1); \
  44. Do = Ci^ROL64(Cu, 1); \
  45. Du = Co^ROL64(Ca, 1); \
  46. \
  47. A##ba ^= Da; \
  48. Bba = A##ba; \
  49. A##ge ^= De; \
  50. Bbe = ROL64(A##ge, 44); \
  51. A##ki ^= Di; \
  52. Bbi = ROL64(A##ki, 43); \
  53. A##mo ^= Do; \
  54. Bbo = ROL64(A##mo, 21); \
  55. A##su ^= Du; \
  56. Bbu = ROL64(A##su, 14); \
  57. E##ba = Bba ^( Bbe | Bbi ); \
  58. E##ba ^= KeccakF1600RoundConstants[i]; \
  59. Ca = E##ba; \
  60. E##be = Bbe ^((~Bbi)| Bbo ); \
  61. Ce = E##be; \
  62. E##bi = Bbi ^( Bbo & Bbu ); \
  63. Ci = E##bi; \
  64. E##bo = Bbo ^( Bbu | Bba ); \
  65. Co = E##bo; \
  66. E##bu = Bbu ^( Bba & Bbe ); \
  67. Cu = E##bu; \
  68. \
  69. A##bo ^= Do; \
  70. Bga = ROL64(A##bo, 28); \
  71. A##gu ^= Du; \
  72. Bge = ROL64(A##gu, 20); \
  73. A##ka ^= Da; \
  74. Bgi = ROL64(A##ka, 3); \
  75. A##me ^= De; \
  76. Bgo = ROL64(A##me, 45); \
  77. A##si ^= Di; \
  78. Bgu = ROL64(A##si, 61); \
  79. E##ga = Bga ^( Bge | Bgi ); \
  80. Ca ^= E##ga; \
  81. E##ge = Bge ^( Bgi & Bgo ); \
  82. Ce ^= E##ge; \
  83. E##gi = Bgi ^( Bgo |(~Bgu)); \
  84. Ci ^= E##gi; \
  85. E##go = Bgo ^( Bgu | Bga ); \
  86. Co ^= E##go; \
  87. E##gu = Bgu ^( Bga & Bge ); \
  88. Cu ^= E##gu; \
  89. \
  90. A##be ^= De; \
  91. Bka = ROL64(A##be, 1); \
  92. A##gi ^= Di; \
  93. Bke = ROL64(A##gi, 6); \
  94. A##ko ^= Do; \
  95. Bki = ROL64(A##ko, 25); \
  96. A##mu ^= Du; \
  97. Bko = ROL64(A##mu, 8); \
  98. A##sa ^= Da; \
  99. Bku = ROL64(A##sa, 18); \
  100. E##ka = Bka ^( Bke | Bki ); \
  101. Ca ^= E##ka; \
  102. E##ke = Bke ^( Bki & Bko ); \
  103. Ce ^= E##ke; \
  104. E##ki = Bki ^((~Bko)& Bku ); \
  105. Ci ^= E##ki; \
  106. E##ko = (~Bko)^( Bku | Bka ); \
  107. Co ^= E##ko; \
  108. E##ku = Bku ^( Bka & Bke ); \
  109. Cu ^= E##ku; \
  110. \
  111. A##bu ^= Du; \
  112. Bma = ROL64(A##bu, 27); \
  113. A##ga ^= Da; \
  114. Bme = ROL64(A##ga, 36); \
  115. A##ke ^= De; \
  116. Bmi = ROL64(A##ke, 10); \
  117. A##mi ^= Di; \
  118. Bmo = ROL64(A##mi, 15); \
  119. A##so ^= Do; \
  120. Bmu = ROL64(A##so, 56); \
  121. E##ma = Bma ^( Bme & Bmi ); \
  122. Ca ^= E##ma; \
  123. E##me = Bme ^( Bmi | Bmo ); \
  124. Ce ^= E##me; \
  125. E##mi = Bmi ^((~Bmo)| Bmu ); \
  126. Ci ^= E##mi; \
  127. E##mo = (~Bmo)^( Bmu & Bma ); \
  128. Co ^= E##mo; \
  129. E##mu = Bmu ^( Bma | Bme ); \
  130. Cu ^= E##mu; \
  131. \
  132. A##bi ^= Di; \
  133. Bsa = ROL64(A##bi, 62); \
  134. A##go ^= Do; \
  135. Bse = ROL64(A##go, 55); \
  136. A##ku ^= Du; \
  137. Bsi = ROL64(A##ku, 39); \
  138. A##ma ^= Da; \
  139. Bso = ROL64(A##ma, 41); \
  140. A##se ^= De; \
  141. Bsu = ROL64(A##se, 2); \
  142. E##sa = Bsa ^((~Bse)& Bsi ); \
  143. Ca ^= E##sa; \
  144. E##se = (~Bse)^( Bsi | Bso ); \
  145. Ce ^= E##se; \
  146. E##si = Bsi ^( Bso & Bsu ); \
  147. Ci ^= E##si; \
  148. E##so = Bso ^( Bsu | Bsa ); \
  149. Co ^= E##so; \
  150. E##su = Bsu ^( Bsa & Bse ); \
  151. Cu ^= E##su; \
  152. \
  153. /* --- Code for round (lane complementing pattern 'bebigokimisa') */
  154. /* --- 64-bit lanes mapped to 64-bit words */
  155. #define thetaRhoPiChiIota(i, A, E) \
  156. Da = Cu^ROL64(Ce, 1); \
  157. De = Ca^ROL64(Ci, 1); \
  158. Di = Ce^ROL64(Co, 1); \
  159. Do = Ci^ROL64(Cu, 1); \
  160. Du = Co^ROL64(Ca, 1); \
  161. \
  162. A##ba ^= Da; \
  163. Bba = A##ba; \
  164. A##ge ^= De; \
  165. Bbe = ROL64(A##ge, 44); \
  166. A##ki ^= Di; \
  167. Bbi = ROL64(A##ki, 43); \
  168. A##mo ^= Do; \
  169. Bbo = ROL64(A##mo, 21); \
  170. A##su ^= Du; \
  171. Bbu = ROL64(A##su, 14); \
  172. E##ba = Bba ^( Bbe | Bbi ); \
  173. E##ba ^= KeccakF1600RoundConstants[i]; \
  174. E##be = Bbe ^((~Bbi)| Bbo ); \
  175. E##bi = Bbi ^( Bbo & Bbu ); \
  176. E##bo = Bbo ^( Bbu | Bba ); \
  177. E##bu = Bbu ^( Bba & Bbe ); \
  178. \
  179. A##bo ^= Do; \
  180. Bga = ROL64(A##bo, 28); \
  181. A##gu ^= Du; \
  182. Bge = ROL64(A##gu, 20); \
  183. A##ka ^= Da; \
  184. Bgi = ROL64(A##ka, 3); \
  185. A##me ^= De; \
  186. Bgo = ROL64(A##me, 45); \
  187. A##si ^= Di; \
  188. Bgu = ROL64(A##si, 61); \
  189. E##ga = Bga ^( Bge | Bgi ); \
  190. E##ge = Bge ^( Bgi & Bgo ); \
  191. E##gi = Bgi ^( Bgo |(~Bgu)); \
  192. E##go = Bgo ^( Bgu | Bga ); \
  193. E##gu = Bgu ^( Bga & Bge ); \
  194. \
  195. A##be ^= De; \
  196. Bka = ROL64(A##be, 1); \
  197. A##gi ^= Di; \
  198. Bke = ROL64(A##gi, 6); \
  199. A##ko ^= Do; \
  200. Bki = ROL64(A##ko, 25); \
  201. A##mu ^= Du; \
  202. Bko = ROL64(A##mu, 8); \
  203. A##sa ^= Da; \
  204. Bku = ROL64(A##sa, 18); \
  205. E##ka = Bka ^( Bke | Bki ); \
  206. E##ke = Bke ^( Bki & Bko ); \
  207. E##ki = Bki ^((~Bko)& Bku ); \
  208. E##ko = (~Bko)^( Bku | Bka ); \
  209. E##ku = Bku ^( Bka & Bke ); \
  210. \
  211. A##bu ^= Du; \
  212. Bma = ROL64(A##bu, 27); \
  213. A##ga ^= Da; \
  214. Bme = ROL64(A##ga, 36); \
  215. A##ke ^= De; \
  216. Bmi = ROL64(A##ke, 10); \
  217. A##mi ^= Di; \
  218. Bmo = ROL64(A##mi, 15); \
  219. A##so ^= Do; \
  220. Bmu = ROL64(A##so, 56); \
  221. E##ma = Bma ^( Bme & Bmi ); \
  222. E##me = Bme ^( Bmi | Bmo ); \
  223. E##mi = Bmi ^((~Bmo)| Bmu ); \
  224. E##mo = (~Bmo)^( Bmu & Bma ); \
  225. E##mu = Bmu ^( Bma | Bme ); \
  226. \
  227. A##bi ^= Di; \
  228. Bsa = ROL64(A##bi, 62); \
  229. A##go ^= Do; \
  230. Bse = ROL64(A##go, 55); \
  231. A##ku ^= Du; \
  232. Bsi = ROL64(A##ku, 39); \
  233. A##ma ^= Da; \
  234. Bso = ROL64(A##ma, 41); \
  235. A##se ^= De; \
  236. Bsu = ROL64(A##se, 2); \
  237. E##sa = Bsa ^((~Bse)& Bsi ); \
  238. E##se = (~Bse)^( Bsi | Bso ); \
  239. E##si = Bsi ^( Bso & Bsu ); \
  240. E##so = Bso ^( Bsu | Bsa ); \
  241. E##su = Bsu ^( Bsa & Bse ); \
  242. \
  243. #else /* UseBebigokimisa */
  244. /* --- Code for round, with prepare-theta */
  245. /* --- 64-bit lanes mapped to 64-bit words */
  246. #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
  247. Da = Cu^ROL64(Ce, 1); \
  248. De = Ca^ROL64(Ci, 1); \
  249. Di = Ce^ROL64(Co, 1); \
  250. Do = Ci^ROL64(Cu, 1); \
  251. Du = Co^ROL64(Ca, 1); \
  252. \
  253. A##ba ^= Da; \
  254. Bba = A##ba; \
  255. A##ge ^= De; \
  256. Bbe = ROL64(A##ge, 44); \
  257. A##ki ^= Di; \
  258. Bbi = ROL64(A##ki, 43); \
  259. A##mo ^= Do; \
  260. Bbo = ROL64(A##mo, 21); \
  261. A##su ^= Du; \
  262. Bbu = ROL64(A##su, 14); \
  263. E##ba = Bba ^((~Bbe)& Bbi ); \
  264. E##ba ^= KeccakF1600RoundConstants[i]; \
  265. Ca = E##ba; \
  266. E##be = Bbe ^((~Bbi)& Bbo ); \
  267. Ce = E##be; \
  268. E##bi = Bbi ^((~Bbo)& Bbu ); \
  269. Ci = E##bi; \
  270. E##bo = Bbo ^((~Bbu)& Bba ); \
  271. Co = E##bo; \
  272. E##bu = Bbu ^((~Bba)& Bbe ); \
  273. Cu = E##bu; \
  274. \
  275. A##bo ^= Do; \
  276. Bga = ROL64(A##bo, 28); \
  277. A##gu ^= Du; \
  278. Bge = ROL64(A##gu, 20); \
  279. A##ka ^= Da; \
  280. Bgi = ROL64(A##ka, 3); \
  281. A##me ^= De; \
  282. Bgo = ROL64(A##me, 45); \
  283. A##si ^= Di; \
  284. Bgu = ROL64(A##si, 61); \
  285. E##ga = Bga ^((~Bge)& Bgi ); \
  286. Ca ^= E##ga; \
  287. E##ge = Bge ^((~Bgi)& Bgo ); \
  288. Ce ^= E##ge; \
  289. E##gi = Bgi ^((~Bgo)& Bgu ); \
  290. Ci ^= E##gi; \
  291. E##go = Bgo ^((~Bgu)& Bga ); \
  292. Co ^= E##go; \
  293. E##gu = Bgu ^((~Bga)& Bge ); \
  294. Cu ^= E##gu; \
  295. \
  296. A##be ^= De; \
  297. Bka = ROL64(A##be, 1); \
  298. A##gi ^= Di; \
  299. Bke = ROL64(A##gi, 6); \
  300. A##ko ^= Do; \
  301. Bki = ROL64(A##ko, 25); \
  302. A##mu ^= Du; \
  303. Bko = ROL64(A##mu, 8); \
  304. A##sa ^= Da; \
  305. Bku = ROL64(A##sa, 18); \
  306. E##ka = Bka ^((~Bke)& Bki ); \
  307. Ca ^= E##ka; \
  308. E##ke = Bke ^((~Bki)& Bko ); \
  309. Ce ^= E##ke; \
  310. E##ki = Bki ^((~Bko)& Bku ); \
  311. Ci ^= E##ki; \
  312. E##ko = Bko ^((~Bku)& Bka ); \
  313. Co ^= E##ko; \
  314. E##ku = Bku ^((~Bka)& Bke ); \
  315. Cu ^= E##ku; \
  316. \
  317. A##bu ^= Du; \
  318. Bma = ROL64(A##bu, 27); \
  319. A##ga ^= Da; \
  320. Bme = ROL64(A##ga, 36); \
  321. A##ke ^= De; \
  322. Bmi = ROL64(A##ke, 10); \
  323. A##mi ^= Di; \
  324. Bmo = ROL64(A##mi, 15); \
  325. A##so ^= Do; \
  326. Bmu = ROL64(A##so, 56); \
  327. E##ma = Bma ^((~Bme)& Bmi ); \
  328. Ca ^= E##ma; \
  329. E##me = Bme ^((~Bmi)& Bmo ); \
  330. Ce ^= E##me; \
  331. E##mi = Bmi ^((~Bmo)& Bmu ); \
  332. Ci ^= E##mi; \
  333. E##mo = Bmo ^((~Bmu)& Bma ); \
  334. Co ^= E##mo; \
  335. E##mu = Bmu ^((~Bma)& Bme ); \
  336. Cu ^= E##mu; \
  337. \
  338. A##bi ^= Di; \
  339. Bsa = ROL64(A##bi, 62); \
  340. A##go ^= Do; \
  341. Bse = ROL64(A##go, 55); \
  342. A##ku ^= Du; \
  343. Bsi = ROL64(A##ku, 39); \
  344. A##ma ^= Da; \
  345. Bso = ROL64(A##ma, 41); \
  346. A##se ^= De; \
  347. Bsu = ROL64(A##se, 2); \
  348. E##sa = Bsa ^((~Bse)& Bsi ); \
  349. Ca ^= E##sa; \
  350. E##se = Bse ^((~Bsi)& Bso ); \
  351. Ce ^= E##se; \
  352. E##si = Bsi ^((~Bso)& Bsu ); \
  353. Ci ^= E##si; \
  354. E##so = Bso ^((~Bsu)& Bsa ); \
  355. Co ^= E##so; \
  356. E##su = Bsu ^((~Bsa)& Bse ); \
  357. Cu ^= E##su; \
  358. \
  359. /* --- Code for round */
  360. /* --- 64-bit lanes mapped to 64-bit words */
  361. #define thetaRhoPiChiIota(i, A, E) \
  362. Da = Cu^ROL64(Ce, 1); \
  363. De = Ca^ROL64(Ci, 1); \
  364. Di = Ce^ROL64(Co, 1); \
  365. Do = Ci^ROL64(Cu, 1); \
  366. Du = Co^ROL64(Ca, 1); \
  367. \
  368. A##ba ^= Da; \
  369. Bba = A##ba; \
  370. A##ge ^= De; \
  371. Bbe = ROL64(A##ge, 44); \
  372. A##ki ^= Di; \
  373. Bbi = ROL64(A##ki, 43); \
  374. A##mo ^= Do; \
  375. Bbo = ROL64(A##mo, 21); \
  376. A##su ^= Du; \
  377. Bbu = ROL64(A##su, 14); \
  378. E##ba = Bba ^((~Bbe)& Bbi ); \
  379. E##ba ^= KeccakF1600RoundConstants[i]; \
  380. E##be = Bbe ^((~Bbi)& Bbo ); \
  381. E##bi = Bbi ^((~Bbo)& Bbu ); \
  382. E##bo = Bbo ^((~Bbu)& Bba ); \
  383. E##bu = Bbu ^((~Bba)& Bbe ); \
  384. \
  385. A##bo ^= Do; \
  386. Bga = ROL64(A##bo, 28); \
  387. A##gu ^= Du; \
  388. Bge = ROL64(A##gu, 20); \
  389. A##ka ^= Da; \
  390. Bgi = ROL64(A##ka, 3); \
  391. A##me ^= De; \
  392. Bgo = ROL64(A##me, 45); \
  393. A##si ^= Di; \
  394. Bgu = ROL64(A##si, 61); \
  395. E##ga = Bga ^((~Bge)& Bgi ); \
  396. E##ge = Bge ^((~Bgi)& Bgo ); \
  397. E##gi = Bgi ^((~Bgo)& Bgu ); \
  398. E##go = Bgo ^((~Bgu)& Bga ); \
  399. E##gu = Bgu ^((~Bga)& Bge ); \
  400. \
  401. A##be ^= De; \
  402. Bka = ROL64(A##be, 1); \
  403. A##gi ^= Di; \
  404. Bke = ROL64(A##gi, 6); \
  405. A##ko ^= Do; \
  406. Bki = ROL64(A##ko, 25); \
  407. A##mu ^= Du; \
  408. Bko = ROL64(A##mu, 8); \
  409. A##sa ^= Da; \
  410. Bku = ROL64(A##sa, 18); \
  411. E##ka = Bka ^((~Bke)& Bki ); \
  412. E##ke = Bke ^((~Bki)& Bko ); \
  413. E##ki = Bki ^((~Bko)& Bku ); \
  414. E##ko = Bko ^((~Bku)& Bka ); \
  415. E##ku = Bku ^((~Bka)& Bke ); \
  416. \
  417. A##bu ^= Du; \
  418. Bma = ROL64(A##bu, 27); \
  419. A##ga ^= Da; \
  420. Bme = ROL64(A##ga, 36); \
  421. A##ke ^= De; \
  422. Bmi = ROL64(A##ke, 10); \
  423. A##mi ^= Di; \
  424. Bmo = ROL64(A##mi, 15); \
  425. A##so ^= Do; \
  426. Bmu = ROL64(A##so, 56); \
  427. E##ma = Bma ^((~Bme)& Bmi ); \
  428. E##me = Bme ^((~Bmi)& Bmo ); \
  429. E##mi = Bmi ^((~Bmo)& Bmu ); \
  430. E##mo = Bmo ^((~Bmu)& Bma ); \
  431. E##mu = Bmu ^((~Bma)& Bme ); \
  432. \
  433. A##bi ^= Di; \
  434. Bsa = ROL64(A##bi, 62); \
  435. A##go ^= Do; \
  436. Bse = ROL64(A##go, 55); \
  437. A##ku ^= Du; \
  438. Bsi = ROL64(A##ku, 39); \
  439. A##ma ^= Da; \
  440. Bso = ROL64(A##ma, 41); \
  441. A##se ^= De; \
  442. Bsu = ROL64(A##se, 2); \
  443. E##sa = Bsa ^((~Bse)& Bsi ); \
  444. E##se = Bse ^((~Bsi)& Bso ); \
  445. E##si = Bsi ^((~Bso)& Bsu ); \
  446. E##so = Bso ^((~Bsu)& Bsa ); \
  447. E##su = Bsu ^((~Bsa)& Bse ); \
  448. \
  449. #endif /* UseBebigokimisa */
  450. #define copyFromState(X, state) \
  451. X##ba = state[ 0]; \
  452. X##be = state[ 1]; \
  453. X##bi = state[ 2]; \
  454. X##bo = state[ 3]; \
  455. X##bu = state[ 4]; \
  456. X##ga = state[ 5]; \
  457. X##ge = state[ 6]; \
  458. X##gi = state[ 7]; \
  459. X##go = state[ 8]; \
  460. X##gu = state[ 9]; \
  461. X##ka = state[10]; \
  462. X##ke = state[11]; \
  463. X##ki = state[12]; \
  464. X##ko = state[13]; \
  465. X##ku = state[14]; \
  466. X##ma = state[15]; \
  467. X##me = state[16]; \
  468. X##mi = state[17]; \
  469. X##mo = state[18]; \
  470. X##mu = state[19]; \
  471. X##sa = state[20]; \
  472. X##se = state[21]; \
  473. X##si = state[22]; \
  474. X##so = state[23]; \
  475. X##su = state[24]; \
  476. #define copyToState(state, X) \
  477. state[ 0] = X##ba; \
  478. state[ 1] = X##be; \
  479. state[ 2] = X##bi; \
  480. state[ 3] = X##bo; \
  481. state[ 4] = X##bu; \
  482. state[ 5] = X##ga; \
  483. state[ 6] = X##ge; \
  484. state[ 7] = X##gi; \
  485. state[ 8] = X##go; \
  486. state[ 9] = X##gu; \
  487. state[10] = X##ka; \
  488. state[11] = X##ke; \
  489. state[12] = X##ki; \
  490. state[13] = X##ko; \
  491. state[14] = X##ku; \
  492. state[15] = X##ma; \
  493. state[16] = X##me; \
  494. state[17] = X##mi; \
  495. state[18] = X##mo; \
  496. state[19] = X##mu; \
  497. state[20] = X##sa; \
  498. state[21] = X##se; \
  499. state[22] = X##si; \
  500. state[23] = X##so; \
  501. state[24] = X##su; \
  502. #define copyStateVariables(X, Y) \
  503. X##ba = Y##ba; \
  504. X##be = Y##be; \
  505. X##bi = Y##bi; \
  506. X##bo = Y##bo; \
  507. X##bu = Y##bu; \
  508. X##ga = Y##ga; \
  509. X##ge = Y##ge; \
  510. X##gi = Y##gi; \
  511. X##go = Y##go; \
  512. X##gu = Y##gu; \
  513. X##ka = Y##ka; \
  514. X##ke = Y##ke; \
  515. X##ki = Y##ki; \
  516. X##ko = Y##ko; \
  517. X##ku = Y##ku; \
  518. X##ma = Y##ma; \
  519. X##me = Y##me; \
  520. X##mi = Y##mi; \
  521. X##mo = Y##mo; \
  522. X##mu = Y##mu; \
  523. X##sa = Y##sa; \
  524. X##se = Y##se; \
  525. X##si = Y##si; \
  526. X##so = Y##so; \
  527. X##su = Y##su; \
  528. #define copyFromStateAndAdd(X, state, input, laneCount) \
  529. if (laneCount < 16) { \
  530. if (laneCount < 8) { \
  531. if (laneCount < 4) { \
  532. if (laneCount < 2) { \
  533. if (laneCount < 1) { \
  534. X##ba = state[ 0]; \
  535. } \
  536. else { \
  537. X##ba = state[ 0]^input[ 0]; \
  538. } \
  539. X##be = state[ 1]; \
  540. X##bi = state[ 2]; \
  541. } \
  542. else { \
  543. X##ba = state[ 0]^input[ 0]; \
  544. X##be = state[ 1]^input[ 1]; \
  545. if (laneCount < 3) { \
  546. X##bi = state[ 2]; \
  547. } \
  548. else { \
  549. X##bi = state[ 2]^input[ 2]; \
  550. } \
  551. } \
  552. X##bo = state[ 3]; \
  553. X##bu = state[ 4]; \
  554. X##ga = state[ 5]; \
  555. X##ge = state[ 6]; \
  556. } \
  557. else { \
  558. X##ba = state[ 0]^input[ 0]; \
  559. X##be = state[ 1]^input[ 1]; \
  560. X##bi = state[ 2]^input[ 2]; \
  561. X##bo = state[ 3]^input[ 3]; \
  562. if (laneCount < 6) { \
  563. if (laneCount < 5) { \
  564. X##bu = state[ 4]; \
  565. } \
  566. else { \
  567. X##bu = state[ 4]^input[ 4]; \
  568. } \
  569. X##ga = state[ 5]; \
  570. X##ge = state[ 6]; \
  571. } \
  572. else { \
  573. X##bu = state[ 4]^input[ 4]; \
  574. X##ga = state[ 5]^input[ 5]; \
  575. if (laneCount < 7) { \
  576. X##ge = state[ 6]; \
  577. } \
  578. else { \
  579. X##ge = state[ 6]^input[ 6]; \
  580. } \
  581. } \
  582. } \
  583. X##gi = state[ 7]; \
  584. X##go = state[ 8]; \
  585. X##gu = state[ 9]; \
  586. X##ka = state[10]; \
  587. X##ke = state[11]; \
  588. X##ki = state[12]; \
  589. X##ko = state[13]; \
  590. X##ku = state[14]; \
  591. } \
  592. else { \
  593. X##ba = state[ 0]^input[ 0]; \
  594. X##be = state[ 1]^input[ 1]; \
  595. X##bi = state[ 2]^input[ 2]; \
  596. X##bo = state[ 3]^input[ 3]; \
  597. X##bu = state[ 4]^input[ 4]; \
  598. X##ga = state[ 5]^input[ 5]; \
  599. X##ge = state[ 6]^input[ 6]; \
  600. X##gi = state[ 7]^input[ 7]; \
  601. if (laneCount < 12) { \
  602. if (laneCount < 10) { \
  603. if (laneCount < 9) { \
  604. X##go = state[ 8]; \
  605. } \
  606. else { \
  607. X##go = state[ 8]^input[ 8]; \
  608. } \
  609. X##gu = state[ 9]; \
  610. X##ka = state[10]; \
  611. } \
  612. else { \
  613. X##go = state[ 8]^input[ 8]; \
  614. X##gu = state[ 9]^input[ 9]; \
  615. if (laneCount < 11) { \
  616. X##ka = state[10]; \
  617. } \
  618. else { \
  619. X##ka = state[10]^input[10]; \
  620. } \
  621. } \
  622. X##ke = state[11]; \
  623. X##ki = state[12]; \
  624. X##ko = state[13]; \
  625. X##ku = state[14]; \
  626. } \
  627. else { \
  628. X##go = state[ 8]^input[ 8]; \
  629. X##gu = state[ 9]^input[ 9]; \
  630. X##ka = state[10]^input[10]; \
  631. X##ke = state[11]^input[11]; \
  632. if (laneCount < 14) { \
  633. if (laneCount < 13) { \
  634. X##ki = state[12]; \
  635. } \
  636. else { \
  637. X##ki = state[12]^input[12]; \
  638. } \
  639. X##ko = state[13]; \
  640. X##ku = state[14]; \
  641. } \
  642. else { \
  643. X##ki = state[12]^input[12]; \
  644. X##ko = state[13]^input[13]; \
  645. if (laneCount < 15) { \
  646. X##ku = state[14]; \
  647. } \
  648. else { \
  649. X##ku = state[14]^input[14]; \
  650. } \
  651. } \
  652. } \
  653. } \
  654. X##ma = state[15]; \
  655. X##me = state[16]; \
  656. X##mi = state[17]; \
  657. X##mo = state[18]; \
  658. X##mu = state[19]; \
  659. X##sa = state[20]; \
  660. X##se = state[21]; \
  661. X##si = state[22]; \
  662. X##so = state[23]; \
  663. X##su = state[24]; \
  664. } \
  665. else { \
  666. X##ba = state[ 0]^input[ 0]; \
  667. X##be = state[ 1]^input[ 1]; \
  668. X##bi = state[ 2]^input[ 2]; \
  669. X##bo = state[ 3]^input[ 3]; \
  670. X##bu = state[ 4]^input[ 4]; \
  671. X##ga = state[ 5]^input[ 5]; \
  672. X##ge = state[ 6]^input[ 6]; \
  673. X##gi = state[ 7]^input[ 7]; \
  674. X##go = state[ 8]^input[ 8]; \
  675. X##gu = state[ 9]^input[ 9]; \
  676. X##ka = state[10]^input[10]; \
  677. X##ke = state[11]^input[11]; \
  678. X##ki = state[12]^input[12]; \
  679. X##ko = state[13]^input[13]; \
  680. X##ku = state[14]^input[14]; \
  681. X##ma = state[15]^input[15]; \
  682. if (laneCount < 24) { \
  683. if (laneCount < 20) { \
  684. if (laneCount < 18) { \
  685. if (laneCount < 17) { \
  686. X##me = state[16]; \
  687. } \
  688. else { \
  689. X##me = state[16]^input[16]; \
  690. } \
  691. X##mi = state[17]; \
  692. X##mo = state[18]; \
  693. } \
  694. else { \
  695. X##me = state[16]^input[16]; \
  696. X##mi = state[17]^input[17]; \
  697. if (laneCount < 19) { \
  698. X##mo = state[18]; \
  699. } \
  700. else { \
  701. X##mo = state[18]^input[18]; \
  702. } \
  703. } \
  704. X##mu = state[19]; \
  705. X##sa = state[20]; \
  706. X##se = state[21]; \
  707. X##si = state[22]; \
  708. } \
  709. else { \
  710. X##me = state[16]^input[16]; \
  711. X##mi = state[17]^input[17]; \
  712. X##mo = state[18]^input[18]; \
  713. X##mu = state[19]^input[19]; \
  714. if (laneCount < 22) { \
  715. if (laneCount < 21) { \
  716. X##sa = state[20]; \
  717. } \
  718. else { \
  719. X##sa = state[20]^input[20]; \
  720. } \
  721. X##se = state[21]; \
  722. X##si = state[22]; \
  723. } \
  724. else { \
  725. X##sa = state[20]^input[20]; \
  726. X##se = state[21]^input[21]; \
  727. if (laneCount < 23) { \
  728. X##si = state[22]; \
  729. } \
  730. else { \
  731. X##si = state[22]^input[22]; \
  732. } \
  733. } \
  734. } \
  735. X##so = state[23]; \
  736. X##su = state[24]; \
  737. } \
  738. else { \
  739. X##me = state[16]^input[16]; \
  740. X##mi = state[17]^input[17]; \
  741. X##mo = state[18]^input[18]; \
  742. X##mu = state[19]^input[19]; \
  743. X##sa = state[20]^input[20]; \
  744. X##se = state[21]^input[21]; \
  745. X##si = state[22]^input[22]; \
  746. X##so = state[23]^input[23]; \
  747. if (laneCount < 25) { \
  748. X##su = state[24]; \
  749. } \
  750. else { \
  751. X##su = state[24]^input[24]; \
  752. } \
  753. } \
  754. }
  755. #define addInput(X, input, laneCount) \
  756. if (laneCount == 21) { \
  757. X##ba ^= input[ 0]; \
  758. X##be ^= input[ 1]; \
  759. X##bi ^= input[ 2]; \
  760. X##bo ^= input[ 3]; \
  761. X##bu ^= input[ 4]; \
  762. X##ga ^= input[ 5]; \
  763. X##ge ^= input[ 6]; \
  764. X##gi ^= input[ 7]; \
  765. X##go ^= input[ 8]; \
  766. X##gu ^= input[ 9]; \
  767. X##ka ^= input[10]; \
  768. X##ke ^= input[11]; \
  769. X##ki ^= input[12]; \
  770. X##ko ^= input[13]; \
  771. X##ku ^= input[14]; \
  772. X##ma ^= input[15]; \
  773. X##me ^= input[16]; \
  774. X##mi ^= input[17]; \
  775. X##mo ^= input[18]; \
  776. X##mu ^= input[19]; \
  777. X##sa ^= input[20]; \
  778. } \
  779. else if (laneCount < 16) { \
  780. if (laneCount < 8) { \
  781. if (laneCount < 4) { \
  782. if (laneCount < 2) { \
  783. if (laneCount < 1) { \
  784. } \
  785. else { \
  786. X##ba ^= input[ 0]; \
  787. } \
  788. } \
  789. else { \
  790. X##ba ^= input[ 0]; \
  791. X##be ^= input[ 1]; \
  792. if (laneCount < 3) { \
  793. } \
  794. else { \
  795. X##bi ^= input[ 2]; \
  796. } \
  797. } \
  798. } \
  799. else { \
  800. X##ba ^= input[ 0]; \
  801. X##be ^= input[ 1]; \
  802. X##bi ^= input[ 2]; \
  803. X##bo ^= input[ 3]; \
  804. if (laneCount < 6) { \
  805. if (laneCount < 5) { \
  806. } \
  807. else { \
  808. X##bu ^= input[ 4]; \
  809. } \
  810. } \
  811. else { \
  812. X##bu ^= input[ 4]; \
  813. X##ga ^= input[ 5]; \
  814. if (laneCount < 7) { \
  815. } \
  816. else { \
  817. X##ge ^= input[ 6]; \
  818. } \
  819. } \
  820. } \
  821. } \
  822. else { \
  823. X##ba ^= input[ 0]; \
  824. X##be ^= input[ 1]; \
  825. X##bi ^= input[ 2]; \
  826. X##bo ^= input[ 3]; \
  827. X##bu ^= input[ 4]; \
  828. X##ga ^= input[ 5]; \
  829. X##ge ^= input[ 6]; \
  830. X##gi ^= input[ 7]; \
  831. if (laneCount < 12) { \
  832. if (laneCount < 10) { \
  833. if (laneCount < 9) { \
  834. } \
  835. else { \
  836. X##go ^= input[ 8]; \
  837. } \
  838. } \
  839. else { \
  840. X##go ^= input[ 8]; \
  841. X##gu ^= input[ 9]; \
  842. if (laneCount < 11) { \
  843. } \
  844. else { \
  845. X##ka ^= input[10]; \
  846. } \
  847. } \
  848. } \
  849. else { \
  850. X##go ^= input[ 8]; \
  851. X##gu ^= input[ 9]; \
  852. X##ka ^= input[10]; \
  853. X##ke ^= input[11]; \
  854. if (laneCount < 14) { \
  855. if (laneCount < 13) { \
  856. } \
  857. else { \
  858. X##ki ^= input[12]; \
  859. } \
  860. } \
  861. else { \
  862. X##ki ^= input[12]; \
  863. X##ko ^= input[13]; \
  864. if (laneCount < 15) { \
  865. } \
  866. else { \
  867. X##ku ^= input[14]; \
  868. } \
  869. } \
  870. } \
  871. } \
  872. } \
  873. else { \
  874. X##ba ^= input[ 0]; \
  875. X##be ^= input[ 1]; \
  876. X##bi ^= input[ 2]; \
  877. X##bo ^= input[ 3]; \
  878. X##bu ^= input[ 4]; \
  879. X##ga ^= input[ 5]; \
  880. X##ge ^= input[ 6]; \
  881. X##gi ^= input[ 7]; \
  882. X##go ^= input[ 8]; \
  883. X##gu ^= input[ 9]; \
  884. X##ka ^= input[10]; \
  885. X##ke ^= input[11]; \
  886. X##ki ^= input[12]; \
  887. X##ko ^= input[13]; \
  888. X##ku ^= input[14]; \
  889. X##ma ^= input[15]; \
  890. if (laneCount < 24) { \
  891. if (laneCount < 20) { \
  892. if (laneCount < 18) { \
  893. if (laneCount < 17) { \
  894. } \
  895. else { \
  896. X##me ^= input[16]; \
  897. } \
  898. } \
  899. else { \
  900. X##me ^= input[16]; \
  901. X##mi ^= input[17]; \
  902. if (laneCount < 19) { \
  903. } \
  904. else { \
  905. X##mo ^= input[18]; \
  906. } \
  907. } \
  908. } \
  909. else { \
  910. X##me ^= input[16]; \
  911. X##mi ^= input[17]; \
  912. X##mo ^= input[18]; \
  913. X##mu ^= input[19]; \
  914. if (laneCount < 22) { \
  915. if (laneCount < 21) { \
  916. } \
  917. else { \
  918. X##sa ^= input[20]; \
  919. } \
  920. } \
  921. else { \
  922. X##sa ^= input[20]; \
  923. X##se ^= input[21]; \
  924. if (laneCount < 23) { \
  925. } \
  926. else { \
  927. X##si ^= input[22]; \
  928. } \
  929. } \
  930. } \
  931. } \
  932. else { \
  933. X##me ^= input[16]; \
  934. X##mi ^= input[17]; \
  935. X##mo ^= input[18]; \
  936. X##mu ^= input[19]; \
  937. X##sa ^= input[20]; \
  938. X##se ^= input[21]; \
  939. X##si ^= input[22]; \
  940. X##so ^= input[23]; \
  941. if (laneCount < 25) { \
  942. } \
  943. else { \
  944. X##su ^= input[24]; \
  945. } \
  946. } \
  947. }
  948. #ifdef UseBebigokimisa
  949. #define copyToStateAndOutput(X, state, output, laneCount) \
  950. if (laneCount < 16) { \
  951. if (laneCount < 8) { \
  952. if (laneCount < 4) { \
  953. if (laneCount < 2) { \
  954. state[ 0] = X##ba; \
  955. if (laneCount >= 1) { \
  956. output[ 0] = X##ba; \
  957. } \
  958. state[ 1] = X##be; \
  959. state[ 2] = X##bi; \
  960. } \
  961. else { \
  962. state[ 0] = X##ba; \
  963. output[ 0] = X##ba; \
  964. state[ 1] = X##be; \
  965. output[ 1] = ~X##be; \
  966. state[ 2] = X##bi; \
  967. if (laneCount >= 3) { \
  968. output[ 2] = ~X##bi; \
  969. } \
  970. } \
  971. state[ 3] = X##bo; \
  972. state[ 4] = X##bu; \
  973. state[ 5] = X##ga; \
  974. state[ 6] = X##ge; \
  975. } \
  976. else { \
  977. state[ 0] = X##ba; \
  978. output[ 0] = X##ba; \
  979. state[ 1] = X##be; \
  980. output[ 1] = ~X##be; \
  981. state[ 2] = X##bi; \
  982. output[ 2] = ~X##bi; \
  983. state[ 3] = X##bo; \
  984. output[ 3] = X##bo; \
  985. if (laneCount < 6) { \
  986. state[ 4] = X##bu; \
  987. if (laneCount >= 5) { \
  988. output[ 4] = X##bu; \
  989. } \
  990. state[ 5] = X##ga; \
  991. state[ 6] = X##ge; \
  992. } \
  993. else { \
  994. state[ 4] = X##bu; \
  995. output[ 4] = X##bu; \
  996. state[ 5] = X##ga; \
  997. output[ 5] = X##ga; \
  998. state[ 6] = X##ge; \
  999. if (laneCount >= 7) { \
  1000. output[ 6] = X##ge; \
  1001. } \
  1002. } \
  1003. } \
  1004. state[ 7] = X##gi; \
  1005. state[ 8] = X##go; \
  1006. state[ 9] = X##gu; \
  1007. state[10] = X##ka; \
  1008. state[11] = X##ke; \
  1009. state[12] = X##ki; \
  1010. state[13] = X##ko; \
  1011. state[14] = X##ku; \
  1012. } \
  1013. else { \
  1014. state[ 0] = X##ba; \
  1015. output[ 0] = X##ba; \
  1016. state[ 1] = X##be; \
  1017. output[ 1] = ~X##be; \
  1018. state[ 2] = X##bi; \
  1019. output[ 2] = ~X##bi; \
  1020. state[ 3] = X##bo; \
  1021. output[ 3] = X##bo; \
  1022. state[ 4] = X##bu; \
  1023. output[ 4] = X##bu; \
  1024. state[ 5] = X##ga; \
  1025. output[ 5] = X##ga; \
  1026. state[ 6] = X##ge; \
  1027. output[ 6] = X##ge; \
  1028. state[ 7] = X##gi; \
  1029. output[ 7] = X##gi; \
  1030. if (laneCount < 12) { \
  1031. if (laneCount < 10) { \
  1032. state[ 8] = X##go; \
  1033. if (laneCount >= 9) { \
  1034. output[ 8] = ~X##go; \
  1035. } \
  1036. state[ 9] = X##gu; \
  1037. state[10] = X##ka; \
  1038. } \
  1039. else { \
  1040. state[ 8] = X##go; \
  1041. output[ 8] = ~X##go; \
  1042. state[ 9] = X##gu; \
  1043. output[ 9] = X##gu; \
  1044. state[10] = X##ka; \
  1045. if (laneCount >= 11) { \
  1046. output[10] = X##ka; \
  1047. } \
  1048. } \
  1049. state[11] = X##ke; \
  1050. state[12] = X##ki; \
  1051. state[13] = X##ko; \
  1052. state[14] = X##ku; \
  1053. } \
  1054. else { \
  1055. state[ 8] = X##go; \
  1056. output[ 8] = ~X##go; \
  1057. state[ 9] = X##gu; \
  1058. output[ 9] = X##gu; \
  1059. state[10] = X##ka; \
  1060. output[10] = X##ka; \
  1061. state[11] = X##ke; \
  1062. output[11] = X##ke; \
  1063. if (laneCount < 14) { \
  1064. state[12] = X##ki; \
  1065. if (laneCount >= 13) { \
  1066. output[12] = ~X##ki; \
  1067. } \
  1068. state[13] = X##ko; \
  1069. state[14] = X##ku; \
  1070. } \
  1071. else { \
  1072. state[12] = X##ki; \
  1073. output[12] = ~X##ki; \
  1074. state[13] = X##ko; \
  1075. output[13] = X##ko; \
  1076. state[14] = X##ku; \
  1077. if (laneCount >= 15) { \
  1078. output[14] = X##ku; \
  1079. } \
  1080. } \
  1081. } \
  1082. } \
  1083. state[15] = X##ma; \
  1084. state[16] = X##me; \
  1085. state[17] = X##mi; \
  1086. state[18] = X##mo; \
  1087. state[19] = X##mu; \
  1088. state[20] = X##sa; \
  1089. state[21] = X##se; \
  1090. state[22] = X##si; \
  1091. state[23] = X##so; \
  1092. state[24] = X##su; \
  1093. } \
  1094. else { \
  1095. state[ 0] = X##ba; \
  1096. output[ 0] = X##ba; \
  1097. state[ 1] = X##be; \
  1098. output[ 1] = ~X##be; \
  1099. state[ 2] = X##bi; \
  1100. output[ 2] = ~X##bi; \
  1101. state[ 3] = X##bo; \
  1102. output[ 3] = X##bo; \
  1103. state[ 4] = X##bu; \
  1104. output[ 4] = X##bu; \
  1105. state[ 5] = X##ga; \
  1106. output[ 5] = X##ga; \
  1107. state[ 6] = X##ge; \
  1108. output[ 6] = X##ge; \
  1109. state[ 7] = X##gi; \
  1110. output[ 7] = X##gi; \
  1111. state[ 8] = X##go; \
  1112. output[ 8] = ~X##go; \
  1113. state[ 9] = X##gu; \
  1114. output[ 9] = X##gu; \
  1115. state[10] = X##ka; \
  1116. output[10] = X##ka; \
  1117. state[11] = X##ke; \
  1118. output[11] = X##ke; \
  1119. state[12] = X##ki; \
  1120. output[12] = ~X##ki; \
  1121. state[13] = X##ko; \
  1122. output[13] = X##ko; \
  1123. state[14] = X##ku; \
  1124. output[14] = X##ku; \
  1125. state[15] = X##ma; \
  1126. output[15] = X##ma; \
  1127. if (laneCount < 24) { \
  1128. if (laneCount < 20) { \
  1129. if (laneCount < 18) { \
  1130. state[16] = X##me; \
  1131. if (laneCount >= 17) { \
  1132. output[16] = X##me; \
  1133. } \
  1134. state[17] = X##mi; \
  1135. state[18] = X##mo; \
  1136. } \
  1137. else { \
  1138. state[16] = X##me; \
  1139. output[16] = X##me; \
  1140. state[17] = X##mi; \
  1141. output[17] = ~X##mi; \
  1142. state[18] = X##mo; \
  1143. if (laneCount >= 19) { \
  1144. output[18] = X##mo; \
  1145. } \
  1146. } \
  1147. state[19] = X##mu; \
  1148. state[20] = X##sa; \
  1149. state[21] = X##se; \
  1150. state[22] = X##si; \
  1151. } \
  1152. else { \
  1153. state[16] = X##me; \
  1154. output[16] = X##me; \
  1155. state[17] = X##mi; \
  1156. output[17] = ~X##mi; \
  1157. state[18] = X##mo; \
  1158. output[18] = X##mo; \
  1159. state[19] = X##mu; \
  1160. output[19] = X##mu; \
  1161. if (laneCount < 22) { \
  1162. state[20] = X##sa; \
  1163. if (laneCount >= 21) { \
  1164. output[20] = ~X##sa; \
  1165. } \
  1166. state[21] = X##se; \
  1167. state[22] = X##si; \
  1168. } \
  1169. else { \
  1170. state[20] = X##sa; \
  1171. output[20] = ~X##sa; \
  1172. state[21] = X##se; \
  1173. output[21] = X##se; \
  1174. state[22] = X##si; \
  1175. if (laneCount >= 23) { \
  1176. output[22] = X##si; \
  1177. } \
  1178. } \
  1179. } \
  1180. state[23] = X##so; \
  1181. state[24] = X##su; \
  1182. } \
  1183. else { \
  1184. state[16] = X##me; \
  1185. output[16] = X##me; \
  1186. state[17] = X##mi; \
  1187. output[17] = ~X##mi; \
  1188. state[18] = X##mo; \
  1189. output[18] = X##mo; \
  1190. state[19] = X##mu; \
  1191. output[19] = X##mu; \
  1192. state[20] = X##sa; \
  1193. output[20] = ~X##sa; \
  1194. state[21] = X##se; \
  1195. output[21] = X##se; \
  1196. state[22] = X##si; \
  1197. output[22] = X##si; \
  1198. state[23] = X##so; \
  1199. output[23] = X##so; \
  1200. state[24] = X##su; \
  1201. if (laneCount >= 25) { \
  1202. output[24] = X##su; \
  1203. } \
  1204. } \
  1205. }
  /*
   * output(X, output, laneCount) -- lane-complemented variant.
   * Copies the lanes X##ba, X##be, ... X##su, in that order, into output[0],
   * output[1], ... and stops after laneCount lanes (never more than 25; a
   * laneCount below 1 stores nothing).  Lanes be, bi, go, ki, mi and sa are
   * stored bitwise-complemented; every other lane is stored unchanged.
   * X is a name prefix pasted onto the lane suffixes.  laneCount is expanded
   * several times, so it should be free of side effects.  The expansion is a
   * single brace-enclosed statement.
   */
  #define output(X, output, laneCount) \
  { \
      if (laneCount >=  1) output[ 0] =  X##ba; \
      if (laneCount >=  2) output[ 1] = ~X##be; \
      if (laneCount >=  3) output[ 2] = ~X##bi; \
      if (laneCount >=  4) output[ 3] =  X##bo; \
      if (laneCount >=  5) output[ 4] =  X##bu; \
      if (laneCount >=  6) output[ 5] =  X##ga; \
      if (laneCount >=  7) output[ 6] =  X##ge; \
      if (laneCount >=  8) output[ 7] =  X##gi; \
      if (laneCount >=  9) output[ 8] = ~X##go; \
      if (laneCount >= 10) output[ 9] =  X##gu; \
      if (laneCount >= 11) output[10] =  X##ka; \
      if (laneCount >= 12) output[11] =  X##ke; \
      if (laneCount >= 13) output[12] = ~X##ki; \
      if (laneCount >= 14) output[13] =  X##ko; \
      if (laneCount >= 15) output[14] =  X##ku; \
      if (laneCount >= 16) output[15] =  X##ma; \
      if (laneCount >= 17) output[16] =  X##me; \
      if (laneCount >= 18) output[17] = ~X##mi; \
      if (laneCount >= 19) output[18] =  X##mo; \
      if (laneCount >= 20) output[19] =  X##mu; \
      if (laneCount >= 21) output[20] = ~X##sa; \
      if (laneCount >= 22) output[21] =  X##se; \
      if (laneCount >= 23) output[22] =  X##si; \
      if (laneCount >= 24) output[23] =  X##so; \
      if (laneCount >= 25) output[24] =  X##su; \
  }
  /* wrapOne (lane-complemented variant): XOR input[index] into lane X##name,
   * then store the updated lane, unchanged, in output[index].
   * Expands to two statements, each already terminated by ';'. */
  #define wrapOne(X, input, output, index, name) \
      X##name = X##name ^ input[index]; \
      output[index] = X##name;
  /* wrapOneInvert (lane-complemented variant): XOR input[index] into lane
   * X##name, then store the bitwise complement of the updated lane in
   * output[index].  Expands to two ';'-terminated statements. */
  #define wrapOneInvert(X, input, output, index, name) \
      X##name = X##name ^ input[index]; \
      output[index] = ~X##name;
  /* unwrapOne (lane-complemented variant): store input[index] XOR lane
   * X##name in output[index], then XOR that freshly stored output word back
   * into the lane.  The store comes first, so the second statement reads the
   * value just written.  Expands to two ';'-terminated statements. */
  #define unwrapOne(X, input, output, index, name) \
      output[index] = X##name ^ input[index]; \
      X##name = X##name ^ output[index];
  /* unwrapOneInvert (lane-complemented variant): store the bitwise complement
   * of (input[index] XOR lane X##name) in output[index], then XOR that freshly
   * stored output word back into the lane.
   * Expands to two ';'-terminated statements.
   * The last line of the definition must NOT end in a backslash: a trailing
   * continuation would splice the directive on the following line into this
   * macro's replacement list. */
  #define unwrapOneInvert(X, input, output, index, name) \
      output[index] = ~(input[index] ^ X##name); \
      X##name ^= output[index];
  1362. #else /* UseBebigokimisa */
  /*
   * copyToStateAndOutput(X, state, output, laneCount) -- variant without lane
   * complementing.
   * Stores all 25 lanes X##ba .. X##su into state[0..24] and, for the first
   * laneCount lanes only (at most 25; none when laneCount is below 1), also
   * stores the same value into output[].  Stores are issued in increasing
   * lane order, the state[] store of a lane preceding its output[] store.
   * X is a name prefix pasted onto the lane suffixes.  laneCount is expanded
   * several times, so it should be free of side effects.  The expansion is a
   * single brace-enclosed statement.
   */
  #define copyToStateAndOutput(X, state, output, laneCount) \
  { \
      state[ 0] = X##ba;  if (laneCount >=  1) output[ 0] = X##ba; \
      state[ 1] = X##be;  if (laneCount >=  2) output[ 1] = X##be; \
      state[ 2] = X##bi;  if (laneCount >=  3) output[ 2] = X##bi; \
      state[ 3] = X##bo;  if (laneCount >=  4) output[ 3] = X##bo; \
      state[ 4] = X##bu;  if (laneCount >=  5) output[ 4] = X##bu; \
      state[ 5] = X##ga;  if (laneCount >=  6) output[ 5] = X##ga; \
      state[ 6] = X##ge;  if (laneCount >=  7) output[ 6] = X##ge; \
      state[ 7] = X##gi;  if (laneCount >=  8) output[ 7] = X##gi; \
      state[ 8] = X##go;  if (laneCount >=  9) output[ 8] = X##go; \
      state[ 9] = X##gu;  if (laneCount >= 10) output[ 9] = X##gu; \
      state[10] = X##ka;  if (laneCount >= 11) output[10] = X##ka; \
      state[11] = X##ke;  if (laneCount >= 12) output[11] = X##ke; \
      state[12] = X##ki;  if (laneCount >= 13) output[12] = X##ki; \
      state[13] = X##ko;  if (laneCount >= 14) output[13] = X##ko; \
      state[14] = X##ku;  if (laneCount >= 15) output[14] = X##ku; \
      state[15] = X##ma;  if (laneCount >= 16) output[15] = X##ma; \
      state[16] = X##me;  if (laneCount >= 17) output[16] = X##me; \
      state[17] = X##mi;  if (laneCount >= 18) output[17] = X##mi; \
      state[18] = X##mo;  if (laneCount >= 19) output[18] = X##mo; \
      state[19] = X##mu;  if (laneCount >= 20) output[19] = X##mu; \
      state[20] = X##sa;  if (laneCount >= 21) output[20] = X##sa; \
      state[21] = X##se;  if (laneCount >= 22) output[21] = X##se; \
      state[22] = X##si;  if (laneCount >= 23) output[22] = X##si; \
      state[23] = X##so;  if (laneCount >= 24) output[23] = X##so; \
      state[24] = X##su;  if (laneCount >= 25) output[24] = X##su; \
  }
  /*
   * output(X, output, laneCount) -- variant without lane complementing.
   * Copies the lanes X##ba, X##be, ... X##su, in that order, into output[0],
   * output[1], ... and stops after laneCount lanes (never more than 25; a
   * laneCount below 1 stores nothing).  Every lane is stored unchanged.
   * X is a name prefix pasted onto the lane suffixes.  laneCount is expanded
   * several times, so it should be free of side effects.  The expansion is a
   * single brace-enclosed statement.
   */
  #define output(X, output, laneCount) \
  { \
      if (laneCount >=  1) output[ 0] = X##ba; \
      if (laneCount >=  2) output[ 1] = X##be; \
      if (laneCount >=  3) output[ 2] = X##bi; \
      if (laneCount >=  4) output[ 3] = X##bo; \
      if (laneCount >=  5) output[ 4] = X##bu; \
      if (laneCount >=  6) output[ 5] = X##ga; \
      if (laneCount >=  7) output[ 6] = X##ge; \
      if (laneCount >=  8) output[ 7] = X##gi; \
      if (laneCount >=  9) output[ 8] = X##go; \
      if (laneCount >= 10) output[ 9] = X##gu; \
      if (laneCount >= 11) output[10] = X##ka; \
      if (laneCount >= 12) output[11] = X##ke; \
      if (laneCount >= 13) output[12] = X##ki; \
      if (laneCount >= 14) output[13] = X##ko; \
      if (laneCount >= 15) output[14] = X##ku; \
      if (laneCount >= 16) output[15] = X##ma; \
      if (laneCount >= 17) output[16] = X##me; \
      if (laneCount >= 18) output[17] = X##mi; \
      if (laneCount >= 19) output[18] = X##mo; \
      if (laneCount >= 20) output[19] = X##mu; \
      if (laneCount >= 21) output[20] = X##sa; \
      if (laneCount >= 22) output[21] = X##se; \
      if (laneCount >= 23) output[22] = X##si; \
      if (laneCount >= 24) output[23] = X##so; \
      if (laneCount >= 25) output[24] = X##su; \
  }
  /* wrapOne (variant without lane complementing): XOR input[index] into lane
   * X##name, then store the updated lane in output[index].
   * Expands to two statements, each already terminated by ';'. */
  #define wrapOne(X, input, output, index, name) \
      X##name = X##name ^ input[index]; \
      output[index] = X##name;
  /* wrapOneInvert (variant without lane complementing): in this #else branch
   * no complement is applied -- XOR input[index] into lane X##name, then
   * store the updated lane as-is in output[index].
   * Expands to two ';'-terminated statements. */
  #define wrapOneInvert(X, input, output, index, name) \
      X##name = X##name ^ input[index]; \
      output[index] = X##name;
  /* unwrapOne (variant without lane complementing): store input[index] XOR
   * lane X##name in output[index], then XOR that freshly stored output word
   * back into the lane.  The store comes first, so the second statement
   * reads the value just written.  Expands to two ';'-terminated statements. */
  #define unwrapOne(X, input, output, index, name) \
      output[index] = X##name ^ input[index]; \
      X##name = X##name ^ output[index];
  /* unwrapOneInvert (variant without lane complementing): in this #else
   * branch no complement is applied -- store input[index] XOR lane X##name
   * in output[index], then XOR that freshly stored output word back into the
   * lane.  Expands to two ';'-terminated statements. */
  #define unwrapOneInvert(X, input, output, index, name) \
      output[index] = X##name ^ input[index]; \
      X##name = X##name ^ output[index];
  1776. #endif
  /*
   * wrap(X, input, output, laneCount, trailingBits)
   * Walks the lanes X##ba .. X##su in order.  Each of the first laneCount
   * lanes (at most 25) is processed with wrapOne, or with wrapOneInvert for
   * lanes be, bi, go, ki, mi and sa, using the lane's position as the index
   * into input[] and output[].  When laneCount is below 25, trailingBits is
   * then XORed into the single lane that follows the last processed one
   * (lane ba when laneCount is below 1); with 25 or more lanes trailingBits
   * is not used.
   * X is a name prefix pasted onto the lane suffixes.  laneCount is expanded
   * several times, so it should be free of side effects.  The expansion is a
   * single brace-enclosed statement.
   */
  #define wrap(X, input, output, laneCount, trailingBits) \
  { \
      if (laneCount >= 1) { wrapOne(X, input, output, 0, ba) } \
      else { X##ba ^= trailingBits; } \
      if (laneCount >= 2) { wrapOneInvert(X, input, output, 1, be) } \
      else if (laneCount >= 1) { X##be ^= trailingBits; } \
      if (laneCount >= 3) { wrapOneInvert(X, input, output, 2, bi) } \
      else if (laneCount >= 2) { X##bi ^= trailingBits; } \
      if (laneCount >= 4) { wrapOne(X, input, output, 3, bo) } \
      else if (laneCount >= 3) { X##bo ^= trailingBits; } \
      if (laneCount >= 5) { wrapOne(X, input, output, 4, bu) } \
      else if (laneCount >= 4) { X##bu ^= trailingBits; } \
      if (laneCount >= 6) { wrapOne(X, input, output, 5, ga) } \
      else if (laneCount >= 5) { X##ga ^= trailingBits; } \
      if (laneCount >= 7) { wrapOne(X, input, output, 6, ge) } \
      else if (laneCount >= 6) { X##ge ^= trailingBits; } \
      if (laneCount >= 8) { wrapOne(X, input, output, 7, gi) } \
      else if (laneCount >= 7) { X##gi ^= trailingBits; } \
      if (laneCount >= 9) { wrapOneInvert(X, input, output, 8, go) } \
      else if (laneCount >= 8) { X##go ^= trailingBits; } \
      if (laneCount >= 10) { wrapOne(X, input, output, 9, gu) } \
      else if (laneCount >= 9) { X##gu ^= trailingBits; } \
      if (laneCount >= 11) { wrapOne(X, input, output, 10, ka) } \
      else if (laneCount >= 10) { X##ka ^= trailingBits; } \
      if (laneCount >= 12) { wrapOne(X, input, output, 11, ke) } \
      else if (laneCount >= 11) { X##ke ^= trailingBits; } \
      if (laneCount >= 13) { wrapOneInvert(X, input, output, 12, ki) } \
      else if (laneCount >= 12) { X##ki ^= trailingBits; } \
      if (laneCount >= 14) { wrapOne(X, input, output, 13, ko) } \
      else if (laneCount >= 13) { X##ko ^= trailingBits; } \
      if (laneCount >= 15) { wrapOne(X, input, output, 14, ku) } \
      else if (laneCount >= 14) { X##ku ^= trailingBits; } \
      if (laneCount >= 16) { wrapOne(X, input, output, 15, ma) } \
      else if (laneCount >= 15) { X##ma ^= trailingBits; } \
      if (laneCount >= 17) { wrapOne(X, input, output, 16, me) } \
      else if (laneCount >= 16) { X##me ^= trailingBits; } \
      if (laneCount >= 18) { wrapOneInvert(X, input, output, 17, mi) } \
      else if (laneCount >= 17) { X##mi ^= trailingBits; } \
      if (laneCount >= 19) { wrapOne(X, input, output, 18, mo) } \
      else if (laneCount >= 18) { X##mo ^= trailingBits; } \
      if (laneCount >= 20) { wrapOne(X, input, output, 19, mu) } \
      else if (laneCount >= 19) { X##mu ^= trailingBits; } \
      if (laneCount >= 21) { wrapOneInvert(X, input, output, 20, sa) } \
      else if (laneCount >= 20) { X##sa ^= trailingBits; } \
      if (laneCount >= 22) { wrapOne(X, input, output, 21, se) } \
      else if (laneCount >= 21) { X##se ^= trailingBits; } \
      if (laneCount >= 23) { wrapOne(X, input, output, 22, si) } \
      else if (laneCount >= 22) { X##si ^= trailingBits; } \
      if (laneCount >= 24) { wrapOne(X, input, output, 23, so) } \
      else if (laneCount >= 23) { X##so ^= trailingBits; } \
      if (laneCount >= 25) { wrapOne(X, input, output, 24, su) } \
      else if (laneCount >= 24) { X##su ^= trailingBits; } \
  }
  /*
   * unwrap(X, input, output, laneCount, trailingBits)
   * Walks the lanes X##ba .. X##su in order.  Each of the first laneCount
   * lanes (at most 25) is processed with unwrapOne, or with unwrapOneInvert
   * for lanes be, bi, go, ki, mi and sa, using the lane's position as the
   * index into input[] and output[].  When laneCount is below 25,
   * trailingBits is then XORed into the single lane that follows the last
   * processed one (lane ba when laneCount is below 1); with 25 or more lanes
   * trailingBits is not used.
   * X is a name prefix pasted onto the lane suffixes.  laneCount is expanded
   * several times, so it should be free of side effects.  The expansion is a
   * single brace-enclosed statement.
   */
  #define unwrap(X, input, output, laneCount, trailingBits) \
  { \
      if (laneCount >= 1) { unwrapOne(X, input, output, 0, ba) } \
      else { X##ba ^= trailingBits; } \
      if (laneCount >= 2) { unwrapOneInvert(X, input, output, 1, be) } \
      else if (laneCount >= 1) { X##be ^= trailingBits; } \
      if (laneCount >= 3) { unwrapOneInvert(X, input, output, 2, bi) } \
      else if (laneCount >= 2) { X##bi ^= trailingBits; } \
      if (laneCount >= 4) { unwrapOne(X, input, output, 3, bo) } \
      else if (laneCount >= 3) { X##bo ^= trailingBits; } \
      if (laneCount >= 5) { unwrapOne(X, input, output, 4, bu) } \
      else if (laneCount >= 4) { X##bu ^= trailingBits; } \
      if (laneCount >= 6) { unwrapOne(X, input, output, 5, ga) } \
      else if (laneCount >= 5) { X##ga ^= trailingBits; } \
      if (laneCount >= 7) { unwrapOne(X, input, output, 6, ge) } \
      else if (laneCount >= 6) { X##ge ^= trailingBits; } \
      if (laneCount >= 8) { unwrapOne(X, input, output, 7, gi) } \
      else if (laneCount >= 7) { X##gi ^= trailingBits; } \
      if (laneCount >= 9) { unwrapOneInvert(X, input, output, 8, go) } \
      else if (laneCount >= 8) { X##go ^= trailingBits; } \
      if (laneCount >= 10) { unwrapOne(X, input, output, 9, gu) } \
      else if (laneCount >= 9) { X##gu ^= trailingBits; } \
      if (laneCount >= 11) { unwrapOne(X, input, output, 10, ka) } \
      else if (laneCount >= 10) { X##ka ^= trailingBits; } \
      if (laneCount >= 12) { unwrapOne(X, input, output, 11, ke) } \
      else if (laneCount >= 11) { X##ke ^= trailingBits; } \
      if (laneCount >= 13) { unwrapOneInvert(X, input, output, 12, ki) } \
      else if (laneCount >= 12) { X##ki ^= trailingBits; } \
      if (laneCount >= 14) { unwrapOne(X, input, output, 13, ko) } \
      else if (laneCount >= 13) { X##ko ^= trailingBits; } \
      if (laneCount >= 15) { unwrapOne(X, input, output, 14, ku) } \
      else if (laneCount >= 14) { X##ku ^= trailingBits; } \
      if (laneCount >= 16) { unwrapOne(X, input, output, 15, ma) } \
      else if (laneCount >= 15) { X##ma ^= trailingBits; } \
      if (laneCount >= 17) { unwrapOne(X, input, output, 16, me) } \
      else if (laneCount >= 16) { X##me ^= trailingBits; } \
      if (laneCount >= 18) { unwrapOneInvert(X, input, output, 17, mi) } \
      else if (laneCount >= 17) { X##mi ^= trailingBits; } \
      if (laneCount >= 19) { unwrapOne(X, input, output, 18, mo) } \
      else if (laneCount >= 18) { X##mo ^= trailingBits; } \
      if (laneCount >= 20) { unwrapOne(X, input, output, 19, mu) } \
      else if (laneCount >= 19) { X##mu ^= trailingBits; } \
      if (laneCount >= 21) { unwrapOneInvert(X, input, output, 20, sa) } \
      else if (laneCount >= 20) { X##sa ^= trailingBits; } \
      if (laneCount >= 22) { unwrapOne(X, input, output, 21, se) } \
      else if (laneCount >= 21) { X##se ^= trailingBits; } \
      if (laneCount >= 23) { unwrapOne(X, input, output, 22, si) } \
      else if (laneCount >= 22) { X##si ^= trailingBits; } \
      if (laneCount >= 24) { unwrapOne(X, input, output, 23, so) } \
      else if (laneCount >= 23) { X##so ^= trailingBits; } \
      if (laneCount >= 25) { unwrapOne(X, input, output, 24, su) } \
      else if (laneCount >= 24) { X##su ^= trailingBits; } \
  }