bn_mp_toom_mul.c 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. #include <tommath.h>
  2. #ifdef BN_MP_TOOM_MUL_C
  3. /* LibTomMath, multiple-precision integer library -- Tom St Denis
  4. *
  5. * LibTomMath is a library that provides multiple-precision
  6. * integer arithmetic as well as number theoretic functionality.
  7. *
  8. * The library was designed directly after the MPI library by
  9. * Michael Fromberger but has been written from scratch with
  10. * additional optimizations in place.
  11. *
  12. * The library is free for all purposes without any express
  13. * guarantee it works.
  14. *
  15. * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
  16. */
  17. /* multiplication using the Toom-Cook 3-way algorithm
  18. *
  19. * Much more complicated than Karatsuba but has a lower
  20. * asymptotic running time of O(N**1.464). This algorithm is
  21. * only particularly useful on VERY large inputs
  22. * (we're talking 1000s of digits here...).
  23. */
  24. int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
  25. {
  26. mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2;
  27. int res, B;
  28. /* init temps */
  29. if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4,
  30. &a0, &a1, &a2, &b0, &b1,
  31. &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) {
  32. return res;
  33. }
  34. /* B */
  35. B = MIN(a->used, b->used) / 3;
  36. /* a = a2 * B**2 + a1 * B + a0 */
  37. if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
  38. goto ERR;
  39. }
  40. if ((res = mp_copy(a, &a1)) != MP_OKAY) {
  41. goto ERR;
  42. }
  43. mp_rshd(&a1, B);
  44. mp_mod_2d(&a1, DIGIT_BIT * B, &a1);
  45. if ((res = mp_copy(a, &a2)) != MP_OKAY) {
  46. goto ERR;
  47. }
  48. mp_rshd(&a2, B*2);
  49. /* b = b2 * B**2 + b1 * B + b0 */
  50. if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) {
  51. goto ERR;
  52. }
  53. if ((res = mp_copy(b, &b1)) != MP_OKAY) {
  54. goto ERR;
  55. }
  56. mp_rshd(&b1, B);
  57. mp_mod_2d(&b1, DIGIT_BIT * B, &b1);
  58. if ((res = mp_copy(b, &b2)) != MP_OKAY) {
  59. goto ERR;
  60. }
  61. mp_rshd(&b2, B*2);
  62. /* w0 = a0*b0 */
  63. if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) {
  64. goto ERR;
  65. }
  66. /* w4 = a2 * b2 */
  67. if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) {
  68. goto ERR;
  69. }
  70. /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */
  71. if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
  72. goto ERR;
  73. }
  74. if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
  75. goto ERR;
  76. }
  77. if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
  78. goto ERR;
  79. }
  80. if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
  81. goto ERR;
  82. }
  83. if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) {
  84. goto ERR;
  85. }
  86. if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
  87. goto ERR;
  88. }
  89. if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
  90. goto ERR;
  91. }
  92. if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) {
  93. goto ERR;
  94. }
  95. if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) {
  96. goto ERR;
  97. }
  98. /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */
  99. if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
  100. goto ERR;
  101. }
  102. if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
  103. goto ERR;
  104. }
  105. if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
  106. goto ERR;
  107. }
  108. if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
  109. goto ERR;
  110. }
  111. if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) {
  112. goto ERR;
  113. }
  114. if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
  115. goto ERR;
  116. }
  117. if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
  118. goto ERR;
  119. }
  120. if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
  121. goto ERR;
  122. }
  123. if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) {
  124. goto ERR;
  125. }
  126. /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */
  127. if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
  128. goto ERR;
  129. }
  130. if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
  131. goto ERR;
  132. }
  133. if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) {
  134. goto ERR;
  135. }
  136. if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
  137. goto ERR;
  138. }
  139. if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) {
  140. goto ERR;
  141. }
  142. /* now solve the matrix
  143. 0 0 0 0 1
  144. 1 2 4 8 16
  145. 1 1 1 1 1
  146. 16 8 4 2 1
  147. 1 0 0 0 0
  148. using 12 subtractions, 4 shifts,
  149. 2 small divisions and 1 small multiplication
  150. */
  151. /* r1 - r4 */
  152. if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
  153. goto ERR;
  154. }
  155. /* r3 - r0 */
  156. if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
  157. goto ERR;
  158. }
  159. /* r1/2 */
  160. if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
  161. goto ERR;
  162. }
  163. /* r3/2 */
  164. if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
  165. goto ERR;
  166. }
  167. /* r2 - r0 - r4 */
  168. if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
  169. goto ERR;
  170. }
  171. if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
  172. goto ERR;
  173. }
  174. /* r1 - r2 */
  175. if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
  176. goto ERR;
  177. }
  178. /* r3 - r2 */
  179. if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
  180. goto ERR;
  181. }
  182. /* r1 - 8r0 */
  183. if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
  184. goto ERR;
  185. }
  186. if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
  187. goto ERR;
  188. }
  189. /* r3 - 8r4 */
  190. if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
  191. goto ERR;
  192. }
  193. if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
  194. goto ERR;
  195. }
  196. /* 3r2 - r1 - r3 */
  197. if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) {
  198. goto ERR;
  199. }
  200. if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
  201. goto ERR;
  202. }
  203. if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
  204. goto ERR;
  205. }
  206. /* r1 - r2 */
  207. if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
  208. goto ERR;
  209. }
  210. /* r3 - r2 */
  211. if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
  212. goto ERR;
  213. }
  214. /* r1/3 */
  215. if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
  216. goto ERR;
  217. }
  218. /* r3/3 */
  219. if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
  220. goto ERR;
  221. }
  222. /* at this point shift W[n] by B*n */
  223. if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
  224. goto ERR;
  225. }
  226. if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
  227. goto ERR;
  228. }
  229. if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
  230. goto ERR;
  231. }
  232. if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
  233. goto ERR;
  234. }
  235. if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) {
  236. goto ERR;
  237. }
  238. if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
  239. goto ERR;
  240. }
  241. if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
  242. goto ERR;
  243. }
  244. if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) {
  245. goto ERR;
  246. }
  247. ERR:
  248. mp_clear_multi(&w0, &w1, &w2, &w3, &w4,
  249. &a0, &a1, &a2, &b0, &b1,
  250. &b2, &tmp1, &tmp2, NULL);
  251. return res;
  252. }
  253. #endif
  254. /* $Source: /cvs/libtom/libtommath/bn_mp_toom_mul.c,v $ */
  255. /* $Revision: 1.3 $ */
  256. /* $Date: 2006/03/31 14:18:44 $ */