  1. /* mpn_mul_n -- Multiply two natural numbers of length n.
  2. Copyright (C) 1991-2019 Free Software Foundation, Inc.
  3. This file is part of the GNU MP Library.
  4. The GNU MP Library is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU Lesser General Public License as published by
  6. the Free Software Foundation; either version 2.1 of the License, or (at your
  7. option) any later version.
  8. The GNU MP Library is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  10. or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
  11. License for more details.
  12. You should have received a copy of the GNU Lesser General Public License
  13. along with the GNU MP Library; see the file COPYING.LIB. If not, see
  14. <http://www.gnu.org/licenses/>. */
  15. #include <gmp.h>
  16. #include "gmp-impl.h"
  17. /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
  18. both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are
  19. always stored. Return the most significant limb.
  20. Argument constraints:
  21. 1. PRODP != UP and PRODP != VP, i.e. the destination
  22. must be distinct from the multiplier and the multiplicand. */
  23. /* If KARATSUBA_THRESHOLD is not already defined, define it to a
  24. value which is good on most machines. */
  25. #ifndef KARATSUBA_THRESHOLD
  26. #define KARATSUBA_THRESHOLD 32
  27. #endif
  28. /* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */
  29. #if KARATSUBA_THRESHOLD < 2
  30. #undef KARATSUBA_THRESHOLD
  31. #define KARATSUBA_THRESHOLD 2
  32. #endif
  33. /* Handle simple cases with traditional multiplication.
  34. This is the most critical code of multiplication. All multiplies rely
  35. on this, both small and huge. Small ones arrive here immediately. Huge
  36. ones arrive here as this is the base case for Karatsuba's recursive
  37. algorithm below. */
  38. void
  39. impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
  40. {
  41. mp_size_t i;
  42. mp_limb_t cy_limb;
  43. mp_limb_t v_limb;
  44. /* Multiply by the first limb in V separately, as the result can be
  45. stored (not added) to PROD. We also avoid a loop for zeroing. */
  46. v_limb = vp[0];
  47. if (v_limb <= 1)
  48. {
  49. if (v_limb == 1)
  50. MPN_COPY (prodp, up, size);
  51. else
  52. MPN_ZERO (prodp, size);
  53. cy_limb = 0;
  54. }
  55. else
  56. cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
  57. prodp[size] = cy_limb;
  58. prodp++;
  59. /* For each iteration in the outer loop, multiply one limb from
  60. U with one limb from V, and add it to PROD. */
  61. for (i = 1; i < size; i++)
  62. {
  63. v_limb = vp[i];
  64. if (v_limb <= 1)
  65. {
  66. cy_limb = 0;
  67. if (v_limb == 1)
  68. cy_limb = mpn_add_n (prodp, prodp, up, size);
  69. }
  70. else
  71. cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
  72. prodp[size] = cy_limb;
  73. prodp++;
  74. }
  75. }
/* Karatsuba multiplication of {up,size} by {vp,size}; the 2*size-limb
   product is stored at prodp.  tspace must provide 2*size limbs of
   scratch.  Recursion goes through MPN_MUL_N_RECURSE, which drops back
   to the basecase below KARATSUBA_THRESHOLD.  prodp must be distinct
   from up and vp.  */
void
impn_mul_n (mp_ptr prodp,
	    mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
{
  if ((size & 1) != 0)
    {
      /* The size is odd; the code below doesn't handle that.
	 Multiply the least significant (size - 1) limbs with a recursive
	 call, and handle the most significant limb of S1 and S2
	 separately.  */
      /* A slightly faster way to do this would be to make the Karatsuba
	 code below behave as if the size were even, and let it check for
	 odd size in the end.  I.e., in essence move this code to the end.
	 Doing so would save us a recursive call, and potentially make the
	 stack grow a lot less.  */

      mp_size_t esize = size - 1;	/* even size */
      mp_limb_t cy_limb;

      MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
      /* Fold in the terms involving the top limbs: first
	 {up,esize} * vp[esize], then {vp,size} * up[esize].  The second
	 call deliberately runs over all `size' limbs of V so that the
	 single product up[esize]*vp[esize] is included exactly once.  */
      cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
      prodp[esize + esize] = cy_limb;
      cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
      prodp[esize + size] = cy_limb;
    }
  else
    {
      /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.

	 Split U in two pieces, U1 and U0, such that
	   U = U0 + U1*(B**n),
	 and V in V1 and V0, such that
	   V = V0 + V1*(B**n).

	 UV is then computed recursively using the identity

	   UV = (B**2n + B**n)*U1*V1 + B**n*(U1-U0)*(V0-V1) + (B**n + 1)*U0*V0

	 where B = 2**BITS_PER_MP_LIMB.  */

      mp_size_t hsize = size >> 1;
      mp_limb_t cy;
      int negflg;

      /*** Product H: U1 x V1.  Put result in upper part of PROD and pass
	   low part of TSPACE as new TSPACE.  */
      MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);

      /*** Product M: (U1-U0)(V0-V1).  Compute |U1-U0| and |V0-V1| into
	   the (currently free) low part of PROD, tracking the overall
	   sign in negflg.  */
      if (mpn_cmp (up + hsize, up, hsize) >= 0)
	{
	  mpn_sub_n (prodp, up + hsize, up, hsize);
	  negflg = 0;
	}
      else
	{
	  mpn_sub_n (prodp, up, up + hsize, hsize);
	  negflg = 1;
	}
      if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
	{
	  /* V1 >= V0, so V0-V1 <= 0: flip the sign.  */
	  mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
	  negflg ^= 1;
	}
      else
	{
	  mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
	  /* No change of NEGFLG.  */
	}
      /* Read temporary operands from low part of PROD.
	 Put result in low part of TSPACE using upper part of TSPACE
	 as new TSPACE.  */
      MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);

      /*** Add/copy product H.  The middle copy of H lands at prodp+hsize;
	   its own high half is folded into the top of PROD.  */
      MPN_COPY (prodp + hsize, prodp + size, hsize);
      cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);

      /*** Add product M (if NEGFLG, M is a negative number).  */
      if (negflg)
	cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
      else
	cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);

      /*** Product L: U0 x V0.  Read temporary operands from low part of
	   PROD.  Put result in low part of TSPACE using upper part of
	   TSPACE as new TSPACE.  */
      MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);

      /*** Add/copy Product L (it appears twice in the identity: once at
	   position 0, once at position hsize).  */
      cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
      if (cy)
	mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);

      MPN_COPY (prodp, tspace, hsize);
      cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
      if (cy)
	mpn_add_1 (prodp + size, prodp + size, size, 1);
    }
}
  169. void
  170. impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
  171. {
  172. mp_size_t i;
  173. mp_limb_t cy_limb;
  174. mp_limb_t v_limb;
  175. /* Multiply by the first limb in V separately, as the result can be
  176. stored (not added) to PROD. We also avoid a loop for zeroing. */
  177. v_limb = up[0];
  178. if (v_limb <= 1)
  179. {
  180. if (v_limb == 1)
  181. MPN_COPY (prodp, up, size);
  182. else
  183. MPN_ZERO (prodp, size);
  184. cy_limb = 0;
  185. }
  186. else
  187. cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
  188. prodp[size] = cy_limb;
  189. prodp++;
  190. /* For each iteration in the outer loop, multiply one limb from
  191. U with one limb from V, and add it to PROD. */
  192. for (i = 1; i < size; i++)
  193. {
  194. v_limb = up[i];
  195. if (v_limb <= 1)
  196. {
  197. cy_limb = 0;
  198. if (v_limb == 1)
  199. cy_limb = mpn_add_n (prodp, prodp, up, size);
  200. }
  201. else
  202. cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
  203. prodp[size] = cy_limb;
  204. prodp++;
  205. }
  206. }
/* Karatsuba squaring of {up,size}; the 2*size-limb product U*U is
   stored at prodp.  tspace must provide 2*size limbs of scratch.
   Same scheme as impn_mul_n, simplified because both operands are U:
   the middle product is -(U1-U0)**2, which is never positive, so no
   sign flag is needed.  prodp must be distinct from up.  */
void
impn_sqr_n (mp_ptr prodp,
	    mp_srcptr up, mp_size_t size, mp_ptr tspace)
{
  if ((size & 1) != 0)
    {
      /* The size is odd; the code below doesn't handle that.
	 Multiply the least significant (size - 1) limbs with a recursive
	 call, and handle the most significant limb of S1 and S2
	 separately.  */
      /* A slightly faster way to do this would be to make the Karatsuba
	 code below behave as if the size were even, and let it check for
	 odd size in the end.  I.e., in essence move this code to the end.
	 Doing so would save us a recursive call, and potentially make the
	 stack grow a lot less.  */

      mp_size_t esize = size - 1;	/* even size */
      mp_limb_t cy_limb;

      MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
      /* Fold in the top limb: the first call adds the cross product
	 {up,esize} * up[esize]; the second runs over all `size' limbs
	 and so adds it again (cross terms appear twice in a square)
	 plus up[esize]**2 exactly once.  */
      cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
      prodp[esize + esize] = cy_limb;
      cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]);
      prodp[esize + size] = cy_limb;
    }
  else
    {
      mp_size_t hsize = size >> 1;
      mp_limb_t cy;

      /*** Product H: U1 x U1.  Put result in upper part of PROD and pass
	   low part of TSPACE as new TSPACE.  */
      MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace);

      /*** Product M: (U1-U0)(U0-U1) = -(U1-U0)**2.  Compute |U1-U0| into
	   the low part of PROD; the sign is always non-positive so M is
	   unconditionally subtracted below.  */
      if (mpn_cmp (up + hsize, up, hsize) >= 0)
	{
	  mpn_sub_n (prodp, up + hsize, up, hsize);
	}
      else
	{
	  mpn_sub_n (prodp, up, up + hsize, hsize);
	}
      /* Read temporary operands from low part of PROD.
	 Put result in low part of TSPACE using upper part of TSPACE
	 as new TSPACE.  */
      MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size);

      /*** Add/copy product H.  */
      MPN_COPY (prodp + hsize, prodp + size, hsize);
      cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);

      /*** Add product M; M = -(U1-U0)**2 is never positive here, so it
	   is always subtracted.  */
      cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);

      /*** Product L: U0 x U0.  Read temporary operands from low part of
	   PROD.  Put result in low part of TSPACE using upper part of
	   TSPACE as new TSPACE.  */
      MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);

      /*** Add/copy Product L (it appears twice in the identity: once at
	   position 0, once at position hsize).  */
      cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
      if (cy)
	mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);

      MPN_COPY (prodp, tspace, hsize);
      cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
      if (cy)
	mpn_add_1 (prodp + size, prodp + size, size, 1);
    }
}
  274. /* This should be made into an inline function in gmp.h. */
  275. void
  276. mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
  277. {
  278. TMP_DECL (marker);
  279. TMP_MARK (marker);
  280. if (up == vp)
  281. {
  282. if (size < KARATSUBA_THRESHOLD)
  283. {
  284. impn_sqr_n_basecase (prodp, up, size);
  285. }
  286. else
  287. {
  288. mp_ptr tspace;
  289. tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
  290. impn_sqr_n (prodp, up, size, tspace);
  291. }
  292. }
  293. else
  294. {
  295. if (size < KARATSUBA_THRESHOLD)
  296. {
  297. impn_mul_n_basecase (prodp, up, vp, size);
  298. }
  299. else
  300. {
  301. mp_ptr tspace;
  302. tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
  303. impn_mul_n (prodp, up, vp, size, tspace);
  304. }
  305. }
  306. TMP_FREE (marker);
  307. }