bn_s_mp_sqr_fast.c 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. #include "tommath_private.h"
  2. #ifdef BN_S_MP_SQR_FAST_C
  3. /* LibTomMath, multiple-precision integer library -- Tom St Denis */
  4. /* SPDX-License-Identifier: Unlicense */
  5. /* the jist of squaring...
  6. * you do like mult except the offset of the tmpx [one that
  7. * starts closer to zero] can't equal the offset of tmpy.
  8. * So basically you set up iy like before then you min it with
  9. * (ty-tx) so that it never happens. You double all those
  10. * you add in the inner loop
  11. After that loop you do the squares and add them in.
  12. */
  13. mp_err s_mp_sqr_fast(const mp_int *a, mp_int *b)
  14. {
  15. int olduse, pa, ix, iz;
  16. mp_digit W[MP_WARRAY], *tmpx;
  17. mp_word W1;
  18. mp_err err;
  19. /* grow the destination as required */
  20. pa = a->used + a->used;
  21. if (b->alloc < pa) {
  22. if ((err = mp_grow(b, pa)) != MP_OKAY) {
  23. return err;
  24. }
  25. }
  26. /* number of output digits to produce */
  27. W1 = 0;
  28. for (ix = 0; ix < pa; ix++) {
  29. int tx, ty, iy;
  30. mp_word _W;
  31. mp_digit *tmpy;
  32. /* clear counter */
  33. _W = 0;
  34. /* get offsets into the two bignums */
  35. ty = MP_MIN(a->used-1, ix);
  36. tx = ix - ty;
  37. /* setup temp aliases */
  38. tmpx = a->dp + tx;
  39. tmpy = a->dp + ty;
  40. /* this is the number of times the loop will iterrate, essentially
  41. while (tx++ < a->used && ty-- >= 0) { ... }
  42. */
  43. iy = MP_MIN(a->used-tx, ty+1);
  44. /* now for squaring tx can never equal ty
  45. * we halve the distance since they approach at a rate of 2x
  46. * and we have to round because odd cases need to be executed
  47. */
  48. iy = MP_MIN(iy, ((ty-tx)+1)>>1);
  49. /* execute loop */
  50. for (iz = 0; iz < iy; iz++) {
  51. _W += (mp_word)*tmpx++ * (mp_word)*tmpy--;
  52. }
  53. /* double the inner product and add carry */
  54. _W = _W + _W + W1;
  55. /* even columns have the square term in them */
  56. if (((unsigned)ix & 1u) == 0u) {
  57. _W += (mp_word)a->dp[ix>>1] * (mp_word)a->dp[ix>>1];
  58. }
  59. /* store it */
  60. W[ix] = (mp_digit)_W & MP_MASK;
  61. /* make next carry */
  62. W1 = _W >> (mp_word)MP_DIGIT_BIT;
  63. }
  64. /* setup dest */
  65. olduse = b->used;
  66. b->used = a->used+a->used;
  67. {
  68. mp_digit *tmpb;
  69. tmpb = b->dp;
  70. for (ix = 0; ix < pa; ix++) {
  71. *tmpb++ = W[ix] & MP_MASK;
  72. }
  73. /* clear unused digits [that existed in the old copy of c] */
  74. MP_ZERO_DIGITS(tmpb, olduse - ix);
  75. }
  76. mp_clamp(b);
  77. return MP_OKAY;
  78. }
  79. #endif