bn_fast_s_mp_sqr.c 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. #include <tommath.h>
  2. #ifdef BN_FAST_S_MP_SQR_C
  3. /* LibTomMath, multiple-precision integer library -- Tom St Denis
  4. *
  5. * LibTomMath is a library that provides multiple-precision
  6. * integer arithmetic as well as number theoretic functionality.
  7. *
  8. * The library was designed directly after the MPI library by
  9. * Michael Fromberger but has been written from scratch with
  10. * additional optimizations in place.
  11. *
  12. * The library is free for all purposes without any express
  13. * guarantee it works.
  14. *
  15. * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
  16. */
  17. /* the jist of squaring...
  18. * you do like mult except the offset of the tmpx [one that
  19. * starts closer to zero] can't equal the offset of tmpy.
  20. * So basically you set up iy like before then you min it with
  21. * (ty-tx) so that it never happens. You double all those
  22. * you add in the inner loop
  23. After that loop you do the squares and add them in.
  24. */
  25. int fast_s_mp_sqr (mp_int * a, mp_int * b)
  26. {
  27. int olduse, res, pa, ix, iz;
  28. mp_digit W[MP_WARRAY], *tmpx;
  29. mp_word W1;
  30. /* grow the destination as required */
  31. pa = a->used + a->used;
  32. if (b->alloc < pa) {
  33. if ((res = mp_grow (b, pa)) != MP_OKAY) {
  34. return res;
  35. }
  36. }
  37. /* number of output digits to produce */
  38. W1 = 0;
  39. for (ix = 0; ix < pa; ix++) {
  40. int tx, ty, iy;
  41. mp_word _W;
  42. mp_digit *tmpy;
  43. /* clear counter */
  44. _W = 0;
  45. /* get offsets into the two bignums */
  46. ty = MIN(a->used-1, ix);
  47. tx = ix - ty;
  48. /* setup temp aliases */
  49. tmpx = a->dp + tx;
  50. tmpy = a->dp + ty;
  51. /* this is the number of times the loop will iterrate, essentially
  52. while (tx++ < a->used && ty-- >= 0) { ... }
  53. */
  54. iy = MIN(a->used-tx, ty+1);
  55. /* now for squaring tx can never equal ty
  56. * we halve the distance since they approach at a rate of 2x
  57. * and we have to round because odd cases need to be executed
  58. */
  59. iy = MIN(iy, (ty-tx+1)>>1);
  60. /* execute loop */
  61. for (iz = 0; iz < iy; iz++) {
  62. _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
  63. }
  64. /* double the inner product and add carry */
  65. _W = _W + _W + W1;
  66. /* even columns have the square term in them */
  67. if ((ix&1) == 0) {
  68. _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
  69. }
  70. /* store it */
  71. W[ix] = (mp_digit)(_W & MP_MASK);
  72. /* make next carry */
  73. W1 = _W >> ((mp_word)DIGIT_BIT);
  74. }
  75. /* setup dest */
  76. olduse = b->used;
  77. b->used = a->used+a->used;
  78. {
  79. mp_digit *tmpb;
  80. tmpb = b->dp;
  81. for (ix = 0; ix < pa; ix++) {
  82. *tmpb++ = W[ix] & MP_MASK;
  83. }
  84. /* clear unused digits [that existed in the old copy of c] */
  85. for (; ix < olduse; ix++) {
  86. *tmpb++ = 0;
  87. }
  88. }
  89. mp_clamp (b);
  90. return MP_OKAY;
  91. }
  92. #endif
  93. /* $Source: /cvs/libtom/libtommath/bn_fast_s_mp_sqr.c,v $ */
  94. /* $Revision: 1.3 $ */
  95. /* $Date: 2006/03/31 14:18:44 $ */