bn_fast_mp_montgomery_reduce.c 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. #include <tommath.h>
  2. #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
  3. /* LibTomMath, multiple-precision integer library -- Tom St Denis
  4. *
  5. * LibTomMath is a library that provides multiple-precision
  6. * integer arithmetic as well as number theoretic functionality.
  7. *
  8. * The library was designed directly after the MPI library by
  9. * Michael Fromberger but has been written from scratch with
  10. * additional optimizations in place.
  11. *
  12. * The library is free for all purposes without any express
  13. * guarantee it works.
  14. *
  15. * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
  16. */
  17. /* computes xR**-1 == x (mod N) via Montgomery Reduction
  18. *
  19. * This is an optimized implementation of montgomery_reduce
  20. * which uses the comba method to quickly calculate the columns of the
  21. * reduction.
  22. *
  23. * Based on Algorithm 14.32 on pp.601 of HAC.
  24. */
  25. int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
  26. {
  27. int ix, res, olduse;
  28. mp_word W[MP_WARRAY];
  29. /* get old used count */
  30. olduse = x->used;
  31. /* grow a as required */
  32. if (x->alloc < n->used + 1) {
  33. if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
  34. return res;
  35. }
  36. }
  37. /* first we have to get the digits of the input into
  38. * an array of double precision words W[...]
  39. */
  40. {
  41. register mp_word *_W;
  42. register mp_digit *tmpx;
  43. /* alias for the W[] array */
  44. _W = W;
  45. /* alias for the digits of x*/
  46. tmpx = x->dp;
  47. /* copy the digits of a into W[0..a->used-1] */
  48. for (ix = 0; ix < x->used; ix++) {
  49. *_W++ = *tmpx++;
  50. }
  51. /* zero the high words of W[a->used..m->used*2] */
  52. for (; ix < n->used * 2 + 1; ix++) {
  53. *_W++ = 0;
  54. }
  55. }
  56. /* now we proceed to zero successive digits
  57. * from the least significant upwards
  58. */
  59. for (ix = 0; ix < n->used; ix++) {
  60. /* mu = ai * m' mod b
  61. *
  62. * We avoid a double precision multiplication (which isn't required)
  63. * by casting the value down to a mp_digit. Note this requires
  64. * that W[ix-1] have the carry cleared (see after the inner loop)
  65. */
  66. register mp_digit mu;
  67. mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
  68. /* a = a + mu * m * b**i
  69. *
  70. * This is computed in place and on the fly. The multiplication
  71. * by b**i is handled by offseting which columns the results
  72. * are added to.
  73. *
  74. * Note the comba method normally doesn't handle carries in the
  75. * inner loop In this case we fix the carry from the previous
  76. * column since the Montgomery reduction requires digits of the
  77. * result (so far) [see above] to work. This is
  78. * handled by fixing up one carry after the inner loop. The
  79. * carry fixups are done in order so after these loops the
  80. * first m->used words of W[] have the carries fixed
  81. */
  82. {
  83. register int iy;
  84. register mp_digit *tmpn;
  85. register mp_word *_W;
  86. /* alias for the digits of the modulus */
  87. tmpn = n->dp;
  88. /* Alias for the columns set by an offset of ix */
  89. _W = W + ix;
  90. /* inner loop */
  91. for (iy = 0; iy < n->used; iy++) {
  92. *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
  93. }
  94. }
  95. /* now fix carry for next digit, W[ix+1] */
  96. W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
  97. }
  98. /* now we have to propagate the carries and
  99. * shift the words downward [all those least
  100. * significant digits we zeroed].
  101. */
  102. {
  103. register mp_digit *tmpx;
  104. register mp_word *_W, *_W1;
  105. /* nox fix rest of carries */
  106. /* alias for current word */
  107. _W1 = W + ix;
  108. /* alias for next word, where the carry goes */
  109. _W = W + ++ix;
  110. for (; ix <= n->used * 2 + 1; ix++) {
  111. *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
  112. }
  113. /* copy out, A = A/b**n
  114. *
  115. * The result is A/b**n but instead of converting from an
  116. * array of mp_word to mp_digit than calling mp_rshd
  117. * we just copy them in the right order
  118. */
  119. /* alias for destination word */
  120. tmpx = x->dp;
  121. /* alias for shifted double precision result */
  122. _W = W + n->used;
  123. for (ix = 0; ix < n->used + 1; ix++) {
  124. *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
  125. }
  126. /* zero oldused digits, if the input a was larger than
  127. * m->used+1 we'll have to clear the digits
  128. */
  129. for (; ix < olduse; ix++) {
  130. *tmpx++ = 0;
  131. }
  132. }
  133. /* set the max used and clamp */
  134. x->used = n->used + 1;
  135. mp_clamp (x);
  136. /* if A >= m then A = A - m */
  137. if (mp_cmp_mag (x, n) != MP_LT) {
  138. return s_mp_sub (x, n, x);
  139. }
  140. return MP_OKAY;
  141. }
  142. #endif
  143. /* $Source: /cvs/libtom/libtommath/bn_fast_mp_montgomery_reduce.c,v $ */
  144. /* $Revision: 1.3 $ */
  145. /* $Date: 2006/03/31 14:18:44 $ */