next.hpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500
  1. // (C) Copyright John Maddock 2008.
  2. // Use, modification and distribution are subject to the
  3. // Boost Software License, Version 1.0. (See accompanying file
  4. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. #ifndef BOOST_MATH_SPECIAL_NEXT_HPP
  6. #define BOOST_MATH_SPECIAL_NEXT_HPP
  7. #ifdef _MSC_VER
  8. #pragma once
  9. #endif
  10. #include <boost/math/special_functions/math_fwd.hpp>
  11. #include <boost/math/policies/error_handling.hpp>
  12. #include <boost/math/special_functions/fpclassify.hpp>
  13. #include <boost/math/special_functions/sign.hpp>
  14. #include <boost/math/special_functions/trunc.hpp>
  15. #include <float.h>
  16. #if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3)))
  17. #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__)
  18. #include "xmmintrin.h"
  19. #define BOOST_MATH_CHECK_SSE2
  20. #endif
  21. #endif
  22. namespace boost{ namespace math{
  23. namespace detail{
  24. template <class T>
  25. inline T get_smallest_value(mpl::true_ const&)
  26. {
  27. //
  28. // numeric_limits lies about denorms being present - particularly
  29. // when this can be turned on or off at runtime, as is the case
  30. // when using the SSE2 registers in DAZ or FTZ mode.
  31. //
  32. static const T m = std::numeric_limits<T>::denorm_min();
  33. #ifdef BOOST_MATH_CHECK_SSE2
  34. return (_mm_getcsr() & (_MM_FLUSH_ZERO_ON | 0x40)) ? tools::min_value<T>() : m;;
  35. #else
  36. return ((tools::min_value<T>() / 2) == 0) ? tools::min_value<T>() : m;
  37. #endif
  38. }
  39. template <class T>
  40. inline T get_smallest_value(mpl::false_ const&)
  41. {
  42. return tools::min_value<T>();
  43. }
  44. template <class T>
  45. inline T get_smallest_value()
  46. {
  47. #if defined(BOOST_MSVC) && (BOOST_MSVC <= 1310)
  48. return get_smallest_value<T>(mpl::bool_<std::numeric_limits<T>::is_specialized && (std::numeric_limits<T>::has_denorm == 1)>());
  49. #else
  50. return get_smallest_value<T>(mpl::bool_<std::numeric_limits<T>::is_specialized && (std::numeric_limits<T>::has_denorm == std::denorm_present)>());
  51. #endif
  52. }
  53. //
  54. // Returns the smallest value that won't generate denorms when
  55. // we calculate the value of the least-significant-bit:
  56. //
  57. template <class T>
  58. T get_min_shift_value();
  59. template <class T>
  60. struct min_shift_initializer
  61. {
  62. struct init
  63. {
  64. init()
  65. {
  66. do_init();
  67. }
  68. static void do_init()
  69. {
  70. get_min_shift_value<T>();
  71. }
  72. void force_instantiate()const{}
  73. };
  74. static const init initializer;
  75. static void force_instantiate()
  76. {
  77. initializer.force_instantiate();
  78. }
  79. };
  80. template <class T>
  81. const typename min_shift_initializer<T>::init min_shift_initializer<T>::initializer;
  82. template <class T>
  83. inline T get_min_shift_value()
  84. {
  85. BOOST_MATH_STD_USING
  86. static const T val = ldexp(tools::min_value<T>(), tools::digits<T>() + 1);
  87. min_shift_initializer<T>::force_instantiate();
  88. return val;
  89. }
  90. template <class T, class Policy>
  91. T float_next_imp(const T& val, const Policy& pol)
  92. {
  93. BOOST_MATH_STD_USING
  94. int expon;
  95. static const char* function = "float_next<%1%>(%1%)";
  96. int fpclass = (boost::math::fpclassify)(val);
  97. if((fpclass == (int)FP_NAN) || (fpclass == (int)FP_INFINITE))
  98. {
  99. if(val < 0)
  100. return -tools::max_value<T>();
  101. return policies::raise_domain_error<T>(
  102. function,
  103. "Argument must be finite, but got %1%", val, pol);
  104. }
  105. if(val >= tools::max_value<T>())
  106. return policies::raise_overflow_error<T>(function, 0, pol);
  107. if(val == 0)
  108. return detail::get_smallest_value<T>();
  109. if((fpclass != (int)FP_SUBNORMAL) && (fpclass != (int)FP_ZERO) && (fabs(val) < detail::get_min_shift_value<T>()) && (val != -tools::min_value<T>()))
  110. {
  111. //
  112. // Special case: if the value of the least significant bit is a denorm, and the result
  113. // would not be a denorm, then shift the input, increment, and shift back.
  114. // This avoids issues with the Intel SSE2 registers when the FTZ or DAZ flags are set.
  115. //
  116. return ldexp(float_next(T(ldexp(val, 2 * tools::digits<T>())), pol), -2 * tools::digits<T>());
  117. }
  118. if(-0.5f == frexp(val, &expon))
  119. --expon; // reduce exponent when val is a power of two, and negative.
  120. T diff = ldexp(T(1), expon - tools::digits<T>());
  121. if(diff == 0)
  122. diff = detail::get_smallest_value<T>();
  123. return val + diff;
  124. }
  125. }
  126. template <class T, class Policy>
  127. inline typename tools::promote_args<T>::type float_next(const T& val, const Policy& pol)
  128. {
  129. typedef typename tools::promote_args<T>::type result_type;
  130. return detail::float_next_imp(static_cast<result_type>(val), pol);
  131. }
  132. #if 0 //def BOOST_MSVC
  133. //
  134. // We used to use ::_nextafter here, but doing so fails when using
  135. // the SSE2 registers if the FTZ or DAZ flags are set, so use our own
  136. // - albeit slower - code instead as at least that gives the correct answer.
  137. //
  138. template <class Policy>
  139. inline double float_next(const double& val, const Policy& pol)
  140. {
  141. static const char* function = "float_next<%1%>(%1%)";
  142. if(!(boost::math::isfinite)(val) && (val > 0))
  143. return policies::raise_domain_error<double>(
  144. function,
  145. "Argument must be finite, but got %1%", val, pol);
  146. if(val >= tools::max_value<double>())
  147. return policies::raise_overflow_error<double>(function, 0, pol);
  148. return ::_nextafter(val, tools::max_value<double>());
  149. }
  150. #endif
  151. template <class T>
  152. inline typename tools::promote_args<T>::type float_next(const T& val)
  153. {
  154. return float_next(val, policies::policy<>());
  155. }
  156. namespace detail{
  157. template <class T, class Policy>
  158. T float_prior_imp(const T& val, const Policy& pol)
  159. {
  160. BOOST_MATH_STD_USING
  161. int expon;
  162. static const char* function = "float_prior<%1%>(%1%)";
  163. int fpclass = (boost::math::fpclassify)(val);
  164. if((fpclass == (int)FP_NAN) || (fpclass == (int)FP_INFINITE))
  165. {
  166. if(val > 0)
  167. return tools::max_value<T>();
  168. return policies::raise_domain_error<T>(
  169. function,
  170. "Argument must be finite, but got %1%", val, pol);
  171. }
  172. if(val <= -tools::max_value<T>())
  173. return -policies::raise_overflow_error<T>(function, 0, pol);
  174. if(val == 0)
  175. return -detail::get_smallest_value<T>();
  176. if((fpclass != (int)FP_SUBNORMAL) && (fpclass != (int)FP_ZERO) && (fabs(val) < detail::get_min_shift_value<T>()) && (val != tools::min_value<T>()))
  177. {
  178. //
  179. // Special case: if the value of the least significant bit is a denorm, and the result
  180. // would not be a denorm, then shift the input, increment, and shift back.
  181. // This avoids issues with the Intel SSE2 registers when the FTZ or DAZ flags are set.
  182. //
  183. return ldexp(float_prior(T(ldexp(val, 2 * tools::digits<T>())), pol), -2 * tools::digits<T>());
  184. }
  185. T remain = frexp(val, &expon);
  186. if(remain == 0.5)
  187. --expon; // when val is a power of two we must reduce the exponent
  188. T diff = ldexp(T(1), expon - tools::digits<T>());
  189. if(diff == 0)
  190. diff = detail::get_smallest_value<T>();
  191. return val - diff;
  192. }
  193. }
  194. template <class T, class Policy>
  195. inline typename tools::promote_args<T>::type float_prior(const T& val, const Policy& pol)
  196. {
  197. typedef typename tools::promote_args<T>::type result_type;
  198. return detail::float_prior_imp(static_cast<result_type>(val), pol);
  199. }
  200. #if 0 //def BOOST_MSVC
  201. //
  202. // We used to use ::_nextafter here, but doing so fails when using
  203. // the SSE2 registers if the FTZ or DAZ flags are set, so use our own
  204. // - albeit slower - code instead as at least that gives the correct answer.
  205. //
  206. template <class Policy>
  207. inline double float_prior(const double& val, const Policy& pol)
  208. {
  209. static const char* function = "float_prior<%1%>(%1%)";
  210. if(!(boost::math::isfinite)(val) && (val < 0))
  211. return policies::raise_domain_error<double>(
  212. function,
  213. "Argument must be finite, but got %1%", val, pol);
  214. if(val <= -tools::max_value<double>())
  215. return -policies::raise_overflow_error<double>(function, 0, pol);
  216. return ::_nextafter(val, -tools::max_value<double>());
  217. }
  218. #endif
  219. template <class T>
  220. inline typename tools::promote_args<T>::type float_prior(const T& val)
  221. {
  222. return float_prior(val, policies::policy<>());
  223. }
  224. template <class T, class U, class Policy>
  225. inline typename tools::promote_args<T, U>::type nextafter(const T& val, const U& direction, const Policy& pol)
  226. {
  227. typedef typename tools::promote_args<T, U>::type result_type;
  228. return val < direction ? boost::math::float_next<result_type>(val, pol) : val == direction ? val : boost::math::float_prior<result_type>(val, pol);
  229. }
  230. template <class T, class U>
  231. inline typename tools::promote_args<T, U>::type nextafter(const T& val, const U& direction)
  232. {
  233. return nextafter(val, direction, policies::policy<>());
  234. }
  235. namespace detail{
  236. template <class T, class Policy>
  237. T float_distance_imp(const T& a, const T& b, const Policy& pol)
  238. {
  239. BOOST_MATH_STD_USING
  240. //
  241. // Error handling:
  242. //
  243. static const char* function = "float_distance<%1%>(%1%, %1%)";
  244. if(!(boost::math::isfinite)(a))
  245. return policies::raise_domain_error<T>(
  246. function,
  247. "Argument a must be finite, but got %1%", a, pol);
  248. if(!(boost::math::isfinite)(b))
  249. return policies::raise_domain_error<T>(
  250. function,
  251. "Argument b must be finite, but got %1%", b, pol);
  252. //
  253. // Special cases:
  254. //
  255. if(a > b)
  256. return -float_distance(b, a, pol);
  257. if(a == b)
  258. return 0;
  259. if(a == 0)
  260. return 1 + fabs(float_distance(static_cast<T>((b < 0) ? T(-detail::get_smallest_value<T>()) : detail::get_smallest_value<T>()), b, pol));
  261. if(b == 0)
  262. return 1 + fabs(float_distance(static_cast<T>((a < 0) ? T(-detail::get_smallest_value<T>()) : detail::get_smallest_value<T>()), a, pol));
  263. if(boost::math::sign(a) != boost::math::sign(b))
  264. return 2 + fabs(float_distance(static_cast<T>((b < 0) ? T(-detail::get_smallest_value<T>()) : detail::get_smallest_value<T>()), b, pol))
  265. + fabs(float_distance(static_cast<T>((a < 0) ? T(-detail::get_smallest_value<T>()) : detail::get_smallest_value<T>()), a, pol));
  266. //
  267. // By the time we get here, both a and b must have the same sign, we want
  268. // b > a and both postive for the following logic:
  269. //
  270. if(a < 0)
  271. return float_distance(static_cast<T>(-b), static_cast<T>(-a), pol);
  272. BOOST_ASSERT(a >= 0);
  273. BOOST_ASSERT(b >= a);
  274. int expon;
  275. //
  276. // Note that if a is a denorm then the usual formula fails
  277. // because we actually have fewer than tools::digits<T>()
  278. // significant bits in the representation:
  279. //
  280. frexp(((boost::math::fpclassify)(a) == (int)FP_SUBNORMAL) ? tools::min_value<T>() : a, &expon);
  281. T upper = ldexp(T(1), expon);
  282. T result = 0;
  283. expon = tools::digits<T>() - expon;
  284. //
  285. // If b is greater than upper, then we *must* split the calculation
  286. // as the size of the ULP changes with each order of magnitude change:
  287. //
  288. if(b > upper)
  289. {
  290. result = float_distance(upper, b);
  291. }
  292. //
  293. // Use compensated double-double addition to avoid rounding
  294. // errors in the subtraction:
  295. //
  296. T mb, x, y, z;
  297. if(((boost::math::fpclassify)(a) == (int)FP_SUBNORMAL) || (b - a < tools::min_value<T>()))
  298. {
  299. //
  300. // Special case - either one end of the range is a denormal, or else the difference is.
  301. // The regular code will fail if we're using the SSE2 registers on Intel and either
  302. // the FTZ or DAZ flags are set.
  303. //
  304. T a2 = ldexp(a, tools::digits<T>());
  305. T b2 = ldexp(b, tools::digits<T>());
  306. mb = -(std::min)(T(ldexp(upper, tools::digits<T>())), b2);
  307. x = a2 + mb;
  308. z = x - a2;
  309. y = (a2 - (x - z)) + (mb - z);
  310. expon -= tools::digits<T>();
  311. }
  312. else
  313. {
  314. mb = -(std::min)(upper, b);
  315. x = a + mb;
  316. z = x - a;
  317. y = (a - (x - z)) + (mb - z);
  318. }
  319. if(x < 0)
  320. {
  321. x = -x;
  322. y = -y;
  323. }
  324. result += ldexp(x, expon) + ldexp(y, expon);
  325. //
  326. // Result must be an integer:
  327. //
  328. BOOST_ASSERT(result == floor(result));
  329. return result;
  330. }
  331. }
  332. template <class T, class U, class Policy>
  333. inline typename tools::promote_args<T, U>::type float_distance(const T& a, const U& b, const Policy& pol)
  334. {
  335. typedef typename tools::promote_args<T, U>::type result_type;
  336. return detail::float_distance_imp(static_cast<result_type>(a), static_cast<result_type>(b), pol);
  337. }
  338. template <class T, class U>
  339. typename tools::promote_args<T, U>::type float_distance(const T& a, const U& b)
  340. {
  341. return boost::math::float_distance(a, b, policies::policy<>());
  342. }
  343. namespace detail{
  344. template <class T, class Policy>
  345. T float_advance_imp(T val, int distance, const Policy& pol)
  346. {
  347. BOOST_MATH_STD_USING
  348. //
  349. // Error handling:
  350. //
  351. static const char* function = "float_advance<%1%>(%1%, int)";
  352. int fpclass = (boost::math::fpclassify)(val);
  353. if((fpclass == (int)FP_NAN) || (fpclass == (int)FP_INFINITE))
  354. return policies::raise_domain_error<T>(
  355. function,
  356. "Argument val must be finite, but got %1%", val, pol);
  357. if(val < 0)
  358. return -float_advance(-val, -distance, pol);
  359. if(distance == 0)
  360. return val;
  361. if(distance == 1)
  362. return float_next(val, pol);
  363. if(distance == -1)
  364. return float_prior(val, pol);
  365. if(fabs(val) < detail::get_min_shift_value<T>())
  366. {
  367. //
  368. // Special case: if the value of the least significant bit is a denorm,
  369. // implement in terms of float_next/float_prior.
  370. // This avoids issues with the Intel SSE2 registers when the FTZ or DAZ flags are set.
  371. //
  372. if(distance > 0)
  373. {
  374. do{ val = float_next(val, pol); } while(--distance);
  375. }
  376. else
  377. {
  378. do{ val = float_prior(val, pol); } while(++distance);
  379. }
  380. return val;
  381. }
  382. int expon;
  383. frexp(val, &expon);
  384. T limit = ldexp((distance < 0 ? T(0.5f) : T(1)), expon);
  385. if(val <= tools::min_value<T>())
  386. {
  387. limit = sign(T(distance)) * tools::min_value<T>();
  388. }
  389. T limit_distance = float_distance(val, limit);
  390. while(fabs(limit_distance) < abs(distance))
  391. {
  392. distance -= itrunc(limit_distance);
  393. val = limit;
  394. if(distance < 0)
  395. {
  396. limit /= 2;
  397. expon--;
  398. }
  399. else
  400. {
  401. limit *= 2;
  402. expon++;
  403. }
  404. limit_distance = float_distance(val, limit);
  405. if(distance && (limit_distance == 0))
  406. {
  407. return policies::raise_evaluation_error<T>(function, "Internal logic failed while trying to increment floating point value %1%: most likely your FPU is in non-IEEE conforming mode.", val, pol);
  408. }
  409. }
  410. if((0.5f == frexp(val, &expon)) && (distance < 0))
  411. --expon;
  412. T diff = 0;
  413. if(val != 0)
  414. diff = distance * ldexp(T(1), expon - tools::digits<T>());
  415. if(diff == 0)
  416. diff = distance * detail::get_smallest_value<T>();
  417. return val += diff;
  418. }
  419. }
  420. template <class T, class Policy>
  421. inline typename tools::promote_args<T>::type float_advance(T val, int distance, const Policy& pol)
  422. {
  423. typedef typename tools::promote_args<T>::type result_type;
  424. return detail::float_advance_imp(static_cast<result_type>(val), distance, pol);
  425. }
  426. template <class T>
  427. inline typename tools::promote_args<T>::type float_advance(const T& val, int distance)
  428. {
  429. return boost::math::float_advance(val, distance, policies::policy<>());
  430. }
  431. }} // namespaces
  432. #endif // BOOST_MATH_SPECIAL_NEXT_HPP