accumulate.hpp 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. //---------------------------------------------------------------------------//
  2. // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
  3. //
  4. // Distributed under the Boost Software License, Version 1.0
  5. // See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt
  7. //
  8. // See http://boostorg.github.com/compute for more information.
  9. //---------------------------------------------------------------------------//
  10. #ifndef BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP
  11. #define BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP
  12. #include <boost/preprocessor/seq/for_each.hpp>
  13. #include <boost/compute/system.hpp>
  14. #include <boost/compute/functional.hpp>
  15. #include <boost/compute/command_queue.hpp>
  16. #include <boost/compute/algorithm/reduce.hpp>
  17. #include <boost/compute/algorithm/detail/serial_accumulate.hpp>
  18. #include <boost/compute/container/array.hpp>
  19. #include <boost/compute/container/vector.hpp>
  20. #include <boost/compute/detail/iterator_range_size.hpp>
  21. namespace boost {
  22. namespace compute {
  23. namespace detail {
  24. template<class InputIterator, class T, class BinaryFunction>
  25. inline T generic_accumulate(InputIterator first,
  26. InputIterator last,
  27. T init,
  28. BinaryFunction function,
  29. command_queue &queue)
  30. {
  31. const context &context = queue.get_context();
  32. size_t size = iterator_range_size(first, last);
  33. if(size == 0){
  34. return init;
  35. }
  36. // accumulate on device
  37. array<T, 1> device_result(context);
  38. detail::serial_accumulate(
  39. first, last, device_result.begin(), init, function, queue
  40. );
  41. // copy result to host
  42. T result;
  43. ::boost::compute::copy_n(device_result.begin(), 1, &result, queue);
  44. return result;
  45. }
  46. // returns true if we can use reduce() instead of accumulate() when
  47. // accumulate() this is true when the function is commutative (such as
  48. // addition of integers) and the initial value is the identity value
  49. // for the operation (zero for addition, one for multiplication).
  50. template<class T, class F>
  51. inline bool can_accumulate_with_reduce(T init, F function)
  52. {
  53. (void) init;
  54. (void) function;
  55. return false;
  56. }
  57. /// \internal_
  58. #define BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE(r, data, type) \
  59. inline bool can_accumulate_with_reduce(type init, plus<type>) \
  60. { \
  61. return init == type(0); \
  62. } \
  63. inline bool can_accumulate_with_reduce(type init, multiplies<type>) \
  64. { \
  65. return init == type(1); \
  66. }
  67. BOOST_PP_SEQ_FOR_EACH(
  68. BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE,
  69. _,
  70. (char_)(uchar_)(short_)(ushort_)(int_)(uint_)(long_)(ulong_)
  71. )
  72. template<class T>
  73. inline bool can_accumulate_with_reduce(T init, min<T>)
  74. {
  75. return init == (std::numeric_limits<T>::max)();
  76. }
  77. template<class T>
  78. inline bool can_accumulate_with_reduce(T init, max<T>)
  79. {
  80. return init == (std::numeric_limits<T>::min)();
  81. }
  82. #undef BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE
  83. template<class InputIterator, class T, class BinaryFunction>
  84. inline T dispatch_accumulate(InputIterator first,
  85. InputIterator last,
  86. T init,
  87. BinaryFunction function,
  88. command_queue &queue)
  89. {
  90. size_t size = iterator_range_size(first, last);
  91. if(size == 0){
  92. return init;
  93. }
  94. if(can_accumulate_with_reduce(init, function)){
  95. T result;
  96. reduce(first, last, &result, function, queue);
  97. return result;
  98. }
  99. else {
  100. return generic_accumulate(first, last, init, function, queue);
  101. }
  102. }
  103. } // end detail namespace
  104. /// Returns the result of applying \p function to the elements in the
  105. /// range [\p first, \p last) and \p init.
  106. ///
  107. /// If no function is specified, \c plus will be used.
  108. ///
  109. /// \param first first element in the input range
  110. /// \param last last element in the input range
  111. /// \param init initial value
  112. /// \param function binary reduction function
  113. /// \param queue command queue to perform the operation
  114. ///
  115. /// \return the accumulated result value
  116. ///
  117. /// In specific situations the call to \c accumulate() can be automatically
  118. /// optimized to a call to the more efficient \c reduce() algorithm. This
  119. /// occurs when the binary reduction function is recognized as associative
  120. /// (such as the \c plus<int> function).
  121. ///
  122. /// Note that because floating-point addition is not associative, calling
  123. /// \c accumulate() with \c plus<float> results in a less efficient serial
  124. /// reduction algorithm being executed. If a slight loss in precision is
  125. /// acceptable, the more efficient parallel \c reduce() algorithm should be
  126. /// used instead.
  127. ///
  128. /// For example:
  129. /// \code
  130. /// // with vec = boost::compute::vector<int>
  131. /// accumulate(vec.begin(), vec.end(), 0, plus<int>()); // fast
  132. /// reduce(vec.begin(), vec.end(), &result, plus<int>()); // fast
  133. ///
  134. /// // with vec = boost::compute::vector<float>
  135. /// accumulate(vec.begin(), vec.end(), 0, plus<float>()); // slow
  136. /// reduce(vec.begin(), vec.end(), &result, plus<float>()); // fast
  137. /// \endcode
  138. ///
  139. /// \see reduce()
  140. template<class InputIterator, class T, class BinaryFunction>
  141. inline T accumulate(InputIterator first,
  142. InputIterator last,
  143. T init,
  144. BinaryFunction function,
  145. command_queue &queue = system::default_queue())
  146. {
  147. return detail::dispatch_accumulate(first, last, init, function, queue);
  148. }
  149. /// \overload
  150. template<class InputIterator, class T>
  151. inline T accumulate(InputIterator first,
  152. InputIterator last,
  153. T init,
  154. command_queue &queue = system::default_queue())
  155. {
  156. typedef typename std::iterator_traits<InputIterator>::value_type IT;
  157. return detail::dispatch_accumulate(first, last, init, plus<IT>(), queue);
  158. }
  159. } // end compute namespace
  160. } // end boost namespace
  161. #endif // BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP