fill.hpp 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. //---------------------------------------------------------------------------//
  2. // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
  3. //
  4. // Distributed under the Boost Software License, Version 1.0
  5. // See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt
  7. //
  8. // See http://boostorg.github.com/compute for more information.
  9. //---------------------------------------------------------------------------//
  10. #ifndef BOOST_COMPUTE_ALGORITHM_FILL_HPP
  11. #define BOOST_COMPUTE_ALGORITHM_FILL_HPP
  12. #include <iterator>
  13. #include <boost/mpl/int.hpp>
  14. #include <boost/mpl/vector.hpp>
  15. #include <boost/mpl/contains.hpp>
  16. #include <boost/utility/enable_if.hpp>
  17. #include <boost/compute/cl.hpp>
  18. #include <boost/compute/system.hpp>
  19. #include <boost/compute/command_queue.hpp>
  20. #include <boost/compute/algorithm/copy.hpp>
  21. #include <boost/compute/async/future.hpp>
  22. #include <boost/compute/iterator/constant_iterator.hpp>
  23. #include <boost/compute/iterator/discard_iterator.hpp>
  24. #include <boost/compute/detail/is_buffer_iterator.hpp>
  25. #include <boost/compute/detail/iterator_range_size.hpp>
  26. namespace boost {
  27. namespace compute {
  28. namespace detail {
  29. namespace mpl = boost::mpl;
  30. // fills the range [first, first + count) with value using copy()
  31. template<class BufferIterator, class T>
  32. inline void fill_with_copy(BufferIterator first,
  33. size_t count,
  34. const T &value,
  35. command_queue &queue)
  36. {
  37. ::boost::compute::copy(
  38. ::boost::compute::make_constant_iterator(value, 0),
  39. ::boost::compute::make_constant_iterator(value, count),
  40. first,
  41. queue
  42. );
  43. }
  44. // fills the range [first, first + count) with value using copy_async()
  45. template<class BufferIterator, class T>
  46. inline future<void> fill_async_with_copy(BufferIterator first,
  47. size_t count,
  48. const T &value,
  49. command_queue &queue)
  50. {
  51. return ::boost::compute::copy_async(
  52. ::boost::compute::make_constant_iterator(value, 0),
  53. ::boost::compute::make_constant_iterator(value, count),
  54. first,
  55. queue
  56. );
  57. }
  58. #if defined(CL_VERSION_1_2)
  59. // meta-function returing true if Iterator points to a range of values
  60. // that can be filled using clEnqueueFillBuffer(). to meet this criteria
  61. // it must have a buffer accessible through iter.get_buffer() and the
  62. // size of its value_type must by in {1, 2, 4, 8, 16, 32, 64, 128}.
  63. template<class Iterator>
  64. struct is_valid_fill_buffer_iterator :
  65. public mpl::and_<
  66. is_buffer_iterator<Iterator>,
  67. mpl::contains<
  68. mpl::vector<
  69. mpl::int_<1>,
  70. mpl::int_<2>,
  71. mpl::int_<4>,
  72. mpl::int_<8>,
  73. mpl::int_<16>,
  74. mpl::int_<32>,
  75. mpl::int_<64>,
  76. mpl::int_<128>
  77. >,
  78. mpl::int_<
  79. sizeof(typename std::iterator_traits<Iterator>::value_type)
  80. >
  81. >
  82. >::type { };
  83. template<>
  84. struct is_valid_fill_buffer_iterator<discard_iterator> : public boost::false_type {};
  85. // specialization which uses clEnqueueFillBuffer for buffer iterators
  86. template<class BufferIterator, class T>
  87. inline void
  88. dispatch_fill(BufferIterator first,
  89. size_t count,
  90. const T &value,
  91. command_queue &queue,
  92. typename boost::enable_if<
  93. is_valid_fill_buffer_iterator<BufferIterator>
  94. >::type* = 0)
  95. {
  96. typedef typename std::iterator_traits<BufferIterator>::value_type value_type;
  97. if(count == 0){
  98. // nothing to do
  99. return;
  100. }
  101. // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer)
  102. if(!queue.check_device_version(1, 2)){
  103. return fill_with_copy(first, count, value, queue);
  104. }
  105. value_type pattern = static_cast<value_type>(value);
  106. size_t offset = static_cast<size_t>(first.get_index());
  107. if(count == 1){
  108. // use clEnqueueWriteBuffer() directly when writing a single value
  109. // to the device buffer. this is potentially more efficient and also
  110. // works around a bug in the intel opencl driver.
  111. queue.enqueue_write_buffer(
  112. first.get_buffer(),
  113. offset * sizeof(value_type),
  114. sizeof(value_type),
  115. &pattern
  116. );
  117. }
  118. else {
  119. queue.enqueue_fill_buffer(
  120. first.get_buffer(),
  121. &pattern,
  122. sizeof(value_type),
  123. offset * sizeof(value_type),
  124. count * sizeof(value_type)
  125. );
  126. }
  127. }
  128. template<class BufferIterator, class T>
  129. inline future<void>
  130. dispatch_fill_async(BufferIterator first,
  131. size_t count,
  132. const T &value,
  133. command_queue &queue,
  134. typename boost::enable_if<
  135. is_valid_fill_buffer_iterator<BufferIterator>
  136. >::type* = 0)
  137. {
  138. typedef typename std::iterator_traits<BufferIterator>::value_type value_type;
  139. // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer)
  140. if(!queue.check_device_version(1, 2)){
  141. return fill_async_with_copy(first, count, value, queue);
  142. }
  143. value_type pattern = static_cast<value_type>(value);
  144. size_t offset = static_cast<size_t>(first.get_index());
  145. event event_ =
  146. queue.enqueue_fill_buffer(first.get_buffer(),
  147. &pattern,
  148. sizeof(value_type),
  149. offset * sizeof(value_type),
  150. count * sizeof(value_type));
  151. return future<void>(event_);
  152. }
  153. #ifdef CL_VERSION_2_0
  154. // specializations for svm_ptr<T>
  155. template<class T>
  156. inline void dispatch_fill(svm_ptr<T> first,
  157. size_t count,
  158. const T &value,
  159. command_queue &queue)
  160. {
  161. if(count == 0){
  162. return;
  163. }
  164. queue.enqueue_svm_fill(
  165. first.get(), &value, sizeof(T), count * sizeof(T)
  166. );
  167. }
  168. template<class T>
  169. inline future<void> dispatch_fill_async(svm_ptr<T> first,
  170. size_t count,
  171. const T &value,
  172. command_queue &queue)
  173. {
  174. if(count == 0){
  175. return future<void>();
  176. }
  177. event event_ = queue.enqueue_svm_fill(
  178. first.get(), &value, sizeof(T), count * sizeof(T)
  179. );
  180. return future<void>(event_);
  181. }
  182. #endif // CL_VERSION_2_0
  183. // default implementations
  184. template<class BufferIterator, class T>
  185. inline void
  186. dispatch_fill(BufferIterator first,
  187. size_t count,
  188. const T &value,
  189. command_queue &queue,
  190. typename boost::disable_if<
  191. is_valid_fill_buffer_iterator<BufferIterator>
  192. >::type* = 0)
  193. {
  194. fill_with_copy(first, count, value, queue);
  195. }
  196. template<class BufferIterator, class T>
  197. inline future<void>
  198. dispatch_fill_async(BufferIterator first,
  199. size_t count,
  200. const T &value,
  201. command_queue &queue,
  202. typename boost::disable_if<
  203. is_valid_fill_buffer_iterator<BufferIterator>
  204. >::type* = 0)
  205. {
  206. return fill_async_with_copy(first, count, value, queue);
  207. }
  208. #else
  209. template<class BufferIterator, class T>
  210. inline void dispatch_fill(BufferIterator first,
  211. size_t count,
  212. const T &value,
  213. command_queue &queue)
  214. {
  215. fill_with_copy(first, count, value, queue);
  216. }
  217. template<class BufferIterator, class T>
  218. inline future<void> dispatch_fill_async(BufferIterator first,
  219. size_t count,
  220. const T &value,
  221. command_queue &queue)
  222. {
  223. return fill_async_with_copy(first, count, value, queue);
  224. }
  225. #endif // !defined(CL_VERSION_1_2)
  226. } // end detail namespace
  227. /// Fills the range [\p first, \p last) with \p value.
  228. ///
  229. /// \param first first element in the range to fill
  230. /// \param last last element in the range to fill
  231. /// \param value value to copy to each element
  232. /// \param queue command queue to perform the operation
  233. ///
  234. /// For example, to fill a vector on the device with sevens:
  235. /// \code
  236. /// // vector on the device
  237. /// boost::compute::vector<int> vec(10, context);
  238. ///
  239. /// // fill vector with sevens
  240. /// boost::compute::fill(vec.begin(), vec.end(), 7, queue);
  241. /// \endcode
  242. ///
  243. /// \see boost::compute::fill_n()
  244. template<class BufferIterator, class T>
  245. inline void fill(BufferIterator first,
  246. BufferIterator last,
  247. const T &value,
  248. command_queue &queue = system::default_queue())
  249. {
  250. size_t count = detail::iterator_range_size(first, last);
  251. if(count == 0){
  252. return;
  253. }
  254. detail::dispatch_fill(first, count, value, queue);
  255. }
  256. template<class BufferIterator, class T>
  257. inline future<void> fill_async(BufferIterator first,
  258. BufferIterator last,
  259. const T &value,
  260. command_queue &queue = system::default_queue())
  261. {
  262. size_t count = detail::iterator_range_size(first, last);
  263. if(count == 0){
  264. return future<void>();
  265. }
  266. return detail::dispatch_fill_async(first, count, value, queue);
  267. }
  268. } // end compute namespace
  269. } // end boost namespace
  270. #endif // BOOST_COMPUTE_ALGORITHM_FILL_HPP