copy.hpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362
  1. //---------------------------------------------------------------------------//
  2. // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
  3. //
  4. // Distributed under the Boost Software License, Version 1.0
  5. // See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt
  7. //
  8. // See http://boostorg.github.com/compute for more information.
  9. //---------------------------------------------------------------------------//
  10. #ifndef BOOST_COMPUTE_ALGORITHM_COPY_HPP
  11. #define BOOST_COMPUTE_ALGORITHM_COPY_HPP
  #include <algorithm>
  #include <iterator>
  #include <vector>
  #include <boost/static_assert.hpp>
  #include <boost/utility/enable_if.hpp>
  #include <boost/type_traits/is_same.hpp>
  #include <boost/mpl/and.hpp>
  #include <boost/mpl/not.hpp>
  #include <boost/compute/buffer.hpp>
  #include <boost/compute/system.hpp>
  #include <boost/compute/command_queue.hpp>
  #include <boost/compute/algorithm/detail/copy_on_device.hpp>
  #include <boost/compute/algorithm/detail/copy_to_device.hpp>
  #include <boost/compute/algorithm/detail/copy_to_host.hpp>
  #include <boost/compute/async/future.hpp>
  #include <boost/compute/detail/is_contiguous_iterator.hpp>
  #include <boost/compute/detail/iterator_range_size.hpp>
  #include <boost/compute/iterator/buffer_iterator.hpp>
  #include <boost/compute/type_traits/is_device_iterator.hpp>
  28. namespace boost {
  29. namespace compute {
  30. namespace detail {
  31. namespace mpl = boost::mpl;
  32. // meta-function returning true if copy() between InputIterator and
  33. // OutputIterator can be implemented with clEnqueueCopyBuffer().
  34. template<class InputIterator, class OutputIterator>
  35. struct can_copy_with_copy_buffer :
  36. mpl::and_<
  37. boost::is_same<
  38. InputIterator,
  39. buffer_iterator<typename InputIterator::value_type>
  40. >,
  41. boost::is_same<
  42. OutputIterator,
  43. buffer_iterator<typename OutputIterator::value_type>
  44. >,
  45. boost::is_same<
  46. typename InputIterator::value_type,
  47. typename OutputIterator::value_type
  48. >
  49. >::type {};
  50. // host -> device
  51. template<class InputIterator, class OutputIterator>
  52. inline OutputIterator
  53. dispatch_copy(InputIterator first,
  54. InputIterator last,
  55. OutputIterator result,
  56. command_queue &queue,
  57. typename boost::enable_if_c<
  58. !is_device_iterator<InputIterator>::value &&
  59. is_device_iterator<OutputIterator>::value
  60. >::type* = 0)
  61. {
  62. if(is_contiguous_iterator<InputIterator>::value){
  63. return copy_to_device(first, last, result, queue);
  64. }
  65. else {
  66. // for non-contiguous input we first copy the values to
  67. // a temporary std::vector and then copy from there
  68. typedef typename std::iterator_traits<InputIterator>::value_type T;
  69. std::vector<T> vector(first, last);
  70. return copy_to_device(vector.begin(), vector.end(), result, queue);
  71. }
  72. }
  73. // host -> device (async)
  74. template<class InputIterator, class OutputIterator>
  75. inline future<OutputIterator>
  76. dispatch_copy_async(InputIterator first,
  77. InputIterator last,
  78. OutputIterator result,
  79. command_queue &queue,
  80. typename boost::enable_if_c<
  81. !is_device_iterator<InputIterator>::value &&
  82. is_device_iterator<OutputIterator>::value
  83. >::type* = 0)
  84. {
  85. BOOST_STATIC_ASSERT_MSG(
  86. is_contiguous_iterator<InputIterator>::value,
  87. "copy_async() is only supported for contiguous host iterators"
  88. );
  89. return copy_to_device_async(first, last, result, queue);
  90. }
  91. // device -> host
  92. template<class InputIterator, class OutputIterator>
  93. inline OutputIterator
  94. dispatch_copy(InputIterator first,
  95. InputIterator last,
  96. OutputIterator result,
  97. command_queue &queue,
  98. typename boost::enable_if_c<
  99. is_device_iterator<InputIterator>::value &&
  100. !is_device_iterator<OutputIterator>::value
  101. >::type* = 0)
  102. {
  103. if(is_contiguous_iterator<OutputIterator>::value){
  104. return copy_to_host(first, last, result, queue);
  105. }
  106. else {
  107. // for non-contiguous input we first copy the values to
  108. // a temporary std::vector and then copy from there
  109. typedef typename std::iterator_traits<InputIterator>::value_type T;
  110. std::vector<T> vector(iterator_range_size(first, last));
  111. copy_to_host(first, last, vector.begin(), queue);
  112. return std::copy(vector.begin(), vector.end(), result);
  113. }
  114. }
  115. // device -> host (async)
  116. template<class InputIterator, class OutputIterator>
  117. inline future<OutputIterator>
  118. dispatch_copy_async(InputIterator first,
  119. InputIterator last,
  120. OutputIterator result,
  121. command_queue &queue,
  122. typename boost::enable_if_c<
  123. is_device_iterator<InputIterator>::value &&
  124. !is_device_iterator<OutputIterator>::value
  125. >::type* = 0)
  126. {
  127. BOOST_STATIC_ASSERT_MSG(
  128. is_contiguous_iterator<OutputIterator>::value,
  129. "copy_async() is only supported for contiguous host iterators"
  130. );
  131. return copy_to_host_async(first, last, result, queue);
  132. }
  133. // device -> device
  134. template<class InputIterator, class OutputIterator>
  135. inline OutputIterator
  136. dispatch_copy(InputIterator first,
  137. InputIterator last,
  138. OutputIterator result,
  139. command_queue &queue,
  140. typename boost::enable_if<
  141. mpl::and_<
  142. is_device_iterator<InputIterator>,
  143. is_device_iterator<OutputIterator>,
  144. mpl::not_<
  145. can_copy_with_copy_buffer<
  146. InputIterator, OutputIterator
  147. >
  148. >
  149. >
  150. >::type* = 0)
  151. {
  152. return copy_on_device(first, last, result, queue);
  153. }
  154. // device -> device (specialization for buffer iterators)
  155. template<class InputIterator, class OutputIterator>
  156. inline OutputIterator
  157. dispatch_copy(InputIterator first,
  158. InputIterator last,
  159. OutputIterator result,
  160. command_queue &queue,
  161. typename boost::enable_if<
  162. mpl::and_<
  163. is_device_iterator<InputIterator>,
  164. is_device_iterator<OutputIterator>,
  165. can_copy_with_copy_buffer<
  166. InputIterator, OutputIterator
  167. >
  168. >
  169. >::type* = 0)
  170. {
  171. typedef typename std::iterator_traits<InputIterator>::value_type value_type;
  172. typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;
  173. difference_type n = std::distance(first, last);
  174. if(n < 1){
  175. // nothing to copy
  176. return result;
  177. }
  178. queue.enqueue_copy_buffer(first.get_buffer(),
  179. result.get_buffer(),
  180. first.get_index() * sizeof(value_type),
  181. result.get_index() * sizeof(value_type),
  182. static_cast<size_t>(n) * sizeof(value_type));
  183. return result + n;
  184. }
  185. // device -> device (async)
  186. template<class InputIterator, class OutputIterator>
  187. inline future<OutputIterator>
  188. dispatch_copy_async(InputIterator first,
  189. InputIterator last,
  190. OutputIterator result,
  191. command_queue &queue,
  192. typename boost::enable_if<
  193. mpl::and_<
  194. is_device_iterator<InputIterator>,
  195. is_device_iterator<OutputIterator>,
  196. mpl::not_<
  197. can_copy_with_copy_buffer<
  198. InputIterator, OutputIterator
  199. >
  200. >
  201. >
  202. >::type* = 0)
  203. {
  204. return copy_on_device_async(first, last, result, queue);
  205. }
  206. // device -> device (async, specialization for buffer iterators)
  207. template<class InputIterator, class OutputIterator>
  208. inline future<OutputIterator>
  209. dispatch_copy_async(InputIterator first,
  210. InputIterator last,
  211. OutputIterator result,
  212. command_queue &queue,
  213. typename boost::enable_if<
  214. mpl::and_<
  215. is_device_iterator<InputIterator>,
  216. is_device_iterator<OutputIterator>,
  217. can_copy_with_copy_buffer<
  218. InputIterator, OutputIterator
  219. >
  220. >
  221. >::type* = 0)
  222. {
  223. typedef typename std::iterator_traits<InputIterator>::value_type value_type;
  224. typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;
  225. difference_type n = std::distance(first, last);
  226. if(n < 1){
  227. // nothing to copy
  228. return make_future(result, event());
  229. }
  230. event event_ =
  231. queue.enqueue_copy_buffer(
  232. first.get_buffer(),
  233. result.get_buffer(),
  234. first.get_index() * sizeof(value_type),
  235. result.get_index() * sizeof(value_type),
  236. static_cast<size_t>(n) * sizeof(value_type)
  237. );
  238. return make_future(result + n, event_);
  239. }
  240. // host -> host
  241. template<class InputIterator, class OutputIterator>
  242. inline OutputIterator
  243. dispatch_copy(InputIterator first,
  244. InputIterator last,
  245. OutputIterator result,
  246. command_queue &queue,
  247. typename boost::enable_if_c<
  248. !is_device_iterator<InputIterator>::value &&
  249. !is_device_iterator<OutputIterator>::value
  250. >::type* = 0)
  251. {
  252. (void) queue;
  253. return std::copy(first, last, result);
  254. }
  255. } // end detail namespace
  256. /// Copies the values in the range [\p first, \p last) to the range
  257. /// beginning at \p result.
  258. ///
  259. /// The generic copy() function can be used for a variety of data
  260. /// transfer tasks and provides a standard interface to the following
  261. /// OpenCL functions:
  262. ///
  263. /// \li \c clEnqueueReadBuffer()
  264. /// \li \c clEnqueueWriteBuffer()
  265. /// \li \c clEnqueueCopyBuffer()
  266. ///
  267. /// Unlike the aforementioned OpenCL functions, copy() will also work
  268. /// with non-contiguous data-structures (e.g. \c std::list<T>) as
  269. /// well as with "fancy" iterators (e.g. transform_iterator).
  270. ///
  271. /// \param first first element in the range to copy
  272. /// \param last last element in the range to copy
  273. /// \param result first element in the result range
  274. /// \param queue command queue to perform the operation
  275. ///
  276. /// \return \c OutputIterator to the end of the result range
  277. ///
  278. /// For example, to copy an array of \c int values on the host to a vector on
  279. /// the device:
  280. /// \code
  281. /// // array on the host
  282. /// int data[] = { 1, 2, 3, 4 };
  283. ///
  284. /// // vector on the device
  285. /// boost::compute::vector<int> vec(4, context);
  286. ///
  287. /// // copy values to the device vector
  288. /// boost::compute::copy(data, data + 4, vec.begin(), queue);
  289. /// \endcode
  290. ///
  291. /// The copy algorithm can also be used with standard containers such as
  292. /// \c std::vector<T>:
  293. /// \code
  294. /// std::vector<int> host_vector = ...
  295. /// boost::compute::vector<int> device_vector = ...
  296. ///
  297. /// // copy from the host to the device
  298. /// boost::compute::copy(
  299. /// host_vector.begin(), host_vector.end(), device_vector.begin(), queue
  300. /// );
  301. ///
  302. /// // copy from the device to the host
  303. /// boost::compute::copy(
  304. /// device_vector.begin(), device_vector.end(), host_vector.begin(), queue
  305. /// );
  306. /// \endcode
  307. ///
  308. /// \see copy_n(), copy_if(), copy_async()
  309. template<class InputIterator, class OutputIterator>
  310. inline OutputIterator copy(InputIterator first,
  311. InputIterator last,
  312. OutputIterator result,
  313. command_queue &queue = system::default_queue())
  314. {
  315. return detail::dispatch_copy(first, last, result, queue);
  316. }
  317. /// Copies the values in the range [\p first, \p last) to the range
  318. /// beginning at \p result. The copy is performed asynchronously.
  319. ///
  320. /// \see copy()
  321. template<class InputIterator, class OutputIterator>
  322. inline future<OutputIterator>
  323. copy_async(InputIterator first,
  324. InputIterator last,
  325. OutputIterator result,
  326. command_queue &queue = system::default_queue())
  327. {
  328. return detail::dispatch_copy_async(first, last, result, queue);
  329. }
  330. } // end compute namespace
  331. } // end boost namespace
  332. #endif // BOOST_COMPUTE_ALGORITHM_COPY_HPP