// boost/compute/command_queue.hpp
  1. //---------------------------------------------------------------------------//
  2. // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
  3. //
  4. // Distributed under the Boost Software License, Version 1.0
  5. // See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt
  7. //
  8. // See http://boostorg.github.com/compute for more information.
  9. //---------------------------------------------------------------------------//
  10. #ifndef BOOST_COMPUTE_COMMAND_QUEUE_HPP
  11. #define BOOST_COMPUTE_COMMAND_QUEUE_HPP
#include <cstddef>
#include <cstring>

#include <algorithm>
#include <vector>

#include <boost/assert.hpp>

#include <boost/compute/config.hpp>
#include <boost/compute/event.hpp>
#include <boost/compute/buffer.hpp>
#include <boost/compute/device.hpp>
#include <boost/compute/kernel.hpp>
#include <boost/compute/context.hpp>
#include <boost/compute/exception.hpp>
#include <boost/compute/image/image1d.hpp>
#include <boost/compute/image/image2d.hpp>
#include <boost/compute/image/image3d.hpp>
#include <boost/compute/image/image_object.hpp>
#include <boost/compute/utility/extents.hpp>
#include <boost/compute/utility/wait_list.hpp>
#include <boost/compute/detail/get_object_info.hpp>
#include <boost/compute/detail/assert_cl_success.hpp>
#include <boost/compute/detail/diagnostic.hpp>
  31. namespace boost {
  32. namespace compute {
  33. namespace detail {
  34. inline void BOOST_COMPUTE_CL_CALLBACK
  35. nullary_native_kernel_trampoline(void *user_func_ptr)
  36. {
  37. void (*user_func)();
  38. std::memcpy(&user_func, user_func_ptr, sizeof(user_func));
  39. user_func();
  40. }
  41. } // end detail namespace
  42. /// \class command_queue
  43. /// \brief A command queue.
  44. ///
  45. /// Command queues provide the interface for interacting with compute
  46. /// devices. The command_queue class provides methods to copy data to
  47. /// and from a compute device as well as execute compute kernels.
  48. ///
  49. /// Command queues are created for a compute device within a compute
  50. /// context.
  51. ///
  52. /// For example, to create a context and command queue for the default device
  53. /// on the system (this is the normal set up code used by almost all OpenCL
  54. /// programs):
  55. /// \code
  56. /// #include <boost/compute/core.hpp>
  57. ///
  58. /// // get the default compute device
  59. /// boost::compute::device device = boost::compute::system::default_device();
  60. ///
  61. /// // set up a compute context and command queue
  62. /// boost::compute::context context(device);
  63. /// boost::compute::command_queue queue(context, device);
  64. /// \endcode
  65. ///
  66. /// The default command queue for the system can be obtained with the
  67. /// system::default_queue() method.
  68. ///
  69. /// \see buffer, context, kernel
  70. class command_queue
  71. {
  72. public:
    /// Flags for command queue creation; values mirror the corresponding
    /// OpenCL CL_QUEUE_* property bits and may be OR'ed together.
    enum properties {
        enable_profiling = CL_QUEUE_PROFILING_ENABLE,
        enable_out_of_order_execution = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
    };
    /// Flags for enqueue_map_buffer()/enqueue_map_image(); values mirror
    /// the OpenCL CL_MAP_* flags. map_write_invalidate_region is only
    /// available when compiling against OpenCL 1.2 or later headers.
    enum map_flags {
        map_read = CL_MAP_READ,
        map_write = CL_MAP_WRITE
        #ifdef CL_VERSION_1_2
        ,
        map_write_invalidate_region = CL_MAP_WRITE_INVALIDATE_REGION
        #endif
    };
    /// Creates a null command queue.
    ///
    /// A null queue wraps no OpenCL handle: get() returns 0 and the
    /// destructor releases nothing.
    command_queue()
        : m_queue(0)
    {
    }
    /// Creates a command_queue wrapping the raw OpenCL handle \p queue.
    ///
    /// \param queue  existing OpenCL command queue handle (may be 0)
    /// \param retain when true (the default) an additional reference is
    ///        taken on \p queue; pass false to adopt a reference the
    ///        caller already owns (e.g. one returned by clCreate*).
    explicit command_queue(cl_command_queue queue, bool retain = true)
        : m_queue(queue)
    {
        if(m_queue && retain){
            clRetainCommandQueue(m_queue);
        }
    }
    /// Creates a command queue in \p context for \p device with
    /// \p properties.
    ///
    /// \param context    context the queue is created in
    /// \param device     device the queue issues commands to (must be valid)
    /// \param properties OR'ed combination of properties flags (optional)
    ///
    /// \throws opencl_error if queue creation fails
    ///
    /// \see_opencl_ref{clCreateCommandQueue}
    command_queue(const context &context,
                  const device &device,
                  cl_command_queue_properties properties = 0)
    {
        BOOST_ASSERT(device.id() != 0);

        cl_int error = 0;

        #ifdef CL_VERSION_2_0
        if (device.check_version(2, 0)){
            // OpenCL 2.0+ devices: use the non-deprecated
            // clCreateCommandQueueWithProperties(), which takes a
            // zero-terminated list of (property, value) pairs.
            std::vector<cl_queue_properties> queue_properties;
            if(properties){
                queue_properties.push_back(CL_QUEUE_PROPERTIES);
                queue_properties.push_back(cl_queue_properties(properties));
                queue_properties.push_back(cl_queue_properties(0));
            }

            const cl_queue_properties *queue_properties_ptr =
                queue_properties.empty() ? 0 : &queue_properties[0];

            m_queue = clCreateCommandQueueWithProperties(
                context, device.id(), queue_properties_ptr, &error
            );
        } else
        #endif
        {
            // Pre-2.0 path: clCreateCommandQueue() is deprecated in newer
            // headers, so suppress the deprecation warning around the call.
            BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
            m_queue = clCreateCommandQueue(
                context, device.id(), properties, &error
            );
            BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
        }

        if(!m_queue){
            BOOST_THROW_EXCEPTION(opencl_error(error));
        }
    }
  134. /// Creates a new command queue object as a copy of \p other.
  135. command_queue(const command_queue &other)
  136. : m_queue(other.m_queue)
  137. {
  138. if(m_queue){
  139. clRetainCommandQueue(m_queue);
  140. }
  141. }
  142. /// Copies the command queue object from \p other to \c *this.
  143. command_queue& operator=(const command_queue &other)
  144. {
  145. if(this != &other){
  146. if(m_queue){
  147. clReleaseCommandQueue(m_queue);
  148. }
  149. m_queue = other.m_queue;
  150. if(m_queue){
  151. clRetainCommandQueue(m_queue);
  152. }
  153. }
  154. return *this;
  155. }
  156. #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
    /// Move-constructs a new command queue object from \p other.
    ///
    /// Steals \p other's handle without touching the OpenCL reference
    /// count; \p other is left as a null queue.
    command_queue(command_queue&& other) BOOST_NOEXCEPT
        : m_queue(other.m_queue)
    {
        other.m_queue = 0;
    }
  163. /// Move-assigns the command queue from \p other to \c *this.
  164. command_queue& operator=(command_queue&& other) BOOST_NOEXCEPT
  165. {
  166. if(m_queue){
  167. clReleaseCommandQueue(m_queue);
  168. }
  169. m_queue = other.m_queue;
  170. other.m_queue = 0;
  171. return *this;
  172. }
  173. #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
    /// Destroys the command queue.
    ///
    /// Releases the reference held on the underlying OpenCL queue, if
    /// any. A release failure is asserted in debug builds rather than
    /// thrown, since destructors must not throw.
    ///
    /// \see_opencl_ref{clReleaseCommandQueue}
    ~command_queue()
    {
        if(m_queue){
            BOOST_COMPUTE_ASSERT_CL_SUCCESS(
                clReleaseCommandQueue(m_queue)
            );
        }
    }
    /// Returns the underlying OpenCL command queue.
    ///
    /// The const_cast lets OpenCL entry points that want a non-const
    /// handle reference be used on a const command_queue; the stored
    /// handle value itself is not modified by this accessor.
    cl_command_queue& get() const
    {
        return const_cast<cl_command_queue &>(m_queue);
    }
    /// Returns the device that the command queue issues commands to
    /// (queried via CL_QUEUE_DEVICE).
    device get_device() const
    {
        return device(get_info<cl_device_id>(CL_QUEUE_DEVICE));
    }
    /// Returns the context for the command queue
    /// (queried via CL_QUEUE_CONTEXT).
    context get_context() const
    {
        return context(get_info<cl_context>(CL_QUEUE_CONTEXT));
    }
    /// Returns information about the command queue.
    ///
    /// \tparam T type of the queried value (must match the CL_QUEUE_*
    ///         enumerant passed in \p info)
    ///
    /// \see_opencl_ref{clGetCommandQueueInfo}
    template<class T>
    T get_info(cl_command_queue_info info) const
    {
        return detail::get_object_info<T>(clGetCommandQueueInfo, m_queue, info);
    }
    /// \overload
    ///
    /// Compile-time variant: the result type is deduced from the
    /// CL_QUEUE_* enumerant. NOTE(review): specializations appear to be
    /// provided elsewhere in the library — not visible in this chunk.
    template<int Enum>
    typename detail::get_object_info_type<command_queue, Enum>::type
    get_info() const;
    /// Returns the properties bitfield the command queue was created
    /// with (queried via CL_QUEUE_PROPERTIES).
    cl_command_queue_properties get_properties() const
    {
        return get_info<cl_command_queue_properties>(CL_QUEUE_PROPERTIES);
    }
  217. /// Enqueues a command to read data from \p buffer to host memory.
  218. ///
  219. /// \see_opencl_ref{clEnqueueReadBuffer}
  220. ///
  221. /// \see copy()
  222. event enqueue_read_buffer(const buffer &buffer,
  223. size_t offset,
  224. size_t size,
  225. void *host_ptr,
  226. const wait_list &events = wait_list())
  227. {
  228. BOOST_ASSERT(m_queue != 0);
  229. BOOST_ASSERT(size <= buffer.size());
  230. BOOST_ASSERT(buffer.get_context() == this->get_context());
  231. BOOST_ASSERT(host_ptr != 0);
  232. event event_;
  233. cl_int ret = clEnqueueReadBuffer(
  234. m_queue,
  235. buffer.get(),
  236. CL_TRUE,
  237. offset,
  238. size,
  239. host_ptr,
  240. events.size(),
  241. events.get_event_ptr(),
  242. &event_.get()
  243. );
  244. if(ret != CL_SUCCESS){
  245. BOOST_THROW_EXCEPTION(opencl_error(ret));
  246. }
  247. return event_;
  248. }
  249. /// Enqueues a command to read data from \p buffer to host memory. The
  250. /// copy will be performed asynchronously.
  251. ///
  252. /// \see_opencl_ref{clEnqueueReadBuffer}
  253. ///
  254. /// \see copy_async()
  255. event enqueue_read_buffer_async(const buffer &buffer,
  256. size_t offset,
  257. size_t size,
  258. void *host_ptr,
  259. const wait_list &events = wait_list())
  260. {
  261. BOOST_ASSERT(m_queue != 0);
  262. BOOST_ASSERT(size <= buffer.size());
  263. BOOST_ASSERT(buffer.get_context() == this->get_context());
  264. BOOST_ASSERT(host_ptr != 0);
  265. event event_;
  266. cl_int ret = clEnqueueReadBuffer(
  267. m_queue,
  268. buffer.get(),
  269. CL_FALSE,
  270. offset,
  271. size,
  272. host_ptr,
  273. events.size(),
  274. events.get_event_ptr(),
  275. &event_.get()
  276. );
  277. if(ret != CL_SUCCESS){
  278. BOOST_THROW_EXCEPTION(opencl_error(ret));
  279. }
  280. return event_;
  281. }
  282. #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
    /// Enqueues a blocking command to read a rectangular region from
    /// \p buffer to host memory.
    ///
    /// \param buffer        source buffer
    /// \param buffer_origin (x, y, z) byte/row/slice offset into the buffer
    /// \param host_origin   (x, y, z) offset into host memory
    /// \param region        (width-in-bytes, rows, slices) to transfer
    /// \param buffer_row_pitch / buffer_slice_pitch  row/slice strides in
    ///        the buffer (0 selects the tightly-packed default)
    /// \param host_row_pitch / host_slice_pitch      row/slice strides in
    ///        host memory (0 selects the tightly-packed default)
    /// \param host_ptr      destination in host memory (non-null)
    /// \param events        events to wait for before starting
    ///
    /// \return event associated with the read command
    /// \throws opencl_error on failure
    ///
    /// \see_opencl_ref{clEnqueueReadBufferRect}
    ///
    /// \opencl_version_warning{1,1}
    event enqueue_read_buffer_rect(const buffer &buffer,
                                   const size_t buffer_origin[3],
                                   const size_t host_origin[3],
                                   const size_t region[3],
                                   size_t buffer_row_pitch,
                                   size_t buffer_slice_pitch,
                                   size_t host_row_pitch,
                                   size_t host_slice_pitch,
                                   void *host_ptr,
                                   const wait_list &events = wait_list())
    {
        BOOST_ASSERT(m_queue != 0);
        BOOST_ASSERT(buffer.get_context() == this->get_context());
        BOOST_ASSERT(host_ptr != 0);

        event event_;

        cl_int ret = clEnqueueReadBufferRect(
            m_queue,
            buffer.get(),
            CL_TRUE,               // blocking read
            buffer_origin,
            host_origin,
            region,
            buffer_row_pitch,
            buffer_slice_pitch,
            host_row_pitch,
            host_slice_pitch,
            host_ptr,
            events.size(),
            events.get_event_ptr(),
            &event_.get()
        );

        if(ret != CL_SUCCESS){
            BOOST_THROW_EXCEPTION(opencl_error(ret));
        }

        return event_;
    }
    /// Enqueues a command to read a rectangular region from \p buffer to
    /// host memory. The copy will be performed asynchronously; \p host_ptr
    /// must stay valid until the returned event completes.
    ///
    /// Parameters are as for enqueue_read_buffer_rect().
    ///
    /// \return event associated with the read command
    /// \throws opencl_error on failure
    ///
    /// \see_opencl_ref{clEnqueueReadBufferRect}
    ///
    /// \opencl_version_warning{1,1}
    event enqueue_read_buffer_rect_async(const buffer &buffer,
                                         const size_t buffer_origin[3],
                                         const size_t host_origin[3],
                                         const size_t region[3],
                                         size_t buffer_row_pitch,
                                         size_t buffer_slice_pitch,
                                         size_t host_row_pitch,
                                         size_t host_slice_pitch,
                                         void *host_ptr,
                                         const wait_list &events = wait_list())
    {
        BOOST_ASSERT(m_queue != 0);
        BOOST_ASSERT(buffer.get_context() == this->get_context());
        BOOST_ASSERT(host_ptr != 0);

        event event_;

        cl_int ret = clEnqueueReadBufferRect(
            m_queue,
            buffer.get(),
            CL_FALSE,              // non-blocking read
            buffer_origin,
            host_origin,
            region,
            buffer_row_pitch,
            buffer_slice_pitch,
            host_row_pitch,
            host_slice_pitch,
            host_ptr,
            events.size(),
            events.get_event_ptr(),
            &event_.get()
        );

        if(ret != CL_SUCCESS){
            BOOST_THROW_EXCEPTION(opencl_error(ret));
        }

        return event_;
    }
  367. #endif // CL_VERSION_1_1
  368. /// Enqueues a command to write data from host memory to \p buffer.
  369. ///
  370. /// \see_opencl_ref{clEnqueueWriteBuffer}
  371. ///
  372. /// \see copy()
  373. event enqueue_write_buffer(const buffer &buffer,
  374. size_t offset,
  375. size_t size,
  376. const void *host_ptr,
  377. const wait_list &events = wait_list())
  378. {
  379. BOOST_ASSERT(m_queue != 0);
  380. BOOST_ASSERT(size <= buffer.size());
  381. BOOST_ASSERT(buffer.get_context() == this->get_context());
  382. BOOST_ASSERT(host_ptr != 0);
  383. event event_;
  384. cl_int ret = clEnqueueWriteBuffer(
  385. m_queue,
  386. buffer.get(),
  387. CL_TRUE,
  388. offset,
  389. size,
  390. host_ptr,
  391. events.size(),
  392. events.get_event_ptr(),
  393. &event_.get()
  394. );
  395. if(ret != CL_SUCCESS){
  396. BOOST_THROW_EXCEPTION(opencl_error(ret));
  397. }
  398. return event_;
  399. }
  400. /// Enqueues a command to write data from host memory to \p buffer.
  401. /// The copy is performed asynchronously.
  402. ///
  403. /// \see_opencl_ref{clEnqueueWriteBuffer}
  404. ///
  405. /// \see copy_async()
  406. event enqueue_write_buffer_async(const buffer &buffer,
  407. size_t offset,
  408. size_t size,
  409. const void *host_ptr,
  410. const wait_list &events = wait_list())
  411. {
  412. BOOST_ASSERT(m_queue != 0);
  413. BOOST_ASSERT(size <= buffer.size());
  414. BOOST_ASSERT(buffer.get_context() == this->get_context());
  415. BOOST_ASSERT(host_ptr != 0);
  416. event event_;
  417. cl_int ret = clEnqueueWriteBuffer(
  418. m_queue,
  419. buffer.get(),
  420. CL_FALSE,
  421. offset,
  422. size,
  423. host_ptr,
  424. events.size(),
  425. events.get_event_ptr(),
  426. &event_.get()
  427. );
  428. if(ret != CL_SUCCESS){
  429. BOOST_THROW_EXCEPTION(opencl_error(ret));
  430. }
  431. return event_;
  432. }
  433. #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
    /// Enqueues a blocking command to write a rectangular region from
    /// host memory to \p buffer.
    ///
    /// Parameters mirror enqueue_read_buffer_rect(), with \p host_ptr as
    /// the source and \p buffer as the destination.
    ///
    /// \return event associated with the write command
    /// \throws opencl_error on failure
    ///
    /// \see_opencl_ref{clEnqueueWriteBufferRect}
    ///
    /// \opencl_version_warning{1,1}
    event enqueue_write_buffer_rect(const buffer &buffer,
                                    const size_t buffer_origin[3],
                                    const size_t host_origin[3],
                                    const size_t region[3],
                                    size_t buffer_row_pitch,
                                    size_t buffer_slice_pitch,
                                    size_t host_row_pitch,
                                    size_t host_slice_pitch,
                                    void *host_ptr,
                                    const wait_list &events = wait_list())
    {
        BOOST_ASSERT(m_queue != 0);
        BOOST_ASSERT(buffer.get_context() == this->get_context());
        BOOST_ASSERT(host_ptr != 0);

        event event_;

        cl_int ret = clEnqueueWriteBufferRect(
            m_queue,
            buffer.get(),
            CL_TRUE,               // blocking write
            buffer_origin,
            host_origin,
            region,
            buffer_row_pitch,
            buffer_slice_pitch,
            host_row_pitch,
            host_slice_pitch,
            host_ptr,
            events.size(),
            events.get_event_ptr(),
            &event_.get()
        );

        if(ret != CL_SUCCESS){
            BOOST_THROW_EXCEPTION(opencl_error(ret));
        }

        return event_;
    }
    /// Enqueues a command to write a rectangular region from host memory
    /// to \p buffer. The copy is performed asynchronously; \p host_ptr
    /// must stay valid until the returned event completes.
    ///
    /// Parameters are as for enqueue_write_buffer_rect().
    ///
    /// \return event associated with the write command
    /// \throws opencl_error on failure
    ///
    /// \see_opencl_ref{clEnqueueWriteBufferRect}
    ///
    /// \opencl_version_warning{1,1}
    event enqueue_write_buffer_rect_async(const buffer &buffer,
                                          const size_t buffer_origin[3],
                                          const size_t host_origin[3],
                                          const size_t region[3],
                                          size_t buffer_row_pitch,
                                          size_t buffer_slice_pitch,
                                          size_t host_row_pitch,
                                          size_t host_slice_pitch,
                                          void *host_ptr,
                                          const wait_list &events = wait_list())
    {
        BOOST_ASSERT(m_queue != 0);
        BOOST_ASSERT(buffer.get_context() == this->get_context());
        BOOST_ASSERT(host_ptr != 0);

        event event_;

        cl_int ret = clEnqueueWriteBufferRect(
            m_queue,
            buffer.get(),
            CL_FALSE,              // non-blocking write
            buffer_origin,
            host_origin,
            region,
            buffer_row_pitch,
            buffer_slice_pitch,
            host_row_pitch,
            host_slice_pitch,
            host_ptr,
            events.size(),
            events.get_event_ptr(),
            &event_.get()
        );

        if(ret != CL_SUCCESS){
            BOOST_THROW_EXCEPTION(opencl_error(ret));
        }

        return event_;
    }
  518. #endif // CL_VERSION_1_1
    /// Enqueues a command to copy data from \p src_buffer to
    /// \p dst_buffer. The copy is device-side and asynchronous.
    ///
    /// \param src_buffer source buffer
    /// \param dst_buffer destination buffer (same context as source)
    /// \param src_offset offset in bytes into \p src_buffer
    /// \param dst_offset offset in bytes into \p dst_buffer
    /// \param size       number of bytes to copy
    /// \param events     events to wait for before starting (optional)
    ///
    /// \return event associated with the copy command
    /// \throws opencl_error on failure
    ///
    /// \see_opencl_ref{clEnqueueCopyBuffer}
    ///
    /// \see copy()
    event enqueue_copy_buffer(const buffer &src_buffer,
                              const buffer &dst_buffer,
                              size_t src_offset,
                              size_t dst_offset,
                              size_t size,
                              const wait_list &events = wait_list())
    {
        BOOST_ASSERT(m_queue != 0);
        BOOST_ASSERT(src_offset + size <= src_buffer.size());
        BOOST_ASSERT(dst_offset + size <= dst_buffer.size());
        BOOST_ASSERT(src_buffer.get_context() == this->get_context());
        BOOST_ASSERT(dst_buffer.get_context() == this->get_context());

        event event_;

        cl_int ret = clEnqueueCopyBuffer(
            m_queue,
            src_buffer.get(),
            dst_buffer.get(),
            src_offset,
            dst_offset,
            size,
            events.size(),
            events.get_event_ptr(),
            &event_.get()
        );

        if(ret != CL_SUCCESS){
            BOOST_THROW_EXCEPTION(opencl_error(ret));
        }

        return event_;
    }
  554. #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
    /// Enqueues a command to copy a rectangular region from
    /// \p src_buffer to \p dst_buffer.
    ///
    /// NOTE(review): despite their names, \p buffer_row_pitch and
    /// \p buffer_slice_pitch are forwarded as clEnqueueCopyBufferRect()'s
    /// src_row_pitch/src_slice_pitch, and \p host_row_pitch and
    /// \p host_slice_pitch as its dst_row_pitch/dst_slice_pitch — both
    /// sides of this copy are device buffers, no host memory is involved.
    ///
    /// \return event associated with the copy command
    /// \throws opencl_error on failure
    ///
    /// \see_opencl_ref{clEnqueueCopyBufferRect}
    ///
    /// \opencl_version_warning{1,1}
    event enqueue_copy_buffer_rect(const buffer &src_buffer,
                                   const buffer &dst_buffer,
                                   const size_t src_origin[3],
                                   const size_t dst_origin[3],
                                   const size_t region[3],
                                   size_t buffer_row_pitch,
                                   size_t buffer_slice_pitch,
                                   size_t host_row_pitch,
                                   size_t host_slice_pitch,
                                   const wait_list &events = wait_list())
    {
        BOOST_ASSERT(m_queue != 0);
        BOOST_ASSERT(src_buffer.get_context() == this->get_context());
        BOOST_ASSERT(dst_buffer.get_context() == this->get_context());

        event event_;

        cl_int ret = clEnqueueCopyBufferRect(
            m_queue,
            src_buffer.get(),
            dst_buffer.get(),
            src_origin,
            dst_origin,
            region,
            buffer_row_pitch,      // src_row_pitch
            buffer_slice_pitch,    // src_slice_pitch
            host_row_pitch,        // dst_row_pitch
            host_slice_pitch,      // dst_slice_pitch
            events.size(),
            events.get_event_ptr(),
            &event_.get()
        );

        if(ret != CL_SUCCESS){
            BOOST_THROW_EXCEPTION(opencl_error(ret));
        }

        return event_;
    }
  596. #endif // CL_VERSION_1_1
  597. #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
    /// Enqueues a command to fill \p buffer with \p pattern.
    ///
    /// \param buffer       buffer to fill
    /// \param pattern      pointer to the fill pattern
    /// \param pattern_size size of the pattern in bytes (the OpenCL spec
    ///        restricts valid sizes — see clEnqueueFillBuffer)
    /// \param offset       offset in bytes into \p buffer
    /// \param size         number of bytes to fill
    /// \param events       events to wait for before starting (optional)
    ///
    /// \return event associated with the fill command
    /// \throws opencl_error on failure
    ///
    /// \see_opencl_ref{clEnqueueFillBuffer}
    ///
    /// \opencl_version_warning{1,2}
    ///
    /// \see fill()
    event enqueue_fill_buffer(const buffer &buffer,
                              const void *pattern,
                              size_t pattern_size,
                              size_t offset,
                              size_t size,
                              const wait_list &events = wait_list())
    {
        BOOST_ASSERT(m_queue != 0);
        BOOST_ASSERT(offset + size <= buffer.size());
        BOOST_ASSERT(buffer.get_context() == this->get_context());

        event event_;

        cl_int ret = clEnqueueFillBuffer(
            m_queue,
            buffer.get(),
            pattern,
            pattern_size,
            offset,
            size,
            events.size(),
            events.get_event_ptr(),
            &event_.get()
        );

        if(ret != CL_SUCCESS){
            BOOST_THROW_EXCEPTION(opencl_error(ret));
        }

        return event_;
    }
  632. #endif // CL_VERSION_1_2
    /// Enqueues a blocking command to map \p buffer into the host address
    /// space. The event associated with the map operation is returned
    /// through the \p map_buffer_event out-parameter.
    ///
    /// \param buffer buffer to map (must belong to this queue's context)
    /// \param flags  map_flags combination (read/write/invalidate)
    /// \param offset offset in bytes into \p buffer
    /// \param size   number of bytes to map
    /// \param map_buffer_event receives the map command's event
    /// \param events events to wait for before mapping (optional)
    ///
    /// \return pointer to the mapped host region (valid immediately,
    ///         since the map is blocking)
    /// \throws opencl_error on failure
    ///
    /// \see_opencl_ref{clEnqueueMapBuffer}
    void* enqueue_map_buffer(const buffer &buffer,
                             cl_map_flags flags,
                             size_t offset,
                             size_t size,
                             event &map_buffer_event,
                             const wait_list &events = wait_list())
    {
        BOOST_ASSERT(m_queue != 0);
        BOOST_ASSERT(offset + size <= buffer.size());
        BOOST_ASSERT(buffer.get_context() == this->get_context());

        cl_int ret = 0;
        void *pointer = clEnqueueMapBuffer(
            m_queue,
            buffer.get(),
            CL_TRUE,               // blocking map
            flags,
            offset,
            size,
            events.size(),
            events.get_event_ptr(),
            &map_buffer_event.get(),
            &ret
        );

        if(ret != CL_SUCCESS){
            BOOST_THROW_EXCEPTION(opencl_error(ret));
        }

        return pointer;
    }
  666. /// \overload
  667. void* enqueue_map_buffer(const buffer &buffer,
  668. cl_map_flags flags,
  669. size_t offset,
  670. size_t size,
  671. const wait_list &events = wait_list())
  672. {
  673. event event_;
  674. return enqueue_map_buffer(buffer, flags, offset, size, event_, events);
  675. }
    /// Enqueues a command to map \p buffer into the host address space.
    /// The map operation is performed asynchronously: the returned
    /// pointer must not be dereferenced until \p map_buffer_event has
    /// completed.
    ///
    /// \param buffer buffer to map (must belong to this queue's context)
    /// \param flags  map_flags combination (read/write/invalidate)
    /// \param offset offset in bytes into \p buffer
    /// \param size   number of bytes to map
    /// \param map_buffer_event receives the map command's event
    /// \param events events to wait for before mapping (optional)
    ///
    /// \return pointer to the (not-yet-usable) mapped host region
    /// \throws opencl_error on failure
    ///
    /// \see_opencl_ref{clEnqueueMapBuffer}
    void* enqueue_map_buffer_async(const buffer &buffer,
                                   cl_map_flags flags,
                                   size_t offset,
                                   size_t size,
                                   event &map_buffer_event,
                                   const wait_list &events = wait_list())
    {
        BOOST_ASSERT(m_queue != 0);
        BOOST_ASSERT(offset + size <= buffer.size());
        BOOST_ASSERT(buffer.get_context() == this->get_context());

        cl_int ret = 0;
        void *pointer = clEnqueueMapBuffer(
            m_queue,
            buffer.get(),
            CL_FALSE,              // non-blocking map
            flags,
            offset,
            size,
            events.size(),
            events.get_event_ptr(),
            &map_buffer_event.get(),
            &ret
        );

        if(ret != CL_SUCCESS){
            BOOST_THROW_EXCEPTION(opencl_error(ret));
        }

        return pointer;
    }
    /// Enqueues a command to unmap \p buffer from the host memory space.
    ///
    /// \param buffer     previously-mapped buffer
    /// \param mapped_ptr pointer returned by enqueue_map_buffer()
    /// \param events     events to wait for before unmapping (optional)
    ///
    /// \return event associated with the unmap command
    /// \throws opencl_error on failure
    ///
    /// \see_opencl_ref{clEnqueueUnmapMemObject}
    event enqueue_unmap_buffer(const buffer &buffer,
                               void *mapped_ptr,
                               const wait_list &events = wait_list())
    {
        BOOST_ASSERT(buffer.get_context() == this->get_context());

        return enqueue_unmap_mem_object(buffer.get(), mapped_ptr, events);
    }
    /// Enqueues a command to unmap \p mem from the host memory space.
    ///
    /// \param mem        raw OpenCL memory object handle
    /// \param mapped_ptr pointer previously returned by a map command
    /// \param events     events to wait for before unmapping (optional)
    ///
    /// \return event associated with the unmap command
    /// \throws opencl_error on failure
    ///
    /// \see_opencl_ref{clEnqueueUnmapMemObject}
    event enqueue_unmap_mem_object(cl_mem mem,
                                   void *mapped_ptr,
                                   const wait_list &events = wait_list())
    {
        BOOST_ASSERT(m_queue != 0);

        event event_;

        cl_int ret = clEnqueueUnmapMemObject(
            m_queue,
            mem,
            mapped_ptr,
            events.size(),
            events.get_event_ptr(),
            &event_.get()
        );

        if(ret != CL_SUCCESS){
            BOOST_THROW_EXCEPTION(opencl_error(ret));
        }

        return event_;
    }
    /// Enqueues a blocking command to read data from \p image to host
    /// memory.
    ///
    /// \param image       source image
    /// \param origin      (x, y, z) origin of the region to read
    /// \param region      (width, height, depth) of the region
    /// \param row_pitch   host row pitch in bytes (0 = tightly packed)
    /// \param slice_pitch host slice pitch in bytes (0 = tightly packed)
    /// \param host_ptr    destination in host memory
    /// \param events      events to wait for before reading (optional)
    ///
    /// \return event associated with the read command
    /// \throws opencl_error on failure
    ///
    /// \see_opencl_ref{clEnqueueReadImage}
    event enqueue_read_image(const image_object& image,
                             const size_t *origin,
                             const size_t *region,
                             size_t row_pitch,
                             size_t slice_pitch,
                             void *host_ptr,
                             const wait_list &events = wait_list())
    {
        BOOST_ASSERT(m_queue != 0);

        event event_;

        cl_int ret = clEnqueueReadImage(
            m_queue,
            image.get(),
            CL_TRUE,               // blocking read
            origin,
            region,
            row_pitch,
            slice_pitch,
            host_ptr,
            events.size(),
            events.get_event_ptr(),
            &event_.get()
        );

        if(ret != CL_SUCCESS){
            BOOST_THROW_EXCEPTION(opencl_error(ret));
        }

        return event_;
    }
  775. /// \overload
  776. template<size_t N>
  777. event enqueue_read_image(const image_object& image,
  778. const extents<N> origin,
  779. const extents<N> region,
  780. void *host_ptr,
  781. size_t row_pitch = 0,
  782. size_t slice_pitch = 0,
  783. const wait_list &events = wait_list())
  784. {
  785. BOOST_ASSERT(image.get_context() == this->get_context());
  786. size_t origin3[3] = { 0, 0, 0 };
  787. size_t region3[3] = { 1, 1, 1 };
  788. std::copy(origin.data(), origin.data() + N, origin3);
  789. std::copy(region.data(), region.data() + N, region3);
  790. return enqueue_read_image(
  791. image, origin3, region3, row_pitch, slice_pitch, host_ptr, events
  792. );
  793. }
    /// Enqueues a blocking command to write data from host memory to
    /// \p image.
    ///
    /// \param image             destination image
    /// \param origin            (x, y, z) origin of the region to write
    /// \param region            (width, height, depth) of the region
    /// \param host_ptr          source in host memory
    /// \param input_row_pitch   host row pitch in bytes (0 = packed)
    /// \param input_slice_pitch host slice pitch in bytes (0 = packed)
    /// \param events            events to wait for before writing
    ///
    /// \return event associated with the write command
    /// \throws opencl_error on failure
    ///
    /// \see_opencl_ref{clEnqueueWriteImage}
    event enqueue_write_image(image_object& image,
                              const size_t *origin,
                              const size_t *region,
                              const void *host_ptr,
                              size_t input_row_pitch = 0,
                              size_t input_slice_pitch = 0,
                              const wait_list &events = wait_list())
    {
        BOOST_ASSERT(m_queue != 0);

        event event_;

        cl_int ret = clEnqueueWriteImage(
            m_queue,
            image.get(),
            CL_TRUE,               // blocking write
            origin,
            region,
            input_row_pitch,
            input_slice_pitch,
            host_ptr,
            events.size(),
            events.get_event_ptr(),
            &event_.get()
        );

        if(ret != CL_SUCCESS){
            BOOST_THROW_EXCEPTION(opencl_error(ret));
        }

        return event_;
    }
  825. /// \overload
  826. template<size_t N>
  827. event enqueue_write_image(image_object& image,
  828. const extents<N> origin,
  829. const extents<N> region,
  830. const void *host_ptr,
  831. const size_t input_row_pitch = 0,
  832. const size_t input_slice_pitch = 0,
  833. const wait_list &events = wait_list())
  834. {
  835. BOOST_ASSERT(image.get_context() == this->get_context());
  836. size_t origin3[3] = { 0, 0, 0 };
  837. size_t region3[3] = { 1, 1, 1 };
  838. std::copy(origin.data(), origin.data() + N, origin3);
  839. std::copy(region.data(), region.data() + N, region3);
  840. return enqueue_write_image(
  841. image, origin3, region3, host_ptr, input_row_pitch, input_slice_pitch, events
  842. );
  843. }
  844. /// Enqueues a command to map \p image into the host address space.
  845. ///
  846. /// Event associated with map operation is returned through
  847. /// \p map_image_event parameter.
  848. ///
  849. /// \see_opencl_ref{clEnqueueMapImage}
  850. void* enqueue_map_image(const image_object &image,
  851. cl_map_flags flags,
  852. const size_t *origin,
  853. const size_t *region,
  854. size_t &output_row_pitch,
  855. size_t &output_slice_pitch,
  856. event &map_image_event,
  857. const wait_list &events = wait_list())
  858. {
  859. BOOST_ASSERT(m_queue != 0);
  860. BOOST_ASSERT(image.get_context() == this->get_context());
  861. cl_int ret = 0;
  862. void *pointer = clEnqueueMapImage(
  863. m_queue,
  864. image.get(),
  865. CL_TRUE,
  866. flags,
  867. origin,
  868. region,
  869. &output_row_pitch,
  870. &output_slice_pitch,
  871. events.size(),
  872. events.get_event_ptr(),
  873. &map_image_event.get(),
  874. &ret
  875. );
  876. if(ret != CL_SUCCESS){
  877. BOOST_THROW_EXCEPTION(opencl_error(ret));
  878. }
  879. return pointer;
  880. }
  881. /// \overload
  882. void* enqueue_map_image(const image_object &image,
  883. cl_map_flags flags,
  884. const size_t *origin,
  885. const size_t *region,
  886. size_t &output_row_pitch,
  887. size_t &output_slice_pitch,
  888. const wait_list &events = wait_list())
  889. {
  890. event event_;
  891. return enqueue_map_image(
  892. image, flags, origin, region,
  893. output_row_pitch, output_slice_pitch, event_, events
  894. );
  895. }
  896. /// \overload
  897. template<size_t N>
  898. void* enqueue_map_image(image_object& image,
  899. cl_map_flags flags,
  900. const extents<N> origin,
  901. const extents<N> region,
  902. size_t &output_row_pitch,
  903. size_t &output_slice_pitch,
  904. event &map_image_event,
  905. const wait_list &events = wait_list())
  906. {
  907. BOOST_ASSERT(image.get_context() == this->get_context());
  908. size_t origin3[3] = { 0, 0, 0 };
  909. size_t region3[3] = { 1, 1, 1 };
  910. std::copy(origin.data(), origin.data() + N, origin3);
  911. std::copy(region.data(), region.data() + N, region3);
  912. return enqueue_map_image(
  913. image, flags, origin3, region3,
  914. output_row_pitch, output_slice_pitch, map_image_event, events
  915. );
  916. }
  917. /// \overload
  918. template<size_t N>
  919. void* enqueue_map_image(image_object& image,
  920. cl_map_flags flags,
  921. const extents<N> origin,
  922. const extents<N> region,
  923. size_t &output_row_pitch,
  924. size_t &output_slice_pitch,
  925. const wait_list &events = wait_list())
  926. {
  927. event event_;
  928. return enqueue_map_image(
  929. image, flags, origin, region,
  930. output_row_pitch, output_slice_pitch, event_, events
  931. );
  932. }
  933. /// Enqueues a command to map \p image into the host address space.
  934. /// Map operation is performed asynchronously. The pointer to the mapped
  935. /// region cannot be used until the map operation has completed.
  936. ///
  937. /// Event associated with map operation is returned through
  938. /// \p map_image_event parameter.
  939. ///
  940. /// \see_opencl_ref{clEnqueueMapImage}
  941. void* enqueue_map_image_async(const image_object &image,
  942. cl_map_flags flags,
  943. const size_t *origin,
  944. const size_t *region,
  945. size_t &output_row_pitch,
  946. size_t &output_slice_pitch,
  947. event &map_image_event,
  948. const wait_list &events = wait_list())
  949. {
  950. BOOST_ASSERT(m_queue != 0);
  951. BOOST_ASSERT(image.get_context() == this->get_context());
  952. cl_int ret = 0;
  953. void *pointer = clEnqueueMapImage(
  954. m_queue,
  955. image.get(),
  956. CL_FALSE,
  957. flags,
  958. origin,
  959. region,
  960. &output_row_pitch,
  961. &output_slice_pitch,
  962. events.size(),
  963. events.get_event_ptr(),
  964. &map_image_event.get(),
  965. &ret
  966. );
  967. if(ret != CL_SUCCESS){
  968. BOOST_THROW_EXCEPTION(opencl_error(ret));
  969. }
  970. return pointer;
  971. }
  972. /// \overload
  973. template<size_t N>
  974. void* enqueue_map_image_async(image_object& image,
  975. cl_map_flags flags,
  976. const extents<N> origin,
  977. const extents<N> region,
  978. size_t &output_row_pitch,
  979. size_t &output_slice_pitch,
  980. event &map_image_event,
  981. const wait_list &events = wait_list())
  982. {
  983. BOOST_ASSERT(image.get_context() == this->get_context());
  984. size_t origin3[3] = { 0, 0, 0 };
  985. size_t region3[3] = { 1, 1, 1 };
  986. std::copy(origin.data(), origin.data() + N, origin3);
  987. std::copy(region.data(), region.data() + N, region3);
  988. return enqueue_map_image_async(
  989. image, flags, origin3, region3,
  990. output_row_pitch, output_slice_pitch, map_image_event, events
  991. );
  992. }
  993. /// Enqueues a command to unmap \p image from the host memory space.
  994. ///
  995. /// \see_opencl_ref{clEnqueueUnmapMemObject}
  996. event enqueue_unmap_image(const image_object &image,
  997. void *mapped_ptr,
  998. const wait_list &events = wait_list())
  999. {
  1000. BOOST_ASSERT(image.get_context() == this->get_context());
  1001. return enqueue_unmap_mem_object(image.get(), mapped_ptr, events);
  1002. }
  1003. /// Enqueues a command to copy data from \p src_image to \p dst_image.
  1004. ///
  1005. /// \see_opencl_ref{clEnqueueCopyImage}
  1006. event enqueue_copy_image(const image_object& src_image,
  1007. image_object& dst_image,
  1008. const size_t *src_origin,
  1009. const size_t *dst_origin,
  1010. const size_t *region,
  1011. const wait_list &events = wait_list())
  1012. {
  1013. BOOST_ASSERT(m_queue != 0);
  1014. event event_;
  1015. cl_int ret = clEnqueueCopyImage(
  1016. m_queue,
  1017. src_image.get(),
  1018. dst_image.get(),
  1019. src_origin,
  1020. dst_origin,
  1021. region,
  1022. events.size(),
  1023. events.get_event_ptr(),
  1024. &event_.get()
  1025. );
  1026. if(ret != CL_SUCCESS){
  1027. BOOST_THROW_EXCEPTION(opencl_error(ret));
  1028. }
  1029. return event_;
  1030. }
  1031. /// \overload
  1032. template<size_t N>
  1033. event enqueue_copy_image(const image_object& src_image,
  1034. image_object& dst_image,
  1035. const extents<N> src_origin,
  1036. const extents<N> dst_origin,
  1037. const extents<N> region,
  1038. const wait_list &events = wait_list())
  1039. {
  1040. BOOST_ASSERT(src_image.get_context() == this->get_context());
  1041. BOOST_ASSERT(dst_image.get_context() == this->get_context());
  1042. BOOST_ASSERT_MSG(src_image.format() == dst_image.format(),
  1043. "Source and destination image formats must match.");
  1044. size_t src_origin3[3] = { 0, 0, 0 };
  1045. size_t dst_origin3[3] = { 0, 0, 0 };
  1046. size_t region3[3] = { 1, 1, 1 };
  1047. std::copy(src_origin.data(), src_origin.data() + N, src_origin3);
  1048. std::copy(dst_origin.data(), dst_origin.data() + N, dst_origin3);
  1049. std::copy(region.data(), region.data() + N, region3);
  1050. return enqueue_copy_image(
  1051. src_image, dst_image, src_origin3, dst_origin3, region3, events
  1052. );
  1053. }
  1054. /// Enqueues a command to copy data from \p src_image to \p dst_buffer.
  1055. ///
  1056. /// \see_opencl_ref{clEnqueueCopyImageToBuffer}
  1057. event enqueue_copy_image_to_buffer(const image_object& src_image,
  1058. memory_object& dst_buffer,
  1059. const size_t *src_origin,
  1060. const size_t *region,
  1061. size_t dst_offset,
  1062. const wait_list &events = wait_list())
  1063. {
  1064. BOOST_ASSERT(m_queue != 0);
  1065. event event_;
  1066. cl_int ret = clEnqueueCopyImageToBuffer(
  1067. m_queue,
  1068. src_image.get(),
  1069. dst_buffer.get(),
  1070. src_origin,
  1071. region,
  1072. dst_offset,
  1073. events.size(),
  1074. events.get_event_ptr(),
  1075. &event_.get()
  1076. );
  1077. if(ret != CL_SUCCESS){
  1078. BOOST_THROW_EXCEPTION(opencl_error(ret));
  1079. }
  1080. return event_;
  1081. }
  1082. /// Enqueues a command to copy data from \p src_buffer to \p dst_image.
  1083. ///
  1084. /// \see_opencl_ref{clEnqueueCopyBufferToImage}
  1085. event enqueue_copy_buffer_to_image(const memory_object& src_buffer,
  1086. image_object& dst_image,
  1087. size_t src_offset,
  1088. const size_t *dst_origin,
  1089. const size_t *region,
  1090. const wait_list &events = wait_list())
  1091. {
  1092. BOOST_ASSERT(m_queue != 0);
  1093. event event_;
  1094. cl_int ret = clEnqueueCopyBufferToImage(
  1095. m_queue,
  1096. src_buffer.get(),
  1097. dst_image.get(),
  1098. src_offset,
  1099. dst_origin,
  1100. region,
  1101. events.size(),
  1102. events.get_event_ptr(),
  1103. &event_.get()
  1104. );
  1105. if(ret != CL_SUCCESS){
  1106. BOOST_THROW_EXCEPTION(opencl_error(ret));
  1107. }
  1108. return event_;
  1109. }
  1110. #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
  1111. /// Enqueues a command to fill \p image with \p fill_color.
  1112. ///
  1113. /// \see_opencl_ref{clEnqueueFillImage}
  1114. ///
  1115. /// \opencl_version_warning{1,2}
  1116. event enqueue_fill_image(image_object& image,
  1117. const void *fill_color,
  1118. const size_t *origin,
  1119. const size_t *region,
  1120. const wait_list &events = wait_list())
  1121. {
  1122. BOOST_ASSERT(m_queue != 0);
  1123. event event_;
  1124. cl_int ret = clEnqueueFillImage(
  1125. m_queue,
  1126. image.get(),
  1127. fill_color,
  1128. origin,
  1129. region,
  1130. events.size(),
  1131. events.get_event_ptr(),
  1132. &event_.get()
  1133. );
  1134. if(ret != CL_SUCCESS){
  1135. BOOST_THROW_EXCEPTION(opencl_error(ret));
  1136. }
  1137. return event_;
  1138. }
  1139. /// \overload
  1140. template<size_t N>
  1141. event enqueue_fill_image(image_object& image,
  1142. const void *fill_color,
  1143. const extents<N> origin,
  1144. const extents<N> region,
  1145. const wait_list &events = wait_list())
  1146. {
  1147. BOOST_ASSERT(image.get_context() == this->get_context());
  1148. size_t origin3[3] = { 0, 0, 0 };
  1149. size_t region3[3] = { 1, 1, 1 };
  1150. std::copy(origin.data(), origin.data() + N, origin3);
  1151. std::copy(region.data(), region.data() + N, region3);
  1152. return enqueue_fill_image(
  1153. image, fill_color, origin3, region3, events
  1154. );
  1155. }
  1156. /// Enqueues a command to migrate \p mem_objects.
  1157. ///
  1158. /// \see_opencl_ref{clEnqueueMigrateMemObjects}
  1159. ///
  1160. /// \opencl_version_warning{1,2}
  1161. event enqueue_migrate_memory_objects(uint_ num_mem_objects,
  1162. const cl_mem *mem_objects,
  1163. cl_mem_migration_flags flags,
  1164. const wait_list &events = wait_list())
  1165. {
  1166. BOOST_ASSERT(m_queue != 0);
  1167. event event_;
  1168. cl_int ret = clEnqueueMigrateMemObjects(
  1169. m_queue,
  1170. num_mem_objects,
  1171. mem_objects,
  1172. flags,
  1173. events.size(),
  1174. events.get_event_ptr(),
  1175. &event_.get()
  1176. );
  1177. if(ret != CL_SUCCESS){
  1178. BOOST_THROW_EXCEPTION(opencl_error(ret));
  1179. }
  1180. return event_;
  1181. }
  1182. #endif // CL_VERSION_1_2
  1183. /// Enqueues a kernel for execution.
  1184. ///
  1185. /// \see_opencl_ref{clEnqueueNDRangeKernel}
  1186. event enqueue_nd_range_kernel(const kernel &kernel,
  1187. size_t work_dim,
  1188. const size_t *global_work_offset,
  1189. const size_t *global_work_size,
  1190. const size_t *local_work_size,
  1191. const wait_list &events = wait_list())
  1192. {
  1193. BOOST_ASSERT(m_queue != 0);
  1194. BOOST_ASSERT(kernel.get_context() == this->get_context());
  1195. event event_;
  1196. cl_int ret = clEnqueueNDRangeKernel(
  1197. m_queue,
  1198. kernel,
  1199. static_cast<cl_uint>(work_dim),
  1200. global_work_offset,
  1201. global_work_size,
  1202. local_work_size,
  1203. events.size(),
  1204. events.get_event_ptr(),
  1205. &event_.get()
  1206. );
  1207. if(ret != CL_SUCCESS){
  1208. BOOST_THROW_EXCEPTION(opencl_error(ret));
  1209. }
  1210. return event_;
  1211. }
  1212. /// \overload
  1213. template<size_t N>
  1214. event enqueue_nd_range_kernel(const kernel &kernel,
  1215. const extents<N> &global_work_offset,
  1216. const extents<N> &global_work_size,
  1217. const extents<N> &local_work_size,
  1218. const wait_list &events = wait_list())
  1219. {
  1220. return enqueue_nd_range_kernel(
  1221. kernel,
  1222. N,
  1223. global_work_offset.data(),
  1224. global_work_size.data(),
  1225. local_work_size.data(),
  1226. events
  1227. );
  1228. }
  1229. /// Convenience method which calls enqueue_nd_range_kernel() with a
  1230. /// one-dimensional range.
  1231. event enqueue_1d_range_kernel(const kernel &kernel,
  1232. size_t global_work_offset,
  1233. size_t global_work_size,
  1234. size_t local_work_size,
  1235. const wait_list &events = wait_list())
  1236. {
  1237. return enqueue_nd_range_kernel(
  1238. kernel,
  1239. 1,
  1240. &global_work_offset,
  1241. &global_work_size,
  1242. local_work_size ? &local_work_size : 0,
  1243. events
  1244. );
  1245. }
  1246. /// Enqueues a kernel to execute using a single work-item.
  1247. ///
  1248. /// \see_opencl_ref{clEnqueueTask}
  1249. event enqueue_task(const kernel &kernel, const wait_list &events = wait_list())
  1250. {
  1251. BOOST_ASSERT(m_queue != 0);
  1252. BOOST_ASSERT(kernel.get_context() == this->get_context());
  1253. event event_;
  1254. // clEnqueueTask() was deprecated in OpenCL 2.0. In that case we
  1255. // just forward to the equivalent clEnqueueNDRangeKernel() call.
  1256. #ifdef CL_VERSION_2_0
  1257. size_t one = 1;
  1258. cl_int ret = clEnqueueNDRangeKernel(
  1259. m_queue, kernel, 1, 0, &one, &one,
  1260. events.size(), events.get_event_ptr(), &event_.get()
  1261. );
  1262. #else
  1263. cl_int ret = clEnqueueTask(
  1264. m_queue, kernel, events.size(), events.get_event_ptr(), &event_.get()
  1265. );
  1266. #endif
  1267. if(ret != CL_SUCCESS){
  1268. BOOST_THROW_EXCEPTION(opencl_error(ret));
  1269. }
  1270. return event_;
  1271. }
  1272. /// Enqueues a function to execute on the host.
  1273. event enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void *),
  1274. void *args,
  1275. size_t cb_args,
  1276. uint_ num_mem_objects,
  1277. const cl_mem *mem_list,
  1278. const void **args_mem_loc,
  1279. const wait_list &events = wait_list())
  1280. {
  1281. BOOST_ASSERT(m_queue != 0);
  1282. event event_;
  1283. cl_int ret = clEnqueueNativeKernel(
  1284. m_queue,
  1285. user_func,
  1286. args,
  1287. cb_args,
  1288. num_mem_objects,
  1289. mem_list,
  1290. args_mem_loc,
  1291. events.size(),
  1292. events.get_event_ptr(),
  1293. &event_.get()
  1294. );
  1295. if(ret != CL_SUCCESS){
  1296. BOOST_THROW_EXCEPTION(opencl_error(ret));
  1297. }
  1298. return event_;
  1299. }
  1300. /// Convenience overload for enqueue_native_kernel() which enqueues a
  1301. /// native kernel on the host with a nullary function.
  1302. event enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void),
  1303. const wait_list &events = wait_list())
  1304. {
  1305. return enqueue_native_kernel(
  1306. detail::nullary_native_kernel_trampoline,
  1307. reinterpret_cast<void *>(&user_func),
  1308. sizeof(user_func),
  1309. 0,
  1310. 0,
  1311. 0,
  1312. events
  1313. );
  1314. }
  1315. /// Flushes the command queue.
  1316. ///
  1317. /// \see_opencl_ref{clFlush}
  1318. void flush()
  1319. {
  1320. BOOST_ASSERT(m_queue != 0);
  1321. clFlush(m_queue);
  1322. }
  1323. /// Blocks until all outstanding commands in the queue have finished.
  1324. ///
  1325. /// \see_opencl_ref{clFinish}
  1326. void finish()
  1327. {
  1328. BOOST_ASSERT(m_queue != 0);
  1329. clFinish(m_queue);
  1330. }
  1331. /// Enqueues a barrier in the queue.
  1332. void enqueue_barrier()
  1333. {
  1334. BOOST_ASSERT(m_queue != 0);
  1335. cl_int ret = CL_SUCCESS;
  1336. #ifdef CL_VERSION_1_2
  1337. if(get_device().check_version(1, 2)){
  1338. ret = clEnqueueBarrierWithWaitList(m_queue, 0, 0, 0);
  1339. } else
  1340. #endif // CL_VERSION_1_2
  1341. {
  1342. // Suppress deprecated declarations warning
  1343. BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
  1344. ret = clEnqueueBarrier(m_queue);
  1345. BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
  1346. }
  1347. if(ret != CL_SUCCESS){
  1348. BOOST_THROW_EXCEPTION(opencl_error(ret));
  1349. }
  1350. }
  1351. #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
  1352. /// Enqueues a barrier in the queue after \p events.
  1353. ///
  1354. /// \opencl_version_warning{1,2}
  1355. event enqueue_barrier(const wait_list &events)
  1356. {
  1357. BOOST_ASSERT(m_queue != 0);
  1358. event event_;
  1359. cl_int ret = CL_SUCCESS;
  1360. ret = clEnqueueBarrierWithWaitList(
  1361. m_queue, events.size(), events.get_event_ptr(), &event_.get()
  1362. );
  1363. if(ret != CL_SUCCESS){
  1364. BOOST_THROW_EXCEPTION(opencl_error(ret));
  1365. }
  1366. return event_;
  1367. }
  1368. #endif // CL_VERSION_1_2
  1369. /// Enqueues a marker in the queue and returns an event that can be
  1370. /// used to track its progress.
  1371. event enqueue_marker()
  1372. {
  1373. event event_;
  1374. cl_int ret = CL_SUCCESS;
  1375. #ifdef CL_VERSION_1_2
  1376. if(get_device().check_version(1, 2)){
  1377. ret = clEnqueueMarkerWithWaitList(m_queue, 0, 0, &event_.get());
  1378. } else
  1379. #endif
  1380. {
  1381. // Suppress deprecated declarations warning
  1382. BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
  1383. ret = clEnqueueMarker(m_queue, &event_.get());
  1384. BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
  1385. }
  1386. if(ret != CL_SUCCESS){
  1387. BOOST_THROW_EXCEPTION(opencl_error(ret));
  1388. }
  1389. return event_;
  1390. }
  1391. #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
  1392. /// Enqueues a marker after \p events in the queue and returns an
  1393. /// event that can be used to track its progress.
  1394. ///
  1395. /// \opencl_version_warning{1,2}
  1396. event enqueue_marker(const wait_list &events)
  1397. {
  1398. event event_;
  1399. cl_int ret = clEnqueueMarkerWithWaitList(
  1400. m_queue, events.size(), events.get_event_ptr(), &event_.get()
  1401. );
  1402. if(ret != CL_SUCCESS){
  1403. BOOST_THROW_EXCEPTION(opencl_error(ret));
  1404. }
  1405. return event_;
  1406. }
  1407. #endif // CL_VERSION_1_2
  1408. #if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
  1409. /// Enqueues a command to copy \p size bytes of data from \p src_ptr to
  1410. /// \p dst_ptr.
  1411. ///
  1412. /// \opencl_version_warning{2,0}
  1413. ///
  1414. /// \see_opencl2_ref{clEnqueueSVMMemcpy}
  1415. event enqueue_svm_memcpy(void *dst_ptr,
  1416. const void *src_ptr,
  1417. size_t size,
  1418. const wait_list &events = wait_list())
  1419. {
  1420. event event_;
  1421. cl_int ret = clEnqueueSVMMemcpy(
  1422. m_queue,
  1423. CL_TRUE,
  1424. dst_ptr,
  1425. src_ptr,
  1426. size,
  1427. events.size(),
  1428. events.get_event_ptr(),
  1429. &event_.get()
  1430. );
  1431. if(ret != CL_SUCCESS){
  1432. BOOST_THROW_EXCEPTION(opencl_error(ret));
  1433. }
  1434. return event_;
  1435. }
  1436. /// Enqueues a command to copy \p size bytes of data from \p src_ptr to
  1437. /// \p dst_ptr. The operation is performed asynchronously.
  1438. ///
  1439. /// \opencl_version_warning{2,0}
  1440. ///
  1441. /// \see_opencl2_ref{clEnqueueSVMMemcpy}
  1442. event enqueue_svm_memcpy_async(void *dst_ptr,
  1443. const void *src_ptr,
  1444. size_t size,
  1445. const wait_list &events = wait_list())
  1446. {
  1447. event event_;
  1448. cl_int ret = clEnqueueSVMMemcpy(
  1449. m_queue,
  1450. CL_FALSE,
  1451. dst_ptr,
  1452. src_ptr,
  1453. size,
  1454. events.size(),
  1455. events.get_event_ptr(),
  1456. &event_.get()
  1457. );
  1458. if(ret != CL_SUCCESS){
  1459. BOOST_THROW_EXCEPTION(opencl_error(ret));
  1460. }
  1461. return event_;
  1462. }
  1463. /// Enqueues a command to fill \p size bytes of data at \p svm_ptr with
  1464. /// \p pattern.
  1465. ///
  1466. /// \opencl_version_warning{2,0}
  1467. ///
  1468. /// \see_opencl2_ref{clEnqueueSVMMemFill}
  1469. event enqueue_svm_fill(void *svm_ptr,
  1470. const void *pattern,
  1471. size_t pattern_size,
  1472. size_t size,
  1473. const wait_list &events = wait_list())
  1474. {
  1475. event event_;
  1476. cl_int ret = clEnqueueSVMMemFill(
  1477. m_queue,
  1478. svm_ptr,
  1479. pattern,
  1480. pattern_size,
  1481. size,
  1482. events.size(),
  1483. events.get_event_ptr(),
  1484. &event_.get()
  1485. );
  1486. if(ret != CL_SUCCESS){
  1487. BOOST_THROW_EXCEPTION(opencl_error(ret));
  1488. }
  1489. return event_;
  1490. }
  1491. /// Enqueues a command to free \p svm_ptr.
  1492. ///
  1493. /// \opencl_version_warning{2,0}
  1494. ///
  1495. /// \see_opencl2_ref{clEnqueueSVMFree}
  1496. ///
  1497. /// \see svm_free()
  1498. event enqueue_svm_free(void *svm_ptr,
  1499. const wait_list &events = wait_list())
  1500. {
  1501. event event_;
  1502. cl_int ret = clEnqueueSVMFree(
  1503. m_queue,
  1504. 1,
  1505. &svm_ptr,
  1506. 0,
  1507. 0,
  1508. events.size(),
  1509. events.get_event_ptr(),
  1510. &event_.get()
  1511. );
  1512. if(ret != CL_SUCCESS){
  1513. BOOST_THROW_EXCEPTION(opencl_error(ret));
  1514. }
  1515. return event_;
  1516. }
  1517. /// Enqueues a command to map \p svm_ptr to the host memory space.
  1518. ///
  1519. /// \opencl_version_warning{2,0}
  1520. ///
  1521. /// \see_opencl2_ref{clEnqueueSVMMap}
  1522. event enqueue_svm_map(void *svm_ptr,
  1523. size_t size,
  1524. cl_map_flags flags,
  1525. const wait_list &events = wait_list())
  1526. {
  1527. event event_;
  1528. cl_int ret = clEnqueueSVMMap(
  1529. m_queue,
  1530. CL_TRUE,
  1531. flags,
  1532. svm_ptr,
  1533. size,
  1534. events.size(),
  1535. events.get_event_ptr(),
  1536. &event_.get()
  1537. );
  1538. if(ret != CL_SUCCESS){
  1539. BOOST_THROW_EXCEPTION(opencl_error(ret));
  1540. }
  1541. return event_;
  1542. }
  1543. /// Enqueues a command to unmap \p svm_ptr from the host memory space.
  1544. ///
  1545. /// \opencl_version_warning{2,0}
  1546. ///
  1547. /// \see_opencl2_ref{clEnqueueSVMUnmap}
  1548. event enqueue_svm_unmap(void *svm_ptr,
  1549. const wait_list &events = wait_list())
  1550. {
  1551. event event_;
  1552. cl_int ret = clEnqueueSVMUnmap(
  1553. m_queue,
  1554. svm_ptr,
  1555. events.size(),
  1556. events.get_event_ptr(),
  1557. &event_.get()
  1558. );
  1559. if(ret != CL_SUCCESS){
  1560. BOOST_THROW_EXCEPTION(opencl_error(ret));
  1561. }
  1562. return event_;
  1563. }
  1564. #endif // CL_VERSION_2_0
  1565. /// Returns \c true if the command queue is the same at \p other.
  1566. bool operator==(const command_queue &other) const
  1567. {
  1568. return m_queue == other.m_queue;
  1569. }
  1570. /// Returns \c true if the command queue is different from \p other.
  1571. bool operator!=(const command_queue &other) const
  1572. {
  1573. return m_queue != other.m_queue;
  1574. }
  1575. /// \internal_
  1576. operator cl_command_queue() const
  1577. {
  1578. return m_queue;
  1579. }
  1580. /// \internal_
  1581. bool check_device_version(int major, int minor) const
  1582. {
  1583. return get_device().check_version(major, minor);
  1584. }
  1585. private:
  1586. cl_command_queue m_queue;
  1587. };
  1588. inline buffer buffer::clone(command_queue &queue) const
  1589. {
  1590. buffer copy(get_context(), size(), get_memory_flags());
  1591. queue.enqueue_copy_buffer(*this, copy, 0, 0, size());
  1592. return copy;
  1593. }
  1594. inline image1d image1d::clone(command_queue &queue) const
  1595. {
  1596. image1d copy(
  1597. get_context(), width(), format(), get_memory_flags()
  1598. );
  1599. queue.enqueue_copy_image(*this, copy, origin(), copy.origin(), size());
  1600. return copy;
  1601. }
  1602. inline image2d image2d::clone(command_queue &queue) const
  1603. {
  1604. image2d copy(
  1605. get_context(), width(), height(), format(), get_memory_flags()
  1606. );
  1607. queue.enqueue_copy_image(*this, copy, origin(), copy.origin(), size());
  1608. return copy;
  1609. }
  1610. inline image3d image3d::clone(command_queue &queue) const
  1611. {
  1612. image3d copy(
  1613. get_context(), width(), height(), depth(), format(), get_memory_flags()
  1614. );
  1615. queue.enqueue_copy_image(*this, copy, origin(), copy.origin(), size());
  1616. return copy;
  1617. }
  1618. /// \internal_ define get_info() specializations for command_queue
  1619. BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(command_queue,
  1620. ((cl_context, CL_QUEUE_CONTEXT))
  1621. ((cl_device_id, CL_QUEUE_DEVICE))
  1622. ((uint_, CL_QUEUE_REFERENCE_COUNT))
  1623. ((cl_command_queue_properties, CL_QUEUE_PROPERTIES))
  1624. )
  1625. } // end compute namespace
  1626. } // end boost namespace
  1627. #endif // BOOST_COMPUTE_COMMAND_QUEUE_HPP