kernel.hpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. //---------------------------------------------------------------------------//
  2. // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
  3. //
  4. // Distributed under the Boost Software License, Version 1.0
  5. // See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt
  7. //
  8. // See http://boostorg.github.com/compute for more information.
  9. //---------------------------------------------------------------------------//
  10. #ifndef BOOST_COMPUTE_KERNEL_HPP
  11. #define BOOST_COMPUTE_KERNEL_HPP
  12. #include <string>
  13. #include <boost/assert.hpp>
  14. #include <boost/utility/enable_if.hpp>
  15. #include <boost/compute/config.hpp>
  16. #include <boost/compute/program.hpp>
  17. #include <boost/compute/exception.hpp>
  18. #include <boost/compute/type_traits/is_fundamental.hpp>
  19. #include <boost/compute/detail/get_object_info.hpp>
  20. #include <boost/compute/detail/assert_cl_success.hpp>
  21. #include <boost/compute/memory/svm_ptr.hpp>
  22. namespace boost {
  23. namespace compute {
  24. namespace detail {
  25. template<class T> struct set_kernel_arg;
  26. } // end detail namespace
  27. /// \class kernel
  28. /// \brief A compute kernel.
  29. ///
  30. /// \see command_queue, program
  31. class kernel
  32. {
  33. public:
  34. /// Creates a null kernel object.
  35. kernel()
  36. : m_kernel(0)
  37. {
  38. }
  39. /// Creates a new kernel object for \p kernel. If \p retain is
  40. /// \c true, the reference count for \p kernel will be incremented.
  41. explicit kernel(cl_kernel kernel, bool retain = true)
  42. : m_kernel(kernel)
  43. {
  44. if(m_kernel && retain){
  45. clRetainKernel(m_kernel);
  46. }
  47. }
  48. /// Creates a new kernel object with \p name from \p program.
  49. kernel(const program &program, const std::string &name)
  50. {
  51. cl_int error = 0;
  52. m_kernel = clCreateKernel(program.get(), name.c_str(), &error);
  53. if(!m_kernel){
  54. BOOST_THROW_EXCEPTION(opencl_error(error));
  55. }
  56. }
  57. /// Creates a new kernel object as a copy of \p other.
  58. kernel(const kernel &other)
  59. : m_kernel(other.m_kernel)
  60. {
  61. if(m_kernel){
  62. clRetainKernel(m_kernel);
  63. }
  64. }
  65. /// Copies the kernel object from \p other to \c *this.
  66. kernel& operator=(const kernel &other)
  67. {
  68. if(this != &other){
  69. if(m_kernel){
  70. clReleaseKernel(m_kernel);
  71. }
  72. m_kernel = other.m_kernel;
  73. if(m_kernel){
  74. clRetainKernel(m_kernel);
  75. }
  76. }
  77. return *this;
  78. }
  79. #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
  80. /// Move-constructs a new kernel object from \p other.
  81. kernel(kernel&& other) BOOST_NOEXCEPT
  82. : m_kernel(other.m_kernel)
  83. {
  84. other.m_kernel = 0;
  85. }
  86. /// Move-assigns the kernel from \p other to \c *this.
  87. kernel& operator=(kernel&& other) BOOST_NOEXCEPT
  88. {
  89. if(m_kernel){
  90. clReleaseKernel(m_kernel);
  91. }
  92. m_kernel = other.m_kernel;
  93. other.m_kernel = 0;
  94. return *this;
  95. }
  96. #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
  97. /// Destroys the kernel object.
  98. ~kernel()
  99. {
  100. if(m_kernel){
  101. BOOST_COMPUTE_ASSERT_CL_SUCCESS(
  102. clReleaseKernel(m_kernel)
  103. );
  104. }
  105. }
  106. /// Returns a reference to the underlying OpenCL kernel object.
  107. cl_kernel& get() const
  108. {
  109. return const_cast<cl_kernel &>(m_kernel);
  110. }
  111. /// Returns the function name for the kernel.
  112. std::string name() const
  113. {
  114. return get_info<std::string>(CL_KERNEL_FUNCTION_NAME);
  115. }
  116. /// Returns the number of arguments for the kernel.
  117. size_t arity() const
  118. {
  119. return get_info<cl_uint>(CL_KERNEL_NUM_ARGS);
  120. }
  121. /// Returns the program for the kernel.
  122. program get_program() const
  123. {
  124. return program(get_info<cl_program>(CL_KERNEL_PROGRAM));
  125. }
  126. /// Returns the context for the kernel.
  127. context get_context() const
  128. {
  129. return context(get_info<cl_context>(CL_KERNEL_CONTEXT));
  130. }
  131. /// Returns information about the kernel.
  132. ///
  133. /// \see_opencl_ref{clGetKernelInfo}
  134. template<class T>
  135. T get_info(cl_kernel_info info) const
  136. {
  137. return detail::get_object_info<T>(clGetKernelInfo, m_kernel, info);
  138. }
  139. /// \overload
  140. template<int Enum>
  141. typename detail::get_object_info_type<kernel, Enum>::type
  142. get_info() const;
  143. #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
  144. /// Returns information about the argument at \p index.
  145. ///
  146. /// For example, to get the name of the first argument:
  147. /// \code
  148. /// std::string arg = kernel.get_arg_info<std::string>(0, CL_KERNEL_ARG_NAME);
  149. /// \endcode
  150. ///
  151. /// Note, this function requires that the program be compiled with the
  152. /// \c "-cl-kernel-arg-info" flag. For example:
  153. /// \code
  154. /// program.build("-cl-kernel-arg-info");
  155. /// \endcode
  156. ///
  157. /// \opencl_version_warning{1,2}
  158. ///
  159. /// \see_opencl_ref{clGetKernelArgInfo}
  160. template<class T>
  161. T get_arg_info(size_t index, cl_kernel_arg_info info) const
  162. {
  163. return detail::get_object_info<T>(clGetKernelArgInfo, m_kernel, info, index);
  164. }
  165. #endif // CL_VERSION_1_2
  166. /// Returns work-group information for the kernel with \p device.
  167. ///
  168. /// \see_opencl_ref{clGetKernelWorkGroupInfo}
  169. template<class T>
  170. T get_work_group_info(const device &device, cl_kernel_work_group_info info) const
  171. {
  172. return detail::get_object_info<T>(clGetKernelWorkGroupInfo, m_kernel, info, device.id());
  173. }
  174. /// Sets the argument at \p index to \p value with \p size.
  175. ///
  176. /// \see_opencl_ref{clSetKernelArg}
  177. void set_arg(size_t index, size_t size, const void *value)
  178. {
  179. BOOST_ASSERT(index < arity());
  180. cl_int ret = clSetKernelArg(m_kernel,
  181. static_cast<cl_uint>(index),
  182. size,
  183. value);
  184. if(ret != CL_SUCCESS){
  185. BOOST_THROW_EXCEPTION(opencl_error(ret));
  186. }
  187. }
  188. /// Sets the argument at \p index to \p value.
  189. ///
  190. /// For built-in types (e.g. \c float, \c int4_), this is equivalent to
  191. /// calling set_arg(index, sizeof(type), &value).
  192. ///
  193. /// Additionally, this method is specialized for device memory objects
  194. /// such as buffer and image2d. This allows for them to be passed directly
  195. /// without having to extract their underlying cl_mem object.
  196. ///
  197. /// This method is also specialized for device container types such as
  198. /// vector<T> and array<T, N>. This allows for them to be passed directly
  199. /// as kernel arguments without having to extract their underlying buffer.
  200. ///
  201. /// For setting local memory arguments (e.g. "__local float *buf"), the
  202. /// local_buffer<T> class may be used:
  203. /// \code
  204. /// // set argument to a local buffer with storage for 32 float's
  205. /// kernel.set_arg(0, local_buffer<float>(32));
  206. /// \endcode
  207. template<class T>
  208. void set_arg(size_t index, const T &value)
  209. {
  210. // if you get a compilation error pointing here it means you
  211. // attempted to set a kernel argument from an invalid type.
  212. detail::set_kernel_arg<T>()(*this, index, value);
  213. }
  214. /// \internal_
  215. void set_arg(size_t index, const cl_mem mem)
  216. {
  217. set_arg(index, sizeof(cl_mem), static_cast<const void *>(&mem));
  218. }
  219. /// \internal_
  220. void set_arg(size_t index, const cl_sampler sampler)
  221. {
  222. set_arg(index, sizeof(cl_sampler), static_cast<const void *>(&sampler));
  223. }
  224. /// \internal_
  225. template<class T>
  226. void set_arg(size_t index, const svm_ptr<T> ptr)
  227. {
  228. #ifdef CL_VERSION_2_0
  229. cl_int ret = clSetKernelArgSVMPointer(m_kernel, index, ptr.get());
  230. if(ret != CL_SUCCESS){
  231. BOOST_THROW_EXCEPTION(opencl_error(ret));
  232. }
  233. #else
  234. BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_ARG_VALUE));
  235. #endif
  236. }
  237. #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
  238. /// Sets the arguments for the kernel to \p args.
  239. template<class... T>
  240. void set_args(T&&... args)
  241. {
  242. BOOST_ASSERT(sizeof...(T) <= arity());
  243. _set_args<0>(args...);
  244. }
  245. #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
  246. #if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
  247. /// Sets additional execution information for the kernel.
  248. ///
  249. /// \opencl_version_warning{2,0}
  250. ///
  251. /// \see_opencl2_ref{clSetKernelExecInfo}
  252. void set_exec_info(cl_kernel_exec_info info, size_t size, const void *value)
  253. {
  254. cl_int ret = clSetKernelExecInfo(m_kernel, info, size, value);
  255. if(ret != CL_SUCCESS){
  256. BOOST_THROW_EXCEPTION(opencl_error(ret));
  257. }
  258. }
  259. #endif // CL_VERSION_2_0
  260. /// Returns \c true if the kernel is the same at \p other.
  261. bool operator==(const kernel &other) const
  262. {
  263. return m_kernel == other.m_kernel;
  264. }
  265. /// Returns \c true if the kernel is different from \p other.
  266. bool operator!=(const kernel &other) const
  267. {
  268. return m_kernel != other.m_kernel;
  269. }
  270. /// \internal_
  271. operator cl_kernel() const
  272. {
  273. return m_kernel;
  274. }
  275. /// \internal_
  276. static kernel create_with_source(const std::string &source,
  277. const std::string &name,
  278. const context &context)
  279. {
  280. return program::build_with_source(source, context).create_kernel(name);
  281. }
  282. private:
  283. #ifndef BOOST_NO_VARIADIC_TEMPLATES
  284. /// \internal_
  285. template<size_t N>
  286. void _set_args()
  287. {
  288. }
  289. /// \internal_
  290. template<size_t N, class T, class... Args>
  291. void _set_args(T&& arg, Args&&... rest)
  292. {
  293. set_arg(N, arg);
  294. _set_args<N+1>(rest...);
  295. }
  296. #endif // BOOST_NO_VARIADIC_TEMPLATES
  297. private:
  298. cl_kernel m_kernel;
  299. };
  300. inline kernel program::create_kernel(const std::string &name) const
  301. {
  302. return kernel(*this, name);
  303. }
  304. /// \internal_ define get_info() specializations for kernel
  305. BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
  306. ((std::string, CL_KERNEL_FUNCTION_NAME))
  307. ((cl_uint, CL_KERNEL_NUM_ARGS))
  308. ((cl_uint, CL_KERNEL_REFERENCE_COUNT))
  309. ((cl_context, CL_KERNEL_CONTEXT))
  310. ((cl_program, CL_KERNEL_PROGRAM))
  311. )
  312. #ifdef CL_VERSION_1_2
  313. BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
  314. ((std::string, CL_KERNEL_ATTRIBUTES))
  315. )
  316. #endif // CL_VERSION_1_2
  317. namespace detail {
  318. // set_kernel_arg implementation for built-in types
  319. template<class T>
  320. struct set_kernel_arg
  321. {
  322. typename boost::enable_if<is_fundamental<T> >::type
  323. operator()(kernel &kernel_, size_t index, const T &value)
  324. {
  325. kernel_.set_arg(index, sizeof(T), &value);
  326. }
  327. };
  328. // set_kernel_arg specialization for char (different from built-in cl_char)
  329. template<>
  330. struct set_kernel_arg<char>
  331. {
  332. void operator()(kernel &kernel_, size_t index, const char c)
  333. {
  334. kernel_.set_arg(index, sizeof(char), &c);
  335. }
  336. };
  337. } // end detail namespace
  338. } // end namespace compute
  339. } // end namespace boost
  340. #endif // BOOST_COMPUTE_KERNEL_HPP