linemod.hpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. /*M///////////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
  4. //
  5. // By downloading, copying, installing or using the software you agree to this license.
  6. // If you do not agree to this license, do not download, install,
  7. // copy or use the software.
  8. //
  9. //
  10. // License Agreement
  11. // For Open Source Computer Vision Library
  12. //
  13. // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14. // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15. // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  16. // Third party copyrights are property of their respective owners.
  17. //
  18. // Redistribution and use in source and binary forms, with or without modification,
  19. // are permitted provided that the following conditions are met:
  20. //
  21. // * Redistribution's of source code must retain the above copyright notice,
  22. // this list of conditions and the following disclaimer.
  23. //
  24. // * Redistribution's in binary form must reproduce the above copyright notice,
  25. // this list of conditions and the following disclaimer in the documentation
  26. // and/or other materials provided with the distribution.
  27. //
  28. // * The name of the copyright holders may not be used to endorse or promote products
  29. // derived from this software without specific prior written permission.
  30. //
  31. // This software is provided by the copyright holders and contributors "as is" and
  32. // any express or implied warranties, including, but not limited to, the implied
  33. // warranties of merchantability and fitness for a particular purpose are disclaimed.
  34. // In no event shall the Intel Corporation or contributors be liable for any direct,
  35. // indirect, incidental, special, exemplary, or consequential damages
  36. // (including, but not limited to, procurement of substitute goods or services;
  37. // loss of use, data, or profits; or business interruption) however caused
  38. // and on any theory of liability, whether in contract, strict liability,
  39. // or tort (including negligence or otherwise) arising in any way out of
  40. // the use of this software, even if advised of the possibility of such damage.
  41. //
  42. //M*/
  43. #ifndef __OPENCV_OBJDETECT_LINEMOD_HPP__
  44. #define __OPENCV_OBJDETECT_LINEMOD_HPP__
  45. #include "opencv2/core.hpp"
  46. #include <map>
  47. /****************************************************************************************\
  48. * LINE-MOD *
  49. \****************************************************************************************/
  50. namespace cv {
  51. namespace linemod {
  52. //! @addtogroup rgbd
  53. //! @{
  54. /**
  55. * \brief Discriminant feature described by its location and label.
  56. */
  57. struct CV_EXPORTS Feature
  58. {
  59. int x; ///< x offset
  60. int y; ///< y offset
  61. int label; ///< Quantization
  62. Feature() : x(0), y(0), label(0) {}
  63. Feature(int x, int y, int label);
  64. void read(const FileNode& fn);
  65. void write(FileStorage& fs) const;
  66. };
  67. inline Feature::Feature(int _x, int _y, int _label) : x(_x), y(_y), label(_label) {}
  68. struct CV_EXPORTS Template
  69. {
  70. int width;
  71. int height;
  72. int pyramid_level;
  73. std::vector<Feature> features;
  74. void read(const FileNode& fn);
  75. void write(FileStorage& fs) const;
  76. };
  77. /**
  78. * \brief Represents a modality operating over an image pyramid.
  79. */
  80. class QuantizedPyramid
  81. {
  82. public:
  83. // Virtual destructor
  84. virtual ~QuantizedPyramid() {}
  85. /**
  86. * \brief Compute quantized image at current pyramid level for online detection.
  87. *
  88. * \param[out] dst The destination 8-bit image. For each pixel at most one bit is set,
  89. * representing its classification.
  90. */
  91. virtual void quantize(Mat& dst) const =0;
  92. /**
  93. * \brief Extract most discriminant features at current pyramid level to form a new template.
  94. *
  95. * \param[out] templ The new template.
  96. */
  97. virtual bool extractTemplate(Template& templ) const =0;
  98. /**
  99. * \brief Go to the next pyramid level.
  100. *
  101. * \todo Allow pyramid scale factor other than 2
  102. */
  103. virtual void pyrDown() =0;
  104. protected:
  105. /// Candidate feature with a score
  106. struct Candidate
  107. {
  108. Candidate(int x, int y, int label, float score);
  109. /// Sort candidates with high score to the front
  110. bool operator<(const Candidate& rhs) const
  111. {
  112. return score > rhs.score;
  113. }
  114. Feature f;
  115. float score;
  116. };
  117. /**
  118. * \brief Choose candidate features so that they are not bunched together.
  119. *
  120. * \param[in] candidates Candidate features sorted by score.
  121. * \param[out] features Destination vector of selected features.
  122. * \param[in] num_features Number of candidates to select.
  123. * \param[in] distance Hint for desired distance between features.
  124. */
  125. static void selectScatteredFeatures(const std::vector<Candidate>& candidates,
  126. std::vector<Feature>& features,
  127. size_t num_features, float distance);
  128. };
  129. inline QuantizedPyramid::Candidate::Candidate(int x, int y, int label, float _score) : f(x, y, label), score(_score) {}
  130. /**
  131. * \brief Interface for modalities that plug into the LINE template matching representation.
  132. *
  133. * \todo Max response, to allow optimization of summing (255/MAX) features as uint8
  134. */
  135. class CV_EXPORTS Modality
  136. {
  137. public:
  138. // Virtual destructor
  139. virtual ~Modality() {}
  140. /**
  141. * \brief Form a quantized image pyramid from a source image.
  142. *
  143. * \param[in] src The source image. Type depends on the modality.
  144. * \param[in] mask Optional mask. If not empty, unmasked pixels are set to zero
  145. * in quantized image and cannot be extracted as features.
  146. */
  147. Ptr<QuantizedPyramid> process(const Mat& src,
  148. const Mat& mask = Mat()) const
  149. {
  150. return processImpl(src, mask);
  151. }
  152. virtual String name() const =0;
  153. virtual void read(const FileNode& fn) =0;
  154. virtual void write(FileStorage& fs) const =0;
  155. /**
  156. * \brief Create modality by name.
  157. *
  158. * The following modality types are supported:
  159. * - "ColorGradient"
  160. * - "DepthNormal"
  161. */
  162. static Ptr<Modality> create(const String& modality_type);
  163. /**
  164. * \brief Load a modality from file.
  165. */
  166. static Ptr<Modality> create(const FileNode& fn);
  167. protected:
  168. // Indirection is because process() has a default parameter.
  169. virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
  170. const Mat& mask) const =0;
  171. };
  172. /**
  173. * \brief Modality that computes quantized gradient orientations from a color image.
  174. */
  175. class CV_EXPORTS ColorGradient : public Modality
  176. {
  177. public:
  178. /**
  179. * \brief Default constructor. Uses reasonable default parameter values.
  180. */
  181. ColorGradient();
  182. /**
  183. * \brief Constructor.
  184. *
  185. * \param weak_threshold When quantizing, discard gradients with magnitude less than this.
  186. * \param num_features How many features a template must contain.
  187. * \param strong_threshold Consider as candidate features only gradients whose norms are
  188. * larger than this.
  189. */
  190. ColorGradient(float weak_threshold, size_t num_features, float strong_threshold);
  191. virtual String name() const;
  192. virtual void read(const FileNode& fn);
  193. virtual void write(FileStorage& fs) const;
  194. float weak_threshold;
  195. size_t num_features;
  196. float strong_threshold;
  197. protected:
  198. virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
  199. const Mat& mask) const;
  200. };
  201. /**
  202. * \brief Modality that computes quantized surface normals from a dense depth map.
  203. */
  204. class CV_EXPORTS DepthNormal : public Modality
  205. {
  206. public:
  207. /**
  208. * \brief Default constructor. Uses reasonable default parameter values.
  209. */
  210. DepthNormal();
  211. /**
  212. * \brief Constructor.
  213. *
  214. * \param distance_threshold Ignore pixels beyond this distance.
  215. * \param difference_threshold When computing normals, ignore contributions of pixels whose
  216. * depth difference with the central pixel is above this threshold.
  217. * \param num_features How many features a template must contain.
  218. * \param extract_threshold Consider as candidate feature only if there are no differing
  219. * orientations within a distance of extract_threshold.
  220. */
  221. DepthNormal(int distance_threshold, int difference_threshold, size_t num_features,
  222. int extract_threshold);
  223. virtual String name() const;
  224. virtual void read(const FileNode& fn);
  225. virtual void write(FileStorage& fs) const;
  226. int distance_threshold;
  227. int difference_threshold;
  228. size_t num_features;
  229. int extract_threshold;
  230. protected:
  231. virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
  232. const Mat& mask) const;
  233. };
  234. /**
  235. * \brief Debug function to colormap a quantized image for viewing.
  236. */
  237. void colormap(const Mat& quantized, Mat& dst);
  238. /**
  239. * \brief Represents a successful template match.
  240. */
  241. struct CV_EXPORTS Match
  242. {
  243. Match()
  244. {
  245. }
  246. Match(int x, int y, float similarity, const String& class_id, int template_id);
  247. /// Sort matches with high similarity to the front
  248. bool operator<(const Match& rhs) const
  249. {
  250. // Secondarily sort on template_id for the sake of duplicate removal
  251. if (similarity != rhs.similarity)
  252. return similarity > rhs.similarity;
  253. else
  254. return template_id < rhs.template_id;
  255. }
  256. bool operator==(const Match& rhs) const
  257. {
  258. return x == rhs.x && y == rhs.y && similarity == rhs.similarity && class_id == rhs.class_id;
  259. }
  260. int x;
  261. int y;
  262. float similarity;
  263. String class_id;
  264. int template_id;
  265. };
  266. inline
  267. Match::Match(int _x, int _y, float _similarity, const String& _class_id, int _template_id)
  268. : x(_x), y(_y), similarity(_similarity), class_id(_class_id), template_id(_template_id)
  269. {}
  270. /**
  271. * \brief Object detector using the LINE template matching algorithm with any set of
  272. * modalities.
  273. */
  274. class CV_EXPORTS Detector
  275. {
  276. public:
  277. /**
  278. * \brief Empty constructor, initialize with read().
  279. */
  280. Detector();
  281. /**
  282. * \brief Constructor.
  283. *
  284. * \param modalities Modalities to use (color gradients, depth normals, ...).
  285. * \param T_pyramid Value of the sampling step T at each pyramid level. The
  286. * number of pyramid levels is T_pyramid.size().
  287. */
  288. Detector(const std::vector< Ptr<Modality> >& modalities, const std::vector<int>& T_pyramid);
  289. /**
  290. * \brief Detect objects by template matching.
  291. *
  292. * Matches globally at the lowest pyramid level, then refines locally stepping up the pyramid.
  293. *
  294. * \param sources Source images, one for each modality.
  295. * \param threshold Similarity threshold, a percentage between 0 and 100.
  296. * \param[out] matches Template matches, sorted by similarity score.
  297. * \param class_ids If non-empty, only search for the desired object classes.
  298. * \param[out] quantized_images Optionally return vector<Mat> of quantized images.
  299. * \param masks The masks for consideration during matching. The masks should be CV_8UC1
  300. * where 255 represents a valid pixel. If non-empty, the vector must be
  301. * the same size as sources. Each element must be
  302. * empty or the same size as its corresponding source.
  303. */
  304. void match(const std::vector<Mat>& sources, float threshold, std::vector<Match>& matches,
  305. const std::vector<String>& class_ids = std::vector<String>(),
  306. OutputArrayOfArrays quantized_images = noArray(),
  307. const std::vector<Mat>& masks = std::vector<Mat>()) const;
  308. /**
  309. * \brief Add new object template.
  310. *
  311. * \param sources Source images, one for each modality.
  312. * \param class_id Object class ID.
  313. * \param object_mask Mask separating object from background.
  314. * \param[out] bounding_box Optionally return bounding box of the extracted features.
  315. *
  316. * \return Template ID, or -1 if failed to extract a valid template.
  317. */
  318. int addTemplate(const std::vector<Mat>& sources, const String& class_id,
  319. const Mat& object_mask, Rect* bounding_box = NULL);
  320. /**
  321. * \brief Add a new object template computed by external means.
  322. */
  323. int addSyntheticTemplate(const std::vector<Template>& templates, const String& class_id);
  324. /**
  325. * \brief Get the modalities used by this detector.
  326. *
  327. * You are not permitted to add/remove modalities, but you may dynamic_cast them to
  328. * tweak parameters.
  329. */
  330. const std::vector< Ptr<Modality> >& getModalities() const { return modalities; }
  331. /**
  332. * \brief Get sampling step T at pyramid_level.
  333. */
  334. int getT(int pyramid_level) const { return T_at_level[pyramid_level]; }
  335. /**
  336. * \brief Get number of pyramid levels used by this detector.
  337. */
  338. int pyramidLevels() const { return pyramid_levels; }
  339. /**
  340. * \brief Get the template pyramid identified by template_id.
  341. *
  342. * For example, with 2 modalities (Gradient, Normal) and two pyramid levels
  343. * (L0, L1), the order is (GradientL0, NormalL0, GradientL1, NormalL1).
  344. */
  345. const std::vector<Template>& getTemplates(const String& class_id, int template_id) const;
  346. int numTemplates() const;
  347. int numTemplates(const String& class_id) const;
  348. int numClasses() const { return static_cast<int>(class_templates.size()); }
  349. std::vector<String> classIds() const;
  350. void read(const FileNode& fn);
  351. void write(FileStorage& fs) const;
  352. String readClass(const FileNode& fn, const String &class_id_override = "");
  353. void writeClass(const String& class_id, FileStorage& fs) const;
  354. void readClasses(const std::vector<String>& class_ids,
  355. const String& format = "templates_%s.yml.gz");
  356. void writeClasses(const String& format = "templates_%s.yml.gz") const;
  357. protected:
  358. std::vector< Ptr<Modality> > modalities;
  359. int pyramid_levels;
  360. std::vector<int> T_at_level;
  361. typedef std::vector<Template> TemplatePyramid;
  362. typedef std::map<String, std::vector<TemplatePyramid> > TemplatesMap;
  363. TemplatesMap class_templates;
  364. typedef std::vector<Mat> LinearMemories;
  365. // Indexed as [pyramid level][modality][quantized label]
  366. typedef std::vector< std::vector<LinearMemories> > LinearMemoryPyramid;
  367. void matchClass(const LinearMemoryPyramid& lm_pyramid,
  368. const std::vector<Size>& sizes,
  369. float threshold, std::vector<Match>& matches,
  370. const String& class_id,
  371. const std::vector<TemplatePyramid>& template_pyramids) const;
  372. };
  373. /**
  374. * \brief Factory function for detector using LINE algorithm with color gradients.
  375. *
  376. * Default parameter settings suitable for VGA images.
  377. */
  378. CV_EXPORTS Ptr<Detector> getDefaultLINE();
  379. /**
  380. * \brief Factory function for detector using LINE-MOD algorithm with color gradients
  381. * and depth normals.
  382. *
  383. * Default parameter settings suitable for VGA images.
  384. */
  385. CV_EXPORTS Ptr<Detector> getDefaultLINEMOD();
  386. //! @}
  387. } // namespace linemod
  388. } // namespace cv
  389. #endif // __OPENCV_OBJDETECT_LINEMOD_HPP__