/*M///////////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
  4. //
  5. // By downloading, copying, installing or using the software you agree to this license.
  6. // If you do not agree to this license, do not download, install,
  7. // copy or use the software.
  8. //
  9. //
  10. // License Agreement
  11. // For Open Source Computer Vision Library
  12. //
  13. // Copyright (C) 2014, Itseez Inc, all rights reserved.
  14. // Third party copyrights are property of their respective owners.
  15. //
  16. // Redistribution and use in source and binary forms, with or without modification,
  17. // are permitted provided that the following conditions are met:
  18. //
  19. // * Redistribution's of source code must retain the above copyright notice,
  20. // this list of conditions and the following disclaimer.
  21. //
  22. // * Redistribution's in binary form must reproduce the above copyright notice,
  23. // this list of conditions and the following disclaimer in the documentation
  24. // and/or other materials provided with the distribution.
  25. //
  26. // * The name of the copyright holders may not be used to endorse or promote products
  27. // derived from this software without specific prior written permission.
  28. //
  29. // This software is provided by the copyright holders and contributors "as is" and
  30. // any express or implied warranties, including, but not limited to, the implied
  31. // warranties of merchantability and fitness for a particular purpose are disclaimed.
  32. // In no event shall the Itseez Inc or contributors be liable for any direct,
  33. // indirect, incidental, special, exemplary, or consequential damages
  34. // (including, but not limited to, procurement of substitute goods or services;
  35. // loss of use, data, or profits; or business interruption) however caused
  36. // and on any theory of liability, whether in contract, strict liability,
  37. // or tort (including negligence or otherwise) arising in any way out of
  38. // the use of this software, even if advised of the possibility of such damage.
  39. //
  40. //M*/
#ifndef OPENCV_DATASETS_DATASET_HPP
#define OPENCV_DATASETS_DATASET_HPP

#include <string>
#include <vector>

#include <opencv2/core.hpp>

/** @defgroup datasets Framework for working with different datasets
  47. The datasets module includes classes for working with different datasets: load data, evaluate
  48. different algorithms on them, contains benchmarks, etc.
  49. It is planned to have:
  50. - basic: loading code for all datasets to help start work with them.
  51. - next stage: quick benchmarks for all datasets to show how to solve them using OpenCV and
  52. implement evaluation code.
  53. - finally: implement on OpenCV state-of-the-art algorithms, which solve these tasks.
  54. @{
  55. @defgroup datasets_ar Action Recognition
  56. ### HMDB: A Large Human Motion Database
  57. Implements loading dataset:
  58. "HMDB: A Large Human Motion Database": <http://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/>
  59. Usage:
  60. -# From link above download dataset files: `hmdb51_org.rar` & `test_train_splits.rar`.
  61. -# Unpack them. Unpack all archives from directory: `hmdb51_org/` and remove them.
  62. -# To load data run:
  63. ~~~
  64. ./opencv/build/bin/example_datasets_ar_hmdb -p=/home/user/path_to_unpacked_folders/
  65. ~~~
  66. #### Benchmark
  67. For this dataset was implemented benchmark with accuracy: 0.107407 (using precomputed HOG/HOF
  68. "STIP" features from site, averaging for 3 splits)
  69. To run this benchmark execute:
  70. ~~~
  71. ./opencv/build/bin/example_datasets_ar_hmdb_benchmark -p=/home/user/path_to_unpacked_folders/
  72. ~~~
  73. @note
  74. Precomputed features should be unpacked in the same folder: `/home/user/path_to_unpacked_folders/hmdb51_org_stips/`.
  75. Also unpack all archives from directory: `hmdb51_org_stips/` and remove them.
  76. ### Sports-1M %Dataset
  77. Implements loading dataset:
  78. "Sports-1M Dataset": <http://cs.stanford.edu/people/karpathy/deepvideo/>
  79. Usage:
  80. -# From link above download dataset files (`git clone https://code.google.com/p/sports-1m-dataset/`).
  81. -# To load data run:
  82. ~~~
  83. ./opencv/build/bin/example_datasets_ar_sports -p=/home/user/path_to_downloaded_folders/
  84. ~~~
  85. @defgroup datasets_fr Face Recognition
  86. ### Adience
  87. Implements loading dataset:
  88. "Adience": <http://www.openu.ac.il/home/hassner/Adience/data.html>
  89. Usage:
  90. -# From link above download any dataset file: `faces.tar.gz\aligned.tar.gz` and files with splits:
  91. `fold_0_data.txt-fold_4_data.txt`, `fold_frontal_0_data.txt-fold_frontal_4_data.txt`. (For
  92. face recognition task another splits should be created)
  93. -# Unpack dataset file to some folder and place split files into the same folder.
  94. -# To load data run:
  95. ~~~
  96. ./opencv/build/bin/example_datasets_fr_adience -p=/home/user/path_to_created_folder/
  97. ~~~
  98. ### Labeled Faces in the Wild
  99. Implements loading dataset:
  100. "Labeled Faces in the Wild": <http://vis-www.cs.umass.edu/lfw/>
  101. Usage:
  102. -# From link above download any dataset file:
  103. `lfw.tgz\lfwa.tar.gz\lfw-deepfunneled.tgz\lfw-funneled.tgz` and files with pairs: 10 test
  104. splits: `pairs.txt` and developer train split: `pairsDevTrain.txt`.
  105. -# Unpack dataset file and place `pairs.txt` and `pairsDevTrain.txt` in created folder.
  106. -# To load data run:
  107. ~~~
  108. ./opencv/build/bin/example_datasets_fr_lfw -p=/home/user/path_to_unpacked_folder/lfw2/
  109. ~~~
  110. #### Benchmark
  111. For this dataset was implemented benchmark with accuracy: 0.623833 +- 0.005223 (train split:
  112. `pairsDevTrain.txt`, dataset: lfwa)
  113. To run this benchmark execute:
  114. ~~~
  115. ./opencv/build/bin/example_datasets_fr_lfw_benchmark -p=/home/user/path_to_unpacked_folder/lfw2/
  116. ~~~
  117. @defgroup datasets_gr Gesture Recognition
  118. ### ChaLearn Looking at People
  119. Implements loading dataset:
  120. "ChaLearn Looking at People": <http://gesture.chalearn.org/>
  121. Usage
  122. -# Follow instruction from site above, download files for dataset "Track 3: Gesture Recognition":
  123. `Train1.zip`-`Train5.zip`, `Validation1.zip`-`Validation3.zip` (Register on site: www.codalab.org and
  124. accept the terms and conditions of competition:
  125. <https://www.codalab.org/competitions/991#learn_the_details> There are three mirrors for
downloading the dataset files. At the time of writing, only the "Universitat Oberta de Catalunya"
mirror worked).
  128. -# Unpack train archives `Train1.zip`-`Train5.zip` to folder `Train/`, validation archives
  129. `Validation1.zip`-`Validation3.zip` to folder `Validation/`
  130. -# Unpack all archives in `Train/` & `Validation/` in the folders with the same names, for example:
  131. `Sample0001.zip` to `Sample0001/`
  132. -# To load data run:
  133. ~~~
  134. ./opencv/build/bin/example_datasets_gr_chalearn -p=/home/user/path_to_unpacked_folders/
  135. ~~~
  136. ### Sheffield Kinect Gesture Dataset
  137. Implements loading dataset:
  138. "Sheffield Kinect Gesture Dataset": <http://lshao.staff.shef.ac.uk/data/SheffieldKinectGesture.htm>
  139. Usage:
  140. -# From link above download dataset files: `subject1_dep.7z`-`subject6_dep.7z`, `subject1_rgb.7z`-`subject6_rgb.7z`.
  141. -# Unpack them.
  142. -# To load data run:
  143. ~~~
  144. ./opencv/build/bin/example_datasets_gr_skig -p=/home/user/path_to_unpacked_folders/
  145. ~~~
  146. @defgroup datasets_hpe Human Pose Estimation
  147. ### HumanEva Dataset
  148. Implements loading dataset:
  149. "HumanEva Dataset": <http://humaneva.is.tue.mpg.de>
  150. Usage:
  151. -# From link above download dataset files for `HumanEva-I` (tar) & `HumanEva-II`.
  152. -# Unpack them to `HumanEva_1` & `HumanEva_2` accordingly.
  153. -# To load data run:
  154. ~~~
  155. ./opencv/build/bin/example_datasets_hpe_humaneva -p=/home/user/path_to_unpacked_folders/
  156. ~~~
  157. ### PARSE Dataset
  158. Implements loading dataset:
  159. "PARSE Dataset": <http://www.ics.uci.edu/~dramanan/papers/parse/>
  160. Usage:
  161. -# From link above download dataset file: `people.zip`.
  162. -# Unpack it.
  163. -# To load data run:
  164. ~~~
  165. ./opencv/build/bin/example_datasets_hpe_parse -p=/home/user/path_to_unpacked_folder/people_all/
  166. ~~~
  167. @defgroup datasets_ir Image Registration
  168. ### Affine Covariant Regions Datasets
  169. Implements loading dataset:
  170. "Affine Covariant Regions Datasets": <http://www.robots.ox.ac.uk/~vgg/data/data-aff.html>
  171. Usage:
  172. -# From link above download dataset files:
  173. `bark\bikes\boat\graf\leuven\trees\ubc\wall.tar.gz`.
  174. -# Unpack them.
  175. -# To load data, for example, for "bark", run:
  176. ```
  177. ./opencv/build/bin/example_datasets_ir_affine -p=/home/user/path_to_unpacked_folder/bark/
  178. ```
  179. ### Robot Data Set
  180. Implements loading dataset:
  181. "Robot Data Set, Point Feature Data Set – 2010": <http://roboimagedata.compute.dtu.dk/?page_id=24>
  182. Usage:
  183. -# From link above download dataset files: `SET001_6.tar.gz`-`SET055_60.tar.gz`
  184. -# Unpack them to one folder.
  185. -# To load data run:
  186. ~~~
  187. ./opencv/build/bin/example_datasets_ir_robot -p=/home/user/path_to_unpacked_folder/
  188. ~~~
  189. @defgroup datasets_is Image Segmentation
  190. ### The Berkeley Segmentation Dataset and Benchmark
  191. Implements loading dataset:
  192. "The Berkeley Segmentation Dataset and Benchmark": <https://www.eecs.berkeley.edu/Research/Projects/CS/vision/bsds/>
  193. Usage:
  194. -# From link above download dataset files: `BSDS300-human.tgz` & `BSDS300-images.tgz`.
  195. -# Unpack them.
  196. -# To load data run:
  197. ~~~
  198. ./opencv/build/bin/example_datasets_is_bsds -p=/home/user/path_to_unpacked_folder/BSDS300/
  199. ~~~
  200. ### Weizmann Segmentation Evaluation Database
  201. Implements loading dataset:
  202. "Weizmann Segmentation Evaluation Database": <http://www.wisdom.weizmann.ac.il/~vision/Seg_Evaluation_DB/>
  203. Usage:
  204. -# From link above download dataset files: `Weizmann_Seg_DB_1obj.ZIP` & `Weizmann_Seg_DB_2obj.ZIP`.
  205. -# Unpack them.
  206. -# To load data, for example, for `1 object` dataset, run:
  207. ~~~
  208. ./opencv/build/bin/example_datasets_is_weizmann -p=/home/user/path_to_unpacked_folder/1obj/
  209. ~~~
  210. @defgroup datasets_msm Multiview Stereo Matching
  211. ### EPFL Multi-View Stereo
  212. Implements loading dataset:
  213. "EPFL Multi-View Stereo": <http://cvlab.epfl.ch/data/strechamvs>
  214. Usage:
  215. -# From link above download dataset files:
  216. `castle_dense\castle_dense_large\castle_entry\fountain\herzjesu_dense\herzjesu_dense_large_bounding\cameras\images\p.tar.gz`.
  217. -# Unpack them in separate folder for each object. For example, for "fountain", in folder `fountain/` :
  218. `fountain_dense_bounding.tar.gz -> bounding/`,
  219. `fountain_dense_cameras.tar.gz -> camera/`,
  220. `fountain_dense_images.tar.gz -> png/`,
  221. `fountain_dense_p.tar.gz -> P/`
  222. -# To load data, for example, for "fountain", run:
  223. ~~~
  224. ./opencv/build/bin/example_datasets_msm_epfl -p=/home/user/path_to_unpacked_folder/fountain/
  225. ~~~
  226. ### Stereo – Middlebury Computer Vision
  227. Implements loading dataset:
  228. "Stereo – Middlebury Computer Vision": <http://vision.middlebury.edu/mview/>
  229. Usage:
  230. -# From link above download dataset files:
  231. `dino\dinoRing\dinoSparseRing\temple\templeRing\templeSparseRing.zip`
  232. -# Unpack them.
  233. -# To load data, for example "temple" dataset, run:
  234. ~~~
  235. ./opencv/build/bin/example_datasets_msm_middlebury -p=/home/user/path_to_unpacked_folder/temple/
  236. ~~~
  237. @defgroup datasets_or Object Recognition
  238. ### ImageNet
  239. Implements loading dataset: "ImageNet": <http://www.image-net.org/>
  240. Usage:
  241. -# From link above download dataset files:
  242. `ILSVRC2010_images_train.tar\ILSVRC2010_images_test.tar\ILSVRC2010_images_val.tar` & devkit:
  243. `ILSVRC2010_devkit-1.0.tar.gz` (Implemented loading of 2010 dataset as only this dataset has ground
  244. truth for test data, but structure for ILSVRC2014 is similar)
  245. -# Unpack them to: `some_folder/train/`, `some_folder/test/`, `some_folder/val` &
  246. `some_folder/ILSVRC2010_validation_ground_truth.txt`,
  247. `some_folder/ILSVRC2010_test_ground_truth.txt`.
  248. -# Create file with labels: `some_folder/labels.txt`, for example, using python script below (each
  249. file's row format: `synset,labelID,description`. For example: "n07751451,18,plum").
  250. -# Unpack all tar files in train.
  251. -# To load data run:
  252. ~~~
  253. ./opencv/build/bin/example_datasets_or_imagenet -p=/home/user/some_folder/
  254. ~~~
  255. Python script to parse `meta.mat`:
  256. ~~~{py}
  257. import scipy.io
  258. meta_mat = scipy.io.loadmat("devkit-1.0/data/meta.mat")
  259. labels_dic = dict((m[0][1][0], m[0][0][0][0]-1) for m in meta_mat['synsets']
  260. label_names_dic = dict((m[0][1][0], m[0][2][0]) for m in meta_mat['synsets']
  261. for label in labels_dic.keys():
  262. print "{0},{1},{2}".format(label, labels_dic[label], label_names_dic[label])
  263. ~~~
  264. ### MNIST
  265. Implements loading dataset:
  266. "MNIST": <http://yann.lecun.com/exdb/mnist/>
  267. Usage:
  268. -# From link above download dataset files:
  269. `t10k-images-idx3-ubyte.gz`, `t10k-labels-idx1-ubyte.gz`, `train-images-idx3-ubyte.gz`, `train-labels-idx1-ubyte.gz`.
  270. -# Unpack them.
  271. -# To load data run:
  272. ~~~
  273. ./opencv/build/bin/example_datasets_or_mnist -p=/home/user/path_to_unpacked_files/
  274. ~~~
  275. ### SUN Database
  276. Implements loading dataset:
  277. "SUN Database, Scene Recognition Benchmark. SUN397": <http://vision.cs.princeton.edu/projects/2010/SUN/>
  278. Usage:
  279. -# From link above download dataset file: `SUN397.tar` & file with splits: `Partitions.zip`
  280. -# Unpack `SUN397.tar` into folder: `SUN397/` & `Partitions.zip` into folder: `SUN397/Partitions/`
  281. -# To load data run:
  282. ~~~
  283. ./opencv/build/bin/example_datasets_or_sun -p=/home/user/path_to_unpacked_files/SUN397/
  284. ~~~
  285. @defgroup datasets_pd Pedestrian Detection
  286. ### Caltech Pedestrian Detection Benchmark
  287. Implements loading dataset:
  288. "Caltech Pedestrian Detection Benchmark": <http://www.vision.caltech.edu/Image_Datasets/CaltechPedestrians/>
  289. @note First version of Caltech Pedestrian dataset loading. Code to unpack all frames from seq files
  290. commented as their number is huge! So currently load only meta information without data. Also
  291. ground truth isn't processed, as need to convert it from mat files first.
  292. Usage:
  293. -# From link above download dataset files: `set00.tar`-`set10.tar`.
  294. -# Unpack them to separate folder.
  295. -# To load data run:
  296. ~~~
  297. ./opencv/build/bin/example_datasets_pd_caltech -p=/home/user/path_to_unpacked_folders/
  298. ~~~
  299. @defgroup datasets_slam SLAM
  300. ### KITTI Vision Benchmark
  301. Implements loading dataset:
  302. "KITTI Vision Benchmark": <http://www.cvlibs.net/datasets/kitti/eval_odometry.php>
  303. Usage:
  304. -# From link above download "Odometry" dataset files:
  305. `data_odometry_gray\data_odometry_color\data_odometry_velodyne\data_odometry_poses\data_odometry_calib.zip`.
  306. -# Unpack `data_odometry_poses.zip`, it creates folder `dataset/poses/`. After that unpack
  307. `data_odometry_gray.zip`, `data_odometry_color.zip`, `data_odometry_velodyne.zip`. Folder
  308. `dataset/sequences/` will be created with folders `00/..21/`. Each of these folders will contain:
  309. `image_0/`, `image_1/`, `image_2/`, `image_3/`, `velodyne/` and files `calib.txt` & `times.txt`.
  310. These two last files will be replaced after unpacking `data_odometry_calib.zip` at the end.
  311. -# To load data run:
  312. ~~~
  313. ./opencv/build/bin/example_datasets_slam_kitti -p=/home/user/path_to_unpacked_folder/dataset/
  314. ~~~
  315. ### TUMindoor Dataset
  316. Implements loading dataset:
  317. "TUMindoor Dataset": <http://www.navvis.lmt.ei.tum.de/dataset/>
  318. Usage:
  319. -# From link above download dataset files: `dslr\info\ladybug\pointcloud.tar.bz2` for each dataset:
  320. `11-11-28 (1st floor)\11-12-13 (1st floor N1)\11-12-17a (4th floor)\11-12-17b (3rd floor)\11-12-17c (Ground I)\11-12-18a (Ground II)\11-12-18b (2nd floor)`
  321. -# Unpack them in separate folder for each dataset.
  322. `dslr.tar.bz2 -> dslr/`,
  323. `info.tar.bz2 -> info/`,
  324. `ladybug.tar.bz2 -> ladybug/`,
  325. `pointcloud.tar.bz2 -> pointcloud/`.
  326. -# To load each dataset run:
  327. ~~~
  328. ./opencv/build/bin/example_datasets_slam_tumindoor -p=/home/user/path_to_unpacked_folders/
  329. ~~~
  330. @defgroup datasets_tr Text Recognition
  331. ### The Chars74K Dataset
  332. Implements loading dataset:
  333. "The Chars74K Dataset": <http://www.ee.surrey.ac.uk/CVSSP/demos/chars74k/>
  334. Usage:
  335. -# From link above download dataset files:
  336. `EnglishFnt\EnglishHnd\EnglishImg\KannadaHnd\KannadaImg.tgz`, `ListsTXT.tgz`.
  337. -# Unpack them.
  338. -# Move `.m` files from folder `ListsTXT/` to appropriate folder. For example,
  339. `English/list_English_Img.m` for `EnglishImg.tgz`.
  340. -# To load data, for example "EnglishImg", run:
  341. ~~~
  342. ./opencv/build/bin/example_datasets_tr_chars -p=/home/user/path_to_unpacked_folder/English/
  343. ~~~
  344. ### The Street View Text Dataset
  345. Implements loading dataset:
  346. "The Street View Text Dataset": <http://vision.ucsd.edu/~kai/svt/>
  347. Usage:
  348. -# From link above download dataset file: `svt.zip`.
  349. -# Unpack it.
  350. -# To load data run:
  351. ~~~
  352. ./opencv/build/bin/example_datasets_tr_svt -p=/home/user/path_to_unpacked_folder/svt/svt1/
  353. ~~~
  354. #### Benchmark
  355. For this dataset was implemented benchmark with accuracy (mean f1): 0.217
  356. To run benchmark execute:
  357. ~~~
  358. ./opencv/build/bin/example_datasets_tr_svt_benchmark -p=/home/user/path_to_unpacked_folders/svt/svt1/
  359. ~~~
  360. @defgroup datasets_track Tracking
  361. ### VOT 2015 Database
  362. Implements loading dataset:
  363. "VOT 2015 dataset comprises 60 short sequences showing various objects in challenging backgrounds.
  364. The sequences were chosen from a large pool of sequences including the ALOV dataset, OTB2 dataset,
  365. non-tracking datasets, Computer Vision Online, Professor Bob Fisher’s Image Database, Videezy,
  366. Center for Research in Computer Vision, University of Central Florida, USA, NYU Center for Genomics
  367. and Systems Biology, Data Wrangling, Open Access Directory and Learning and Recognition in Vision
  368. Group, INRIA, France. The VOT sequence selection protocol was applied to obtain a representative
  369. set of challenging sequences.": <http://box.vicos.si/vot/vot2015.zip>
  370. Usage:
  371. -# From link above download dataset file: `vot2015.zip`
  372. -# Unpack `vot2015.zip` into folder: `VOT2015/`
  373. -# To load data run:
  374. ~~~
  375. ./opencv/build/bin/example_datasets_track_vot -p=/home/user/path_to_unpacked_files/VOT2015/
  376. ~~~
  377. @}
  378. */
namespace cv
{
namespace datasets
{

//! @addtogroup datasets
//! @{

// Base type for a single dataset record.
// Intentionally empty: each concrete dataset loader defines its own record
// struct (file name, labels, bounding boxes, ...) derived from Object, and
// the generic Dataset interface below traffics only in Ptr<Object>.
struct Object
{
};

// Abstract base class shared by all dataset loaders in this module.
//
// Lifecycle: construct the concrete loader, call load(path) once to parse the
// dataset rooted at `path`, then read the splits back through the accessors.
// Implementations of the non-virtual members live in the module's .cpp files;
// only declarations are visible here.
class CV_EXPORTS Dataset
{
public:
    Dataset() {}
    virtual ~Dataset() {}

    // Parse the dataset located at `path` (a filesystem directory; the exact
    // expected layout is documented per-dataset in the module overview above)
    // and populate the train/test/validation containers.
    virtual void load(const std::string &path) = 0;

    // Accessors for one split of the loaded data. `splitNum` selects among
    // the splits filled by load(); datasets without multiple splits use
    // split 0. NOTE(review): the private `empty` member below suggests an
    // out-of-range splitNum returns that empty vector rather than throwing —
    // confirm against the implementation.
    std::vector< Ptr<Object> >& getTrain(int splitNum = 0);
    std::vector< Ptr<Object> >& getTest(int splitNum = 0);
    std::vector< Ptr<Object> >& getValidation(int splitNum = 0);

    // Number of splits produced by load().
    int getNumSplits() const;

protected:
    // One inner vector per split; filled by concrete loaders in load().
    std::vector< std::vector< Ptr<Object> > > train;
    std::vector< std::vector< Ptr<Object> > > test;
    std::vector< std::vector< Ptr<Object> > > validation;

private:
    // Fallback value the accessors can return by reference when no data is
    // available for the requested split (keeps the reference return valid).
    std::vector< Ptr<Object> > empty;
};

//! @}

}
}
#endif // OPENCV_DATASETS_DATASET_HPP