123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588 |
- #ifndef OPENCV_FLANN_AUTOTUNED_INDEX_H_
- #define OPENCV_FLANN_AUTOTUNED_INDEX_H_
- #include "general.h"
- #include "nn_index.h"
- #include "ground_truth.h"
- #include "index_testing.h"
- #include "sampling.h"
- #include "kdtree_index.h"
- #include "kdtree_single_index.h"
- #include "kmeans_index.h"
- #include "composite_index.h"
- #include "linear_index.h"
- #include "logger.h"
- namespace cvflann
- {
- template<typename Distance>
- NNIndex<Distance>* create_index_by_type(const Matrix<typename Distance::ElementType>& dataset, const IndexParams& params, const Distance& distance);
- struct AutotunedIndexParams : public IndexParams
- {
- AutotunedIndexParams(float target_precision = 0.8, float build_weight = 0.01, float memory_weight = 0, float sample_fraction = 0.1)
- {
- (*this)["algorithm"] = FLANN_INDEX_AUTOTUNED;
-
- (*this)["target_precision"] = target_precision;
-
- (*this)["build_weight"] = build_weight;
-
- (*this)["memory_weight"] = memory_weight;
-
- (*this)["sample_fraction"] = sample_fraction;
- }
- };
- template <typename Distance>
- class AutotunedIndex : public NNIndex<Distance>
- {
- public:
- typedef typename Distance::ElementType ElementType;
- typedef typename Distance::ResultType DistanceType;
- AutotunedIndex(const Matrix<ElementType>& inputData, const IndexParams& params = AutotunedIndexParams(), Distance d = Distance()) :
- dataset_(inputData), distance_(d)
- {
- target_precision_ = get_param(params, "target_precision",0.8f);
- build_weight_ = get_param(params,"build_weight", 0.01f);
- memory_weight_ = get_param(params, "memory_weight", 0.0f);
- sample_fraction_ = get_param(params,"sample_fraction", 0.1f);
- bestIndex_ = NULL;
- }
- AutotunedIndex(const AutotunedIndex&);
- AutotunedIndex& operator=(const AutotunedIndex&);
- virtual ~AutotunedIndex()
- {
- if (bestIndex_ != NULL) {
- delete bestIndex_;
- bestIndex_ = NULL;
- }
- }
-
- virtual void buildIndex()
- {
- std::ostringstream stream;
- bestParams_ = estimateBuildParams();
- print_params(bestParams_, stream);
- Logger::info("----------------------------------------------------\n");
- Logger::info("Autotuned parameters:\n");
- Logger::info("%s", stream.str().c_str());
- Logger::info("----------------------------------------------------\n");
- bestIndex_ = create_index_by_type(dataset_, bestParams_, distance_);
- bestIndex_->buildIndex();
- speedup_ = estimateSearchParams(bestSearchParams_);
- stream.str(std::string());
- print_params(bestSearchParams_, stream);
- Logger::info("----------------------------------------------------\n");
- Logger::info("Search parameters:\n");
- Logger::info("%s", stream.str().c_str());
- Logger::info("----------------------------------------------------\n");
- }
-
- virtual void saveIndex(FILE* stream)
- {
- save_value(stream, (int)bestIndex_->getType());
- bestIndex_->saveIndex(stream);
- save_value(stream, get_param<int>(bestSearchParams_, "checks"));
- }
-
- virtual void loadIndex(FILE* stream)
- {
- int index_type;
- load_value(stream, index_type);
- IndexParams params;
- params["algorithm"] = (flann_algorithm_t)index_type;
- bestIndex_ = create_index_by_type<Distance>(dataset_, params, distance_);
- bestIndex_->loadIndex(stream);
- int checks;
- load_value(stream, checks);
- bestSearchParams_["checks"] = checks;
- }
-
- virtual void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams)
- {
- int checks = get_param<int>(searchParams,"checks",FLANN_CHECKS_AUTOTUNED);
- if (checks == FLANN_CHECKS_AUTOTUNED) {
- bestIndex_->findNeighbors(result, vec, bestSearchParams_);
- }
- else {
- bestIndex_->findNeighbors(result, vec, searchParams);
- }
- }
- IndexParams getParameters() const
- {
- return bestIndex_->getParameters();
- }
- SearchParams getSearchParameters() const
- {
- return bestSearchParams_;
- }
- float getSpeedup() const
- {
- return speedup_;
- }
-
- virtual size_t size() const
- {
- return bestIndex_->size();
- }
-
- virtual size_t veclen() const
- {
- return bestIndex_->veclen();
- }
-
- virtual int usedMemory() const
- {
- return bestIndex_->usedMemory();
- }
-
- virtual flann_algorithm_t getType() const
- {
- return FLANN_INDEX_AUTOTUNED;
- }
- private:
- struct CostData
- {
- float searchTimeCost;
- float buildTimeCost;
- float memoryCost;
- float totalCost;
- IndexParams params;
- };
- void evaluate_kmeans(CostData& cost)
- {
- StartStopTimer t;
- int checks;
- const int nn = 1;
- Logger::info("KMeansTree using params: max_iterations=%d, branching=%d\n",
- get_param<int>(cost.params,"iterations"),
- get_param<int>(cost.params,"branching"));
- KMeansIndex<Distance> kmeans(sampledDataset_, cost.params, distance_);
-
- t.start();
- kmeans.buildIndex();
- t.stop();
- float buildTime = (float)t.value;
-
- float searchTime = test_index_precision(kmeans, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
- float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
- cost.memoryCost = (kmeans.usedMemory() + datasetMemory) / datasetMemory;
- cost.searchTimeCost = searchTime;
- cost.buildTimeCost = buildTime;
- Logger::info("KMeansTree buildTime=%g, searchTime=%g, build_weight=%g\n", buildTime, searchTime, build_weight_);
- }
- void evaluate_kdtree(CostData& cost)
- {
- StartStopTimer t;
- int checks;
- const int nn = 1;
- Logger::info("KDTree using params: trees=%d\n", get_param<int>(cost.params,"trees"));
- KDTreeIndex<Distance> kdtree(sampledDataset_, cost.params, distance_);
- t.start();
- kdtree.buildIndex();
- t.stop();
- float buildTime = (float)t.value;
-
- float searchTime = test_index_precision(kdtree, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
- float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
- cost.memoryCost = (kdtree.usedMemory() + datasetMemory) / datasetMemory;
- cost.searchTimeCost = searchTime;
- cost.buildTimeCost = buildTime;
- Logger::info("KDTree buildTime=%g, searchTime=%g\n", buildTime, searchTime);
- }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- void optimizeKMeans(std::vector<CostData>& costs)
- {
- Logger::info("KMEANS, Step 1: Exploring parameter space\n");
-
- int maxIterations[] = { 1, 5, 10, 15 };
- int branchingFactors[] = { 16, 32, 64, 128, 256 };
- int kmeansParamSpaceSize = FLANN_ARRAY_LEN(maxIterations) * FLANN_ARRAY_LEN(branchingFactors);
- costs.reserve(costs.size() + kmeansParamSpaceSize);
-
- for (size_t i = 0; i < FLANN_ARRAY_LEN(maxIterations); ++i) {
- for (size_t j = 0; j < FLANN_ARRAY_LEN(branchingFactors); ++j) {
- CostData cost;
- cost.params["algorithm"] = FLANN_INDEX_KMEANS;
- cost.params["centers_init"] = FLANN_CENTERS_RANDOM;
- cost.params["iterations"] = maxIterations[i];
- cost.params["branching"] = branchingFactors[j];
- evaluate_kmeans(cost);
- costs.push_back(cost);
- }
- }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- }
- void optimizeKDTree(std::vector<CostData>& costs)
- {
- Logger::info("KD-TREE, Step 1: Exploring parameter space\n");
-
- int testTrees[] = { 1, 4, 8, 16, 32 };
-
- for (size_t i = 0; i < FLANN_ARRAY_LEN(testTrees); ++i) {
- CostData cost;
- cost.params["algorithm"] = FLANN_INDEX_KDTREE;
- cost.params["trees"] = testTrees[i];
- evaluate_kdtree(cost);
- costs.push_back(cost);
- }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- }
-
- IndexParams estimateBuildParams()
- {
- std::vector<CostData> costs;
- int sampleSize = int(sample_fraction_ * dataset_.rows);
- int testSampleSize = std::min(sampleSize / 10, 1000);
- Logger::info("Entering autotuning, dataset size: %d, sampleSize: %d, testSampleSize: %d, target precision: %g\n", dataset_.rows, sampleSize, testSampleSize, target_precision_);
-
-
- if (testSampleSize < 10) {
- Logger::info("Choosing linear, dataset too small\n");
- return LinearIndexParams();
- }
-
- sampledDataset_ = random_sample(dataset_, sampleSize);
-
- testDataset_ = random_sample(sampledDataset_, testSampleSize, true);
-
- Logger::info("Computing ground truth... \n");
- gt_matches_ = Matrix<int>(new int[testDataset_.rows], testDataset_.rows, 1);
- StartStopTimer t;
- t.start();
- compute_ground_truth<Distance>(sampledDataset_, testDataset_, gt_matches_, 0, distance_);
- t.stop();
- CostData linear_cost;
- linear_cost.searchTimeCost = (float)t.value;
- linear_cost.buildTimeCost = 0;
- linear_cost.memoryCost = 0;
- linear_cost.params["algorithm"] = FLANN_INDEX_LINEAR;
- costs.push_back(linear_cost);
-
- Logger::info("Autotuning parameters...\n");
- optimizeKMeans(costs);
- optimizeKDTree(costs);
- float bestTimeCost = costs[0].searchTimeCost;
- for (size_t i = 0; i < costs.size(); ++i) {
- float timeCost = costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost;
- if (timeCost < bestTimeCost) {
- bestTimeCost = timeCost;
- }
- }
- float bestCost = costs[0].searchTimeCost / bestTimeCost;
- IndexParams bestParams = costs[0].params;
- if (bestTimeCost > 0) {
- for (size_t i = 0; i < costs.size(); ++i) {
- float crtCost = (costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost) / bestTimeCost +
- memory_weight_ * costs[i].memoryCost;
- if (crtCost < bestCost) {
- bestCost = crtCost;
- bestParams = costs[i].params;
- }
- }
- }
- delete[] gt_matches_.data;
- delete[] testDataset_.data;
- delete[] sampledDataset_.data;
- return bestParams;
- }
-
- float estimateSearchParams(SearchParams& searchParams)
- {
- const int nn = 1;
- const size_t SAMPLE_COUNT = 1000;
- assert(bestIndex_ != NULL);
- float speedup = 0;
- int samples = (int)std::min(dataset_.rows / 10, SAMPLE_COUNT);
- if (samples > 0) {
- Matrix<ElementType> testDataset = random_sample(dataset_, samples);
- Logger::info("Computing ground truth\n");
-
- Matrix<int> gt_matches(new int[testDataset.rows], testDataset.rows, 1);
- StartStopTimer t;
- t.start();
- compute_ground_truth<Distance>(dataset_, testDataset, gt_matches, 1, distance_);
- t.stop();
- float linear = (float)t.value;
- int checks;
- Logger::info("Estimating number of checks\n");
- float searchTime;
- float cb_index;
- if (bestIndex_->getType() == FLANN_INDEX_KMEANS) {
- Logger::info("KMeans algorithm, estimating cluster border factor\n");
- KMeansIndex<Distance>* kmeans = (KMeansIndex<Distance>*)bestIndex_;
- float bestSearchTime = -1;
- float best_cb_index = -1;
- int best_checks = -1;
- for (cb_index = 0; cb_index < 1.1f; cb_index += 0.2f) {
- kmeans->set_cb_index(cb_index);
- searchTime = test_index_precision(*kmeans, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
- if ((searchTime < bestSearchTime) || (bestSearchTime == -1)) {
- bestSearchTime = searchTime;
- best_cb_index = cb_index;
- best_checks = checks;
- }
- }
- searchTime = bestSearchTime;
- cb_index = best_cb_index;
- checks = best_checks;
- kmeans->set_cb_index(best_cb_index);
- Logger::info("Optimum cb_index: %g\n", cb_index);
- bestParams_["cb_index"] = cb_index;
- }
- else {
- searchTime = test_index_precision(*bestIndex_, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
- }
- Logger::info("Required number of checks: %d \n", checks);
- searchParams["checks"] = checks;
- speedup = linear / searchTime;
- delete[] gt_matches.data;
- delete[] testDataset.data;
- }
- return speedup;
- }
- private:
- NNIndex<Distance>* bestIndex_;
- IndexParams bestParams_;
- SearchParams bestSearchParams_;
- Matrix<ElementType> sampledDataset_;
- Matrix<ElementType> testDataset_;
- Matrix<int> gt_matches_;
- float speedup_;
-
- const Matrix<ElementType> dataset_;
-
- float target_precision_;
- float build_weight_;
- float memory_weight_;
- float sample_fraction_;
- Distance distance_;
- };
- }
- #endif
|