median.hpp 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // median.hpp
  3. //
  4. // Copyright 2006 Eric Niebler, Olivier Gygi. Distributed under the Boost
  5. // Software License, Version 1.0. (See accompanying file
  6. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. #ifndef BOOST_ACCUMULATORS_STATISTICS_MEDIAN_HPP_EAN_28_10_2005
  8. #define BOOST_ACCUMULATORS_STATISTICS_MEDIAN_HPP_EAN_28_10_2005
  9. #include <boost/mpl/placeholders.hpp>
  10. #include <boost/range/iterator_range.hpp>
  11. #include <boost/accumulators/framework/accumulator_base.hpp>
  12. #include <boost/accumulators/framework/extractor.hpp>
  13. #include <boost/accumulators/numeric/functional.hpp>
  14. #include <boost/accumulators/framework/parameters/sample.hpp>
  15. #include <boost/accumulators/framework/depends_on.hpp>
  16. #include <boost/accumulators/statistics_fwd.hpp>
  17. #include <boost/accumulators/statistics/count.hpp>
  18. #include <boost/accumulators/statistics/p_square_quantile.hpp>
  19. #include <boost/accumulators/statistics/density.hpp>
  20. #include <boost/accumulators/statistics/p_square_cumul_dist.hpp>
  21. namespace boost { namespace accumulators
  22. {
  23. namespace impl
  24. {
  25. ///////////////////////////////////////////////////////////////////////////////
  26. // median_impl
  27. //
  28. /**
  29. @brief Median estimation based on the \f$P^2\f$ quantile estimator
  30. The \f$P^2\f$ algorithm is invoked with a quantile probability of 0.5.
  31. */
  32. template<typename Sample>
  33. struct median_impl
  34. : accumulator_base
  35. {
  36. // for boost::result_of
  37. typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type result_type;
  38. median_impl(dont_care) {}
  39. template<typename Args>
  40. result_type result(Args const &args) const
  41. {
  42. return p_square_quantile_for_median(args);
  43. }
  44. };
  45. ///////////////////////////////////////////////////////////////////////////////
  46. // with_density_median_impl
  47. //
  48. /**
  49. @brief Median estimation based on the density estimator
  50. The algorithm determines the bin in which the \f$0.5*cnt\f$-th sample lies, \f$cnt\f$ being
  51. the total number of samples. It returns the approximate horizontal position of this sample,
  52. based on a linear interpolation inside the bin.
  53. */
  54. template<typename Sample>
  55. struct with_density_median_impl
  56. : accumulator_base
  57. {
  58. typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
  59. typedef std::vector<std::pair<float_type, float_type> > histogram_type;
  60. typedef iterator_range<typename histogram_type::iterator> range_type;
  61. // for boost::result_of
  62. typedef float_type result_type;
  63. template<typename Args>
  64. with_density_median_impl(Args const &args)
  65. : sum(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
  66. , is_dirty(true)
  67. {
  68. }
  69. void operator ()(dont_care)
  70. {
  71. this->is_dirty = true;
  72. }
  73. template<typename Args>
  74. result_type result(Args const &args) const
  75. {
  76. if (this->is_dirty)
  77. {
  78. this->is_dirty = false;
  79. std::size_t cnt = count(args);
  80. range_type histogram = density(args);
  81. typename range_type::iterator it = histogram.begin();
  82. while (this->sum < 0.5 * cnt)
  83. {
  84. this->sum += it->second * cnt;
  85. ++it;
  86. }
  87. --it;
  88. float_type over = numeric::fdiv(this->sum - 0.5 * cnt, it->second * cnt);
  89. this->median = it->first * over + (it + 1)->first * (1. - over);
  90. }
  91. return this->median;
  92. }
  93. private:
  94. mutable float_type sum;
  95. mutable bool is_dirty;
  96. mutable float_type median;
  97. };
  98. ///////////////////////////////////////////////////////////////////////////////
  99. // with_p_square_cumulative_distribution_median_impl
  100. //
  101. /**
  102. @brief Median estimation based on the \f$P^2\f$ cumulative distribution estimator
  103. The algorithm determines the first (leftmost) bin with a height exceeding 0.5. It
  104. returns the approximate horizontal position of where the cumulative distribution
  105. equals 0.5, based on a linear interpolation inside the bin.
  106. */
  107. template<typename Sample>
  108. struct with_p_square_cumulative_distribution_median_impl
  109. : accumulator_base
  110. {
  111. typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
  112. typedef std::vector<std::pair<float_type, float_type> > histogram_type;
  113. typedef iterator_range<typename histogram_type::iterator> range_type;
  114. // for boost::result_of
  115. typedef float_type result_type;
  116. with_p_square_cumulative_distribution_median_impl(dont_care)
  117. : is_dirty(true)
  118. {
  119. }
  120. void operator ()(dont_care)
  121. {
  122. this->is_dirty = true;
  123. }
  124. template<typename Args>
  125. result_type result(Args const &args) const
  126. {
  127. if (this->is_dirty)
  128. {
  129. this->is_dirty = false;
  130. range_type histogram = p_square_cumulative_distribution(args);
  131. typename range_type::iterator it = histogram.begin();
  132. while (it->second < 0.5)
  133. {
  134. ++it;
  135. }
  136. float_type over = numeric::fdiv(it->second - 0.5, it->second - (it - 1)->second);
  137. this->median = it->first * over + (it + 1)->first * ( 1. - over );
  138. }
  139. return this->median;
  140. }
  141. private:
  142. mutable bool is_dirty;
  143. mutable float_type median;
  144. };
  145. } // namespace impl
  146. ///////////////////////////////////////////////////////////////////////////////
  147. // tag::median
  148. // tag::with_densisty_median
  149. // tag::with_p_square_cumulative_distribution_median
  150. //
  151. namespace tag
  152. {
  153. struct median
  154. : depends_on<p_square_quantile_for_median>
  155. {
  156. /// INTERNAL ONLY
  157. ///
  158. typedef accumulators::impl::median_impl<mpl::_1> impl;
  159. };
  160. struct with_density_median
  161. : depends_on<count, density>
  162. {
  163. /// INTERNAL ONLY
  164. ///
  165. typedef accumulators::impl::with_density_median_impl<mpl::_1> impl;
  166. };
  167. struct with_p_square_cumulative_distribution_median
  168. : depends_on<p_square_cumulative_distribution>
  169. {
  170. /// INTERNAL ONLY
  171. ///
  172. typedef accumulators::impl::with_p_square_cumulative_distribution_median_impl<mpl::_1> impl;
  173. };
  174. }
  175. ///////////////////////////////////////////////////////////////////////////////
  176. // extract::median
  177. // extract::with_density_median
  178. // extract::with_p_square_cumulative_distribution_median
  179. //
  180. namespace extract
  181. {
  182. extractor<tag::median> const median = {};
  183. extractor<tag::with_density_median> const with_density_median = {};
  184. extractor<tag::with_p_square_cumulative_distribution_median> const with_p_square_cumulative_distribution_median = {};
  185. BOOST_ACCUMULATORS_IGNORE_GLOBAL(median)
  186. BOOST_ACCUMULATORS_IGNORE_GLOBAL(with_density_median)
  187. BOOST_ACCUMULATORS_IGNORE_GLOBAL(with_p_square_cumulative_distribution_median)
  188. }
  189. using extract::median;
  190. using extract::with_density_median;
  191. using extract::with_p_square_cumulative_distribution_median;
  192. // median(with_p_square_quantile) -> median
  193. template<>
  194. struct as_feature<tag::median(with_p_square_quantile)>
  195. {
  196. typedef tag::median type;
  197. };
  198. // median(with_density) -> with_density_median
  199. template<>
  200. struct as_feature<tag::median(with_density)>
  201. {
  202. typedef tag::with_density_median type;
  203. };
  204. // median(with_p_square_cumulative_distribution) -> with_p_square_cumulative_distribution_median
  205. template<>
  206. struct as_feature<tag::median(with_p_square_cumulative_distribution)>
  207. {
  208. typedef tag::with_p_square_cumulative_distribution_median type;
  209. };
  210. // for the purposes of feature-based dependency resolution,
  211. // with_density_median and with_p_square_cumulative_distribution_median
  212. // provide the same feature as median
  213. template<>
  214. struct feature_of<tag::with_density_median>
  215. : feature_of<tag::median>
  216. {
  217. };
  218. template<>
  219. struct feature_of<tag::with_p_square_cumulative_distribution_median>
  220. : feature_of<tag::median>
  221. {
  222. };
  223. // So that median can be automatically substituted with
  224. // weighted_median when the weight parameter is non-void.
  225. template<>
  226. struct as_weighted_feature<tag::median>
  227. {
  228. typedef tag::weighted_median type;
  229. };
  230. template<>
  231. struct feature_of<tag::weighted_median>
  232. : feature_of<tag::median>
  233. {
  234. };
  235. // So that with_density_median can be automatically substituted with
  236. // with_density_weighted_median when the weight parameter is non-void.
  237. template<>
  238. struct as_weighted_feature<tag::with_density_median>
  239. {
  240. typedef tag::with_density_weighted_median type;
  241. };
  242. template<>
  243. struct feature_of<tag::with_density_weighted_median>
  244. : feature_of<tag::with_density_median>
  245. {
  246. };
  247. // So that with_p_square_cumulative_distribution_median can be automatically substituted with
  248. // with_p_square_cumulative_distribution_weighted_median when the weight parameter is non-void.
  249. template<>
  250. struct as_weighted_feature<tag::with_p_square_cumulative_distribution_median>
  251. {
  252. typedef tag::with_p_square_cumulative_distribution_weighted_median type;
  253. };
  254. template<>
  255. struct feature_of<tag::with_p_square_cumulative_distribution_weighted_median>
  256. : feature_of<tag::with_p_square_cumulative_distribution_median>
  257. {
  258. };
  259. }} // namespace boost::accumulators
  260. #endif