KJB
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
prob_histogram.h
Go to the documentation of this file.
1 /* $Id: prob_histogram.h 17639 2014-09-30 05:58:58Z ksimek $ */
2 /* =========================================================================== *
3  |
4  | Copyright (c) 1994-2010 by Kobus Barnard (author)
5  |
6  | Personal and educational use of this code is granted, provided that this
7  | header is kept intact, and that the authorship is not misrepresented, that
8  | its use is acknowledged in publications, and relevant papers are cited.
9  |
10  | For other use contact the author (kobus AT cs DOT arizona DOT edu).
11  |
12  | Please note that the code in this file has not necessarily been adequately
13  | tested. Naturally, there is no guarantee of performance, support, or fitness
14  | for any particular task. Nonetheless, I am interested in hearing about
15  | problems that you encounter.
16  |
17  | Author: Ernesto Brau, Jinyan Guan
18  * =========================================================================== */
19 
20 
21 #ifndef KJB_PROB_HISTOGRAM
22 #define KJB_PROB_HISTOGRAM
23 
28 #include <m_cpp/m_matrix.h>
29 #include <l_cpp/l_functors.h>
30 #include <l_cpp/l_exception.h>
31 
32 #include <vector>
33 #include <iterator>
34 #include <algorithm>
35 #include <utility>
36 #include <map>
37 
38 #include <boost/bind.hpp>
39 
40 namespace kjb
41 {
42 
47 class Histogram
48 {
49 private:
50  std::map<double, int> m_hist;
51  int m_num_bins;
52  double m_bin_size;
53 
54 public:
58  template<class InputIterator>
59  Histogram(InputIterator first, InputIterator last, int num_bins) :
60  m_num_bins(num_bins)
61  {
62  if(num_bins > 0)
63  {
64  create_continuous(first, last);
65  }
66  else
67  {
68  create_discrete(first, last);
69  }
70  }
71 
75  int num_bins() const
76  {
77  return m_num_bins;
78  }
79 
83  double bin_size() const
84  {
85  return m_bin_size;
86  }
87 
92  const std::map<double, int>& as_map() const
93  {
94  return m_hist;
95  }
96 
97 private:
98  template<class InputIterator>
99  void create_continuous(InputIterator first, InputIterator last);
100 
101  template<class InputIterator>
102  void create_discrete(InputIterator first, InputIterator last);
103 };
104 
105 inline std::pair<double, int> make_double_int_pair_(double d, int i)
106 {
107  return std::make_pair(d, i);
108 }
109 
110 template<class InputIterator>
111 void Histogram::create_continuous(InputIterator first, InputIterator last)
112 {
113  double mn = *std::min_element(first, last);
114  double mx = *std::max_element(first, last);
115 
116  m_bin_size = (mx - mn) / m_num_bins;
117 
118  std::generate_n(std::insert_iterator<std::map<double, int> >(m_hist, m_hist.begin()), m_num_bins,
119  boost::bind(make_double_int_pair_,
120  boost::bind<double>(Increase_by<double>(mn, m_bin_size)),
121  0));
122 
123  for(InputIterator p = first; p != last; p++)
124  {
125  std::map<double, int>::iterator pair_p = m_hist.upper_bound(*p);
126  pair_p--;
127  pair_p->second++;
128  }
129 }
130 
131 template<class InputIterator>
132 void Histogram::create_discrete(InputIterator first, InputIterator last)
133 {
134  for(InputIterator p = first; p != last; p++)
135  {
136  std::map<double, int>::iterator pair_p = m_hist.find(*p);
137  if(pair_p != m_hist.end())
138  {
139  pair_p->second++;
140  }
141  else
142  {
143  m_hist.insert(std::make_pair(*p, 1));
144  }
145  }
146 
147  m_num_bins = m_hist.size();
148 }
149 
150 /* \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ */
151 
153 {
154 public:
159  template<class InputIterator>
161  (
162  InputIterator first,
163  InputIterator last,
164  size_t num_bins_1,
165  size_t num_bins_2,
166  const std::pair<double, double>& range_1,
167  const std::pair<double, double>& range_2
168  ) : m_num_bins_1(num_bins_1),
169  m_num_bins_2(num_bins_2),
170  m_range_1(range_1),
171  m_range_2(range_2),
172  m_updated(false)
173  {
174  create_map(first, last);
175  }
176 
180  size_t num_bins_1() const;
181  size_t num_bins_2() const;
182 
186  double bin_size_1() const;
187  double bin_size_2() const;
188 
192  const std::pair<double, double>& range_1() const;
193  const std::pair<double, double>& range_2() const;
194 
198  const std::map<double, std::map<double, int> >& as_map() const;
199 
203  size_t& num_bins_1();
204  size_t& num_bins_2();
205 
206  double& bin_size_1();
207  double& bin_size_2();
208 
209  std::pair<double, double>& range_1();
210  std::pair<double, double>& range_2();
211 
216  const Matrix& as_matrix() const;
217 
222  Matrix normalized() const;
223 
230  Matrix densities() const;
231 
232 private:
233  size_t m_num_bins_1;
234  size_t m_num_bins_2;
235  double m_bin_size_1;
236  double m_bin_size_2;
237  std::pair<double, double> m_range_1;
238  std::pair<double, double> m_range_2;
239  std::map<double, std::map<double, int> > m_hist;
240  mutable Matrix m_hist_mat;
241  mutable bool m_updated;
242 
243 private:
244  template<class InputIterator>
245  void create_map (InputIterator first, InputIterator last);
246  void compute_matrix() const;
247 
248 }; //class Histogram_2d
249 
250 double chi_square(const Histogram_2d& hist_1, const Histogram_2d& hist_2);
251 
252 double chi_square(const Matrix& h1, const Matrix& h2);
253 
254 inline size_t Histogram_2d::num_bins_1() const
255 {
256  return m_num_bins_1;
257 }
258 
259 inline size_t Histogram_2d::num_bins_2() const
260 {
261  return m_num_bins_2;
262 }
263 
264 inline double Histogram_2d::bin_size_1() const
265 {
266  return m_bin_size_1;
267 }
268 
269 inline double Histogram_2d::bin_size_2() const
270 {
271  return m_bin_size_2;
272 }
273 
274 inline const std::pair<double, double>& Histogram_2d::range_1() const
275 {
276  return m_range_1;
277 }
278 
279 inline const std::pair<double, double>& Histogram_2d::range_2() const
280 {
281  return m_range_2;
282 }
283 
284 /* \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ */
285 
286 inline size_t& Histogram_2d::num_bins_1()
287 {
288  return m_num_bins_1;
289 }
290 
291 inline size_t& Histogram_2d::num_bins_2()
292 {
293  return m_num_bins_2;
294 }
295 
296 inline double& Histogram_2d::bin_size_1()
297 {
298  return m_bin_size_1;
299 }
300 
301 inline double& Histogram_2d::bin_size_2()
302 {
303  return m_bin_size_2;
304 }
305 
306 inline std::pair<double, double>& Histogram_2d::range_1()
307 {
308  return m_range_1;
309 }
310 
311 inline std::pair<double, double>& Histogram_2d::range_2()
312 {
313  return m_range_2;
314 }
315 
316 /* \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ */
317 
318 template<typename InputIterator>
319 void Histogram_2d::create_map
320 (
321  InputIterator first,
322  InputIterator last
323 )
324 {
325  double d1_range = m_range_1.second - m_range_1.first;
326  double d2_range = m_range_2.second - m_range_2.first;
327  assert(d1_range >= 0.0 && d2_range >= 0.0);
328  m_bin_size_1 = d1_range/(m_num_bins_1 - 1);
329  m_bin_size_2 = d2_range/(m_num_bins_2 - 1);
330 
331 
332  int r = 0;
333  double d1 = m_range_1.first;
334  for(size_t i = 0; i < m_num_bins_1; i++, d1 += m_bin_size_1)
335  {
336  double d2 = m_range_2.first;
337  for(size_t j = 0; j < m_num_bins_2; j++, d2 += m_bin_size_2)
338  {
339  m_hist[d1][d2] = 0;
340  }
341  r++;
342  }
343 
344  for(InputIterator p = first; p != last; p++)
345  {
346  std::map<double, std::map<double, int> >::iterator pair_p = m_hist.upper_bound((*p)[0]);
347  pair_p--;
348  std::map<double, int>::iterator pair_pp = (pair_p->second).upper_bound((*p)[1]);
349  pair_pp--;
350  pair_pp->second++;
351  }
352 }
353 
354 /* \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ */
355 
356 inline const Matrix& Histogram_2d::as_matrix() const
357 {
358  if(!m_updated)
359  {
360  compute_matrix();
361  }
362 
363  return m_hist_mat;
364 }
365 
366 /* \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ */
367 
369 {
370  if(!m_updated)
371  {
372  compute_matrix();
373  }
374  double total_counts = sum_matrix_cols(m_hist_mat).sum_vector_elements();
375  Matrix n_hist(m_hist_mat);
376  n_hist /= total_counts;
377  return n_hist;
378 }
379 
380 /* \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ */
381 
383 {
384  if(!m_updated)
385  {
386  compute_matrix();
387  }
388  double total_counts = sum_matrix_cols(m_hist_mat).sum_vector_elements();
389  double area = m_bin_size_1 * m_bin_size_2;
390  Matrix n_hist(m_hist_mat);
391  n_hist = n_hist / total_counts * area;
392  return n_hist;
393 }
394 
395 
396 } // namespace kjb
397 
398 #endif /* KJB_PROB_HISTOGRAM */
399 
size_t num_bins_1() const
Return the number of bins of this histogram.
Definition: prob_histogram.h:254
const std::pair< double, double > & range_1() const
Return the ranges histogram.
Definition: prob_histogram.h:274
Definition for the Matrix class, a thin wrapper on the KJB Matrix struct and its related functionalit...
double bin_size_2() const
Definition: prob_histogram.h:269
const std::map< double, std::map< double, int > > & as_map() const
Return a map of the histogram.
double bin_size() const
Return the bin size of this histogram.
Definition: prob_histogram.h:83
r
Definition: APPgetLargeConnectedEdges.m:127
Matrix normalized() const
Return the normalized 2D histogram of which the sum of all the elements 1.0.
Definition: prob_histogram.h:368
double chi_square(const Histogram_2d &hist_1, const Histogram_2d &hist_2)
Definition: prob_histogram.cpp:45
double bin_size_1() const
Return the bin size of this histogram.
Definition: prob_histogram.h:264
std::pair< double, int > make_double_int_pair_(double d, int i)
Definition: prob_histogram.h:105
const std::map< double, int > & as_map() const
Return a vector of the bins, represented by their lower bounds.
Definition: prob_histogram.h:92
Matrix::Vec_type sum_matrix_cols(const Matrix &m)
Compute the matrix's sum across columns (a.k.a. row-wise sum)
Definition: m_matrix.cpp:1972
const std::pair< double, double > & range_2() const
Definition: prob_histogram.h:279
A class that represents a histogram of data. ATM, the data must be doubles.
Definition: prob_histogram.h:47
const Matrix & as_matrix() const
Return the 2D histogram as a kjb::Matrix Note: the 2D histogrma is not normalized.
Definition: prob_histogram.h:356
Matrix densities() const
Return the densities of the 2D histogram for each cell p, density(p) = counts(p) * area(p)/total_coun...
Definition: prob_histogram.h:382
get the indices of edges in each direction for i
Definition: APPgetLargeConnectedEdges.m:48
This class implements matrices, in the linear-algebra sense, with real-valued elements.
Definition: m_matrix.h:94
Histogram(InputIterator first, InputIterator last, int num_bins)
Create a histogram from data with the given number of bins.
Definition: prob_histogram.h:59
size_t num_bins_2() const
Definition: prob_histogram.h:259
int num_bins() const
Return the number of bins of this histogram.
Definition: prob_histogram.h:75
Support for error handling exception classes in libKJB.
Histogram_2d(InputIterator first, InputIterator last, size_t num_bins_1, size_t num_bins_2, const std::pair< double, double > &range_1, const std::pair< double, double > &range_2)
Create a 2D histogram from bivirate data given the number of bins of each dimension.
Definition: prob_histogram.h:161
Definition: prob_histogram.h:152
Value_type sum_vector_elements() const
Return this vector's elements sum.
Definition: m_vector.h:1512