OpenMPCD
OnTheFlyStatistics.hpp
Go to the documentation of this file.
1 /**
2  * @file
3  * Defines the `OpenMPCD::OnTheFlyStatistics` class.
4  */
5 
6 #ifndef OPENMPCD_ONTHEFLYSTATISTICS_HPP
7 #define OPENMPCD_ONTHEFLYSTATISTICS_HPP
8 
11 
12 #include <boost/static_assert.hpp>
13 #include <boost/type_traits/is_integral.hpp>
14 
15 #include <cmath>
16 #include <string>
17 
18 namespace OpenMPCD
19 {
20 
21 /**
22  * Computes sample means and variances "on-the-fly" or "online", i.e. without
23  * storing the individual data points.
24  *
25  * For a description of the algorithm used, see "Formulas for Robust, One-Pass
26  * Parallel Computation of Covariances and Arbitrary-Order Statistical Moments"
27  * by Philippe Pébay, Sandia Report SAND2008-6212, 2008.
28  *
29  * @tparam T
30  * The data type. It must support addition and multiplication of two
31  * objects of this type, and division by an `unsigned long int`.
32  */
33 template<typename T>
35 {
36  public:
37  /**
38  * The constructor.
39  */
41  : sampleSize(0), mean(0), varianceHelper(0)
42  {
43  }
44 
45  public:
46  /**
47  * Returns the number of data points added so far.
48  */
49  std::size_t getSampleSize() const
50  {
51  return sampleSize; //the counter that addDatum increments once per added datum
52  }
53 
54  /**
55  * Returns the mean of all the values added so far.
56  *
57  * If no values have been added yet, returns `0`.
58  */
59  const T getSampleMean() const
60  {
61  return mean; //running mean, returned by value; it is constructed as 0 and only changed by addDatum
62  }
63 
64  /**
65  * Returns the unbiased sample variance of all the values added so far.
66  *
67  * The returned value contains Bessel's correction, i.e. the sum of
68  * squares of differences is divided by \f$ n - 1 \f$ rather than
69  * \f$ n \f$, where \f$ n \f$ is the sample size.
70  *
71  * If fewer than two values have been added so far, returns `0`.
72  */
73  const T getSampleVariance() const
74  {
75  if(getSampleSize() < 2) //guard: with 0 or 1 samples the unsigned divisor below would wrap around or be 0
76  return 0;
77 
78  return varianceHelper / (getSampleSize() - 1); //Bessel's correction: divide the accumulated sum by n - 1
79  }
80 
81  /**
82  * Returns the unbiased sample standard deviation of all the values
83  * added so far.
84  *
85  * The returned value contains Bessel's correction, i.e. the sum of
86  * squares of differences is divided by \f$ n - 1 \f$ rather than
87  * \f$ n \f$, where \f$ n \f$ is the sample size.
88  *
89  * If fewer than two values have been added so far, returns `0`.
90  */
92  {
93  return sqrt(getSampleVariance());
94  }
95 
96  /**
97  * Returns the standard error of the mean, i.e. the unbiased sample
98  * standard deviation divided by the square root of the sample size.
99  *
100  * @throw OpenMPCD::InvalidCallException
101  * If `OPENMPCD_DEBUG` is defined, throws if `getSampleSize()==0`.
102  */
104  {
107 
109  }
110 
111  /**
112  * Adds a datum to the sample.
113  *
114  * @param[in] datum The datum to add.
115  */
116  void addDatum(const T& datum)
117  {
118  //Since the division operator is used in this function, allowing
119  //integral data types here would lead to incorrect results. If
120  //needed, one could provide a template specialization for this.
121  BOOST_STATIC_ASSERT(!boost::is_integral<T>::value);
122 
123  ++sampleSize; //incremented before the division below, so the first datum divides by 1
124 
125  const T delta(datum - mean); //deviation of the new datum from the mean *before* this update
126 
127  mean += delta / sampleSize; //incremental update of the running mean
128  varianceHelper += delta * (datum - mean); //second factor deliberately uses the *updated* mean
129  }
130 
131  /**
132  * Returns a string that contains the state of this instance.
133  *
134  * Since the serialized state is stored as a string, it represents
135  * the state only approximately.
136  *
137  * Currently, this is implemented only for the cases where
138  * <c> boost::is_arithmetic<T>::value </c> is `true`.
139  *
140  * @see unserializeFromString
141  */
142  const std::string serializeToString() const;
143 
144  /**
145  * Discards the current state, and loads the state specified in the
146  * given string instead.
147  *
148  * Currently, this is implemented only for the cases where
149  * <c> boost::is_arithmetic<T>::value </c> is `true`.
150  *
151  * The user is responsible for attempting to load only such serialized
152  * states that have been generated with a type compatible with `T`.
153  *
154  * @throw InvalidArgumentException
155  * Throws if `state` does not encode a valid state.
156  *
157  * @param[in] state
158  * The state to load. Must be a string created by
159  * `serializeToString`.
160  */
161  void unserializeFromString(const std::string& state);
162 
163  private:
164  std::size_t sampleSize; ///< The number of data points sampled.
165  T mean; ///< Holds the current arithmetic mean.
166  T varianceHelper; ///< Helper value for the calculation of the variance.
167 }; //class OnTheFlyStatistics
168 }
169 
171 
172 #endif //OPENMPCD_ONTHEFLYSTATISTICS_HPP
OpenMPCD::OnTheFlyStatistics::OnTheFlyStatistics
OnTheFlyStatistics()
The constructor.
Definition: OnTheFlyStatistics.hpp:40
OpenMPCD::InvalidCallException
Exception for a forbidden function call.
Definition: Exceptions.hpp:144
Exceptions.hpp
OpenMPCD::OnTheFlyStatistics::addDatum
void addDatum(const T &datum)
Adds a datum to the sample.
Definition: OnTheFlyStatistics.hpp:116
OpenMPCD::OnTheFlyStatistics::getSampleSize
std::size_t getSampleSize() const
Returns the number of data points added so far.
Definition: OnTheFlyStatistics.hpp:49
OpenMPCD::OnTheFlyStatistics::getSampleStandardDeviation
const T getSampleStandardDeviation() const
Returns the unbiased sample standard deviation of all the values added so far.
Definition: OnTheFlyStatistics.hpp:91
OpenMPCD::OnTheFlyStatistics::getSampleMean
const T getSampleMean() const
Returns the mean of all the values added so far.
Definition: OnTheFlyStatistics.hpp:59
OPENMPCD_DEBUG_ASSERT.hpp
OPENMPCD_DEBUG_ASSERT_EXCEPTIONTYPE
#define OPENMPCD_DEBUG_ASSERT_EXCEPTIONTYPE(assertion, ExceptionType)
Definition: OPENMPCD_DEBUG_ASSERT.hpp:76
OpenMPCD::OnTheFlyStatistics::serializeToString
const std::string serializeToString() const
Returns a string that contains the state of this instance.
Definition: ImplementationDetails/OnTheFlyStatistics.hpp:23
OpenMPCD::OnTheFlyStatistics::getStandardErrorOfTheMean
const T getStandardErrorOfTheMean() const
Returns the standard error of the mean, i.e.
Definition: OnTheFlyStatistics.hpp:103
OnTheFlyStatistics.hpp
OpenMPCD::OnTheFlyStatistics::unserializeFromString
void unserializeFromString(const std::string &state)
Discards the current state, and loads the state specified in the given string instead.
Definition: ImplementationDetails/OnTheFlyStatistics.hpp:40
OpenMPCD::Utility::MathematicalFunctions::sqrt
OPENMPCD_CUDA_HOST_AND_DEVICE T sqrt(const T x)
Returns the square root of the argument.
OpenMPCD::OnTheFlyStatistics
Computes sample means and variances "on-the-fly" or "online", i.e.
Definition: OnTheFlyStatistics.hpp:34