OpenMPCD
OnTheFlyStatisticsDDDA.hpp
Go to the documentation of this file.
1 /**
2  * @file
3  * Defines the `OpenMPCD::OnTheFlyStatisticsDDDA` class.
4  */
5 
6 #ifndef OPENMPCD_ONTHEFLYSTATISTICSDDDA_HPP
7 #define OPENMPCD_ONTHEFLYSTATISTICSDDDA_HPP
8 
10 
11 #include <boost/optional/optional.hpp>
12 
13 #include <vector>
14 
15 namespace OpenMPCD
16 {
17 
18 /**
19  * Computes sample means and their errors for (possibly) serially correlated
20  * data.
21  *
22  * The algorithm used is called "Dynamic Distributable Decorrelation
23  * Algorithm" (DDDA), and is described in @cite Kent2007,
24  * which in turn is partly based on @cite Flyvbjerg1989.
25  *
26  * @tparam T
27  * The data type. It must support addition and multiplication of two
28  * objects of this type, and division by an `unsigned long int`.
29  */
30 template<typename T>
32 {
33 public:
34  /**
35  * The constructor.
36  */
38 
39 public:
40  /**
41  * Adds a datum to the sample.
42  *
43  * It is assumed that the "time" intervals between subsequently added data
44  * are constant; here, "time" may, for example, refer to Molecular Dynamics
45  * or Monte Carlo steps.
46  *
47  * @param[in] datum The datum to add.
48  */
49  void addDatum(const T& datum);
50 
51  /**
52  * Returns the number of data points added so far.
53  */
54  std::size_t getSampleSize() const;
55 
56  /**
57  * Returns the mean of all the values added so far.
58  *
59  * Since the mean of all values added is returned, and the sample size may
60  * not be a power of 2, statistics with different blocking length may
61  * not incorporate the same amount of information. This may lead to
62  * difficulties when using the error estimates of statistics of different
63  * block lengths to estimate the error in the entire, possibly correlated
64  * data set, since the statistics of different blocking lengths do not
65  * necessarily incorporate the same measurements.
66  *
67  * @throw OpenMPCD::InvalidCallException
68  * Throws if no data have been added so far.
69  */
70  const T getSampleMean() const;
71 
72  /**
73  * Returns the largest block size for which there is at least one data
74  * point.
75  *
76  * @throw OpenMPCD::InvalidCallException
77  * Throws if no data have been added so far.
78  */
79  std::size_t getMaximumBlockSize() const;
80 
81  /**
82  * Returns the ID of the largest block size created so far.
83  */
84  std::size_t getMaximumBlockID() const;
85 
86  /**
87  * Returns whether the block with the given `blockID` has enough data to
88  * compute a sample variance.
89  *
90  * @throw OpenMPCD::InvalidArgumentException
91  * If `OPENMPCD_DEBUG` is defined, throws if `blockID` is out of
92  * range.
93  *
94  * @param[in] blockID
95  * The block ID, which must be in the range
96  * `[0, getMaximumBlockID()]`.
97  */
98  bool hasBlockVariance(const std::size_t blockID) const;
99 
100  /**
101  * Returns the sample variance in the block with the given `blockID`.
102  *
103  * @throw OpenMPCD::InvalidArgumentException
104  * If `OPENMPCD_DEBUG` is defined, throws if `blockID` is out of
105  * range.
106  * @throw OpenMPCD::InvalidCallException
107  * Throws if `!hasBlockVariance(blockID)`.
108  *
109  * @param[in] blockID
110  * The block ID, which must be in the range
111  * [0, `getMaximumBlockID()`].
112  */
113  const T getBlockVariance(const std::size_t blockID) const;
114 
115  /**
116  * Returns the sample standard deviation in the block with the given
117  * `blockID`.
118  *
119  * @throw OpenMPCD::InvalidArgumentException
120  * If `OPENMPCD_DEBUG` is defined, throws if `blockID` is out of
121  * range.
122  * @throw OpenMPCD::InvalidCallException
123  * Throws if `!hasBlockVariance(blockID)`.
124  *
125  * @param[in] blockID
126  * The block ID, which must be in the range
127  * [0, `getMaximumBlockID()`].
128  */
129  const T getBlockStandardDeviation(const std::size_t blockID) const;
130 
131  /**
132  * Returns the raw sample standard deviation, i.e. the sample standard
133  * deviation in block `0`.
134  *
135  * @throw OpenMPCD::InvalidCallException
136  * Throws if `!hasBlockVariance(0)`.
137  */
138  const T getSampleStandardDeviation() const;
139 
140  /**
141  * Returns an estimate for the standard deviation of the standard error of
142  * the mean for a given `blockID`.
143  *
144  * @throw OpenMPCD::InvalidArgumentException
145  * If `OPENMPCD_DEBUG` is defined, throws if `blockID` is out of
146  * range.
147  * @throw OpenMPCD::InvalidCallException
148  * Throws if `!hasBlockVariance(blockID)`.
149  *
150  * @param[in] blockID
151  * The block ID, which must be in the range
152  * [0, `getMaximumBlockID()`].
153  */
154  const T getBlockStandardErrorOfTheMean(const std::size_t blockID) const;
155 
156  /**
157  * Returns an estimate for the standard deviation of the standard error of
158  * the mean for a given `blockID`.
159  *
160  * The returned estimate corresponds to Eq. (28) in @cite Flyvbjerg1989.
161  *
162  * @throw OpenMPCD::InvalidArgumentException
163  * If `OPENMPCD_DEBUG` is defined, throws if `blockID` is out of
164  * range.
165  * @throw OpenMPCD::InvalidCallException
166  * Throws if `!hasBlockVariance(blockID)`.
167  *
168  * @param[in] blockID
169  * The block ID, which must be in the range
170  * [0, `getMaximumBlockID()`].
171  */
173  const std::size_t blockID) const;
174 
175  /**
176  * Returns the block ID corresponding to the optimal block size, in the
177  * sense that the corresponding block provides the most accurate estimate
178  * for the standard error of the mean.
179  *
180  * If there is no variance in the data, `0` is returned.
181  *
182  * The algorithm used is described in Section IV of @cite Lee2011.
183  *
184  * @throw OpenMPCD::InvalidCallException
185  * Throws if fewer than two data points have been added so far.
186  */
187  std::size_t getOptimalBlockIDForStandardErrorOfTheMean() const;
188 
189  /**
190  * Returns whether the sample is large enough for the estimate of the
191  * standard error of the mean, as provided by the block indicated by
192  * `getOptimalBlockIDForStandardErrorOfTheMean`, to be reliable.
193  *
194  * The algorithm used is described in Section IV of @cite Lee2011.
195  *
196  * @throw OpenMPCD::InvalidCallException
197  * Throws if fewer than two data points have been added so far.
198  */
200 
201  /**
202  * Returns the best estimation of the true standard error of the mean of
203  * the data, after decorrelation.
204  *
205  * @see optimalStandardErrorOfTheMeanEstimateIsReliable
206  *
207  * The algorithm used is described in Section IV of @cite Lee2011.
208  *
209  * @throw OpenMPCD::InvalidCallException
210  * Throws if fewer than two data points have been added so far.
211  */
212  const T getOptimalStandardErrorOfTheMean() const;
213 
214  /**
215  * Returns a string that contains the state of this instance.
216  *
217  * Since the serialized state is stored a string, it is representing the
218  * state only approximately.
219  *
220  * Currently, this is implemented only for the cases where
221  * <c> boost::is_arithmetic<T>::value </c> is `true`.
222  *
223  * @see unserializeFromString
224  */
225  const std::string serializeToString() const;
226 
227  /**
228  * Discards the current state, and loads the state specified in the given
229  * string instead.
230  *
231  * Currently, this is implemented only for the cases where
232  * <c> boost::is_arithmetic<T>::value </c> is `true`.
233  *
234  * The user is responsible for attempting to load only such serialized
235  * states that have been generated with a compatible with `T`.
236  *
237  * @throw InvalidArgumentException
238  * Throws if `state` does not encode a valid state.
239  *
240  * @param[in] state
241  * The state to load. Must be a string created by
242  * `serializeToString`.
243  */
244  void unserializeFromString(const std::string& state);
245 
246 private:
247  /**
248  * Adds a datum to the sample to the given block, and propagates the datum
249  * to the next block level.
250  *
251  * @param[in] datum The datum to add.
252  * @param[in] blockID The block to add the datum to.
253  */
254  void addDatum(const T& datum, const std::size_t blockID);
255 
256 private:
257  std::vector<OnTheFlyStatistics<T> > blocks;
258  ///< Holds statistics for the time series blocks.
259  std::vector<boost::optional<T> > waiting;
260  ///< Waiting data for higher-level blocks.
261 }; //class OnTheFlyStatisticsDDDA
262 } //namespace OpenMPCD
263 
265 
266 #endif //OPENMPCD_ONTHEFLYSTATISTICSDDDA_HPP
OpenMPCD::OnTheFlyStatisticsDDDA::getBlockStandardErrorOfTheMean
const T getBlockStandardErrorOfTheMean(const std::size_t blockID) const
Returns an estimate for the standard error of the mean for a given blockID.
Definition: ImplementationDetails/OnTheFlyStatisticsDDDA.hpp:114
OpenMPCD::OnTheFlyStatisticsDDDA::getMaximumBlockSize
std::size_t getMaximumBlockSize() const
Returns the largest block size for which there is at least one data point.
Definition: ImplementationDetails/OnTheFlyStatisticsDDDA.hpp:55
OpenMPCD::OnTheFlyStatisticsDDDA::unserializeFromString
void unserializeFromString(const std::string &state)
Discards the current state, and loads the state specified in the given string instead.
Definition: ImplementationDetails/OnTheFlyStatisticsDDDA.hpp:260
OpenMPCD::OnTheFlyStatisticsDDDA::getOptimalBlockIDForStandardErrorOfTheMean
std::size_t getOptimalBlockIDForStandardErrorOfTheMean() const
Returns the block ID corresponding to the optimal block size, in the sense that the corresponding block provides the most accurate estimate for the standard error of the mean.
Definition: ImplementationDetails/OnTheFlyStatisticsDDDA.hpp:147
OpenMPCD::OnTheFlyStatisticsDDDA::addDatum
void addDatum(const T &datum)
Adds a datum to the sample.
Definition: ImplementationDetails/OnTheFlyStatisticsDDDA.hpp:30
OpenMPCD::OnTheFlyStatisticsDDDA::getBlockVariance
const T getBlockVariance(const std::size_t blockID) const
Returns the sample variance in the block with the given blockID.
Definition: ImplementationDetails/OnTheFlyStatisticsDDDA.hpp:87
OnTheFlyStatisticsDDDA.hpp
OpenMPCD::OnTheFlyStatisticsDDDA::getMaximumBlockID
std::size_t getMaximumBlockID() const
Returns the ID of the largest block size created so far.
Definition: ImplementationDetails/OnTheFlyStatisticsDDDA.hpp:68
OpenMPCD::OnTheFlyStatisticsDDDA::getBlockStandardDeviation
const T getBlockStandardDeviation(const std::size_t blockID) const
Returns the sample standard deviation in the block with the given blockID.
Definition: ImplementationDetails/OnTheFlyStatisticsDDDA.hpp:101
OpenMPCD::OnTheFlyStatisticsDDDA::serializeToString
const std::string serializeToString() const
Returns a string that contains the state of this instance.
Definition: ImplementationDetails/OnTheFlyStatisticsDDDA.hpp:235
OpenMPCD::OnTheFlyStatisticsDDDA::getEstimatedStandardDeviationOfBlockStandardErrorOfTheMean
const T getEstimatedStandardDeviationOfBlockStandardErrorOfTheMean(const std::size_t blockID) const
Returns an estimate for the standard deviation of the standard error of the mean for a given blockID.
Definition: ImplementationDetails/OnTheFlyStatisticsDDDA.hpp:129
OnTheFlyStatistics.hpp
OpenMPCD::OnTheFlyStatisticsDDDA::OnTheFlyStatisticsDDDA
OnTheFlyStatisticsDDDA()
The constructor.
Definition: ImplementationDetails/OnTheFlyStatisticsDDDA.hpp:20
OpenMPCD::OnTheFlyStatisticsDDDA::getSampleMean
const T getSampleMean() const
Returns the mean of all the values added so far.
Definition: ImplementationDetails/OnTheFlyStatisticsDDDA.hpp:42
OpenMPCD::OnTheFlyStatisticsDDDA::hasBlockVariance
bool hasBlockVariance(const std::size_t blockID) const
Returns whether the block with the given blockID has enough data to compute a sample variance.
Definition: ImplementationDetails/OnTheFlyStatisticsDDDA.hpp:76
OpenMPCD::OnTheFlyStatisticsDDDA::getSampleStandardDeviation
const T getSampleStandardDeviation() const
Returns the raw sample standard deviation, i.e. the sample standard deviation in block 0.
Definition: ImplementationDetails/OnTheFlyStatisticsDDDA.hpp:108
OpenMPCD::OnTheFlyStatisticsDDDA
Computes sample means and their errors for (possibly) serially correlated data.
Definition: OnTheFlyStatisticsDDDA.hpp:31
OpenMPCD::OnTheFlyStatisticsDDDA::getOptimalStandardErrorOfTheMean
const T getOptimalStandardErrorOfTheMean() const
Returns the best estimation of the true standard error of the mean of the data, after decorrelation.
Definition: ImplementationDetails/OnTheFlyStatisticsDDDA.hpp:199
OpenMPCD::OnTheFlyStatisticsDDDA::optimalStandardErrorOfTheMeanEstimateIsReliable
bool optimalStandardErrorOfTheMeanEstimateIsReliable() const
Returns whether the sample is large enough for the estimate of the standard error of the mean, as provided by the block indicated by getOptimalBlockIDForStandardErrorOfTheMean, to be reliable.
Definition: ImplementationDetails/OnTheFlyStatisticsDDDA.hpp:189
OpenMPCD::OnTheFlyStatisticsDDDA::getSampleSize
std::size_t getSampleSize() const
Returns the number of data points added so far.
Definition: ImplementationDetails/OnTheFlyStatisticsDDDA.hpp:36