OpenMPCD
OnTheFlyStatisticsCollection.py
1 from __future__ import print_function
2 
class OnTheFlyStatisticsCollection:
    """
    Represents a collection of instances of `OnTheFlyStatistics`.

    The instances are stored in a dictionary; each key identifies one data
    point, and the instance stored there accumulates the sample belonging to
    that data point.
    """

    def __init__(self):
        """
        The constructor.

        Creates an empty collection.
        """

        self._data = {}


    def addData(self, data):
        """
        Adds the given data to the statistics in this instance.

        All values are validated before any of them is added, so that this
        instance is left untouched if `TypeError` is thrown.

        @throw TypeError
               Throws if `data` is not a dictionary, or if any of its values
               has a type other than the ones listed below.

        @param[in] data
                   A dictionary, the values of which will be supplied to the
                   instances of `OnTheFlyStatistics` stored at the respective
                   keys. The values may either be integers or floating point
                   values, or instances of `OnTheFlyStatistics`, in which case
                   the underlying sample will be merged with the current state.
        """

        if not isinstance(data, dict):
            raise TypeError()

        from .OnTheFlyStatistics import OnTheFlyStatistics

        allowedTypes = (int, float, OnTheFlyStatistics)

        # Validate everything up front; interleaving validation with the
        # updates below would leave a partially updated collection behind.
        for value in data.values():
            if not isinstance(value, allowedTypes):
                raise TypeError()

        for key, value in data.items():
            if key not in self._data:
                self._data[key] = OnTheFlyStatistics()

            if isinstance(value, OnTheFlyStatistics):
                self._data[key].mergeSample(value)
            else:
                self._data[key].addDatum(value)


    def getData(self):
        """
        Returns the current dictionary of instances of `OnTheFlyStatistics`.

        The returned object is the dictionary used internally by this
        instance, not a copy; changes made to it are visible to this instance.
        """

        return self._data


    def getOrderedData(self):
        """
        Returns the current instances of `OnTheFlyStatistics`, ordered by their
        key in an instance of `collections.OrderedDict`.

        The returned container is newly created, so entries can be added to or
        removed from it without changing the state of this instance. The
        values, however, are the very same objects that are stored in this
        instance; they are not copied.
        """

        from collections import OrderedDict

        return OrderedDict((key, self._data[key]) for key in sorted(self._data))


    def keepOnlyEveryNthDataPoint(self, keepEveryNth):
        """
        Keeps only every `keepEveryNth` data point, and discards the rest.

        The data points are considered in the order of their sorted keys; the
        ones kept are those at positions `keepEveryNth`, `2 * keepEveryNth`,
        and so forth (counting from `1`).

        @throw TypeError
               Throws if `keepEveryNth` is not an integer.
        @throw ValueError
               Throws if `keepEveryNth` is not positive.

        @param[in] keepEveryNth
                   Set to any integer value greater than `1` to discard
                   `keepEveryNth - 1` data points between data points that are
                   kept.
                   Setting this to `1` amounts to not changing this instance at
                   all.
        """

        if not isinstance(keepEveryNth, int):
            raise TypeError()
        if keepEveryNth <= 0:
            raise ValueError()

        if keepEveryNth == 1:
            return

        # Sort before touching `self._data`: sorting raises if the keys cannot
        # be compared with each other, and the stored data must survive that.
        keptKeys = sorted(self._data)[keepEveryNth - 1::keepEveryNth]

        self._data = dict((key, self._data[key]) for key in keptKeys)


    @staticmethod
    def _validateDisplayOption(option):
        """
        Checks one of the `show*` arguments of `getMPLAxes`.

        @throw TypeError
               Throws if `option` is neither a `bool` nor a `float`.
        @throw ValueError
               Throws if `option` is a `float` outside of the range `(0, 1]`.

        @param[in] option
                   The argument value to check.
        """

        if not isinstance(option, (bool, float)):
            raise TypeError()

        if isinstance(option, float):
            if option <= 0:
                raise ValueError()
            if option > 1:
                raise ValueError()


    def getMPLAxes(
            self,
            showStandardErrorOfTheMean = True,
            showStandardDeviation = 0.3,
            singularSampleTreatment = "raise",
            plotEveryNth = 1):
        """
        Returns an `matplotlib.axes.Axes` object that contains the current data.

        The keys of the data points are used as the horizontal coordinates,
        and the sample means as the vertical coordinates.

        @throw TypeError
               Throws if `showStandardErrorOfTheMean` or
               `showStandardDeviation` is neither a `bool` nor a `float`.
        @throw ValueError
               Throws if both `showStandardErrorOfTheMean` and
               `showStandardDeviation` are `True`, since the error bars can
               show only one of the two quantities.
               Also throws if either of the two is a floating-point value
               outside of the range `(0, 1]`, or if `singularSampleTreatment`
               is not one of the allowed strings.
        @throw RuntimeError
               Throws if `singularSampleTreatment` is `"raise"`, at least one
               of `showStandardErrorOfTheMean` and `showStandardDeviation` is
               not `False`, and there is a sample of sample size `1`.

        @param[in] showStandardErrorOfTheMean
                   Whether to show, for each data point, the standard error of
                   the mean using errorbars.
                   Set to `False` to not show the information. Set to `True` to
                   show the information as error bars. Set to a floating-point
                   value between `0.0` and `1.0` (excluding `0.0`) to use a
                   shaded area to show the information; the value is used as
                   the opacity of the shaded area.
        @param[in] showStandardDeviation
                   Whether to show the standard deviation as a shaded region
                   around the data points.
                   Set to `False` to not show the information. Set to `True` to
                   show the information as error bars. Set to a floating-point
                   value between `0.0` and `1.0` (excluding `0.0`) to use a
                   shaded area to show the information; the value is used as
                   the opacity of the shaded area.
        @param[in] singularSampleTreatment
                   If both `showStandardErrorOfTheMean` and
                   `showStandardDeviation` are `False`, this parameter has no
                   effect. Otherwise, it controls whether an exception of type
                   `RuntimeError` is to be raised if a data point consists of a
                   sample of sample size `1`, in which case no error bars or
                   standard deviations can be computed. To cause this behavior,
                   pass the string `"raise"`.
                   If the string `"discard"` is passed, those data points are
                   silently discarded.
                   If the string `"warn"` is passed, a warning message is
                   printed to the standard output stream if there are singular
                   samples. The affected data points are assigned a standard
                   deviation and standard error of the mean of `0`.
                   If the string `"warnAndDiscard"` is passed, a warning message
                   is printed to the standard output stream if there are
                   singular samples. Those samples will be discarded.
                   Other parameter values are not allowed in any case.
        @param[in] plotEveryNth
                   This parameter is accepted, but currently neither validated
                   nor used; all data points are plotted regardless of its
                   value.

        @return Returns the `matplotlib.axes.Axes` instance the data have been
                plotted into.
        """

        self._validateDisplayOption(showStandardErrorOfTheMean)
        self._validateDisplayOption(showStandardDeviation)

        # There is only one set of error bars, so it cannot show both
        # quantities at once.
        if showStandardErrorOfTheMean is True and showStandardDeviation is True:
            raise ValueError()

        discardingTreatments = ("discard", "warnAndDiscard")
        warningTreatments = ("warn", "warnAndDiscard")
        allowedTreatments = ("raise", "discard", "warn", "warnAndDiscard")
        if singularSampleTreatment not in allowedTreatments:
            raise ValueError()

        # Singular samples matter only if some measure of spread is shown.
        showsSpread = bool(showStandardErrorOfTheMean or showStandardDeviation)


        data = self.getOrderedData()


        import matplotlib.figure

        figure = matplotlib.figure.Figure()
        axes = figure.add_subplot(1, 1, 1)

        singularSampleKeys = []
        for key, value in data.items():
            assert value.getSampleSize() != 0

            if value.getSampleSize() == 1:
                singularSampleKeys.append(key)

        if showsSpread and singularSampleKeys:
            if singularSampleTreatment == "raise":
                message = "Samples with sample size 1 encountered:\n"
                message += str(singularSampleKeys)
                raise RuntimeError(message)

            if singularSampleTreatment in warningTreatments:
                print(
                    "WARNING: " + str(len(singularSampleKeys)) +
                    " samples with sample size 1 encountered:")
                print(singularSampleKeys)

            if singularSampleTreatment in discardingTreatments:
                # `data` is a container private to this call, so removing
                # entries here does not affect this instance.
                for key in singularSampleKeys:
                    del data[key]


        # Materialize the keys: a dictionary view is not a sequence, and
        # might not be accepted as plot coordinates.
        keys = list(data)
        values = [value.getSampleMean() for value in data.values()]

        # Singular samples that have not been discarded get a spread of `0`,
        # since neither quantity can be computed from a single datum.
        standardErrorsOfTheMean = None
        if showStandardErrorOfTheMean:
            standardErrorsOfTheMean = [
                0 if value.getSampleSize() == 1
                else value.getStandardErrorOfTheMean()
                for value in data.values()]

        standardDeviations = None
        if showStandardDeviation:
            standardDeviations = [
                0 if value.getSampleSize() == 1
                else value.getSampleStandardDeviation()
                for value in data.values()]

        errorbars = None
        if showStandardErrorOfTheMean is True:
            errorbars = standardErrorsOfTheMean
        if showStandardDeviation is True:
            errorbars = standardDeviations

        axes.errorbar(keys, values, yerr = errorbars)

        # A floating-point option requests a shaded band of that opacity.
        shadedBands = (
            (showStandardErrorOfTheMean, standardErrorsOfTheMean),
            (showStandardDeviation, standardDeviations),
        )
        for opacity, spreads in shadedBands:
            if not isinstance(opacity, float):
                continue

            import numpy
            centers = numpy.array(values)
            halfWidths = numpy.array(spreads)
            axes.fill_between(
                keys,
                centers - halfWidths,
                centers + halfWidths,
                alpha = opacity
            )

        return axes
MPCDAnalysis.OnTheFlyStatisticsCollection.OnTheFlyStatisticsCollection.keepOnlyEveryNthDataPoint
def keepOnlyEveryNthDataPoint(self, keepEveryNth)
Definition: OnTheFlyStatisticsCollection.py:89
MPCDAnalysis.OnTheFlyStatisticsCollection.OnTheFlyStatisticsCollection.getOrderedData
def getOrderedData(self)
Definition: OnTheFlyStatisticsCollection.py:67
MPCDAnalysis.OnTheFlyStatisticsCollection.OnTheFlyStatisticsCollection.addData
def addData(self, data)
Definition: OnTheFlyStatisticsCollection.py:29
MPCDAnalysis.OnTheFlyStatisticsCollection.OnTheFlyStatisticsCollection.__init__
def __init__(self)
Definition: OnTheFlyStatisticsCollection.py:13
MPCDAnalysis.OnTheFlyStatisticsCollection.OnTheFlyStatisticsCollection._data
_data
Definition: OnTheFlyStatisticsCollection.py:15
MPCDAnalysis.OnTheFlyStatisticsCollection.OnTheFlyStatisticsCollection.getMPLAxes
def getMPLAxes(self, showStandardErrorOfTheMean=True, showStandardDeviation=0.3, singularSampleTreatment="raise", plotEveryNth=1)
Definition: OnTheFlyStatisticsCollection.py:154
MPCDAnalysis.OnTheFlyStatisticsCollection.OnTheFlyStatisticsCollection.getData
def getData(self)
Definition: OnTheFlyStatisticsCollection.py:54