1 from __future__
import division
# Checks a freshly constructed OnTheFlyStatisticsDDDA that has received no
# data: the sample size and the maximum block ID are both 0, and
# getMaximumBlockSize() is expected to raise.
7 def test_emptyInstance():
8 ddda = OnTheFlyStatisticsDDDA()
10 assert ddda.getSampleSize() == 0
# NOTE(review): the statement guarded by the first `pytest.raises` below is
# not visible in this listing -- confirm which call is expected to raise.
12 with pytest.raises(Exception):
15 with pytest.raises(Exception):
16 ddda.getMaximumBlockSize()
18 assert ddda.getMaximumBlockID() == 0
22 ddda = OnTheFlyStatisticsDDDA()
26 assert ddda.getSampleSize() == 1
27 assert ddda.getSampleMean() == 5
28 assert ddda.getMaximumBlockSize() == 1
29 assert ddda.getMaximumBlockID() == 0
# Checks the values reported for a two-element data set: sample size 2,
# sample mean sum(data) / len(data), maximum block size 2 and maximum
# block ID 1.
# NOTE(review): the definition of `data` and the code that feeds it into
# `ddda` are not visible in this listing; judging by the test name the
# values are presumably integers -- confirm.
32 def test_twoData_int():
33 ddda = OnTheFlyStatisticsDDDA()
40 assert ddda.getSampleSize() == 2
41 assert ddda.getSampleMean() == sum(data) / len(data)
42 assert ddda.getMaximumBlockSize() == 2
43 assert ddda.getMaximumBlockID() == 1
# Checks the values reported for a two-element data set: sample size 2,
# sample mean sum(data) / len(data) (compared exactly, without a
# tolerance), maximum block size 2 and maximum block ID 1.
# NOTE(review): the definition of `data` and the code that feeds it into
# `ddda` are not visible in this listing; judging by the test name the
# values are presumably floats -- confirm.
46 def test_twoData_float():
47 ddda = OnTheFlyStatisticsDDDA()
54 assert ddda.getSampleSize() == 2
55 assert ddda.getSampleMean() == sum(data) / len(data)
56 assert ddda.getMaximumBlockSize() == 2
57 assert ddda.getMaximumBlockID() == 1
61 ddda = OnTheFlyStatisticsDDDA()
68 assert ddda.getSampleSize() == 3
69 assert ddda.getSampleMean() == sum(data) / len(data)
70 assert ddda.getMaximumBlockSize() == 2
71 assert ddda.getMaximumBlockID() == 1
75 ddda = OnTheFlyStatisticsDDDA()
77 data = [5, 2.0, -1.2, 40.0]
82 from pytest
import approx
84 assert ddda.getSampleSize() == 4
85 assert ddda.getSampleMean() == approx(sum(data) / len(data))
86 assert ddda.getMaximumBlockSize() == 4
87 assert ddda.getMaximumBlockID() == 2
# Checks sample size, sample mean, maximum block size and maximum block ID
# for data sets of several sizes (one iteration per entry of `dataSizes`).
90 def test_dynamicData():
102 for dataSize
in dataSizes:
# `dataSize` pseudo-random values obtained from random.random().
105 data = [random.random()
for i
in range(0, dataSize)]
107 ddda = OnTheFlyStatisticsDDDA()
# NOTE(review): the loop enclosing the next two statements and the
# initialisation of `maximumBlockSize` / `blockSizeCount` are not visible
# here; presumably the block size is doubled for as long as the doubled
# size still fits into `dataSize` -- confirm.
114 if maximumBlockSize * 2 > dataSize:
117 maximumBlockSize *= 2
# The mean is compared with a tolerance (pytest.approx); the other three
# quantities must match exactly.
119 assert ddda.getSampleSize() == len(data)
120 assert ddda.getSampleMean() == pytest.approx(sum(data) / len(data))
121 assert ddda.getMaximumBlockSize() == maximumBlockSize
122 assert ddda.getMaximumBlockID() == blockSizeCount - 1
138 ddda = OnTheFlyStatisticsDDDA()
139 with pytest.raises(TypeError):
141 with pytest.raises(TypeError):
143 with pytest.raises(TypeError):
145 ddda.merge(OnTheFlyStatistics())
# Helper comparing two statistics objects `lhs` and `rhs` quantity by
# quantity. Sample size, maximum block size, maximum block ID and the
# per-block hasBlockVariance() flags are compared exactly; the sample mean
# and the per-block variances are compared with pytest.approx.
# NOTE(review): the statements executed after each comparison are not
# visible in this listing. Callers use the result inside `assert` /
# `assert not`, so presumably a mismatch yields False and a full match
# yields True -- confirm.
148 def approximatelyEquivalent(lhs, rhs):
# Comparing an object with itself would be meaningless here.
149 assert lhs
is not rhs
151 if lhs.getSampleSize() != rhs.getSampleSize():
153 if lhs.getSampleMean() != pytest.approx(rhs.getSampleMean()):
155 if lhs.getMaximumBlockSize() != rhs.getMaximumBlockSize():
157 if lhs.getMaximumBlockID() != rhs.getMaximumBlockID():
# Walk over every block ID from 0 up to and including lhs's maximum.
159 for i
in range(0, lhs.getMaximumBlockID() + 1):
160 if lhs.hasBlockVariance(i) != rhs.hasBlockVariance(i):
162 if not lhs.hasBlockVariance(i):
165 blockVarianceLHS = lhs.getBlockVariance(i)
166 blockVarianceRHS = rhs.getBlockVariance(i)
169 if blockVarianceLHS != pytest.approx(blockVarianceRHS):
182 for dataSize1
in dataSizes:
183 for dataSize2
in dataSizes:
184 data1 = [random.random()
for _
in range(0, dataSize1)]
185 data2 = [random.random()
for _
in range(0, dataSize2)]
187 ddda1 = OnTheFlyStatisticsDDDA()
188 ddda2 = OnTheFlyStatisticsDDDA()
190 ddda11 = OnTheFlyStatisticsDDDA()
191 ddda12 = OnTheFlyStatisticsDDDA()
194 ddda1.addDatum(datum)
195 ddda11.addDatum(datum)
196 ddda12.addDatum(datum)
199 ddda11.addDatum(datum)
202 ddda2.addDatum(datum)
203 ddda12.addDatum(datum)
206 copied = copy.deepcopy(ddda1)
207 assert ddda1 == copied
208 assert not ddda1
is copied
210 assert not approximatelyEquivalent(ddda1, ddda12)
212 assert approximatelyEquivalent(ddda1, ddda12)
215 assert not approximatelyEquivalent(copied, ddda11)
217 assert approximatelyEquivalent(copied, ddda11)
# Tests hasBlockVariance() and getBlockVariance() for data sets of several
# sizes: argument validation first, then the per-block results against a
# reference computed with OnTheFlyStatistics.
221 def test_hasBlockVariance_getBlockVariance():
233 for dataSize
in dataSizes:
235 data = [random.random()
for i
in range(0, dataSize)]
237 ddda = OnTheFlyStatisticsDDDA()
# A float block ID is rejected with TypeError.
241 with pytest.raises(TypeError):
242 ddda.hasBlockVariance(0.0)
244 with pytest.raises(TypeError):
245 ddda.getBlockVariance(0.0)
# A negative block ID, or one past the maximum block ID, is rejected with
# ValueError.
247 with pytest.raises(ValueError):
248 ddda.getBlockVariance(-1)
250 with pytest.raises(ValueError):
251 ddda.hasBlockVariance(-1)
253 with pytest.raises(ValueError):
254 ddda.hasBlockVariance(ddda.getMaximumBlockID() + 1)
256 with pytest.raises(ValueError):
257 ddda.getBlockVariance(ddda.getMaximumBlockID() + 1)
260 for blockID
in range(0, ddda.getMaximumBlockID() + 1):
# Block `blockID` groups 2 ** blockID consecutive data points.
261 blockSize = 2 ** blockID
# With fewer than two complete blocks no variance is available and
# getBlockVariance() must raise RuntimeError.
263 if dataSize / blockSize < 2:
264 assert not ddda.hasBlockVariance(blockID)
265 with pytest.raises(RuntimeError):
266 ddda.getBlockVariance(blockID)
# Reference value: `tmp` collects one block at a time; once it holds
# `blockSize` samples its mean is added to `stat` and `tmp` is reset. The
# expected block variance is the sample variance of those block means.
# NOTE(review): the loop feeding `data` into `tmp` is not visible here.
269 stat = OnTheFlyStatistics()
270 tmp = OnTheFlyStatistics()
274 if tmp.getSampleSize() == blockSize:
275 stat.addDatum(tmp.getSampleMean())
276 tmp = OnTheFlyStatistics()
278 expected = stat.getSampleVariance()
280 assert ddda.hasBlockVariance(blockID)
281 assert ddda.getBlockVariance(blockID) == pytest.approx(expected)
# Tests getBlockStandardDeviation(): argument validation, then the result
# for every block ID against the square root of getBlockVariance().
284 def test_getBlockStandardDeviation():
296 for dataSize
in dataSizes:
298 data = [random.random()
for i
in range(0, dataSize)]
300 ddda = OnTheFlyStatisticsDDDA()
# Float block ID -> TypeError; negative or too large block ID -> ValueError.
304 with pytest.raises(TypeError):
305 ddda.getBlockStandardDeviation(0.0)
307 with pytest.raises(ValueError):
308 ddda.getBlockStandardDeviation(-1)
310 with pytest.raises(ValueError):
311 ddda.getBlockStandardDeviation(ddda.getMaximumBlockID() + 1)
314 for blockID
in range(0, ddda.getMaximumBlockID() + 1):
# Without a block variance the standard deviation must raise RuntimeError.
315 if not ddda.hasBlockVariance(blockID):
316 with pytest.raises(RuntimeError):
317 ddda.getBlockStandardDeviation(blockID)
# Compared exactly (no tolerance) with sqrt(block variance).
322 expected = math.sqrt(ddda.getBlockVariance(blockID))
323 assert ddda.getBlockStandardDeviation(blockID) == expected
# Tests getSampleStandardDeviation() against the block standard deviation
# of block ID 0, for data sets of several sizes.
326 def test_getSampleStandardDeviation():
338 for dataSize
in dataSizes:
340 data = [random.random()
for i
in range(0, dataSize)]
342 ddda = OnTheFlyStatisticsDDDA()
# If block 0 has no variance, the sample standard deviation must raise
# RuntimeError.
346 if not ddda.hasBlockVariance(0):
347 with pytest.raises(RuntimeError):
348 ddda.getSampleStandardDeviation()
# Otherwise both accessors must return exactly the same value.
352 expected = ddda.getBlockStandardDeviation(0)
353 assert ddda.getSampleStandardDeviation() == expected
# Tests getBlockStandardErrorOfTheMean(): argument validation, then the
# per-block result against a reference computed with OnTheFlyStatistics.
357 def test_getBlockStandardErrorOfTheMean():
369 for dataSize
in dataSizes:
371 data = [random.random()
for i
in range(0, dataSize)]
373 ddda = OnTheFlyStatisticsDDDA()
# Float block ID -> TypeError; negative or too large block ID -> ValueError.
378 with pytest.raises(TypeError):
379 ddda.getBlockStandardErrorOfTheMean(0.0)
381 with pytest.raises(ValueError):
382 ddda.getBlockStandardErrorOfTheMean(-1)
384 with pytest.raises(ValueError):
385 ddda.getBlockStandardErrorOfTheMean(ddda.getMaximumBlockID() + 1)
388 for blockID
in range(0, ddda.getMaximumBlockID() + 1):
389 blockSize = 2 ** blockID
391 if ddda.hasBlockVariance(blockID):
# Reference value: the mean of every complete block of `blockSize` samples
# is added to `stat`; the expected result is stat's standard error of the
# mean. NOTE(review): the loop feeding `data` into `tmp` is not visible.
393 stat = OnTheFlyStatistics()
394 tmp = OnTheFlyStatistics()
398 if tmp.getSampleSize() == blockSize:
399 stat.addDatum(tmp.getSampleMean())
400 tmp = OnTheFlyStatistics()
402 result = ddda.getBlockStandardErrorOfTheMean(blockID)
403 expected = stat.getStandardErrorOfTheMean()
405 assert result == pytest.approx(expected)
# NOTE(review): presumably the branch for blocks without a variance (the
# branching keyword is not visible here) -- RuntimeError is expected.
407 with pytest.raises(RuntimeError):
408 ddda.getBlockStandardErrorOfTheMean(blockID)
# Tests getEstimatedStandardDeviationOfBlockStandardErrorOfTheMean():
# argument validation, then the result for every block ID against
# sem / sqrt(2 * number of complete blocks).
411 def test_getEstimatedStandardDeviationOfBlockStandardErrorOfTheMean():
423 for dataSize
in dataSizes:
425 data = [random.random()
for i
in range(0, dataSize)]
427 ddda = OnTheFlyStatisticsDDDA()
# Short alias for the method under test; it is fetched from the class, so
# the instance is passed explicitly when calling `f`.
429 DDDA = OnTheFlyStatisticsDDDA
430 f = DDDA.getEstimatedStandardDeviationOfBlockStandardErrorOfTheMean
# Float block ID -> TypeError; negative or too large block ID -> ValueError.
436 with pytest.raises(TypeError):
437 ddda.getEstimatedStandardDeviationOfBlockStandardErrorOfTheMean(0.0)
439 with pytest.raises(ValueError):
440 ddda.getEstimatedStandardDeviationOfBlockStandardErrorOfTheMean(-1)
442 with pytest.raises(ValueError):
443 ddda.getEstimatedStandardDeviationOfBlockStandardErrorOfTheMean(
444 ddda.getMaximumBlockID() + 1)
447 for blockID
in range(0, ddda.getMaximumBlockID() + 1):
# NOTE(review): the call guarded by this `pytest.raises` is not visible in
# this listing.
448 if not ddda.hasBlockVariance(blockID):
449 with pytest.raises(RuntimeError):
453 esd = f(ddda, blockID)
454 sem = ddda.getBlockStandardErrorOfTheMean(blockID)
# Number of complete blocks of size 2 ** blockID that fit into the data.
456 blockSize = 2 ** blockID
457 reducedSampleSize = dataSize // blockSize
# Compared exactly (no tolerance).
460 assert esd == sem / math.sqrt(2 * reducedSampleSize)
# Tests the "optimal block" accessors: getOptimalBlockIDForStandardErrorOfTheMean(),
# getOptimalStandardErrorOfTheMean() and
# optimalStandardErrorOfTheMeanEstimateIsReliable(), using per-block
# reference statistics built with OnTheFlyStatistics.
# NOTE(review): several statements (initialisation of `blocks` and
# `criteria`, the loops feeding `data`, some branch keywords) are not
# visible in this listing.
463 def test_optimal_standard_error():
477 for dataSize
in dataSizes:
479 data = [random.random()
for i
in range(0, dataSize)]
481 ddda = OnTheFlyStatisticsDDDA()
482 stat = OnTheFlyStatistics()
# All three accessors are expected to raise RuntimeError here; the
# condition under which this part runs is not visible.
489 with pytest.raises(RuntimeError):
490 ddda.getOptimalBlockIDForStandardErrorOfTheMean()
491 with pytest.raises(RuntimeError):
492 ddda.optimalStandardErrorOfTheMeanEstimateIsReliable()
493 with pytest.raises(RuntimeError):
494 ddda.getOptimalStandardErrorOfTheMean()
500 optimalBlockID = ddda.getOptimalBlockIDForStandardErrorOfTheMean()
501 optimalBlockSize = 2 ** optimalBlockID
# Standard error of the mean taken from `stat`.
503 rawSE = stat.getStandardErrorOfTheMean()
# Build one reference statistic per block ID from the means of complete
# blocks of 2 ** block samples.
506 for block
in range(0, ddda.getMaximumBlockID()):
507 blockSize = 2 ** block
509 blockStat = OnTheFlyStatistics()
510 tmp = OnTheFlyStatistics()
514 if tmp.getSampleSize() == blockSize:
515 blockStat.addDatum(tmp.getSampleMean())
516 tmp = OnTheFlyStatistics()
517 blocks.append(blockStat)
# The optimal standard error must match the reference at the optimal block.
519 expectedSE = blocks[optimalBlockID].getStandardErrorOfTheMean()
521 _resultSE = ddda.getOptimalStandardErrorOfTheMean()
522 assert _resultSE == pytest.approx(expectedSE)
# Reliability is tied to the optimal block size being below dataSize / 50.
525 if ddda.optimalStandardErrorOfTheMeanEstimateIsReliable():
526 assert optimalBlockSize < dataSize / 50.0
528 assert optimalBlockSize >= dataSize / 50.0
# Per-block criterion: 2 ** (3 * blockID) > 2 * dataSize * (currentSE / rawSE) ** 4;
# blocks without a variance are recorded as False.
531 for blockID
in range(0, ddda.getMaximumBlockID() + 1):
532 if not ddda.hasBlockVariance(blockID):
533 criteria.append(
False)
536 currentSE = blocks[blockID].getStandardErrorOfTheMean()
537 quotient = currentSE / rawSE
538 criterion = 2 ** (blockID * 3) > 2 * dataSize * quotient ** 4
540 criteria.append(criterion)
# One criterion per block ID; the checks below relate each criterion to
# the reported optimal block ID.
542 assert len(criteria) == ddda.getMaximumBlockID() + 1
543 for blockID, criterion
in enumerate(criteria):
544 if blockID < optimalBlockID - 1:
547 if blockID == optimalBlockID - 1:
548 assert criterion ==
False
551 if blockID == ddda.getMaximumBlockID() - 1:
552 if not ddda.hasBlockVariance(ddda.getMaximumBlockID()):
553 if optimalBlockID == blockID:
556 if blockID == ddda.getMaximumBlockID():
557 if optimalBlockID == ddda.getMaximumBlockID():
560 if not ddda.hasBlockVariance(ddda.getMaximumBlockID()):
563 assert criterion ==
True
# Checks that getOptimalBlockIDForStandardErrorOfTheMean() returns 0 when
# every data point has the same value.
566 def test_getOptimalBlockIDForStandardErrorOfTheMean_zero_variance():
577 for dataSize
in dataSizes:
# A single random value repeated `dataSize` times.
579 datum = random.random()
580 data = [datum
for i
in range(0, dataSize)]
582 ddda = OnTheFlyStatisticsDDDA()
# NOTE(review): the code feeding `data` into `ddda` is not visible here.
586 assert ddda.getOptimalBlockIDForStandardErrorOfTheMean() == 0
# Checks that serializeToString() returns a `str`, for data sets of
# several sizes.
589 def test_serializeToString():
601 for dataSize
in dataSizes:
603 data = [random.random()
for _
in range(0, dataSize)]
605 stat = OnTheFlyStatisticsDDDA()
# First check: directly after construction.
606 assert isinstance(stat.serializeToString(), str)
# Second check: presumably after `data` has been added (those statements
# are not visible in this listing) -- confirm.
609 assert isinstance(stat.serializeToString(), str)
# Tests unserializeFromString(): argument validation, round trips through
# serializeToString(), and a state string assembled by hand.
612 def test_unserializeFromString():
613 stat = OnTheFlyStatisticsDDDA()
# Non-string arguments (a list, a statistics instance) -> TypeError.
615 with pytest.raises(TypeError):
616 stat.unserializeFromString([
"foo"])
617 with pytest.raises(TypeError):
618 stat.unserializeFromString(stat)
# Malformed state strings -> ValueError.
620 with pytest.raises(ValueError):
621 stat.unserializeFromString(
"")
622 with pytest.raises(ValueError):
623 stat.unserializeFromString(
"foo")
624 with pytest.raises(ValueError):
625 stat.unserializeFromString(
"123|0")
626 with pytest.raises(ValueError):
627 stat.unserializeFromString(
"1;0;0;0")
628 with pytest.raises(ValueError):
629 stat.unserializeFromString(
"1|1|1;0;0;1|")
630 with pytest.raises(ValueError):
631 stat.unserializeFromString(
"1|-1")
# "1|0" is accepted and yields an instance with sample size 0.
634 stat.unserializeFromString(
"1|0")
635 assert stat.getSampleSize() == 0
# Round trip of an empty instance; repeated to check that unserializing
# twice gives the same result.
637 stat = OnTheFlyStatisticsDDDA()
638 unserialized = OnTheFlyStatisticsDDDA()
639 unserialized.unserializeFromString(stat.serializeToString())
640 assert unserialized == stat
641 unserialized.unserializeFromString(stat.serializeToString())
642 assert unserialized == stat
# Helper comparing two instances: sample size, maximum block ID and
# hasBlockVariance() exactly; sample mean and block variances with
# pytest.approx. NOTE(review): the statements executed after each
# comparison are not visible; the result is used inside `assert`, so
# presumably False on mismatch and True otherwise -- confirm.
645 def approximatelyEqual(lhs, rhs):
646 if lhs.getSampleSize() != rhs.getSampleSize():
649 if lhs.getSampleSize() == 0:
652 if lhs.getSampleMean() != pytest.approx(rhs.getSampleMean()):
655 if lhs.getMaximumBlockID() != rhs.getMaximumBlockID():
658 for i
in range(0, lhs.getMaximumBlockID() + 1):
659 if lhs.hasBlockVariance(i) != rhs.hasBlockVariance(i):
661 if not lhs.hasBlockVariance(i):
664 expected = pytest.approx(rhs.getBlockVariance(i))
665 if lhs.getBlockVariance(i) != expected:
# 50 rounds: add between 1 and 5 random data points to `stat`, check that
# the stale `unserialized` no longer matches, then re-unserialize (twice)
# and check that it matches again.
672 for _
in range(0, 50):
673 for _
in range(0, random.randint(1, 5)):
674 stat.addDatum(random.random())
676 assert not approximatelyEqual(unserialized, stat)
677 unserialized.unserializeFromString(stat.serializeToString())
678 assert approximatelyEqual(unserialized, stat)
679 unserialized.unserializeFromString(stat.serializeToString())
680 assert approximatelyEqual(unserialized, stat)
# Deep copies of both instances must stay in agreement when the same data
# is added to each of them.
683 statCopy = copy.deepcopy(stat)
684 unserializedCopy = copy.deepcopy(unserialized)
685 for _
in range(0, 50):
687 datum = random.random()
688 statCopy.addDatum(datum)
689 unserializedCopy.addDatum(datum)
690 assert approximatelyEqual(unserializedCopy, statCopy)
# Hand-assembled state: three OnTheFlyStatistics blocks are serialized and
# joined with "|", followed by str(5).
# NOTE(review): the initial value of `myState`, the data added to `stat`
# and `block1`, and the meaning of the trailing 5 are not visible here.
693 stat = OnTheFlyStatisticsDDDA()
701 block1 = OnTheFlyStatistics()
702 block2 = OnTheFlyStatistics()
703 block3 = OnTheFlyStatistics()
# block2 receives the means of the pairs (1, 2) and (3, 4).
709 block2.addDatum((1 + 2) / 2.0)
710 block2.addDatum((3 + 4) / 2.0)
# NOTE(review): block3 receives the sum of the two pair means, not their
# mean -- confirm that this is intended.
711 block3.addDatum(((1 + 2) / 2.0) + ((3 + 4) / 2.0))
714 myState +=
"|" + block1.serializeToString()
715 myState +=
"|" + block2.serializeToString()
716 myState +=
"|" + block3.serializeToString()
717 myState +=
"|" + str(5)
721 unserialized = OnTheFlyStatisticsDDDA()
722 unserialized.unserializeFromString(myState)
723 assert approximatelyEqual(unserialized, stat)
# Checks that getMPLAxes() returns a matplotlib.axes.Axes instance when
# called with False and with True, for data sets of several sizes.
726 def test_getMPLAxes():
738 for dataSize
in dataSizes:
740 data = [random.random()
for _
in range(0, dataSize)]
742 ddda = OnTheFlyStatisticsDDDA()
# NOTE(review): the calls guarded by these two `pytest.raises` blocks are
# not visible in this listing.
746 with pytest.raises(TypeError):
748 with pytest.raises(TypeError):
# matplotlib is imported locally, inside the test.
751 import matplotlib.axes
752 assert isinstance(ddda.getMPLAxes(
False), matplotlib.axes.Axes)
753 assert isinstance(ddda.getMPLAxes(
True), matplotlib.axes.Axes)
# Tests the equality and inequality operators of OnTheFlyStatisticsDDDA,
# both as operators (==, !=) and as direct __eq__ / __ne__ calls.
# NOTE(review): many statements of this test (the bodies of most
# `pytest.raises` blocks, the construction of `instances`, several branch
# conditions) are not visible in this listing.
756 def test___eq_____ne__():
769 ddda = OnTheFlyStatisticsDDDA()
# Comparisons against unsupported operands are expected to raise
# TypeError; the visible operand is an OnTheFlyStatistics instance.
770 with pytest.raises(TypeError):
772 with pytest.raises(TypeError):
774 with pytest.raises(TypeError):
776 ddda.__eq__(OnTheFlyStatistics())
778 with pytest.raises(TypeError):
780 with pytest.raises(TypeError):
782 with pytest.raises(TypeError):
784 ddda == OnTheFlyStatistics()
786 with pytest.raises(TypeError):
788 with pytest.raises(TypeError):
790 with pytest.raises(TypeError):
792 ddda.__ne__(OnTheFlyStatistics())
794 with pytest.raises(TypeError):
796 with pytest.raises(TypeError):
798 with pytest.raises(TypeError):
800 ddda != OnTheFlyStatistics()
803 for dataSize
in dataSizes:
# Variants of one random data stream. Judging by the names, the "missing"
# variants skip and the "modified" variants alter the first or the last
# datum -- the exact conditions are only partly visible.
806 original = OnTheFlyStatisticsDDDA()
807 same = OnTheFlyStatisticsDDDA()
808 missingBeginning = OnTheFlyStatisticsDDDA()
809 missingEnd = OnTheFlyStatisticsDDDA()
810 modifiedBeginning = OnTheFlyStatisticsDDDA()
811 modifiedEnd = OnTheFlyStatisticsDDDA()
812 for i
in range(0, dataSize):
813 datum = random.random()
815 original.addDatum(datum)
819 modifiedBeginning.addDatum(datum + 1)
821 missingBeginning.addDatum(datum)
822 modifiedBeginning.addDatum(datum)
# The last datum is shifted by -1 for `modifiedEnd`.
824 if i == dataSize - 1:
825 modifiedEnd.addDatum(datum - 1)
827 missingEnd.addDatum(datum)
828 modifiedEnd.addDatum(datum)
831 copied = copy.deepcopy(original)
834 original, same, copied,
835 missingBeginning, missingEnd,
836 modifiedBeginning, modifiedEnd
# Every instance must compare equal to itself.
839 for ddda
in instances:
841 assert ddda.__eq__(ddda)
842 assert not ddda != ddda
843 assert not ddda.__ne__(ddda)
# Pairwise comparison: work out whether the pair should compare equal,
# then check all four comparison forms accordingly.
845 for ddda1
in instances:
846 for ddda2
in instances:
847 shouldCompareEqual1 =
False
848 if ddda1
is original:
849 shouldCompareEqual1 =
True
851 shouldCompareEqual1 =
True
852 elif ddda1
is copied:
853 shouldCompareEqual1 =
True
855 shouldCompareEqual2 =
False
856 if ddda2
is original:
857 shouldCompareEqual2 =
True
859 shouldCompareEqual2 =
True
860 elif ddda2
is copied:
861 shouldCompareEqual2 =
True
863 shouldCompareEqual = shouldCompareEqual1
and shouldCompareEqual2
866 shouldCompareEqual =
True
868 tmp1 = ddda1
is missingBeginning
or ddda1
is missingEnd
869 tmp2 = ddda2
is missingBeginning
or ddda2
is missingEnd
871 shouldCompareEqual =
True
874 shouldCompareEqual =
True
876 if shouldCompareEqual:
877 assert ddda1 == ddda2
878 assert ddda1.__eq__(ddda2)
879 assert not ddda1 != ddda2
880 assert not ddda1.__ne__(ddda2)
# NOTE(review): presumably the branch for pairs that should differ; the
# branching keyword is not visible here.
882 assert not ddda1 == ddda2
883 assert not ddda1.__eq__(ddda2)
884 assert ddda1 != ddda2
885 assert ddda1.__ne__(ddda2)
# Two further instances that must compare equal; what is done to them
# between construction and comparison is not visible in this listing.
889 ddda1 = OnTheFlyStatisticsDDDA()
890 ddda2 = OnTheFlyStatisticsDDDA()
898 assert ddda1 == ddda2
899 assert ddda1.__eq__(ddda2)
900 assert not ddda1 != ddda2
901 assert not ddda1.__ne__(ddda2)
917 for dataSize
in dataSizes:
919 data = [random.random()
for _
in range(0, dataSize)]
921 ddda = OnTheFlyStatisticsDDDA()
925 assert isinstance(ddda.__repr__(), str)