CBICA Toolkit  1.0.0
cbicaStatistics.h
1 #pragma once
2 
3 #include <cmath>
4 #include <algorithm>
5 
6 namespace cbica
7 {
8  /**
9  \brief Stand-alone helper class to generate statistics in a efficient manner
10 
11  Usage:
12  std::vector< int > myArray;
13  cbica::Statistics< int > calculator(myArray); // OR use 'calculator.SetInput(myArray)' after doing 'cbica::Statistics< int > calculator;'
14  std::cout << "Sum = " << calculator.GetSum() << "\n;
15  std::cout << "Mean = " << calculator.GetMean() << "\n;
16  std::cout << "Variance = " << calculator.GetVariance() << "\n;
17  std::cout << "StandardDev = " << calculator.GetStandardDeviation() << "\n;
18  std::cout << "Kurtosis = " << calculator.GetKurtosis() << "\n;
19  std::cout << "Skewness = " << calculator.GetSkewness() << "\n;
20 
21  */
22  template< class TDataType = float >
23  class Statistics
24  {
25  public:
26  //! Default Constructor
27  Statistics() {};
28 
29  //! Constructor with input
30  Statistics(std::vector< TDataType >& inputArray)
31  {
32  InitializeClass(inputArray);
33  }
34 
35  //! Set new input
36  void SetInput(std::vector< TDataType >& inputArray)
37  {
38  InitializeClass(inputArray);
39  }
40 
41  //! Default Destructor
43  {
44  m_input.clear(); // not really needed
45  }
46 
47  //! Does exactly what it says
48  double GetMaximum()
49  {
50  return m_max;
51  }
52 
53  //! Does exactly what it says
54  double GetMinimum()
55  {
56  return m_min;
57  }
58 
59  //! Does exactly what it says
60  double GetSum()
61  {
62  return m_sum;
63  }
64 
65  //! Does exactly what it says
66  double GetMean()
67  {
68  return m_mean;
69  }
70 
71  //! Does exactly what it says
72  double GetVariance()
73  {
74  return m_variance;
75  }
76 
77  TDataType GetMode()
78  {
79  if (!mode_calculated)
80  {
81  m_mode = m_input_Sorted[0];
82  auto number = m_input_Sorted[0];
83  int count = 1;
84  int countMode = 1;
85 
86  for (size_t i = 1; i < m_input_Sorted.size(); i++)
87  {
88  if (m_input_Sorted[i] == number)
89  { // count occurrences of the current number
90  ++count;
91  }
92  else
93  { // now this is a different number
94  if (count > countMode)
95  {
96  countMode = count; // mode is the biggest ocurrences
97  m_mode = number;
98  }
99  count = 1; // reset count for the new number
100  number = m_input_Sorted[i];
101  }
102  }
103  mode_calculated = true;
104  }
105 
106  return m_mode;
107  }
108 
109  TDataType GetMedian()
110  {
111  if (!median_calculated)
112  {
113  if (!m_input_Sorted.empty())
114  {
115  auto size = m_input_Sorted.size();
116  if (size % 2 == 0)
117  {
118  m_median = (m_input_Sorted[size / 2 - 1] + m_input_Sorted[size / 2]) / 2;
119  }
120  else
121  {
122  m_median = m_input_Sorted[size / 2];
123  }
124  }
125  else
126  {
127  std::cerr << "Array cannot be empty.\n";
128  return std::numeric_limits< TDataType >::min();
129  }
130  median_calculated = true;
131  }
132 
133  return m_median;
134  }
135 
136  //! Does exactly what it says
138  {
139  CalculateSTD();
140  return m_stdDev;
141  }
142 
143  //! Does exactly what it says
144  double GetKurtosis()
145  {
146  if (!kurtosis_calculated)
147  {
148  CalculateSTD();
149 
150  m_kurtosis = weirdStatistics(4);
151  kurtosis_calculated = true;
152  }
153 
154  return m_kurtosis;
155  }
156 
157  //! Does exactly what it says
158  double GetSkewness()
159  {
160  if (!skewness_calculated)
161  {
162  CalculateSTD();
163 
164  m_skewness = weirdStatistics(3);
165  skewness_calculated = true;
166  }
167 
168  return m_skewness;
169  }
170 
171  //! Gets the element at the Nth percentile (always defined between 1-99)
172  TDataType GetNthPercentileElement(size_t n)
173  {
174  if (n < 1) // contingen
175  {
176  std::cerr << "Cannot calculate percentile less than 1. Giving Minimum, instead.\n";
177  return GetMinimum();
178  }
179  auto nth = m_input_Sorted.begin() + (n * m_input_Sorted.size()) / 100;
180  std::nth_element(m_input_Sorted.begin(), nth, m_input_Sorted.end());
181  return *nth;
182  }
183 
184  //! Get the Range
185  TDataType GetRange()
186  {
187  return (m_max - m_min);
188  }
189 
190  //! Get the InterQuartile Range
192  {
194  }
195 
196  //! Get the Studentized Range
198  {
199  return (static_cast<double>(m_max - m_min) / m_variance);
200  }
201 
202  //! Get the Mean Absolute Deviation
204  {
205  double mad = 0;
206  for (size_t i = 0; i < m_input.size(); i++)
207  {
208  mad += (m_input[i] - m_mean);
209  }
210  return (mad / m_inputSize_double);
211  }
212 
213  //! Get the Robust Mean Absolute Deviation
214  double GetRobustMeanAbsoluteDeviation(size_t lowerQuantile, size_t upperQuantile)
215  {
216  std::vector< TDataType > truncatedVector;
217  auto lower = GetNthPercentileElement(lowerQuantile);
218  auto upper = GetNthPercentileElement(upperQuantile);
219  for (size_t i = 0; i < m_input_Sorted.size(); i++)
220  {
221  if ((m_input_Sorted[i] >= lower) && (m_input_Sorted[i] <= upper))
222  {
223  truncatedVector.push_back(m_input_Sorted[i]);
224  }
225  }
226 
227  double truncated_sum = std::accumulate(truncatedVector.begin(), truncatedVector.end(), 0.0);
228  double truncated_mean = truncated_sum / static_cast<double>(truncatedVector.size());
229 
230  double rmad = 0;
231  for (size_t i = 0; i < truncatedVector.size(); i++)
232  {
233  rmad += std::abs(truncatedVector[i] - truncated_mean);
234  }
235  return (rmad / static_cast<double>(truncatedVector.size()));
236  }
237 
238  //! Get Median Absolute Deviation
240  {
241  double mad = 0;
242  for (size_t i = 0; i < m_input.size(); i++)
243  {
244  mad += (m_input[i] - GetMedian());
245  }
246  return (mad / m_inputSize_double);
247  }
248 
249  //! Get Coefficient of Variation
251  {
252  if (!stdDev_calculated)
253  {
254  CalculateSTD();
255  }
256  return (m_stdDev / m_mean);
257  }
258 
259  //! Get Quartile Coefficient Of Dispersion
261  {
262  auto seventyFifth = GetNthPercentileElement(75);
263  auto twentyFifth = GetNthPercentileElement(25);
264 
265  return (static_cast<double>(seventyFifth - twentyFifth) / static_cast<double>(seventyFifth + twentyFifth));
266  }
267 
268  //! Get the Energy
269  double GetEnergy()
270  {
271  return m_energy;
272  }
273 
274  //! Get the Root Mean Square (also called Quadratic Mean)
276  {
277  return (std::sqrt(m_energy / m_inputSize_double));
278  }
279 
280  //! Does exactly what it says
281  std::vector< double > GetZScores()
282  {
283  if (!zscores_calculated)
284  {
285  if (!stdDev_calculated)
286  {
287  CalculateSTD();
288  }
289  m_zscores.clear();
290  for (const TDataType& element : m_input)
291  {
292  m_zscores.push_back((element - m_mean) / m_stdDev);
293  }
294  zscores_calculated = true;
295  }
296 
297  return m_zscores;
298  }
299 
300  private:
301  std::vector< TDataType > m_input;
302  std::vector< TDataType > m_input_Sorted;
303 
304  //! actual outputs
305  double m_inputSize_double = 0, m_sum = 0.0, m_mean = 0.0, m_variance = 0.0, m_stdDev = 0.0, m_kurtosis = 0.0, m_skewness = 0.0,
306  m_max = 0.0, m_min = 0.0, m_energy = 0.0;
307 
308  TDataType m_mode, m_median;
309 
310  std::vector< double > m_zscores;
311 
312  //! flags to check if something has been calculated or not
313  bool stdDev_calculated = false, kurtosis_calculated = false, skewness_calculated = false, zscores_calculated = false,
314  mode_calculated = false, median_calculated = false;
315 
316  //! This function gets called every time the input is set, for obvious reasons
317  void InitializeClass(std::vector< TDataType >& inputArray)
318  {
319  m_input = inputArray;
320  m_input_Sorted = inputArray;
321  std::sort(m_input_Sorted.begin(), m_input_Sorted.end());
322  m_inputSize_double = static_cast<double>(m_input.size());
323 
324  stdDev_calculated = false;
325  kurtosis_calculated = false;
326  skewness_calculated = false;
327  zscores_calculated = false;
328  mode_calculated = false;
329  median_calculated = false;
330  m_sum = std::accumulate(m_input.begin(), m_input.end(), 0.0);
331  m_mean = m_sum / m_inputSize_double;
332  auto result = std::minmax_element(m_input.begin(), m_input.end());
333  m_min = *result.first;
334  m_max = *result.second;
335  for (const TDataType& element : m_input)
336  {
337  m_variance += std::pow(static_cast<double>(element - m_mean), 2);
338  m_energy += std::pow(element, 2);
339  }
340 
341  m_variance = m_variance / (m_inputSize_double - 1);
342  }
343 
344  //! Helper function for calculating kurtosis and skewness
345  double weirdStatistics(size_t power)
346  {
347  if (!stdDev_calculated)
348  {
349  CalculateSTD();
350  }
351  double returnStat = 0.0;
352  for (size_t x = 0; x < m_input.size(); x++)
353  {
354  returnStat += std::pow(((m_input[x] - m_mean) / m_stdDev), power);
355  }
356  return (returnStat / m_inputSize_double);
357  }
358 
359  //! Single place to calculate STD Dev to reduce number of moving parts
360  void CalculateSTD()
361  {
362  if (!stdDev_calculated)
363  {
364  m_stdDev = std::sqrt(m_variance);
365  stdDev_calculated = true;
366  }
367  }
368  };
369 
370 }
double GetCoefficientOfVariation()
Get Coefficient of Variation.
Definition: cbicaStatistics.h:250
double GetRootMeanSquare()
Get the Root Mean Square (also called Quadratic Mean)
Definition: cbicaStatistics.h:275
std::vector< double > GetZScores()
Does exactly what it says.
Definition: cbicaStatistics.h:281
double GetRobustMeanAbsoluteDeviation(size_t lowerQuantile, size_t upperQuantile)
Get the Robust Mean Absolute Deviation.
Definition: cbicaStatistics.h:214
Statistics()
Default Constructor.
Definition: cbicaStatistics.h:27
TDataType GetInterQuartileRange()
Get the InterQuartile Range.
Definition: cbicaStatistics.h:191
double GetMedianAbsoluteDeviation()
Get Median Absolute Deviation.
Definition: cbicaStatistics.h:239
double GetMeanAbsoluteDeviation()
Get the Mean Absolute Deviation.
Definition: cbicaStatistics.h:203
double GetStandardDeviation()
Does exactly what it says.
Definition: cbicaStatistics.h:137
TDataType GetNthPercentileElement(size_t n)
Gets the element at the Nth percentile (always defined between 1-99)
Definition: cbicaStatistics.h:172
double GetSkewness()
Does exactly what it says.
Definition: cbicaStatistics.h:158
double GetMean()
Does exactly what it says.
Definition: cbicaStatistics.h:66
double GetMinimum()
Does exactly what it says.
Definition: cbicaStatistics.h:54
double GetMaximum()
Does exactly what it says.
Definition: cbicaStatistics.h:48
double GetVariance()
Does exactly what it says.
Definition: cbicaStatistics.h:72
Stand-alone helper class to generate statistics in a efficient manner.
Definition: cbicaStatistics.h:23
double GetKurtosis()
Does exactly what it says.
Definition: cbicaStatistics.h:144
void SetInput(std::vector< TDataType > &inputArray)
Set new input.
Definition: cbicaStatistics.h:36
TDataType GetRange()
Get the Range.
Definition: cbicaStatistics.h:185
double GetStudentizedRange()
Get the Studentized Range.
Definition: cbicaStatistics.h:197
~Statistics()
Default Destructor.
Definition: cbicaStatistics.h:42
double GetEnergy()
Get the Energy.
Definition: cbicaStatistics.h:269
double GetSum()
Does exactly what it says.
Definition: cbicaStatistics.h:60
Statistics(std::vector< TDataType > &inputArray)
Constructor with input.
Definition: cbicaStatistics.h:30
double GetQuartileCoefficientOfDispersion()
Get Quartile Coefficient Of Dispersion.
Definition: cbicaStatistics.h:260