82 lines
2.5 KiB
Go
82 lines
2.5 KiB
Go
package stats
|
|
|
|
import "fmt"
|
|
|
|
// Holds information about the dataset provided to Describe
|
|
type Description struct {
|
|
Count int
|
|
Mean float64
|
|
Std float64
|
|
Max float64
|
|
Min float64
|
|
DescriptionPercentiles []descriptionPercentile
|
|
AllowedNaN bool
|
|
}
|
|
|
|
// descriptionPercentile pairs a requested percentile with the value that was
// computed for it.
type descriptionPercentile struct {
	// Percentile is the percentile that was requested.
	Percentile float64
	// Value is the result the percentile function returned for Percentile.
	Value float64
}
|
|
|
|
// Describe generates descriptive statistics about a provided dataset, similar to python's pandas.describe()
|
|
func Describe(input Float64Data, allowNaN bool, percentiles *[]float64) (*Description, error) {
|
|
return DescribePercentileFunc(input, allowNaN, percentiles, Percentile)
|
|
}
|
|
|
|
// Describe generates descriptive statistics about a provided dataset, similar to python's pandas.describe()
|
|
// Takes in a function to use for percentile calculation
|
|
func DescribePercentileFunc(input Float64Data, allowNaN bool, percentiles *[]float64, percentileFunc func(Float64Data, float64) (float64, error)) (*Description, error) {
|
|
var description Description
|
|
description.AllowedNaN = allowNaN
|
|
description.Count = input.Len()
|
|
|
|
if description.Count == 0 && !allowNaN {
|
|
return &description, ErrEmptyInput
|
|
}
|
|
|
|
// Disregard error, since it cannot be thrown if Count is > 0 and allowNaN is false, else NaN is accepted
|
|
description.Std, _ = StandardDeviation(input)
|
|
description.Max, _ = Max(input)
|
|
description.Min, _ = Min(input)
|
|
description.Mean, _ = Mean(input)
|
|
|
|
if percentiles != nil {
|
|
for _, percentile := range *percentiles {
|
|
if value, err := percentileFunc(input, percentile); err == nil || allowNaN {
|
|
description.DescriptionPercentiles = append(description.DescriptionPercentiles, descriptionPercentile{Percentile: percentile, Value: value})
|
|
}
|
|
}
|
|
}
|
|
|
|
return &description, nil
|
|
}
|
|
|
|
/*
|
|
Represents the Description instance in a string format with specified number of decimals
|
|
|
|
count 3
|
|
mean 2.00
|
|
std 0.82
|
|
max 3.00
|
|
min 1.00
|
|
25.00% NaN
|
|
50.00% 1.50
|
|
75.00% 2.50
|
|
NaN OK true
|
|
*/
|
|
func (d *Description) String(decimals int) string {
|
|
var str string
|
|
|
|
str += fmt.Sprintf("count\t%d\n", d.Count)
|
|
str += fmt.Sprintf("mean\t%.*f\n", decimals, d.Mean)
|
|
str += fmt.Sprintf("std\t%.*f\n", decimals, d.Std)
|
|
str += fmt.Sprintf("max\t%.*f\n", decimals, d.Max)
|
|
str += fmt.Sprintf("min\t%.*f\n", decimals, d.Min)
|
|
for _, percentile := range d.DescriptionPercentiles {
|
|
str += fmt.Sprintf("%.2f%%\t%.*f\n", percentile.Percentile, decimals, percentile.Value)
|
|
}
|
|
str += fmt.Sprintf("NaN OK\t%t", d.AllowedNaN)
|
|
return str
|
|
}
|