2016-11-13 06:59:56 +08:00
|
|
|
package main
|
|
|
|
|
2016-11-13 23:26:23 +08:00
|
|
|
import (
|
|
|
|
"math"
|
|
|
|
)
|
|
|
|
|
2016-11-13 06:59:56 +08:00
|
|
|
type Histogram struct {
|
|
|
|
Bins []Bin
|
|
|
|
Maxbins int
|
|
|
|
Total uint64
|
2016-11-13 23:26:23 +08:00
|
|
|
|
|
|
|
min, max uint64
|
2016-11-13 06:59:56 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
type Bin struct {
|
|
|
|
Value, Count uint64
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewHistogram returns a new Histogram with a maximum of n bins.
|
|
|
|
//
|
|
|
|
// There is no "optimal" bin count, but somewhere between 20 and 80 bins
|
|
|
|
// should be sufficient.
|
|
|
|
func NewHistogram(n int) *Histogram {
|
|
|
|
return &Histogram{
|
2016-11-13 23:26:23 +08:00
|
|
|
Bins: make([]Bin, 0, n),
|
2016-11-13 06:59:56 +08:00
|
|
|
Maxbins: n,
|
2016-11-13 23:26:23 +08:00
|
|
|
|
|
|
|
min: math.MaxUint64,
|
2016-11-13 06:59:56 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h *Histogram) Add(n uint64) {
|
|
|
|
h.Total++
|
2016-11-13 23:26:23 +08:00
|
|
|
|
|
|
|
if n > h.max {
|
|
|
|
h.max = n
|
|
|
|
}
|
|
|
|
if n < h.min {
|
|
|
|
h.min = n
|
|
|
|
}
|
|
|
|
|
|
|
|
defer h.trim()
|
2016-11-13 06:59:56 +08:00
|
|
|
for i := range h.Bins {
|
|
|
|
if h.Bins[i].Value == n {
|
|
|
|
h.Bins[i].Count++
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if h.Bins[i].Value > n {
|
|
|
|
newbin := Bin{Value: n, Count: 1}
|
|
|
|
head := append(make([]Bin, 0), h.Bins[0:i]...)
|
|
|
|
|
|
|
|
head = append(head, newbin)
|
|
|
|
tail := h.Bins[i:]
|
|
|
|
h.Bins = append(head, tail...)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
h.Bins = append(h.Bins, Bin{Count: 1, Value: n})
|
|
|
|
}
|
|
|
|
|
2016-11-13 23:26:23 +08:00
|
|
|
func (h *Histogram) Quantile(q float64) int64 {
|
|
|
|
count := q * float64(h.Total)
|
2016-11-13 06:59:56 +08:00
|
|
|
for i := range h.Bins {
|
2016-11-13 23:26:23 +08:00
|
|
|
count -= float64(h.Bins[i].Count)
|
2016-11-13 06:59:56 +08:00
|
|
|
|
|
|
|
if count <= 0 {
|
|
|
|
return int64(h.Bins[i].Value)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-11-13 23:26:23 +08:00
|
|
|
return 0
|
2016-11-13 06:59:56 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// CDF returns the value of the cumulative distribution function
|
|
|
|
// at x
|
|
|
|
func (h *Histogram) CDF(x uint64) uint64 {
|
|
|
|
var count uint64
|
|
|
|
for i := range h.Bins {
|
|
|
|
if h.Bins[i].Value <= x {
|
|
|
|
count += h.Bins[i].Count
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return count / h.Total
|
|
|
|
}
|
|
|
|
|
|
|
|
// Mean returns the sample mean of the distribution
|
|
|
|
func (h *Histogram) Mean() float64 {
|
|
|
|
if h.Total == 0 {
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
|
|
|
|
sum := 0.0
|
|
|
|
|
|
|
|
for i := range h.Bins {
|
|
|
|
sum += float64(h.Bins[i].Value * h.Bins[i].Count)
|
|
|
|
}
|
|
|
|
|
|
|
|
return sum / float64(h.Total)
|
|
|
|
}
|
|
|
|
|
2016-11-13 23:26:23 +08:00
|
|
|
// Min returns the minimal recorder value.
|
|
|
|
func (h *Histogram) Min() uint64 {
|
|
|
|
if h.Total == 0 {
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
|
|
|
|
return h.min
|
|
|
|
}
|
|
|
|
|
|
|
|
// Max returns the maximum recorder value.
|
|
|
|
func (h *Histogram) Max() uint64 {
|
|
|
|
if h.Total == 0 {
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
|
|
|
|
return h.max
|
|
|
|
}
|
|
|
|
|
2016-11-13 06:59:56 +08:00
|
|
|
// Variance returns the variance of the distribution
|
|
|
|
func (h *Histogram) Variance() float64 {
|
|
|
|
if h.Total == 0 {
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
|
|
|
|
sum := 0.0
|
|
|
|
mean := h.Mean()
|
|
|
|
|
|
|
|
for i := range h.Bins {
|
|
|
|
sum += float64(h.Bins[i].Count * (h.Bins[i].Value - uint64(mean)) * (h.Bins[i].Value - uint64(mean)))
|
|
|
|
}
|
|
|
|
|
|
|
|
return sum / float64(h.Total)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h *Histogram) Count() uint64 {
|
|
|
|
return h.Total
|
|
|
|
}
|
|
|
|
|
|
|
|
// trim merges adjacent bins to decrease the bin count to the maximum value
|
|
|
|
func (h *Histogram) trim() {
|
|
|
|
for len(h.Bins) > h.Maxbins {
|
|
|
|
// Find closest bins in terms of value
|
|
|
|
minDelta := uint64(1e10)
|
|
|
|
minDeltaIndex := 0
|
|
|
|
for i := range h.Bins {
|
|
|
|
if i == 0 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if delta := h.Bins[i].Value - h.Bins[i-1].Value; delta < minDelta {
|
|
|
|
minDelta = delta
|
|
|
|
minDeltaIndex = i
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// We need to merge bins minDeltaIndex-1 and minDeltaIndex
|
|
|
|
totalCount := h.Bins[minDeltaIndex-1].Count + h.Bins[minDeltaIndex].Count
|
|
|
|
mergedbin := Bin{
|
|
|
|
Value: (h.Bins[minDeltaIndex-1].Value*
|
|
|
|
h.Bins[minDeltaIndex-1].Count +
|
|
|
|
h.Bins[minDeltaIndex].Value*
|
|
|
|
h.Bins[minDeltaIndex].Count) /
|
|
|
|
totalCount, // weighted average
|
|
|
|
Count: totalCount, // summed heights
|
|
|
|
}
|
|
|
|
head := append(make([]Bin, 0), h.Bins[0:minDeltaIndex-1]...)
|
|
|
|
tail := append([]Bin{mergedbin}, h.Bins[minDeltaIndex+1:]...)
|
|
|
|
h.Bins = append(head, tail...)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h *Histogram) BarchartData() ([]uint64, []int) {
|
|
|
|
values := make([]uint64, len(h.Bins))
|
|
|
|
counts := make([]int, len(h.Bins))
|
|
|
|
for i := 0; i < len(h.Bins); i++ {
|
|
|
|
values[i] = h.Bins[i].Value
|
|
|
|
counts[i] = int(h.Bins[i].Count)
|
|
|
|
}
|
|
|
|
return values, counts
|
|
|
|
}
|