traefik/vendor/github.com/VividCortex/gohistogram/weightedhistogram.go
2017-11-08 15:14:03 +01:00

190 lines
4.3 KiB
Go

// Package gohistogram contains implementations of weighted and exponential histograms.
package gohistogram
// Copyright (c) 2013 VividCortex, Inc. All rights reserved.
// Please see the LICENSE file for applicable license terms.
import "fmt"
// A WeightedHistogram implements Histogram. A WeightedHistogram has bins that have values
// which are exponentially weighted moving averages. This allows you keep inserting large
// amounts of data into the histogram and approximate quantiles with recency factored in.
type WeightedHistogram struct {
bins []bin
maxbins int
total float64
alpha float64
}
// NewWeightedHistogram returns a new WeightedHistogram with a maximum of n bins with a decay factor
// of alpha.
//
// There is no "optimal" bin count, but somewhere between 20 and 80 bins should be
// sufficient.
//
// Alpha should be set to 2 / (N+1), where N represents the average age of the moving window.
// For example, a 60-second window with an average age of 30 seconds would yield an
// alpha of 0.064516129.
func NewWeightedHistogram(n int, alpha float64) *WeightedHistogram {
return &WeightedHistogram{
bins: make([]bin, 0),
maxbins: n,
total: 0,
alpha: alpha,
}
}
func ewma(existingVal float64, newVal float64, alpha float64) (result float64) {
result = newVal*(1-alpha) + existingVal*alpha
return
}
func (h *WeightedHistogram) scaleDown(except int) {
for i := range h.bins {
if i != except {
h.bins[i].count = ewma(h.bins[i].count, 0, h.alpha)
}
}
}
func (h *WeightedHistogram) Add(n float64) {
defer h.trim()
for i := range h.bins {
if h.bins[i].value == n {
h.bins[i].count++
defer h.scaleDown(i)
return
}
if h.bins[i].value > n {
newbin := bin{value: n, count: 1}
head := append(make([]bin, 0), h.bins[0:i]...)
head = append(head, newbin)
tail := h.bins[i:]
h.bins = append(head, tail...)
defer h.scaleDown(i)
return
}
}
h.bins = append(h.bins, bin{count: 1, value: n})
}
func (h *WeightedHistogram) Quantile(q float64) float64 {
count := q * h.total
for i := range h.bins {
count -= float64(h.bins[i].count)
if count <= 0 {
return h.bins[i].value
}
}
return -1
}
// CDF returns the value of the cumulative distribution function
// at x
func (h *WeightedHistogram) CDF(x float64) float64 {
count := 0.0
for i := range h.bins {
if h.bins[i].value <= x {
count += float64(h.bins[i].count)
}
}
return count / h.total
}
// Mean returns the sample mean of the distribution
func (h *WeightedHistogram) Mean() float64 {
if h.total == 0 {
return 0
}
sum := 0.0
for i := range h.bins {
sum += h.bins[i].value * h.bins[i].count
}
return sum / h.total
}
// Variance returns the variance of the distribution
func (h *WeightedHistogram) Variance() float64 {
if h.total == 0 {
return 0
}
sum := 0.0
mean := h.Mean()
for i := range h.bins {
sum += (h.bins[i].count * (h.bins[i].value - mean) * (h.bins[i].value - mean))
}
return sum / h.total
}
func (h *WeightedHistogram) Count() float64 {
return h.total
}
func (h *WeightedHistogram) trim() {
total := 0.0
for i := range h.bins {
total += h.bins[i].count
}
h.total = total
for len(h.bins) > h.maxbins {
// Find closest bins in terms of value
minDelta := 1e99
minDeltaIndex := 0
for i := range h.bins {
if i == 0 {
continue
}
if delta := h.bins[i].value - h.bins[i-1].value; delta < minDelta {
minDelta = delta
minDeltaIndex = i
}
}
// We need to merge bins minDeltaIndex-1 and minDeltaIndex
totalCount := h.bins[minDeltaIndex-1].count + h.bins[minDeltaIndex].count
mergedbin := bin{
value: (h.bins[minDeltaIndex-1].value*
h.bins[minDeltaIndex-1].count +
h.bins[minDeltaIndex].value*
h.bins[minDeltaIndex].count) /
totalCount, // weighted average
count: totalCount, // summed heights
}
head := append(make([]bin, 0), h.bins[0:minDeltaIndex-1]...)
tail := append([]bin{mergedbin}, h.bins[minDeltaIndex+1:]...)
h.bins = append(head, tail...)
}
}
// String returns a string reprentation of the histogram,
// which is useful for printing to a terminal.
func (h *WeightedHistogram) String() (str string) {
str += fmt.Sprintln("Total:", h.total)
for i := range h.bins {
var bar string
for j := 0; j < int(float64(h.bins[i].count)/float64(h.total)*200); j++ {
bar += "."
}
str += fmt.Sprintln(h.bins[i].value, "\t", bar)
}
return
}