564 lines
14 KiB
Go
564 lines
14 KiB
Go
// Package hdrhistogram provides an implementation of Gil Tene's HDR Histogram
|
|
// data structure. The HDR Histogram allows for fast and accurate analysis of
|
|
// the extreme ranges of data with non-normal distributions, like latency.
|
|
package hdrhistogram
|
|
|
|
import (
|
|
"fmt"
|
|
"math"
|
|
)
|
|
|
|
// A Bracket is a part of a cumulative distribution.
|
|
type Bracket struct {
|
|
Quantile float64
|
|
Count, ValueAt int64
|
|
}
|
|
|
|
// A Snapshot is an exported view of a Histogram, useful for serializing them.
|
|
// A Histogram can be constructed from it by passing it to Import.
|
|
type Snapshot struct {
|
|
LowestTrackableValue int64
|
|
HighestTrackableValue int64
|
|
SignificantFigures int64
|
|
Counts []int64
|
|
}
|
|
|
|
// A Histogram is a lossy data structure used to record the distribution of
|
|
// non-normally distributed data (like latency) with a high degree of accuracy
|
|
// and a bounded degree of precision.
|
|
type Histogram struct {
|
|
lowestTrackableValue int64
|
|
highestTrackableValue int64
|
|
unitMagnitude int64
|
|
significantFigures int64
|
|
subBucketHalfCountMagnitude int32
|
|
subBucketHalfCount int32
|
|
subBucketMask int64
|
|
subBucketCount int32
|
|
bucketCount int32
|
|
countsLen int32
|
|
totalCount int64
|
|
counts []int64
|
|
}
|
|
|
|
// New returns a new Histogram instance capable of tracking values in the given
|
|
// range and with the given amount of precision.
|
|
func New(minValue, maxValue int64, sigfigs int) *Histogram {
|
|
if sigfigs < 1 || 5 < sigfigs {
|
|
panic(fmt.Errorf("sigfigs must be [1,5] (was %d)", sigfigs))
|
|
}
|
|
|
|
largestValueWithSingleUnitResolution := 2 * math.Pow10(sigfigs)
|
|
subBucketCountMagnitude := int32(math.Ceil(math.Log2(float64(largestValueWithSingleUnitResolution))))
|
|
|
|
subBucketHalfCountMagnitude := subBucketCountMagnitude
|
|
if subBucketHalfCountMagnitude < 1 {
|
|
subBucketHalfCountMagnitude = 1
|
|
}
|
|
subBucketHalfCountMagnitude--
|
|
|
|
unitMagnitude := int32(math.Floor(math.Log2(float64(minValue))))
|
|
if unitMagnitude < 0 {
|
|
unitMagnitude = 0
|
|
}
|
|
|
|
subBucketCount := int32(math.Pow(2, float64(subBucketHalfCountMagnitude)+1))
|
|
|
|
subBucketHalfCount := subBucketCount / 2
|
|
subBucketMask := int64(subBucketCount-1) << uint(unitMagnitude)
|
|
|
|
// determine exponent range needed to support the trackable value with no
|
|
// overflow:
|
|
smallestUntrackableValue := int64(subBucketCount) << uint(unitMagnitude)
|
|
bucketsNeeded := int32(1)
|
|
for smallestUntrackableValue < maxValue {
|
|
smallestUntrackableValue <<= 1
|
|
bucketsNeeded++
|
|
}
|
|
|
|
bucketCount := bucketsNeeded
|
|
countsLen := (bucketCount + 1) * (subBucketCount / 2)
|
|
|
|
return &Histogram{
|
|
lowestTrackableValue: minValue,
|
|
highestTrackableValue: maxValue,
|
|
unitMagnitude: int64(unitMagnitude),
|
|
significantFigures: int64(sigfigs),
|
|
subBucketHalfCountMagnitude: subBucketHalfCountMagnitude,
|
|
subBucketHalfCount: subBucketHalfCount,
|
|
subBucketMask: subBucketMask,
|
|
subBucketCount: subBucketCount,
|
|
bucketCount: bucketCount,
|
|
countsLen: countsLen,
|
|
totalCount: 0,
|
|
counts: make([]int64, countsLen),
|
|
}
|
|
}
|
|
|
|
// ByteSize returns an estimate of the amount of memory allocated to the
|
|
// histogram in bytes.
|
|
//
|
|
// N.B.: This does not take into account the overhead for slices, which are
|
|
// small, constant, and specific to the compiler version.
|
|
func (h *Histogram) ByteSize() int {
|
|
return 6*8 + 5*4 + len(h.counts)*8
|
|
}
|
|
|
|
// Merge merges the data stored in the given histogram with the receiver,
|
|
// returning the number of recorded values which had to be dropped.
|
|
func (h *Histogram) Merge(from *Histogram) (dropped int64) {
|
|
i := from.rIterator()
|
|
for i.next() {
|
|
v := i.valueFromIdx
|
|
c := i.countAtIdx
|
|
|
|
if h.RecordValues(v, c) != nil {
|
|
dropped += c
|
|
}
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
// TotalCount returns total number of values recorded.
|
|
func (h *Histogram) TotalCount() int64 {
|
|
return h.totalCount
|
|
}
|
|
|
|
// Max returns the approximate maximum recorded value.
|
|
func (h *Histogram) Max() int64 {
|
|
var max int64
|
|
i := h.iterator()
|
|
for i.next() {
|
|
if i.countAtIdx != 0 {
|
|
max = i.highestEquivalentValue
|
|
}
|
|
}
|
|
return h.highestEquivalentValue(max)
|
|
}
|
|
|
|
// Min returns the approximate minimum recorded value.
|
|
func (h *Histogram) Min() int64 {
|
|
var min int64
|
|
i := h.iterator()
|
|
for i.next() {
|
|
if i.countAtIdx != 0 && min == 0 {
|
|
min = i.highestEquivalentValue
|
|
break
|
|
}
|
|
}
|
|
return h.lowestEquivalentValue(min)
|
|
}
|
|
|
|
// Mean returns the approximate arithmetic mean of the recorded values.
|
|
func (h *Histogram) Mean() float64 {
|
|
if h.totalCount == 0 {
|
|
return 0
|
|
}
|
|
var total int64
|
|
i := h.iterator()
|
|
for i.next() {
|
|
if i.countAtIdx != 0 {
|
|
total += i.countAtIdx * h.medianEquivalentValue(i.valueFromIdx)
|
|
}
|
|
}
|
|
return float64(total) / float64(h.totalCount)
|
|
}
|
|
|
|
// StdDev returns the approximate standard deviation of the recorded values.
|
|
func (h *Histogram) StdDev() float64 {
|
|
if h.totalCount == 0 {
|
|
return 0
|
|
}
|
|
|
|
mean := h.Mean()
|
|
geometricDevTotal := 0.0
|
|
|
|
i := h.iterator()
|
|
for i.next() {
|
|
if i.countAtIdx != 0 {
|
|
dev := float64(h.medianEquivalentValue(i.valueFromIdx)) - mean
|
|
geometricDevTotal += (dev * dev) * float64(i.countAtIdx)
|
|
}
|
|
}
|
|
|
|
return math.Sqrt(geometricDevTotal / float64(h.totalCount))
|
|
}
|
|
|
|
// Reset deletes all recorded values and restores the histogram to its original
|
|
// state.
|
|
func (h *Histogram) Reset() {
|
|
h.totalCount = 0
|
|
for i := range h.counts {
|
|
h.counts[i] = 0
|
|
}
|
|
}
|
|
|
|
// RecordValue records the given value, returning an error if the value is out
|
|
// of range.
|
|
func (h *Histogram) RecordValue(v int64) error {
|
|
return h.RecordValues(v, 1)
|
|
}
|
|
|
|
// RecordCorrectedValue records the given value, correcting for stalls in the
|
|
// recording process. This only works for processes which are recording values
|
|
// at an expected interval (e.g., doing jitter analysis). Processes which are
|
|
// recording ad-hoc values (e.g., latency for incoming requests) can't take
|
|
// advantage of this.
|
|
func (h *Histogram) RecordCorrectedValue(v, expectedInterval int64) error {
|
|
if err := h.RecordValue(v); err != nil {
|
|
return err
|
|
}
|
|
|
|
if expectedInterval <= 0 || v <= expectedInterval {
|
|
return nil
|
|
}
|
|
|
|
missingValue := v - expectedInterval
|
|
for missingValue >= expectedInterval {
|
|
if err := h.RecordValue(missingValue); err != nil {
|
|
return err
|
|
}
|
|
missingValue -= expectedInterval
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// RecordValues records n occurrences of the given value, returning an error if
|
|
// the value is out of range.
|
|
func (h *Histogram) RecordValues(v, n int64) error {
|
|
idx := h.countsIndexFor(v)
|
|
if idx < 0 || int(h.countsLen) <= idx {
|
|
return fmt.Errorf("value %d is too large to be recorded", v)
|
|
}
|
|
h.counts[idx] += n
|
|
h.totalCount += n
|
|
|
|
return nil
|
|
}
|
|
|
|
// ValueAtQuantile returns the recorded value at the given quantile (0..100).
|
|
func (h *Histogram) ValueAtQuantile(q float64) int64 {
|
|
if q > 100 {
|
|
q = 100
|
|
}
|
|
|
|
total := int64(0)
|
|
countAtPercentile := int64(((q / 100) * float64(h.totalCount)) + 0.5)
|
|
|
|
i := h.iterator()
|
|
for i.next() {
|
|
total += i.countAtIdx
|
|
if total >= countAtPercentile {
|
|
return h.highestEquivalentValue(i.valueFromIdx)
|
|
}
|
|
}
|
|
|
|
return 0
|
|
}
|
|
|
|
// CumulativeDistribution returns an ordered list of brackets of the
|
|
// distribution of recorded values.
|
|
func (h *Histogram) CumulativeDistribution() []Bracket {
|
|
var result []Bracket
|
|
|
|
i := h.pIterator(1)
|
|
for i.next() {
|
|
result = append(result, Bracket{
|
|
Quantile: i.percentile,
|
|
Count: i.countToIdx,
|
|
ValueAt: i.highestEquivalentValue,
|
|
})
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
// SignificantFigures returns the significant figures used to create the
|
|
// histogram
|
|
func (h *Histogram) SignificantFigures() int64 {
|
|
return h.significantFigures
|
|
}
|
|
|
|
// LowestTrackableValue returns the lower bound on values that will be added
|
|
// to the histogram
|
|
func (h *Histogram) LowestTrackableValue() int64 {
|
|
return h.lowestTrackableValue
|
|
}
|
|
|
|
// HighestTrackableValue returns the upper bound on values that will be added
|
|
// to the histogram
|
|
func (h *Histogram) HighestTrackableValue() int64 {
|
|
return h.highestTrackableValue
|
|
}
|
|
|
|
// Histogram bar for plotting
|
|
type Bar struct {
|
|
From, To, Count int64
|
|
}
|
|
|
|
// Pretty print as csv for easy plotting
|
|
func (b Bar) String() string {
|
|
return fmt.Sprintf("%v, %v, %v\n", b.From, b.To, b.Count)
|
|
}
|
|
|
|
// Distribution returns an ordered list of bars of the
|
|
// distribution of recorded values, counts can be normalized to a probability
|
|
func (h *Histogram) Distribution() (result []Bar) {
|
|
i := h.iterator()
|
|
for i.next() {
|
|
result = append(result, Bar{
|
|
Count: i.countAtIdx,
|
|
From: h.lowestEquivalentValue(i.valueFromIdx),
|
|
To: i.highestEquivalentValue,
|
|
})
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
// Equals returns true if the two Histograms are equivalent, false if not.
|
|
func (h *Histogram) Equals(other *Histogram) bool {
|
|
switch {
|
|
case
|
|
h.lowestTrackableValue != other.lowestTrackableValue,
|
|
h.highestTrackableValue != other.highestTrackableValue,
|
|
h.unitMagnitude != other.unitMagnitude,
|
|
h.significantFigures != other.significantFigures,
|
|
h.subBucketHalfCountMagnitude != other.subBucketHalfCountMagnitude,
|
|
h.subBucketHalfCount != other.subBucketHalfCount,
|
|
h.subBucketMask != other.subBucketMask,
|
|
h.subBucketCount != other.subBucketCount,
|
|
h.bucketCount != other.bucketCount,
|
|
h.countsLen != other.countsLen,
|
|
h.totalCount != other.totalCount:
|
|
return false
|
|
default:
|
|
for i, c := range h.counts {
|
|
if c != other.counts[i] {
|
|
return false
|
|
}
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
// Export returns a snapshot view of the Histogram. This can be later passed to
|
|
// Import to construct a new Histogram with the same state.
|
|
func (h *Histogram) Export() *Snapshot {
|
|
return &Snapshot{
|
|
LowestTrackableValue: h.lowestTrackableValue,
|
|
HighestTrackableValue: h.highestTrackableValue,
|
|
SignificantFigures: h.significantFigures,
|
|
Counts: append([]int64(nil), h.counts...), // copy
|
|
}
|
|
}
|
|
|
|
// Import returns a new Histogram populated from the Snapshot data (which the
|
|
// caller must stop accessing).
|
|
func Import(s *Snapshot) *Histogram {
|
|
h := New(s.LowestTrackableValue, s.HighestTrackableValue, int(s.SignificantFigures))
|
|
h.counts = s.Counts
|
|
totalCount := int64(0)
|
|
for i := int32(0); i < h.countsLen; i++ {
|
|
countAtIndex := h.counts[i]
|
|
if countAtIndex > 0 {
|
|
totalCount += countAtIndex
|
|
}
|
|
}
|
|
h.totalCount = totalCount
|
|
return h
|
|
}
|
|
|
|
func (h *Histogram) iterator() *iterator {
|
|
return &iterator{
|
|
h: h,
|
|
subBucketIdx: -1,
|
|
}
|
|
}
|
|
|
|
func (h *Histogram) rIterator() *rIterator {
|
|
return &rIterator{
|
|
iterator: iterator{
|
|
h: h,
|
|
subBucketIdx: -1,
|
|
},
|
|
}
|
|
}
|
|
|
|
func (h *Histogram) pIterator(ticksPerHalfDistance int32) *pIterator {
|
|
return &pIterator{
|
|
iterator: iterator{
|
|
h: h,
|
|
subBucketIdx: -1,
|
|
},
|
|
ticksPerHalfDistance: ticksPerHalfDistance,
|
|
}
|
|
}
|
|
|
|
func (h *Histogram) sizeOfEquivalentValueRange(v int64) int64 {
|
|
bucketIdx := h.getBucketIndex(v)
|
|
subBucketIdx := h.getSubBucketIdx(v, bucketIdx)
|
|
adjustedBucket := bucketIdx
|
|
if subBucketIdx >= h.subBucketCount {
|
|
adjustedBucket++
|
|
}
|
|
return int64(1) << uint(h.unitMagnitude+int64(adjustedBucket))
|
|
}
|
|
|
|
func (h *Histogram) valueFromIndex(bucketIdx, subBucketIdx int32) int64 {
|
|
return int64(subBucketIdx) << uint(int64(bucketIdx)+h.unitMagnitude)
|
|
}
|
|
|
|
func (h *Histogram) lowestEquivalentValue(v int64) int64 {
|
|
bucketIdx := h.getBucketIndex(v)
|
|
subBucketIdx := h.getSubBucketIdx(v, bucketIdx)
|
|
return h.valueFromIndex(bucketIdx, subBucketIdx)
|
|
}
|
|
|
|
func (h *Histogram) nextNonEquivalentValue(v int64) int64 {
|
|
return h.lowestEquivalentValue(v) + h.sizeOfEquivalentValueRange(v)
|
|
}
|
|
|
|
func (h *Histogram) highestEquivalentValue(v int64) int64 {
|
|
return h.nextNonEquivalentValue(v) - 1
|
|
}
|
|
|
|
func (h *Histogram) medianEquivalentValue(v int64) int64 {
|
|
return h.lowestEquivalentValue(v) + (h.sizeOfEquivalentValueRange(v) >> 1)
|
|
}
|
|
|
|
func (h *Histogram) getCountAtIndex(bucketIdx, subBucketIdx int32) int64 {
|
|
return h.counts[h.countsIndex(bucketIdx, subBucketIdx)]
|
|
}
|
|
|
|
func (h *Histogram) countsIndex(bucketIdx, subBucketIdx int32) int32 {
|
|
bucketBaseIdx := (bucketIdx + 1) << uint(h.subBucketHalfCountMagnitude)
|
|
offsetInBucket := subBucketIdx - h.subBucketHalfCount
|
|
return bucketBaseIdx + offsetInBucket
|
|
}
|
|
|
|
func (h *Histogram) getBucketIndex(v int64) int32 {
|
|
pow2Ceiling := bitLen(v | h.subBucketMask)
|
|
return int32(pow2Ceiling - int64(h.unitMagnitude) -
|
|
int64(h.subBucketHalfCountMagnitude+1))
|
|
}
|
|
|
|
func (h *Histogram) getSubBucketIdx(v int64, idx int32) int32 {
|
|
return int32(v >> uint(int64(idx)+int64(h.unitMagnitude)))
|
|
}
|
|
|
|
func (h *Histogram) countsIndexFor(v int64) int {
|
|
bucketIdx := h.getBucketIndex(v)
|
|
subBucketIdx := h.getSubBucketIdx(v, bucketIdx)
|
|
return int(h.countsIndex(bucketIdx, subBucketIdx))
|
|
}
|
|
|
|
type iterator struct {
|
|
h *Histogram
|
|
bucketIdx, subBucketIdx int32
|
|
countAtIdx, countToIdx, valueFromIdx int64
|
|
highestEquivalentValue int64
|
|
}
|
|
|
|
func (i *iterator) next() bool {
|
|
if i.countToIdx >= i.h.totalCount {
|
|
return false
|
|
}
|
|
|
|
// increment bucket
|
|
i.subBucketIdx++
|
|
if i.subBucketIdx >= i.h.subBucketCount {
|
|
i.subBucketIdx = i.h.subBucketHalfCount
|
|
i.bucketIdx++
|
|
}
|
|
|
|
if i.bucketIdx >= i.h.bucketCount {
|
|
return false
|
|
}
|
|
|
|
i.countAtIdx = i.h.getCountAtIndex(i.bucketIdx, i.subBucketIdx)
|
|
i.countToIdx += i.countAtIdx
|
|
i.valueFromIdx = i.h.valueFromIndex(i.bucketIdx, i.subBucketIdx)
|
|
i.highestEquivalentValue = i.h.highestEquivalentValue(i.valueFromIdx)
|
|
|
|
return true
|
|
}
|
|
|
|
type rIterator struct {
|
|
iterator
|
|
countAddedThisStep int64
|
|
}
|
|
|
|
func (r *rIterator) next() bool {
|
|
for r.iterator.next() {
|
|
if r.countAtIdx != 0 {
|
|
r.countAddedThisStep = r.countAtIdx
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
type pIterator struct {
|
|
iterator
|
|
seenLastValue bool
|
|
ticksPerHalfDistance int32
|
|
percentileToIteratorTo float64
|
|
percentile float64
|
|
}
|
|
|
|
func (p *pIterator) next() bool {
|
|
if !(p.countToIdx < p.h.totalCount) {
|
|
if p.seenLastValue {
|
|
return false
|
|
}
|
|
|
|
p.seenLastValue = true
|
|
p.percentile = 100
|
|
|
|
return true
|
|
}
|
|
|
|
if p.subBucketIdx == -1 && !p.iterator.next() {
|
|
return false
|
|
}
|
|
|
|
var done = false
|
|
for !done {
|
|
currentPercentile := (100.0 * float64(p.countToIdx)) / float64(p.h.totalCount)
|
|
if p.countAtIdx != 0 && p.percentileToIteratorTo <= currentPercentile {
|
|
p.percentile = p.percentileToIteratorTo
|
|
halfDistance := math.Trunc(math.Pow(2, math.Trunc(math.Log2(100.0/(100.0-p.percentileToIteratorTo)))+1))
|
|
percentileReportingTicks := float64(p.ticksPerHalfDistance) * halfDistance
|
|
p.percentileToIteratorTo += 100.0 / percentileReportingTicks
|
|
return true
|
|
}
|
|
done = !p.iterator.next()
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
func bitLen(x int64) (n int64) {
|
|
for ; x >= 0x8000; x >>= 16 {
|
|
n += 16
|
|
}
|
|
if x >= 0x80 {
|
|
x >>= 8
|
|
n += 8
|
|
}
|
|
if x >= 0x8 {
|
|
x >>= 4
|
|
n += 4
|
|
}
|
|
if x >= 0x2 {
|
|
x >>= 2
|
|
n += 2
|
|
}
|
|
if x >= 0x1 {
|
|
n++
|
|
}
|
|
return
|
|
}
|