fix statsd scale for duration based metrics

This commit is contained in:
Traefiker Bot 2020-03-05 15:10:07 +01:00 committed by GitHub
parent dccc075f2c
commit b5d205b78c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 161 additions and 25 deletions

View file

@ -50,14 +50,14 @@ func RegisterDatadog(ctx context.Context, config *types.Datadog) Registry {
if config.AddEntryPointsLabels {
registry.epEnabled = config.AddEntryPointsLabels
registry.entryPointReqsCounter = datadogClient.NewCounter(ddEntryPointReqsName, 1.0)
registry.entryPointReqDurationHistogram = datadogClient.NewHistogram(ddEntryPointReqDurationName, 1.0)
registry.entryPointReqDurationHistogram, _ = NewHistogramWithScale(datadogClient.NewHistogram(ddEntryPointReqDurationName, 1.0), time.Second)
registry.entryPointOpenConnsGauge = datadogClient.NewGauge(ddEntryPointOpenConnsName)
}
if config.AddServicesLabels {
registry.svcEnabled = config.AddServicesLabels
registry.serviceReqsCounter = datadogClient.NewCounter(ddMetricsServiceReqsName, 1.0)
registry.serviceReqDurationHistogram = datadogClient.NewHistogram(ddMetricsServiceLatencyName, 1.0)
registry.serviceReqDurationHistogram, _ = NewHistogramWithScale(datadogClient.NewHistogram(ddMetricsServiceLatencyName, 1.0), time.Second)
registry.serviceRetriesCounter = datadogClient.NewCounter(ddRetriesTotalName, 1.0)
registry.serviceOpenConnsGauge = datadogClient.NewGauge(ddOpenConnsName)
registry.serviceServerUpGauge = datadogClient.NewGauge(ddServerUpName)

View file

@ -64,14 +64,14 @@ func RegisterInfluxDB(ctx context.Context, config *types.InfluxDB) Registry {
if config.AddEntryPointsLabels {
registry.epEnabled = config.AddEntryPointsLabels
registry.entryPointReqsCounter = influxDBClient.NewCounter(influxDBEntryPointReqsName)
registry.entryPointReqDurationHistogram = influxDBClient.NewHistogram(influxDBEntryPointReqDurationName)
registry.entryPointReqDurationHistogram, _ = NewHistogramWithScale(influxDBClient.NewHistogram(influxDBEntryPointReqDurationName), time.Second)
registry.entryPointOpenConnsGauge = influxDBClient.NewGauge(influxDBEntryPointOpenConnsName)
}
if config.AddServicesLabels {
registry.svcEnabled = config.AddServicesLabels
registry.serviceReqsCounter = influxDBClient.NewCounter(influxDBMetricsServiceReqsName)
registry.serviceReqDurationHistogram = influxDBClient.NewHistogram(influxDBMetricsServiceLatencyName)
registry.serviceReqDurationHistogram, _ = NewHistogramWithScale(influxDBClient.NewHistogram(influxDBMetricsServiceLatencyName), time.Second)
registry.serviceRetriesCounter = influxDBClient.NewCounter(influxDBRetriesTotalName)
registry.serviceOpenConnsGauge = influxDBClient.NewGauge(influxDBOpenConnsName)
registry.serviceServerUpGauge = influxDBClient.NewGauge(influxDBServerUpName)

View file

@ -1,6 +1,9 @@
package metrics
import (
"errors"
"time"
"github.com/go-kit/kit/metrics"
"github.com/go-kit/kit/metrics/multi"
)
@ -20,12 +23,12 @@ type Registry interface {
// entry point metrics
EntryPointReqsCounter() metrics.Counter
EntryPointReqDurationHistogram() metrics.Histogram
EntryPointReqDurationHistogram() ScalableHistogram
EntryPointOpenConnsGauge() metrics.Gauge
// service metrics
ServiceReqsCounter() metrics.Counter
ServiceReqDurationHistogram() metrics.Histogram
ServiceReqDurationHistogram() ScalableHistogram
ServiceOpenConnsGauge() metrics.Gauge
ServiceRetriesCounter() metrics.Counter
ServiceServerUpGauge() metrics.Gauge
@ -46,10 +49,10 @@ func NewMultiRegistry(registries []Registry) Registry {
var lastConfigReloadSuccessGauge []metrics.Gauge
var lastConfigReloadFailureGauge []metrics.Gauge
var entryPointReqsCounter []metrics.Counter
var entryPointReqDurationHistogram []metrics.Histogram
var entryPointReqDurationHistogram []ScalableHistogram
var entryPointOpenConnsGauge []metrics.Gauge
var serviceReqsCounter []metrics.Counter
var serviceReqDurationHistogram []metrics.Histogram
var serviceReqDurationHistogram []ScalableHistogram
var serviceOpenConnsGauge []metrics.Gauge
var serviceRetriesCounter []metrics.Counter
var serviceServerUpGauge []metrics.Gauge
@ -101,10 +104,10 @@ func NewMultiRegistry(registries []Registry) Registry {
lastConfigReloadSuccessGauge: multi.NewGauge(lastConfigReloadSuccessGauge...),
lastConfigReloadFailureGauge: multi.NewGauge(lastConfigReloadFailureGauge...),
entryPointReqsCounter: multi.NewCounter(entryPointReqsCounter...),
entryPointReqDurationHistogram: multi.NewHistogram(entryPointReqDurationHistogram...),
entryPointReqDurationHistogram: NewMultiHistogram(entryPointReqDurationHistogram...),
entryPointOpenConnsGauge: multi.NewGauge(entryPointOpenConnsGauge...),
serviceReqsCounter: multi.NewCounter(serviceReqsCounter...),
serviceReqDurationHistogram: multi.NewHistogram(serviceReqDurationHistogram...),
serviceReqDurationHistogram: NewMultiHistogram(serviceReqDurationHistogram...),
serviceOpenConnsGauge: multi.NewGauge(serviceOpenConnsGauge...),
serviceRetriesCounter: multi.NewCounter(serviceRetriesCounter...),
serviceServerUpGauge: multi.NewGauge(serviceServerUpGauge...),
@ -119,10 +122,10 @@ type standardRegistry struct {
lastConfigReloadSuccessGauge metrics.Gauge
lastConfigReloadFailureGauge metrics.Gauge
entryPointReqsCounter metrics.Counter
entryPointReqDurationHistogram metrics.Histogram
entryPointReqDurationHistogram ScalableHistogram
entryPointOpenConnsGauge metrics.Gauge
serviceReqsCounter metrics.Counter
serviceReqDurationHistogram metrics.Histogram
serviceReqDurationHistogram ScalableHistogram
serviceOpenConnsGauge metrics.Gauge
serviceRetriesCounter metrics.Counter
serviceServerUpGauge metrics.Gauge
@ -156,7 +159,7 @@ func (r *standardRegistry) EntryPointReqsCounter() metrics.Counter {
return r.entryPointReqsCounter
}
func (r *standardRegistry) EntryPointReqDurationHistogram() metrics.Histogram {
func (r *standardRegistry) EntryPointReqDurationHistogram() ScalableHistogram {
return r.entryPointReqDurationHistogram
}
@ -168,7 +171,7 @@ func (r *standardRegistry) ServiceReqsCounter() metrics.Counter {
return r.serviceReqsCounter
}
func (r *standardRegistry) ServiceReqDurationHistogram() metrics.Histogram {
func (r *standardRegistry) ServiceReqDurationHistogram() ScalableHistogram {
return r.serviceReqDurationHistogram
}
@ -183,3 +186,97 @@ func (r *standardRegistry) ServiceRetriesCounter() metrics.Counter {
func (r *standardRegistry) ServiceServerUpGauge() metrics.Gauge {
return r.serviceServerUpGauge
}
// ScalableHistogram is a Histogram with a predefined time unit,
// used when producing observations without explicitly setting the observed value.
type ScalableHistogram interface {
With(labelValues ...string) ScalableHistogram
StartAt(t time.Time)
Observe(v float64)
ObserveDuration()
}
// HistogramWithScale is a histogram that will convert its observed value to the specified unit.
type HistogramWithScale struct {
histogram metrics.Histogram
unit time.Duration
start time.Time
}
// With implements ScalableHistogram.
func (s *HistogramWithScale) With(labelValues ...string) ScalableHistogram {
s.histogram = s.histogram.With(labelValues...)
return s
}
// StartAt implements ScalableHistogram.
func (s *HistogramWithScale) StartAt(t time.Time) {
s.start = t
}
// ObserveDuration implements ScalableHistogram.
func (s *HistogramWithScale) ObserveDuration() {
if s.unit <= 0 {
return
}
d := float64(time.Since(s.start).Nanoseconds()) / float64(s.unit)
if d < 0 {
d = 0
}
s.histogram.Observe(d)
}
// Observe implements ScalableHistogram.
func (s *HistogramWithScale) Observe(v float64) {
s.histogram.Observe(v)
}
// NewHistogramWithScale returns a ScalableHistogram. It returns an error if the given unit is <= 0.
func NewHistogramWithScale(histogram metrics.Histogram, unit time.Duration) (ScalableHistogram, error) {
if unit <= 0 {
return nil, errors.New("invalid time unit")
}
return &HistogramWithScale{
histogram: histogram,
unit: unit,
}, nil
}
// MultiHistogram collects multiple individual histograms and treats them as a unit.
type MultiHistogram []ScalableHistogram
// NewMultiHistogram returns a multi-histogram, wrapping the passed histograms.
func NewMultiHistogram(h ...ScalableHistogram) MultiHistogram {
return MultiHistogram(h)
}
// StartAt implements ScalableHistogram.
func (h MultiHistogram) StartAt(t time.Time) {
for _, histogram := range h {
histogram.StartAt(t)
}
}
// ObserveDuration implements ScalableHistogram.
func (h MultiHistogram) ObserveDuration() {
for _, histogram := range h {
histogram.ObserveDuration()
}
}
// Observe implements ScalableHistogram.
func (h MultiHistogram) Observe(v float64) {
for _, histogram := range h {
histogram.Observe(v)
}
}
// With implements ScalableHistogram.
func (h MultiHistogram) With(labelValues ...string) ScalableHistogram {
next := make(MultiHistogram, len(h))
for i := range h {
next[i] = h[i].With(labelValues...)
}
return next
}

View file

@ -1,18 +1,44 @@
package metrics
import (
"bytes"
"strings"
"testing"
"time"
"github.com/go-kit/kit/metrics"
"github.com/go-kit/kit/metrics/generic"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestScalableHistogram(t *testing.T) {
h := generic.NewHistogram("test", 1)
sh, err := NewHistogramWithScale(h, time.Millisecond)
require.NoError(t, err)
ticker := time.NewTicker(500 * time.Millisecond)
<-ticker.C
sh.StartAt(time.Now())
<-ticker.C
sh.ObserveDuration()
var b bytes.Buffer
h.Print(&b)
extractedDurationString := strings.Split(strings.Split(b.String(), "\n")[1], " ")
measuredDuration, err := time.ParseDuration(extractedDurationString[0] + "ms")
assert.NoError(t, err)
assert.InDelta(t, 500*time.Millisecond, measuredDuration, float64(1*time.Millisecond))
}
func TestNewMultiRegistry(t *testing.T) {
registries := []Registry{newCollectingRetryMetrics(), newCollectingRetryMetrics()}
registry := NewMultiRegistry(registries)
registry.ServiceReqsCounter().With("key", "requests").Add(1)
registry.ServiceReqDurationHistogram().With("key", "durations").Observe(2)
registry.ServiceReqDurationHistogram().With("key", "durations").Observe(float64(2))
registry.ServiceRetriesCounter().With("key", "retries").Add(3)
for _, collectingRegistry := range registries {
@ -66,11 +92,17 @@ type histogramMock struct {
lastLabelValues []string
}
func (c *histogramMock) With(labelValues ...string) metrics.Histogram {
func (c *histogramMock) With(labelValues ...string) ScalableHistogram {
c.lastLabelValues = labelValues
return c
}
func (c *histogramMock) Observe(value float64) {
c.lastHistogramValue = value
func (c *histogramMock) Start() {}
func (c *histogramMock) StartAt(t time.Time) {}
func (c *histogramMock) ObserveDuration() {}
func (c *histogramMock) Observe(v float64) {
c.lastHistogramValue = v
}

View file

@ -6,6 +6,7 @@ import (
"sort"
"strings"
"sync"
"time"
"github.com/containous/traefik/v2/pkg/config/dynamic"
"github.com/containous/traefik/v2/pkg/log"
@ -152,7 +153,7 @@ func initStandardRegistry(config *types.Prometheus) Registry {
entryPointOpenConns.gv.Describe,
}...)
reg.entryPointReqsCounter = entryPointReqs
reg.entryPointReqDurationHistogram = entryPointReqDurations
reg.entryPointReqDurationHistogram, _ = NewHistogramWithScale(entryPointReqDurations, time.Second)
reg.entryPointOpenConnsGauge = entryPointOpenConns
}
if config.AddServicesLabels {
@ -187,7 +188,7 @@ func initStandardRegistry(config *types.Prometheus) Registry {
}...)
reg.serviceReqsCounter = serviceReqs
reg.serviceReqDurationHistogram = serviceReqDurations
reg.serviceReqDurationHistogram, _ = NewHistogramWithScale(serviceReqDurations, time.Second)
reg.serviceOpenConnsGauge = serviceOpenConns
reg.serviceRetriesCounter = serviceRetries
reg.serviceServerUpGauge = serviceServerUp

View file

@ -55,14 +55,14 @@ func RegisterStatsd(ctx context.Context, config *types.Statsd) Registry {
if config.AddEntryPointsLabels {
registry.epEnabled = config.AddEntryPointsLabels
registry.entryPointReqsCounter = statsdClient.NewCounter(statsdEntryPointReqsName, 1.0)
registry.entryPointReqDurationHistogram = statsdClient.NewTiming(statsdEntryPointReqDurationName, 1.0)
registry.entryPointReqDurationHistogram, _ = NewHistogramWithScale(statsdClient.NewTiming(statsdEntryPointReqDurationName, 1.0), time.Millisecond)
registry.entryPointOpenConnsGauge = statsdClient.NewGauge(statsdEntryPointOpenConnsName)
}
if config.AddServicesLabels {
registry.svcEnabled = config.AddServicesLabels
registry.serviceReqsCounter = statsdClient.NewCounter(statsdMetricsServiceReqsName, 1.0)
registry.serviceReqDurationHistogram = statsdClient.NewTiming(statsdMetricsServiceLatencyName, 1.0)
registry.serviceReqDurationHistogram, _ = NewHistogramWithScale(statsdClient.NewTiming(statsdMetricsServiceLatencyName, 1.0), time.Millisecond)
registry.serviceRetriesCounter = statsdClient.NewCounter(statsdRetriesTotalName, 1.0)
registry.serviceOpenConnsGauge = statsdClient.NewGauge(statsdOpenConnsName)
registry.serviceServerUpGauge = statsdClient.NewGauge(statsdServerUpName)

View file

@ -32,7 +32,7 @@ type metricsMiddleware struct {
openConns int64
next http.Handler
reqsCounter gokitmetrics.Counter
reqDurationHistogram gokitmetrics.Histogram
reqDurationHistogram metrics.ScalableHistogram
openConnsGauge gokitmetrics.Gauge
baseLabels []string
}
@ -88,13 +88,19 @@ func (m *metricsMiddleware) ServeHTTP(rw http.ResponseWriter, req *http.Request)
m.openConnsGauge.With(labelValues...).Set(float64(openConns))
}(labels)
start := time.Now()
recorder := newResponseRecorder(rw)
start := time.Now()
m.next.ServeHTTP(recorder, req)
labels = append(labels, "code", strconv.Itoa(recorder.getCode()))
histograms := m.reqDurationHistogram.With(labels...)
histograms.StartAt(start)
m.reqsCounter.With(labels...).Add(1)
m.reqDurationHistogram.With(labels...).Observe(time.Since(start).Seconds())
histograms.ObserveDuration()
}
func getRequestProtocol(req *http.Request) string {