From b5d205b78c23095546c6ff7e66d98df6dbb58b20 Mon Sep 17 00:00:00 2001 From: Traefiker Bot <30906710+traefiker@users.noreply.github.com> Date: Thu, 5 Mar 2020 15:10:07 +0100 Subject: [PATCH] fix statsd scale for duration based metrics --- pkg/metrics/datadog.go | 4 +- pkg/metrics/influxdb.go | 4 +- pkg/metrics/metrics.go | 117 ++++++++++++++++++++++++++--- pkg/metrics/metrics_test.go | 40 +++++++++- pkg/metrics/prometheus.go | 5 +- pkg/metrics/statsd.go | 4 +- pkg/middlewares/metrics/metrics.go | 12 ++- 7 files changed, 161 insertions(+), 25 deletions(-) diff --git a/pkg/metrics/datadog.go b/pkg/metrics/datadog.go index 988fb0b7a..34994d7a6 100644 --- a/pkg/metrics/datadog.go +++ b/pkg/metrics/datadog.go @@ -50,14 +50,14 @@ func RegisterDatadog(ctx context.Context, config *types.Datadog) Registry { if config.AddEntryPointsLabels { registry.epEnabled = config.AddEntryPointsLabels registry.entryPointReqsCounter = datadogClient.NewCounter(ddEntryPointReqsName, 1.0) - registry.entryPointReqDurationHistogram = datadogClient.NewHistogram(ddEntryPointReqDurationName, 1.0) + registry.entryPointReqDurationHistogram, _ = NewHistogramWithScale(datadogClient.NewHistogram(ddEntryPointReqDurationName, 1.0), time.Second) registry.entryPointOpenConnsGauge = datadogClient.NewGauge(ddEntryPointOpenConnsName) } if config.AddServicesLabels { registry.svcEnabled = config.AddServicesLabels registry.serviceReqsCounter = datadogClient.NewCounter(ddMetricsServiceReqsName, 1.0) - registry.serviceReqDurationHistogram = datadogClient.NewHistogram(ddMetricsServiceLatencyName, 1.0) + registry.serviceReqDurationHistogram, _ = NewHistogramWithScale(datadogClient.NewHistogram(ddMetricsServiceLatencyName, 1.0), time.Second) registry.serviceRetriesCounter = datadogClient.NewCounter(ddRetriesTotalName, 1.0) registry.serviceOpenConnsGauge = datadogClient.NewGauge(ddOpenConnsName) registry.serviceServerUpGauge = datadogClient.NewGauge(ddServerUpName) diff --git a/pkg/metrics/influxdb.go b/pkg/metrics/influxdb.go index 3bc859889..b8bbc363c 100644 --- a/pkg/metrics/influxdb.go +++ b/pkg/metrics/influxdb.go @@ -64,14 +64,14 @@ func RegisterInfluxDB(ctx context.Context, config *types.InfluxDB) Registry { if config.AddEntryPointsLabels { registry.epEnabled = config.AddEntryPointsLabels registry.entryPointReqsCounter = influxDBClient.NewCounter(influxDBEntryPointReqsName) - registry.entryPointReqDurationHistogram = influxDBClient.NewHistogram(influxDBEntryPointReqDurationName) + registry.entryPointReqDurationHistogram, _ = NewHistogramWithScale(influxDBClient.NewHistogram(influxDBEntryPointReqDurationName), time.Second) registry.entryPointOpenConnsGauge = influxDBClient.NewGauge(influxDBEntryPointOpenConnsName) } if config.AddServicesLabels { registry.svcEnabled = config.AddServicesLabels registry.serviceReqsCounter = influxDBClient.NewCounter(influxDBMetricsServiceReqsName) - registry.serviceReqDurationHistogram = influxDBClient.NewHistogram(influxDBMetricsServiceLatencyName) + registry.serviceReqDurationHistogram, _ = NewHistogramWithScale(influxDBClient.NewHistogram(influxDBMetricsServiceLatencyName), time.Second) registry.serviceRetriesCounter = influxDBClient.NewCounter(influxDBRetriesTotalName) registry.serviceOpenConnsGauge = influxDBClient.NewGauge(influxDBOpenConnsName) registry.serviceServerUpGauge = influxDBClient.NewGauge(influxDBServerUpName) diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 8cdb1eae9..4e7749d07 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -1,6 +1,9 @@ package metrics import ( + "errors" + "time" + "github.com/go-kit/kit/metrics" "github.com/go-kit/kit/metrics/multi" ) @@ -20,12 +23,12 @@ type Registry interface { // entry point metrics EntryPointReqsCounter() metrics.Counter - EntryPointReqDurationHistogram() metrics.Histogram + EntryPointReqDurationHistogram() ScalableHistogram EntryPointOpenConnsGauge() metrics.Gauge // service metrics ServiceReqsCounter() metrics.Counter - ServiceReqDurationHistogram() metrics.Histogram + ServiceReqDurationHistogram() ScalableHistogram ServiceOpenConnsGauge() metrics.Gauge ServiceRetriesCounter() metrics.Counter ServiceServerUpGauge() metrics.Gauge @@ -46,10 +49,10 @@ func NewMultiRegistry(registries []Registry) Registry { var lastConfigReloadSuccessGauge []metrics.Gauge var lastConfigReloadFailureGauge []metrics.Gauge var entryPointReqsCounter []metrics.Counter - var entryPointReqDurationHistogram []metrics.Histogram + var entryPointReqDurationHistogram []ScalableHistogram var entryPointOpenConnsGauge []metrics.Gauge var serviceReqsCounter []metrics.Counter - var serviceReqDurationHistogram []metrics.Histogram + var serviceReqDurationHistogram []ScalableHistogram var serviceOpenConnsGauge []metrics.Gauge var serviceRetriesCounter []metrics.Counter var serviceServerUpGauge []metrics.Gauge @@ -101,10 +104,10 @@ func NewMultiRegistry(registries []Registry) Registry { lastConfigReloadSuccessGauge: multi.NewGauge(lastConfigReloadSuccessGauge...), lastConfigReloadFailureGauge: multi.NewGauge(lastConfigReloadFailureGauge...), entryPointReqsCounter: multi.NewCounter(entryPointReqsCounter...), - entryPointReqDurationHistogram: multi.NewHistogram(entryPointReqDurationHistogram...), + entryPointReqDurationHistogram: NewMultiHistogram(entryPointReqDurationHistogram...), entryPointOpenConnsGauge: multi.NewGauge(entryPointOpenConnsGauge...), serviceReqsCounter: multi.NewCounter(serviceReqsCounter...), - serviceReqDurationHistogram: multi.NewHistogram(serviceReqDurationHistogram...), + serviceReqDurationHistogram: NewMultiHistogram(serviceReqDurationHistogram...), serviceOpenConnsGauge: multi.NewGauge(serviceOpenConnsGauge...), serviceRetriesCounter: multi.NewCounter(serviceRetriesCounter...), serviceServerUpGauge: multi.NewGauge(serviceServerUpGauge...), @@ -119,10 +122,10 @@ type standardRegistry struct { lastConfigReloadSuccessGauge metrics.Gauge lastConfigReloadFailureGauge metrics.Gauge entryPointReqsCounter metrics.Counter - entryPointReqDurationHistogram metrics.Histogram + entryPointReqDurationHistogram ScalableHistogram entryPointOpenConnsGauge metrics.Gauge serviceReqsCounter metrics.Counter - serviceReqDurationHistogram metrics.Histogram + serviceReqDurationHistogram ScalableHistogram serviceOpenConnsGauge metrics.Gauge serviceRetriesCounter metrics.Counter serviceServerUpGauge metrics.Gauge @@ -156,7 +159,7 @@ func (r *standardRegistry) EntryPointReqsCounter() metrics.Counter { return r.entryPointReqsCounter } -func (r *standardRegistry) EntryPointReqDurationHistogram() metrics.Histogram { +func (r *standardRegistry) EntryPointReqDurationHistogram() ScalableHistogram { return r.entryPointReqDurationHistogram } @@ -168,7 +171,7 @@ func (r *standardRegistry) ServiceReqsCounter() metrics.Counter { return r.serviceReqsCounter } -func (r *standardRegistry) ServiceReqDurationHistogram() metrics.Histogram { +func (r *standardRegistry) ServiceReqDurationHistogram() ScalableHistogram { return r.serviceReqDurationHistogram } @@ -183,3 +186,97 @@ func (r *standardRegistry) ServiceRetriesCounter() metrics.Counter { func (r *standardRegistry) ServiceServerUpGauge() metrics.Gauge { return r.serviceServerUpGauge } + +// ScalableHistogram is a Histogram with a predefined time unit, +// used when producing observations without explicitly setting the observed value. +type ScalableHistogram interface { + With(labelValues ...string) ScalableHistogram + StartAt(t time.Time) + Observe(v float64) + ObserveDuration() +} + +// HistogramWithScale is a histogram that will convert its observed value to the specified unit. +type HistogramWithScale struct { + histogram metrics.Histogram + unit time.Duration + start time.Time +} + +// With implements ScalableHistogram. +func (s *HistogramWithScale) With(labelValues ...string) ScalableHistogram { + s.histogram = s.histogram.With(labelValues...) + return s +} + +// StartAt implements ScalableHistogram. +func (s *HistogramWithScale) StartAt(t time.Time) { + s.start = t +} + +// ObserveDuration implements ScalableHistogram. +func (s *HistogramWithScale) ObserveDuration() { + if s.unit <= 0 { + return + } + + d := float64(time.Since(s.start).Nanoseconds()) / float64(s.unit) + if d < 0 { + d = 0 + } + s.histogram.Observe(d) +} + +// Observe implements ScalableHistogram. +func (s *HistogramWithScale) Observe(v float64) { + s.histogram.Observe(v) +} + +// NewHistogramWithScale returns a ScalableHistogram. It returns an error if the given unit is <= 0. +func NewHistogramWithScale(histogram metrics.Histogram, unit time.Duration) (ScalableHistogram, error) { + if unit <= 0 { + return nil, errors.New("invalid time unit") + } + return &HistogramWithScale{ + histogram: histogram, + unit: unit, + }, nil +} + +// MultiHistogram collects multiple individual histograms and treats them as a unit. +type MultiHistogram []ScalableHistogram + +// NewMultiHistogram returns a multi-histogram, wrapping the passed histograms. +func NewMultiHistogram(h ...ScalableHistogram) MultiHistogram { + return MultiHistogram(h) +} + +// StartAt implements ScalableHistogram. +func (h MultiHistogram) StartAt(t time.Time) { + for _, histogram := range h { + histogram.StartAt(t) + } +} + +// ObserveDuration implements ScalableHistogram. +func (h MultiHistogram) ObserveDuration() { + for _, histogram := range h { + histogram.ObserveDuration() + } +} + +// Observe implements ScalableHistogram. +func (h MultiHistogram) Observe(v float64) { + for _, histogram := range h { + histogram.Observe(v) + } +} + +// With implements ScalableHistogram. +func (h MultiHistogram) With(labelValues ...string) ScalableHistogram { + next := make(MultiHistogram, len(h)) + for i := range h { + next[i] = h[i].With(labelValues...) + } + return next +} diff --git a/pkg/metrics/metrics_test.go b/pkg/metrics/metrics_test.go index 6477aac96..221193a42 100644 --- a/pkg/metrics/metrics_test.go +++ b/pkg/metrics/metrics_test.go @@ -1,18 +1,44 @@ package metrics import ( + "bytes" + "strings" "testing" + "time" "github.com/go-kit/kit/metrics" + "github.com/go-kit/kit/metrics/generic" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) +func TestScalableHistogram(t *testing.T) { + h := generic.NewHistogram("test", 1) + sh, err := NewHistogramWithScale(h, time.Millisecond) + require.NoError(t, err) + + ticker := time.NewTicker(500 * time.Millisecond) + <-ticker.C + sh.StartAt(time.Now()) + <-ticker.C + sh.ObserveDuration() + + var b bytes.Buffer + h.Print(&b) + + extractedDurationString := strings.Split(strings.Split(b.String(), "\n")[1], " ") + measuredDuration, err := time.ParseDuration(extractedDurationString[0] + "ms") + assert.NoError(t, err) + + assert.InDelta(t, 500*time.Millisecond, measuredDuration, float64(1*time.Millisecond)) +} + func TestNewMultiRegistry(t *testing.T) { registries := []Registry{newCollectingRetryMetrics(), newCollectingRetryMetrics()} registry := NewMultiRegistry(registries) registry.ServiceReqsCounter().With("key", "requests").Add(1) - registry.ServiceReqDurationHistogram().With("key", "durations").Observe(2) + registry.ServiceReqDurationHistogram().With("key", "durations").Observe(float64(2)) registry.ServiceRetriesCounter().With("key", "retries").Add(3) for _, collectingRegistry := range registries { @@ -66,11 +92,17 @@ type histogramMock struct { lastLabelValues []string } -func (c *histogramMock) With(labelValues ...string) metrics.Histogram { +func (c *histogramMock) With(labelValues ...string) ScalableHistogram { c.lastLabelValues = labelValues return c } -func (c *histogramMock) Observe(value float64) { - c.lastHistogramValue = value +func (c *histogramMock) Start() {} + +func (c *histogramMock) StartAt(t time.Time) {} + +func (c *histogramMock) ObserveDuration() {} + +func (c *histogramMock) Observe(v float64) { + c.lastHistogramValue = v } diff --git a/pkg/metrics/prometheus.go b/pkg/metrics/prometheus.go index 6003da55b..822eeff01 100644 --- a/pkg/metrics/prometheus.go +++ b/pkg/metrics/prometheus.go @@ -6,6 +6,7 @@ import ( "sort" "strings" "sync" + "time" "github.com/containous/traefik/v2/pkg/config/dynamic" "github.com/containous/traefik/v2/pkg/log" @@ -152,7 +153,7 @@ func initStandardRegistry(config *types.Prometheus) Registry { entryPointOpenConns.gv.Describe, }...) reg.entryPointReqsCounter = entryPointReqs - reg.entryPointReqDurationHistogram = entryPointReqDurations + reg.entryPointReqDurationHistogram, _ = NewHistogramWithScale(entryPointReqDurations, time.Second) reg.entryPointOpenConnsGauge = entryPointOpenConns } if config.AddServicesLabels { @@ -187,7 +188,7 @@ func initStandardRegistry(config *types.Prometheus) Registry { }...) reg.serviceReqsCounter = serviceReqs - reg.serviceReqDurationHistogram = serviceReqDurations + reg.serviceReqDurationHistogram, _ = NewHistogramWithScale(serviceReqDurations, time.Second) reg.serviceOpenConnsGauge = serviceOpenConns reg.serviceRetriesCounter = serviceRetries reg.serviceServerUpGauge = serviceServerUp diff --git a/pkg/metrics/statsd.go b/pkg/metrics/statsd.go index eb08de8f4..f8f14de63 100644 --- a/pkg/metrics/statsd.go +++ b/pkg/metrics/statsd.go @@ -55,14 +55,14 @@ func RegisterStatsd(ctx context.Context, config *types.Statsd) Registry { if config.AddEntryPointsLabels { registry.epEnabled = config.AddEntryPointsLabels registry.entryPointReqsCounter = statsdClient.NewCounter(statsdEntryPointReqsName, 1.0) - registry.entryPointReqDurationHistogram = statsdClient.NewTiming(statsdEntryPointReqDurationName, 1.0) + registry.entryPointReqDurationHistogram, _ = NewHistogramWithScale(statsdClient.NewTiming(statsdEntryPointReqDurationName, 1.0), time.Millisecond) registry.entryPointOpenConnsGauge = statsdClient.NewGauge(statsdEntryPointOpenConnsName) } if config.AddServicesLabels { registry.svcEnabled = config.AddServicesLabels registry.serviceReqsCounter = statsdClient.NewCounter(statsdMetricsServiceReqsName, 1.0) - registry.serviceReqDurationHistogram = statsdClient.NewTiming(statsdMetricsServiceLatencyName, 1.0) + registry.serviceReqDurationHistogram, _ = NewHistogramWithScale(statsdClient.NewTiming(statsdMetricsServiceLatencyName, 1.0), time.Millisecond) registry.serviceRetriesCounter = statsdClient.NewCounter(statsdRetriesTotalName, 1.0) registry.serviceOpenConnsGauge = statsdClient.NewGauge(statsdOpenConnsName) registry.serviceServerUpGauge = statsdClient.NewGauge(statsdServerUpName) diff --git a/pkg/middlewares/metrics/metrics.go b/pkg/middlewares/metrics/metrics.go index 34d9dccc8..53ce890fa 100644 --- a/pkg/middlewares/metrics/metrics.go +++ b/pkg/middlewares/metrics/metrics.go @@ -32,7 +32,7 @@ type metricsMiddleware struct { openConns int64 next http.Handler reqsCounter gokitmetrics.Counter - reqDurationHistogram gokitmetrics.Histogram + reqDurationHistogram metrics.ScalableHistogram openConnsGauge gokitmetrics.Gauge baseLabels []string } @@ -88,13 +88,19 @@ func (m *metricsMiddleware) ServeHTTP(rw http.ResponseWriter, req *http.Request) m.openConnsGauge.With(labelValues...).Set(float64(openConns)) }(labels) - start := time.Now() recorder := newResponseRecorder(rw) + start := time.Now() + m.next.ServeHTTP(recorder, req) labels = append(labels, "code", strconv.Itoa(recorder.getCode())) + + histograms := m.reqDurationHistogram.With(labels...) + histograms.StartAt(start) + m.reqsCounter.With(labels...).Add(1) - m.reqDurationHistogram.With(labels...).Observe(time.Since(start).Seconds()) + + histograms.ObserveDuration() } func getRequestProtocol(req *http.Request) string {