extend metrics and rebuild prometheus exporting logic

This commit is contained in:
Marco Jantke 2018-01-26 11:58:03 +01:00 committed by Traefiker
parent fa1f4f761d
commit cc5ee00b89
17 changed files with 997 additions and 226 deletions

View file

@ -12,6 +12,7 @@ import (
"github.com/containous/traefik/log" "github.com/containous/traefik/log"
"github.com/containous/traefik/safe" "github.com/containous/traefik/safe"
"github.com/go-kit/kit/metrics"
"github.com/vulcand/oxy/roundrobin" "github.com/vulcand/oxy/roundrobin"
) )
@ -19,9 +20,9 @@ var singleton *HealthCheck
var once sync.Once var once sync.Once
// GetHealthCheck returns the health check which is guaranteed to be a singleton. // GetHealthCheck returns the health check which is guaranteed to be a singleton.
func GetHealthCheck() *HealthCheck { func GetHealthCheck(metrics metricsRegistry) *HealthCheck {
once.Do(func() { once.Do(func() {
singleton = newHealthCheck() singleton = newHealthCheck(metrics)
}) })
return singleton return singleton
} }
@ -50,6 +51,7 @@ type BackendHealthCheck struct {
//HealthCheck struct //HealthCheck struct
type HealthCheck struct { type HealthCheck struct {
Backends map[string]*BackendHealthCheck Backends map[string]*BackendHealthCheck
metrics metricsRegistry
cancel context.CancelFunc cancel context.CancelFunc
} }
@ -60,12 +62,19 @@ type LoadBalancer interface {
Servers() []*url.URL Servers() []*url.URL
} }
func newHealthCheck() *HealthCheck { func newHealthCheck(metrics metricsRegistry) *HealthCheck {
return &HealthCheck{ return &HealthCheck{
Backends: make(map[string]*BackendHealthCheck), Backends: make(map[string]*BackendHealthCheck),
metrics: metrics,
} }
} }
// metricsRegistry is a local interface in the healthcheck package, exposing only the required metrics
// necessary for the healthcheck package. This makes it easier for the tests.
type metricsRegistry interface {
BackendServerUpGauge() metrics.Gauge
}
// NewBackendHealthCheck Instantiate a new BackendHealthCheck // NewBackendHealthCheck Instantiate a new BackendHealthCheck
func NewBackendHealthCheck(options Options, backendName string) *BackendHealthCheck { func NewBackendHealthCheck(options Options, backendName string) *BackendHealthCheck {
return &BackendHealthCheck{ return &BackendHealthCheck{
@ -113,22 +122,30 @@ func (hc *HealthCheck) checkBackend(backend *BackendHealthCheck) {
enabledURLs := backend.LB.Servers() enabledURLs := backend.LB.Servers()
var newDisabledURLs []*url.URL var newDisabledURLs []*url.URL
for _, url := range backend.disabledURLs { for _, url := range backend.disabledURLs {
serverUpMetricValue := float64(0)
if err := checkHealth(url, backend); err == nil { if err := checkHealth(url, backend); err == nil {
log.Warnf("Health check up: Returning to server list. Backend: %q URL: %q", backend.name, url.String()) log.Warnf("Health check up: Returning to server list. Backend: %q URL: %q", backend.name, url.String())
backend.LB.UpsertServer(url, roundrobin.Weight(1)) backend.LB.UpsertServer(url, roundrobin.Weight(1))
serverUpMetricValue = 1
} else { } else {
log.Warnf("Health check still failing. Backend: %q URL: %q Reason: %s", backend.name, url.String(), err) log.Warnf("Health check still failing. Backend: %q URL: %q Reason: %s", backend.name, url.String(), err)
newDisabledURLs = append(newDisabledURLs, url) newDisabledURLs = append(newDisabledURLs, url)
} }
labelValues := []string{"backend", backend.name, "url", url.String()}
hc.metrics.BackendServerUpGauge().With(labelValues...).Set(serverUpMetricValue)
} }
backend.disabledURLs = newDisabledURLs backend.disabledURLs = newDisabledURLs
for _, url := range enabledURLs { for _, url := range enabledURLs {
serverUpMetricValue := float64(1)
if err := checkHealth(url, backend); err != nil { if err := checkHealth(url, backend); err != nil {
log.Warnf("Health check failed: Remove from server list. Backend: %q URL: %q Reason: %s", backend.name, url.String(), err) log.Warnf("Health check failed: Remove from server list. Backend: %q URL: %q Reason: %s", backend.name, url.String(), err)
backend.LB.RemoveServer(url) backend.LB.RemoveServer(url)
backend.disabledURLs = append(backend.disabledURLs, url) backend.disabledURLs = append(backend.disabledURLs, url)
serverUpMetricValue = 0
} }
labelValues := []string{"backend", backend.name, "url", url.String()}
hc.metrics.BackendServerUpGauge().With(labelValues...).Set(serverUpMetricValue)
} }
} }

View file

@ -27,6 +27,7 @@ func TestSetBackendsConfiguration(t *testing.T) {
healthSequence []bool healthSequence []bool
wantNumRemovedServers int wantNumRemovedServers int
wantNumUpsertedServers int wantNumUpsertedServers int
wantGaugeValue float64
}{ }{
{ {
desc: "healthy server staying healthy", desc: "healthy server staying healthy",
@ -34,6 +35,7 @@ func TestSetBackendsConfiguration(t *testing.T) {
healthSequence: []bool{true}, healthSequence: []bool{true},
wantNumRemovedServers: 0, wantNumRemovedServers: 0,
wantNumUpsertedServers: 0, wantNumUpsertedServers: 0,
wantGaugeValue: 1,
}, },
{ {
desc: "healthy server becoming sick", desc: "healthy server becoming sick",
@ -41,6 +43,7 @@ func TestSetBackendsConfiguration(t *testing.T) {
healthSequence: []bool{false}, healthSequence: []bool{false},
wantNumRemovedServers: 1, wantNumRemovedServers: 1,
wantNumUpsertedServers: 0, wantNumUpsertedServers: 0,
wantGaugeValue: 0,
}, },
{ {
desc: "sick server becoming healthy", desc: "sick server becoming healthy",
@ -48,6 +51,7 @@ func TestSetBackendsConfiguration(t *testing.T) {
healthSequence: []bool{true}, healthSequence: []bool{true},
wantNumRemovedServers: 0, wantNumRemovedServers: 0,
wantNumUpsertedServers: 1, wantNumUpsertedServers: 1,
wantGaugeValue: 1,
}, },
{ {
desc: "sick server staying sick", desc: "sick server staying sick",
@ -55,6 +59,7 @@ func TestSetBackendsConfiguration(t *testing.T) {
healthSequence: []bool{false}, healthSequence: []bool{false},
wantNumRemovedServers: 0, wantNumRemovedServers: 0,
wantNumUpsertedServers: 0, wantNumUpsertedServers: 0,
wantGaugeValue: 0,
}, },
{ {
desc: "healthy server toggling to sick and back to healthy", desc: "healthy server toggling to sick and back to healthy",
@ -62,6 +67,7 @@ func TestSetBackendsConfiguration(t *testing.T) {
healthSequence: []bool{false, true}, healthSequence: []bool{false, true},
wantNumRemovedServers: 1, wantNumRemovedServers: 1,
wantNumUpsertedServers: 1, wantNumUpsertedServers: 1,
wantGaugeValue: 1,
}, },
} }
@ -89,8 +95,10 @@ func TestSetBackendsConfiguration(t *testing.T) {
backend.disabledURLs = append(backend.disabledURLs, serverURL) backend.disabledURLs = append(backend.disabledURLs, serverURL)
} }
collectingMetrics := testhelpers.NewCollectingHealthCheckMetrics()
check := HealthCheck{ check := HealthCheck{
Backends: make(map[string]*BackendHealthCheck), Backends: make(map[string]*BackendHealthCheck),
metrics: collectingMetrics,
} }
wg := sync.WaitGroup{} wg := sync.WaitGroup{}
wg.Add(1) wg.Add(1)
@ -118,6 +126,10 @@ func TestSetBackendsConfiguration(t *testing.T) {
if lb.numUpsertedServers != test.wantNumUpsertedServers { if lb.numUpsertedServers != test.wantNumUpsertedServers {
t.Errorf("got %d upserted servers, wanted %d", lb.numUpsertedServers, test.wantNumUpsertedServers) t.Errorf("got %d upserted servers, wanted %d", lb.numUpsertedServers, test.wantNumUpsertedServers)
} }
if collectingMetrics.Gauge.GaugeValue != test.wantGaugeValue {
t.Errorf("got %v ServerUp Gauge, want %v", collectingMetrics.Gauge.GaugeValue, test.wantGaugeValue)
}
}) })
} }
} }

View file

@ -31,10 +31,10 @@ func RegisterDatadog(config *types.Datadog) Registry {
} }
registry := &standardRegistry{ registry := &standardRegistry{
enabled: true, enabled: true,
reqsCounter: datadogClient.NewCounter(ddMetricsReqsName, 1.0), backendReqsCounter: datadogClient.NewCounter(ddMetricsReqsName, 1.0),
reqDurationHistogram: datadogClient.NewHistogram(ddMetricsLatencyName, 1.0), backendReqDurationHistogram: datadogClient.NewHistogram(ddMetricsLatencyName, 1.0),
retriesCounter: datadogClient.NewCounter(ddRetriesTotalName, 1.0), backendRetriesCounter: datadogClient.NewCounter(ddRetriesTotalName, 1.0),
} }
return registry return registry

View file

@ -31,10 +31,10 @@ func TestDatadog(t *testing.T) {
} }
udp.ShouldReceiveAll(t, expected, func() { udp.ShouldReceiveAll(t, expected, func() {
datadogRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) datadogRegistry.BackendReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
datadogRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1) datadogRegistry.BackendReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
datadogRegistry.ReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000) datadogRegistry.BackendReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
datadogRegistry.RetriesCounter().With("service", "test").Add(1) datadogRegistry.BackendRetriesCounter().With("service", "test").Add(1)
datadogRegistry.RetriesCounter().With("service", "test").Add(1) datadogRegistry.BackendRetriesCounter().With("service", "test").Add(1)
}) })
} }

View file

@ -37,10 +37,10 @@ func RegisterInfluxDB(config *types.InfluxDB) Registry {
} }
return &standardRegistry{ return &standardRegistry{
enabled: true, enabled: true,
reqsCounter: influxDBClient.NewCounter(influxDBMetricsReqsName), backendReqsCounter: influxDBClient.NewCounter(influxDBMetricsReqsName),
reqDurationHistogram: influxDBClient.NewHistogram(influxDBMetricsLatencyName), backendReqDurationHistogram: influxDBClient.NewHistogram(influxDBMetricsLatencyName),
retriesCounter: influxDBClient.NewCounter(influxDBRetriesTotalName), backendRetriesCounter: influxDBClient.NewCounter(influxDBRetriesTotalName),
} }
} }

View file

@ -31,11 +31,11 @@ func TestInfluxDB(t *testing.T) {
} }
msg := udp.ReceiveString(t, func() { msg := udp.ReceiveString(t, func() {
influxDBRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) influxDBRegistry.BackendReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
influxDBRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1) influxDBRegistry.BackendReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
influxDBRegistry.RetriesCounter().With("service", "test").Add(1) influxDBRegistry.BackendRetriesCounter().With("service", "test").Add(1)
influxDBRegistry.RetriesCounter().With("service", "test").Add(1) influxDBRegistry.BackendRetriesCounter().With("service", "test").Add(1)
influxDBRegistry.ReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000) influxDBRegistry.BackendReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
}) })
assertMessage(t, msg, expected) assertMessage(t, msg, expected)

View file

@ -9,71 +9,168 @@ import (
type Registry interface { type Registry interface {
// IsEnabled shows whether metrics instrumentation is enabled. // IsEnabled shows whether metrics instrumentation is enabled.
IsEnabled() bool IsEnabled() bool
ReqsCounter() metrics.Counter // server metrics
ReqDurationHistogram() metrics.Histogram ConfigReloadsCounter() metrics.Counter
RetriesCounter() metrics.Counter ConfigReloadsFailureCounter() metrics.Counter
LastConfigReloadSuccessGauge() metrics.Gauge
LastConfigReloadFailureGauge() metrics.Gauge
// entry point metrics
EntrypointReqsCounter() metrics.Counter
EntrypointReqDurationHistogram() metrics.Histogram
EntrypointOpenConnsGauge() metrics.Gauge
// backend metrics
BackendReqsCounter() metrics.Counter
BackendReqDurationHistogram() metrics.Histogram
BackendOpenConnsGauge() metrics.Gauge
BackendRetriesCounter() metrics.Counter
BackendServerUpGauge() metrics.Gauge
} }
// NewMultiRegistry creates a new standardRegistry that wraps multiple Registries. // NewVoidRegistry is a noop implementation of metrics.Registry.
// It is used to avoid nil checking in components that do metric collections.
func NewVoidRegistry() Registry {
return NewMultiRegistry([]Registry{})
}
// NewMultiRegistry is an implementation of metrics.Registry that wraps multiple registries.
// It handles the case when a registry hasn't registered some metric and returns nil.
// This allows for feature imparity between the different metric implementations.
func NewMultiRegistry(registries []Registry) Registry { func NewMultiRegistry(registries []Registry) Registry {
reqsCounters := []metrics.Counter{} configReloadsCounter := []metrics.Counter{}
reqDurationHistograms := []metrics.Histogram{} configReloadsFailureCounter := []metrics.Counter{}
retriesCounters := []metrics.Counter{} lastConfigReloadSuccessGauge := []metrics.Gauge{}
lastConfigReloadFailureGauge := []metrics.Gauge{}
entrypointReqsCounter := []metrics.Counter{}
entrypointReqDurationHistogram := []metrics.Histogram{}
entrypointOpenConnsGauge := []metrics.Gauge{}
backendReqsCounter := []metrics.Counter{}
backendReqDurationHistogram := []metrics.Histogram{}
backendOpenConnsGauge := []metrics.Gauge{}
backendRetriesCounter := []metrics.Counter{}
backendServerUpGauge := []metrics.Gauge{}
for _, r := range registries { for _, r := range registries {
reqsCounters = append(reqsCounters, r.ReqsCounter()) if r.ConfigReloadsCounter() != nil {
reqDurationHistograms = append(reqDurationHistograms, r.ReqDurationHistogram()) configReloadsCounter = append(configReloadsCounter, r.ConfigReloadsCounter())
retriesCounters = append(retriesCounters, r.RetriesCounter()) }
if r.ConfigReloadsFailureCounter() != nil {
configReloadsFailureCounter = append(configReloadsFailureCounter, r.ConfigReloadsFailureCounter())
}
if r.LastConfigReloadSuccessGauge() != nil {
lastConfigReloadSuccessGauge = append(lastConfigReloadSuccessGauge, r.LastConfigReloadSuccessGauge())
}
if r.LastConfigReloadFailureGauge() != nil {
lastConfigReloadFailureGauge = append(lastConfigReloadFailureGauge, r.LastConfigReloadFailureGauge())
}
if r.EntrypointReqsCounter() != nil {
entrypointReqsCounter = append(entrypointReqsCounter, r.EntrypointReqsCounter())
}
if r.EntrypointReqDurationHistogram() != nil {
entrypointReqDurationHistogram = append(entrypointReqDurationHistogram, r.EntrypointReqDurationHistogram())
}
if r.EntrypointOpenConnsGauge() != nil {
entrypointOpenConnsGauge = append(entrypointOpenConnsGauge, r.EntrypointOpenConnsGauge())
}
if r.BackendReqsCounter() != nil {
backendReqsCounter = append(backendReqsCounter, r.BackendReqsCounter())
}
if r.BackendReqDurationHistogram() != nil {
backendReqDurationHistogram = append(backendReqDurationHistogram, r.BackendReqDurationHistogram())
}
if r.BackendOpenConnsGauge() != nil {
backendOpenConnsGauge = append(backendOpenConnsGauge, r.BackendOpenConnsGauge())
}
if r.BackendRetriesCounter() != nil {
backendRetriesCounter = append(backendRetriesCounter, r.BackendRetriesCounter())
}
if r.BackendServerUpGauge() != nil {
backendServerUpGauge = append(backendServerUpGauge, r.BackendServerUpGauge())
}
} }
return &standardRegistry{ return &standardRegistry{
enabled: true, enabled: len(registries) > 0,
reqsCounter: multi.NewCounter(reqsCounters...), configReloadsCounter: multi.NewCounter(configReloadsCounter...),
reqDurationHistogram: multi.NewHistogram(reqDurationHistograms...), configReloadsFailureCounter: multi.NewCounter(configReloadsFailureCounter...),
retriesCounter: multi.NewCounter(retriesCounters...), lastConfigReloadSuccessGauge: multi.NewGauge(lastConfigReloadSuccessGauge...),
lastConfigReloadFailureGauge: multi.NewGauge(lastConfigReloadFailureGauge...),
entrypointReqsCounter: multi.NewCounter(entrypointReqsCounter...),
entrypointReqDurationHistogram: multi.NewHistogram(entrypointReqDurationHistogram...),
entrypointOpenConnsGauge: multi.NewGauge(entrypointOpenConnsGauge...),
backendReqsCounter: multi.NewCounter(backendReqsCounter...),
backendReqDurationHistogram: multi.NewHistogram(backendReqDurationHistogram...),
backendOpenConnsGauge: multi.NewGauge(backendOpenConnsGauge...),
backendRetriesCounter: multi.NewCounter(backendRetriesCounter...),
backendServerUpGauge: multi.NewGauge(backendServerUpGauge...),
} }
} }
type standardRegistry struct { type standardRegistry struct {
enabled bool enabled bool
reqsCounter metrics.Counter configReloadsCounter metrics.Counter
reqDurationHistogram metrics.Histogram configReloadsFailureCounter metrics.Counter
retriesCounter metrics.Counter lastConfigReloadSuccessGauge metrics.Gauge
lastConfigReloadFailureGauge metrics.Gauge
entrypointReqsCounter metrics.Counter
entrypointReqDurationHistogram metrics.Histogram
entrypointOpenConnsGauge metrics.Gauge
backendReqsCounter metrics.Counter
backendReqDurationHistogram metrics.Histogram
backendOpenConnsGauge metrics.Gauge
backendRetriesCounter metrics.Counter
backendServerUpGauge metrics.Gauge
} }
func (r *standardRegistry) IsEnabled() bool { func (r *standardRegistry) IsEnabled() bool {
return r.enabled return r.enabled
} }
func (r *standardRegistry) ReqsCounter() metrics.Counter { func (r *standardRegistry) ConfigReloadsCounter() metrics.Counter {
return r.reqsCounter return r.configReloadsCounter
} }
func (r *standardRegistry) ReqDurationHistogram() metrics.Histogram { func (r *standardRegistry) ConfigReloadsFailureCounter() metrics.Counter {
return r.reqDurationHistogram return r.configReloadsFailureCounter
} }
func (r *standardRegistry) RetriesCounter() metrics.Counter { func (r *standardRegistry) LastConfigReloadSuccessGauge() metrics.Gauge {
return r.retriesCounter return r.lastConfigReloadSuccessGauge
} }
// NewVoidRegistry is a noop implementation of metrics.Registry. func (r *standardRegistry) LastConfigReloadFailureGauge() metrics.Gauge {
// It is used to avoid nil checking in components that do metric collections. return r.lastConfigReloadFailureGauge
func NewVoidRegistry() Registry {
return &standardRegistry{
enabled: false,
reqsCounter: &voidCounter{},
reqDurationHistogram: &voidHistogram{},
retriesCounter: &voidCounter{},
}
} }
type voidCounter struct{} func (r *standardRegistry) EntrypointReqsCounter() metrics.Counter {
return r.entrypointReqsCounter
}
func (v *voidCounter) With(labelValues ...string) metrics.Counter { return v } func (r *standardRegistry) EntrypointReqDurationHistogram() metrics.Histogram {
func (v *voidCounter) Add(delta float64) {} return r.entrypointReqDurationHistogram
}
type voidHistogram struct{} func (r *standardRegistry) EntrypointOpenConnsGauge() metrics.Gauge {
return r.entrypointOpenConnsGauge
}
func (h *voidHistogram) With(labelValues ...string) metrics.Histogram { return h } func (r *standardRegistry) BackendReqsCounter() metrics.Counter {
func (h *voidHistogram) Observe(value float64) {} return r.backendReqsCounter
}
func (r *standardRegistry) BackendReqDurationHistogram() metrics.Histogram {
return r.backendReqDurationHistogram
}
func (r *standardRegistry) BackendOpenConnsGauge() metrics.Gauge {
return r.backendOpenConnsGauge
}
func (r *standardRegistry) BackendRetriesCounter() metrics.Counter {
return r.backendRetriesCounter
}
func (r *standardRegistry) BackendServerUpGauge() metrics.Gauge {
return r.backendServerUpGauge
}

View file

@ -7,29 +7,18 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
func TestNewVoidRegistry(t *testing.T) {
registry := NewVoidRegistry()
if registry.IsEnabled() {
t.Errorf("VoidRegistry should not return true for IsEnabled()")
}
registry.ReqsCounter().With("some", "value").Add(1)
registry.ReqDurationHistogram().With("some", "value").Observe(1)
registry.RetriesCounter().With("some", "value").Add(1)
}
func TestNewMultiRegistry(t *testing.T) { func TestNewMultiRegistry(t *testing.T) {
registries := []Registry{newCollectingRetryMetrics(), newCollectingRetryMetrics()} registries := []Registry{newCollectingRetryMetrics(), newCollectingRetryMetrics()}
registry := NewMultiRegistry(registries) registry := NewMultiRegistry(registries)
registry.ReqsCounter().With("key", "requests").Add(1) registry.BackendReqsCounter().With("key", "requests").Add(1)
registry.ReqDurationHistogram().With("key", "durations").Observe(2) registry.BackendReqDurationHistogram().With("key", "durations").Observe(2)
registry.RetriesCounter().With("key", "retries").Add(3) registry.BackendRetriesCounter().With("key", "retries").Add(3)
for _, collectingRegistry := range registries { for _, collectingRegistry := range registries {
cReqsCounter := collectingRegistry.ReqsCounter().(*counterMock) cReqsCounter := collectingRegistry.BackendReqsCounter().(*counterMock)
cReqDurationHistogram := collectingRegistry.ReqDurationHistogram().(*histogramMock) cReqDurationHistogram := collectingRegistry.BackendReqDurationHistogram().(*histogramMock)
cRetriesCounter := collectingRegistry.RetriesCounter().(*counterMock) cRetriesCounter := collectingRegistry.BackendRetriesCounter().(*counterMock)
wantCounterValue := float64(1) wantCounterValue := float64(1)
if cReqsCounter.counterValue != wantCounterValue { if cReqsCounter.counterValue != wantCounterValue {
@ -52,9 +41,9 @@ func TestNewMultiRegistry(t *testing.T) {
func newCollectingRetryMetrics() Registry { func newCollectingRetryMetrics() Registry {
return &standardRegistry{ return &standardRegistry{
reqsCounter: &counterMock{}, backendReqsCounter: &counterMock{},
reqDurationHistogram: &histogramMock{}, backendReqDurationHistogram: &histogramMock{},
retriesCounter: &counterMock{}, backendRetriesCounter: &counterMock{},
} }
} }

View file

@ -2,10 +2,14 @@ package metrics
import ( import (
"net/http" "net/http"
"sort"
"strings"
"sync"
"github.com/containous/mux" "github.com/containous/mux"
"github.com/containous/traefik/safe"
"github.com/containous/traefik/types" "github.com/containous/traefik/types"
"github.com/go-kit/kit/metrics/prometheus" "github.com/go-kit/kit/metrics"
stdprometheus "github.com/prometheus/client_golang/prometheus" stdprometheus "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/client_golang/prometheus/promhttp"
) )
@ -13,15 +17,50 @@ import (
const ( const (
metricNamePrefix = "traefik_" metricNamePrefix = "traefik_"
reqsTotalName = metricNamePrefix + "requests_total" // server meta information
reqDurationName = metricNamePrefix + "request_duration_seconds" configReloadsTotalName = metricNamePrefix + "config_reloads_total"
retriesTotalName = metricNamePrefix + "backend_retries_total" configReloadsFailuresTotalName = metricNamePrefix + "config_reloads_failure_total"
configLastReloadSuccessName = metricNamePrefix + "config_last_reload_success"
configLastReloadFailureName = metricNamePrefix + "config_last_reload_failure"
// entrypoint
entrypointReqsTotalName = metricNamePrefix + "entrypoint_requests_total"
entrypointReqDurationName = metricNamePrefix + "entrypoint_request_duration_seconds"
entrypointOpenConnsName = metricNamePrefix + "entrypoint_open_connections"
// backend level
backendReqsTotalName = metricNamePrefix + "backend_requests_total"
backendReqDurationName = metricNamePrefix + "backend_request_duration_seconds"
backendOpenConnsName = metricNamePrefix + "backend_open_connections"
backendRetriesTotalName = metricNamePrefix + "backend_retries_total"
backendServerUpName = metricNamePrefix + "backend_server_up"
) )
// PrometheusHandler expose Prometheus routes const (
// generationAgeForever indicates that a metric never gets outdated.
generationAgeForever = 0
// generationAgeDefault is the default age of three generations.
generationAgeDefault = 3
)
// promState holds all metric state internally and acts as the only Collector we register for Prometheus.
//
// This enables control to remove metrics that belong to outdated configuration.
// As an example why this is required, consider Traefik learns about a new service.
// It populates the 'traefik_server_backend_up' metric for it with a value of 1 (alive).
// When the backend is undeployed now the metric is still there in the client library
// and will be until Traefik would be restarted.
//
// To solve this problem promState keeps track of configuration generations.
// Every time a new configuration is loaded, the generation is increased by one.
// Metrics that "belong" to a dynamic configuration part of Traefik (e.g. backend, entrypoint)
// are removed, given they were tracked more than 3 generations ago.
var promState = newPrometheusState()
// PrometheusHandler exposes Prometheus routes.
type PrometheusHandler struct{} type PrometheusHandler struct{}
// AddRoutes add Prometheus routes on a router // AddRoutes adds Prometheus routes on a router.
func (h PrometheusHandler) AddRoutes(router *mux.Router) { func (h PrometheusHandler) AddRoutes(router *mux.Router) {
router.Methods(http.MethodGet).Path("/metrics").Handler(promhttp.Handler()) router.Methods(http.MethodGet).Path("/metrics").Handler(promhttp.Handler())
} }
@ -34,24 +73,332 @@ func RegisterPrometheus(config *types.Prometheus) Registry {
buckets = config.Buckets buckets = config.Buckets
} }
reqCounter := prometheus.NewCounterFrom(stdprometheus.CounterOpts{ safe.Go(func() {
Name: reqsTotalName, promState.ListenValueUpdates()
Help: "How many HTTP requests processed, partitioned by status code and method.", })
}, []string{"service", "code", "method"})
reqDurationHistogram := prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ configReloads := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
Name: reqDurationName, Name: configReloadsTotalName,
Help: "How long it took to process the request.", Help: "Config reloads",
}, []string{})
configReloadsFailures := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
Name: configReloadsFailuresTotalName,
Help: "Config failure reloads",
}, []string{})
lastConfigReloadSuccess := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
Name: configLastReloadSuccessName,
Help: "Last config reload success",
}, []string{})
lastConfigReloadFailure := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
Name: configLastReloadFailureName,
Help: "Last config reload failure",
}, []string{})
entrypointReqs := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
Name: entrypointReqsTotalName,
Help: "How many HTTP requests processed on an entrypoint, partitioned by status code, protocol, and method.",
}, []string{"code", "method", "protocol", "entrypoint"})
entrypointReqDurations := newHistogramFrom(promState.collectors, stdprometheus.HistogramOpts{
Name: entrypointReqDurationName,
Help: "How long it took to process the request on an entrypoint, partitioned by status code, protocol, and method.",
Buckets: buckets, Buckets: buckets,
}, []string{"service", "code"}) }, []string{"code", "method", "protocol", "entrypoint"})
retryCounter := prometheus.NewCounterFrom(stdprometheus.CounterOpts{ entrypointOpenConns := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
Name: retriesTotalName, Name: entrypointOpenConnsName,
Help: "How many request retries happened in total.", Help: "How many open connections exist on an entrypoint, partitioned by method and protocol.",
}, []string{"service"}) }, []string{"method", "protocol", "entrypoint"})
backendReqs := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
Name: backendReqsTotalName,
Help: "How many HTTP requests processed on a backend, partitioned by status code, protocol, and method.",
}, []string{"code", "method", "protocol", "backend"})
backendReqDurations := newHistogramFrom(promState.collectors, stdprometheus.HistogramOpts{
Name: backendReqDurationName,
Help: "How long it took to process the request on a backend, partitioned by status code, protocol, and method.",
Buckets: buckets,
}, []string{"code", "method", "protocol", "backend"})
backendOpenConns := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
Name: backendOpenConnsName,
Help: "How many open connections exist on a backend, partitioned by method and protocol.",
}, []string{"method", "protocol", "backend"})
backendRetries := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
Name: backendRetriesTotalName,
Help: "How many request retries happened on a backend.",
}, []string{"backend"})
backendServerUp := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
Name: backendServerUpName,
Help: "Backend server is up, described by gauge value of 0 or 1.",
}, []string{"backend", "url"})
promState.describers = []func(chan<- *stdprometheus.Desc){
configReloads.cv.Describe,
configReloadsFailures.cv.Describe,
lastConfigReloadSuccess.gv.Describe,
lastConfigReloadFailure.gv.Describe,
entrypointReqs.cv.Describe,
entrypointReqDurations.hv.Describe,
entrypointOpenConns.gv.Describe,
backendReqs.cv.Describe,
backendReqDurations.hv.Describe,
backendOpenConns.gv.Describe,
backendRetries.cv.Describe,
backendServerUp.gv.Describe,
}
stdprometheus.MustRegister(promState)
return &standardRegistry{ return &standardRegistry{
enabled: true, enabled: true,
reqsCounter: reqCounter, configReloadsCounter: configReloads,
reqDurationHistogram: reqDurationHistogram, configReloadsFailureCounter: configReloadsFailures,
retriesCounter: retryCounter, lastConfigReloadSuccessGauge: lastConfigReloadSuccess,
lastConfigReloadFailureGauge: lastConfigReloadFailure,
entrypointReqsCounter: entrypointReqs,
entrypointReqDurationHistogram: entrypointReqDurations,
entrypointOpenConnsGauge: entrypointOpenConns,
backendReqsCounter: backendReqs,
backendReqDurationHistogram: backendReqDurations,
backendOpenConnsGauge: backendOpenConns,
backendRetriesCounter: backendRetries,
backendServerUpGauge: backendServerUp,
} }
} }
// OnConfigurationUpdate increases the current generation of the prometheus state.
func OnConfigurationUpdate() {
promState.IncGeneration()
}
func newPrometheusState() *prometheusState {
collectors := make(chan *collector)
state := make(map[string]*collector)
return &prometheusState{
collectors: collectors,
state: state,
}
}
type prometheusState struct {
currentGeneration int
collectors chan *collector
describers []func(ch chan<- *stdprometheus.Desc)
mtx sync.Mutex
state map[string]*collector
}
func (ps *prometheusState) IncGeneration() {
ps.mtx.Lock()
defer ps.mtx.Unlock()
ps.currentGeneration++
}
func (ps *prometheusState) ListenValueUpdates() {
for collector := range ps.collectors {
ps.mtx.Lock()
collector.lastTrackedGeneration = ps.currentGeneration
ps.state[collector.id] = collector
ps.mtx.Unlock()
}
}
// Describe implements prometheus.Collector and simply calls
// the registered describer functions.
func (ps *prometheusState) Describe(ch chan<- *stdprometheus.Desc) {
for _, desc := range ps.describers {
desc(ch)
}
}
// Collect implements prometheus.Collector. It calls the Collect
// method of all metrics it received on the collectors channel.
// It's also responsible to remove metrics that were tracked
// at least three generations ago. Those metrics are cleaned up
// after the Collect of them were called.
func (ps *prometheusState) Collect(ch chan<- stdprometheus.Metric) {
ps.mtx.Lock()
defer ps.mtx.Unlock()
outdatedKeys := []string{}
for key, cs := range ps.state {
cs.collector.Collect(ch)
if cs.maxAge == generationAgeForever {
continue
}
if ps.currentGeneration-cs.lastTrackedGeneration >= cs.maxAge {
outdatedKeys = append(outdatedKeys, key)
}
}
for _, key := range outdatedKeys {
delete(ps.state, key)
}
}
func newCollector(metricName string, lnvs labelNamesValues, c stdprometheus.Collector) *collector {
maxAge := generationAgeDefault
// metrics without labels should never become outdated
if len(lnvs) == 0 {
maxAge = generationAgeForever
}
return &collector{
id: buildMetricID(metricName, lnvs),
maxAge: maxAge,
collector: c,
}
}
// collector wraps a Collector object from the Prometheus client library.
// It adds information on how many generations this metric should be present
// in the /metrics output, relatived to the time it was last tracked.
type collector struct {
id string
collector stdprometheus.Collector
lastTrackedGeneration int
maxAge int
}
func buildMetricID(metricName string, lnvs labelNamesValues) string {
newLnvs := append([]string{}, lnvs...)
sort.Strings(newLnvs)
return metricName + ":" + strings.Join(newLnvs, "|")
}
func newCounterFrom(collectors chan<- *collector, opts stdprometheus.CounterOpts, labelNames []string) *counter {
cv := stdprometheus.NewCounterVec(opts, labelNames)
c := &counter{
name: opts.Name,
cv: cv,
collectors: collectors,
}
if len(labelNames) == 0 {
c.Add(0)
}
return c
}
type counter struct {
name string
cv *stdprometheus.CounterVec
labelNamesValues labelNamesValues
collectors chan<- *collector
}
func (c *counter) With(labelValues ...string) metrics.Counter {
return &counter{
name: c.name,
cv: c.cv,
labelNamesValues: c.labelNamesValues.With(labelValues...),
collectors: c.collectors,
}
}
func (c *counter) Add(delta float64) {
collector := c.cv.With(c.labelNamesValues.ToLabels())
collector.Add(delta)
c.collectors <- newCollector(c.name, c.labelNamesValues, collector)
}
func (c *counter) Describe(ch chan<- *stdprometheus.Desc) {
c.cv.Describe(ch)
}
func newGaugeFrom(collectors chan<- *collector, opts stdprometheus.GaugeOpts, labelNames []string) *gauge {
gv := stdprometheus.NewGaugeVec(opts, labelNames)
g := &gauge{
name: opts.Name,
gv: gv,
collectors: collectors,
}
if len(labelNames) == 0 {
g.Set(0)
}
return g
}
type gauge struct {
name string
gv *stdprometheus.GaugeVec
labelNamesValues labelNamesValues
collectors chan<- *collector
}
func (g *gauge) With(labelValues ...string) metrics.Gauge {
return &gauge{
name: g.name,
gv: g.gv,
labelNamesValues: g.labelNamesValues.With(labelValues...),
collectors: g.collectors,
}
}
func (g *gauge) Set(value float64) {
collector := g.gv.With(g.labelNamesValues.ToLabels())
collector.Set(value)
g.collectors <- newCollector(g.name, g.labelNamesValues, collector)
}
func (g *gauge) Describe(ch chan<- *stdprometheus.Desc) {
g.gv.Describe(ch)
}
func newHistogramFrom(collectors chan<- *collector, opts stdprometheus.HistogramOpts, labelNames []string) *histogram {
hv := stdprometheus.NewHistogramVec(opts, labelNames)
return &histogram{
name: opts.Name,
hv: hv,
collectors: collectors,
}
}
type histogram struct {
name string
hv *stdprometheus.HistogramVec
labelNamesValues labelNamesValues
collectors chan<- *collector
}
func (h *histogram) With(labelValues ...string) metrics.Histogram {
return &histogram{
name: h.name,
hv: h.hv,
labelNamesValues: h.labelNamesValues.With(labelValues...),
collectors: h.collectors,
}
}
func (h *histogram) Observe(value float64) {
collector := h.hv.With(h.labelNamesValues.ToLabels())
collector.Observe(value)
h.collectors <- newCollector(h.name, h.labelNamesValues, collector)
}
func (h *histogram) Describe(ch chan<- *stdprometheus.Desc) {
h.hv.Describe(ch)
}
// labelNamesValues is a type alias that provides validation on its With method.
// Metrics may include it as a member to help them satisfy With semantics and
// save some code duplication.
type labelNamesValues []string
// With validates the input, and returns a new aggregate labelNamesValues.
func (lvs labelNamesValues) With(labelValues ...string) labelNamesValues {
if len(labelValues)%2 != 0 {
labelValues = append(labelValues, "unknown")
}
return append(lvs, labelValues...)
}
// ToLabels is a convenience method to convert a labelNamesValues
// to the native prometheus.Labels.
func (lvs labelNamesValues) ToLabels() stdprometheus.Labels {
labels := stdprometheus.Labels{}
for i := 0; i < len(lvs); i += 2 {
labels[lvs[i]] = lvs[i+1]
}
return labels
}

View file

@ -1,9 +1,11 @@
package metrics package metrics
import ( import (
"fmt"
"net/http" "net/http"
"strconv" "strconv"
"testing" "testing"
"time"
"github.com/containous/traefik/types" "github.com/containous/traefik/types"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
@ -12,20 +14,54 @@ import (
func TestPrometheus(t *testing.T) { func TestPrometheus(t *testing.T) {
prometheusRegistry := RegisterPrometheus(&types.Prometheus{}) prometheusRegistry := RegisterPrometheus(&types.Prometheus{})
defer prometheus.Unregister(promState)
if !prometheusRegistry.IsEnabled() { if !prometheusRegistry.IsEnabled() {
t.Errorf("PrometheusRegistry should return true for IsEnabled()") t.Errorf("PrometheusRegistry should return true for IsEnabled()")
} }
prometheusRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
prometheusRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
prometheusRegistry.ReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
prometheusRegistry.ReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
prometheusRegistry.RetriesCounter().With("service", "test").Add(1)
metricsFamilies, err := prometheus.DefaultGatherer.Gather() prometheusRegistry.ConfigReloadsCounter().Add(1)
if err != nil { prometheusRegistry.ConfigReloadsFailureCounter().Add(1)
t.Fatalf("could not gather metrics families: %s", err) prometheusRegistry.LastConfigReloadSuccessGauge().Set(float64(time.Now().Unix()))
} prometheusRegistry.LastConfigReloadFailureGauge().Set(float64(time.Now().Unix()))
prometheusRegistry.
EntrypointReqsCounter().
With("code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http", "entrypoint", "http").
Add(1)
prometheusRegistry.
EntrypointReqDurationHistogram().
With("code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http", "entrypoint", "http").
Observe(1)
prometheusRegistry.
EntrypointOpenConnsGauge().
With("method", http.MethodGet, "protocol", "http", "entrypoint", "http").
Set(1)
prometheusRegistry.
BackendReqsCounter().
With("backend", "backend1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
Add(1)
prometheusRegistry.
BackendReqDurationHistogram().
With("backend", "backend1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
Observe(10000)
prometheusRegistry.
BackendOpenConnsGauge().
With("backend", "backend1", "method", http.MethodGet, "protocol", "http").
Set(1)
prometheusRegistry.
BackendRetriesCounter().
With("backend", "backend1").
Add(1)
prometheusRegistry.
BackendServerUpGauge().
With("backend", "backend1", "url", "http://127.0.0.10:80").
Set(1)
delayForTrackingCompletion()
metricsFamilies := mustScrape()
tests := []struct { tests := []struct {
name string name string
@ -33,46 +69,93 @@ func TestPrometheus(t *testing.T) {
assert func(*dto.MetricFamily) assert func(*dto.MetricFamily)
}{ }{
{ {
name: reqsTotalName, name: configReloadsTotalName,
labels: map[string]string{ assert: buildCounterAssert(t, configReloadsTotalName, 1),
"code": "200",
"method": http.MethodGet,
"service": "test",
},
assert: func(family *dto.MetricFamily) {
cv := family.Metric[0].Counter.GetValue()
expectedCv := float64(2)
if cv != expectedCv {
t.Errorf("gathered metrics do not contain correct value for total requests, got %f expected %f", cv, expectedCv)
}
},
}, },
{ {
name: reqDurationName, name: configReloadsFailuresTotalName,
labels: map[string]string{ assert: buildCounterAssert(t, configReloadsFailuresTotalName, 1),
"service": "test",
"code": "200",
},
assert: func(family *dto.MetricFamily) {
sc := family.Metric[0].Histogram.GetSampleCount()
expectedSc := uint64(2)
if sc != expectedSc {
t.Errorf("gathered metrics do not contain correct sample count for request duration, got %d expected %d", sc, expectedSc)
}
},
}, },
{ {
name: retriesTotalName, name: configLastReloadSuccessName,
assert: buildTimestampAssert(t, configLastReloadSuccessName),
},
{
name: configLastReloadFailureName,
assert: buildTimestampAssert(t, configLastReloadFailureName),
},
{
name: entrypointReqsTotalName,
labels: map[string]string{ labels: map[string]string{
"service": "test", "code": "200",
"method": http.MethodGet,
"protocol": "http",
"entrypoint": "http",
}, },
assert: func(family *dto.MetricFamily) { assert: buildCounterAssert(t, entrypointReqsTotalName, 1),
cv := family.Metric[0].Counter.GetValue() },
expectedCv := float64(1) {
if cv != expectedCv { name: entrypointReqDurationName,
t.Errorf("gathered metrics do not contain correct value for total retries, got %f expected %f", cv, expectedCv) labels: map[string]string{
} "code": "200",
"method": http.MethodGet,
"protocol": "http",
"entrypoint": "http",
}, },
assert: buildHistogramAssert(t, entrypointReqDurationName, 1),
},
{
name: entrypointOpenConnsName,
labels: map[string]string{
"method": http.MethodGet,
"protocol": "http",
"entrypoint": "http",
},
assert: buildGaugeAssert(t, entrypointOpenConnsName, 1),
},
{
name: backendReqsTotalName,
labels: map[string]string{
"code": "200",
"method": http.MethodGet,
"protocol": "http",
"backend": "backend1",
},
assert: buildCounterAssert(t, backendReqsTotalName, 1),
},
{
name: backendReqDurationName,
labels: map[string]string{
"code": "200",
"method": http.MethodGet,
"protocol": "http",
"backend": "backend1",
},
assert: buildHistogramAssert(t, backendReqDurationName, 1),
},
{
name: backendOpenConnsName,
labels: map[string]string{
"method": http.MethodGet,
"protocol": "http",
"backend": "backend1",
},
assert: buildGaugeAssert(t, backendOpenConnsName, 1),
},
{
name: backendRetriesTotalName,
labels: map[string]string{
"backend": "backend1",
},
assert: buildGreaterThanCounterAssert(t, backendRetriesTotalName, 1),
},
{
name: backendServerUpName,
labels: map[string]string{
"backend": "backend1",
"url": "http://127.0.0.10:80",
},
assert: buildGaugeAssert(t, backendServerUpName, 1),
}, },
} }
@ -94,6 +177,90 @@ func TestPrometheus(t *testing.T) {
} }
} }
func TestPrometheusGenerationLogicForMetricWithLabel(t *testing.T) {
prometheusRegistry := RegisterPrometheus(&types.Prometheus{})
defer prometheus.Unregister(promState)
// Metrics with labels belonging to a specific configuration in Traefik
// should be removed when the generationMaxAge is exceeded. As example
// we use the traefik_backend_requests_total metric.
prometheusRegistry.
BackendReqsCounter().
With("backend", "backend1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
Add(1)
delayForTrackingCompletion()
assertMetricExists(t, backendReqsTotalName, mustScrape())
// Increase the config generation one more than the max age of a metric.
for i := 0; i < generationAgeDefault+1; i++ {
OnConfigurationUpdate()
}
// On the next scrape the metric still exists and will be removed
// after the scrape completed.
assertMetricExists(t, backendReqsTotalName, mustScrape())
// Now the metric should be absent.
assertMetricAbsent(t, backendReqsTotalName, mustScrape())
}
func TestPrometheusGenerationLogicForMetricWithoutLabel(t *testing.T) {
prometheusRegistry := RegisterPrometheus(&types.Prometheus{})
defer prometheus.Unregister(promState)
// Metrics without labels like traefik_config_reloads_total should live forever
// and never get removed.
prometheusRegistry.ConfigReloadsCounter().Add(1)
delayForTrackingCompletion()
assertMetricExists(t, configReloadsTotalName, mustScrape())
// Increase the config generation one more than the max age of a metric.
for i := 0; i < generationAgeDefault+100; i++ {
OnConfigurationUpdate()
}
// Scrape two times in order to verify, that it is not removed after the
// first scrape completed.
assertMetricExists(t, configReloadsTotalName, mustScrape())
assertMetricExists(t, configReloadsTotalName, mustScrape())
}
// Tracking and gathering the metrics happens concurrently.
// In practice this is no problem, because in case a tracked metric would miss
// the current scrape, it would just be there in the next one.
// That we can test reliably the tracking of all metrics here, we sleep
// for a short amount of time, to make sure the metric will be present
// in the next scrape.
func delayForTrackingCompletion() {
time.Sleep(250 * time.Millisecond)
}
func mustScrape() []*dto.MetricFamily {
families, err := prometheus.DefaultGatherer.Gather()
if err != nil {
panic(fmt.Sprintf("could not gather metrics families: %s", err))
}
return families
}
func assertMetricExists(t *testing.T, name string, families []*dto.MetricFamily) {
t.Helper()
if findMetricFamily(name, families) == nil {
t.Errorf("gathered metrics do not contain %q", name)
}
}
func assertMetricAbsent(t *testing.T, name string, families []*dto.MetricFamily) {
t.Helper()
if findMetricFamily(name, families) != nil {
t.Errorf("gathered metrics contain %q, but should not", name)
}
}
func findMetricFamily(name string, families []*dto.MetricFamily) *dto.MetricFamily { func findMetricFamily(name string, families []*dto.MetricFamily) *dto.MetricFamily {
for _, family := range families { for _, family := range families {
if family.GetName() == name { if family.GetName() == name {
@ -102,3 +269,43 @@ func findMetricFamily(name string, families []*dto.MetricFamily) *dto.MetricFami
} }
return nil return nil
} }
func buildCounterAssert(t *testing.T, metricName string, expectedValue int) func(family *dto.MetricFamily) {
return func(family *dto.MetricFamily) {
if cv := int(family.Metric[0].Counter.GetValue()); cv != expectedValue {
t.Errorf("metric %s has value %d, want %d", metricName, cv, expectedValue)
}
}
}
func buildGreaterThanCounterAssert(t *testing.T, metricName string, expectedMinValue int) func(family *dto.MetricFamily) {
return func(family *dto.MetricFamily) {
if cv := int(family.Metric[0].Counter.GetValue()); cv < expectedMinValue {
t.Errorf("metric %s has value %d, want at least %d", metricName, cv, expectedMinValue)
}
}
}
func buildHistogramAssert(t *testing.T, metricName string, expectedSampleCount int) func(family *dto.MetricFamily) {
return func(family *dto.MetricFamily) {
if sc := int(family.Metric[0].Histogram.GetSampleCount()); sc != expectedSampleCount {
t.Errorf("metric %s has sample count value %d, want %d", metricName, sc, expectedSampleCount)
}
}
}
func buildGaugeAssert(t *testing.T, metricName string, expectedValue int) func(family *dto.MetricFamily) {
return func(family *dto.MetricFamily) {
if gv := int(family.Metric[0].Gauge.GetValue()); gv != expectedValue {
t.Errorf("metric %s has value %d, want %d", metricName, gv, expectedValue)
}
}
}
func buildTimestampAssert(t *testing.T, metricName string) func(family *dto.MetricFamily) {
return func(family *dto.MetricFamily) {
if ts := time.Unix(int64(family.Metric[0].Gauge.GetValue()), 0); time.Since(ts) > time.Minute {
t.Errorf("metric %s has wrong timestamp %v", metricName, ts)
}
}
}

View file

@ -30,10 +30,10 @@ func RegisterStatsd(config *types.Statsd) Registry {
} }
return &standardRegistry{ return &standardRegistry{
enabled: true, enabled: true,
reqsCounter: statsdClient.NewCounter(statsdMetricsReqsName, 1.0), backendReqsCounter: statsdClient.NewCounter(statsdMetricsReqsName, 1.0),
reqDurationHistogram: statsdClient.NewTiming(statsdMetricsLatencyName, 1.0), backendReqDurationHistogram: statsdClient.NewTiming(statsdMetricsLatencyName, 1.0),
retriesCounter: statsdClient.NewCounter(statsdRetriesTotalName, 1.0), backendRetriesCounter: statsdClient.NewCounter(statsdRetriesTotalName, 1.0),
} }
} }

View file

@ -29,10 +29,10 @@ func TestStatsD(t *testing.T) {
} }
udp.ShouldReceiveAll(t, expected, func() { udp.ShouldReceiveAll(t, expected, func() {
statsdRegistry.ReqsCounter().With("service", "test", "code", string(http.StatusOK), "method", http.MethodGet).Add(1) statsdRegistry.BackendReqsCounter().With("service", "test", "code", string(http.StatusOK), "method", http.MethodGet).Add(1)
statsdRegistry.ReqsCounter().With("service", "test", "code", string(http.StatusNotFound), "method", http.MethodGet).Add(1) statsdRegistry.BackendReqsCounter().With("service", "test", "code", string(http.StatusNotFound), "method", http.MethodGet).Add(1)
statsdRegistry.RetriesCounter().With("service", "test").Add(1) statsdRegistry.BackendRetriesCounter().With("service", "test").Add(1)
statsdRegistry.RetriesCounter().With("service", "test").Add(1) statsdRegistry.BackendRetriesCounter().With("service", "test").Add(1)
statsdRegistry.ReqDurationHistogram().With("service", "test", "code", string(http.StatusOK)).Observe(10000) statsdRegistry.BackendReqDurationHistogram().With("service", "test", "code", string(http.StatusOK)).Observe(10000)
}) })
} }

View file

@ -3,51 +3,100 @@ package middlewares
import ( import (
"net/http" "net/http"
"strconv" "strconv"
"strings"
"sync/atomic"
"time" "time"
"unicode/utf8" "unicode/utf8"
"github.com/containous/traefik/log" "github.com/containous/traefik/log"
"github.com/containous/traefik/metrics" "github.com/containous/traefik/metrics"
gokitmetrics "github.com/go-kit/kit/metrics" gokitmetrics "github.com/go-kit/kit/metrics"
"github.com/urfave/negroni"
) )
// MetricsWrapper is a Negroni compatible Handler which relies on a const (
// given Metrics implementation to expose and monitor Traefik Metrics. protoHTTP = "http"
type MetricsWrapper struct { protoSSE = "sse"
registry metrics.Registry protoWebsocket = "websocket"
serviceName string )
}
// NewMetricsWrapper return a MetricsWrapper struct with // NewEntryPointMetricsMiddleware creates a new metrics middleware for an Entrypoint.
// a given Metrics implementation func NewEntryPointMetricsMiddleware(registry metrics.Registry, entryPointName string) negroni.Handler {
func NewMetricsWrapper(registry metrics.Registry, service string) *MetricsWrapper { return &metricsMiddleware{
var metricsWrapper = MetricsWrapper{ reqsCounter: registry.EntrypointReqsCounter(),
registry: registry, reqDurationHistogram: registry.EntrypointReqDurationHistogram(),
serviceName: service, openConnsGauge: registry.EntrypointOpenConnsGauge(),
baseLabels: []string{"entrypoint", entryPointName},
} }
return &metricsWrapper
} }
func (m *MetricsWrapper) ServeHTTP(rw http.ResponseWriter, r *http.Request, next http.HandlerFunc) { // NewBackendMetricsMiddleware creates a new metrics middleware for a Backend.
func NewBackendMetricsMiddleware(registry metrics.Registry, backendName string) negroni.Handler {
return &metricsMiddleware{
reqsCounter: registry.BackendReqsCounter(),
reqDurationHistogram: registry.BackendReqDurationHistogram(),
openConnsGauge: registry.BackendOpenConnsGauge(),
baseLabels: []string{"backend", backendName},
}
}
type metricsMiddleware struct {
reqsCounter gokitmetrics.Counter
reqDurationHistogram gokitmetrics.Histogram
openConnsGauge gokitmetrics.Gauge
baseLabels []string
openConns int64
}
func (m *metricsMiddleware) ServeHTTP(rw http.ResponseWriter, r *http.Request, next http.HandlerFunc) {
labels := []string{"method", getMethod(r), "protocol", getRequestProtocol(r)}
labels = append(labels, m.baseLabels...)
openConns := atomic.AddInt64(&m.openConns, 1)
m.openConnsGauge.With(labels...).Set(float64(openConns))
defer func(labelValues []string) {
openConns := atomic.AddInt64(&m.openConns, -1)
m.openConnsGauge.With(labelValues...).Set(float64(openConns))
}(labels)
start := time.Now() start := time.Now()
prw := &responseRecorder{rw, http.StatusOK} recorder := &responseRecorder{rw, http.StatusOK}
next(prw, r) next(recorder, r)
reqLabels := []string{"service", m.serviceName, "code", strconv.Itoa(prw.statusCode), "method", getMethod(r)} labels = append(labels, "code", strconv.Itoa(recorder.statusCode))
m.registry.ReqsCounter().With(reqLabels...).Add(1) m.reqsCounter.With(labels...).Add(1)
m.reqDurationHistogram.With(labels...).Observe(float64(time.Since(start).Seconds()))
reqDurationLabels := []string{"service", m.serviceName, "code", strconv.Itoa(prw.statusCode)}
m.registry.ReqDurationHistogram().With(reqDurationLabels...).Observe(time.Since(start).Seconds())
} }
type retryMetrics interface { func getRequestProtocol(req *http.Request) string {
RetriesCounter() gokitmetrics.Counter switch {
case isWebsocketRequest(req):
return protoWebsocket
case isSSERequest(req):
return protoSSE
default:
return protoHTTP
}
} }
// NewMetricsRetryListener instantiates a MetricsRetryListener with the given retryMetrics. // isWebsocketRequest determines if the specified HTTP request is a websocket handshake request.
func NewMetricsRetryListener(retryMetrics retryMetrics, backendName string) RetryListener { func isWebsocketRequest(req *http.Request) bool {
return &MetricsRetryListener{retryMetrics: retryMetrics, backendName: backendName} return containsHeader(req, "Connection", "upgrade") && containsHeader(req, "Upgrade", "websocket")
}
// isSSERequest determines if the specified HTTP request is a request for an event subscription.
func isSSERequest(req *http.Request) bool {
return containsHeader(req, "Accept", "text/event-stream")
}
func containsHeader(req *http.Request, name, value string) bool {
items := strings.Split(req.Header.Get(name), ",")
for _, item := range items {
if value == strings.ToLower(strings.TrimSpace(item)) {
return true
}
}
return false
} }
func getMethod(r *http.Request) string { func getMethod(r *http.Request) string {
@ -58,6 +107,15 @@ func getMethod(r *http.Request) string {
return r.Method return r.Method
} }
type retryMetrics interface {
BackendRetriesCounter() gokitmetrics.Counter
}
// NewMetricsRetryListener instantiates a MetricsRetryListener with the given retryMetrics.
func NewMetricsRetryListener(retryMetrics retryMetrics, backendName string) RetryListener {
return &MetricsRetryListener{retryMetrics: retryMetrics, backendName: backendName}
}
// MetricsRetryListener is an implementation of the RetryListener interface to // MetricsRetryListener is an implementation of the RetryListener interface to
// record RequestMetrics about retry attempts. // record RequestMetrics about retry attempts.
type MetricsRetryListener struct { type MetricsRetryListener struct {
@ -67,5 +125,5 @@ type MetricsRetryListener struct {
// Retried tracks the retry in the RequestMetrics implementation. // Retried tracks the retry in the RequestMetrics implementation.
func (m *MetricsRetryListener) Retried(req *http.Request, attempt int) { func (m *MetricsRetryListener) Retried(req *http.Request, attempt int) {
m.retryMetrics.RetriesCounter().With("backend", m.backendName).Add(1) m.retryMetrics.BackendRetriesCounter().With("backend", m.backendName).Add(1)
} }

View file

@ -6,6 +6,7 @@ import (
"reflect" "reflect"
"testing" "testing"
"github.com/containous/traefik/testhelpers"
"github.com/go-kit/kit/metrics" "github.com/go-kit/kit/metrics"
) )
@ -17,39 +18,25 @@ func TestMetricsRetryListener(t *testing.T) {
retryListener.Retried(req, 2) retryListener.Retried(req, 2)
wantCounterValue := float64(2) wantCounterValue := float64(2)
if retryMetrics.retryCounter.counterValue != wantCounterValue { if retryMetrics.retriesCounter.CounterValue != wantCounterValue {
t.Errorf("got counter value of %d, want %d", retryMetrics.retryCounter.counterValue, wantCounterValue) t.Errorf("got counter value of %d, want %d", retryMetrics.retriesCounter.CounterValue, wantCounterValue)
} }
wantLabelValues := []string{"backend", "backendName"} wantLabelValues := []string{"backend", "backendName"}
if !reflect.DeepEqual(retryMetrics.retryCounter.lastLabelValues, wantLabelValues) { if !reflect.DeepEqual(retryMetrics.retriesCounter.LastLabelValues, wantLabelValues) {
t.Errorf("wrong label values %v used, want %v", retryMetrics.retryCounter.lastLabelValues, wantLabelValues) t.Errorf("wrong label values %v used, want %v", retryMetrics.retriesCounter.LastLabelValues, wantLabelValues)
} }
} }
// collectingRetryMetrics is an implementation of the retryMetrics interface that can be used inside tests to collect the times Add() was called. // collectingRetryMetrics is an implementation of the retryMetrics interface that can be used inside tests to collect the times Add() was called.
type collectingRetryMetrics struct { type collectingRetryMetrics struct {
retryCounter *collectingCounter retriesCounter *testhelpers.CollectingCounter
} }
func newCollectingRetryMetrics() collectingRetryMetrics { func newCollectingRetryMetrics() *collectingRetryMetrics {
return collectingRetryMetrics{retryCounter: &collectingCounter{}} return &collectingRetryMetrics{retriesCounter: &testhelpers.CollectingCounter{}}
} }
func (metrics collectingRetryMetrics) RetriesCounter() metrics.Counter { func (metrics *collectingRetryMetrics) BackendRetriesCounter() metrics.Counter {
return metrics.retryCounter return metrics.retriesCounter
}
type collectingCounter struct {
counterValue float64
lastLabelValues []string
}
func (c *collectingCounter) With(labelValues ...string) metrics.Counter {
c.lastLabelValues = labelValues
return c
}
func (c *collectingCounter) Add(delta float64) {
c.counterValue += delta
} }

View file

@ -122,10 +122,7 @@ func NewServer(globalConfiguration configuration.GlobalConfiguration) *Server {
server.tracingMiddleware.Setup() server.tracingMiddleware.Setup()
} }
server.metricsRegistry = metrics.NewVoidRegistry() server.metricsRegistry = registerMetricClients(globalConfiguration.Metrics)
if globalConfiguration.Metrics != nil {
server.registerMetricClients(globalConfiguration.Metrics)
}
if globalConfiguration.Cluster != nil { if globalConfiguration.Cluster != nil {
// leadership creation if cluster mode // leadership creation if cluster mode
@ -304,7 +301,7 @@ func (s *Server) setupServerEntryPoint(newServerEntryPointName string, newServer
serverMiddlewares = append(serverMiddlewares, s.accessLoggerMiddleware) serverMiddlewares = append(serverMiddlewares, s.accessLoggerMiddleware)
} }
if s.metricsRegistry.IsEnabled() { if s.metricsRegistry.IsEnabled() {
serverMiddlewares = append(serverMiddlewares, middlewares.NewMetricsWrapper(s.metricsRegistry, newServerEntryPointName)) serverMiddlewares = append(serverMiddlewares, middlewares.NewEntryPointMetricsMiddleware(s.metricsRegistry, newServerEntryPointName))
} }
if s.globalConfiguration.API != nil { if s.globalConfiguration.API != nil {
if s.globalConfiguration.API.Stats == nil { if s.globalConfiguration.API.Stats == nil {
@ -449,8 +446,10 @@ func (s *Server) loadConfiguration(configMsg types.ConfigMessage) {
} }
newConfigurations[configMsg.ProviderName] = configMsg.Configuration newConfigurations[configMsg.ProviderName] = configMsg.Configuration
s.metricsRegistry.ConfigReloadsCounter().Add(1)
newServerEntryPoints, err := s.loadConfig(newConfigurations, s.globalConfiguration) newServerEntryPoints, err := s.loadConfig(newConfigurations, s.globalConfiguration)
if err == nil { if err == nil {
s.metricsRegistry.LastConfigReloadSuccessGauge().Set(float64(time.Now().Unix()))
for newServerEntryPointName, newServerEntryPoint := range newServerEntryPoints { for newServerEntryPointName, newServerEntryPoint := range newServerEntryPoints {
s.serverEntryPoints[newServerEntryPointName].httpRouter.UpdateHandler(newServerEntryPoint.httpRouter.GetHandler()) s.serverEntryPoints[newServerEntryPointName].httpRouter.UpdateHandler(newServerEntryPoint.httpRouter.GetHandler())
if s.globalConfiguration.EntryPoints[newServerEntryPointName].TLS == nil { if s.globalConfiguration.EntryPoints[newServerEntryPointName].TLS == nil {
@ -465,6 +464,8 @@ func (s *Server) loadConfiguration(configMsg types.ConfigMessage) {
s.currentConfigurations.Set(newConfigurations) s.currentConfigurations.Set(newConfigurations)
s.postLoadConfiguration() s.postLoadConfiguration()
} else { } else {
s.metricsRegistry.ConfigReloadsFailureCounter().Add(1)
s.metricsRegistry.LastConfigReloadFailureGauge().Set(float64(time.Now().Unix()))
log.Error("Error loading new configuration, aborted ", err) log.Error("Error loading new configuration, aborted ", err)
} }
} }
@ -501,6 +502,8 @@ func (s *serverEntryPoint) getCertificate(clientHello *tls.ClientHelloInfo) (*tl
} }
func (s *Server) postLoadConfiguration() { func (s *Server) postLoadConfiguration() {
metrics.OnConfigurationUpdate()
if s.globalConfiguration.ACME == nil { if s.globalConfiguration.ACME == nil {
return return
} }
@ -1070,7 +1073,7 @@ func (s *Server) loadConfig(configurations types.Configurations, globalConfigura
rebalancer, _ = roundrobin.NewRebalancer(rr, roundrobin.RebalancerStickySession(sticky)) rebalancer, _ = roundrobin.NewRebalancer(rr, roundrobin.RebalancerStickySession(sticky))
} }
lb = rebalancer lb = rebalancer
if err := configureLBServers(rebalancer, config, frontend); err != nil { if err := s.configureLBServers(rebalancer, config, frontend); err != nil {
log.Errorf("Skipping frontend %s...", frontendName) log.Errorf("Skipping frontend %s...", frontendName)
continue frontend continue frontend
} }
@ -1092,7 +1095,7 @@ func (s *Server) loadConfig(configurations types.Configurations, globalConfigura
} }
} }
lb = rr lb = rr
if err := configureLBServers(rr, config, frontend); err != nil { if err := s.configureLBServers(rr, config, frontend); err != nil {
log.Errorf("Skipping frontend %s...", frontendName) log.Errorf("Skipping frontend %s...", frontendName)
continue frontend continue frontend
} }
@ -1154,7 +1157,7 @@ func (s *Server) loadConfig(configurations types.Configurations, globalConfigura
} }
if s.metricsRegistry.IsEnabled() { if s.metricsRegistry.IsEnabled() {
n.Use(middlewares.NewMetricsWrapper(s.metricsRegistry, frontend.Backend)) n.Use(middlewares.NewBackendMetricsMiddleware(s.metricsRegistry, frontend.Backend))
} }
ipWhitelistMiddleware, err := configureIPWhitelistMiddleware(frontend.WhitelistSourceRange) ipWhitelistMiddleware, err := configureIPWhitelistMiddleware(frontend.WhitelistSourceRange)
@ -1233,7 +1236,7 @@ func (s *Server) loadConfig(configurations types.Configurations, globalConfigura
} }
} }
} }
healthcheck.GetHealthCheck().SetBackendsConfiguration(s.routinesPool.Ctx(), backendsHealthCheck) healthcheck.GetHealthCheck(s.metricsRegistry).SetBackendsConfiguration(s.routinesPool.Ctx(), backendsHealthCheck)
// Get new certificates list sorted per entrypoints // Get new certificates list sorted per entrypoints
// Update certificates // Update certificates
entryPointsCertificates, err := s.loadHTTPSConfiguration(configurations, globalConfiguration.DefaultEntryPoints) entryPointsCertificates, err := s.loadHTTPSConfiguration(configurations, globalConfiguration.DefaultEntryPoints)
@ -1249,18 +1252,19 @@ func (s *Server) loadConfig(configurations types.Configurations, globalConfigura
return serverEntryPoints, err return serverEntryPoints, err
} }
func configureLBServers(lb healthcheck.LoadBalancer, config *types.Configuration, frontend *types.Frontend) error { func (s *Server) configureLBServers(lb healthcheck.LoadBalancer, config *types.Configuration, frontend *types.Frontend) error {
for serverName, server := range config.Backends[frontend.Backend].Servers { for name, srv := range config.Backends[frontend.Backend].Servers {
u, err := url.Parse(server.URL) u, err := url.Parse(srv.URL)
if err != nil { if err != nil {
log.Errorf("Error parsing server URL %s: %v", server.URL, err) log.Errorf("Error parsing server URL %s: %v", srv.URL, err)
return err return err
} }
log.Debugf("Creating server %s at %s with weight %d", serverName, u, server.Weight) log.Debugf("Creating server %s at %s with weight %d", name, u, srv.Weight)
if err := lb.UpsertServer(u, roundrobin.Weight(server.Weight)); err != nil { if err := lb.UpsertServer(u, roundrobin.Weight(srv.Weight)); err != nil {
log.Errorf("Error adding server %s to load balancer: %v", server.URL, err) log.Errorf("Error adding server %s to load balancer: %v", srv.URL, err)
return err return err
} }
s.metricsRegistry.BackendServerUpGauge().With("backend", frontend.Backend, "url", srv.URL).Set(1)
} }
return nil return nil
} }
@ -1477,9 +1481,12 @@ func configureBackends(backends map[string]*types.Backend) {
} }
} }
func (s *Server) registerMetricClients(metricsConfig *types.Metrics) { func registerMetricClients(metricsConfig *types.Metrics) metrics.Registry {
var registries []metrics.Registry if metricsConfig == nil {
return metrics.NewVoidRegistry()
}
registries := []metrics.Registry{}
if metricsConfig.Prometheus != nil { if metricsConfig.Prometheus != nil {
registries = append(registries, metrics.RegisterPrometheus(metricsConfig.Prometheus)) registries = append(registries, metrics.RegisterPrometheus(metricsConfig.Prometheus))
log.Debug("Configured Prometheus metrics") log.Debug("Configured Prometheus metrics")
@ -1497,9 +1504,7 @@ func (s *Server) registerMetricClients(metricsConfig *types.Metrics) {
log.Debugf("Configured InfluxDB metrics pushing to %s once every %s", metricsConfig.InfluxDB.Address, metricsConfig.InfluxDB.PushInterval) log.Debugf("Configured InfluxDB metrics pushing to %s once every %s", metricsConfig.InfluxDB.Address, metricsConfig.InfluxDB.PushInterval)
} }
if len(registries) > 0 { return metrics.NewMultiRegistry(registries)
s.metricsRegistry = metrics.NewMultiRegistry(registries)
}
} }
func stopMetricsClients() { func stopMetricsClients() {

View file

@ -484,7 +484,7 @@ func TestServerLoadConfigHealthCheckOptions(t *testing.T) {
if healthCheck != nil { if healthCheck != nil {
wantNumHealthCheckBackends = 1 wantNumHealthCheckBackends = 1
} }
gotNumHealthCheckBackends := len(healthcheck.GetHealthCheck().Backends) gotNumHealthCheckBackends := len(healthcheck.GetHealthCheck(testhelpers.NewCollectingHealthCheckMetrics()).Backends)
if gotNumHealthCheckBackends != wantNumHealthCheckBackends { if gotNumHealthCheckBackends != wantNumHealthCheckBackends {
t.Errorf("got %d health check backends, want %d", gotNumHealthCheckBackends, wantNumHealthCheckBackends) t.Errorf("got %d health check backends, want %d", gotNumHealthCheckBackends, wantNumHealthCheckBackends)
} }

52
testhelpers/metrics.go Normal file
View file

@ -0,0 +1,52 @@
package testhelpers
import "github.com/go-kit/kit/metrics"
// CollectingCounter is a metrics.Counter implementation that enables access to the CounterValue and LastLabelValues.
type CollectingCounter struct {
CounterValue float64
LastLabelValues []string
}
// With is there to satisfy the metrics.Counter interface.
func (c *CollectingCounter) With(labelValues ...string) metrics.Counter {
c.LastLabelValues = labelValues
return c
}
// Add is there to satisfy the metrics.Counter interface.
func (c *CollectingCounter) Add(delta float64) {
c.CounterValue += delta
}
// CollectingGauge is a metrics.Gauge implementation that enables access to the GaugeValue and LastLabelValues.
type CollectingGauge struct {
GaugeValue float64
LastLabelValues []string
}
// With is there to satisfy the metrics.Gauge interface.
func (g *CollectingGauge) With(labelValues ...string) metrics.Gauge {
g.LastLabelValues = labelValues
return g
}
// Set is there to satisfy the metrics.Gauge interface.
func (g *CollectingGauge) Set(delta float64) {
g.GaugeValue = delta
}
// CollectingHealthCheckMetrics can be used for testing the Metrics instrumentation of the HealthCheck package.
type CollectingHealthCheckMetrics struct {
Gauge *CollectingGauge
}
// NewCollectingHealthCheckMetrics creates a new CollectingHealthCheckMetrics instance.
func NewCollectingHealthCheckMetrics() *CollectingHealthCheckMetrics {
return &CollectingHealthCheckMetrics{&CollectingGauge{}}
}
// BackendServerUpGauge is there to satisfy the healthcheck.metricsRegistry interface.
func (m *CollectingHealthCheckMetrics) BackendServerUpGauge() metrics.Gauge {
return m.Gauge
}