extend metrics and rebuild prometheus exporting logic

parent fa1f4f761d
commit cc5ee00b89

17 changed files with 997 additions and 226 deletions
@@ -12,6 +12,7 @@ import (
 	"github.com/containous/traefik/log"
 	"github.com/containous/traefik/safe"
+	"github.com/go-kit/kit/metrics"
 	"github.com/vulcand/oxy/roundrobin"
 )

@@ -19,9 +20,9 @@ var singleton *HealthCheck
 var once sync.Once

 // GetHealthCheck returns the health check which is guaranteed to be a singleton.
-func GetHealthCheck() *HealthCheck {
+func GetHealthCheck(metrics metricsRegistry) *HealthCheck {
 	once.Do(func() {
-		singleton = newHealthCheck()
+		singleton = newHealthCheck(metrics)
 	})
 	return singleton
 }

@@ -50,6 +51,7 @@ type BackendHealthCheck struct {
 //HealthCheck struct
 type HealthCheck struct {
 	Backends map[string]*BackendHealthCheck
+	metrics  metricsRegistry
 	cancel   context.CancelFunc
 }

@@ -60,12 +62,19 @@ type LoadBalancer interface {
 	Servers() []*url.URL
 }

-func newHealthCheck() *HealthCheck {
+func newHealthCheck(metrics metricsRegistry) *HealthCheck {
 	return &HealthCheck{
 		Backends: make(map[string]*BackendHealthCheck),
+		metrics:  metrics,
 	}
 }

+// metricsRegistry is a local interface in the healthcheck package, exposing only the required metrics
+// necessary for the healthcheck package. This makes it easier for the tests.
+type metricsRegistry interface {
+	BackendServerUpGauge() metrics.Gauge
+}
+
 // NewBackendHealthCheck Instantiate a new BackendHealthCheck
 func NewBackendHealthCheck(options Options, backendName string) *BackendHealthCheck {
 	return &BackendHealthCheck{

@@ -113,22 +122,30 @@ func (hc *HealthCheck) checkBackend(backend *BackendHealthCheck) {
 	enabledURLs := backend.LB.Servers()
 	var newDisabledURLs []*url.URL
 	for _, url := range backend.disabledURLs {
+		serverUpMetricValue := float64(0)
 		if err := checkHealth(url, backend); err == nil {
 			log.Warnf("Health check up: Returning to server list. Backend: %q URL: %q", backend.name, url.String())
 			backend.LB.UpsertServer(url, roundrobin.Weight(1))
+			serverUpMetricValue = 1
 		} else {
 			log.Warnf("Health check still failing. Backend: %q URL: %q Reason: %s", backend.name, url.String(), err)
 			newDisabledURLs = append(newDisabledURLs, url)
 		}
+		labelValues := []string{"backend", backend.name, "url", url.String()}
+		hc.metrics.BackendServerUpGauge().With(labelValues...).Set(serverUpMetricValue)
 	}
 	backend.disabledURLs = newDisabledURLs

 	for _, url := range enabledURLs {
+		serverUpMetricValue := float64(1)
 		if err := checkHealth(url, backend); err != nil {
 			log.Warnf("Health check failed: Remove from server list. Backend: %q URL: %q Reason: %s", backend.name, url.String(), err)
 			backend.LB.RemoveServer(url)
 			backend.disabledURLs = append(backend.disabledURLs, url)
+			serverUpMetricValue = 0
 		}
+		labelValues := []string{"backend", backend.name, "url", url.String()}
+		hc.metrics.BackendServerUpGauge().With(labelValues...).Set(serverUpMetricValue)
 	}
 }
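The package-local metricsRegistry interface above keeps the health check decoupled from the full metrics.Registry. A minimal sketch of a test double that satisfies it with go-kit's Gauge interface (not part of this commit; all names here are made up, the real helper lives in the testhelpers package):

package healthcheck

import "github.com/go-kit/kit/metrics"

// fakeHealthCheckMetrics is a hypothetical test double implementing metricsRegistry.
type fakeHealthCheckMetrics struct{ gauge *fakeGauge }

func (m *fakeHealthCheckMetrics) BackendServerUpGauge() metrics.Gauge { return m.gauge }

// fakeGauge records the last value and label values it was given.
type fakeGauge struct {
	gaugeValue      float64
	lastLabelValues []string
}

func (g *fakeGauge) With(labelValues ...string) metrics.Gauge {
	g.lastLabelValues = labelValues
	return g
}

func (g *fakeGauge) Set(value float64) { g.gaugeValue = value }
func (g *fakeGauge) Add(delta float64) { g.gaugeValue += delta }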
@ -27,6 +27,7 @@ func TestSetBackendsConfiguration(t *testing.T) {
|
|||
healthSequence []bool
|
||||
wantNumRemovedServers int
|
||||
wantNumUpsertedServers int
|
||||
wantGaugeValue float64
|
||||
}{
|
||||
{
|
||||
desc: "healthy server staying healthy",
|
||||
|
@ -34,6 +35,7 @@ func TestSetBackendsConfiguration(t *testing.T) {
|
|||
healthSequence: []bool{true},
|
||||
wantNumRemovedServers: 0,
|
||||
wantNumUpsertedServers: 0,
|
||||
wantGaugeValue: 1,
|
||||
},
|
||||
{
|
||||
desc: "healthy server becoming sick",
|
||||
|
@ -41,6 +43,7 @@ func TestSetBackendsConfiguration(t *testing.T) {
|
|||
healthSequence: []bool{false},
|
||||
wantNumRemovedServers: 1,
|
||||
wantNumUpsertedServers: 0,
|
||||
wantGaugeValue: 0,
|
||||
},
|
||||
{
|
||||
desc: "sick server becoming healthy",
|
||||
|
@ -48,6 +51,7 @@ func TestSetBackendsConfiguration(t *testing.T) {
|
|||
healthSequence: []bool{true},
|
||||
wantNumRemovedServers: 0,
|
||||
wantNumUpsertedServers: 1,
|
||||
wantGaugeValue: 1,
|
||||
},
|
||||
{
|
||||
desc: "sick server staying sick",
|
||||
|
@ -55,6 +59,7 @@ func TestSetBackendsConfiguration(t *testing.T) {
|
|||
healthSequence: []bool{false},
|
||||
wantNumRemovedServers: 0,
|
||||
wantNumUpsertedServers: 0,
|
||||
wantGaugeValue: 0,
|
||||
},
|
||||
{
|
||||
desc: "healthy server toggling to sick and back to healthy",
|
||||
|
@ -62,6 +67,7 @@ func TestSetBackendsConfiguration(t *testing.T) {
|
|||
healthSequence: []bool{false, true},
|
||||
wantNumRemovedServers: 1,
|
||||
wantNumUpsertedServers: 1,
|
||||
wantGaugeValue: 1,
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -89,8 +95,10 @@ func TestSetBackendsConfiguration(t *testing.T) {
|
|||
backend.disabledURLs = append(backend.disabledURLs, serverURL)
|
||||
}
|
||||
|
||||
collectingMetrics := testhelpers.NewCollectingHealthCheckMetrics()
|
||||
check := HealthCheck{
|
||||
Backends: make(map[string]*BackendHealthCheck),
|
||||
metrics: collectingMetrics,
|
||||
}
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(1)
|
||||
|
@ -118,6 +126,10 @@ func TestSetBackendsConfiguration(t *testing.T) {
|
|||
if lb.numUpsertedServers != test.wantNumUpsertedServers {
|
||||
t.Errorf("got %d upserted servers, wanted %d", lb.numUpsertedServers, test.wantNumUpsertedServers)
|
||||
}
|
||||
|
||||
if collectingMetrics.Gauge.GaugeValue != test.wantGaugeValue {
|
||||
t.Errorf("got %v ServerUp Gauge, want %v", collectingMetrics.Gauge.GaugeValue, test.wantGaugeValue)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
@ -31,10 +31,10 @@ func RegisterDatadog(config *types.Datadog) Registry {
|
|||
}
|
||||
|
||||
registry := &standardRegistry{
|
||||
enabled: true,
|
||||
reqsCounter: datadogClient.NewCounter(ddMetricsReqsName, 1.0),
|
||||
reqDurationHistogram: datadogClient.NewHistogram(ddMetricsLatencyName, 1.0),
|
||||
retriesCounter: datadogClient.NewCounter(ddRetriesTotalName, 1.0),
|
||||
enabled: true,
|
||||
backendReqsCounter: datadogClient.NewCounter(ddMetricsReqsName, 1.0),
|
||||
backendReqDurationHistogram: datadogClient.NewHistogram(ddMetricsLatencyName, 1.0),
|
||||
backendRetriesCounter: datadogClient.NewCounter(ddRetriesTotalName, 1.0),
|
||||
}
|
||||
|
||||
return registry
@ -31,10 +31,10 @@ func TestDatadog(t *testing.T) {
|
|||
}
|
||||
|
||||
udp.ShouldReceiveAll(t, expected, func() {
|
||||
datadogRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
datadogRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
datadogRegistry.ReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
datadogRegistry.RetriesCounter().With("service", "test").Add(1)
|
||||
datadogRegistry.RetriesCounter().With("service", "test").Add(1)
|
||||
datadogRegistry.BackendReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
datadogRegistry.BackendReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
datadogRegistry.BackendReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
datadogRegistry.BackendRetriesCounter().With("service", "test").Add(1)
|
||||
datadogRegistry.BackendRetriesCounter().With("service", "test").Add(1)
|
||||
})
|
||||
}
@ -37,10 +37,10 @@ func RegisterInfluxDB(config *types.InfluxDB) Registry {
|
|||
}
|
||||
|
||||
return &standardRegistry{
|
||||
enabled: true,
|
||||
reqsCounter: influxDBClient.NewCounter(influxDBMetricsReqsName),
|
||||
reqDurationHistogram: influxDBClient.NewHistogram(influxDBMetricsLatencyName),
|
||||
retriesCounter: influxDBClient.NewCounter(influxDBRetriesTotalName),
|
||||
enabled: true,
|
||||
backendReqsCounter: influxDBClient.NewCounter(influxDBMetricsReqsName),
|
||||
backendReqDurationHistogram: influxDBClient.NewHistogram(influxDBMetricsLatencyName),
|
||||
backendRetriesCounter: influxDBClient.NewCounter(influxDBRetriesTotalName),
|
||||
}
|
||||
}
@ -31,11 +31,11 @@ func TestInfluxDB(t *testing.T) {
|
|||
}
|
||||
|
||||
msg := udp.ReceiveString(t, func() {
|
||||
influxDBRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
influxDBRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
influxDBRegistry.RetriesCounter().With("service", "test").Add(1)
|
||||
influxDBRegistry.RetriesCounter().With("service", "test").Add(1)
|
||||
influxDBRegistry.ReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
influxDBRegistry.BackendReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
influxDBRegistry.BackendReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
influxDBRegistry.BackendRetriesCounter().With("service", "test").Add(1)
|
||||
influxDBRegistry.BackendRetriesCounter().With("service", "test").Add(1)
|
||||
influxDBRegistry.BackendReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
})
|
||||
|
||||
assertMessage(t, msg, expected)
@ -9,71 +9,168 @@ import (
|
|||
type Registry interface {
|
||||
// IsEnabled shows whether metrics instrumentation is enabled.
|
||||
IsEnabled() bool
|
||||
ReqsCounter() metrics.Counter
|
||||
ReqDurationHistogram() metrics.Histogram
|
||||
RetriesCounter() metrics.Counter
|
||||
// server metrics
|
||||
ConfigReloadsCounter() metrics.Counter
|
||||
ConfigReloadsFailureCounter() metrics.Counter
|
||||
LastConfigReloadSuccessGauge() metrics.Gauge
|
||||
LastConfigReloadFailureGauge() metrics.Gauge
|
||||
|
||||
// entry point metrics
|
||||
EntrypointReqsCounter() metrics.Counter
|
||||
EntrypointReqDurationHistogram() metrics.Histogram
|
||||
EntrypointOpenConnsGauge() metrics.Gauge
|
||||
|
||||
// backend metrics
|
||||
BackendReqsCounter() metrics.Counter
|
||||
BackendReqDurationHistogram() metrics.Histogram
|
||||
BackendOpenConnsGauge() metrics.Gauge
|
||||
BackendRetriesCounter() metrics.Counter
|
||||
BackendServerUpGauge() metrics.Gauge
|
||||
}
|
||||
|
||||
// NewMultiRegistry creates a new standardRegistry that wraps multiple Registries.
|
||||
// NewVoidRegistry is a noop implementation of metrics.Registry.
|
||||
// It is used to avoid nil checking in components that do metric collections.
|
||||
func NewVoidRegistry() Registry {
|
||||
return NewMultiRegistry([]Registry{})
|
||||
}
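Because the void registry is now just an empty multi registry, callers can invoke any metric without nil checks and every call is a no-op. A short usage sketch (illustrative, not from the commit):

registry := NewVoidRegistry()

registry.IsEnabled()                                         // false
registry.ConfigReloadsCounter().Add(1)                       // no-op
registry.BackendServerUpGauge().With("backend", "b1").Set(1) // no-op, no nil checks needed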
|
||||
|
||||
// NewMultiRegistry is an implementation of metrics.Registry that wraps multiple registries.
|
||||
// It handles the case when a registry hasn't registered some metric and returns nil.
|
||||
// This allows for feature imparity between the different metric implementations.
|
||||
func NewMultiRegistry(registries []Registry) Registry {
|
||||
reqsCounters := []metrics.Counter{}
|
||||
reqDurationHistograms := []metrics.Histogram{}
|
||||
retriesCounters := []metrics.Counter{}
|
||||
configReloadsCounter := []metrics.Counter{}
|
||||
configReloadsFailureCounter := []metrics.Counter{}
|
||||
lastConfigReloadSuccessGauge := []metrics.Gauge{}
|
||||
lastConfigReloadFailureGauge := []metrics.Gauge{}
|
||||
entrypointReqsCounter := []metrics.Counter{}
|
||||
entrypointReqDurationHistogram := []metrics.Histogram{}
|
||||
entrypointOpenConnsGauge := []metrics.Gauge{}
|
||||
backendReqsCounter := []metrics.Counter{}
|
||||
backendReqDurationHistogram := []metrics.Histogram{}
|
||||
backendOpenConnsGauge := []metrics.Gauge{}
|
||||
backendRetriesCounter := []metrics.Counter{}
|
||||
backendServerUpGauge := []metrics.Gauge{}
|
||||
|
||||
for _, r := range registries {
|
||||
reqsCounters = append(reqsCounters, r.ReqsCounter())
|
||||
reqDurationHistograms = append(reqDurationHistograms, r.ReqDurationHistogram())
|
||||
retriesCounters = append(retriesCounters, r.RetriesCounter())
|
||||
if r.ConfigReloadsCounter() != nil {
|
||||
configReloadsCounter = append(configReloadsCounter, r.ConfigReloadsCounter())
|
||||
}
|
||||
if r.ConfigReloadsFailureCounter() != nil {
|
||||
configReloadsFailureCounter = append(configReloadsFailureCounter, r.ConfigReloadsFailureCounter())
|
||||
}
|
||||
if r.LastConfigReloadSuccessGauge() != nil {
|
||||
lastConfigReloadSuccessGauge = append(lastConfigReloadSuccessGauge, r.LastConfigReloadSuccessGauge())
|
||||
}
|
||||
if r.LastConfigReloadFailureGauge() != nil {
|
||||
lastConfigReloadFailureGauge = append(lastConfigReloadFailureGauge, r.LastConfigReloadFailureGauge())
|
||||
}
|
||||
if r.EntrypointReqsCounter() != nil {
|
||||
entrypointReqsCounter = append(entrypointReqsCounter, r.EntrypointReqsCounter())
|
||||
}
|
||||
if r.EntrypointReqDurationHistogram() != nil {
|
||||
entrypointReqDurationHistogram = append(entrypointReqDurationHistogram, r.EntrypointReqDurationHistogram())
|
||||
}
|
||||
if r.EntrypointOpenConnsGauge() != nil {
|
||||
entrypointOpenConnsGauge = append(entrypointOpenConnsGauge, r.EntrypointOpenConnsGauge())
|
||||
}
|
||||
if r.BackendReqsCounter() != nil {
|
||||
backendReqsCounter = append(backendReqsCounter, r.BackendReqsCounter())
|
||||
}
|
||||
if r.BackendReqDurationHistogram() != nil {
|
||||
backendReqDurationHistogram = append(backendReqDurationHistogram, r.BackendReqDurationHistogram())
|
||||
}
|
||||
if r.BackendOpenConnsGauge() != nil {
|
||||
backendOpenConnsGauge = append(backendOpenConnsGauge, r.BackendOpenConnsGauge())
|
||||
}
|
||||
if r.BackendRetriesCounter() != nil {
|
||||
backendRetriesCounter = append(backendRetriesCounter, r.BackendRetriesCounter())
|
||||
}
|
||||
if r.BackendServerUpGauge() != nil {
|
||||
backendServerUpGauge = append(backendServerUpGauge, r.BackendServerUpGauge())
|
||||
}
|
||||
}
|
||||
|
||||
return &standardRegistry{
|
||||
enabled: true,
|
||||
reqsCounter: multi.NewCounter(reqsCounters...),
|
||||
reqDurationHistogram: multi.NewHistogram(reqDurationHistograms...),
|
||||
retriesCounter: multi.NewCounter(retriesCounters...),
|
||||
enabled: len(registries) > 0,
|
||||
configReloadsCounter: multi.NewCounter(configReloadsCounter...),
|
||||
configReloadsFailureCounter: multi.NewCounter(configReloadsFailureCounter...),
|
||||
lastConfigReloadSuccessGauge: multi.NewGauge(lastConfigReloadSuccessGauge...),
|
||||
lastConfigReloadFailureGauge: multi.NewGauge(lastConfigReloadFailureGauge...),
|
||||
entrypointReqsCounter: multi.NewCounter(entrypointReqsCounter...),
|
||||
entrypointReqDurationHistogram: multi.NewHistogram(entrypointReqDurationHistogram...),
|
||||
entrypointOpenConnsGauge: multi.NewGauge(entrypointOpenConnsGauge...),
|
||||
backendReqsCounter: multi.NewCounter(backendReqsCounter...),
|
||||
backendReqDurationHistogram: multi.NewHistogram(backendReqDurationHistogram...),
|
||||
backendOpenConnsGauge: multi.NewGauge(backendOpenConnsGauge...),
|
||||
backendRetriesCounter: multi.NewCounter(backendRetriesCounter...),
|
||||
backendServerUpGauge: multi.NewGauge(backendServerUpGauge...),
|
||||
}
|
||||
}
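A wiring sketch of the intended use (my assumption, not shown in this diff): several configured providers are combined into one Registry. The Datadog, InfluxDB and StatsD registries only populate the backend metrics, so their nil server- and entrypoint-level getters are simply skipped by the checks above.

prometheusRegistry := RegisterPrometheus(&types.Prometheus{})
statsdRegistry := RegisterStatsd(statsdConfig) // statsdConfig: a *types.Statsd assumed to be configured elsewhere

registry := NewMultiRegistry([]Registry{prometheusRegistry, statsdRegistry})
registry.BackendRetriesCounter().With("backend", "backend1").Add(1) // fans out to both providers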
|
||||
|
||||
type standardRegistry struct {
|
||||
enabled bool
|
||||
reqsCounter metrics.Counter
|
||||
reqDurationHistogram metrics.Histogram
|
||||
retriesCounter metrics.Counter
|
||||
enabled bool
|
||||
configReloadsCounter metrics.Counter
|
||||
configReloadsFailureCounter metrics.Counter
|
||||
lastConfigReloadSuccessGauge metrics.Gauge
|
||||
lastConfigReloadFailureGauge metrics.Gauge
|
||||
entrypointReqsCounter metrics.Counter
|
||||
entrypointReqDurationHistogram metrics.Histogram
|
||||
entrypointOpenConnsGauge metrics.Gauge
|
||||
backendReqsCounter metrics.Counter
|
||||
backendReqDurationHistogram metrics.Histogram
|
||||
backendOpenConnsGauge metrics.Gauge
|
||||
backendRetriesCounter metrics.Counter
|
||||
backendServerUpGauge metrics.Gauge
|
||||
}
|
||||
|
||||
func (r *standardRegistry) IsEnabled() bool {
|
||||
return r.enabled
|
||||
}
|
||||
|
||||
func (r *standardRegistry) ReqsCounter() metrics.Counter {
|
||||
return r.reqsCounter
|
||||
func (r *standardRegistry) ConfigReloadsCounter() metrics.Counter {
|
||||
return r.configReloadsCounter
|
||||
}
|
||||
|
||||
func (r *standardRegistry) ReqDurationHistogram() metrics.Histogram {
|
||||
return r.reqDurationHistogram
|
||||
func (r *standardRegistry) ConfigReloadsFailureCounter() metrics.Counter {
|
||||
return r.configReloadsFailureCounter
|
||||
}
|
||||
|
||||
func (r *standardRegistry) RetriesCounter() metrics.Counter {
|
||||
return r.retriesCounter
|
||||
func (r *standardRegistry) LastConfigReloadSuccessGauge() metrics.Gauge {
|
||||
return r.lastConfigReloadSuccessGauge
|
||||
}
|
||||
|
||||
// NewVoidRegistry is a noop implementation of metrics.Registry.
|
||||
// It is used to avoid nil checking in components that do metric collections.
|
||||
func NewVoidRegistry() Registry {
|
||||
return &standardRegistry{
|
||||
enabled: false,
|
||||
reqsCounter: &voidCounter{},
|
||||
reqDurationHistogram: &voidHistogram{},
|
||||
retriesCounter: &voidCounter{},
|
||||
}
|
||||
func (r *standardRegistry) LastConfigReloadFailureGauge() metrics.Gauge {
|
||||
return r.lastConfigReloadFailureGauge
|
||||
}
|
||||
|
||||
type voidCounter struct{}
|
||||
func (r *standardRegistry) EntrypointReqsCounter() metrics.Counter {
|
||||
return r.entrypointReqsCounter
|
||||
}
|
||||
|
||||
func (v *voidCounter) With(labelValues ...string) metrics.Counter { return v }
|
||||
func (v *voidCounter) Add(delta float64) {}
|
||||
func (r *standardRegistry) EntrypointReqDurationHistogram() metrics.Histogram {
|
||||
return r.entrypointReqDurationHistogram
|
||||
}
|
||||
|
||||
type voidHistogram struct{}
|
||||
func (r *standardRegistry) EntrypointOpenConnsGauge() metrics.Gauge {
|
||||
return r.entrypointOpenConnsGauge
|
||||
}
|
||||
|
||||
func (h *voidHistogram) With(labelValues ...string) metrics.Histogram { return h }
|
||||
func (h *voidHistogram) Observe(value float64) {}
|
||||
func (r *standardRegistry) BackendReqsCounter() metrics.Counter {
|
||||
return r.backendReqsCounter
|
||||
}
|
||||
|
||||
func (r *standardRegistry) BackendReqDurationHistogram() metrics.Histogram {
|
||||
return r.backendReqDurationHistogram
|
||||
}
|
||||
|
||||
func (r *standardRegistry) BackendOpenConnsGauge() metrics.Gauge {
|
||||
return r.backendOpenConnsGauge
|
||||
}
|
||||
|
||||
func (r *standardRegistry) BackendRetriesCounter() metrics.Counter {
|
||||
return r.backendRetriesCounter
|
||||
}
|
||||
|
||||
func (r *standardRegistry) BackendServerUpGauge() metrics.Gauge {
|
||||
return r.backendServerUpGauge
|
||||
}
@ -7,29 +7,18 @@ import (
|
|||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestNewVoidRegistry(t *testing.T) {
|
||||
registry := NewVoidRegistry()
|
||||
|
||||
if registry.IsEnabled() {
|
||||
t.Errorf("VoidRegistry should not return true for IsEnabled()")
|
||||
}
|
||||
registry.ReqsCounter().With("some", "value").Add(1)
|
||||
registry.ReqDurationHistogram().With("some", "value").Observe(1)
|
||||
registry.RetriesCounter().With("some", "value").Add(1)
|
||||
}
|
||||
|
||||
func TestNewMultiRegistry(t *testing.T) {
|
||||
registries := []Registry{newCollectingRetryMetrics(), newCollectingRetryMetrics()}
|
||||
registry := NewMultiRegistry(registries)
|
||||
|
||||
registry.ReqsCounter().With("key", "requests").Add(1)
|
||||
registry.ReqDurationHistogram().With("key", "durations").Observe(2)
|
||||
registry.RetriesCounter().With("key", "retries").Add(3)
|
||||
registry.BackendReqsCounter().With("key", "requests").Add(1)
|
||||
registry.BackendReqDurationHistogram().With("key", "durations").Observe(2)
|
||||
registry.BackendRetriesCounter().With("key", "retries").Add(3)
|
||||
|
||||
for _, collectingRegistry := range registries {
|
||||
cReqsCounter := collectingRegistry.ReqsCounter().(*counterMock)
|
||||
cReqDurationHistogram := collectingRegistry.ReqDurationHistogram().(*histogramMock)
|
||||
cRetriesCounter := collectingRegistry.RetriesCounter().(*counterMock)
|
||||
cReqsCounter := collectingRegistry.BackendReqsCounter().(*counterMock)
|
||||
cReqDurationHistogram := collectingRegistry.BackendReqDurationHistogram().(*histogramMock)
|
||||
cRetriesCounter := collectingRegistry.BackendRetriesCounter().(*counterMock)
|
||||
|
||||
wantCounterValue := float64(1)
|
||||
if cReqsCounter.counterValue != wantCounterValue {
|
||||
|
@ -52,9 +41,9 @@ func TestNewMultiRegistry(t *testing.T) {
|
|||
|
||||
func newCollectingRetryMetrics() Registry {
|
||||
return &standardRegistry{
|
||||
reqsCounter: &counterMock{},
|
||||
reqDurationHistogram: &histogramMock{},
|
||||
retriesCounter: &counterMock{},
|
||||
backendReqsCounter: &counterMock{},
|
||||
backendReqDurationHistogram: &histogramMock{},
|
||||
backendRetriesCounter: &counterMock{},
|
||||
}
|
||||
}
@ -2,10 +2,14 @@ package metrics
|
|||
|
||||
import (
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/containous/mux"
|
||||
"github.com/containous/traefik/safe"
|
||||
"github.com/containous/traefik/types"
|
||||
"github.com/go-kit/kit/metrics/prometheus"
|
||||
"github.com/go-kit/kit/metrics"
|
||||
stdprometheus "github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
@ -13,15 +17,50 @@ import (
|
|||
const (
|
||||
metricNamePrefix = "traefik_"
|
||||
|
||||
reqsTotalName = metricNamePrefix + "requests_total"
|
||||
reqDurationName = metricNamePrefix + "request_duration_seconds"
|
||||
retriesTotalName = metricNamePrefix + "backend_retries_total"
|
||||
// server meta information
|
||||
configReloadsTotalName = metricNamePrefix + "config_reloads_total"
|
||||
configReloadsFailuresTotalName = metricNamePrefix + "config_reloads_failure_total"
|
||||
configLastReloadSuccessName = metricNamePrefix + "config_last_reload_success"
|
||||
configLastReloadFailureName = metricNamePrefix + "config_last_reload_failure"
|
||||
|
||||
// entrypoint
|
||||
entrypointReqsTotalName = metricNamePrefix + "entrypoint_requests_total"
|
||||
entrypointReqDurationName = metricNamePrefix + "entrypoint_request_duration_seconds"
|
||||
entrypointOpenConnsName = metricNamePrefix + "entrypoint_open_connections"
|
||||
|
||||
// backend level
|
||||
backendReqsTotalName = metricNamePrefix + "backend_requests_total"
|
||||
backendReqDurationName = metricNamePrefix + "backend_request_duration_seconds"
|
||||
backendOpenConnsName = metricNamePrefix + "backend_open_connections"
|
||||
backendRetriesTotalName = metricNamePrefix + "backend_retries_total"
|
||||
backendServerUpName = metricNamePrefix + "backend_server_up"
|
||||
)
|
||||
|
||||
// PrometheusHandler expose Prometheus routes
|
||||
const (
|
||||
// generationAgeForever indicates that a metric never gets outdated.
|
||||
generationAgeForever = 0
|
||||
// generationAgeDefault is the default age of three generations.
|
||||
generationAgeDefault = 3
|
||||
)
|
||||
|
||||
// promState holds all metric state internally and acts as the only Collector we register for Prometheus.
//
// This enables control to remove metrics that belong to outdated configuration.
// As an example of why this is required, consider that Traefik learns about a new backend.
// It populates the 'traefik_backend_server_up' metric for it with a value of 1 (alive).
// When the backend is later undeployed, the metric would remain in the client library
// until Traefik is restarted.
//
// To solve this problem promState keeps track of configuration generations.
// Every time a new configuration is loaded, the generation is increased by one.
// Metrics that "belong" to a dynamic configuration part of Traefik (e.g. backend, entrypoint)
// are removed if they were last tracked more than three generations ago.
var promState = newPrometheusState()
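To make the generation mechanism described above concrete, here is a sketch of the intended lifecycle; it restates what the comment and the tests further down already establish, using the identifiers from this file:

registry := RegisterPrometheus(&types.Prometheus{})

// generation 0: the backend metric is tracked on first use
registry.BackendServerUpGauge().With("backend", "backend1", "url", "http://127.0.0.1:80").Set(1)

// the backend disappears from the configuration; after more reloads than the max age ...
for i := 0; i < generationAgeDefault+1; i++ {
	OnConfigurationUpdate()
}

// ... the next scrape still reports traefik_backend_server_up for backend1,
// and the scrape after that no longer does.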
|
||||
|
||||
// PrometheusHandler exposes Prometheus routes.
|
||||
type PrometheusHandler struct{}
|
||||
|
||||
// AddRoutes add Prometheus routes on a router
|
||||
// AddRoutes adds Prometheus routes on a router.
|
||||
func (h PrometheusHandler) AddRoutes(router *mux.Router) {
|
||||
router.Methods(http.MethodGet).Path("/metrics").Handler(promhttp.Handler())
|
||||
}
|
||||
|
@ -34,24 +73,332 @@ func RegisterPrometheus(config *types.Prometheus) Registry {
|
|||
buckets = config.Buckets
|
||||
}
|
||||
|
||||
reqCounter := prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Name: reqsTotalName,
|
||||
Help: "How many HTTP requests processed, partitioned by status code and method.",
|
||||
}, []string{"service", "code", "method"})
|
||||
reqDurationHistogram := prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
|
||||
Name: reqDurationName,
|
||||
Help: "How long it took to process the request.",
|
||||
safe.Go(func() {
|
||||
promState.ListenValueUpdates()
|
||||
})
|
||||
|
||||
configReloads := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
|
||||
Name: configReloadsTotalName,
|
||||
Help: "Config reloads",
|
||||
}, []string{})
|
||||
configReloadsFailures := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
|
||||
Name: configReloadsFailuresTotalName,
|
||||
Help: "Config failure reloads",
|
||||
}, []string{})
|
||||
lastConfigReloadSuccess := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
|
||||
Name: configLastReloadSuccessName,
|
||||
Help: "Last config reload success",
|
||||
}, []string{})
|
||||
lastConfigReloadFailure := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
|
||||
Name: configLastReloadFailureName,
|
||||
Help: "Last config reload failure",
|
||||
}, []string{})
|
||||
|
||||
entrypointReqs := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
|
||||
Name: entrypointReqsTotalName,
|
||||
Help: "How many HTTP requests processed on an entrypoint, partitioned by status code, protocol, and method.",
|
||||
}, []string{"code", "method", "protocol", "entrypoint"})
|
||||
entrypointReqDurations := newHistogramFrom(promState.collectors, stdprometheus.HistogramOpts{
|
||||
Name: entrypointReqDurationName,
|
||||
Help: "How long it took to process the request on an entrypoint, partitioned by status code, protocol, and method.",
|
||||
Buckets: buckets,
|
||||
}, []string{"service", "code"})
|
||||
retryCounter := prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Name: retriesTotalName,
|
||||
Help: "How many request retries happened in total.",
|
||||
}, []string{"service"})
|
||||
}, []string{"code", "method", "protocol", "entrypoint"})
|
||||
entrypointOpenConns := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
|
||||
Name: entrypointOpenConnsName,
|
||||
Help: "How many open connections exist on an entrypoint, partitioned by method and protocol.",
|
||||
}, []string{"method", "protocol", "entrypoint"})
|
||||
|
||||
backendReqs := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
|
||||
Name: backendReqsTotalName,
|
||||
Help: "How many HTTP requests processed on a backend, partitioned by status code, protocol, and method.",
|
||||
}, []string{"code", "method", "protocol", "backend"})
|
||||
backendReqDurations := newHistogramFrom(promState.collectors, stdprometheus.HistogramOpts{
|
||||
Name: backendReqDurationName,
|
||||
Help: "How long it took to process the request on a backend, partitioned by status code, protocol, and method.",
|
||||
Buckets: buckets,
|
||||
}, []string{"code", "method", "protocol", "backend"})
|
||||
backendOpenConns := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
|
||||
Name: backendOpenConnsName,
|
||||
Help: "How many open connections exist on a backend, partitioned by method and protocol.",
|
||||
}, []string{"method", "protocol", "backend"})
|
||||
backendRetries := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
|
||||
Name: backendRetriesTotalName,
|
||||
Help: "How many request retries happened on a backend.",
|
||||
}, []string{"backend"})
|
||||
backendServerUp := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
|
||||
Name: backendServerUpName,
|
||||
Help: "Backend server is up, described by gauge value of 0 or 1.",
|
||||
}, []string{"backend", "url"})
|
||||
|
||||
promState.describers = []func(chan<- *stdprometheus.Desc){
|
||||
configReloads.cv.Describe,
|
||||
configReloadsFailures.cv.Describe,
|
||||
lastConfigReloadSuccess.gv.Describe,
|
||||
lastConfigReloadFailure.gv.Describe,
|
||||
entrypointReqs.cv.Describe,
|
||||
entrypointReqDurations.hv.Describe,
|
||||
entrypointOpenConns.gv.Describe,
|
||||
backendReqs.cv.Describe,
|
||||
backendReqDurations.hv.Describe,
|
||||
backendOpenConns.gv.Describe,
|
||||
backendRetries.cv.Describe,
|
||||
backendServerUp.gv.Describe,
|
||||
}
|
||||
stdprometheus.MustRegister(promState)
|
||||
|
||||
return &standardRegistry{
|
||||
enabled: true,
|
||||
reqsCounter: reqCounter,
|
||||
reqDurationHistogram: reqDurationHistogram,
|
||||
retriesCounter: retryCounter,
|
||||
enabled: true,
|
||||
configReloadsCounter: configReloads,
|
||||
configReloadsFailureCounter: configReloadsFailures,
|
||||
lastConfigReloadSuccessGauge: lastConfigReloadSuccess,
|
||||
lastConfigReloadFailureGauge: lastConfigReloadFailure,
|
||||
entrypointReqsCounter: entrypointReqs,
|
||||
entrypointReqDurationHistogram: entrypointReqDurations,
|
||||
entrypointOpenConnsGauge: entrypointOpenConns,
|
||||
backendReqsCounter: backendReqs,
|
||||
backendReqDurationHistogram: backendReqDurations,
|
||||
backendOpenConnsGauge: backendOpenConns,
|
||||
backendRetriesCounter: backendRetries,
|
||||
backendServerUpGauge: backendServerUp,
|
||||
}
|
||||
}
|
||||
|
||||
// OnConfigurationUpdate increases the current generation of the prometheus state.
|
||||
func OnConfigurationUpdate() {
|
||||
promState.IncGeneration()
|
||||
}
|
||||
|
||||
func newPrometheusState() *prometheusState {
|
||||
collectors := make(chan *collector)
|
||||
state := make(map[string]*collector)
|
||||
|
||||
return &prometheusState{
|
||||
collectors: collectors,
|
||||
state: state,
|
||||
}
|
||||
}
|
||||
|
||||
type prometheusState struct {
|
||||
currentGeneration int
|
||||
collectors chan *collector
|
||||
describers []func(ch chan<- *stdprometheus.Desc)
|
||||
|
||||
mtx sync.Mutex
|
||||
state map[string]*collector
|
||||
}
|
||||
|
||||
func (ps *prometheusState) IncGeneration() {
|
||||
ps.mtx.Lock()
|
||||
defer ps.mtx.Unlock()
|
||||
ps.currentGeneration++
|
||||
}
|
||||
|
||||
func (ps *prometheusState) ListenValueUpdates() {
|
||||
for collector := range ps.collectors {
|
||||
ps.mtx.Lock()
|
||||
collector.lastTrackedGeneration = ps.currentGeneration
|
||||
ps.state[collector.id] = collector
|
||||
ps.mtx.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
// Describe implements prometheus.Collector and simply calls
|
||||
// the registered describer functions.
|
||||
func (ps *prometheusState) Describe(ch chan<- *stdprometheus.Desc) {
|
||||
for _, desc := range ps.describers {
|
||||
desc(ch)
|
||||
}
|
||||
}
|
||||
|
||||
// Collect implements prometheus.Collector. It calls the Collect
// method of all metrics it received on the collectors channel.
// It is also responsible for removing metrics that were last tracked
// at least three generations ago; those metrics are cleaned up
// after their Collect method has been called.
|
||||
func (ps *prometheusState) Collect(ch chan<- stdprometheus.Metric) {
|
||||
ps.mtx.Lock()
|
||||
defer ps.mtx.Unlock()
|
||||
|
||||
outdatedKeys := []string{}
|
||||
for key, cs := range ps.state {
|
||||
cs.collector.Collect(ch)
|
||||
|
||||
if cs.maxAge == generationAgeForever {
|
||||
continue
|
||||
}
|
||||
if ps.currentGeneration-cs.lastTrackedGeneration >= cs.maxAge {
|
||||
outdatedKeys = append(outdatedKeys, key)
|
||||
}
|
||||
}
|
||||
|
||||
for _, key := range outdatedKeys {
|
||||
delete(ps.state, key)
|
||||
}
|
||||
}
|
||||
|
||||
func newCollector(metricName string, lnvs labelNamesValues, c stdprometheus.Collector) *collector {
|
||||
maxAge := generationAgeDefault
|
||||
|
||||
// metrics without labels should never become outdated
|
||||
if len(lnvs) == 0 {
|
||||
maxAge = generationAgeForever
|
||||
}
|
||||
|
||||
return &collector{
|
||||
id: buildMetricID(metricName, lnvs),
|
||||
maxAge: maxAge,
|
||||
collector: c,
|
||||
}
|
||||
}
|
||||
|
||||
// collector wraps a Collector object from the Prometheus client library.
// It adds information on how many generations this metric should be present
// in the /metrics output, relative to the time it was last tracked.
type collector struct {
	id                    string
	collector             stdprometheus.Collector
	lastTrackedGeneration int
	maxAge                int
}
|
||||
|
||||
func buildMetricID(metricName string, lnvs labelNamesValues) string {
|
||||
newLnvs := append([]string{}, lnvs...)
|
||||
sort.Strings(newLnvs)
|
||||
return metricName + ":" + strings.Join(newLnvs, "|")
|
||||
}
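The metric ID is the metric name plus the sorted, flattened label names and values, so the same time series always maps to the same collector entry regardless of label order. For illustration:

buildMetricID(backendServerUpName, labelNamesValues{"url", "http://127.0.0.1:80", "backend", "backend1"})
// -> "traefik_backend_server_up:backend|backend1|http://127.0.0.1:80|url"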
|
||||
|
||||
func newCounterFrom(collectors chan<- *collector, opts stdprometheus.CounterOpts, labelNames []string) *counter {
|
||||
cv := stdprometheus.NewCounterVec(opts, labelNames)
|
||||
c := &counter{
|
||||
name: opts.Name,
|
||||
cv: cv,
|
||||
collectors: collectors,
|
||||
}
|
||||
if len(labelNames) == 0 {
|
||||
c.Add(0)
|
||||
}
|
||||
return c
|
||||
}
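Worth noting (my reading of the code, not stated in the commit message): metrics without labels are observed once with a zero value at construction, so they show up in the very first scrape, and since newCollector assigns label-less metrics generationAgeForever they are never expired.

// e.g. immediately after RegisterPrometheus, before any reload has happened:
// traefik_config_reloads_total 0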
|
||||
|
||||
type counter struct {
|
||||
name string
|
||||
cv *stdprometheus.CounterVec
|
||||
labelNamesValues labelNamesValues
|
||||
collectors chan<- *collector
|
||||
}
|
||||
|
||||
func (c *counter) With(labelValues ...string) metrics.Counter {
|
||||
return &counter{
|
||||
name: c.name,
|
||||
cv: c.cv,
|
||||
labelNamesValues: c.labelNamesValues.With(labelValues...),
|
||||
collectors: c.collectors,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *counter) Add(delta float64) {
|
||||
collector := c.cv.With(c.labelNamesValues.ToLabels())
|
||||
collector.Add(delta)
|
||||
c.collectors <- newCollector(c.name, c.labelNamesValues, collector)
|
||||
}
|
||||
|
||||
func (c *counter) Describe(ch chan<- *stdprometheus.Desc) {
|
||||
c.cv.Describe(ch)
|
||||
}
|
||||
|
||||
func newGaugeFrom(collectors chan<- *collector, opts stdprometheus.GaugeOpts, labelNames []string) *gauge {
|
||||
gv := stdprometheus.NewGaugeVec(opts, labelNames)
|
||||
g := &gauge{
|
||||
name: opts.Name,
|
||||
gv: gv,
|
||||
collectors: collectors,
|
||||
}
|
||||
if len(labelNames) == 0 {
|
||||
g.Set(0)
|
||||
}
|
||||
return g
|
||||
}
|
||||
|
||||
type gauge struct {
|
||||
name string
|
||||
gv *stdprometheus.GaugeVec
|
||||
labelNamesValues labelNamesValues
|
||||
collectors chan<- *collector
|
||||
}
|
||||
|
||||
func (g *gauge) With(labelValues ...string) metrics.Gauge {
|
||||
return &gauge{
|
||||
name: g.name,
|
||||
gv: g.gv,
|
||||
labelNamesValues: g.labelNamesValues.With(labelValues...),
|
||||
collectors: g.collectors,
|
||||
}
|
||||
}
|
||||
|
||||
func (g *gauge) Set(value float64) {
|
||||
collector := g.gv.With(g.labelNamesValues.ToLabels())
|
||||
collector.Set(value)
|
||||
g.collectors <- newCollector(g.name, g.labelNamesValues, collector)
|
||||
}
|
||||
|
||||
func (g *gauge) Describe(ch chan<- *stdprometheus.Desc) {
|
||||
g.gv.Describe(ch)
|
||||
}
|
||||
|
||||
func newHistogramFrom(collectors chan<- *collector, opts stdprometheus.HistogramOpts, labelNames []string) *histogram {
|
||||
hv := stdprometheus.NewHistogramVec(opts, labelNames)
|
||||
return &histogram{
|
||||
name: opts.Name,
|
||||
hv: hv,
|
||||
collectors: collectors,
|
||||
}
|
||||
}
|
||||
|
||||
type histogram struct {
|
||||
name string
|
||||
hv *stdprometheus.HistogramVec
|
||||
labelNamesValues labelNamesValues
|
||||
collectors chan<- *collector
|
||||
}
|
||||
|
||||
func (h *histogram) With(labelValues ...string) metrics.Histogram {
|
||||
return &histogram{
|
||||
name: h.name,
|
||||
hv: h.hv,
|
||||
labelNamesValues: h.labelNamesValues.With(labelValues...),
|
||||
collectors: h.collectors,
|
||||
}
|
||||
}
|
||||
|
||||
func (h *histogram) Observe(value float64) {
|
||||
collector := h.hv.With(h.labelNamesValues.ToLabels())
|
||||
collector.Observe(value)
|
||||
h.collectors <- newCollector(h.name, h.labelNamesValues, collector)
|
||||
}
|
||||
|
||||
func (h *histogram) Describe(ch chan<- *stdprometheus.Desc) {
|
||||
h.hv.Describe(ch)
|
||||
}
|
||||
|
||||
// labelNamesValues is a type alias that provides validation on its With method.
|
||||
// Metrics may include it as a member to help them satisfy With semantics and
|
||||
// save some code duplication.
|
||||
type labelNamesValues []string
|
||||
|
||||
// With validates the input, and returns a new aggregate labelNamesValues.
|
||||
func (lvs labelNamesValues) With(labelValues ...string) labelNamesValues {
|
||||
if len(labelValues)%2 != 0 {
|
||||
labelValues = append(labelValues, "unknown")
|
||||
}
|
||||
return append(lvs, labelValues...)
|
||||
}
|
||||
|
||||
// ToLabels is a convenience method to convert a labelNamesValues
|
||||
// to the native prometheus.Labels.
|
||||
func (lvs labelNamesValues) ToLabels() stdprometheus.Labels {
|
||||
labels := stdprometheus.Labels{}
|
||||
for i := 0; i < len(lvs); i += 2 {
|
||||
labels[lvs[i]] = lvs[i+1]
|
||||
}
|
||||
return labels
|
||||
}
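For illustration, the flat go-kit style name/value list is converted into prometheus.Labels before the vector lookup:

labelNamesValues{"backend", "backend1", "code", "200"}.ToLabels()
// -> prometheus.Labels{"backend": "backend1", "code": "200"}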
@ -1,9 +1,11 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/containous/traefik/types"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
|
@ -12,20 +14,54 @@ import (
|
|||
|
||||
func TestPrometheus(t *testing.T) {
|
||||
prometheusRegistry := RegisterPrometheus(&types.Prometheus{})
|
||||
defer prometheus.Unregister(promState)
|
||||
|
||||
if !prometheusRegistry.IsEnabled() {
|
||||
t.Errorf("PrometheusRegistry should return true for IsEnabled()")
|
||||
}
|
||||
prometheusRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
prometheusRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
prometheusRegistry.ReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
prometheusRegistry.ReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
prometheusRegistry.RetriesCounter().With("service", "test").Add(1)
|
||||
|
||||
metricsFamilies, err := prometheus.DefaultGatherer.Gather()
|
||||
if err != nil {
|
||||
t.Fatalf("could not gather metrics families: %s", err)
|
||||
}
|
||||
prometheusRegistry.ConfigReloadsCounter().Add(1)
|
||||
prometheusRegistry.ConfigReloadsFailureCounter().Add(1)
|
||||
prometheusRegistry.LastConfigReloadSuccessGauge().Set(float64(time.Now().Unix()))
|
||||
prometheusRegistry.LastConfigReloadFailureGauge().Set(float64(time.Now().Unix()))
|
||||
|
||||
prometheusRegistry.
|
||||
EntrypointReqsCounter().
|
||||
With("code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http", "entrypoint", "http").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
EntrypointReqDurationHistogram().
|
||||
With("code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http", "entrypoint", "http").
|
||||
Observe(1)
|
||||
prometheusRegistry.
|
||||
EntrypointOpenConnsGauge().
|
||||
With("method", http.MethodGet, "protocol", "http", "entrypoint", "http").
|
||||
Set(1)
|
||||
|
||||
prometheusRegistry.
|
||||
BackendReqsCounter().
|
||||
With("backend", "backend1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
BackendReqDurationHistogram().
|
||||
With("backend", "backend1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Observe(10000)
|
||||
prometheusRegistry.
|
||||
BackendOpenConnsGauge().
|
||||
With("backend", "backend1", "method", http.MethodGet, "protocol", "http").
|
||||
Set(1)
|
||||
prometheusRegistry.
|
||||
BackendRetriesCounter().
|
||||
With("backend", "backend1").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
BackendServerUpGauge().
|
||||
With("backend", "backend1", "url", "http://127.0.0.10:80").
|
||||
Set(1)
|
||||
|
||||
delayForTrackingCompletion()
|
||||
|
||||
metricsFamilies := mustScrape()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
|
@ -33,46 +69,93 @@ func TestPrometheus(t *testing.T) {
|
|||
assert func(*dto.MetricFamily)
|
||||
}{
|
||||
{
|
||||
name: reqsTotalName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"service": "test",
|
||||
},
|
||||
assert: func(family *dto.MetricFamily) {
|
||||
cv := family.Metric[0].Counter.GetValue()
|
||||
expectedCv := float64(2)
|
||||
if cv != expectedCv {
|
||||
t.Errorf("gathered metrics do not contain correct value for total requests, got %f expected %f", cv, expectedCv)
|
||||
}
|
||||
},
|
||||
name: configReloadsTotalName,
|
||||
assert: buildCounterAssert(t, configReloadsTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: reqDurationName,
|
||||
labels: map[string]string{
|
||||
"service": "test",
|
||||
"code": "200",
|
||||
},
|
||||
assert: func(family *dto.MetricFamily) {
|
||||
sc := family.Metric[0].Histogram.GetSampleCount()
|
||||
expectedSc := uint64(2)
|
||||
if sc != expectedSc {
|
||||
t.Errorf("gathered metrics do not contain correct sample count for request duration, got %d expected %d", sc, expectedSc)
|
||||
}
|
||||
},
|
||||
name: configReloadsFailuresTotalName,
|
||||
assert: buildCounterAssert(t, configReloadsFailuresTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: retriesTotalName,
|
||||
name: configLastReloadSuccessName,
|
||||
assert: buildTimestampAssert(t, configLastReloadSuccessName),
|
||||
},
|
||||
{
|
||||
name: configLastReloadFailureName,
|
||||
assert: buildTimestampAssert(t, configLastReloadFailureName),
|
||||
},
|
||||
{
|
||||
name: entrypointReqsTotalName,
|
||||
labels: map[string]string{
|
||||
"service": "test",
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"entrypoint": "http",
|
||||
},
|
||||
assert: func(family *dto.MetricFamily) {
|
||||
cv := family.Metric[0].Counter.GetValue()
|
||||
expectedCv := float64(1)
|
||||
if cv != expectedCv {
|
||||
t.Errorf("gathered metrics do not contain correct value for total retries, got %f expected %f", cv, expectedCv)
|
||||
}
|
||||
assert: buildCounterAssert(t, entrypointReqsTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: entrypointReqDurationName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"entrypoint": "http",
|
||||
},
|
||||
assert: buildHistogramAssert(t, entrypointReqDurationName, 1),
|
||||
},
|
||||
{
|
||||
name: entrypointOpenConnsName,
|
||||
labels: map[string]string{
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"entrypoint": "http",
|
||||
},
|
||||
assert: buildGaugeAssert(t, entrypointOpenConnsName, 1),
|
||||
},
|
||||
{
|
||||
name: backendReqsTotalName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"backend": "backend1",
|
||||
},
|
||||
assert: buildCounterAssert(t, backendReqsTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: backendReqDurationName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"backend": "backend1",
|
||||
},
|
||||
assert: buildHistogramAssert(t, backendReqDurationName, 1),
|
||||
},
|
||||
{
|
||||
name: backendOpenConnsName,
|
||||
labels: map[string]string{
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"backend": "backend1",
|
||||
},
|
||||
assert: buildGaugeAssert(t, backendOpenConnsName, 1),
|
||||
},
|
||||
{
|
||||
name: backendRetriesTotalName,
|
||||
labels: map[string]string{
|
||||
"backend": "backend1",
|
||||
},
|
||||
assert: buildGreaterThanCounterAssert(t, backendRetriesTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: backendServerUpName,
|
||||
labels: map[string]string{
|
||||
"backend": "backend1",
|
||||
"url": "http://127.0.0.10:80",
|
||||
},
|
||||
assert: buildGaugeAssert(t, backendServerUpName, 1),
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -94,6 +177,90 @@ func TestPrometheus(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestPrometheusGenerationLogicForMetricWithLabel(t *testing.T) {
|
||||
prometheusRegistry := RegisterPrometheus(&types.Prometheus{})
|
||||
defer prometheus.Unregister(promState)
|
||||
|
||||
// Metrics with labels belonging to a specific configuration in Traefik
// should be removed once the maximum generation age is exceeded. As an example
// we use the traefik_backend_requests_total metric.
|
||||
prometheusRegistry.
|
||||
BackendReqsCounter().
|
||||
With("backend", "backend1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Add(1)
|
||||
|
||||
delayForTrackingCompletion()
|
||||
|
||||
assertMetricExists(t, backendReqsTotalName, mustScrape())
|
||||
|
||||
// Increase the config generation one more than the max age of a metric.
|
||||
for i := 0; i < generationAgeDefault+1; i++ {
|
||||
OnConfigurationUpdate()
|
||||
}
|
||||
|
||||
// On the next scrape the metric still exists and will be removed
// only after that scrape has completed.
|
||||
assertMetricExists(t, backendReqsTotalName, mustScrape())
|
||||
|
||||
// Now the metric should be absent.
|
||||
assertMetricAbsent(t, backendReqsTotalName, mustScrape())
|
||||
}
|
||||
|
||||
func TestPrometheusGenerationLogicForMetricWithoutLabel(t *testing.T) {
|
||||
prometheusRegistry := RegisterPrometheus(&types.Prometheus{})
|
||||
defer prometheus.Unregister(promState)
|
||||
|
||||
// Metrics without labels like traefik_config_reloads_total should live forever
|
||||
// and never get removed.
|
||||
prometheusRegistry.ConfigReloadsCounter().Add(1)
|
||||
|
||||
delayForTrackingCompletion()
|
||||
|
||||
assertMetricExists(t, configReloadsTotalName, mustScrape())
|
||||
|
||||
// Increase the config generation one more than the max age of a metric.
|
||||
for i := 0; i < generationAgeDefault+100; i++ {
|
||||
OnConfigurationUpdate()
|
||||
}
|
||||
|
||||
// Scrape twice in order to verify that it is not removed after the
// first scrape has completed.
|
||||
assertMetricExists(t, configReloadsTotalName, mustScrape())
|
||||
assertMetricExists(t, configReloadsTotalName, mustScrape())
|
||||
}
|
||||
|
||||
// Tracking and gathering the metrics happens concurrently.
// In practice this is no problem, because a tracked metric that misses
// the current scrape will simply show up in the next one.
// To test the tracking of all metrics reliably, we sleep for a short
// amount of time to make sure each metric is present in the next scrape.
func delayForTrackingCompletion() {
	time.Sleep(250 * time.Millisecond)
}
|
||||
|
||||
func mustScrape() []*dto.MetricFamily {
|
||||
families, err := prometheus.DefaultGatherer.Gather()
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("could not gather metrics families: %s", err))
|
||||
}
|
||||
return families
|
||||
}
|
||||
|
||||
func assertMetricExists(t *testing.T, name string, families []*dto.MetricFamily) {
|
||||
t.Helper()
|
||||
if findMetricFamily(name, families) == nil {
|
||||
t.Errorf("gathered metrics do not contain %q", name)
|
||||
}
|
||||
}
|
||||
|
||||
func assertMetricAbsent(t *testing.T, name string, families []*dto.MetricFamily) {
|
||||
t.Helper()
|
||||
if findMetricFamily(name, families) != nil {
|
||||
t.Errorf("gathered metrics contain %q, but should not", name)
|
||||
}
|
||||
}
|
||||
|
||||
func findMetricFamily(name string, families []*dto.MetricFamily) *dto.MetricFamily {
|
||||
for _, family := range families {
|
||||
if family.GetName() == name {
|
||||
|
@ -102,3 +269,43 @@ func findMetricFamily(name string, families []*dto.MetricFamily) *dto.MetricFami
|
|||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func buildCounterAssert(t *testing.T, metricName string, expectedValue int) func(family *dto.MetricFamily) {
|
||||
return func(family *dto.MetricFamily) {
|
||||
if cv := int(family.Metric[0].Counter.GetValue()); cv != expectedValue {
|
||||
t.Errorf("metric %s has value %d, want %d", metricName, cv, expectedValue)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func buildGreaterThanCounterAssert(t *testing.T, metricName string, expectedMinValue int) func(family *dto.MetricFamily) {
|
||||
return func(family *dto.MetricFamily) {
|
||||
if cv := int(family.Metric[0].Counter.GetValue()); cv < expectedMinValue {
|
||||
t.Errorf("metric %s has value %d, want at least %d", metricName, cv, expectedMinValue)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func buildHistogramAssert(t *testing.T, metricName string, expectedSampleCount int) func(family *dto.MetricFamily) {
|
||||
return func(family *dto.MetricFamily) {
|
||||
if sc := int(family.Metric[0].Histogram.GetSampleCount()); sc != expectedSampleCount {
|
||||
t.Errorf("metric %s has sample count value %d, want %d", metricName, sc, expectedSampleCount)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func buildGaugeAssert(t *testing.T, metricName string, expectedValue int) func(family *dto.MetricFamily) {
|
||||
return func(family *dto.MetricFamily) {
|
||||
if gv := int(family.Metric[0].Gauge.GetValue()); gv != expectedValue {
|
||||
t.Errorf("metric %s has value %d, want %d", metricName, gv, expectedValue)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func buildTimestampAssert(t *testing.T, metricName string) func(family *dto.MetricFamily) {
|
||||
return func(family *dto.MetricFamily) {
|
||||
if ts := time.Unix(int64(family.Metric[0].Gauge.GetValue()), 0); time.Since(ts) > time.Minute {
|
||||
t.Errorf("metric %s has wrong timestamp %v", metricName, ts)
|
||||
}
|
||||
}
|
||||
}
@ -30,10 +30,10 @@ func RegisterStatsd(config *types.Statsd) Registry {
|
|||
}
|
||||
|
||||
return &standardRegistry{
|
||||
enabled: true,
|
||||
reqsCounter: statsdClient.NewCounter(statsdMetricsReqsName, 1.0),
|
||||
reqDurationHistogram: statsdClient.NewTiming(statsdMetricsLatencyName, 1.0),
|
||||
retriesCounter: statsdClient.NewCounter(statsdRetriesTotalName, 1.0),
|
||||
enabled: true,
|
||||
backendReqsCounter: statsdClient.NewCounter(statsdMetricsReqsName, 1.0),
|
||||
backendReqDurationHistogram: statsdClient.NewTiming(statsdMetricsLatencyName, 1.0),
|
||||
backendRetriesCounter: statsdClient.NewCounter(statsdRetriesTotalName, 1.0),
|
||||
}
|
||||
}
@ -29,10 +29,10 @@ func TestStatsD(t *testing.T) {
|
|||
}
|
||||
|
||||
udp.ShouldReceiveAll(t, expected, func() {
|
||||
statsdRegistry.ReqsCounter().With("service", "test", "code", string(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
statsdRegistry.ReqsCounter().With("service", "test", "code", string(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
statsdRegistry.RetriesCounter().With("service", "test").Add(1)
|
||||
statsdRegistry.RetriesCounter().With("service", "test").Add(1)
|
||||
statsdRegistry.ReqDurationHistogram().With("service", "test", "code", string(http.StatusOK)).Observe(10000)
|
||||
statsdRegistry.BackendReqsCounter().With("service", "test", "code", string(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
statsdRegistry.BackendReqsCounter().With("service", "test", "code", string(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
statsdRegistry.BackendRetriesCounter().With("service", "test").Add(1)
|
||||
statsdRegistry.BackendRetriesCounter().With("service", "test").Add(1)
|
||||
statsdRegistry.BackendReqDurationHistogram().With("service", "test", "code", string(http.StatusOK)).Observe(10000)
|
||||
})
|
||||
}
@ -3,51 +3,100 @@ package middlewares
|
|||
import (
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/containous/traefik/log"
|
||||
"github.com/containous/traefik/metrics"
|
||||
gokitmetrics "github.com/go-kit/kit/metrics"
|
||||
"github.com/urfave/negroni"
|
||||
)
|
||||
|
||||
// MetricsWrapper is a Negroni compatible Handler which relies on a
|
||||
// given Metrics implementation to expose and monitor Traefik Metrics.
|
||||
type MetricsWrapper struct {
|
||||
registry metrics.Registry
|
||||
serviceName string
|
||||
}
|
||||
const (
|
||||
protoHTTP = "http"
|
||||
protoSSE = "sse"
|
||||
protoWebsocket = "websocket"
|
||||
)
|
||||
|
||||
// NewMetricsWrapper return a MetricsWrapper struct with
|
||||
// a given Metrics implementation
|
||||
func NewMetricsWrapper(registry metrics.Registry, service string) *MetricsWrapper {
|
||||
var metricsWrapper = MetricsWrapper{
|
||||
registry: registry,
|
||||
serviceName: service,
|
||||
// NewEntryPointMetricsMiddleware creates a new metrics middleware for an Entrypoint.
|
||||
func NewEntryPointMetricsMiddleware(registry metrics.Registry, entryPointName string) negroni.Handler {
|
||||
return &metricsMiddleware{
|
||||
reqsCounter: registry.EntrypointReqsCounter(),
|
||||
reqDurationHistogram: registry.EntrypointReqDurationHistogram(),
|
||||
openConnsGauge: registry.EntrypointOpenConnsGauge(),
|
||||
baseLabels: []string{"entrypoint", entryPointName},
|
||||
}
|
||||
|
||||
return &metricsWrapper
|
||||
}
|
||||
|
||||
func (m *MetricsWrapper) ServeHTTP(rw http.ResponseWriter, r *http.Request, next http.HandlerFunc) {
|
||||
// NewBackendMetricsMiddleware creates a new metrics middleware for a Backend.
|
||||
func NewBackendMetricsMiddleware(registry metrics.Registry, backendName string) negroni.Handler {
|
||||
return &metricsMiddleware{
|
||||
reqsCounter: registry.BackendReqsCounter(),
|
||||
reqDurationHistogram: registry.BackendReqDurationHistogram(),
|
||||
openConnsGauge: registry.BackendOpenConnsGauge(),
|
||||
baseLabels: []string{"backend", backendName},
|
||||
}
|
||||
}
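A hypothetical wiring sketch (not part of this diff; registry, router and the handler variables are placeholders) showing how the two constructors are meant to be used with negroni, one per entry point and one per backend:

n := negroni.New()
n.Use(NewEntryPointMetricsMiddleware(registry, "http"))
n.UseHandler(router)

backendChain := negroni.New(NewBackendMetricsMiddleware(registry, "backend1"))
backendChain.UseHandler(backendHandler)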
|
||||
|
||||
type metricsMiddleware struct {
|
||||
reqsCounter gokitmetrics.Counter
|
||||
reqDurationHistogram gokitmetrics.Histogram
|
||||
openConnsGauge gokitmetrics.Gauge
|
||||
baseLabels []string
|
||||
openConns int64
|
||||
}
|
||||
|
||||
func (m *metricsMiddleware) ServeHTTP(rw http.ResponseWriter, r *http.Request, next http.HandlerFunc) {
|
||||
labels := []string{"method", getMethod(r), "protocol", getRequestProtocol(r)}
|
||||
labels = append(labels, m.baseLabels...)
|
||||
|
||||
openConns := atomic.AddInt64(&m.openConns, 1)
|
||||
m.openConnsGauge.With(labels...).Set(float64(openConns))
|
||||
defer func(labelValues []string) {
|
||||
openConns := atomic.AddInt64(&m.openConns, -1)
|
||||
m.openConnsGauge.With(labelValues...).Set(float64(openConns))
|
||||
}(labels)
|
||||
|
||||
start := time.Now()
|
||||
prw := &responseRecorder{rw, http.StatusOK}
|
||||
next(prw, r)
|
||||
recorder := &responseRecorder{rw, http.StatusOK}
|
||||
next(recorder, r)
|
||||
|
||||
reqLabels := []string{"service", m.serviceName, "code", strconv.Itoa(prw.statusCode), "method", getMethod(r)}
|
||||
m.registry.ReqsCounter().With(reqLabels...).Add(1)
|
||||
|
||||
reqDurationLabels := []string{"service", m.serviceName, "code", strconv.Itoa(prw.statusCode)}
|
||||
m.registry.ReqDurationHistogram().With(reqDurationLabels...).Observe(time.Since(start).Seconds())
|
||||
labels = append(labels, "code", strconv.Itoa(recorder.statusCode))
|
||||
m.reqsCounter.With(labels...).Add(1)
|
||||
m.reqDurationHistogram.With(labels...).Observe(float64(time.Since(start).Seconds()))
|
||||
}

type retryMetrics interface {
	RetriesCounter() gokitmetrics.Counter
func getRequestProtocol(req *http.Request) string {
	switch {
	case isWebsocketRequest(req):
		return protoWebsocket
	case isSSERequest(req):
		return protoSSE
	default:
		return protoHTTP
	}
}

// NewMetricsRetryListener instantiates a MetricsRetryListener with the given retryMetrics.
func NewMetricsRetryListener(retryMetrics retryMetrics, backendName string) RetryListener {
	return &MetricsRetryListener{retryMetrics: retryMetrics, backendName: backendName}
// isWebsocketRequest determines if the specified HTTP request is a websocket handshake request.
func isWebsocketRequest(req *http.Request) bool {
	return containsHeader(req, "Connection", "upgrade") && containsHeader(req, "Upgrade", "websocket")
}

// isSSERequest determines if the specified HTTP request is a request for an event subscription.
func isSSERequest(req *http.Request) bool {
	return containsHeader(req, "Accept", "text/event-stream")
}

func containsHeader(req *http.Request, name, value string) bool {
	items := strings.Split(req.Header.Get(name), ",")
	for _, item := range items {
		if value == strings.ToLower(strings.TrimSpace(item)) {
			return true
		}
	}
	return false
}

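As a quick illustration of the new protocol detection, the sketch below classifies a websocket handshake and an SSE subscription. It is test-style code assumed to live in the same package, since the helpers are unexported; the concrete string values of protoWebsocket, protoSSE and protoHTTP are not shown in this diff and are assumptions.

package middlewares

import (
	"fmt"
	"net/http"
	"net/http/httptest"
)

// sketchProtocolClassification is a hypothetical helper exercising the functions above.
func sketchProtocolClassification() {
	ws := httptest.NewRequest(http.MethodGet, "http://localhost/ws", nil)
	ws.Header.Set("Connection", "Upgrade")
	ws.Header.Set("Upgrade", "websocket")

	sse := httptest.NewRequest(http.MethodGet, "http://localhost/events", nil)
	sse.Header.Set("Accept", "text/event-stream")

	plain := httptest.NewRequest(http.MethodGet, "http://localhost/", nil)

	// containsHeader lower-cases and trims each comma-separated item, so a header
	// like "Connection: keep-alive, Upgrade" also matches.
	fmt.Println(getRequestProtocol(ws))    // value of protoWebsocket
	fmt.Println(getRequestProtocol(sse))   // value of protoSSE
	fmt.Println(getRequestProtocol(plain)) // value of protoHTTP
}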
func getMethod(r *http.Request) string {
@@ -58,6 +107,15 @@ func getMethod(r *http.Request) string {
	return r.Method
}

type retryMetrics interface {
	BackendRetriesCounter() gokitmetrics.Counter
}

// NewMetricsRetryListener instantiates a MetricsRetryListener with the given retryMetrics.
func NewMetricsRetryListener(retryMetrics retryMetrics, backendName string) RetryListener {
	return &MetricsRetryListener{retryMetrics: retryMetrics, backendName: backendName}
}

// MetricsRetryListener is an implementation of the RetryListener interface to
// record RequestMetrics about retry attempts.
type MetricsRetryListener struct {
@@ -67,5 +125,5 @@ type MetricsRetryListener struct {

// Retried tracks the retry in the RequestMetrics implementation.
func (m *MetricsRetryListener) Retried(req *http.Request, attempt int) {
	m.retryMetrics.RetriesCounter().With("backend", m.backendName).Add(1)
	m.retryMetrics.BackendRetriesCounter().With("backend", m.backendName).Add(1)
}

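The retry listener now draws its counter from the shared registry through BackendRetriesCounter(). A minimal wiring sketch follows; the traefik-internal import paths, the RegisterPrometheus signature and the empty types.Prometheus literal are assumptions inferred from this diff, not confirmed API.

package main

import (
	"net/http"
	"net/http/httptest"

	"github.com/containous/traefik/metrics"
	"github.com/containous/traefik/middlewares"
	"github.com/containous/traefik/types"
)

func main() {
	// Any value exposing BackendRetriesCounter() satisfies the listener's retryMetrics
	// dependency; here the Prometheus-backed registry is assumed to provide it.
	registry := metrics.RegisterPrometheus(&types.Prometheus{})

	listener := middlewares.NewMetricsRetryListener(registry, "backend-1")

	req := httptest.NewRequest(http.MethodGet, "http://localhost/", nil)
	listener.Retried(req, 1) // adds 1 to the retries counter labelled backend="backend-1"
}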
@@ -6,6 +6,7 @@ import (
	"reflect"
	"testing"

	"github.com/containous/traefik/testhelpers"
	"github.com/go-kit/kit/metrics"
)

@@ -17,39 +18,25 @@ func TestMetricsRetryListener(t *testing.T) {
	retryListener.Retried(req, 2)

	wantCounterValue := float64(2)
	if retryMetrics.retryCounter.counterValue != wantCounterValue {
		t.Errorf("got counter value of %d, want %d", retryMetrics.retryCounter.counterValue, wantCounterValue)
	if retryMetrics.retriesCounter.CounterValue != wantCounterValue {
		t.Errorf("got counter value of %d, want %d", retryMetrics.retriesCounter.CounterValue, wantCounterValue)
	}

	wantLabelValues := []string{"backend", "backendName"}
	if !reflect.DeepEqual(retryMetrics.retryCounter.lastLabelValues, wantLabelValues) {
		t.Errorf("wrong label values %v used, want %v", retryMetrics.retryCounter.lastLabelValues, wantLabelValues)
	if !reflect.DeepEqual(retryMetrics.retriesCounter.LastLabelValues, wantLabelValues) {
		t.Errorf("wrong label values %v used, want %v", retryMetrics.retriesCounter.LastLabelValues, wantLabelValues)
	}
}

// collectingRetryMetrics is an implementation of the retryMetrics interface that can be used inside tests to collect the times Add() was called.
type collectingRetryMetrics struct {
	retryCounter *collectingCounter
	retriesCounter *testhelpers.CollectingCounter
}

func newCollectingRetryMetrics() collectingRetryMetrics {
	return collectingRetryMetrics{retryCounter: &collectingCounter{}}
func newCollectingRetryMetrics() *collectingRetryMetrics {
	return &collectingRetryMetrics{retriesCounter: &testhelpers.CollectingCounter{}}
}

func (metrics collectingRetryMetrics) RetriesCounter() metrics.Counter {
	return metrics.retryCounter
}

type collectingCounter struct {
	counterValue    float64
	lastLabelValues []string
}

func (c *collectingCounter) With(labelValues ...string) metrics.Counter {
	c.lastLabelValues = labelValues
	return c
}

func (c *collectingCounter) Add(delta float64) {
	c.counterValue += delta
func (metrics *collectingRetryMetrics) BackendRetriesCounter() metrics.Counter {
	return metrics.retriesCounter
}

@@ -122,10 +122,7 @@ func NewServer(globalConfiguration configuration.GlobalConfiguration) *Server {
		server.tracingMiddleware.Setup()
	}

	server.metricsRegistry = metrics.NewVoidRegistry()
	if globalConfiguration.Metrics != nil {
		server.registerMetricClients(globalConfiguration.Metrics)
	}
	server.metricsRegistry = registerMetricClients(globalConfiguration.Metrics)

	if globalConfiguration.Cluster != nil {
		// leadership creation if cluster mode
@@ -304,7 +301,7 @@ func (s *Server) setupServerEntryPoint(newServerEntryPointName string, newServer
		serverMiddlewares = append(serverMiddlewares, s.accessLoggerMiddleware)
	}
	if s.metricsRegistry.IsEnabled() {
		serverMiddlewares = append(serverMiddlewares, middlewares.NewMetricsWrapper(s.metricsRegistry, newServerEntryPointName))
		serverMiddlewares = append(serverMiddlewares, middlewares.NewEntryPointMetricsMiddleware(s.metricsRegistry, newServerEntryPointName))
	}
	if s.globalConfiguration.API != nil {
		if s.globalConfiguration.API.Stats == nil {
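For orientation, serverMiddlewares is a slice of negroni handlers that is assembled into the entry point's handler chain, which is why appending the metrics middleware here is enough to instrument every request. A condensed sketch of that assembly follows; the negroni import path and the surrounding server internals are assumptions.

package main

import (
	"net/http"

	"github.com/urfave/negroni" // assumed import path; Traefik vendors its own copy
)

func main() {
	var serverMiddlewares []negroni.Handler
	// ... access logger, metrics, stats middlewares appended here, as in setupServerEntryPoint ...

	n := negroni.New(serverMiddlewares...)
	n.UseHandler(http.NewServeMux()) // stand-in for the entry point router
	http.ListenAndServe(":8080", n)
}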
@@ -449,8 +446,10 @@ func (s *Server) loadConfiguration(configMsg types.ConfigMessage) {
	}
	newConfigurations[configMsg.ProviderName] = configMsg.Configuration

	s.metricsRegistry.ConfigReloadsCounter().Add(1)
	newServerEntryPoints, err := s.loadConfig(newConfigurations, s.globalConfiguration)
	if err == nil {
		s.metricsRegistry.LastConfigReloadSuccessGauge().Set(float64(time.Now().Unix()))
		for newServerEntryPointName, newServerEntryPoint := range newServerEntryPoints {
			s.serverEntryPoints[newServerEntryPointName].httpRouter.UpdateHandler(newServerEntryPoint.httpRouter.GetHandler())
			if s.globalConfiguration.EntryPoints[newServerEntryPointName].TLS == nil {
@@ -465,6 +464,8 @@ func (s *Server) loadConfiguration(configMsg types.ConfigMessage) {
		s.currentConfigurations.Set(newConfigurations)
		s.postLoadConfiguration()
	} else {
		s.metricsRegistry.ConfigReloadsFailureCounter().Add(1)
		s.metricsRegistry.LastConfigReloadFailureGauge().Set(float64(time.Now().Unix()))
		log.Error("Error loading new configuration, aborted ", err)
	}
}
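The reload bookkeeping follows a simple pattern: count every reload attempt, stamp the time of the last success or failure, and count failures separately. A self-contained sketch of that same pattern, using the collecting test helpers added by this commit in place of a real registry (the reload function is a stand-in, not Traefik code):

package main

import (
	"errors"
	"time"

	"github.com/containous/traefik/testhelpers"
)

func main() {
	reloads := &testhelpers.CollectingCounter{}
	reloadFailures := &testhelpers.CollectingCounter{}
	lastSuccess := &testhelpers.CollectingGauge{}
	lastFailure := &testhelpers.CollectingGauge{}

	reload := func() error { return errors.New("boom") } // stand-in for loadConfig

	reloads.Add(1) // every attempt is counted
	if err := reload(); err == nil {
		lastSuccess.Set(float64(time.Now().Unix()))
	} else {
		reloadFailures.Add(1)
		lastFailure.Set(float64(time.Now().Unix()))
	}
}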
@@ -501,6 +502,8 @@ func (s *serverEntryPoint) getCertificate(clientHello *tls.ClientHelloInfo) (*tl
}

func (s *Server) postLoadConfiguration() {
	metrics.OnConfigurationUpdate()

	if s.globalConfiguration.ACME == nil {
		return
	}
@@ -1070,7 +1073,7 @@ func (s *Server) loadConfig(configurations types.Configurations, globalConfigura
			rebalancer, _ = roundrobin.NewRebalancer(rr, roundrobin.RebalancerStickySession(sticky))
		}
		lb = rebalancer
		if err := configureLBServers(rebalancer, config, frontend); err != nil {
		if err := s.configureLBServers(rebalancer, config, frontend); err != nil {
			log.Errorf("Skipping frontend %s...", frontendName)
			continue frontend
		}
@@ -1092,7 +1095,7 @@ func (s *Server) loadConfig(configurations types.Configurations, globalConfigura
		}
	}
	lb = rr
	if err := configureLBServers(rr, config, frontend); err != nil {
	if err := s.configureLBServers(rr, config, frontend); err != nil {
		log.Errorf("Skipping frontend %s...", frontendName)
		continue frontend
	}
@@ -1154,7 +1157,7 @@ func (s *Server) loadConfig(configurations types.Configurations, globalConfigura
	}

	if s.metricsRegistry.IsEnabled() {
		n.Use(middlewares.NewMetricsWrapper(s.metricsRegistry, frontend.Backend))
		n.Use(middlewares.NewBackendMetricsMiddleware(s.metricsRegistry, frontend.Backend))
	}

	ipWhitelistMiddleware, err := configureIPWhitelistMiddleware(frontend.WhitelistSourceRange)
@@ -1233,7 +1236,7 @@ func (s *Server) loadConfig(configurations types.Configurations, globalConfigura
			}
		}
	}
	healthcheck.GetHealthCheck().SetBackendsConfiguration(s.routinesPool.Ctx(), backendsHealthCheck)
	healthcheck.GetHealthCheck(s.metricsRegistry).SetBackendsConfiguration(s.routinesPool.Ctx(), backendsHealthCheck)
	// Get new certificates list sorted per entrypoints
	// Update certificates
	entryPointsCertificates, err := s.loadHTTPSConfiguration(configurations, globalConfiguration.DefaultEntryPoints)
@@ -1249,18 +1252,19 @@ func (s *Server) loadConfig(configurations types.Configurations, globalConfigura
	return serverEntryPoints, err
}

func configureLBServers(lb healthcheck.LoadBalancer, config *types.Configuration, frontend *types.Frontend) error {
	for serverName, server := range config.Backends[frontend.Backend].Servers {
		u, err := url.Parse(server.URL)
func (s *Server) configureLBServers(lb healthcheck.LoadBalancer, config *types.Configuration, frontend *types.Frontend) error {
	for name, srv := range config.Backends[frontend.Backend].Servers {
		u, err := url.Parse(srv.URL)
		if err != nil {
			log.Errorf("Error parsing server URL %s: %v", server.URL, err)
			log.Errorf("Error parsing server URL %s: %v", srv.URL, err)
			return err
		}
		log.Debugf("Creating server %s at %s with weight %d", serverName, u, server.Weight)
		if err := lb.UpsertServer(u, roundrobin.Weight(server.Weight)); err != nil {
			log.Errorf("Error adding server %s to load balancer: %v", server.URL, err)
		log.Debugf("Creating server %s at %s with weight %d", name, u, srv.Weight)
		if err := lb.UpsertServer(u, roundrobin.Weight(srv.Weight)); err != nil {
			log.Errorf("Error adding server %s to load balancer: %v", srv.URL, err)
			return err
		}
		s.metricsRegistry.BackendServerUpGauge().With("backend", frontend.Backend, "url", srv.URL).Set(1)
	}
	return nil
}
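configureLBServers now marks every server it registers as up (gauge value 1) using paired "backend" and "url" label values; the same gauge is what the healthcheck package consumes through its metricsRegistry interface. A small sketch of that label convention, using the collecting helper added by this commit (the backend name and URL are example values):

package main

import (
	"fmt"

	"github.com/containous/traefik/testhelpers"
)

func main() {
	m := testhelpers.NewCollectingHealthCheckMetrics()

	// Same label convention as configureLBServers: paired key/value strings.
	m.BackendServerUpGauge().With("backend", "backend-1", "url", "http://10.0.0.1:80").Set(1)

	fmt.Println(m.Gauge.GaugeValue)      // 1
	fmt.Println(m.Gauge.LastLabelValues) // [backend backend-1 url http://10.0.0.1:80]
}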
@@ -1477,9 +1481,12 @@ func configureBackends(backends map[string]*types.Backend) {
	}
}

func (s *Server) registerMetricClients(metricsConfig *types.Metrics) {
	var registries []metrics.Registry
func registerMetricClients(metricsConfig *types.Metrics) metrics.Registry {
	if metricsConfig == nil {
		return metrics.NewVoidRegistry()
	}

	registries := []metrics.Registry{}
	if metricsConfig.Prometheus != nil {
		registries = append(registries, metrics.RegisterPrometheus(metricsConfig.Prometheus))
		log.Debug("Configured Prometheus metrics")
@@ -1497,9 +1504,7 @@ func (s *Server) registerMetricClients(metricsConfig *types.Metrics) {
		log.Debugf("Configured InfluxDB metrics pushing to %s once every %s", metricsConfig.InfluxDB.Address, metricsConfig.InfluxDB.PushInterval)
	}

	if len(registries) > 0 {
		s.metricsRegistry = metrics.NewMultiRegistry(registries)
	}
	return metrics.NewMultiRegistry(registries)
}

func stopMetricsClients() {
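registerMetricClients now always returns a usable registry: a void registry when no metrics section is configured, otherwise a multi-registry wrapping whichever clients were enabled. A test-style sketch of that behaviour follows; it would have to live in the server package since the function is unexported, and the types.Metrics literal is an assumption based on the fields referenced in this diff.

// hypothetical snippet inside the server package
func sketchRegisterMetricClients() {
	// No metrics section in the static configuration: a void registry is returned.
	void := registerMetricClients(nil)

	// Prometheus enabled: the returned multi-registry forwards to the Prometheus registry.
	prom := registerMetricClients(&types.Metrics{Prometheus: &types.Prometheus{}})

	_ = void
	_ = prom
}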
@@ -484,7 +484,7 @@ func TestServerLoadConfigHealthCheckOptions(t *testing.T) {
	if healthCheck != nil {
		wantNumHealthCheckBackends = 1
	}
	gotNumHealthCheckBackends := len(healthcheck.GetHealthCheck().Backends)
	gotNumHealthCheckBackends := len(healthcheck.GetHealthCheck(testhelpers.NewCollectingHealthCheckMetrics()).Backends)
	if gotNumHealthCheckBackends != wantNumHealthCheckBackends {
		t.Errorf("got %d health check backends, want %d", gotNumHealthCheckBackends, wantNumHealthCheckBackends)
	}

52 testhelpers/metrics.go Normal file
@@ -0,0 +1,52 @@
package testhelpers

import "github.com/go-kit/kit/metrics"

// CollectingCounter is a metrics.Counter implementation that enables access to the CounterValue and LastLabelValues.
type CollectingCounter struct {
	CounterValue    float64
	LastLabelValues []string
}

// With is there to satisfy the metrics.Counter interface.
func (c *CollectingCounter) With(labelValues ...string) metrics.Counter {
	c.LastLabelValues = labelValues
	return c
}

// Add is there to satisfy the metrics.Counter interface.
func (c *CollectingCounter) Add(delta float64) {
	c.CounterValue += delta
}

// CollectingGauge is a metrics.Gauge implementation that enables access to the GaugeValue and LastLabelValues.
type CollectingGauge struct {
	GaugeValue      float64
	LastLabelValues []string
}

// With is there to satisfy the metrics.Gauge interface.
func (g *CollectingGauge) With(labelValues ...string) metrics.Gauge {
	g.LastLabelValues = labelValues
	return g
}

// Set is there to satisfy the metrics.Gauge interface.
func (g *CollectingGauge) Set(delta float64) {
	g.GaugeValue = delta
}

// CollectingHealthCheckMetrics can be used for testing the Metrics instrumentation of the HealthCheck package.
type CollectingHealthCheckMetrics struct {
	Gauge *CollectingGauge
}

// NewCollectingHealthCheckMetrics creates a new CollectingHealthCheckMetrics instance.
func NewCollectingHealthCheckMetrics() *CollectingHealthCheckMetrics {
	return &CollectingHealthCheckMetrics{&CollectingGauge{}}
}

// BackendServerUpGauge is there to satisfy the healthcheck.metricsRegistry interface.
func (m *CollectingHealthCheckMetrics) BackendServerUpGauge() metrics.Gauge {
	return m.Gauge
}
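Both collectors return themselves from With and simply record the last label values, so tests can assert directly on the exported fields. A short usage sketch (a hypothetical external test, not part of the commit):

package testhelpers_test

import (
	"reflect"
	"testing"

	"github.com/containous/traefik/testhelpers"
)

// TestCollectingCounterRecordsCalls demonstrates how the collector accumulates values
// and remembers the labels of the most recent With call.
func TestCollectingCounterRecordsCalls(t *testing.T) {
	counter := &testhelpers.CollectingCounter{}

	counter.With("backend", "backend-1").Add(1)
	counter.With("backend", "backend-1").Add(1)

	if counter.CounterValue != 2 {
		t.Errorf("got counter value %f, want 2", counter.CounterValue)
	}
	want := []string{"backend", "backend-1"}
	if !reflect.DeepEqual(counter.LastLabelValues, want) {
		t.Errorf("got label values %v, want %v", counter.LastLabelValues, want)
	}
}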