2017-08-23 18:46:03 +00:00
|
|
|
package metrics
|
|
|
|
|
|
|
|
import (
|
2018-11-14 09:18:03 +00:00
|
|
|
"context"
|
2020-11-06 08:26:03 +00:00
|
|
|
"errors"
|
2017-11-20 08:40:03 +00:00
|
|
|
"net/http"
|
2018-01-26 10:58:03 +00:00
|
|
|
"sync"
|
2020-03-05 14:10:07 +00:00
|
|
|
"time"
|
2017-11-20 08:40:03 +00:00
|
|
|
|
2018-01-26 10:58:03 +00:00
|
|
|
"github.com/go-kit/kit/metrics"
|
2017-08-23 18:46:03 +00:00
|
|
|
stdprometheus "github.com/prometheus/client_golang/prometheus"
|
2021-10-04 15:54:10 +00:00
|
|
|
"github.com/prometheus/client_golang/prometheus/collectors"
|
2017-11-09 15:12:04 +00:00
|
|
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
2022-11-21 17:36:05 +00:00
|
|
|
"github.com/rs/zerolog/log"
|
2020-09-16 13:46:04 +00:00
|
|
|
"github.com/traefik/traefik/v2/pkg/config/dynamic"
|
|
|
|
"github.com/traefik/traefik/v2/pkg/types"
|
2017-08-23 18:46:03 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// MetricNamePrefix is the prefix shared by all metric names.
const MetricNamePrefix = "traefik_"

// Server meta information: configuration reload metrics.
const (
	metricConfigPrefix             = MetricNamePrefix + "config_"
	configReloadsTotalName         = metricConfigPrefix + "reloads_total"
	configReloadsFailuresTotalName = metricConfigPrefix + "reloads_failure_total"
	configLastReloadSuccessName    = metricConfigPrefix + "last_reload_success"
	configLastReloadFailureName    = metricConfigPrefix + "last_reload_failure"
)

// TLS metrics.
const (
	metricsTLSPrefix          = MetricNamePrefix + "tls_"
	tlsCertsNotAfterTimestamp = metricsTLSPrefix + "certs_not_after"
)

// Entry point level metrics.
const (
	metricEntryPointPrefix        = MetricNamePrefix + "entrypoint_"
	entryPointReqsTotalName       = metricEntryPointPrefix + "requests_total"
	entryPointReqsTLSTotalName    = metricEntryPointPrefix + "requests_tls_total"
	entryPointReqDurationName     = metricEntryPointPrefix + "request_duration_seconds"
	entryPointOpenConnsName       = metricEntryPointPrefix + "open_connections"
	entryPointReqsBytesTotalName  = metricEntryPointPrefix + "requests_bytes_total"
	entryPointRespsBytesTotalName = metricEntryPointPrefix + "responses_bytes_total"
)

// Router level metrics.
const (
	metricRouterPrefix        = MetricNamePrefix + "router_"
	routerReqsTotalName       = metricRouterPrefix + "requests_total"
	routerReqsTLSTotalName    = metricRouterPrefix + "requests_tls_total"
	routerReqDurationName     = metricRouterPrefix + "request_duration_seconds"
	routerOpenConnsName       = metricRouterPrefix + "open_connections"
	routerReqsBytesTotalName  = metricRouterPrefix + "requests_bytes_total"
	routerRespsBytesTotalName = metricRouterPrefix + "responses_bytes_total"
)

// Service level metrics.
const (
	metricServicePrefix        = MetricNamePrefix + "service_"
	serviceReqsTotalName       = metricServicePrefix + "requests_total"
	serviceReqsTLSTotalName    = metricServicePrefix + "requests_tls_total"
	serviceReqDurationName     = metricServicePrefix + "request_duration_seconds"
	serviceOpenConnsName       = metricServicePrefix + "open_connections"
	serviceRetriesTotalName    = metricServicePrefix + "retries_total"
	serviceServerUpName        = metricServicePrefix + "server_up"
	serviceReqsBytesTotalName  = metricServicePrefix + "requests_bytes_total"
	serviceRespsBytesTotalName = metricServicePrefix + "responses_bytes_total"
)
|
|
|
|
|
|
|
|
// promState holds all metric state internally and acts as the only Collector we register for Prometheus.
|
|
|
|
//
|
|
|
|
// This enables control to remove metrics that belong to outdated configuration.
|
|
|
|
// As an example why this is required, consider Traefik learns about a new service.
|
2019-07-18 19:36:05 +00:00
|
|
|
// It populates the 'traefik_server_service_up' metric for it with a value of 1 (alive).
|
|
|
|
// When the service is undeployed now the metric is still there in the client library
|
2018-06-05 10:32:03 +00:00
|
|
|
// and will be returned on the metrics endpoint until Traefik would be restarted.
|
2018-01-26 10:58:03 +00:00
|
|
|
//
|
2018-06-05 10:32:03 +00:00
|
|
|
// To solve this problem promState keeps track of Traefik's dynamic configuration.
|
2019-07-18 19:36:05 +00:00
|
|
|
// Metrics that "belong" to a dynamic configuration part like services or entryPoints
|
2018-06-05 10:32:03 +00:00
|
|
|
// are removed after they were scraped at least once when the corresponding object
|
|
|
|
// doesn't exist anymore.
|
2018-01-26 10:58:03 +00:00
|
|
|
var promState = newPrometheusState()
|
|
|
|
|
2019-07-18 19:36:05 +00:00
|
|
|
var promRegistry = stdprometheus.NewRegistry()
|
|
|
|
|
2018-01-26 10:58:03 +00:00
|
|
|
// PrometheusHandler exposes Prometheus routes.
|
2019-11-14 15:40:05 +00:00
|
|
|
func PrometheusHandler() http.Handler {
|
|
|
|
return promhttp.HandlerFor(promRegistry, promhttp.HandlerOpts{})
|
2017-11-09 15:12:04 +00:00
|
|
|
}
|
|
|
|
|
2017-08-23 18:46:03 +00:00
|
|
|
// RegisterPrometheus registers all Prometheus metrics.
|
|
|
|
// It must be called only once and failing to register the metrics will lead to a panic.
|
2018-11-14 09:18:03 +00:00
|
|
|
func RegisterPrometheus(ctx context.Context, config *types.Prometheus) Registry {
|
2018-08-06 12:58:03 +00:00
|
|
|
standardRegistry := initStandardRegistry(config)
|
|
|
|
|
2021-10-04 15:54:10 +00:00
|
|
|
if err := promRegistry.Register(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})); err != nil {
|
2020-11-06 08:26:03 +00:00
|
|
|
var arErr stdprometheus.AlreadyRegisteredError
|
|
|
|
if !errors.As(err, &arErr) {
|
2022-11-21 17:36:05 +00:00
|
|
|
log.Ctx(ctx).Warn().Msg("ProcessCollector is already registered")
|
2019-07-18 19:36:05 +00:00
|
|
|
}
|
|
|
|
}
|
2020-11-06 08:26:03 +00:00
|
|
|
|
2021-10-04 15:54:10 +00:00
|
|
|
if err := promRegistry.Register(collectors.NewGoCollector()); err != nil {
|
2020-11-06 08:26:03 +00:00
|
|
|
var arErr stdprometheus.AlreadyRegisteredError
|
|
|
|
if !errors.As(err, &arErr) {
|
2022-11-21 17:36:05 +00:00
|
|
|
log.Ctx(ctx).Warn().Msg("GoCollector is already registered")
|
2019-07-18 19:36:05 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-14 09:18:03 +00:00
|
|
|
if !registerPromState(ctx) {
|
2018-08-06 12:58:03 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return standardRegistry
|
|
|
|
}
|
|
|
|
|
|
|
|
func initStandardRegistry(config *types.Prometheus) Registry {
|
2017-08-23 18:46:03 +00:00
|
|
|
buckets := []float64{0.1, 0.3, 1.2, 5.0}
|
|
|
|
if config.Buckets != nil {
|
|
|
|
buckets = config.Buckets
|
|
|
|
}
|
|
|
|
|
2022-07-07 16:00:09 +00:00
|
|
|
configReloads := newCounterFrom(stdprometheus.CounterOpts{
|
2018-01-26 10:58:03 +00:00
|
|
|
Name: configReloadsTotalName,
|
|
|
|
Help: "Config reloads",
|
|
|
|
}, []string{})
|
2022-07-07 16:00:09 +00:00
|
|
|
configReloadsFailures := newCounterFrom(stdprometheus.CounterOpts{
|
2018-01-26 10:58:03 +00:00
|
|
|
Name: configReloadsFailuresTotalName,
|
|
|
|
Help: "Config failure reloads",
|
|
|
|
}, []string{})
|
2022-07-07 16:00:09 +00:00
|
|
|
lastConfigReloadSuccess := newGaugeFrom(stdprometheus.GaugeOpts{
|
2018-01-26 10:58:03 +00:00
|
|
|
Name: configLastReloadSuccessName,
|
|
|
|
Help: "Last config reload success",
|
|
|
|
}, []string{})
|
2022-07-07 16:00:09 +00:00
|
|
|
lastConfigReloadFailure := newGaugeFrom(stdprometheus.GaugeOpts{
|
2018-01-26 10:58:03 +00:00
|
|
|
Name: configLastReloadFailureName,
|
|
|
|
Help: "Last config reload failure",
|
|
|
|
}, []string{})
|
2022-07-07 16:00:09 +00:00
|
|
|
tlsCertsNotAfterTimestamp := newGaugeFrom(stdprometheus.GaugeOpts{
|
2020-12-18 17:44:03 +00:00
|
|
|
Name: tlsCertsNotAfterTimestamp,
|
|
|
|
Help: "Certificate expiration timestamp",
|
|
|
|
}, []string{"cn", "serial", "sans"})
|
2018-01-26 10:58:03 +00:00
|
|
|
|
2022-07-07 16:00:09 +00:00
|
|
|
promState.vectors = []vector{
|
|
|
|
configReloads.cv,
|
|
|
|
configReloadsFailures.cv,
|
|
|
|
lastConfigReloadSuccess.gv,
|
|
|
|
lastConfigReloadFailure.gv,
|
|
|
|
tlsCertsNotAfterTimestamp.gv,
|
2018-01-26 10:58:03 +00:00
|
|
|
}
|
2019-07-18 19:36:05 +00:00
|
|
|
|
|
|
|
reg := &standardRegistry{
|
2020-12-18 17:44:03 +00:00
|
|
|
epEnabled: config.AddEntryPointsLabels,
|
2021-04-30 08:22:04 +00:00
|
|
|
routerEnabled: config.AddRoutersLabels,
|
2020-12-18 17:44:03 +00:00
|
|
|
svcEnabled: config.AddServicesLabels,
|
|
|
|
configReloadsCounter: configReloads,
|
|
|
|
configReloadsFailureCounter: configReloadsFailures,
|
|
|
|
lastConfigReloadSuccessGauge: lastConfigReloadSuccess,
|
|
|
|
lastConfigReloadFailureGauge: lastConfigReloadFailure,
|
2022-02-21 11:40:09 +00:00
|
|
|
tlsCertsNotAfterTimestampGauge: tlsCertsNotAfterTimestamp,
|
2019-07-18 19:36:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if config.AddEntryPointsLabels {
|
2022-07-07 16:00:09 +00:00
|
|
|
entryPointReqs := newCounterFrom(stdprometheus.CounterOpts{
|
2019-07-18 19:36:05 +00:00
|
|
|
Name: entryPointReqsTotalName,
|
|
|
|
Help: "How many HTTP requests processed on an entrypoint, partitioned by status code, protocol, and method.",
|
|
|
|
}, []string{"code", "method", "protocol", "entrypoint"})
|
2022-07-07 16:00:09 +00:00
|
|
|
entryPointReqsTLS := newCounterFrom(stdprometheus.CounterOpts{
|
2020-03-05 12:30:05 +00:00
|
|
|
Name: entryPointReqsTLSTotalName,
|
|
|
|
Help: "How many HTTP requests with TLS processed on an entrypoint, partitioned by TLS Version and TLS cipher Used.",
|
|
|
|
}, []string{"tls_version", "tls_cipher", "entrypoint"})
|
2022-07-07 16:00:09 +00:00
|
|
|
entryPointReqDurations := newHistogramFrom(stdprometheus.HistogramOpts{
|
2019-07-18 19:36:05 +00:00
|
|
|
Name: entryPointReqDurationName,
|
|
|
|
Help: "How long it took to process the request on an entrypoint, partitioned by status code, protocol, and method.",
|
|
|
|
Buckets: buckets,
|
|
|
|
}, []string{"code", "method", "protocol", "entrypoint"})
|
2022-07-07 16:00:09 +00:00
|
|
|
entryPointOpenConns := newGaugeFrom(stdprometheus.GaugeOpts{
|
2019-07-18 19:36:05 +00:00
|
|
|
Name: entryPointOpenConnsName,
|
|
|
|
Help: "How many open connections exist on an entrypoint, partitioned by method and protocol.",
|
|
|
|
}, []string{"method", "protocol", "entrypoint"})
|
2022-09-12 15:10:09 +00:00
|
|
|
entryPointReqsBytesTotal := newCounterFrom(stdprometheus.CounterOpts{
|
|
|
|
Name: entryPointReqsBytesTotalName,
|
|
|
|
Help: "The total size of requests in bytes handled by an entrypoint, partitioned by status code, protocol, and method.",
|
|
|
|
}, []string{"code", "method", "protocol", "entrypoint"})
|
|
|
|
entryPointRespsBytesTotal := newCounterFrom(stdprometheus.CounterOpts{
|
|
|
|
Name: entryPointRespsBytesTotalName,
|
|
|
|
Help: "The total size of responses in bytes handled by an entrypoint, partitioned by status code, protocol, and method.",
|
|
|
|
}, []string{"code", "method", "protocol", "entrypoint"})
|
2019-07-18 19:36:05 +00:00
|
|
|
|
2022-07-07 16:00:09 +00:00
|
|
|
promState.vectors = append(promState.vectors,
|
|
|
|
entryPointReqs.cv,
|
|
|
|
entryPointReqsTLS.cv,
|
|
|
|
entryPointReqDurations.hv,
|
|
|
|
entryPointOpenConns.gv,
|
2022-09-12 15:10:09 +00:00
|
|
|
entryPointReqsBytesTotal.cv,
|
|
|
|
entryPointRespsBytesTotal.cv,
|
2022-07-07 16:00:09 +00:00
|
|
|
)
|
2020-12-18 17:44:03 +00:00
|
|
|
|
2019-07-18 19:36:05 +00:00
|
|
|
reg.entryPointReqsCounter = entryPointReqs
|
2020-03-05 12:30:05 +00:00
|
|
|
reg.entryPointReqsTLSCounter = entryPointReqsTLS
|
2020-03-05 14:10:07 +00:00
|
|
|
reg.entryPointReqDurationHistogram, _ = NewHistogramWithScale(entryPointReqDurations, time.Second)
|
2019-07-18 19:36:05 +00:00
|
|
|
reg.entryPointOpenConnsGauge = entryPointOpenConns
|
2022-09-12 15:10:09 +00:00
|
|
|
reg.entryPointReqsBytesCounter = entryPointReqsBytesTotal
|
|
|
|
reg.entryPointRespsBytesCounter = entryPointRespsBytesTotal
|
2019-07-18 19:36:05 +00:00
|
|
|
}
|
2020-12-18 17:44:03 +00:00
|
|
|
|
2021-04-30 08:22:04 +00:00
|
|
|
if config.AddRoutersLabels {
|
2022-07-07 16:00:09 +00:00
|
|
|
routerReqs := newCounterFrom(stdprometheus.CounterOpts{
|
2021-04-30 08:22:04 +00:00
|
|
|
Name: routerReqsTotalName,
|
|
|
|
Help: "How many HTTP requests are processed on a router, partitioned by service, status code, protocol, and method.",
|
|
|
|
}, []string{"code", "method", "protocol", "router", "service"})
|
2022-07-07 16:00:09 +00:00
|
|
|
routerReqsTLS := newCounterFrom(stdprometheus.CounterOpts{
|
2021-04-30 08:22:04 +00:00
|
|
|
Name: routerReqsTLSTotalName,
|
|
|
|
Help: "How many HTTP requests with TLS are processed on a router, partitioned by service, TLS Version, and TLS cipher Used.",
|
|
|
|
}, []string{"tls_version", "tls_cipher", "router", "service"})
|
2022-07-07 16:00:09 +00:00
|
|
|
routerReqDurations := newHistogramFrom(stdprometheus.HistogramOpts{
|
2021-04-30 08:22:04 +00:00
|
|
|
Name: routerReqDurationName,
|
|
|
|
Help: "How long it took to process the request on a router, partitioned by service, status code, protocol, and method.",
|
|
|
|
Buckets: buckets,
|
|
|
|
}, []string{"code", "method", "protocol", "router", "service"})
|
2022-07-07 16:00:09 +00:00
|
|
|
routerOpenConns := newGaugeFrom(stdprometheus.GaugeOpts{
|
2021-04-30 08:22:04 +00:00
|
|
|
Name: routerOpenConnsName,
|
|
|
|
Help: "How many open connections exist on a router, partitioned by service, method, and protocol.",
|
|
|
|
}, []string{"method", "protocol", "router", "service"})
|
2022-09-12 15:10:09 +00:00
|
|
|
routerReqsBytesTotal := newCounterFrom(stdprometheus.CounterOpts{
|
|
|
|
Name: routerReqsBytesTotalName,
|
|
|
|
Help: "The total size of requests in bytes handled by a router, partitioned by service, status code, protocol, and method.",
|
|
|
|
}, []string{"code", "method", "protocol", "router", "service"})
|
|
|
|
routerRespsBytesTotal := newCounterFrom(stdprometheus.CounterOpts{
|
|
|
|
Name: routerRespsBytesTotalName,
|
|
|
|
Help: "The total size of responses in bytes handled by a router, partitioned by service, status code, protocol, and method.",
|
|
|
|
}, []string{"code", "method", "protocol", "router", "service"})
|
2021-04-30 08:22:04 +00:00
|
|
|
|
2022-07-07 16:00:09 +00:00
|
|
|
promState.vectors = append(promState.vectors,
|
|
|
|
routerReqs.cv,
|
|
|
|
routerReqsTLS.cv,
|
|
|
|
routerReqDurations.hv,
|
|
|
|
routerOpenConns.gv,
|
2022-09-12 15:10:09 +00:00
|
|
|
routerReqsBytesTotal.cv,
|
|
|
|
routerRespsBytesTotal.cv,
|
2022-07-07 16:00:09 +00:00
|
|
|
)
|
2021-04-30 08:22:04 +00:00
|
|
|
reg.routerReqsCounter = routerReqs
|
|
|
|
reg.routerReqsTLSCounter = routerReqsTLS
|
|
|
|
reg.routerReqDurationHistogram, _ = NewHistogramWithScale(routerReqDurations, time.Second)
|
|
|
|
reg.routerOpenConnsGauge = routerOpenConns
|
2022-09-12 15:10:09 +00:00
|
|
|
reg.routerReqsBytesCounter = routerReqsBytesTotal
|
|
|
|
reg.routerRespsBytesCounter = routerRespsBytesTotal
|
2021-04-30 08:22:04 +00:00
|
|
|
}
|
|
|
|
|
2019-07-18 19:36:05 +00:00
|
|
|
if config.AddServicesLabels {
|
2022-07-07 16:00:09 +00:00
|
|
|
serviceReqs := newCounterFrom(stdprometheus.CounterOpts{
|
2019-07-18 19:36:05 +00:00
|
|
|
Name: serviceReqsTotalName,
|
|
|
|
Help: "How many HTTP requests processed on a service, partitioned by status code, protocol, and method.",
|
|
|
|
}, []string{"code", "method", "protocol", "service"})
|
2022-07-07 16:00:09 +00:00
|
|
|
serviceReqsTLS := newCounterFrom(stdprometheus.CounterOpts{
|
2020-03-05 12:30:05 +00:00
|
|
|
Name: serviceReqsTLSTotalName,
|
|
|
|
Help: "How many HTTP requests with TLS processed on a service, partitioned by TLS version and TLS cipher.",
|
|
|
|
}, []string{"tls_version", "tls_cipher", "service"})
|
2022-07-07 16:00:09 +00:00
|
|
|
serviceReqDurations := newHistogramFrom(stdprometheus.HistogramOpts{
|
2019-07-18 19:36:05 +00:00
|
|
|
Name: serviceReqDurationName,
|
|
|
|
Help: "How long it took to process the request on a service, partitioned by status code, protocol, and method.",
|
|
|
|
Buckets: buckets,
|
|
|
|
}, []string{"code", "method", "protocol", "service"})
|
2022-07-07 16:00:09 +00:00
|
|
|
serviceOpenConns := newGaugeFrom(stdprometheus.GaugeOpts{
|
2019-07-18 19:36:05 +00:00
|
|
|
Name: serviceOpenConnsName,
|
|
|
|
Help: "How many open connections exist on a service, partitioned by method and protocol.",
|
|
|
|
}, []string{"method", "protocol", "service"})
|
2022-07-07 16:00:09 +00:00
|
|
|
serviceRetries := newCounterFrom(stdprometheus.CounterOpts{
|
2019-07-18 19:36:05 +00:00
|
|
|
Name: serviceRetriesTotalName,
|
|
|
|
Help: "How many request retries happened on a service.",
|
|
|
|
}, []string{"service"})
|
2022-07-07 16:00:09 +00:00
|
|
|
serviceServerUp := newGaugeFrom(stdprometheus.GaugeOpts{
|
2019-07-18 19:36:05 +00:00
|
|
|
Name: serviceServerUpName,
|
|
|
|
Help: "service server is up, described by gauge value of 0 or 1.",
|
|
|
|
}, []string{"service", "url"})
|
2022-09-12 15:10:09 +00:00
|
|
|
serviceReqsBytesTotal := newCounterFrom(stdprometheus.CounterOpts{
|
|
|
|
Name: serviceReqsBytesTotalName,
|
|
|
|
Help: "The total size of requests in bytes received by a service, partitioned by status code, protocol, and method.",
|
|
|
|
}, []string{"code", "method", "protocol", "service"})
|
|
|
|
serviceRespsBytesTotal := newCounterFrom(stdprometheus.CounterOpts{
|
|
|
|
Name: serviceRespsBytesTotalName,
|
|
|
|
Help: "The total size of responses in bytes returned by a service, partitioned by status code, protocol, and method.",
|
|
|
|
}, []string{"code", "method", "protocol", "service"})
|
2019-07-18 19:36:05 +00:00
|
|
|
|
2022-07-07 16:00:09 +00:00
|
|
|
promState.vectors = append(promState.vectors,
|
|
|
|
serviceReqs.cv,
|
|
|
|
serviceReqsTLS.cv,
|
|
|
|
serviceReqDurations.hv,
|
|
|
|
serviceOpenConns.gv,
|
|
|
|
serviceRetries.cv,
|
|
|
|
serviceServerUp.gv,
|
2022-09-12 15:10:09 +00:00
|
|
|
serviceReqsBytesTotal.cv,
|
|
|
|
serviceRespsBytesTotal.cv,
|
2022-07-07 16:00:09 +00:00
|
|
|
)
|
2019-07-18 19:36:05 +00:00
|
|
|
|
|
|
|
reg.serviceReqsCounter = serviceReqs
|
2020-03-05 12:30:05 +00:00
|
|
|
reg.serviceReqsTLSCounter = serviceReqsTLS
|
2020-03-05 14:10:07 +00:00
|
|
|
reg.serviceReqDurationHistogram, _ = NewHistogramWithScale(serviceReqDurations, time.Second)
|
2019-07-18 19:36:05 +00:00
|
|
|
reg.serviceOpenConnsGauge = serviceOpenConns
|
|
|
|
reg.serviceRetriesCounter = serviceRetries
|
|
|
|
reg.serviceServerUpGauge = serviceServerUp
|
2022-09-12 15:10:09 +00:00
|
|
|
reg.serviceReqsBytesCounter = serviceReqsBytesTotal
|
|
|
|
reg.serviceRespsBytesCounter = serviceRespsBytesTotal
|
2019-07-18 19:36:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return reg
|
2018-01-26 10:58:03 +00:00
|
|
|
}
|
|
|
|
|
2018-11-14 09:18:03 +00:00
|
|
|
func registerPromState(ctx context.Context) bool {
|
2020-11-06 08:26:03 +00:00
|
|
|
err := promRegistry.Register(promState)
|
|
|
|
if err == nil {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2022-11-21 17:36:05 +00:00
|
|
|
logger := log.Ctx(ctx)
|
2020-11-06 08:26:03 +00:00
|
|
|
|
|
|
|
var arErr stdprometheus.AlreadyRegisteredError
|
|
|
|
if errors.As(err, &arErr) {
|
2022-11-21 17:36:05 +00:00
|
|
|
logger.Debug().Msg("Prometheus collector already registered.")
|
2020-11-06 08:26:03 +00:00
|
|
|
return true
|
2018-08-06 12:58:03 +00:00
|
|
|
}
|
2020-11-06 08:26:03 +00:00
|
|
|
|
2022-11-21 17:36:05 +00:00
|
|
|
logger.Error().Err(err).Msg("Unable to register Traefik to Prometheus")
|
2020-11-06 08:26:03 +00:00
|
|
|
return false
|
2018-08-06 12:58:03 +00:00
|
|
|
}
|
|
|
|
|
2018-06-05 10:32:03 +00:00
|
|
|
// OnConfigurationUpdate receives the current configuration from Traefik.
|
|
|
|
// It then converts the configuration to the optimized package internal format
|
|
|
|
// and sets it to the promState.
|
2019-11-14 15:40:05 +00:00
|
|
|
func OnConfigurationUpdate(conf dynamic.Configuration, entryPoints []string) {
|
2022-07-07 16:00:09 +00:00
|
|
|
dynCfg := newDynamicConfig()
|
2018-06-05 10:32:03 +00:00
|
|
|
|
2019-07-18 19:36:05 +00:00
|
|
|
for _, value := range entryPoints {
|
2022-07-07 16:00:09 +00:00
|
|
|
dynCfg.entryPoints[value] = true
|
|
|
|
}
|
|
|
|
|
|
|
|
if conf.HTTP == nil {
|
|
|
|
promState.SetDynamicConfig(dynCfg)
|
|
|
|
return
|
2019-07-18 19:36:05 +00:00
|
|
|
}
|
|
|
|
|
2019-11-14 15:40:05 +00:00
|
|
|
for name := range conf.HTTP.Routers {
|
2022-07-07 16:00:09 +00:00
|
|
|
dynCfg.routers[name] = true
|
2019-11-14 15:40:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for serviceName, service := range conf.HTTP.Services {
|
2022-07-07 16:00:09 +00:00
|
|
|
dynCfg.services[serviceName] = make(map[string]bool)
|
2019-11-14 15:40:05 +00:00
|
|
|
if service.LoadBalancer != nil {
|
2019-07-18 19:36:05 +00:00
|
|
|
for _, server := range service.LoadBalancer.Servers {
|
2022-07-07 16:00:09 +00:00
|
|
|
dynCfg.services[serviceName][server.URL] = true
|
2019-07-18 19:36:05 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2018-06-05 10:32:03 +00:00
|
|
|
|
2022-07-07 16:00:09 +00:00
|
|
|
promState.SetDynamicConfig(dynCfg)
|
2018-01-26 10:58:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func newPrometheusState() *prometheusState {
|
|
|
|
return &prometheusState{
|
2018-06-05 10:32:03 +00:00
|
|
|
dynamicConfig: newDynamicConfig(),
|
2022-07-18 08:36:11 +00:00
|
|
|
deletedURLs: make(map[string][]string),
|
2018-01-26 10:58:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-07-07 16:00:09 +00:00
|
|
|
type vector interface {
|
|
|
|
stdprometheus.Collector
|
|
|
|
DeletePartialMatch(labels stdprometheus.Labels) int
|
|
|
|
}
|
|
|
|
|
2018-01-26 10:58:03 +00:00
|
|
|
type prometheusState struct {
|
2022-07-07 16:00:09 +00:00
|
|
|
vectors []vector
|
2018-06-05 10:32:03 +00:00
|
|
|
|
2022-07-07 16:00:09 +00:00
|
|
|
mtx sync.Mutex
|
|
|
|
dynamicConfig *dynamicConfig
|
|
|
|
deletedEP []string
|
|
|
|
deletedRouters []string
|
|
|
|
deletedServices []string
|
2022-07-18 08:36:11 +00:00
|
|
|
deletedURLs map[string][]string
|
2018-06-05 10:32:03 +00:00
|
|
|
}
|
2018-01-26 10:58:03 +00:00
|
|
|
|
2018-06-05 10:32:03 +00:00
|
|
|
func (ps *prometheusState) SetDynamicConfig(dynamicConfig *dynamicConfig) {
|
2018-01-26 10:58:03 +00:00
|
|
|
ps.mtx.Lock()
|
|
|
|
defer ps.mtx.Unlock()
|
|
|
|
|
2022-07-07 16:00:09 +00:00
|
|
|
for ep := range ps.dynamicConfig.entryPoints {
|
|
|
|
if _, ok := dynamicConfig.entryPoints[ep]; !ok {
|
|
|
|
ps.deletedEP = append(ps.deletedEP, ep)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for router := range ps.dynamicConfig.routers {
|
|
|
|
if _, ok := dynamicConfig.routers[router]; !ok {
|
|
|
|
ps.deletedRouters = append(ps.deletedRouters, router)
|
|
|
|
}
|
2018-01-26 10:58:03 +00:00
|
|
|
}
|
2022-07-07 16:00:09 +00:00
|
|
|
|
|
|
|
for service, serV := range ps.dynamicConfig.services {
|
|
|
|
actualService, ok := dynamicConfig.services[service]
|
|
|
|
if !ok {
|
|
|
|
ps.deletedServices = append(ps.deletedServices, service)
|
|
|
|
}
|
|
|
|
for url := range serV {
|
|
|
|
if _, ok := actualService[url]; !ok {
|
2022-07-18 08:36:11 +00:00
|
|
|
ps.deletedURLs[service] = append(ps.deletedURLs[service], url)
|
2022-07-07 16:00:09 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ps.dynamicConfig = dynamicConfig
|
2018-01-26 10:58:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Describe implements prometheus.Collector and simply calls
|
|
|
|
// the registered describer functions.
|
|
|
|
func (ps *prometheusState) Describe(ch chan<- *stdprometheus.Desc) {
|
2022-07-07 16:00:09 +00:00
|
|
|
for _, v := range ps.vectors {
|
|
|
|
v.Describe(ch)
|
2018-01-26 10:58:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Collect implements prometheus.Collector. It calls the Collect
|
|
|
|
// method of all metrics it received on the collectors channel.
|
2018-06-05 10:32:03 +00:00
|
|
|
// It's also responsible to remove metrics that belong to an outdated configuration.
|
|
|
|
// The removal happens only after their Collect method was called to ensure that
|
|
|
|
// also those metrics will be exported on the current scrape.
|
2018-01-26 10:58:03 +00:00
|
|
|
func (ps *prometheusState) Collect(ch chan<- stdprometheus.Metric) {
|
2022-07-07 16:00:09 +00:00
|
|
|
for _, v := range ps.vectors {
|
|
|
|
v.Collect(ch)
|
|
|
|
}
|
|
|
|
|
2018-01-26 10:58:03 +00:00
|
|
|
ps.mtx.Lock()
|
|
|
|
defer ps.mtx.Unlock()
|
|
|
|
|
2022-07-07 16:00:09 +00:00
|
|
|
for _, ep := range ps.deletedEP {
|
|
|
|
if !ps.dynamicConfig.hasEntryPoint(ep) {
|
|
|
|
ps.DeletePartialMatch(map[string]string{"entrypoint": ep})
|
2018-01-26 10:58:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-07-07 16:00:09 +00:00
|
|
|
for _, router := range ps.deletedRouters {
|
|
|
|
if !ps.dynamicConfig.hasRouter(router) {
|
|
|
|
ps.DeletePartialMatch(map[string]string{"router": router})
|
|
|
|
}
|
2018-06-05 10:32:03 +00:00
|
|
|
}
|
|
|
|
|
2022-07-07 16:00:09 +00:00
|
|
|
for _, service := range ps.deletedServices {
|
|
|
|
if !ps.dynamicConfig.hasService(service) {
|
|
|
|
ps.DeletePartialMatch(map[string]string{"service": service})
|
2021-09-15 15:26:06 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-07-18 08:36:11 +00:00
|
|
|
for service, urls := range ps.deletedURLs {
|
|
|
|
for _, url := range urls {
|
|
|
|
if !ps.dynamicConfig.hasServerURL(service, url) {
|
|
|
|
ps.DeletePartialMatch(map[string]string{"service": service, "url": url})
|
|
|
|
}
|
2018-06-05 10:32:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-07-07 16:00:09 +00:00
|
|
|
ps.deletedEP = nil
|
|
|
|
ps.deletedRouters = nil
|
|
|
|
ps.deletedServices = nil
|
2022-07-18 08:36:11 +00:00
|
|
|
ps.deletedURLs = make(map[string][]string)
|
2022-07-07 16:00:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// DeletePartialMatch deletes all metrics where the variable labels contain all of those passed in as labels.
|
|
|
|
// The order of the labels does not matter.
|
|
|
|
// It returns the number of metrics deleted.
|
|
|
|
func (ps *prometheusState) DeletePartialMatch(labels stdprometheus.Labels) int {
|
|
|
|
var count int
|
|
|
|
for _, elem := range ps.vectors {
|
|
|
|
count += elem.DeletePartialMatch(labels)
|
|
|
|
}
|
|
|
|
return count
|
2018-06-05 10:32:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// dynamicConfig holds the names of the current entryPoints, routers, services,
// and per-service server URLs as map keys, so that checking whether a
// collected metric belongs to the current configuration or to an outdated one
// is a plain map lookup.
//
// Only the presence of a key matters; the boolean values are never inspected.
type dynamicConfig struct {
	entryPoints map[string]bool
	routers     map[string]bool
	// services maps a service name to the set of its server URLs.
	services map[string]map[string]bool
}

// newDynamicConfig returns a dynamicConfig whose maps are allocated and empty.
func newDynamicConfig() *dynamicConfig {
	cfg := new(dynamicConfig)
	cfg.entryPoints = map[string]bool{}
	cfg.routers = map[string]bool{}
	cfg.services = map[string]map[string]bool{}

	return cfg
}

// hasEntryPoint reports whether entrypointName is a known entry point.
func (d *dynamicConfig) hasEntryPoint(entrypointName string) bool {
	_, known := d.entryPoints[entrypointName]
	return known
}

// hasService reports whether serviceName is a known service.
func (d *dynamicConfig) hasService(serviceName string) bool {
	_, known := d.services[serviceName]
	return known
}

// hasRouter reports whether routerName is a known router.
func (d *dynamicConfig) hasRouter(routerName string) bool {
	_, known := d.routers[routerName]
	return known
}

// hasServerURL reports whether serverURL is a known server of serviceName.
// It is false when the service itself is unknown.
func (d *dynamicConfig) hasServerURL(serviceName, serverURL string) bool {
	serverURLs, known := d.services[serviceName]
	if !known {
		return false
	}

	_, known = serverURLs[serverURL]
	return known
}
|
2018-01-26 10:58:03 +00:00
|
|
|
|
2022-07-07 16:00:09 +00:00
|
|
|
func newCounterFrom(opts stdprometheus.CounterOpts, labelNames []string) *counter {
|
2018-01-26 10:58:03 +00:00
|
|
|
cv := stdprometheus.NewCounterVec(opts, labelNames)
|
|
|
|
c := &counter{
|
2022-08-11 08:58:09 +00:00
|
|
|
name: opts.Name,
|
|
|
|
cv: cv,
|
2018-01-26 10:58:03 +00:00
|
|
|
}
|
|
|
|
if len(labelNames) == 0 {
|
2022-07-07 16:00:09 +00:00
|
|
|
c.collector = cv.WithLabelValues()
|
2018-01-26 10:58:03 +00:00
|
|
|
c.Add(0)
|
|
|
|
}
|
|
|
|
return c
|
|
|
|
}
|
|
|
|
|
|
|
|
type counter struct {
|
|
|
|
name string
|
|
|
|
cv *stdprometheus.CounterVec
|
|
|
|
labelNamesValues labelNamesValues
|
2022-07-07 16:00:09 +00:00
|
|
|
collector stdprometheus.Counter
|
2018-01-26 10:58:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (c *counter) With(labelValues ...string) metrics.Counter {
|
2022-07-07 16:00:09 +00:00
|
|
|
lnv := c.labelNamesValues.With(labelValues...)
|
2018-01-26 10:58:03 +00:00
|
|
|
return &counter{
|
|
|
|
name: c.name,
|
|
|
|
cv: c.cv,
|
2022-07-07 16:00:09 +00:00
|
|
|
labelNamesValues: lnv,
|
|
|
|
collector: c.cv.With(lnv.ToLabels()),
|
2018-01-26 10:58:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *counter) Add(delta float64) {
|
2022-07-07 16:00:09 +00:00
|
|
|
c.collector.Add(delta)
|
2018-01-26 10:58:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (c *counter) Describe(ch chan<- *stdprometheus.Desc) {
|
|
|
|
c.cv.Describe(ch)
|
|
|
|
}
|
|
|
|
|
2022-07-07 16:00:09 +00:00
|
|
|
func newGaugeFrom(opts stdprometheus.GaugeOpts, labelNames []string) *gauge {
|
2018-01-26 10:58:03 +00:00
|
|
|
gv := stdprometheus.NewGaugeVec(opts, labelNames)
|
|
|
|
g := &gauge{
|
2022-08-11 08:58:09 +00:00
|
|
|
name: opts.Name,
|
|
|
|
gv: gv,
|
2018-01-26 10:58:03 +00:00
|
|
|
}
|
2022-07-07 16:00:09 +00:00
|
|
|
|
2018-01-26 10:58:03 +00:00
|
|
|
if len(labelNames) == 0 {
|
2022-07-07 16:00:09 +00:00
|
|
|
g.collector = gv.WithLabelValues()
|
2018-01-26 10:58:03 +00:00
|
|
|
g.Set(0)
|
|
|
|
}
|
|
|
|
return g
|
|
|
|
}
|
|
|
|
|
|
|
|
type gauge struct {
|
|
|
|
name string
|
|
|
|
gv *stdprometheus.GaugeVec
|
|
|
|
labelNamesValues labelNamesValues
|
2022-07-07 16:00:09 +00:00
|
|
|
collector stdprometheus.Gauge
|
2018-01-26 10:58:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (g *gauge) With(labelValues ...string) metrics.Gauge {
|
2022-07-07 16:00:09 +00:00
|
|
|
lnv := g.labelNamesValues.With(labelValues...)
|
2018-01-26 10:58:03 +00:00
|
|
|
return &gauge{
|
|
|
|
name: g.name,
|
|
|
|
gv: g.gv,
|
2022-07-07 16:00:09 +00:00
|
|
|
labelNamesValues: lnv,
|
|
|
|
collector: g.gv.With(lnv.ToLabels()),
|
2018-01-26 10:58:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-04-16 08:28:04 +00:00
|
|
|
func (g *gauge) Add(delta float64) {
|
2022-07-07 16:00:09 +00:00
|
|
|
g.collector.Add(delta)
|
2018-04-16 08:28:04 +00:00
|
|
|
}
|
|
|
|
|
2018-01-26 10:58:03 +00:00
|
|
|
func (g *gauge) Set(value float64) {
|
2022-07-07 16:00:09 +00:00
|
|
|
g.collector.Set(value)
|
2018-01-26 10:58:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (g *gauge) Describe(ch chan<- *stdprometheus.Desc) {
|
|
|
|
g.gv.Describe(ch)
|
|
|
|
}
|
|
|
|
|
2022-07-07 16:00:09 +00:00
|
|
|
func newHistogramFrom(opts stdprometheus.HistogramOpts, labelNames []string) *histogram {
|
2018-01-26 10:58:03 +00:00
|
|
|
hv := stdprometheus.NewHistogramVec(opts, labelNames)
|
|
|
|
return &histogram{
|
2022-08-11 08:58:09 +00:00
|
|
|
name: opts.Name,
|
|
|
|
hv: hv,
|
2018-01-26 10:58:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
type histogram struct {
|
|
|
|
name string
|
|
|
|
hv *stdprometheus.HistogramVec
|
|
|
|
labelNamesValues labelNamesValues
|
2022-07-07 16:00:09 +00:00
|
|
|
collector stdprometheus.Observer
|
2018-01-26 10:58:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (h *histogram) With(labelValues ...string) metrics.Histogram {
|
2022-07-07 16:00:09 +00:00
|
|
|
lnv := h.labelNamesValues.With(labelValues...)
|
2018-01-26 10:58:03 +00:00
|
|
|
return &histogram{
|
|
|
|
name: h.name,
|
|
|
|
hv: h.hv,
|
2022-07-07 16:00:09 +00:00
|
|
|
labelNamesValues: lnv,
|
|
|
|
collector: h.hv.With(lnv.ToLabels()),
|
2018-01-26 10:58:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h *histogram) Observe(value float64) {
|
2022-07-07 16:00:09 +00:00
|
|
|
h.collector.Observe(value)
|
2018-01-26 10:58:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (h *histogram) Describe(ch chan<- *stdprometheus.Desc) {
|
|
|
|
h.hv.Describe(ch)
|
|
|
|
}
|
|
|
|
|
|
|
|
// labelNamesValues is a flat list of alternating label names and values
// ("name1", "value1", "name2", "value2", ...). Its With method keeps the list
// well-formed. Metrics may include it as a member to help them satisfy With
// semantics and save some code duplication.
type labelNamesValues []string

// With returns a new labelNamesValues made of the receiver followed by
// labelValues. When labelValues has an odd length, the missing last value is
// filled in with "unknown".
//
// Neither the receiver nor the slice backing labelValues is modified: the
// padding is written to the freshly allocated result, never appended to the
// caller's slice (which could overwrite data behind a slice with spare
// capacity passed as labelValues...).
func (lvs labelNamesValues) With(labelValues ...string) labelNamesValues {
	needsPadding := len(labelValues)%2 != 0

	size := len(lvs) + len(labelValues)
	if needsPadding {
		size++
	}

	labels := make(labelNamesValues, size)
	n := copy(labels, lvs)
	n += copy(labels[n:], labelValues)

	if needsPadding {
		labels[n] = "unknown"
	}

	return labels
}
|
|
|
|
|
|
|
|
// ToLabels is a convenience method to convert a labelNamesValues
|
|
|
|
// to the native prometheus.Labels.
|
|
|
|
func (lvs labelNamesValues) ToLabels() stdprometheus.Labels {
|
2022-07-07 16:00:09 +00:00
|
|
|
labels := make(map[string]string, len(lvs)/2)
|
2018-01-26 10:58:03 +00:00
|
|
|
for i := 0; i < len(lvs); i += 2 {
|
|
|
|
labels[lvs[i]] = lvs[i+1]
|
2017-08-23 18:46:03 +00:00
|
|
|
}
|
2018-01-26 10:58:03 +00:00
|
|
|
return labels
|
2017-08-23 18:46:03 +00:00
|
|
|
}
|