From 7c2af10bbd7791241cea2dce52abf5899e75f3c4 Mon Sep 17 00:00:00 2001 From: mpl Date: Mon, 20 Mar 2023 16:02:06 +0100 Subject: [PATCH] Fix open connections metric Co-authored-by: Romain --- cmd/traefik/traefik.go | 12 ++-- .../content/observability/metrics/overview.md | 50 +++++++++-------- pkg/metrics/datadog.go | 13 ++--- pkg/metrics/datadog_test.go | 8 +-- pkg/metrics/influxdb2.go | 8 +-- pkg/metrics/influxdb2_test.go | 19 +------ pkg/metrics/metrics.go | 50 +++++------------ pkg/metrics/opentelemetry.go | 12 +--- pkg/metrics/opentelemetry_test.go | 19 +------ pkg/metrics/prometheus.go | 34 ++++------- pkg/metrics/prometheus_test.go | 56 +++++-------------- pkg/metrics/statsd.go | 8 +-- pkg/metrics/statsd_test.go | 8 +-- pkg/middlewares/metrics/metrics.go | 8 --- pkg/server/server_entrypoint_tcp.go | 49 ++++++++++------ .../server_entrypoint_tcp_http3_test.go | 2 +- pkg/server/server_entrypoint_tcp_test.go | 6 +- 17 files changed, 126 insertions(+), 236 deletions(-) diff --git a/cmd/traefik/traefik.go b/cmd/traefik/traefik.go index 95d6117ad..661ae8478 100644 --- a/cmd/traefik/traefik.go +++ b/cmd/traefik/traefik.go @@ -193,9 +193,14 @@ func setupServer(staticConfiguration *static.Configuration) (*server.Server, err tsProviders := initTailscaleProviders(staticConfiguration, &providerAggregator) + // Metrics + + metricRegistries := registerMetricClients(staticConfiguration.Metrics) + metricsRegistry := metrics.NewMultiRegistry(metricRegistries) + // Entrypoints - serverEntryPointsTCP, err := server.NewTCPEntryPoints(staticConfiguration.EntryPoints, staticConfiguration.HostResolver) + serverEntryPointsTCP, err := server.NewTCPEntryPoints(staticConfiguration.EntryPoints, staticConfiguration.HostResolver, metricsRegistry) if err != nil { return nil, err } @@ -243,11 +248,6 @@ func setupServer(staticConfiguration *static.Configuration) (*server.Server, err } } - // Metrics - - metricRegistries := registerMetricClients(staticConfiguration.Metrics) - metricsRegistry := metrics.NewMultiRegistry(metricRegistries) - // Service manager factory var spiffeX509Source *workloadapi.X509Source diff --git a/docs/content/observability/metrics/overview.md b/docs/content/observability/metrics/overview.md index 60d4e7d78..795d1a377 100644 --- a/docs/content/observability/metrics/overview.md +++ b/docs/content/observability/metrics/overview.md @@ -16,27 +16,31 @@ Traefik Proxy hosts an official Grafana dashboard for both [on-premises](https:/ ## Global Metrics -| Metric | Type | Description | -|---------------------------------------------|---------|---------------------------------------------------------| -| Config reload total | Count | The total count of configuration reloads. | -| Config reload last success | Gauge | The timestamp of the last configuration reload success. | -| TLS certificates not after | Gauge | The expiration date of certificates. | +| Metric | Type | [Labels](#labels) | Description | +|----------------------------|-------|--------------------------|--------------------------------------------------------------------| +| Config reload total | Count | | The total count of configuration reloads. | +| Config reload last success | Gauge | | The timestamp of the last configuration reload success. | +| Open connections | Gauge | `entrypoint`, `protocol` | The current count of open connections, by entrypoint and protocol. | +| TLS certificates not after | Gauge | | The expiration date of certificates. | ```prom tab="Prometheus" traefik_config_reloads_total traefik_config_last_reload_success +traefik_open_connections traefik_tls_certs_not_after ``` ```dd tab="Datadog" config.reload.total config.reload.lastSuccessTimestamp +open.connections tls.certs.notAfterTimestamp ``` ```influxdb tab="InfluxDB2" traefik.config.reload.total traefik.config.reload.lastSuccessTimestamp +traefik.open.connections traefik.tls.certs.notAfterTimestamp ``` @@ -44,23 +48,35 @@ traefik.tls.certs.notAfterTimestamp # Default prefix: "traefik" {prefix}.config.reload.total {prefix}.config.reload.lastSuccessTimestamp +{prefix}.open.connections {prefix}.tls.certs.notAfterTimestamp ``` ```opentelemetry tab="OpenTelemetry" traefik_config_reloads_total traefik_config_last_reload_success +traefik_open_connections traefik_tls_certs_not_after ``` -## EntryPoint Metrics +### Labels + +Here is a comprehensive list of labels that are provided by the global metrics: + +| Label | Description | example | +|---------------|----------------------------------------|----------------------| +| `entrypoint` | Entrypoint that handled the connection | "example_entrypoint" | +| `protocol` | Connection protocol | "TCP" | + +## HTTP Metrics + +### EntryPoint Metrics | Metric | Type | [Labels](#labels) | Description | |-----------------------|-----------|--------------------------------------------|---------------------------------------------------------------------| | Requests total | Count | `code`, `method`, `protocol`, `entrypoint` | The total count of HTTP requests received by an entrypoint. | | Requests TLS total | Count | `tls_version`, `tls_cipher`, `entrypoint` | The total count of HTTPS requests received by an entrypoint. | | Request duration | Histogram | `code`, `method`, `protocol`, `entrypoint` | Request processing duration histogram on an entrypoint. | -| Open connections | Count | `method`, `protocol`, `entrypoint` | The current count of open connections on an entrypoint. | | Requests bytes total | Count | `code`, `method`, `protocol`, `entrypoint` | The total size of HTTP requests in bytes handled by an entrypoint. | | Responses bytes total | Count | `code`, `method`, `protocol`, `entrypoint` | The total size of HTTP responses in bytes handled by an entrypoint. | @@ -68,7 +84,6 @@ traefik_tls_certs_not_after traefik_entrypoint_requests_total traefik_entrypoint_requests_tls_total traefik_entrypoint_request_duration_seconds -traefik_entrypoint_open_connections traefik_entrypoint_requests_bytes_total traefik_entrypoint_responses_bytes_total ``` @@ -77,7 +92,6 @@ traefik_entrypoint_responses_bytes_total entrypoint.request.total entrypoint.request.tls.total entrypoint.request.duration -entrypoint.connections.open entrypoint.requests.bytes.total entrypoint.responses.bytes.total ``` @@ -86,7 +100,6 @@ entrypoint.responses.bytes.total traefik.entrypoint.requests.total traefik.entrypoint.requests.tls.total traefik.entrypoint.request.duration -traefik.entrypoint.connections.open traefik.entrypoint.requests.bytes.total traefik.entrypoint.responses.bytes.total ``` @@ -96,7 +109,6 @@ traefik.entrypoint.responses.bytes.total {prefix}.entrypoint.request.total {prefix}.entrypoint.request.tls.total {prefix}.entrypoint.request.duration -{prefix}.entrypoint.connections.open {prefix}.entrypoint.requests.bytes.total {prefix}.entrypoint.responses.bytes.total ``` @@ -110,14 +122,13 @@ traefik_entrypoint_requests_bytes_total traefik_entrypoint_responses_bytes_total ``` -## Router Metrics +### Router Metrics | Metric | Type | [Labels](#labels) | Description | |-----------------------|-----------|---------------------------------------------------|----------------------------------------------------------------| | Requests total | Count | `code`, `method`, `protocol`, `router`, `service` | The total count of HTTP requests handled by a router. | | Requests TLS total | Count | `tls_version`, `tls_cipher`, `router`, `service` | The total count of HTTPS requests handled by a router. | | Request duration | Histogram | `code`, `method`, `protocol`, `router`, `service` | Request processing duration histogram on a router. | -| Open connections | Count | `method`, `protocol`, `router`, `service` | The current count of open connections on a router. | | Requests bytes total | Count | `code`, `method`, `protocol`, `router`, `service` | The total size of HTTP requests in bytes handled by a router. | | Responses bytes total | Count | `code`, `method`, `protocol`, `router`, `service` | The total size of HTTP responses in bytes handled by a router. | @@ -125,7 +136,6 @@ traefik_entrypoint_responses_bytes_total traefik_router_requests_total traefik_router_requests_tls_total traefik_router_request_duration_seconds -traefik_router_open_connections traefik_router_requests_bytes_total traefik_router_responses_bytes_total ``` @@ -134,7 +144,6 @@ traefik_router_responses_bytes_total router.request.total router.request.tls.total router.request.duration -router.connections.open router.requests.bytes.total router.responses.bytes.total ``` @@ -143,7 +152,6 @@ router.responses.bytes.total traefik.router.requests.total traefik.router.requests.tls.total traefik.router.request.duration -traefik.router.connections.open traefik.router.requests.bytes.total traefik.router.responses.bytes.total ``` @@ -153,7 +161,6 @@ traefik.router.responses.bytes.total {prefix}.router.request.total {prefix}.router.request.tls.total {prefix}.router.request.duration -{prefix}.router.connections.open {prefix}.router.requests.bytes.total {prefix}.router.responses.bytes.total ``` @@ -167,14 +174,13 @@ traefik_router_requests_bytes_total traefik_router_responses_bytes_total ``` -## Service Metrics +### Service Metrics | Metric | Type | Labels | Description | |-----------------------|-----------|-----------------------------------------|-------------------------------------------------------------| | Requests total | Count | `code`, `method`, `protocol`, `service` | The total count of HTTP requests processed on a service. | | Requests TLS total | Count | `tls_version`, `tls_cipher`, `service` | The total count of HTTPS requests processed on a service. | | Request duration | Histogram | `code`, `method`, `protocol`, `service` | Request processing duration histogram on a service. | -| Open connections | Count | `method`, `protocol`, `service` | The current count of open connections on a service. | | Retries total | Count | `service` | The count of requests retries on a service. | | Server UP | Gauge | `service`, `url` | Current service's server status, 0 for a down or 1 for up. | | Requests bytes total | Count | `code`, `method`, `protocol`, `service` | The total size of requests in bytes received by a service. | @@ -184,7 +190,6 @@ traefik_router_responses_bytes_total traefik_service_requests_total traefik_service_requests_tls_total traefik_service_request_duration_seconds -traefik_service_open_connections traefik_service_retries_total traefik_service_server_up traefik_service_requests_bytes_total @@ -195,7 +200,6 @@ traefik_service_responses_bytes_total service.request.total router.service.tls.total service.request.duration -service.connections.open service.retries.total service.server.up service.requests.bytes.total @@ -206,7 +210,6 @@ service.responses.bytes.total traefik.service.requests.total traefik.service.requests.tls.total traefik.service.request.duration -traefik.service.connections.open traefik.service.retries.total traefik.service.server.up traefik.service.requests.bytes.total @@ -218,7 +221,6 @@ traefik.service.responses.bytes.total {prefix}.service.request.total {prefix}.service.request.tls.total {prefix}.service.request.duration -{prefix}.service.connections.open {prefix}.service.retries.total {prefix}.service.server.up {prefix}.service.requests.bytes.total @@ -236,7 +238,7 @@ traefik_service_requests_bytes_total traefik_service_responses_bytes_total ``` -## Labels +### Labels Here is a comprehensive list of labels that are provided by the metrics: diff --git a/pkg/metrics/datadog.go b/pkg/metrics/datadog.go index 37442e7f2..fe38737e8 100644 --- a/pkg/metrics/datadog.go +++ b/pkg/metrics/datadog.go @@ -18,21 +18,21 @@ var ( // Metric names consistent with https://github.com/DataDog/integrations-extras/pull/64 const ( - ddConfigReloadsName = "config.reload.total" - ddLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp" + ddConfigReloadsName = "config.reload.total" + ddLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp" + ddOpenConnsName = "open.connections" + ddTLSCertsNotAfterTimestampName = "tls.certs.notAfterTimestamp" ddEntryPointReqsName = "entrypoint.request.total" ddEntryPointReqsTLSName = "entrypoint.request.tls.total" ddEntryPointReqDurationName = "entrypoint.request.duration" - ddEntryPointOpenConnsName = "entrypoint.connections.open" ddEntryPointReqsBytesName = "entrypoint.requests.bytes.total" ddEntryPointRespsBytesName = "entrypoint.responses.bytes.total" ddRouterReqsName = "router.request.total" ddRouterReqsTLSName = "router.request.tls.total" ddRouterReqsDurationName = "router.request.duration" - ddRouterOpenConnsName = "router.connections.open" ddRouterReqsBytesName = "router.requests.bytes.total" ddRouterRespsBytesName = "router.responses.bytes.total" @@ -40,7 +40,6 @@ const ( ddServiceReqsTLSName = "service.request.tls.total" ddServiceReqsDurationName = "service.request.duration" ddServiceRetriesName = "service.retries.total" - ddServiceOpenConnsName = "service.connections.open" ddServiceServerUpName = "service.server.up" ddServiceReqsBytesName = "service.requests.bytes.total" ddServiceRespsBytesName = "service.responses.bytes.total" @@ -63,6 +62,7 @@ func RegisterDatadog(ctx context.Context, config *types.Datadog) Registry { registry := &standardRegistry{ configReloadsCounter: datadogClient.NewCounter(ddConfigReloadsName, 1.0), lastConfigReloadSuccessGauge: datadogClient.NewGauge(ddLastConfigReloadSuccessName), + openConnectionsGauge: datadogClient.NewGauge(ddOpenConnsName), tlsCertsNotAfterTimestampGauge: datadogClient.NewGauge(ddTLSCertsNotAfterTimestampName), } @@ -71,7 +71,6 @@ func RegisterDatadog(ctx context.Context, config *types.Datadog) Registry { registry.entryPointReqsCounter = datadogClient.NewCounter(ddEntryPointReqsName, 1.0) registry.entryPointReqsTLSCounter = datadogClient.NewCounter(ddEntryPointReqsTLSName, 1.0) registry.entryPointReqDurationHistogram, _ = NewHistogramWithScale(datadogClient.NewHistogram(ddEntryPointReqDurationName, 1.0), time.Second) - registry.entryPointOpenConnsGauge = datadogClient.NewGauge(ddEntryPointOpenConnsName) registry.entryPointReqsBytesCounter = datadogClient.NewCounter(ddEntryPointReqsBytesName, 1.0) registry.entryPointRespsBytesCounter = datadogClient.NewCounter(ddEntryPointRespsBytesName, 1.0) } @@ -81,7 +80,6 @@ func RegisterDatadog(ctx context.Context, config *types.Datadog) Registry { registry.routerReqsCounter = datadogClient.NewCounter(ddRouterReqsName, 1.0) registry.routerReqsTLSCounter = datadogClient.NewCounter(ddRouterReqsTLSName, 1.0) registry.routerReqDurationHistogram, _ = NewHistogramWithScale(datadogClient.NewHistogram(ddRouterReqsDurationName, 1.0), time.Second) - registry.routerOpenConnsGauge = datadogClient.NewGauge(ddRouterOpenConnsName) registry.routerReqsBytesCounter = datadogClient.NewCounter(ddRouterReqsBytesName, 1.0) registry.routerRespsBytesCounter = datadogClient.NewCounter(ddRouterRespsBytesName, 1.0) } @@ -92,7 +90,6 @@ func RegisterDatadog(ctx context.Context, config *types.Datadog) Registry { registry.serviceReqsTLSCounter = datadogClient.NewCounter(ddServiceReqsTLSName, 1.0) registry.serviceReqDurationHistogram, _ = NewHistogramWithScale(datadogClient.NewHistogram(ddServiceReqsDurationName, 1.0), time.Second) registry.serviceRetriesCounter = datadogClient.NewCounter(ddServiceRetriesName, 1.0) - registry.serviceOpenConnsGauge = datadogClient.NewGauge(ddServiceOpenConnsName) registry.serviceServerUpGauge = datadogClient.NewGauge(ddServiceServerUpName) registry.serviceReqsBytesCounter = datadogClient.NewCounter(ddServiceReqsBytesName, 1.0) registry.serviceRespsBytesCounter = datadogClient.NewCounter(ddServiceRespsBytesName, 1.0) diff --git a/pkg/metrics/datadog_test.go b/pkg/metrics/datadog_test.go index 5a02b4b94..aaf4c0499 100644 --- a/pkg/metrics/datadog_test.go +++ b/pkg/metrics/datadog_test.go @@ -46,13 +46,13 @@ func testDatadogRegistry(t *testing.T, metricsPrefix string, datadogRegistry Reg expected := []string{ metricsPrefix + ".config.reload.total:1.000000|c\n", metricsPrefix + ".config.reload.lastSuccessTimestamp:1.000000|g\n", + metricsPrefix + ".open.connections:1.000000|g|#entrypoint:test,protocol:TCP\n", metricsPrefix + ".tls.certs.notAfterTimestamp:1.000000|g|#key:value\n", metricsPrefix + ".entrypoint.request.total:1.000000|c|#entrypoint:test\n", metricsPrefix + ".entrypoint.request.tls.total:1.000000|c|#entrypoint:test,tls_version:foo,tls_cipher:bar\n", metricsPrefix + ".entrypoint.request.duration:10000.000000|h|#entrypoint:test\n", - metricsPrefix + ".entrypoint.connections.open:1.000000|g|#entrypoint:test\n", metricsPrefix + ".entrypoint.requests.bytes.total:1.000000|c|#entrypoint:test\n", metricsPrefix + ".entrypoint.responses.bytes.total:1.000000|c|#entrypoint:test\n", @@ -60,7 +60,6 @@ func testDatadogRegistry(t *testing.T, metricsPrefix string, datadogRegistry Reg metricsPrefix + ".router.request.total:1.000000|c|#router:demo,service:test,code:200,method:GET\n", metricsPrefix + ".router.request.tls.total:1.000000|c|#router:demo,service:test,tls_version:foo,tls_cipher:bar\n", metricsPrefix + ".router.request.duration:10000.000000|h|#router:demo,service:test,code:200\n", - metricsPrefix + ".router.connections.open:1.000000|g|#router:demo,service:test\n", metricsPrefix + ".router.requests.bytes.total:1.000000|c|#router:demo,service:test,code:200,method:GET\n", metricsPrefix + ".router.responses.bytes.total:1.000000|c|#router:demo,service:test,code:200,method:GET\n", @@ -68,7 +67,6 @@ func testDatadogRegistry(t *testing.T, metricsPrefix string, datadogRegistry Reg metricsPrefix + ".service.request.total:1.000000|c|#service:test,code:200,method:GET\n", metricsPrefix + ".service.request.tls.total:1.000000|c|#service:test,tls_version:foo,tls_cipher:bar\n", metricsPrefix + ".service.request.duration:10000.000000|h|#service:test,code:200\n", - metricsPrefix + ".service.connections.open:1.000000|g|#service:test\n", metricsPrefix + ".service.retries.total:2.000000|c|#service:test\n", metricsPrefix + ".service.request.duration:10000.000000|h|#service:test,code:200\n", metricsPrefix + ".service.server.up:1.000000|g|#service:test,url:http://127.0.0.1,one:two\n", @@ -79,13 +77,13 @@ func testDatadogRegistry(t *testing.T, metricsPrefix string, datadogRegistry Reg udp.ShouldReceiveAll(t, expected, func() { datadogRegistry.ConfigReloadsCounter().Add(1) datadogRegistry.LastConfigReloadSuccessGauge().Add(1) + datadogRegistry.OpenConnectionsGauge().With("entrypoint", "test", "protocol", "TCP").Add(1) datadogRegistry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1) datadogRegistry.EntryPointReqsCounter().With("entrypoint", "test").Add(1) datadogRegistry.EntryPointReqsTLSCounter().With("entrypoint", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1) datadogRegistry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000) - datadogRegistry.EntryPointOpenConnsGauge().With("entrypoint", "test").Set(1) datadogRegistry.EntryPointReqsBytesCounter().With("entrypoint", "test").Add(1) datadogRegistry.EntryPointRespsBytesCounter().With("entrypoint", "test").Add(1) @@ -93,7 +91,6 @@ func testDatadogRegistry(t *testing.T, metricsPrefix string, datadogRegistry Reg datadogRegistry.RouterReqsCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1) datadogRegistry.RouterReqsTLSCounter().With("router", "demo", "service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1) datadogRegistry.RouterReqDurationHistogram().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000) - datadogRegistry.RouterOpenConnsGauge().With("router", "demo", "service", "test").Set(1) datadogRegistry.RouterReqsBytesCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) datadogRegistry.RouterRespsBytesCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) @@ -101,7 +98,6 @@ func testDatadogRegistry(t *testing.T, metricsPrefix string, datadogRegistry Reg datadogRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1) datadogRegistry.ServiceReqsTLSCounter().With("service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1) datadogRegistry.ServiceReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000) - datadogRegistry.ServiceOpenConnsGauge().With("service", "test").Set(1) datadogRegistry.ServiceRetriesCounter().With("service", "test").Add(1) datadogRegistry.ServiceRetriesCounter().With("service", "test").Add(1) datadogRegistry.ServiceServerUpGauge().With("service", "test", "url", "http://127.0.0.1", "one", "two").Set(1) diff --git a/pkg/metrics/influxdb2.go b/pkg/metrics/influxdb2.go index 2ad055585..edd610f05 100644 --- a/pkg/metrics/influxdb2.go +++ b/pkg/metrics/influxdb2.go @@ -26,20 +26,19 @@ var ( const ( influxDBConfigReloadsName = "traefik.config.reload.total" influxDBLastConfigReloadSuccessName = "traefik.config.reload.lastSuccessTimestamp" + influxDBOpenConnsName = "traefik.open.connections" influxDBTLSCertsNotAfterTimestampName = "traefik.tls.certs.notAfterTimestamp" influxDBEntryPointReqsName = "traefik.entrypoint.requests.total" influxDBEntryPointReqsTLSName = "traefik.entrypoint.requests.tls.total" influxDBEntryPointReqDurationName = "traefik.entrypoint.request.duration" - influxDBEntryPointOpenConnsName = "traefik.entrypoint.connections.open" influxDBEntryPointReqsBytesName = "traefik.entrypoint.requests.bytes.total" influxDBEntryPointRespsBytesName = "traefik.entrypoint.responses.bytes.total" influxDBRouterReqsName = "traefik.router.requests.total" influxDBRouterReqsTLSName = "traefik.router.requests.tls.total" influxDBRouterReqsDurationName = "traefik.router.request.duration" - influxDBORouterOpenConnsName = "traefik.router.connections.open" influxDBRouterReqsBytesName = "traefik.router.requests.bytes.total" influxDBRouterRespsBytesName = "traefik.router.responses.bytes.total" @@ -47,7 +46,6 @@ const ( influxDBServiceReqsTLSName = "traefik.service.requests.tls.total" influxDBServiceReqsDurationName = "traefik.service.request.duration" influxDBServiceRetriesTotalName = "traefik.service.retries.total" - influxDBServiceOpenConnsName = "traefik.service.connections.open" influxDBServiceServerUpName = "traefik.service.server.up" influxDBServiceReqsBytesName = "traefik.service.requests.bytes.total" influxDBServiceRespsBytesName = "traefik.service.responses.bytes.total" @@ -83,6 +81,7 @@ func RegisterInfluxDB2(ctx context.Context, config *types.InfluxDB2) Registry { registry := &standardRegistry{ configReloadsCounter: influxDB2Store.NewCounter(influxDBConfigReloadsName), lastConfigReloadSuccessGauge: influxDB2Store.NewGauge(influxDBLastConfigReloadSuccessName), + openConnectionsGauge: influxDB2Store.NewGauge(influxDBOpenConnsName), tlsCertsNotAfterTimestampGauge: influxDB2Store.NewGauge(influxDBTLSCertsNotAfterTimestampName), } @@ -91,7 +90,6 @@ func RegisterInfluxDB2(ctx context.Context, config *types.InfluxDB2) Registry { registry.entryPointReqsCounter = influxDB2Store.NewCounter(influxDBEntryPointReqsName) registry.entryPointReqsTLSCounter = influxDB2Store.NewCounter(influxDBEntryPointReqsTLSName) registry.entryPointReqDurationHistogram, _ = NewHistogramWithScale(influxDB2Store.NewHistogram(influxDBEntryPointReqDurationName), time.Second) - registry.entryPointOpenConnsGauge = influxDB2Store.NewGauge(influxDBEntryPointOpenConnsName) registry.entryPointReqsBytesCounter = influxDB2Store.NewCounter(influxDBEntryPointReqsBytesName) registry.entryPointRespsBytesCounter = influxDB2Store.NewCounter(influxDBEntryPointRespsBytesName) } @@ -101,7 +99,6 @@ func RegisterInfluxDB2(ctx context.Context, config *types.InfluxDB2) Registry { registry.routerReqsCounter = influxDB2Store.NewCounter(influxDBRouterReqsName) registry.routerReqsTLSCounter = influxDB2Store.NewCounter(influxDBRouterReqsTLSName) registry.routerReqDurationHistogram, _ = NewHistogramWithScale(influxDB2Store.NewHistogram(influxDBRouterReqsDurationName), time.Second) - registry.routerOpenConnsGauge = influxDB2Store.NewGauge(influxDBORouterOpenConnsName) registry.routerReqsBytesCounter = influxDB2Store.NewCounter(influxDBRouterReqsBytesName) registry.routerRespsBytesCounter = influxDB2Store.NewCounter(influxDBRouterRespsBytesName) } @@ -112,7 +109,6 @@ func RegisterInfluxDB2(ctx context.Context, config *types.InfluxDB2) Registry { registry.serviceReqsTLSCounter = influxDB2Store.NewCounter(influxDBServiceReqsTLSName) registry.serviceReqDurationHistogram, _ = NewHistogramWithScale(influxDB2Store.NewHistogram(influxDBServiceReqsDurationName), time.Second) registry.serviceRetriesCounter = influxDB2Store.NewCounter(influxDBServiceRetriesTotalName) - registry.serviceOpenConnsGauge = influxDB2Store.NewGauge(influxDBServiceOpenConnsName) registry.serviceServerUpGauge = influxDB2Store.NewGauge(influxDBServiceServerUpName) registry.serviceReqsBytesCounter = influxDB2Store.NewCounter(influxDBServiceReqsBytesName) registry.serviceRespsBytesCounter = influxDB2Store.NewCounter(influxDBServiceRespsBytesName) diff --git a/pkg/metrics/influxdb2_test.go b/pkg/metrics/influxdb2_test.go index 86f952503..ae4f1a08a 100644 --- a/pkg/metrics/influxdb2_test.go +++ b/pkg/metrics/influxdb2_test.go @@ -48,10 +48,12 @@ func TestInfluxDB2(t *testing.T) { expectedServer := []string{ `(traefik\.config\.reload\.total count=1) [\d]{19}`, `(traefik\.config\.reload\.lastSuccessTimestamp value=1) [\d]{19}`, + `(traefik\.open\.connections,entrypoint=test,protocol=TCP value=1) [\d]{19}`, } influxDB2Registry.ConfigReloadsCounter().Add(1) influxDB2Registry.LastConfigReloadSuccessGauge().Set(1) + influxDB2Registry.OpenConnectionsGauge().With("entrypoint", "test", "protocol", "TCP").Set(1) msgServer := <-c assertMessage(t, *msgServer, expectedServer) @@ -69,7 +71,6 @@ func TestInfluxDB2(t *testing.T) { `(traefik\.entrypoint\.requests\.total,code=200,entrypoint=test,method=GET count=1) [\d]{19}`, `(traefik\.entrypoint\.requests\.tls\.total,entrypoint=test,tls_cipher=bar,tls_version=foo count=1) [\d]{19}`, `(traefik\.entrypoint\.request\.duration(?:,code=[\d]{3})?,entrypoint=test p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`, - `(traefik\.entrypoint\.connections\.open,entrypoint=test value=1) [\d]{19}`, `(traefik\.entrypoint\.requests\.bytes\.total,code=200,entrypoint=test,method=GET count=1) [\d]{19}`, `(traefik\.entrypoint\.responses\.bytes\.total,code=200,entrypoint=test,method=GET count=1) [\d]{19}`, } @@ -77,7 +78,6 @@ func TestInfluxDB2(t *testing.T) { influxDB2Registry.EntryPointReqsCounter().With("entrypoint", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) influxDB2Registry.EntryPointReqsTLSCounter().With("entrypoint", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1) influxDB2Registry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000) - influxDB2Registry.EntryPointOpenConnsGauge().With("entrypoint", "test").Set(1) influxDB2Registry.EntryPointReqsBytesCounter().With("entrypoint", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) influxDB2Registry.EntryPointRespsBytesCounter().With("entrypoint", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) msgEntrypoint := <-c @@ -89,7 +89,6 @@ func TestInfluxDB2(t *testing.T) { `(traefik\.router\.requests\.total,code=404,method=GET,router=demo,service=test count=1) [\d]{19}`, `(traefik\.router\.requests\.tls\.total,router=demo,service=test,tls_cipher=bar,tls_version=foo count=1) [\d]{19}`, `(traefik\.router\.request\.duration,code=200,router=demo,service=test p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`, - `(traefik\.router\.connections\.open,router=demo,service=test value=1) [\d]{19}`, `(traefik\.router\.requests\.bytes\.total,code=200,method=GET,router=demo,service=test count=1) [\d]{19}`, `(traefik\.router\.responses\.bytes\.total,code=200,method=GET,router=demo,service=test count=1) [\d]{19}`, } @@ -98,7 +97,6 @@ func TestInfluxDB2(t *testing.T) { influxDB2Registry.RouterReqsCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) influxDB2Registry.RouterReqsTLSCounter().With("router", "demo", "service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1) influxDB2Registry.RouterReqDurationHistogram().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000) - influxDB2Registry.RouterOpenConnsGauge().With("router", "demo", "service", "test").Set(1) influxDB2Registry.RouterReqsBytesCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) influxDB2Registry.RouterRespsBytesCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) msgRouter := <-c @@ -138,19 +136,6 @@ func TestInfluxDB2(t *testing.T) { msgServiceRetries := <-c assertMessage(t, *msgServiceRetries, expectedServiceRetries) - - expectedServiceOpenConns := []string{ - `(traefik\.service\.connections\.open,service=test value=2) [\d]{19}`, - `(traefik\.service\.connections\.open,service=foobar value=1) [\d]{19}`, - } - - influxDB2Registry.ServiceOpenConnsGauge().With("service", "test").Add(1) - influxDB2Registry.ServiceOpenConnsGauge().With("service", "test").Add(1) - influxDB2Registry.ServiceOpenConnsGauge().With("service", "foobar").Add(1) - - msgServiceOpenConns := <-c - - assertMessage(t, *msgServiceOpenConns, expectedServiceOpenConns) } func assertMessage(t *testing.T, msg string, patterns []string) { diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index cf408adb0..16c4982ff 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -23,6 +23,7 @@ type Registry interface { ConfigReloadsCounter() metrics.Counter LastConfigReloadSuccessGauge() metrics.Gauge + OpenConnectionsGauge() metrics.Gauge // TLS @@ -33,7 +34,6 @@ type Registry interface { EntryPointReqsCounter() metrics.Counter EntryPointReqsTLSCounter() metrics.Counter EntryPointReqDurationHistogram() ScalableHistogram - EntryPointOpenConnsGauge() metrics.Gauge EntryPointReqsBytesCounter() metrics.Counter EntryPointRespsBytesCounter() metrics.Counter @@ -42,7 +42,6 @@ type Registry interface { RouterReqsCounter() metrics.Counter RouterReqsTLSCounter() metrics.Counter RouterReqDurationHistogram() ScalableHistogram - RouterOpenConnsGauge() metrics.Gauge RouterReqsBytesCounter() metrics.Counter RouterRespsBytesCounter() metrics.Counter @@ -51,7 +50,6 @@ type Registry interface { ServiceReqsCounter() metrics.Counter ServiceReqsTLSCounter() metrics.Counter ServiceReqDurationHistogram() ScalableHistogram - ServiceOpenConnsGauge() metrics.Gauge ServiceRetriesCounter() metrics.Counter ServiceServerUpGauge() metrics.Gauge ServiceReqsBytesCounter() metrics.Counter @@ -70,23 +68,21 @@ func NewVoidRegistry() Registry { func NewMultiRegistry(registries []Registry) Registry { var configReloadsCounter []metrics.Counter var lastConfigReloadSuccessGauge []metrics.Gauge + var openConnectionsGauge []metrics.Gauge var tlsCertsNotAfterTimestampGauge []metrics.Gauge var entryPointReqsCounter []metrics.Counter var entryPointReqsTLSCounter []metrics.Counter var entryPointReqDurationHistogram []ScalableHistogram - var entryPointOpenConnsGauge []metrics.Gauge var entryPointReqsBytesCounter []metrics.Counter var entryPointRespsBytesCounter []metrics.Counter var routerReqsCounter []metrics.Counter var routerReqsTLSCounter []metrics.Counter var routerReqDurationHistogram []ScalableHistogram - var routerOpenConnsGauge []metrics.Gauge var routerReqsBytesCounter []metrics.Counter var routerRespsBytesCounter []metrics.Counter var serviceReqsCounter []metrics.Counter var serviceReqsTLSCounter []metrics.Counter var serviceReqDurationHistogram []ScalableHistogram - var serviceOpenConnsGauge []metrics.Gauge var serviceRetriesCounter []metrics.Counter var serviceServerUpGauge []metrics.Gauge var serviceReqsBytesCounter []metrics.Counter @@ -99,6 +95,9 @@ func NewMultiRegistry(registries []Registry) Registry { if r.LastConfigReloadSuccessGauge() != nil { lastConfigReloadSuccessGauge = append(lastConfigReloadSuccessGauge, r.LastConfigReloadSuccessGauge()) } + if r.OpenConnectionsGauge() != nil { + openConnectionsGauge = append(openConnectionsGauge, r.OpenConnectionsGauge()) + } if r.TLSCertsNotAfterTimestampGauge() != nil { tlsCertsNotAfterTimestampGauge = append(tlsCertsNotAfterTimestampGauge, r.TLSCertsNotAfterTimestampGauge()) } @@ -111,9 +110,6 @@ func NewMultiRegistry(registries []Registry) Registry { if r.EntryPointReqDurationHistogram() != nil { entryPointReqDurationHistogram = append(entryPointReqDurationHistogram, r.EntryPointReqDurationHistogram()) } - if r.EntryPointOpenConnsGauge() != nil { - entryPointOpenConnsGauge = append(entryPointOpenConnsGauge, r.EntryPointOpenConnsGauge()) - } if r.EntryPointReqsBytesCounter() != nil { entryPointReqsBytesCounter = append(entryPointReqsBytesCounter, r.EntryPointReqsBytesCounter()) } @@ -129,9 +125,6 @@ func NewMultiRegistry(registries []Registry) Registry { if r.RouterReqDurationHistogram() != nil { routerReqDurationHistogram = append(routerReqDurationHistogram, r.RouterReqDurationHistogram()) } - if r.RouterOpenConnsGauge() != nil { - routerOpenConnsGauge = append(routerOpenConnsGauge, r.RouterOpenConnsGauge()) - } if r.RouterReqsBytesCounter() != nil { routerReqsBytesCounter = append(routerReqsBytesCounter, r.RouterReqsBytesCounter()) } @@ -147,9 +140,6 @@ func NewMultiRegistry(registries []Registry) Registry { if r.ServiceReqDurationHistogram() != nil { serviceReqDurationHistogram = append(serviceReqDurationHistogram, r.ServiceReqDurationHistogram()) } - if r.ServiceOpenConnsGauge() != nil { - serviceOpenConnsGauge = append(serviceOpenConnsGauge, r.ServiceOpenConnsGauge()) - } if r.ServiceRetriesCounter() != nil { serviceRetriesCounter = append(serviceRetriesCounter, r.ServiceRetriesCounter()) } @@ -165,28 +155,26 @@ func NewMultiRegistry(registries []Registry) Registry { } return &standardRegistry{ - epEnabled: len(entryPointReqsCounter) > 0 || len(entryPointReqDurationHistogram) > 0 || len(entryPointOpenConnsGauge) > 0, - svcEnabled: len(serviceReqsCounter) > 0 || len(serviceReqDurationHistogram) > 0 || len(serviceOpenConnsGauge) > 0 || len(serviceRetriesCounter) > 0 || len(serviceServerUpGauge) > 0, - routerEnabled: len(routerReqsCounter) > 0 || len(routerReqDurationHistogram) > 0 || len(routerOpenConnsGauge) > 0, + epEnabled: len(entryPointReqsCounter) > 0 || len(entryPointReqDurationHistogram) > 0, + svcEnabled: len(serviceReqsCounter) > 0 || len(serviceReqDurationHistogram) > 0 || len(serviceRetriesCounter) > 0 || len(serviceServerUpGauge) > 0, + routerEnabled: len(routerReqsCounter) > 0 || len(routerReqDurationHistogram) > 0, configReloadsCounter: multi.NewCounter(configReloadsCounter...), lastConfigReloadSuccessGauge: multi.NewGauge(lastConfigReloadSuccessGauge...), + openConnectionsGauge: multi.NewGauge(openConnectionsGauge...), tlsCertsNotAfterTimestampGauge: multi.NewGauge(tlsCertsNotAfterTimestampGauge...), entryPointReqsCounter: multi.NewCounter(entryPointReqsCounter...), entryPointReqsTLSCounter: multi.NewCounter(entryPointReqsTLSCounter...), entryPointReqDurationHistogram: MultiHistogram(entryPointReqDurationHistogram), - entryPointOpenConnsGauge: multi.NewGauge(entryPointOpenConnsGauge...), entryPointReqsBytesCounter: multi.NewCounter(entryPointReqsBytesCounter...), entryPointRespsBytesCounter: multi.NewCounter(entryPointRespsBytesCounter...), routerReqsCounter: multi.NewCounter(routerReqsCounter...), routerReqsTLSCounter: multi.NewCounter(routerReqsTLSCounter...), routerReqDurationHistogram: MultiHistogram(routerReqDurationHistogram), - routerOpenConnsGauge: multi.NewGauge(routerOpenConnsGauge...), routerReqsBytesCounter: multi.NewCounter(routerReqsBytesCounter...), routerRespsBytesCounter: multi.NewCounter(routerRespsBytesCounter...), serviceReqsCounter: multi.NewCounter(serviceReqsCounter...), serviceReqsTLSCounter: multi.NewCounter(serviceReqsTLSCounter...), serviceReqDurationHistogram: MultiHistogram(serviceReqDurationHistogram), - serviceOpenConnsGauge: multi.NewGauge(serviceOpenConnsGauge...), serviceRetriesCounter: multi.NewCounter(serviceRetriesCounter...), serviceServerUpGauge: multi.NewGauge(serviceServerUpGauge...), serviceReqsBytesCounter: multi.NewCounter(serviceReqsBytesCounter...), @@ -200,23 +188,21 @@ type standardRegistry struct { svcEnabled bool configReloadsCounter metrics.Counter lastConfigReloadSuccessGauge metrics.Gauge + openConnectionsGauge metrics.Gauge tlsCertsNotAfterTimestampGauge metrics.Gauge entryPointReqsCounter metrics.Counter entryPointReqsTLSCounter metrics.Counter entryPointReqDurationHistogram ScalableHistogram - entryPointOpenConnsGauge metrics.Gauge entryPointReqsBytesCounter metrics.Counter entryPointRespsBytesCounter metrics.Counter routerReqsCounter metrics.Counter routerReqsTLSCounter metrics.Counter routerReqDurationHistogram ScalableHistogram - routerOpenConnsGauge metrics.Gauge routerReqsBytesCounter metrics.Counter routerRespsBytesCounter metrics.Counter serviceReqsCounter metrics.Counter serviceReqsTLSCounter metrics.Counter serviceReqDurationHistogram ScalableHistogram - serviceOpenConnsGauge metrics.Gauge serviceRetriesCounter metrics.Counter serviceServerUpGauge metrics.Gauge serviceReqsBytesCounter metrics.Counter @@ -243,6 +229,10 @@ func (r *standardRegistry) LastConfigReloadSuccessGauge() metrics.Gauge { return r.lastConfigReloadSuccessGauge } +func (r *standardRegistry) OpenConnectionsGauge() metrics.Gauge { + return r.openConnectionsGauge +} + func (r *standardRegistry) TLSCertsNotAfterTimestampGauge() metrics.Gauge { return r.tlsCertsNotAfterTimestampGauge } @@ -259,10 +249,6 @@ func (r *standardRegistry) EntryPointReqDurationHistogram() ScalableHistogram { return r.entryPointReqDurationHistogram } -func (r *standardRegistry) EntryPointOpenConnsGauge() metrics.Gauge { - return r.entryPointOpenConnsGauge -} - func (r *standardRegistry) EntryPointReqsBytesCounter() metrics.Counter { return r.entryPointReqsBytesCounter } @@ -283,10 +269,6 @@ func (r *standardRegistry) RouterReqDurationHistogram() ScalableHistogram { return r.routerReqDurationHistogram } -func (r *standardRegistry) RouterOpenConnsGauge() metrics.Gauge { - return r.routerOpenConnsGauge -} - func (r *standardRegistry) RouterReqsBytesCounter() metrics.Counter { return r.routerReqsBytesCounter } @@ -307,10 +289,6 @@ func (r *standardRegistry) ServiceReqDurationHistogram() ScalableHistogram { return r.serviceReqDurationHistogram } -func (r *standardRegistry) ServiceOpenConnsGauge() metrics.Gauge { - return r.serviceOpenConnsGauge -} - func (r *standardRegistry) ServiceRetriesCounter() metrics.Counter { return r.serviceRetriesCounter } diff --git a/pkg/metrics/opentelemetry.go b/pkg/metrics/opentelemetry.go index 8afeb54ba..0037da428 100644 --- a/pkg/metrics/opentelemetry.go +++ b/pkg/metrics/opentelemetry.go @@ -58,7 +58,8 @@ func RegisterOpenTelemetry(ctx context.Context, config *types.OpenTelemetry) Reg svcEnabled: config.AddServicesLabels, configReloadsCounter: newOTLPCounterFrom(meter, configReloadsTotalName, "Config reloads"), lastConfigReloadSuccessGauge: newOTLPGaugeFrom(meter, configLastReloadSuccessName, "Last config reload success", unit.Milliseconds), - tlsCertsNotAfterTimestampGauge: newOTLPGaugeFrom(meter, tlsCertsNotAfterTimestamp, "Certificate expiration timestamp", unit.Milliseconds), + openConnectionsGauge: newOTLPGaugeFrom(meter, openConnectionsName, "How many open connections exist, by entryPoint and protocol", unit.Dimensionless), + tlsCertsNotAfterTimestampGauge: newOTLPGaugeFrom(meter, tlsCertsNotAfterTimestampName, "Certificate expiration timestamp", unit.Milliseconds), } if config.AddEntryPointsLabels { @@ -69,9 +70,6 @@ func RegisterOpenTelemetry(ctx context.Context, config *types.OpenTelemetry) Reg reg.entryPointReqDurationHistogram, _ = NewHistogramWithScale(newOTLPHistogramFrom(meter, entryPointReqDurationName, "How long it took to process the request on an entrypoint, partitioned by status code, protocol, and method.", unit.Milliseconds), time.Second) - reg.entryPointOpenConnsGauge = newOTLPGaugeFrom(meter, entryPointOpenConnsName, - "How many open connections exist on an entrypoint, partitioned by method and protocol.", - unit.Dimensionless) } if config.AddRoutersLabels { @@ -82,9 +80,6 @@ func RegisterOpenTelemetry(ctx context.Context, config *types.OpenTelemetry) Reg reg.routerReqDurationHistogram, _ = NewHistogramWithScale(newOTLPHistogramFrom(meter, routerReqDurationName, "How long it took to process the request on a router, partitioned by service, status code, protocol, and method.", unit.Milliseconds), time.Second) - reg.routerOpenConnsGauge = newOTLPGaugeFrom(meter, routerOpenConnsName, - "How many open connections exist on a router, partitioned by service, method, and protocol.", - unit.Dimensionless) } if config.AddServicesLabels { @@ -95,9 +90,6 @@ func RegisterOpenTelemetry(ctx context.Context, config *types.OpenTelemetry) Reg reg.serviceReqDurationHistogram, _ = NewHistogramWithScale(newOTLPHistogramFrom(meter, serviceReqDurationName, "How long it took to process the request on a service, partitioned by status code, protocol, and method.", unit.Milliseconds), time.Second) - reg.serviceOpenConnsGauge = newOTLPGaugeFrom(meter, serviceOpenConnsName, - "How many open connections exist on a service, partitioned by method and protocol.", - unit.Dimensionless) reg.serviceRetriesCounter = newOTLPCounterFrom(meter, serviceRetriesTotalName, "How many request retries happened on a service.") reg.serviceServerUpGauge = newOTLPGaugeFrom(meter, serviceServerUpName, diff --git a/pkg/metrics/opentelemetry_test.go b/pkg/metrics/opentelemetry_test.go index 15d9a214e..8a5339efc 100644 --- a/pkg/metrics/opentelemetry_test.go +++ b/pkg/metrics/opentelemetry_test.go @@ -341,10 +341,12 @@ func TestOpenTelemetry(t *testing.T) { expected = append(expected, `({"name":"traefik_config_reloads_total","description":"Config reloads","unit":"1","sum":{"dataPoints":\[{"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, `({"name":"traefik_config_last_reload_success","description":"Last config reload success","unit":"ms","gauge":{"dataPoints":\[{"startTimeUnixNano":"[\d]{20}","timeUnixNano":"[\d]{19}","asDouble":1}\]}})`, + `({"name":"traefik_open_connections","description":"How many open connections exist, by entryPoint and protocol","unit":"1","gauge":{"dataPoints":\[{"attributes":\[{"key":"entrypoint","value":{"stringValue":"test"}},{"key":"protocol","value":{"stringValue":"TCP"}}\],"startTimeUnixNano":"[\d]{20}","timeUnixNano":"[\d]{19}","asDouble":1}\]}})`, ) registry.ConfigReloadsCounter().Add(1) registry.LastConfigReloadSuccessGauge().Set(1) + registry.OpenConnectionsGauge().With("entrypoint", "test", "protocol", "TCP").Set(1) msgServer := <-c assertMessage(t, *msgServer, expected) @@ -362,13 +364,11 @@ func TestOpenTelemetry(t *testing.T) { `({"name":"traefik_entrypoint_requests_total","description":"How many HTTP requests processed on an entrypoint, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"200"}},{"key":"entrypoint","value":{"stringValue":"test1"}},{"key":"method","value":{"stringValue":"GET"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, `({"name":"traefik_entrypoint_requests_tls_total","description":"How many HTTP requests with TLS processed on an entrypoint, partitioned by TLS Version and TLS cipher Used.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"entrypoint","value":{"stringValue":"test2"}},{"key":"tls_cipher","value":{"stringValue":"bar"}},{"key":"tls_version","value":{"stringValue":"foo"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, `({"name":"traefik_entrypoint_request_duration_seconds","description":"How long it took to process the request on an entrypoint, partitioned by status code, protocol, and method.","unit":"ms","histogram":{"dataPoints":\[{"attributes":\[{"key":"entrypoint","value":{"stringValue":"test3"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","count":"1","sum":10000,"bucketCounts":\["0","0","0","0","0","0","0","0","0","0","0","1"\],"explicitBounds":\[0.005,0.01,0.025,0.05,0.1,0.25,0.5,1,2.5,5,10\],"min":10000,"max":10000}\],"aggregationTemporality":2}})`, - `({"name":"traefik_entrypoint_open_connections","description":"How many open connections exist on an entrypoint, partitioned by method and protocol.","unit":"1","gauge":{"dataPoints":\[{"attributes":\[{"key":"entrypoint","value":{"stringValue":"test4"}}\],"startTimeUnixNano":"[\d]{20}","timeUnixNano":"[\d]{19}","asDouble":1}\]}})`, ) registry.EntryPointReqsCounter().With("entrypoint", "test1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) registry.EntryPointReqsTLSCounter().With("entrypoint", "test2", "tls_version", "foo", "tls_cipher", "bar").Add(1) registry.EntryPointReqDurationHistogram().With("entrypoint", "test3").Observe(10000) - registry.EntryPointOpenConnsGauge().With("entrypoint", "test4").Set(1) msgEntrypoint := <-c assertMessage(t, *msgEntrypoint, expected) @@ -377,14 +377,12 @@ func TestOpenTelemetry(t *testing.T) { `({"name":"traefik_router_requests_total","description":"How many HTTP requests are processed on a router, partitioned by service, status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"(?:200|404)"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"router","value":{"stringValue":"RouterReqsCounter"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1},{"attributes":\[{"key":"code","value":{"stringValue":"(?:200|404)"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"router","value":{"stringValue":"RouterReqsCounter"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, `({"name":"traefik_router_requests_tls_total","description":"How many HTTP requests with TLS are processed on a router, partitioned by service, TLS Version, and TLS cipher Used.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"router","value":{"stringValue":"demo"}},{"key":"service","value":{"stringValue":"test"}},{"key":"tls_cipher","value":{"stringValue":"bar"}},{"key":"tls_version","value":{"stringValue":"foo"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, `({"name":"traefik_router_request_duration_seconds","description":"How long it took to process the request on a router, partitioned by service, status code, protocol, and method.","unit":"ms","histogram":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"200"}},{"key":"router","value":{"stringValue":"demo"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","count":"1","sum":10000,"bucketCounts":\["0","0","0","0","0","0","0","0","0","0","0","1"\],"explicitBounds":\[0.005,0.01,0.025,0.05,0.1,0.25,0.5,1,2.5,5,10\],"min":10000,"max":10000}\],"aggregationTemporality":2}})`, - `({"name":"traefik_router_open_connections","description":"How many open connections exist on a router, partitioned by service, method, and protocol.","unit":"1","gauge":{"dataPoints":\[{"attributes":\[{"key":"router","value":{"stringValue":"demo"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{20}","timeUnixNano":"[\d]{19}","asDouble":1}\]}})`, ) registry.RouterReqsCounter().With("router", "RouterReqsCounter", "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1) registry.RouterReqsCounter().With("router", "RouterReqsCounter", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) registry.RouterReqsTLSCounter().With("router", "demo", "service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1) registry.RouterReqDurationHistogram().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000) - registry.RouterOpenConnsGauge().With("router", "demo", "service", "test").Set(1) msgRouter := <-c assertMessage(t, *msgRouter, expected) @@ -417,19 +415,6 @@ func TestOpenTelemetry(t *testing.T) { assertMessage(t, *msgServiceRetries, expected) - expected = append(expected, - `({"attributes":\[{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{20}","timeUnixNano":"[\d]{19}","asDouble":3})`, - `({"attributes":\[{"key":"service","value":{"stringValue":"foobar"}}\],"startTimeUnixNano":"[\d]{20}","timeUnixNano":"[\d]{19}","asDouble":1})`, - ) - - registry.ServiceOpenConnsGauge().With("service", "test").Set(1) - registry.ServiceOpenConnsGauge().With("service", "test").Add(1) - registry.ServiceOpenConnsGauge().With("service", "test").Add(1) - registry.ServiceOpenConnsGauge().With("service", "foobar").Add(1) - msgServiceOpenConns := <-c - - assertMessage(t, *msgServiceOpenConns, expected) - // We cannot rely on the previous expected pattern, // because this pattern was for matching only one dataPoint in the histogram, // and as soon as the EntryPointReqDurationHistogram.Observe is called, diff --git a/pkg/metrics/prometheus.go b/pkg/metrics/prometheus.go index ed27cedcb..7bae0b410 100644 --- a/pkg/metrics/prometheus.go +++ b/pkg/metrics/prometheus.go @@ -24,17 +24,17 @@ const ( metricConfigPrefix = MetricNamePrefix + "config_" configReloadsTotalName = metricConfigPrefix + "reloads_total" configLastReloadSuccessName = metricConfigPrefix + "last_reload_success" + openConnectionsName = MetricNamePrefix + "open_connections" // TLS. - metricsTLSPrefix = MetricNamePrefix + "tls_" - tlsCertsNotAfterTimestamp = metricsTLSPrefix + "certs_not_after" + metricsTLSPrefix = MetricNamePrefix + "tls_" + tlsCertsNotAfterTimestampName = metricsTLSPrefix + "certs_not_after" // entry point. metricEntryPointPrefix = MetricNamePrefix + "entrypoint_" entryPointReqsTotalName = metricEntryPointPrefix + "requests_total" entryPointReqsTLSTotalName = metricEntryPointPrefix + "requests_tls_total" entryPointReqDurationName = metricEntryPointPrefix + "request_duration_seconds" - entryPointOpenConnsName = metricEntryPointPrefix + "open_connections" entryPointReqsBytesTotalName = metricEntryPointPrefix + "requests_bytes_total" entryPointRespsBytesTotalName = metricEntryPointPrefix + "responses_bytes_total" @@ -43,7 +43,6 @@ const ( routerReqsTotalName = metricRouterPrefix + "requests_total" routerReqsTLSTotalName = metricRouterPrefix + "requests_tls_total" routerReqDurationName = metricRouterPrefix + "request_duration_seconds" - routerOpenConnsName = metricRouterPrefix + "open_connections" routerReqsBytesTotalName = metricRouterPrefix + "requests_bytes_total" routerRespsBytesTotalName = metricRouterPrefix + "responses_bytes_total" @@ -52,7 +51,6 @@ const ( serviceReqsTotalName = metricServicePrefix + "requests_total" serviceReqsTLSTotalName = metricServicePrefix + "requests_tls_total" serviceReqDurationName = metricServicePrefix + "request_duration_seconds" - serviceOpenConnsName = metricServicePrefix + "open_connections" serviceRetriesTotalName = metricServicePrefix + "retries_total" serviceServerUpName = metricServicePrefix + "server_up" serviceReqsBytesTotalName = metricServicePrefix + "requests_bytes_total" @@ -121,14 +119,19 @@ func initStandardRegistry(config *types.Prometheus) Registry { Help: "Last config reload success", }, []string{}) tlsCertsNotAfterTimestamp := newGaugeFrom(stdprometheus.GaugeOpts{ - Name: tlsCertsNotAfterTimestamp, + Name: tlsCertsNotAfterTimestampName, Help: "Certificate expiration timestamp", }, []string{"cn", "serial", "sans"}) + openConnections := newGaugeFrom(stdprometheus.GaugeOpts{ + Name: openConnectionsName, + Help: "How many open connections exist, by entryPoint and protocol", + }, []string{"entrypoint", "protocol"}) promState.vectors = []vector{ configReloads.cv, lastConfigReloadSuccess.gv, tlsCertsNotAfterTimestamp.gv, + openConnections.gv, } reg := &standardRegistry{ @@ -138,6 +141,7 @@ func initStandardRegistry(config *types.Prometheus) Registry { configReloadsCounter: configReloads, lastConfigReloadSuccessGauge: lastConfigReloadSuccess, tlsCertsNotAfterTimestampGauge: tlsCertsNotAfterTimestamp, + openConnectionsGauge: openConnections, } if config.AddEntryPointsLabels { @@ -154,10 +158,6 @@ func initStandardRegistry(config *types.Prometheus) Registry { Help: "How long it took to process the request on an entrypoint, partitioned by status code, protocol, and method.", Buckets: buckets, }, []string{"code", "method", "protocol", "entrypoint"}) - entryPointOpenConns := newGaugeFrom(stdprometheus.GaugeOpts{ - Name: entryPointOpenConnsName, - Help: "How many open connections exist on an entrypoint, partitioned by method and protocol.", - }, []string{"method", "protocol", "entrypoint"}) entryPointReqsBytesTotal := newCounterFrom(stdprometheus.CounterOpts{ Name: entryPointReqsBytesTotalName, Help: "The total size of requests in bytes handled by an entrypoint, partitioned by status code, protocol, and method.", @@ -171,7 +171,6 @@ func initStandardRegistry(config *types.Prometheus) Registry { entryPointReqs.cv, entryPointReqsTLS.cv, entryPointReqDurations.hv, - entryPointOpenConns.gv, entryPointReqsBytesTotal.cv, entryPointRespsBytesTotal.cv, ) @@ -179,7 +178,6 @@ func initStandardRegistry(config *types.Prometheus) Registry { reg.entryPointReqsCounter = entryPointReqs reg.entryPointReqsTLSCounter = entryPointReqsTLS reg.entryPointReqDurationHistogram, _ = NewHistogramWithScale(entryPointReqDurations, time.Second) - reg.entryPointOpenConnsGauge = entryPointOpenConns reg.entryPointReqsBytesCounter = entryPointReqsBytesTotal reg.entryPointRespsBytesCounter = entryPointRespsBytesTotal } @@ -198,10 +196,6 @@ func initStandardRegistry(config *types.Prometheus) Registry { Help: "How long it took to process the request on a router, partitioned by service, status code, protocol, and method.", Buckets: buckets, }, []string{"code", "method", "protocol", "router", "service"}) - routerOpenConns := newGaugeFrom(stdprometheus.GaugeOpts{ - Name: routerOpenConnsName, - Help: "How many open connections exist on a router, partitioned by service, method, and protocol.", - }, []string{"method", "protocol", "router", "service"}) routerReqsBytesTotal := newCounterFrom(stdprometheus.CounterOpts{ Name: routerReqsBytesTotalName, Help: "The total size of requests in bytes handled by a router, partitioned by service, status code, protocol, and method.", @@ -215,14 +209,12 @@ func initStandardRegistry(config *types.Prometheus) Registry { routerReqs.cv, routerReqsTLS.cv, routerReqDurations.hv, - routerOpenConns.gv, routerReqsBytesTotal.cv, routerRespsBytesTotal.cv, ) reg.routerReqsCounter = routerReqs reg.routerReqsTLSCounter = routerReqsTLS reg.routerReqDurationHistogram, _ = NewHistogramWithScale(routerReqDurations, time.Second) - reg.routerOpenConnsGauge = routerOpenConns reg.routerReqsBytesCounter = routerReqsBytesTotal reg.routerRespsBytesCounter = routerRespsBytesTotal } @@ -241,10 +233,6 @@ func initStandardRegistry(config *types.Prometheus) Registry { Help: "How long it took to process the request on a service, partitioned by status code, protocol, and method.", Buckets: buckets, }, []string{"code", "method", "protocol", "service"}) - serviceOpenConns := newGaugeFrom(stdprometheus.GaugeOpts{ - Name: serviceOpenConnsName, - Help: "How many open connections exist on a service, partitioned by method and protocol.", - }, []string{"method", "protocol", "service"}) serviceRetries := newCounterFrom(stdprometheus.CounterOpts{ Name: serviceRetriesTotalName, Help: "How many request retries happened on a service.", @@ -266,7 +254,6 @@ func initStandardRegistry(config *types.Prometheus) Registry { serviceReqs.cv, serviceReqsTLS.cv, serviceReqDurations.hv, - serviceOpenConns.gv, serviceRetries.cv, serviceServerUp.gv, serviceReqsBytesTotal.cv, @@ -276,7 +263,6 @@ func initStandardRegistry(config *types.Prometheus) Registry { reg.serviceReqsCounter = serviceReqs reg.serviceReqsTLSCounter = serviceReqsTLS reg.serviceReqDurationHistogram, _ = NewHistogramWithScale(serviceReqDurations, time.Second) - reg.serviceOpenConnsGauge = serviceOpenConns reg.serviceRetriesCounter = serviceRetries reg.serviceServerUpGauge = serviceServerUp reg.serviceReqsBytesCounter = serviceReqsBytesTotal diff --git a/pkg/metrics/prometheus_test.go b/pkg/metrics/prometheus_test.go index a631cf1cd..c83255eb5 100644 --- a/pkg/metrics/prometheus_test.go +++ b/pkg/metrics/prometheus_test.go @@ -101,6 +101,10 @@ func TestPrometheus(t *testing.T) { prometheusRegistry.ConfigReloadsCounter().Add(1) prometheusRegistry.LastConfigReloadSuccessGauge().Set(float64(time.Now().Unix())) + prometheusRegistry. + OpenConnectionsGauge(). + With("entrypoint", "test", "protocol", "TCP"). + Set(1) prometheusRegistry. TLSCertsNotAfterTimestampGauge(). @@ -115,10 +119,6 @@ func TestPrometheus(t *testing.T) { EntryPointReqDurationHistogram(). With("code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http", "entrypoint", "http"). Observe(1) - prometheusRegistry. - EntryPointOpenConnsGauge(). - With("method", http.MethodGet, "protocol", "http", "entrypoint", "http"). - Set(1) prometheusRegistry. EntryPointRespsBytesCounter(). With("code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http", "entrypoint", "http"). @@ -140,10 +140,6 @@ func TestPrometheus(t *testing.T) { RouterReqDurationHistogram(). With("router", "demo", "service", "service1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http"). Observe(10000) - prometheusRegistry. - RouterOpenConnsGauge(). - With("router", "demo", "service", "service1", "method", http.MethodGet, "protocol", "http"). - Set(1) prometheusRegistry. RouterRespsBytesCounter(). With("router", "demo", "service", "service1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http"). @@ -165,10 +161,6 @@ func TestPrometheus(t *testing.T) { ServiceReqDurationHistogram(). With("service", "service1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http"). Observe(10000) - prometheusRegistry. - ServiceOpenConnsGauge(). - With("service", "service1", "method", http.MethodGet, "protocol", "http"). - Set(1) prometheusRegistry. ServiceRetriesCounter(). With("service", "service1"). @@ -204,13 +196,21 @@ func TestPrometheus(t *testing.T) { assert: buildTimestampAssert(t, configLastReloadSuccessName), }, { - name: tlsCertsNotAfterTimestamp, + name: openConnectionsName, + labels: map[string]string{ + "protocol": "TCP", + "entrypoint": "test", + }, + assert: buildGaugeAssert(t, openConnectionsName, 1), + }, + { + name: tlsCertsNotAfterTimestampName, labels: map[string]string{ "cn": "value", "serial": "value", "sans": "value", }, - assert: buildTimestampAssert(t, tlsCertsNotAfterTimestamp), + assert: buildTimestampAssert(t, tlsCertsNotAfterTimestampName), }, { name: entryPointReqsTotalName, @@ -232,15 +232,6 @@ func TestPrometheus(t *testing.T) { }, assert: buildHistogramAssert(t, entryPointReqDurationName, 1), }, - { - name: entryPointOpenConnsName, - labels: map[string]string{ - "method": http.MethodGet, - "protocol": "http", - "entrypoint": "http", - }, - assert: buildGaugeAssert(t, entryPointOpenConnsName, 1), - }, { name: entryPointReqsBytesTotalName, labels: map[string]string{ @@ -293,16 +284,6 @@ func TestPrometheus(t *testing.T) { }, assert: buildHistogramAssert(t, routerReqDurationName, 1), }, - { - name: routerOpenConnsName, - labels: map[string]string{ - "method": http.MethodGet, - "protocol": "http", - "service": "service1", - "router": "demo", - }, - assert: buildGaugeAssert(t, routerOpenConnsName, 1), - }, { name: routerReqsBytesTotalName, labels: map[string]string{ @@ -354,15 +335,6 @@ func TestPrometheus(t *testing.T) { }, assert: buildHistogramAssert(t, serviceReqDurationName, 1), }, - { - name: serviceOpenConnsName, - labels: map[string]string{ - "method": http.MethodGet, - "protocol": "http", - "service": "service1", - }, - assert: buildGaugeAssert(t, serviceOpenConnsName, 1), - }, { name: serviceRetriesTotalName, labels: map[string]string{ diff --git a/pkg/metrics/statsd.go b/pkg/metrics/statsd.go index cc2664f8f..5818db989 100644 --- a/pkg/metrics/statsd.go +++ b/pkg/metrics/statsd.go @@ -19,20 +19,19 @@ var ( const ( statsdConfigReloadsName = "config.reload.total" statsdLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp" + statsdOpenConnectionsName = "open.connections" statsdTLSCertsNotAfterTimestampName = "tls.certs.notAfterTimestamp" statsdEntryPointReqsName = "entrypoint.request.total" statsdEntryPointReqsTLSName = "entrypoint.request.tls.total" statsdEntryPointReqDurationName = "entrypoint.request.duration" - statsdEntryPointOpenConnsName = "entrypoint.connections.open" statsdEntryPointReqsBytesName = "entrypoint.requests.bytes.total" statsdEntryPointRespsBytesName = "entrypoint.responses.bytes.total" statsdRouterReqsName = "router.request.total" statsdRouterReqsTLSName = "router.request.tls.total" statsdRouterReqsDurationName = "router.request.duration" - statsdRouterOpenConnsName = "router.connections.open" statsdRouterReqsBytesName = "router.requests.bytes.total" statsdRouterRespsBytesName = "router.responses.bytes.total" @@ -41,7 +40,6 @@ const ( statsdServiceReqsDurationName = "service.request.duration" statsdServiceRetriesTotalName = "service.retries.total" statsdServiceServerUpName = "service.server.up" - statsdServiceOpenConnsName = "service.connections.open" statsdServiceReqsBytesName = "service.requests.bytes.total" statsdServiceRespsBytesName = "service.responses.bytes.total" ) @@ -63,6 +61,7 @@ func RegisterStatsd(ctx context.Context, config *types.Statsd) Registry { configReloadsCounter: statsdClient.NewCounter(statsdConfigReloadsName, 1.0), lastConfigReloadSuccessGauge: statsdClient.NewGauge(statsdLastConfigReloadSuccessName), tlsCertsNotAfterTimestampGauge: statsdClient.NewGauge(statsdTLSCertsNotAfterTimestampName), + openConnectionsGauge: statsdClient.NewGauge(statsdOpenConnectionsName), } if config.AddEntryPointsLabels { @@ -70,7 +69,6 @@ func RegisterStatsd(ctx context.Context, config *types.Statsd) Registry { registry.entryPointReqsCounter = statsdClient.NewCounter(statsdEntryPointReqsName, 1.0) registry.entryPointReqsTLSCounter = statsdClient.NewCounter(statsdEntryPointReqsTLSName, 1.0) registry.entryPointReqDurationHistogram, _ = NewHistogramWithScale(statsdClient.NewTiming(statsdEntryPointReqDurationName, 1.0), time.Millisecond) - registry.entryPointOpenConnsGauge = statsdClient.NewGauge(statsdEntryPointOpenConnsName) registry.entryPointReqsBytesCounter = statsdClient.NewCounter(statsdEntryPointReqsBytesName, 1.0) registry.entryPointRespsBytesCounter = statsdClient.NewCounter(statsdEntryPointRespsBytesName, 1.0) } @@ -80,7 +78,6 @@ func RegisterStatsd(ctx context.Context, config *types.Statsd) Registry { registry.routerReqsCounter = statsdClient.NewCounter(statsdRouterReqsName, 1.0) registry.routerReqsTLSCounter = statsdClient.NewCounter(statsdRouterReqsTLSName, 1.0) registry.routerReqDurationHistogram, _ = NewHistogramWithScale(statsdClient.NewTiming(statsdRouterReqsDurationName, 1.0), time.Millisecond) - registry.routerOpenConnsGauge = statsdClient.NewGauge(statsdRouterOpenConnsName) registry.routerReqsBytesCounter = statsdClient.NewCounter(statsdRouterReqsBytesName, 1.0) registry.routerRespsBytesCounter = statsdClient.NewCounter(statsdRouterRespsBytesName, 1.0) } @@ -91,7 +88,6 @@ func RegisterStatsd(ctx context.Context, config *types.Statsd) Registry { registry.serviceReqsTLSCounter = statsdClient.NewCounter(statsdServiceReqsTLSName, 1.0) registry.serviceReqDurationHistogram, _ = NewHistogramWithScale(statsdClient.NewTiming(statsdServiceReqsDurationName, 1.0), time.Millisecond) registry.serviceRetriesCounter = statsdClient.NewCounter(statsdServiceRetriesTotalName, 1.0) - registry.serviceOpenConnsGauge = statsdClient.NewGauge(statsdServiceOpenConnsName) registry.serviceServerUpGauge = statsdClient.NewGauge(statsdServiceServerUpName) registry.serviceReqsBytesCounter = statsdClient.NewCounter(statsdServiceReqsBytesName, 1.0) registry.serviceRespsBytesCounter = statsdClient.NewCounter(statsdServiceRespsBytesName, 1.0) diff --git a/pkg/metrics/statsd_test.go b/pkg/metrics/statsd_test.go index 611b26df8..cde3c78d5 100644 --- a/pkg/metrics/statsd_test.go +++ b/pkg/metrics/statsd_test.go @@ -50,27 +50,25 @@ func testRegistry(t *testing.T, metricsPrefix string, registry Registry) { expected := []string{ metricsPrefix + ".config.reload.total:1.000000|c\n", metricsPrefix + ".config.reload.lastSuccessTimestamp:1.000000|g\n", + metricsPrefix + ".open.connections:1.000000|g\n", metricsPrefix + ".tls.certs.notAfterTimestamp:1.000000|g\n", metricsPrefix + ".entrypoint.request.total:1.000000|c\n", metricsPrefix + ".entrypoint.request.tls.total:1.000000|c\n", metricsPrefix + ".entrypoint.request.duration:10000.000000|ms", - metricsPrefix + ".entrypoint.connections.open:1.000000|g\n", metricsPrefix + ".entrypoint.requests.bytes.total:1.000000|c\n", metricsPrefix + ".entrypoint.responses.bytes.total:1.000000|c\n", metricsPrefix + ".router.request.total:2.000000|c\n", metricsPrefix + ".router.request.tls.total:1.000000|c\n", metricsPrefix + ".router.request.duration:10000.000000|ms", - metricsPrefix + ".router.connections.open:1.000000|g\n", metricsPrefix + ".router.requests.bytes.total:1.000000|c\n", metricsPrefix + ".router.responses.bytes.total:1.000000|c\n", metricsPrefix + ".service.request.total:2.000000|c\n", metricsPrefix + ".service.request.tls.total:1.000000|c\n", metricsPrefix + ".service.request.duration:10000.000000|ms", - metricsPrefix + ".service.connections.open:1.000000|g\n", metricsPrefix + ".service.retries.total:2.000000|c\n", metricsPrefix + ".service.server.up:1.000000|g\n", metricsPrefix + ".service.requests.bytes.total:1.000000|c\n", @@ -80,13 +78,13 @@ func testRegistry(t *testing.T, metricsPrefix string, registry Registry) { udp.ShouldReceiveAll(t, expected, func() { registry.ConfigReloadsCounter().Add(1) registry.LastConfigReloadSuccessGauge().Set(1) + registry.OpenConnectionsGauge().With("entrypoint", "test", "protocol", "TCP").Set(1) registry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1) registry.EntryPointReqsCounter().With("entrypoint", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) registry.EntryPointReqsTLSCounter().With("entrypoint", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1) registry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000) - registry.EntryPointOpenConnsGauge().With("entrypoint", "test").Set(1) registry.EntryPointReqsBytesCounter().With("entrypoint", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) registry.EntryPointRespsBytesCounter().With("entrypoint", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) @@ -94,7 +92,6 @@ func testRegistry(t *testing.T, metricsPrefix string, registry Registry) { registry.RouterReqsCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) registry.RouterReqsTLSCounter().With("router", "demo", "service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1) registry.RouterReqDurationHistogram().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000) - registry.RouterOpenConnsGauge().With("router", "demo", "service", "test").Set(1) registry.RouterReqsBytesCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) registry.RouterRespsBytesCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) @@ -102,7 +99,6 @@ func testRegistry(t *testing.T, metricsPrefix string, registry Registry) { registry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1) registry.ServiceReqsTLSCounter().With("service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1) registry.ServiceReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000) - registry.ServiceOpenConnsGauge().With("service", "test").Set(1) registry.ServiceRetriesCounter().With("service", "test").Add(1) registry.ServiceRetriesCounter().With("service", "test").Add(1) registry.ServiceServerUpGauge().With("service:test", "url", "http://127.0.0.1").Set(1) diff --git a/pkg/middlewares/metrics/metrics.go b/pkg/middlewares/metrics/metrics.go index 816c453f6..2b1ee097d 100644 --- a/pkg/middlewares/metrics/metrics.go +++ b/pkg/middlewares/metrics/metrics.go @@ -36,7 +36,6 @@ type metricsMiddleware struct { reqsCounter gokitmetrics.Counter reqsTLSCounter gokitmetrics.Counter reqDurationHistogram metrics.ScalableHistogram - openConnsGauge gokitmetrics.Gauge reqsBytesCounter gokitmetrics.Counter respsBytesCounter gokitmetrics.Counter baseLabels []string @@ -51,7 +50,6 @@ func NewEntryPointMiddleware(ctx context.Context, next http.Handler, registry me reqsCounter: registry.EntryPointReqsCounter(), reqsTLSCounter: registry.EntryPointReqsTLSCounter(), reqDurationHistogram: registry.EntryPointReqDurationHistogram(), - openConnsGauge: registry.EntryPointOpenConnsGauge(), reqsBytesCounter: registry.EntryPointReqsBytesCounter(), respsBytesCounter: registry.EntryPointRespsBytesCounter(), baseLabels: []string{"entrypoint", entryPointName}, @@ -67,7 +65,6 @@ func NewRouterMiddleware(ctx context.Context, next http.Handler, registry metric reqsCounter: registry.RouterReqsCounter(), reqsTLSCounter: registry.RouterReqsTLSCounter(), reqDurationHistogram: registry.RouterReqDurationHistogram(), - openConnsGauge: registry.RouterOpenConnsGauge(), reqsBytesCounter: registry.RouterReqsBytesCounter(), respsBytesCounter: registry.RouterRespsBytesCounter(), baseLabels: []string{"router", routerName, "service", serviceName}, @@ -83,7 +80,6 @@ func NewServiceMiddleware(ctx context.Context, next http.Handler, registry metri reqsCounter: registry.ServiceReqsCounter(), reqsTLSCounter: registry.ServiceReqsTLSCounter(), reqDurationHistogram: registry.ServiceReqDurationHistogram(), - openConnsGauge: registry.ServiceOpenConnsGauge(), reqsBytesCounter: registry.ServiceReqsBytesCounter(), respsBytesCounter: registry.ServiceRespsBytesCounter(), baseLabels: []string{"service", serviceName}, @@ -112,10 +108,6 @@ func (m *metricsMiddleware) ServeHTTP(rw http.ResponseWriter, req *http.Request) labels = append(labels, "method", getMethod(req)) labels = append(labels, "protocol", proto) - openConnsGauge := m.openConnsGauge.With(labels...) - openConnsGauge.Add(1) - defer openConnsGauge.Add(-1) - // TLS metrics if req.TLS != nil { var tlsLabels []string diff --git a/pkg/server/server_entrypoint_tcp.go b/pkg/server/server_entrypoint_tcp.go index 49a6bd4fc..6f8bfba02 100644 --- a/pkg/server/server_entrypoint_tcp.go +++ b/pkg/server/server_entrypoint_tcp.go @@ -15,12 +15,14 @@ import ( "time" "github.com/containous/alice" + gokitmetrics "github.com/go-kit/kit/metrics" "github.com/pires/go-proxyproto" "github.com/rs/zerolog" "github.com/rs/zerolog/log" "github.com/traefik/traefik/v3/pkg/config/static" "github.com/traefik/traefik/v3/pkg/ip" "github.com/traefik/traefik/v3/pkg/logs" + "github.com/traefik/traefik/v3/pkg/metrics" "github.com/traefik/traefik/v3/pkg/middlewares" "github.com/traefik/traefik/v3/pkg/middlewares/contenttype" "github.com/traefik/traefik/v3/pkg/middlewares/forwardedheaders" @@ -67,7 +69,7 @@ func (h *httpForwarder) Accept() (net.Conn, error) { type TCPEntryPoints map[string]*TCPEntryPoint // NewTCPEntryPoints creates a new TCPEntryPoints. -func NewTCPEntryPoints(entryPointsConfig static.EntryPoints, hostResolverConfig *types.HostResolverConfig) (TCPEntryPoints, error) { +func NewTCPEntryPoints(entryPointsConfig static.EntryPoints, hostResolverConfig *types.HostResolverConfig, metricsRegistry metrics.Registry) (TCPEntryPoints, error) { serverEntryPointsTCP := make(TCPEntryPoints) for entryPointName, config := range entryPointsConfig { protocol, err := config.GetProtocol() @@ -81,7 +83,11 @@ func NewTCPEntryPoints(entryPointsConfig static.EntryPoints, hostResolverConfig ctx := log.With().Str(logs.EntryPointName, entryPointName).Logger().WithContext(context.Background()) - serverEntryPointsTCP[entryPointName], err = NewTCPEntryPoint(ctx, config, hostResolverConfig) + openConnectionsGauge := metricsRegistry. + OpenConnectionsGauge(). + With("entrypoint", entryPointName, "protocol", "TCP") + + serverEntryPointsTCP[entryPointName], err = NewTCPEntryPoint(ctx, config, hostResolverConfig, openConnectionsGauge) if err != nil { return nil, fmt.Errorf("error while building entryPoint %s: %w", entryPointName, err) } @@ -137,8 +143,8 @@ type TCPEntryPoint struct { } // NewTCPEntryPoint creates a new TCPEntryPoint. -func NewTCPEntryPoint(ctx context.Context, configuration *static.EntryPoint, hostResolverConfig *types.HostResolverConfig) (*TCPEntryPoint, error) { - tracker := newConnectionTracker() +func NewTCPEntryPoint(ctx context.Context, configuration *static.EntryPoint, hostResolverConfig *types.HostResolverConfig, openConnectionsGauge gokitmetrics.Gauge) (*TCPEntryPoint, error) { + tracker := newConnectionTracker(openConnectionsGauge) listener, err := buildListener(ctx, configuration) if err != nil { @@ -440,34 +446,45 @@ func buildListener(ctx context.Context, entryPoint *static.EntryPoint) (net.List return listener, nil } -func newConnectionTracker() *connectionTracker { +func newConnectionTracker(openConnectionsGauge gokitmetrics.Gauge) *connectionTracker { return &connectionTracker{ - conns: make(map[net.Conn]struct{}), + conns: make(map[net.Conn]struct{}), + openConnectionsGauge: openConnectionsGauge, } } type connectionTracker struct { - conns map[net.Conn]struct{} - lock sync.RWMutex + connsMu sync.RWMutex + conns map[net.Conn]struct{} + + openConnectionsGauge gokitmetrics.Gauge } // AddConnection add a connection in the tracked connections list. func (c *connectionTracker) AddConnection(conn net.Conn) { - c.lock.Lock() - defer c.lock.Unlock() + c.connsMu.Lock() c.conns[conn] = struct{}{} + c.connsMu.Unlock() + + if c.openConnectionsGauge != nil { + c.openConnectionsGauge.Add(1) + } } // RemoveConnection remove a connection from the tracked connections list. func (c *connectionTracker) RemoveConnection(conn net.Conn) { - c.lock.Lock() - defer c.lock.Unlock() + c.connsMu.Lock() delete(c.conns, conn) + c.connsMu.Unlock() + + if c.openConnectionsGauge != nil { + c.openConnectionsGauge.Add(-1) + } } func (c *connectionTracker) isEmpty() bool { - c.lock.RLock() - defer c.lock.RUnlock() + c.connsMu.RLock() + defer c.connsMu.RUnlock() return len(c.conns) == 0 } @@ -489,8 +506,8 @@ func (c *connectionTracker) Shutdown(ctx context.Context) error { // Close close all the connections in the tracked connections list. func (c *connectionTracker) Close() { - c.lock.Lock() - defer c.lock.Unlock() + c.connsMu.Lock() + defer c.connsMu.Unlock() for conn := range c.conns { if err := conn.Close(); err != nil { log.Error().Err(err).Msg("Error while closing connection") diff --git a/pkg/server/server_entrypoint_tcp_http3_test.go b/pkg/server/server_entrypoint_tcp_http3_test.go index b4f509f95..9b440be4b 100644 --- a/pkg/server/server_entrypoint_tcp_http3_test.go +++ b/pkg/server/server_entrypoint_tcp_http3_test.go @@ -93,7 +93,7 @@ func TestHTTP3AdvertisedPort(t *testing.T) { HTTP3: &static.HTTP3Config{ AdvertisedPort: 8080, }, - }, nil) + }, nil, nil) require.NoError(t, err) router, err := tcprouter.NewRouter() diff --git a/pkg/server/server_entrypoint_tcp_test.go b/pkg/server/server_entrypoint_tcp_test.go index 8fb5f582c..3567ca47f 100644 --- a/pkg/server/server_entrypoint_tcp_test.go +++ b/pkg/server/server_entrypoint_tcp_test.go @@ -79,7 +79,7 @@ func testShutdown(t *testing.T, router *tcprouter.Router) { Transport: epConfig, ForwardedHeaders: &static.ForwardedHeaders{}, HTTP2: &static.HTTP2Config{}, - }, nil) + }, nil, nil) require.NoError(t, err) conn, err := startEntrypoint(entryPoint, router) @@ -164,7 +164,7 @@ func TestReadTimeoutWithoutFirstByte(t *testing.T) { Transport: epConfig, ForwardedHeaders: &static.ForwardedHeaders{}, HTTP2: &static.HTTP2Config{}, - }, nil) + }, nil, nil) require.NoError(t, err) router := &tcprouter.Router{} @@ -201,7 +201,7 @@ func TestReadTimeoutWithFirstByte(t *testing.T) { Transport: epConfig, ForwardedHeaders: &static.ForwardedHeaders{}, HTTP2: &static.HTTP2Config{}, - }, nil) + }, nil, nil) require.NoError(t, err) router := &tcprouter.Router{}