From eccfcc0924558340f5868dbd7f7db310e5758be5 Mon Sep 17 00:00:00 2001 From: Carlos Martell <95250108+cmartell-at-ocp@users.noreply.github.com> Date: Fri, 27 Sep 2024 02:58:05 -0700 Subject: [PATCH] feat: allow setting service.name for OTLP metrics --- .golangci.yml | 4 + .../observability/metrics/opentelemetry.md | 22 ++ .../reference/static-configuration/cli-ref.md | 3 + .../reference/static-configuration/env-ref.md | 3 + .../reference/static-configuration/file.toml | 1 + .../reference/static-configuration/file.yaml | 1 + pkg/metrics/opentelemetry.go | 2 +- pkg/metrics/opentelemetry_test.go | 307 ++++++++++-------- pkg/types/metrics.go | 2 + 9 files changed, 202 insertions(+), 143 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index f790c5150..c034d0c3b 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -283,3 +283,7 @@ issues: - path: pkg/provider/acme/local_store.go linters: - musttag + - path: pkg/types/metrics.go + linters: + - goconst + diff --git a/docs/content/observability/metrics/opentelemetry.md b/docs/content/observability/metrics/opentelemetry.md index 1c762fc2a..e71e5ec63 100644 --- a/docs/content/observability/metrics/opentelemetry.md +++ b/docs/content/observability/metrics/opentelemetry.md @@ -139,6 +139,28 @@ metrics: --metrics.otlp.pushInterval=10s ``` +#### `serviceName` + +_Optional, Default="traefik"_ + +OTEL service name to use. + +```yaml tab="File (YAML)" +metrics: + otlp: + serviceName: name +``` + +```toml tab="File (TOML)" +[metrics] + [metrics.otlp] + serviceName = "name" +``` + +```bash tab="CLI" +--metrics.otlp.serviceName=name +``` + ### HTTP configuration _Optional_ diff --git a/docs/content/reference/static-configuration/cli-ref.md b/docs/content/reference/static-configuration/cli-ref.md index b0045c025..8bb8850a7 100644 --- a/docs/content/reference/static-configuration/cli-ref.md +++ b/docs/content/reference/static-configuration/cli-ref.md @@ -429,6 +429,9 @@ TLS key `--metrics.otlp.pushinterval`: Period between calls to collect a checkpoint. (Default: ```10```) +`--metrics.otlp.servicename`: +OTEL service name to use. (Default: ```traefik```) + `--metrics.prometheus`: Prometheus metrics exporter type. (Default: ```false```) diff --git a/docs/content/reference/static-configuration/env-ref.md b/docs/content/reference/static-configuration/env-ref.md index 1306419fc..480125d06 100644 --- a/docs/content/reference/static-configuration/env-ref.md +++ b/docs/content/reference/static-configuration/env-ref.md @@ -429,6 +429,9 @@ TLS key `TRAEFIK_METRICS_OTLP_PUSHINTERVAL`: Period between calls to collect a checkpoint. (Default: ```10```) +`TRAEFIK_METRICS_OTLP_SERVICENAME`: +OTEL service name to use. (Default: ```traefik```) + `TRAEFIK_METRICS_PROMETHEUS`: Prometheus metrics exporter type. (Default: ```false```) diff --git a/docs/content/reference/static-configuration/file.toml b/docs/content/reference/static-configuration/file.toml index 94fa46f6d..92244954c 100644 --- a/docs/content/reference/static-configuration/file.toml +++ b/docs/content/reference/static-configuration/file.toml @@ -342,6 +342,7 @@ addServicesLabels = true explicitBoundaries = [42.0, 42.0] pushInterval = "42s" + serviceName = "foobar" [metrics.otlp.grpc] endpoint = "foobar" insecure = true diff --git a/docs/content/reference/static-configuration/file.yaml b/docs/content/reference/static-configuration/file.yaml index 9be48940b..a547708fc 100644 --- a/docs/content/reference/static-configuration/file.yaml +++ b/docs/content/reference/static-configuration/file.yaml @@ -402,6 +402,7 @@ metrics: - 42 - 42 pushInterval: 42s + serviceName: foobar ping: entryPoint: foobar manualRouting: true diff --git a/pkg/metrics/opentelemetry.go b/pkg/metrics/opentelemetry.go index d34ee41f4..21f91a89a 100644 --- a/pkg/metrics/opentelemetry.go +++ b/pkg/metrics/opentelemetry.go @@ -207,7 +207,7 @@ func newOpenTelemetryMeterProvider(ctx context.Context, config *types.OTLP) (*sd } res, err := resource.New(ctx, - resource.WithAttributes(semconv.ServiceNameKey.String("traefik")), + resource.WithAttributes(semconv.ServiceNameKey.String(config.ServiceName)), resource.WithAttributes(semconv.ServiceVersionKey.String(version.Version)), resource.WithFromEnv(), resource.WithTelemetrySDK(), diff --git a/pkg/metrics/opentelemetry_test.go b/pkg/metrics/opentelemetry_test.go index 54b87ce60..61f1cb9d8 100644 --- a/pkg/metrics/opentelemetry_test.go +++ b/pkg/metrics/opentelemetry_test.go @@ -282,151 +282,174 @@ func TestOpenTelemetry_GaugeCollectorSet(t *testing.T) { } func TestOpenTelemetry(t *testing.T) { - c := make(chan *string, 5) - - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - gzr, err := gzip.NewReader(r.Body) - require.NoError(t, err) - - body, err := io.ReadAll(gzr) - require.NoError(t, err) - - req := pmetricotlp.NewExportRequest() - err = req.UnmarshalProto(body) - require.NoError(t, err) - - marshalledReq, err := json.Marshal(req) - require.NoError(t, err) - - bodyStr := string(marshalledReq) - c <- &bodyStr - - w.WriteHeader(http.StatusOK) - })) - - t.Cleanup(func() { - StopOpenTelemetry() - ts.Close() - }) - - var cfg types.OTLP - (&cfg).SetDefaults() - cfg.AddRoutersLabels = true - cfg.HTTP = &types.OtelHTTP{ - Endpoint: ts.URL, - } - cfg.PushInterval = ptypes.Duration(10 * time.Millisecond) - - registry := RegisterOpenTelemetry(context.Background(), &cfg) - require.NotNil(t, registry) - - if !registry.IsEpEnabled() || !registry.IsRouterEnabled() || !registry.IsSvcEnabled() { - t.Fatalf("registry should return true for IsEnabled(), IsRouterEnabled() and IsSvcEnabled()") + tests := []struct { + desc string + serviceName string + }{ + { + desc: "default", + }, + { + desc: "custom-service-name", + serviceName: "custom-service-name", + }, } - expected := []string{ - `({"key":"service.name","value":{"stringValue":"traefik"}})`, - `({"key":"service.version","value":{"stringValue":"` + version.Version + `"}})`, + for _, test := range tests { + t.Run(test.desc, func(t *testing.T) { + c := make(chan *string, 5) + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gzr, err := gzip.NewReader(r.Body) + require.NoError(t, err) + + body, err := io.ReadAll(gzr) + require.NoError(t, err) + + req := pmetricotlp.NewExportRequest() + err = req.UnmarshalProto(body) + require.NoError(t, err) + + marshalledReq, err := json.Marshal(req) + require.NoError(t, err) + + bodyStr := string(marshalledReq) + c <- &bodyStr + + w.WriteHeader(http.StatusOK) + })) + + t.Cleanup(func() { + StopOpenTelemetry() + ts.Close() + }) + + var cfg types.OTLP + (&cfg).SetDefaults() + cfg.AddRoutersLabels = true + cfg.HTTP = &types.OtelHTTP{ + Endpoint: ts.URL, + } + cfg.PushInterval = ptypes.Duration(10 * time.Millisecond) + + wantServiceName := "traefik" + if test.serviceName != "" { + cfg.ServiceName = test.serviceName + wantServiceName = test.serviceName + } + + registry := RegisterOpenTelemetry(context.Background(), &cfg) + require.NotNil(t, registry) + + if !registry.IsEpEnabled() || !registry.IsRouterEnabled() || !registry.IsSvcEnabled() { + t.Fatalf("registry should return true for IsEnabled(), IsRouterEnabled() and IsSvcEnabled()") + } + + expected := []string{ + `({"key":"service.name","value":{"stringValue":"` + wantServiceName + `"}})`, + `({"key":"service.version","value":{"stringValue":"` + version.Version + `"}})`, + } + + tryAssertMessage(t, c, expected) + + // TODO: the len of startUnixNano is no supposed to be 20, it should be 19 + expectedConfig := []string{ + `({"name":"traefik_config_reloads_total","description":"Config reloads","unit":"1","sum":{"dataPoints":\[{"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, + `({"name":"traefik_config_last_reload_success","description":"Last config reload success","unit":"ms","gauge":{"dataPoints":\[{"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\]}})`, + `({"name":"traefik_open_connections","description":"How many open connections exist, by entryPoint and protocol","unit":"1","gauge":{"dataPoints":\[{"attributes":\[{"key":"entrypoint","value":{"stringValue":"test"}},{"key":"protocol","value":{"stringValue":"TCP"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\]}})`, + } + + registry.ConfigReloadsCounter().Add(1) + registry.LastConfigReloadSuccessGauge().Set(1) + registry.OpenConnectionsGauge().With("entrypoint", "test", "protocol", "TCP").Set(1) + + tryAssertMessage(t, c, expectedConfig) + + expectedTLSCerts := []string{ + `({"name":"traefik_tls_certs_not_after","description":"Certificate expiration timestamp","unit":"ms","gauge":{"dataPoints":\[{"attributes":\[{"key":"key","value":{"stringValue":"value"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\]}})`, + } + + registry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1) + + tryAssertMessage(t, c, expectedTLSCerts) + + expectedEntryPoints := []string{ + `({"name":"traefik_entrypoint_requests_total","description":"How many HTTP requests processed on an entrypoint, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"200"}},{"key":"entrypoint","value":{"stringValue":"test1"}},{"key":"method","value":{"stringValue":"GET"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, + `({"name":"traefik_entrypoint_requests_tls_total","description":"How many HTTP requests with TLS processed on an entrypoint, partitioned by TLS Version and TLS cipher Used.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"entrypoint","value":{"stringValue":"test2"}},{"key":"tls_cipher","value":{"stringValue":"bar"}},{"key":"tls_version","value":{"stringValue":"foo"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, + `({"name":"traefik_entrypoint_request_duration_seconds","description":"How long it took to process the request on an entrypoint, partitioned by status code, protocol, and method.","unit":"ms","histogram":{"dataPoints":\[{"attributes":\[{"key":"entrypoint","value":{"stringValue":"test3"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","count":"1","sum":10000,"bucketCounts":\["0","0","0","0","0","0","0","0","0","0","0","0","0","0","1"\],"explicitBounds":\[0.005,0.01,0.025,0.05,0.075,0.1,0.25,0.5,0.75,1,2.5,5,7.5,10\],"min":10000,"max":10000}\],"aggregationTemporality":2}})`, + `({"name":"traefik_entrypoint_requests_bytes_total","description":"The total size of requests in bytes handled by an entrypoint, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"200"}},{"key":"entrypoint","value":{"stringValue":"test1"}},{"key":"method","value":{"stringValue":"GET"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, + `({"name":"traefik_entrypoint_responses_bytes_total","description":"The total size of responses in bytes handled by an entrypoint, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"200"}},{"key":"entrypoint","value":{"stringValue":"test1"}},{"key":"method","value":{"stringValue":"GET"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, + } + + registry.EntryPointReqsCounter().With(nil, "entrypoint", "test1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) + registry.EntryPointReqsTLSCounter().With("entrypoint", "test2", "tls_version", "foo", "tls_cipher", "bar").Add(1) + registry.EntryPointReqDurationHistogram().With("entrypoint", "test3").Observe(10000) + registry.EntryPointReqsBytesCounter().With("entrypoint", "test1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) + registry.EntryPointRespsBytesCounter().With("entrypoint", "test1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) + + tryAssertMessage(t, c, expectedEntryPoints) + + expectedRouters := []string{ + `({"name":"traefik_router_requests_total","description":"How many HTTP requests are processed on a router, partitioned by service, status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"(?:200|404)"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"router","value":{"stringValue":"RouterReqsCounter"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1},{"attributes":\[{"key":"code","value":{"stringValue":"(?:200|404)"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"router","value":{"stringValue":"RouterReqsCounter"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, + `({"name":"traefik_router_requests_tls_total","description":"How many HTTP requests with TLS are processed on a router, partitioned by service, TLS Version, and TLS cipher Used.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"router","value":{"stringValue":"demo"}},{"key":"service","value":{"stringValue":"test"}},{"key":"tls_cipher","value":{"stringValue":"bar"}},{"key":"tls_version","value":{"stringValue":"foo"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, + `({"name":"traefik_router_request_duration_seconds","description":"How long it took to process the request on a router, partitioned by service, status code, protocol, and method.","unit":"ms","histogram":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"200"}},{"key":"router","value":{"stringValue":"demo"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","count":"1","sum":10000,"bucketCounts":\["0","0","0","0","0","0","0","0","0","0","0","0","0","0","1"\],"explicitBounds":\[0.005,0.01,0.025,0.05,0.075,0.1,0.25,0.5,0.75,1,2.5,5,7.5,10\],"min":10000,"max":10000}\],"aggregationTemporality":2}})`, + `({"name":"traefik_router_requests_bytes_total","description":"The total size of requests in bytes handled by a router, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"404"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"router","value":{"stringValue":"RouterReqsCounter"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, + `({"name":"traefik_router_responses_bytes_total","description":"The total size of responses in bytes handled by a router, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"404"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"router","value":{"stringValue":"RouterReqsCounter"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, + } + + registry.RouterReqsCounter().With(nil, "router", "RouterReqsCounter", "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1) + registry.RouterReqsCounter().With(nil, "router", "RouterReqsCounter", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) + registry.RouterReqsTLSCounter().With("router", "demo", "service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1) + registry.RouterReqDurationHistogram().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000) + registry.RouterReqsBytesCounter().With("router", "RouterReqsCounter", "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1) + registry.RouterRespsBytesCounter().With("router", "RouterReqsCounter", "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1) + + tryAssertMessage(t, c, expectedRouters) + + expectedServices := []string{ + `({"name":"traefik_service_requests_total","description":"How many HTTP requests processed on a service, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"(?:200|404)"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"service","value":{"stringValue":"ServiceReqsCounter"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1},{"attributes":\[{"key":"code","value":{"stringValue":"(?:200|404)"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"service","value":{"stringValue":"ServiceReqsCounter"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, + `({"name":"traefik_service_requests_tls_total","description":"How many HTTP requests with TLS processed on a service, partitioned by TLS version and TLS cipher.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"service","value":{"stringValue":"test"}},{"key":"tls_cipher","value":{"stringValue":"bar"}},{"key":"tls_version","value":{"stringValue":"foo"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, + `({"name":"traefik_service_request_duration_seconds","description":"How long it took to process the request on a service, partitioned by status code, protocol, and method.","unit":"ms","histogram":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"200"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","count":"1","sum":10000,"bucketCounts":\["0","0","0","0","0","0","0","0","0","0","0","0","0","0","1"\],"explicitBounds":\[0.005,0.01,0.025,0.05,0.075,0.1,0.25,0.5,0.75,1,2.5,5,7.5,10\],"min":10000,"max":10000}\],"aggregationTemporality":2}})`, + `({"name":"traefik_service_server_up","description":"service server is up, described by gauge value of 0 or 1.","unit":"1","gauge":{"dataPoints":\[{"attributes":\[{"key":"service","value":{"stringValue":"test"}},{"key":"url","value":{"stringValue":"http://127.0.0.1"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\]}})`, + `({"name":"traefik_service_requests_bytes_total","description":"The total size of requests in bytes received by a service, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"404"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"service","value":{"stringValue":"ServiceReqsCounter"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, + `({"name":"traefik_service_responses_bytes_total","description":"The total size of responses in bytes returned by a service, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"404"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"service","value":{"stringValue":"ServiceReqsCounter"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, + } + + registry.ServiceReqsCounter().With(nil, "service", "ServiceReqsCounter", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) + registry.ServiceReqsCounter().With(nil, "service", "ServiceReqsCounter", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1) + registry.ServiceReqsTLSCounter().With("service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1) + registry.ServiceReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000) + registry.ServiceServerUpGauge().With("service", "test", "url", "http://127.0.0.1").Set(1) + registry.ServiceReqsBytesCounter().With("service", "ServiceReqsCounter", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1) + registry.ServiceRespsBytesCounter().With("service", "ServiceReqsCounter", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1) + + tryAssertMessage(t, c, expectedServices) + + expectedServicesRetries := []string{ + `({"attributes":\[{"key":"service","value":{"stringValue":"foobar"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1})`, + `({"attributes":\[{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":2})`, + } + + registry.ServiceRetriesCounter().With("service", "test").Add(1) + registry.ServiceRetriesCounter().With("service", "test").Add(1) + registry.ServiceRetriesCounter().With("service", "foobar").Add(1) + + tryAssertMessage(t, c, expectedServicesRetries) + + // We cannot rely on the previous expected pattern, + // because this pattern was for matching only one dataPoint in the histogram, + // and as soon as the EntryPointReqDurationHistogram.Observe is called, + // it adds a new dataPoint to the histogram. + expectedEntryPointReqDuration := []string{ + `({"attributes":\[{"key":"entrypoint","value":{"stringValue":"myEntrypoint"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","count":"2","sum":30000,"bucketCounts":\["0","0","0","0","0","0","0","0","0","0","0","0","0","0","2"\],"explicitBounds":\[0.005,0.01,0.025,0.05,0.075,0.1,0.25,0.5,0.75,1,2.5,5,7.5,10\],"min":10000,"max":20000})`, + } + + registry.EntryPointReqDurationHistogram().With("entrypoint", "myEntrypoint").Observe(10000) + registry.EntryPointReqDurationHistogram().With("entrypoint", "myEntrypoint").Observe(20000) + + tryAssertMessage(t, c, expectedEntryPointReqDuration) + }) } - - tryAssertMessage(t, c, expected) - - // TODO: the len of startUnixNano is no supposed to be 20, it should be 19 - expectedConfig := []string{ - `({"name":"traefik_config_reloads_total","description":"Config reloads","unit":"1","sum":{"dataPoints":\[{"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, - `({"name":"traefik_config_last_reload_success","description":"Last config reload success","unit":"ms","gauge":{"dataPoints":\[{"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\]}})`, - `({"name":"traefik_open_connections","description":"How many open connections exist, by entryPoint and protocol","unit":"1","gauge":{"dataPoints":\[{"attributes":\[{"key":"entrypoint","value":{"stringValue":"test"}},{"key":"protocol","value":{"stringValue":"TCP"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\]}})`, - } - - registry.ConfigReloadsCounter().Add(1) - registry.LastConfigReloadSuccessGauge().Set(1) - registry.OpenConnectionsGauge().With("entrypoint", "test", "protocol", "TCP").Set(1) - - tryAssertMessage(t, c, expectedConfig) - - expectedTLSCerts := []string{ - `({"name":"traefik_tls_certs_not_after","description":"Certificate expiration timestamp","unit":"ms","gauge":{"dataPoints":\[{"attributes":\[{"key":"key","value":{"stringValue":"value"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\]}})`, - } - - registry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1) - - tryAssertMessage(t, c, expectedTLSCerts) - - expectedEntryPoints := []string{ - `({"name":"traefik_entrypoint_requests_total","description":"How many HTTP requests processed on an entrypoint, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"200"}},{"key":"entrypoint","value":{"stringValue":"test1"}},{"key":"method","value":{"stringValue":"GET"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, - `({"name":"traefik_entrypoint_requests_tls_total","description":"How many HTTP requests with TLS processed on an entrypoint, partitioned by TLS Version and TLS cipher Used.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"entrypoint","value":{"stringValue":"test2"}},{"key":"tls_cipher","value":{"stringValue":"bar"}},{"key":"tls_version","value":{"stringValue":"foo"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, - `({"name":"traefik_entrypoint_request_duration_seconds","description":"How long it took to process the request on an entrypoint, partitioned by status code, protocol, and method.","unit":"ms","histogram":{"dataPoints":\[{"attributes":\[{"key":"entrypoint","value":{"stringValue":"test3"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","count":"1","sum":10000,"bucketCounts":\["0","0","0","0","0","0","0","0","0","0","0","0","0","0","1"\],"explicitBounds":\[0.005,0.01,0.025,0.05,0.075,0.1,0.25,0.5,0.75,1,2.5,5,7.5,10\],"min":10000,"max":10000}\],"aggregationTemporality":2}})`, - `({"name":"traefik_entrypoint_requests_bytes_total","description":"The total size of requests in bytes handled by an entrypoint, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"200"}},{"key":"entrypoint","value":{"stringValue":"test1"}},{"key":"method","value":{"stringValue":"GET"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, - `({"name":"traefik_entrypoint_responses_bytes_total","description":"The total size of responses in bytes handled by an entrypoint, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"200"}},{"key":"entrypoint","value":{"stringValue":"test1"}},{"key":"method","value":{"stringValue":"GET"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, - } - - registry.EntryPointReqsCounter().With(nil, "entrypoint", "test1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) - registry.EntryPointReqsTLSCounter().With("entrypoint", "test2", "tls_version", "foo", "tls_cipher", "bar").Add(1) - registry.EntryPointReqDurationHistogram().With("entrypoint", "test3").Observe(10000) - registry.EntryPointReqsBytesCounter().With("entrypoint", "test1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) - registry.EntryPointRespsBytesCounter().With("entrypoint", "test1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) - - tryAssertMessage(t, c, expectedEntryPoints) - - expectedRouters := []string{ - `({"name":"traefik_router_requests_total","description":"How many HTTP requests are processed on a router, partitioned by service, status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"(?:200|404)"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"router","value":{"stringValue":"RouterReqsCounter"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1},{"attributes":\[{"key":"code","value":{"stringValue":"(?:200|404)"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"router","value":{"stringValue":"RouterReqsCounter"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, - `({"name":"traefik_router_requests_tls_total","description":"How many HTTP requests with TLS are processed on a router, partitioned by service, TLS Version, and TLS cipher Used.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"router","value":{"stringValue":"demo"}},{"key":"service","value":{"stringValue":"test"}},{"key":"tls_cipher","value":{"stringValue":"bar"}},{"key":"tls_version","value":{"stringValue":"foo"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, - `({"name":"traefik_router_request_duration_seconds","description":"How long it took to process the request on a router, partitioned by service, status code, protocol, and method.","unit":"ms","histogram":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"200"}},{"key":"router","value":{"stringValue":"demo"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","count":"1","sum":10000,"bucketCounts":\["0","0","0","0","0","0","0","0","0","0","0","0","0","0","1"\],"explicitBounds":\[0.005,0.01,0.025,0.05,0.075,0.1,0.25,0.5,0.75,1,2.5,5,7.5,10\],"min":10000,"max":10000}\],"aggregationTemporality":2}})`, - `({"name":"traefik_router_requests_bytes_total","description":"The total size of requests in bytes handled by a router, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"404"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"router","value":{"stringValue":"RouterReqsCounter"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, - `({"name":"traefik_router_responses_bytes_total","description":"The total size of responses in bytes handled by a router, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"404"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"router","value":{"stringValue":"RouterReqsCounter"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, - } - - registry.RouterReqsCounter().With(nil, "router", "RouterReqsCounter", "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1) - registry.RouterReqsCounter().With(nil, "router", "RouterReqsCounter", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) - registry.RouterReqsTLSCounter().With("router", "demo", "service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1) - registry.RouterReqDurationHistogram().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000) - registry.RouterReqsBytesCounter().With("router", "RouterReqsCounter", "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1) - registry.RouterRespsBytesCounter().With("router", "RouterReqsCounter", "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1) - - tryAssertMessage(t, c, expectedRouters) - - expectedServices := []string{ - `({"name":"traefik_service_requests_total","description":"How many HTTP requests processed on a service, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"(?:200|404)"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"service","value":{"stringValue":"ServiceReqsCounter"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1},{"attributes":\[{"key":"code","value":{"stringValue":"(?:200|404)"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"service","value":{"stringValue":"ServiceReqsCounter"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, - `({"name":"traefik_service_requests_tls_total","description":"How many HTTP requests with TLS processed on a service, partitioned by TLS version and TLS cipher.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"service","value":{"stringValue":"test"}},{"key":"tls_cipher","value":{"stringValue":"bar"}},{"key":"tls_version","value":{"stringValue":"foo"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, - `({"name":"traefik_service_request_duration_seconds","description":"How long it took to process the request on a service, partitioned by status code, protocol, and method.","unit":"ms","histogram":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"200"}},{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","count":"1","sum":10000,"bucketCounts":\["0","0","0","0","0","0","0","0","0","0","0","0","0","0","1"\],"explicitBounds":\[0.005,0.01,0.025,0.05,0.075,0.1,0.25,0.5,0.75,1,2.5,5,7.5,10\],"min":10000,"max":10000}\],"aggregationTemporality":2}})`, - `({"name":"traefik_service_server_up","description":"service server is up, described by gauge value of 0 or 1.","unit":"1","gauge":{"dataPoints":\[{"attributes":\[{"key":"service","value":{"stringValue":"test"}},{"key":"url","value":{"stringValue":"http://127.0.0.1"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\]}})`, - `({"name":"traefik_service_requests_bytes_total","description":"The total size of requests in bytes received by a service, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"404"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"service","value":{"stringValue":"ServiceReqsCounter"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, - `({"name":"traefik_service_responses_bytes_total","description":"The total size of responses in bytes returned by a service, partitioned by status code, protocol, and method.","unit":"1","sum":{"dataPoints":\[{"attributes":\[{"key":"code","value":{"stringValue":"404"}},{"key":"method","value":{"stringValue":"GET"}},{"key":"service","value":{"stringValue":"ServiceReqsCounter"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`, - } - - registry.ServiceReqsCounter().With(nil, "service", "ServiceReqsCounter", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1) - registry.ServiceReqsCounter().With(nil, "service", "ServiceReqsCounter", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1) - registry.ServiceReqsTLSCounter().With("service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1) - registry.ServiceReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000) - registry.ServiceServerUpGauge().With("service", "test", "url", "http://127.0.0.1").Set(1) - registry.ServiceReqsBytesCounter().With("service", "ServiceReqsCounter", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1) - registry.ServiceRespsBytesCounter().With("service", "ServiceReqsCounter", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1) - - tryAssertMessage(t, c, expectedServices) - - expectedServicesRetries := []string{ - `({"attributes":\[{"key":"service","value":{"stringValue":"foobar"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1})`, - `({"attributes":\[{"key":"service","value":{"stringValue":"test"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":2})`, - } - - registry.ServiceRetriesCounter().With("service", "test").Add(1) - registry.ServiceRetriesCounter().With("service", "test").Add(1) - registry.ServiceRetriesCounter().With("service", "foobar").Add(1) - - tryAssertMessage(t, c, expectedServicesRetries) - - // We cannot rely on the previous expected pattern, - // because this pattern was for matching only one dataPoint in the histogram, - // and as soon as the EntryPointReqDurationHistogram.Observe is called, - // it adds a new dataPoint to the histogram. - expectedEntryPointReqDuration := []string{ - `({"attributes":\[{"key":"entrypoint","value":{"stringValue":"myEntrypoint"}}\],"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","count":"2","sum":30000,"bucketCounts":\["0","0","0","0","0","0","0","0","0","0","0","0","0","0","2"\],"explicitBounds":\[0.005,0.01,0.025,0.05,0.075,0.1,0.25,0.5,0.75,1,2.5,5,7.5,10\],"min":10000,"max":20000})`, - } - - registry.EntryPointReqDurationHistogram().With("entrypoint", "myEntrypoint").Observe(10000) - registry.EntryPointReqDurationHistogram().With("entrypoint", "myEntrypoint").Observe(20000) - - tryAssertMessage(t, c, expectedEntryPointReqDuration) } func assertMessage(t *testing.T, msg string, expected []string) { diff --git a/pkg/types/metrics.go b/pkg/types/metrics.go index b97ae9221..88b62c003 100644 --- a/pkg/types/metrics.go +++ b/pkg/types/metrics.go @@ -116,6 +116,7 @@ type OTLP struct { AddServicesLabels bool `description:"Enable metrics on services." json:"addServicesLabels,omitempty" toml:"addServicesLabels,omitempty" yaml:"addServicesLabels,omitempty" export:"true"` ExplicitBoundaries []float64 `description:"Boundaries for latency metrics." json:"explicitBoundaries,omitempty" toml:"explicitBoundaries,omitempty" yaml:"explicitBoundaries,omitempty" export:"true"` PushInterval types.Duration `description:"Period between calls to collect a checkpoint." json:"pushInterval,omitempty" toml:"pushInterval,omitempty" yaml:"pushInterval,omitempty" export:"true"` + ServiceName string `description:"OTEL service name to use." json:"serviceName,omitempty" toml:"serviceName,omitempty" yaml:"serviceName,omitempty" export:"true"` } // SetDefaults sets the default values. @@ -127,6 +128,7 @@ func (o *OTLP) SetDefaults() { o.AddServicesLabels = true o.ExplicitBoundaries = []float64{.005, .01, .025, .05, .075, .1, .25, .5, .75, 1, 2.5, 5, 7.5, 10} o.PushInterval = types.Duration(10 * time.Second) + o.ServiceName = "traefik" } // Statistics provides options for monitoring request and response stats.