Remove config reload failure metrics
This commit is contained in:
parent
20e47d9102
commit
598a257ae1
11 changed files with 3 additions and 79 deletions
|
@ -19,9 +19,7 @@ var (
|
|||
// Metric names consistent with https://github.com/DataDog/integrations-extras/pull/64
|
||||
const (
|
||||
ddConfigReloadsName = "config.reload.total"
|
||||
ddConfigReloadsFailureTagName = "failure"
|
||||
ddLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
|
||||
ddLastConfigReloadFailureName = "config.reload.lastFailureTimestamp"
|
||||
ddTLSCertsNotAfterTimestampName = "tls.certs.notAfterTimestamp"
|
||||
|
||||
ddEntryPointReqsName = "entrypoint.request.total"
|
||||
|
@ -64,9 +62,7 @@ func RegisterDatadog(ctx context.Context, config *types.Datadog) Registry {
|
|||
|
||||
registry := &standardRegistry{
|
||||
configReloadsCounter: datadogClient.NewCounter(ddConfigReloadsName, 1.0),
|
||||
configReloadsFailureCounter: datadogClient.NewCounter(ddConfigReloadsName, 1.0).With(ddConfigReloadsFailureTagName, "true"),
|
||||
lastConfigReloadSuccessGauge: datadogClient.NewGauge(ddLastConfigReloadSuccessName),
|
||||
lastConfigReloadFailureGauge: datadogClient.NewGauge(ddLastConfigReloadFailureName),
|
||||
tlsCertsNotAfterTimestampGauge: datadogClient.NewGauge(ddTLSCertsNotAfterTimestampName),
|
||||
}
|
||||
|
||||
|
|
|
@ -45,9 +45,7 @@ func testDatadogRegistry(t *testing.T, metricsPrefix string, datadogRegistry Reg
|
|||
|
||||
expected := []string{
|
||||
metricsPrefix + ".config.reload.total:1.000000|c\n",
|
||||
metricsPrefix + ".config.reload.total:1.000000|c|#failure:true\n",
|
||||
metricsPrefix + ".config.reload.lastSuccessTimestamp:1.000000|g\n",
|
||||
metricsPrefix + ".config.reload.lastFailureTimestamp:1.000000|g\n",
|
||||
|
||||
metricsPrefix + ".tls.certs.notAfterTimestamp:1.000000|g|#key:value\n",
|
||||
|
||||
|
@ -80,9 +78,7 @@ func testDatadogRegistry(t *testing.T, metricsPrefix string, datadogRegistry Reg
|
|||
|
||||
udp.ShouldReceiveAll(t, expected, func() {
|
||||
datadogRegistry.ConfigReloadsCounter().Add(1)
|
||||
datadogRegistry.ConfigReloadsFailureCounter().Add(1)
|
||||
datadogRegistry.LastConfigReloadSuccessGauge().Add(1)
|
||||
datadogRegistry.LastConfigReloadFailureGauge().Add(1)
|
||||
|
||||
datadogRegistry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1)
|
||||
|
||||
|
|
|
@ -25,9 +25,7 @@ var (
|
|||
|
||||
const (
|
||||
influxDBConfigReloadsName = "traefik.config.reload.total"
|
||||
influxDBConfigReloadsFailureName = influxDBConfigReloadsName + ".failure"
|
||||
influxDBLastConfigReloadSuccessName = "traefik.config.reload.lastSuccessTimestamp"
|
||||
influxDBLastConfigReloadFailureName = "traefik.config.reload.lastFailureTimestamp"
|
||||
|
||||
influxDBTLSCertsNotAfterTimestampName = "traefik.tls.certs.notAfterTimestamp"
|
||||
|
||||
|
@ -84,9 +82,7 @@ func RegisterInfluxDB2(ctx context.Context, config *types.InfluxDB2) Registry {
|
|||
|
||||
registry := &standardRegistry{
|
||||
configReloadsCounter: influxDB2Store.NewCounter(influxDBConfigReloadsName),
|
||||
configReloadsFailureCounter: influxDB2Store.NewCounter(influxDBConfigReloadsFailureName),
|
||||
lastConfigReloadSuccessGauge: influxDB2Store.NewGauge(influxDBLastConfigReloadSuccessName),
|
||||
lastConfigReloadFailureGauge: influxDB2Store.NewGauge(influxDBLastConfigReloadFailureName),
|
||||
tlsCertsNotAfterTimestampGauge: influxDB2Store.NewGauge(influxDBTLSCertsNotAfterTimestampName),
|
||||
}
|
||||
|
||||
|
|
|
@ -47,15 +47,11 @@ func TestInfluxDB2(t *testing.T) {
|
|||
|
||||
expectedServer := []string{
|
||||
`(traefik\.config\.reload\.total count=1) [\d]{19}`,
|
||||
`(traefik\.config\.reload\.total\.failure count=1) [\d]{19}`,
|
||||
`(traefik\.config\.reload\.lastSuccessTimestamp value=1) [\d]{19}`,
|
||||
`(traefik\.config\.reload\.lastFailureTimestamp value=1) [\d]{19}`,
|
||||
}
|
||||
|
||||
influxDB2Registry.ConfigReloadsCounter().Add(1)
|
||||
influxDB2Registry.ConfigReloadsFailureCounter().Add(1)
|
||||
influxDB2Registry.LastConfigReloadSuccessGauge().Set(1)
|
||||
influxDB2Registry.LastConfigReloadFailureGauge().Set(1)
|
||||
msgServer := <-c
|
||||
|
||||
assertMessage(t, *msgServer, expectedServer)
|
||||
|
|
|
@ -22,9 +22,7 @@ type Registry interface {
|
|||
// server metrics
|
||||
|
||||
ConfigReloadsCounter() metrics.Counter
|
||||
ConfigReloadsFailureCounter() metrics.Counter
|
||||
LastConfigReloadSuccessGauge() metrics.Gauge
|
||||
LastConfigReloadFailureGauge() metrics.Gauge
|
||||
|
||||
// TLS
|
||||
|
||||
|
@ -71,9 +69,7 @@ func NewVoidRegistry() Registry {
|
|||
// This allows for feature disparity between the different metric implementations.
|
||||
func NewMultiRegistry(registries []Registry) Registry {
|
||||
var configReloadsCounter []metrics.Counter
|
||||
var configReloadsFailureCounter []metrics.Counter
|
||||
var lastConfigReloadSuccessGauge []metrics.Gauge
|
||||
var lastConfigReloadFailureGauge []metrics.Gauge
|
||||
var tlsCertsNotAfterTimestampGauge []metrics.Gauge
|
||||
var entryPointReqsCounter []metrics.Counter
|
||||
var entryPointReqsTLSCounter []metrics.Counter
|
||||
|
@ -100,15 +96,9 @@ func NewMultiRegistry(registries []Registry) Registry {
|
|||
if r.ConfigReloadsCounter() != nil {
|
||||
configReloadsCounter = append(configReloadsCounter, r.ConfigReloadsCounter())
|
||||
}
|
||||
if r.ConfigReloadsFailureCounter() != nil {
|
||||
configReloadsFailureCounter = append(configReloadsFailureCounter, r.ConfigReloadsFailureCounter())
|
||||
}
|
||||
if r.LastConfigReloadSuccessGauge() != nil {
|
||||
lastConfigReloadSuccessGauge = append(lastConfigReloadSuccessGauge, r.LastConfigReloadSuccessGauge())
|
||||
}
|
||||
if r.LastConfigReloadFailureGauge() != nil {
|
||||
lastConfigReloadFailureGauge = append(lastConfigReloadFailureGauge, r.LastConfigReloadFailureGauge())
|
||||
}
|
||||
if r.TLSCertsNotAfterTimestampGauge() != nil {
|
||||
tlsCertsNotAfterTimestampGauge = append(tlsCertsNotAfterTimestampGauge, r.TLSCertsNotAfterTimestampGauge())
|
||||
}
|
||||
|
@ -179,9 +169,7 @@ func NewMultiRegistry(registries []Registry) Registry {
|
|||
svcEnabled: len(serviceReqsCounter) > 0 || len(serviceReqDurationHistogram) > 0 || len(serviceOpenConnsGauge) > 0 || len(serviceRetriesCounter) > 0 || len(serviceServerUpGauge) > 0,
|
||||
routerEnabled: len(routerReqsCounter) > 0 || len(routerReqDurationHistogram) > 0 || len(routerOpenConnsGauge) > 0,
|
||||
configReloadsCounter: multi.NewCounter(configReloadsCounter...),
|
||||
configReloadsFailureCounter: multi.NewCounter(configReloadsFailureCounter...),
|
||||
lastConfigReloadSuccessGauge: multi.NewGauge(lastConfigReloadSuccessGauge...),
|
||||
lastConfigReloadFailureGauge: multi.NewGauge(lastConfigReloadFailureGauge...),
|
||||
tlsCertsNotAfterTimestampGauge: multi.NewGauge(tlsCertsNotAfterTimestampGauge...),
|
||||
entryPointReqsCounter: multi.NewCounter(entryPointReqsCounter...),
|
||||
entryPointReqsTLSCounter: multi.NewCounter(entryPointReqsTLSCounter...),
|
||||
|
@ -211,9 +199,7 @@ type standardRegistry struct {
|
|||
routerEnabled bool
|
||||
svcEnabled bool
|
||||
configReloadsCounter metrics.Counter
|
||||
configReloadsFailureCounter metrics.Counter
|
||||
lastConfigReloadSuccessGauge metrics.Gauge
|
||||
lastConfigReloadFailureGauge metrics.Gauge
|
||||
tlsCertsNotAfterTimestampGauge metrics.Gauge
|
||||
entryPointReqsCounter metrics.Counter
|
||||
entryPointReqsTLSCounter metrics.Counter
|
||||
|
@ -253,18 +239,10 @@ func (r *standardRegistry) ConfigReloadsCounter() metrics.Counter {
|
|||
return r.configReloadsCounter
|
||||
}
|
||||
|
||||
func (r *standardRegistry) ConfigReloadsFailureCounter() metrics.Counter {
|
||||
return r.configReloadsFailureCounter
|
||||
}
|
||||
|
||||
func (r *standardRegistry) LastConfigReloadSuccessGauge() metrics.Gauge {
|
||||
return r.lastConfigReloadSuccessGauge
|
||||
}
|
||||
|
||||
func (r *standardRegistry) LastConfigReloadFailureGauge() metrics.Gauge {
|
||||
return r.lastConfigReloadFailureGauge
|
||||
}
|
||||
|
||||
func (r *standardRegistry) TLSCertsNotAfterTimestampGauge() metrics.Gauge {
|
||||
return r.tlsCertsNotAfterTimestampGauge
|
||||
}
|
||||
|
|
|
@ -57,9 +57,7 @@ func RegisterOpenTelemetry(ctx context.Context, config *types.OpenTelemetry) Reg
|
|||
routerEnabled: config.AddRoutersLabels,
|
||||
svcEnabled: config.AddServicesLabels,
|
||||
configReloadsCounter: newOTLPCounterFrom(meter, configReloadsTotalName, "Config reloads"),
|
||||
configReloadsFailureCounter: newOTLPCounterFrom(meter, configReloadsFailuresTotalName, "Config reload failures"),
|
||||
lastConfigReloadSuccessGauge: newOTLPGaugeFrom(meter, configLastReloadSuccessName, "Last config reload success", unit.Milliseconds),
|
||||
lastConfigReloadFailureGauge: newOTLPGaugeFrom(meter, configLastReloadFailureName, "Last config reload failure", unit.Milliseconds),
|
||||
tlsCertsNotAfterTimestampGauge: newOTLPGaugeFrom(meter, tlsCertsNotAfterTimestamp, "Certificate expiration timestamp", unit.Milliseconds),
|
||||
}
|
||||
|
||||
|
|
|
@ -340,15 +340,11 @@ func TestOpenTelemetry(t *testing.T) {
|
|||
// TODO: the len of startUnixNano is not supposed to be 20, it should be 19
|
||||
expected = append(expected,
|
||||
`({"name":"traefik_config_reloads_total","description":"Config reloads","unit":"1","sum":{"dataPoints":\[{"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`,
|
||||
`({"name":"traefik_config_reloads_failure_total","description":"Config reload failures","unit":"1","sum":{"dataPoints":\[{"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`,
|
||||
`({"name":"traefik_config_last_reload_success","description":"Last config reload success","unit":"ms","gauge":{"dataPoints":\[{"startTimeUnixNano":"[\d]{20}","timeUnixNano":"[\d]{19}","asDouble":1}\]}})`,
|
||||
`({"name":"traefik_config_last_reload_failure","description":"Last config reload failure","unit":"ms","gauge":{"dataPoints":\[{"startTimeUnixNano":"[\d]{20}","timeUnixNano":"[\d]{19}","asDouble":1}\]}})`,
|
||||
)
|
||||
|
||||
registry.ConfigReloadsCounter().Add(1)
|
||||
registry.ConfigReloadsFailureCounter().Add(1)
|
||||
registry.LastConfigReloadSuccessGauge().Set(1)
|
||||
registry.LastConfigReloadFailureGauge().Set(1)
|
||||
msgServer := <-c
|
||||
|
||||
assertMessage(t, *msgServer, expected)
|
||||
|
|
|
@ -21,11 +21,9 @@ const (
|
|||
MetricNamePrefix = "traefik_"
|
||||
|
||||
// server meta information.
|
||||
metricConfigPrefix = MetricNamePrefix + "config_"
|
||||
configReloadsTotalName = metricConfigPrefix + "reloads_total"
|
||||
configReloadsFailuresTotalName = metricConfigPrefix + "reloads_failure_total"
|
||||
configLastReloadSuccessName = metricConfigPrefix + "last_reload_success"
|
||||
configLastReloadFailureName = metricConfigPrefix + "last_reload_failure"
|
||||
metricConfigPrefix = MetricNamePrefix + "config_"
|
||||
configReloadsTotalName = metricConfigPrefix + "reloads_total"
|
||||
configLastReloadSuccessName = metricConfigPrefix + "last_reload_success"
|
||||
|
||||
// TLS.
|
||||
metricsTLSPrefix = MetricNamePrefix + "tls_"
|
||||
|
@ -118,18 +116,10 @@ func initStandardRegistry(config *types.Prometheus) Registry {
|
|||
Name: configReloadsTotalName,
|
||||
Help: "Config reloads",
|
||||
}, []string{})
|
||||
configReloadsFailures := newCounterFrom(stdprometheus.CounterOpts{
|
||||
Name: configReloadsFailuresTotalName,
|
||||
Help: "Config failure reloads",
|
||||
}, []string{})
|
||||
lastConfigReloadSuccess := newGaugeFrom(stdprometheus.GaugeOpts{
|
||||
Name: configLastReloadSuccessName,
|
||||
Help: "Last config reload success",
|
||||
}, []string{})
|
||||
lastConfigReloadFailure := newGaugeFrom(stdprometheus.GaugeOpts{
|
||||
Name: configLastReloadFailureName,
|
||||
Help: "Last config reload failure",
|
||||
}, []string{})
|
||||
tlsCertsNotAfterTimestamp := newGaugeFrom(stdprometheus.GaugeOpts{
|
||||
Name: tlsCertsNotAfterTimestamp,
|
||||
Help: "Certificate expiration timestamp",
|
||||
|
@ -137,9 +127,7 @@ func initStandardRegistry(config *types.Prometheus) Registry {
|
|||
|
||||
promState.vectors = []vector{
|
||||
configReloads.cv,
|
||||
configReloadsFailures.cv,
|
||||
lastConfigReloadSuccess.gv,
|
||||
lastConfigReloadFailure.gv,
|
||||
tlsCertsNotAfterTimestamp.gv,
|
||||
}
|
||||
|
||||
|
@ -148,9 +136,7 @@ func initStandardRegistry(config *types.Prometheus) Registry {
|
|||
routerEnabled: config.AddRoutersLabels,
|
||||
svcEnabled: config.AddServicesLabels,
|
||||
configReloadsCounter: configReloads,
|
||||
configReloadsFailureCounter: configReloadsFailures,
|
||||
lastConfigReloadSuccessGauge: lastConfigReloadSuccess,
|
||||
lastConfigReloadFailureGauge: lastConfigReloadFailure,
|
||||
tlsCertsNotAfterTimestampGauge: tlsCertsNotAfterTimestamp,
|
||||
}
|
||||
|
||||
|
|
|
@ -100,9 +100,7 @@ func TestPrometheus(t *testing.T) {
|
|||
}
|
||||
|
||||
prometheusRegistry.ConfigReloadsCounter().Add(1)
|
||||
prometheusRegistry.ConfigReloadsFailureCounter().Add(1)
|
||||
prometheusRegistry.LastConfigReloadSuccessGauge().Set(float64(time.Now().Unix()))
|
||||
prometheusRegistry.LastConfigReloadFailureGauge().Set(float64(time.Now().Unix()))
|
||||
|
||||
prometheusRegistry.
|
||||
TLSCertsNotAfterTimestampGauge().
|
||||
|
@ -201,18 +199,10 @@ func TestPrometheus(t *testing.T) {
|
|||
name: configReloadsTotalName,
|
||||
assert: buildCounterAssert(t, configReloadsTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: configReloadsFailuresTotalName,
|
||||
assert: buildCounterAssert(t, configReloadsFailuresTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: configLastReloadSuccessName,
|
||||
assert: buildTimestampAssert(t, configLastReloadSuccessName),
|
||||
},
|
||||
{
|
||||
name: configLastReloadFailureName,
|
||||
assert: buildTimestampAssert(t, configLastReloadFailureName),
|
||||
},
|
||||
{
|
||||
name: tlsCertsNotAfterTimestamp,
|
||||
labels: map[string]string{
|
||||
|
|
|
@ -18,9 +18,7 @@ var (
|
|||
|
||||
const (
|
||||
statsdConfigReloadsName = "config.reload.total"
|
||||
statsdConfigReloadsFailureName = statsdConfigReloadsName + ".failure"
|
||||
statsdLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
|
||||
statsdLastConfigReloadFailureName = "config.reload.lastFailureTimestamp"
|
||||
|
||||
statsdTLSCertsNotAfterTimestampName = "tls.certs.notAfterTimestamp"
|
||||
|
||||
|
@ -63,9 +61,7 @@ func RegisterStatsd(ctx context.Context, config *types.Statsd) Registry {
|
|||
|
||||
registry := &standardRegistry{
|
||||
configReloadsCounter: statsdClient.NewCounter(statsdConfigReloadsName, 1.0),
|
||||
configReloadsFailureCounter: statsdClient.NewCounter(statsdConfigReloadsFailureName, 1.0),
|
||||
lastConfigReloadSuccessGauge: statsdClient.NewGauge(statsdLastConfigReloadSuccessName),
|
||||
lastConfigReloadFailureGauge: statsdClient.NewGauge(statsdLastConfigReloadFailureName),
|
||||
tlsCertsNotAfterTimestampGauge: statsdClient.NewGauge(statsdTLSCertsNotAfterTimestampName),
|
||||
}
|
||||
|
||||
|
|
|
@ -49,9 +49,7 @@ func testRegistry(t *testing.T, metricsPrefix string, registry Registry) {
|
|||
|
||||
expected := []string{
|
||||
metricsPrefix + ".config.reload.total:1.000000|c\n",
|
||||
metricsPrefix + ".config.reload.total.failure:1.000000|c\n",
|
||||
metricsPrefix + ".config.reload.lastSuccessTimestamp:1.000000|g\n",
|
||||
metricsPrefix + ".config.reload.lastFailureTimestamp:1.000000|g\n",
|
||||
|
||||
metricsPrefix + ".tls.certs.notAfterTimestamp:1.000000|g\n",
|
||||
|
||||
|
@ -81,9 +79,7 @@ func testRegistry(t *testing.T, metricsPrefix string, registry Registry) {
|
|||
|
||||
udp.ShouldReceiveAll(t, expected, func() {
|
||||
registry.ConfigReloadsCounter().Add(1)
|
||||
registry.ConfigReloadsFailureCounter().Add(1)
|
||||
registry.LastConfigReloadSuccessGauge().Set(1)
|
||||
registry.LastConfigReloadFailureGauge().Set(1)
|
||||
|
||||
registry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1)
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue