Feature: Exponential Backoff in Retry Middleware
parent 3a8cb3f010
commit 74d1d55051
12 changed files with 218 additions and 44 deletions

@@ -9,17 +9,19 @@ TODO: add schema
 The Retry middleware is in charge of reissuing a request a given number of times to a backend server if that server does not reply.

 To be clear, as soon as the server answers, the middleware stops retrying, regardless of the response status.

+The Retry middleware has an optional configuration for exponential backoff.

 ## Configuration Examples

 ```yaml tab="Docker"
-# Retry to send request 4 times
+# Retry to send request 4 times with exponential backoff
 labels:
   - "traefik.http.middlewares.test-retry.retry.attempts=4"
+  - "traefik.http.middlewares.test-retry.retry.initialinterval=100ms"
 ```

 ```yaml tab="Kubernetes"
-# Retry to send request 4 times
+# Retry to send request 4 times with exponential backoff
 apiVersion: traefik.containo.us/v1alpha1
 kind: Middleware
 metadata:

@@ -27,45 +29,55 @@ metadata:
 spec:
   retry:
     attempts: 4
+    initialInterval: 100ms
 ```

 ```yaml tab="Consul Catalog"
-# Retry to send request 4 times
+# Retry to send request 4 times with exponential backoff
 - "traefik.http.middlewares.test-retry.retry.attempts=4"
+- "traefik.http.middlewares.test-retry.retry.initialinterval=100ms"
 ```

 ```json tab="Marathon"
 "labels": {
-  "traefik.http.middlewares.test-retry.retry.attempts": "4"
+  "traefik.http.middlewares.test-retry.retry.attempts": "4",
+  "traefik.http.middlewares.test-retry.retry.initialinterval": "100ms"
 }
 ```
```yaml tab="Rancher"
|
||||
# Retry to send request 4 times
|
||||
# Retry to send request 4 times with exponential backoff
|
||||
labels:
|
||||
- "traefik.http.middlewares.test-retry.retry.attempts=4"
|
||||
- "traefik.http.middlewares.test-retry.retry.initialinterval=100ms"
|
||||
```
|
||||
|
||||
```toml tab="File (TOML)"
|
||||
# Retry to send request 4 times
|
||||
[http.middlewares]
|
||||
[http.middlewares.test-retry.retry]
|
||||
attempts = 4
|
||||
attempts = 4
|
||||
initialInterval = "100ms"
|
||||
```
|
||||
|
||||
```yaml tab="File (YAML)"
|
||||
# Retry to send request 4 times
|
||||
# Retry to send request 4 times with exponential backoff
|
||||
http:
|
||||
middlewares:
|
||||
test-retry:
|
||||
retry:
|
||||
attempts: 4
|
||||
attempts: 4
|
||||
initialInterval: 100ms
|
||||
```
|
||||
|
||||
 ## Configuration Options

 ### `attempts`

 _mandatory_

 The `attempts` option defines how many times the request should be retried.

+### `initialInterval`
+
+The `initialInterval` option defines the first wait time in the exponential backoff series (provided in seconds or as a valid duration format, see [time.ParseDuration](https://golang.org/pkg/time/#ParseDuration)). The maximum interval is calculated as twice the `initialInterval`. If unspecified, requests will be retried immediately.
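To get a feel for the wait times this produces, here is a minimal sketch (not part of this commit) that reproduces the interval progression with the github.com/cenkalti/backoff/v4 library used by the middleware, assuming `attempts=4` and `initialInterval=100ms`, with jitter disabled so the series is deterministic:

```go
package main

import (
	"fmt"
	"math"
	"time"

	"github.com/cenkalti/backoff/v4"
)

func main() {
	attempts := 4
	initialInterval := 100 * time.Millisecond

	b := backoff.NewExponentialBackOff()
	b.InitialInterval = initialInterval
	// Same multiplier formula as the middleware: the last wait stays within 2x the first one.
	b.Multiplier = math.Pow(2, 1/float64(attempts-1))
	b.RandomizationFactor = 0 // disable jitter only for this illustration
	b.Reset()

	// attempts=4 means at most 3 waits between tries: 100ms, ~126ms, ~159ms.
	for i := 0; i < attempts-1; i++ {
		fmt.Println(b.NextBackOff())
	}
}
```

The middleware itself leaves the library's default jitter enabled, so in practice each wait is randomized around these nominal values.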
@@ -112,6 +112,7 @@
 - "traefik.http.middlewares.middleware19.replacepathregex.regex=foobar"
 - "traefik.http.middlewares.middleware19.replacepathregex.replacement=foobar"
 - "traefik.http.middlewares.middleware20.retry.attempts=42"
+- "traefik.http.middlewares.middleware20.retry.initialinterval=42"
 - "traefik.http.middlewares.middleware21.stripprefix.forceslash=true"
 - "traefik.http.middlewares.middleware21.stripprefix.prefixes=foobar, foobar"
 - "traefik.http.middlewares.middleware22.stripprefixregex.regex=foobar, foobar"
@@ -261,6 +261,7 @@
   [http.middlewares.Middleware20]
     [http.middlewares.Middleware20.retry]
       attempts = 42
+      initialInterval = 42
   [http.middlewares.Middleware21]
     [http.middlewares.Middleware21.stripPrefix]
      prefixes = ["foobar", "foobar"]
@@ -298,6 +298,7 @@ http:
     Middleware20:
       retry:
         attempts: 42
+        initialInterval: 42
     Middleware21:
       stripPrefix:
         prefixes:
@@ -129,6 +129,7 @@
 | `traefik/http/middlewares/Middleware19/replacePathRegex/regex` | `foobar` |
 | `traefik/http/middlewares/Middleware19/replacePathRegex/replacement` | `foobar` |
 | `traefik/http/middlewares/Middleware20/retry/attempts` | `42` |
+| `traefik/http/middlewares/Middleware20/retry/initialInterval` | `42` |
 | `traefik/http/middlewares/Middleware21/stripPrefix/forceSlash` | `true` |
 | `traefik/http/middlewares/Middleware21/stripPrefix/prefixes/0` | `foobar` |
 | `traefik/http/middlewares/Middleware21/stripPrefix/prefixes/1` | `foobar` |
@@ -111,6 +111,7 @@
   "traefik.http.middlewares.middleware19.replacepathregex.regex": "foobar",
   "traefik.http.middlewares.middleware19.replacepathregex.replacement": "foobar",
   "traefik.http.middlewares.middleware20.retry.attempts": "42",
+  "traefik.http.middlewares.middleware20.retry.initialinterval": "42",
   "traefik.http.middlewares.middleware21.stripprefix.forceslash": "true",
   "traefik.http.middlewares.middleware21.stripprefix.prefixes": "foobar, foobar",
   "traefik.http.middlewares.middleware22.stripprefixregex.regex": "foobar, foobar",
integration/fixtures/retry/backoff.toml (new file, 45 lines)
@@ -0,0 +1,45 @@
+[global]
+  checkNewVersion = false
+  sendAnonymousUsage = false
+
+[log]
+  level = "DEBUG"
+
+[entryPoints]
+  [entryPoints.web]
+    address = ":8000"
+
+[api]
+  insecure = true
+
+[providers.file]
+  filename = "{{ .SelfFilename }}"
+
+## dynamic configuration ##
+
+[http.routers]
+  [http.routers.router1]
+    service = "service1"
+    middlewares = [ "retry" ]
+    rule = "PathPrefix(`/`)"
+
+[http.middlewares.retry.retry]
+  attempts = 4
+  initialInterval = "500ms"
+
+
+[http.services]
+  [http.services.service1]
+    [http.services.service1.loadBalancer]
+
+      [[http.services.service1.loadBalancer.servers]]
+        url = "http://{{.WhoamiEndpoint}}:8080"
+
+      [[http.services.service1.loadBalancer.servers]]
+        url = "http://{{.WhoamiEndpoint}}:8081"
+
+      [[http.services.service1.loadBalancer.servers]]
+        url = "http://{{.WhoamiEndpoint}}:8082"
+
+      [[http.services.service1.loadBalancer.servers]]
+        url = "http://{{.WhoamiEndpoint}}:80"
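The `{{ .WhoamiEndpoint }}` and `{{ .SelfFilename }}` placeholders are Go template variables: as the `TestRetryBackoff` test below shows, the fixture is rendered via `s.adaptFile` with the whoami container's address before Traefik is started. Presumably the point is that three of the four configured servers point at ports where nothing is listening, which forces the retries that the test then times.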
@@ -34,10 +34,41 @@ func (s *RetrySuite) TestRetry(c *check.C) {
     err = try.GetRequest("http://127.0.0.1:8080/api/rawdata", 60*time.Second, try.BodyContains("PathPrefix(`/`)"))
     c.Assert(err, checker.IsNil)

+    start := time.Now()
     // This simulates a DialTimeout when connecting to the backend server.
     response, err := http.Get("http://127.0.0.1:8000/")
+    duration, allowed := time.Since(start), time.Millisecond*250
     c.Assert(err, checker.IsNil)
     c.Assert(response.StatusCode, checker.Equals, http.StatusOK)
+    c.Assert(int64(duration), checker.LessThan, int64(allowed))
 }

+func (s *RetrySuite) TestRetryBackoff(c *check.C) {
+    whoamiEndpoint := s.composeProject.Container(c, "whoami").NetworkSettings.IPAddress
+    file := s.adaptFile(c, "fixtures/retry/backoff.toml", struct {
+        WhoamiEndpoint string
+    }{whoamiEndpoint})
+    defer os.Remove(file)
+
+    cmd, display := s.traefikCmd(withConfigFile(file))
+    defer display(c)
+    err := cmd.Start()
+    c.Assert(err, checker.IsNil)
+    defer s.killCmd(cmd)
+
+    err = try.GetRequest("http://127.0.0.1:8080/api/rawdata", 60*time.Second, try.BodyContains("PathPrefix(`/`)"))
+    c.Assert(err, checker.IsNil)
+
+    start := time.Now()
+    // This simulates a DialTimeout when connecting to the backend server.
+    response, err := http.Get("http://127.0.0.1:8000/")
+    duration := time.Since(start)
+    // test case delays: 500 + 700 + 1000ms with randomization. It should be safely > 1500ms
+    minAllowed := time.Millisecond * 1500
+
+    c.Assert(err, checker.IsNil)
+    c.Assert(response.StatusCode, checker.Equals, http.StatusOK)
+    c.Assert(int64(duration), checker.GreaterThan, int64(minAllowed))
+}
+
 func (s *RetrySuite) TestRetryWebsocket(c *check.C) {
@@ -376,7 +376,8 @@ type ReplacePathRegex struct {

 // Retry holds the retry configuration.
 type Retry struct {
-    Attempts int `json:"attempts,omitempty" toml:"attempts,omitempty" yaml:"attempts,omitempty" export:"true"`
+    Attempts        int             `json:"attempts,omitempty" toml:"attempts,omitempty" yaml:"attempts,omitempty"`
+    InitialInterval ptypes.Duration `json:"initialInterval,omitempty" toml:"initialInterval,omitempty" yaml:"initialInterval,omitempty"`
 }

 // +k8s:deepcopy-gen=true
@@ -122,6 +122,7 @@ func TestDecodeConfiguration(t *testing.T) {
     "traefik.http.middlewares.Middleware15.replacepathregex.regex": "foobar",
     "traefik.http.middlewares.Middleware15.replacepathregex.replacement": "foobar",
     "traefik.http.middlewares.Middleware16.retry.attempts": "42",
+    "traefik.http.middlewares.Middleware16.retry.initialinterval": "1s",
     "traefik.http.middlewares.Middleware17.stripprefix.prefixes": "foobar, fiibar",
     "traefik.http.middlewares.Middleware18.stripprefixregex.regex": "foobar, fiibar",
     "traefik.http.middlewares.Middleware19.compress": "true",
@@ -416,7 +417,8 @@ func TestDecodeConfiguration(t *testing.T) {
             },
             "Middleware16": {
                 Retry: &dynamic.Retry{
                     Attempts:        42,
+                    InitialInterval: ptypes.Duration(time.Second),
                 },
             },
             "Middleware17": {
@@ -884,7 +886,8 @@ func TestEncodeConfiguration(t *testing.T) {
             },
             "Middleware16": {
                 Retry: &dynamic.Retry{
                     Attempts:        42,
+                    InitialInterval: ptypes.Duration(time.Second),
                 },
             },
             "Middleware17": {
@@ -1238,6 +1241,7 @@
     "traefik.HTTP.Middlewares.Middleware15.ReplacePathRegex.Regex": "foobar",
     "traefik.HTTP.Middlewares.Middleware15.ReplacePathRegex.Replacement": "foobar",
     "traefik.HTTP.Middlewares.Middleware16.Retry.Attempts": "42",
+    "traefik.HTTP.Middlewares.Middleware16.Retry.InitialInterval": "1000000000",
     "traefik.HTTP.Middlewares.Middleware17.StripPrefix.Prefixes": "foobar, fiibar",
     "traefik.HTTP.Middlewares.Middleware17.StripPrefix.ForceSlash": "true",
     "traefik.HTTP.Middlewares.Middleware18.StripPrefixRegex.Regex": "foobar, fiibar",
@@ -5,10 +5,13 @@ import (
     "context"
     "fmt"
     "io/ioutil"
+    "math"
     "net"
     "net/http"
     "net/http/httptrace"
+    "time"

+    "github.com/cenkalti/backoff/v4"
     "github.com/opentracing/opentracing-go/ext"
     "github.com/traefik/traefik/v2/pkg/config/dynamic"
     "github.com/traefik/traefik/v2/pkg/log"
@@ -34,12 +37,18 @@ type Listener interface {
 // each of them about a retry attempt.
 type Listeners []Listener

+// nexter returns the duration to wait before retrying the operation.
+type nexter interface {
+    NextBackOff() time.Duration
+}
+
 // retry is a middleware that retries requests.
 type retry struct {
     attempts        int
+    initialInterval time.Duration
     next            http.Handler
     listener        Listener
     name            string
 }

 // New returns a new retry middleware.
@@ -51,10 +60,11 @@ func New(ctx context.Context, next http.Handler, config dynamic.Retry, listener
     }

     return &retry{
         attempts:        config.Attempts,
+        initialInterval: time.Duration(config.InitialInterval),
         next:            next,
         listener:        listener,
         name:            name,
     }, nil
 }
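For orientation, here is a minimal, hypothetical sketch (not part of the commit) of how this constructor could be wired up outside Traefik's own plumbing, assuming a stand-alone program, a placeholder `upstream` handler, and the example values from the docs above (`"my-retry"` and the `:8080` listen address are likewise just examples):

```go
package main

import (
	"context"
	"net/http"
	"time"

	ptypes "github.com/traefik/paerser/types"
	"github.com/traefik/traefik/v2/pkg/config/dynamic"
	"github.com/traefik/traefik/v2/pkg/middlewares/retry"
)

func main() {
	// Hypothetical upstream handler standing in for whatever actually proxies to the backend.
	upstream := http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
		rw.WriteHeader(http.StatusOK)
	})

	cfg := dynamic.Retry{
		Attempts:        4,
		InitialInterval: ptypes.Duration(100 * time.Millisecond),
	}

	// retry.Listeners is a no-op Listener when empty; "my-retry" is just an example name.
	handler, err := retry.New(context.Background(), upstream, cfg, retry.Listeners{}, "my-retry")
	if err != nil {
		panic(err)
	}

	if err := http.ListenAndServe(":8080", handler); err != nil {
		panic(err)
	}
}
```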
@@ -72,36 +82,65 @@ func (r *retry) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
     }

     attempts := 1
+    backOff := r.newBackOff()
+    currentInterval := 0 * time.Millisecond
     for {
+        select {
+        case <-time.After(currentInterval):
             shouldRetry := attempts < r.attempts
             retryResponseWriter := newResponseWriter(rw, shouldRetry)

             // Disable retries when the backend already received request data
             trace := &httptrace.ClientTrace{
                 WroteHeaders: func() {
                     retryResponseWriter.DisableRetries()
                 },
                 WroteRequest: func(httptrace.WroteRequestInfo) {
                     retryResponseWriter.DisableRetries()
                 },
             }
             newCtx := httptrace.WithClientTrace(req.Context(), trace)

             r.next.ServeHTTP(retryResponseWriter, req.WithContext(newCtx))

             if !retryResponseWriter.ShouldRetry() {
-                break
+                return
             }

+            currentInterval = backOff.NextBackOff()
+
             attempts++

             log.FromContext(middlewares.GetLoggerCtx(req.Context(), r.name, typeName)).
                 Debugf("New attempt %d for request: %v", attempts, req.URL)

             r.listener.Retried(req, attempts)
+
+        case <-req.Context().Done():
+            return
+        }
     }
 }

+func (r *retry) newBackOff() nexter {
+    if r.attempts < 2 || r.initialInterval <= 0 {
+        return &backoff.ZeroBackOff{}
+    }
+
+    b := backoff.NewExponentialBackOff()
+    b.InitialInterval = r.initialInterval
+
+    // Calculate the multiplier for the given number of attempts,
+    // so that applying it across the attempts never exceeds twice the initial interval.
+    // This keeps the progression along the attempts under control.
+    b.Multiplier = math.Pow(2, 1/float64(r.attempts-1))
+
+    // According to the backoff docs, b.Reset() must be called before use.
+    b.Reset()
+    return b
+}
+
 // Retried exists to implement the Listener interface. It calls Retried on each of its slice entries.
 func (l Listeners) Retried(req *http.Request, attempt int) {
     for _, listener := range l {
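As a quick sanity check of the comment above (my own example values, not part of the commit), the formula `2^(1/(attempts-1))` does keep the last of the `attempts-1` waits at or below twice the initial interval:

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	const initialMs = 100.0 // hypothetical initialInterval in milliseconds

	for _, attempts := range []int{2, 3, 4, 10} {
		m := math.Pow(2, 1/float64(attempts-1))
		// There are attempts-1 waits; the last one is initial * m^(attempts-2).
		last := initialMs * math.Pow(m, float64(attempts-2))
		fmt.Printf("attempts=%2d  multiplier=%.3f  last wait≈%3.0fms  (2x cap=%.0fms)\n",
			attempts, m, last, 2*initialMs)
	}
}
```

For example, with 4 attempts the nominal waits are 100ms, ~126ms, and ~159ms, all under the 200ms cap.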
@@ -9,10 +9,12 @@ import (
     "strconv"
     "strings"
     "testing"
+    "time"

     "github.com/gorilla/websocket"
     "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/require"
+    ptypes "github.com/traefik/paerser/types"
     "github.com/traefik/traefik/v2/pkg/config/dynamic"
     "github.com/traefik/traefik/v2/pkg/middlewares/emptybackendhandler"
     "github.com/traefik/traefik/v2/pkg/testhelpers"
@@ -35,6 +37,13 @@ func TestRetry(t *testing.T) {
             wantResponseStatus:    http.StatusOK,
             amountFaultyEndpoints: 0,
         },
+        {
+            desc:                  "no retry on success with backoff",
+            config:                dynamic.Retry{Attempts: 1, InitialInterval: ptypes.Duration(time.Microsecond * 50)},
+            wantRetryAttempts:     0,
+            wantResponseStatus:    http.StatusOK,
+            amountFaultyEndpoints: 0,
+        },
         {
             desc:                  "no retry when max request attempts is one",
             config:                dynamic.Retry{Attempts: 1},

@@ -42,6 +51,13 @@ func TestRetry(t *testing.T) {
             wantResponseStatus:    http.StatusBadGateway,
             amountFaultyEndpoints: 1,
         },
+        {
+            desc:                  "no retry when max request attempts is one with backoff",
+            config:                dynamic.Retry{Attempts: 1, InitialInterval: ptypes.Duration(time.Microsecond * 50)},
+            wantRetryAttempts:     0,
+            wantResponseStatus:    http.StatusBadGateway,
+            amountFaultyEndpoints: 1,
+        },
         {
             desc:                  "one retry when one server is faulty",
             config:                dynamic.Retry{Attempts: 2},

@@ -49,6 +65,13 @@ func TestRetry(t *testing.T) {
             wantResponseStatus:    http.StatusOK,
             amountFaultyEndpoints: 1,
         },
+        {
+            desc:                  "one retry when one server is faulty with backoff",
+            config:                dynamic.Retry{Attempts: 2, InitialInterval: ptypes.Duration(time.Microsecond * 50)},
+            wantRetryAttempts:     1,
+            wantResponseStatus:    http.StatusOK,
+            amountFaultyEndpoints: 1,
+        },
         {
             desc:                  "two retries when two servers are faulty",
             config:                dynamic.Retry{Attempts: 3},

@@ -56,6 +79,13 @@ func TestRetry(t *testing.T) {
             wantResponseStatus:    http.StatusOK,
             amountFaultyEndpoints: 2,
         },
+        {
+            desc:                  "two retries when two servers are faulty with backoff",
+            config:                dynamic.Retry{Attempts: 3, InitialInterval: ptypes.Duration(time.Microsecond * 50)},
+            wantRetryAttempts:     2,
+            wantResponseStatus:    http.StatusOK,
+            amountFaultyEndpoints: 2,
+        },
         {
             desc:                  "max attempts exhausted delivers the 5xx response",
             config:                dynamic.Retry{Attempts: 3},

@@ -63,6 +93,13 @@ func TestRetry(t *testing.T) {
             wantResponseStatus:    http.StatusBadGateway,
             amountFaultyEndpoints: 3,
         },
+        {
+            desc:                  "max attempts exhausted delivers the 5xx response with backoff",
+            config:                dynamic.Retry{Attempts: 3, InitialInterval: ptypes.Duration(time.Microsecond * 50)},
+            wantRetryAttempts:     2,
+            wantResponseStatus:    http.StatusBadGateway,
+            amountFaultyEndpoints: 3,
+        },
     }

     backendServer := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {