2020-07-15 16:28:04 +02:00
package ecs
import (
"context"
"fmt"
2024-07-01 16:50:04 +02:00
"os"
2020-07-15 16:28:04 +02:00
"strings"
"text/template"
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
2024-07-01 16:50:04 +02:00
"github.com/aws/aws-sdk-go/aws/credentials/stscreds"
2020-07-15 16:28:04 +02:00
"github.com/aws/aws-sdk-go/aws/defaults"
"github.com/aws/aws-sdk-go/aws/ec2metadata"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/ecs"
2022-09-14 15:22:08 +01:00
"github.com/aws/aws-sdk-go/service/ssm"
2024-07-01 16:50:04 +02:00
"github.com/aws/aws-sdk-go/service/sts"
2020-07-15 16:28:04 +02:00
"github.com/cenkalti/backoff/v4"
2020-08-05 11:52:03 +02:00
"github.com/patrickmn/go-cache"
2020-09-16 15:46:04 +02:00
"github.com/traefik/traefik/v2/pkg/config/dynamic"
"github.com/traefik/traefik/v2/pkg/job"
"github.com/traefik/traefik/v2/pkg/log"
"github.com/traefik/traefik/v2/pkg/provider"
"github.com/traefik/traefik/v2/pkg/safe"
2020-07-15 16:28:04 +02:00
)
// Provider holds configurations of the provider.
type Provider struct {
	// Constraints is the label-matching expression exposed as the "constraints" option.
	Constraints string `description:"Constraints is an expression that Traefik matches against the container's labels to determine whether to create any route for that container." json:"constraints,omitempty" toml:"constraints,omitempty" yaml:"constraints,omitempty" export:"true"`
	// ExposedByDefault defaults to true (see SetDefaults).
	ExposedByDefault bool `description:"Expose services by default" json:"exposedByDefault,omitempty" toml:"exposedByDefault,omitempty" yaml:"exposedByDefault,omitempty" export:"true"`
	// RefreshSeconds is the interval, in seconds, of the ticker that triggers a configuration reload.
	RefreshSeconds int `description:"Polling interval (in seconds)" json:"refreshSeconds,omitempty" toml:"refreshSeconds,omitempty" yaml:"refreshSeconds,omitempty" export:"true"`
	// DefaultRule is the rule template parsed by Init into defaultRuleTpl.
	DefaultRule string `description:"Default rule." json:"defaultRule,omitempty" toml:"defaultRule,omitempty" yaml:"defaultRule,omitempty"`

	// Provider lookup parameters.

	// Clusters lists the clusters to inspect when AutoDiscoverClusters is false.
	Clusters []string `description:"ECS Clusters name" json:"clusters,omitempty" toml:"clusters,omitempty" yaml:"clusters,omitempty" export:"true"`
	// AutoDiscoverClusters makes the provider list the clusters through the ECS API instead of using Clusters.
	AutoDiscoverClusters bool `description:"Auto discover cluster" json:"autoDiscoverClusters,omitempty" toml:"autoDiscoverClusters,omitempty" yaml:"autoDiscoverClusters,omitempty" export:"true"`
	// ECSAnywhere additionally resolves "mi-" prefixed instances through SSM.
	ECSAnywhere bool `description:"Enable ECS Anywhere support" json:"ecsAnywhere,omitempty" toml:"ecsAnywhere,omitempty" yaml:"ecsAnywhere,omitempty" export:"true"`
	// Region is the AWS region; when empty, createClient tries to read it from the EC2 metadata endpoint.
	Region string `description:"The AWS region to use for requests" json:"region,omitempty" toml:"region,omitempty" yaml:"region,omitempty" export:"true"`
	// AccessKeyID and SecretAccessKey form the static credentials tried first in the credentials chain.
	AccessKeyID     string `description:"The AWS credentials access key to use for making requests" json:"accessKeyID,omitempty" toml:"accessKeyID,omitempty" yaml:"accessKeyID,omitempty" loggable:"false"`
	SecretAccessKey string `description:"The AWS credentials secret access key to use for making requests" json:"secretAccessKey,omitempty" toml:"secretAccessKey,omitempty" yaml:"secretAccessKey,omitempty" loggable:"false"`

	// defaultRuleTpl is the parsed form of DefaultRule, set by Init.
	defaultRuleTpl *template.Template
}
// ecsInstance is the provider's view of one container of a running ECS task.
type ecsInstance struct {
	// Name is "<task group>-<container name>"; ID is the trailing part of the task ARN.
	Name, ID string
	// containerDefinition is the matching entry of the task definition.
	containerDefinition *ecs.ContainerDefinition
	// machine carries the network information used to reach the container.
	machine *machine
	// Labels are the docker labels of the container definition.
	Labels map[string]string
	// ExtraConf is the provider configuration derived from the instance.
	ExtraConf configuration
}
// portMapping associates a container port with the host port it is published on.
type portMapping struct {
	containerPort, hostPort int64
	// protocol is "TCP" or "UDP" when known; it is left empty for mappings
	// built from network bindings.
	protocol string
}
// machine describes where a container can be reached and in which state it is.
type machine struct {
	// state is the EC2 instance state name or the task's last status,
	// depending on how the machine was resolved.
	state, privateIP string
	ports            []portMapping
	// healthStatus is the task health status; it is only filled for awsvpc tasks.
	healthStatus string
}
type awsClient struct {
ecs * ecs . ECS
ec2 * ec2 . EC2
2022-09-14 15:22:08 +01:00
ssm * ssm . SSM
2020-07-15 16:28:04 +02:00
}
// DefaultTemplateRule is the default template for the default rule.
// SetDefaults assigns it to Provider.DefaultRule.
const DefaultTemplateRule = "Host(`{{ normalize .Name }}`)"
var (
	// Compile-time check that *Provider implements provider.Provider.
	_ provider.Provider = (*Provider)(nil)

	// existingTaskDefCache stores task definitions already fetched from the ECS API
	// (30 minute default expiration, 5 minute cleanup interval).
	existingTaskDefCache = cache.New(30*time.Minute, 5*time.Minute)
)
// SetDefaults sets the default values: the "default" cluster without auto
// discovery, services exposed by default, a 15 second polling interval and
// DefaultTemplateRule as default rule.
func (p *Provider) SetDefaults() {
	p.DefaultRule = DefaultTemplateRule
	p.RefreshSeconds = 15
	p.ExposedByDefault = true
	p.AutoDiscoverClusters = false
	p.Clusters = []string{"default"}
}
// Init the provider.
// It parses DefaultRule into a template and keeps the result for later use;
// a parsing failure is returned wrapped.
func (p *Provider) Init() error {
	tpl, err := provider.MakeDefaultRuleTemplate(p.DefaultRule, nil)
	if err != nil {
		return fmt.Errorf("error while parsing default rule: %w", err)
	}

	p.defaultRuleTpl = tpl

	return nil
}
func ( p * Provider ) createClient ( logger log . Logger ) ( * awsClient , error ) {
2020-08-05 11:52:03 +02:00
sess , err := session . NewSessionWithOptions ( session . Options {
SharedConfigState : session . SharedConfigEnable ,
} )
2020-07-15 16:28:04 +02:00
if err != nil {
return nil , err
}
ec2meta := ec2metadata . New ( sess )
2020-08-05 11:52:03 +02:00
if p . Region == "" && ec2meta . Available ( ) {
logger . Infoln ( "No region provided, querying instance metadata endpoint..." )
2020-07-15 16:28:04 +02:00
identity , err := ec2meta . GetInstanceIdentityDocument ( )
if err != nil {
return nil , err
}
p . Region = identity . Region
}
2024-07-01 16:50:04 +02:00
cfg := aws . NewConfig ( ) .
WithCredentials ( credentials . NewChainCredentials ( [ ] credentials . Provider {
& credentials . StaticProvider {
Value : credentials . Value {
AccessKeyID : p . AccessKeyID ,
SecretAccessKey : p . SecretAccessKey ,
2020-07-15 16:28:04 +02:00
} ,
2024-07-01 16:50:04 +02:00
} ,
& credentials . EnvProvider { } ,
& credentials . SharedCredentialsProvider { } ,
defaults . RemoteCredProvider ( * ( defaults . Config ( ) ) , defaults . Handlers ( ) ) ,
stscreds . NewWebIdentityRoleProviderWithOptions (
sts . New ( sess ) ,
os . Getenv ( "AWS_ROLE_ARN" ) ,
"" ,
stscreds . FetchTokenPath ( os . Getenv ( "AWS_WEB_IDENTITY_TOKEN_FILE" ) ) ,
) ,
} ) )
2020-07-15 16:28:04 +02:00
2020-08-05 11:52:03 +02:00
// Set the region if it is defined by the user or resolved from the EC2 metadata.
if p . Region != "" {
cfg . Region = & p . Region
}
2020-07-15 16:28:04 +02:00
cfg . WithLogger ( aws . LoggerFunc ( func ( args ... interface { } ) {
logger . Debug ( args ... )
} ) )
return & awsClient {
ecs . New ( sess , cfg ) ,
ec2 . New ( sess , cfg ) ,
2022-09-14 15:22:08 +01:00
ssm . New ( sess , cfg ) ,
2020-07-15 16:28:04 +02:00
} , nil
}
// Provide configuration to traefik from ECS.
2022-09-14 15:22:08 +01:00
func ( p * Provider ) Provide ( configurationChan chan <- dynamic . Message , pool * safe . Pool ) error {
2020-07-15 16:28:04 +02:00
pool . GoCtx ( func ( routineCtx context . Context ) {
ctxLog := log . With ( routineCtx , log . Str ( log . ProviderName , "ecs" ) )
logger := log . FromContext ( ctxLog )
operation := func ( ) error {
awsClient , err := p . createClient ( logger )
if err != nil {
2020-11-19 00:12:03 +01:00
return fmt . Errorf ( "unable to create AWS client: %w" , err )
2020-07-15 16:28:04 +02:00
}
2020-11-19 00:12:03 +01:00
err = p . loadConfiguration ( ctxLog , awsClient , configurationChan )
2020-07-15 16:28:04 +02:00
if err != nil {
2020-11-19 00:12:03 +01:00
return fmt . Errorf ( "failed to get ECS configuration: %w" , err )
2020-07-15 16:28:04 +02:00
}
2020-11-19 00:12:03 +01:00
ticker := time . NewTicker ( time . Second * time . Duration ( p . RefreshSeconds ) )
defer ticker . Stop ( )
2020-07-15 16:28:04 +02:00
for {
select {
2020-11-19 00:12:03 +01:00
case <- ticker . C :
err = p . loadConfiguration ( ctxLog , awsClient , configurationChan )
2020-07-15 16:28:04 +02:00
if err != nil {
2020-11-19 00:12:03 +01:00
return fmt . Errorf ( "failed to refresh ECS configuration: %w" , err )
2020-07-15 16:28:04 +02:00
}
case <- routineCtx . Done ( ) :
return nil
}
}
}
notify := func ( err error , time time . Duration ) {
logger . Errorf ( "Provider connection error %+v, retrying in %s" , err , time )
}
err := backoff . RetryNotify ( safe . OperationWithRecover ( operation ) , backoff . WithContext ( job . NewBackOff ( backoff . NewExponentialBackOff ( ) ) , routineCtx ) , notify )
if err != nil {
logger . Errorf ( "Cannot connect to Provider api %+v" , err )
}
} )
return nil
}
2020-11-19 00:12:03 +01:00
// loadConfiguration lists the current ECS instances, builds the dynamic
// configuration from them and sends it on configurationChan under the "ecs"
// provider name. A listing failure is returned as is and nothing is sent.
func (p *Provider) loadConfiguration(ctx context.Context, client *awsClient, configurationChan chan<- dynamic.Message) error {
	found, listErr := p.listInstances(ctx, client)
	if listErr != nil {
		return listErr
	}

	msg := dynamic.Message{
		ProviderName:  "ecs",
		Configuration: p.buildConfiguration(ctx, found),
	}
	configurationChan <- msg

	return nil
}
2020-07-15 16:28:04 +02:00
// Find all running Provider tasks in a cluster, also collect the task definitions (for docker labels)
// and the EC2 instance data.
func ( p * Provider ) listInstances ( ctx context . Context , client * awsClient ) ( [ ] ecsInstance , error ) {
logger := log . FromContext ( ctx )
var clustersArn [ ] * string
var clusters [ ] string
if p . AutoDiscoverClusters {
input := & ecs . ListClustersInput { }
for {
result , err := client . ecs . ListClusters ( input )
if err != nil {
return nil , err
}
if result != nil {
clustersArn = append ( clustersArn , result . ClusterArns ... )
input . NextToken = result . NextToken
if result . NextToken == nil {
break
}
} else {
break
}
}
for _ , cArn := range clustersArn {
clusters = append ( clusters , * cArn )
}
} else {
clusters = p . Clusters
}
var instances [ ] ecsInstance
logger . Debugf ( "ECS Clusters: %s" , clusters )
for _ , c := range clusters {
input := & ecs . ListTasksInput {
Cluster : & c ,
DesiredStatus : aws . String ( ecs . DesiredStatusRunning ) ,
}
tasks := make ( map [ string ] * ecs . Task )
err := client . ecs . ListTasksPagesWithContext ( ctx , input , func ( page * ecs . ListTasksOutput , lastPage bool ) bool {
if len ( page . TaskArns ) > 0 {
resp , err := client . ecs . DescribeTasksWithContext ( ctx , & ecs . DescribeTasksInput {
Tasks : page . TaskArns ,
Cluster : & c ,
} )
if err != nil {
logger . Errorf ( "Unable to describe tasks for %v" , page . TaskArns )
} else {
for _ , t := range resp . Tasks {
if aws . StringValue ( t . LastStatus ) == ecs . DesiredStatusRunning {
tasks [ aws . StringValue ( t . TaskArn ) ] = t
}
}
}
}
return ! lastPage
} )
if err != nil {
2022-04-05 15:54:07 +02:00
return nil , fmt . Errorf ( "listing tasks: %w" , err )
2020-07-15 16:28:04 +02:00
}
// Skip to the next cluster if there are no tasks found on
// this cluster.
if len ( tasks ) == 0 {
continue
}
ec2Instances , err := p . lookupEc2Instances ( ctx , client , & c , tasks )
if err != nil {
return nil , err
}
2022-09-14 15:22:08 +01:00
miInstances := make ( map [ string ] * ssm . InstanceInformation )
if p . ECSAnywhere {
// Try looking up for instances on ECS Anywhere
miInstances , err = p . lookupMiInstances ( ctx , client , & c , tasks )
if err != nil {
return nil , err
}
}
2020-07-15 16:28:04 +02:00
taskDefinitions , err := p . lookupTaskDefinitions ( ctx , client , tasks )
if err != nil {
return nil , err
}
for key , task := range tasks {
containerInstance := ec2Instances [ aws . StringValue ( task . ContainerInstanceArn ) ]
taskDef := taskDefinitions [ key ]
for _ , container := range task . Containers {
var containerDefinition * ecs . ContainerDefinition
for _ , def := range taskDef . ContainerDefinitions {
if aws . StringValue ( container . Name ) == aws . StringValue ( def . Name ) {
containerDefinition = def
break
}
}
if containerDefinition == nil {
logger . Debugf ( "Unable to find container definition for %s" , aws . StringValue ( container . Name ) )
continue
}
var mach * machine
2024-04-10 10:42:04 +02:00
if aws . StringValue ( taskDef . NetworkMode ) == "awsvpc" && len ( task . Attachments ) != 0 {
2023-01-11 15:14:05 +01:00
if len ( container . NetworkInterfaces ) == 0 {
logger . Errorf ( "Skip container %s: no network interfaces" , aws . StringValue ( container . Name ) )
continue
}
2020-07-15 16:28:04 +02:00
var ports [ ] portMapping
for _ , mapping := range containerDefinition . PortMappings {
if mapping != nil {
protocol := "TCP"
if aws . StringValue ( mapping . Protocol ) == "udp" {
protocol = "UDP"
}
ports = append ( ports , portMapping {
hostPort : aws . Int64Value ( mapping . HostPort ) ,
containerPort : aws . Int64Value ( mapping . ContainerPort ) ,
protocol : protocol ,
} )
}
}
mach = & machine {
privateIP : aws . StringValue ( container . NetworkInterfaces [ 0 ] . PrivateIpv4Address ) ,
ports : ports ,
state : aws . StringValue ( task . LastStatus ) ,
healthStatus : aws . StringValue ( task . HealthStatus ) ,
}
} else {
2022-09-14 15:22:08 +01:00
miContainerInstance := miInstances [ aws . StringValue ( task . ContainerInstanceArn ) ]
if containerInstance == nil && miContainerInstance == nil {
2020-07-15 16:28:04 +02:00
logger . Errorf ( "Unable to find container instance information for %s" , aws . StringValue ( container . Name ) )
continue
}
var ports [ ] portMapping
for _ , mapping := range container . NetworkBindings {
if mapping != nil {
ports = append ( ports , portMapping {
hostPort : aws . Int64Value ( mapping . HostPort ) ,
containerPort : aws . Int64Value ( mapping . ContainerPort ) ,
} )
}
}
2022-09-14 15:22:08 +01:00
var privateIPAddress , stateName string
if containerInstance != nil {
privateIPAddress = aws . StringValue ( containerInstance . PrivateIpAddress )
stateName = aws . StringValue ( containerInstance . State . Name )
} else if miContainerInstance != nil {
privateIPAddress = aws . StringValue ( miContainerInstance . IPAddress )
stateName = aws . StringValue ( task . LastStatus )
}
2020-07-15 16:28:04 +02:00
mach = & machine {
2022-09-14 15:22:08 +01:00
privateIP : privateIPAddress ,
2020-07-15 16:28:04 +02:00
ports : ports ,
2022-09-14 15:22:08 +01:00
state : stateName ,
2020-07-15 16:28:04 +02:00
}
}
instance := ecsInstance {
Name : fmt . Sprintf ( "%s-%s" , strings . Replace ( aws . StringValue ( task . Group ) , ":" , "-" , 1 ) , * container . Name ) ,
ID : key [ len ( key ) - 12 : ] ,
containerDefinition : containerDefinition ,
machine : mach ,
Labels : aws . StringValueMap ( containerDefinition . DockerLabels ) ,
}
extraConf , err := p . getConfiguration ( instance )
if err != nil {
2023-01-11 15:14:05 +01:00
logger . Errorf ( "Skip container %s: %w" , getServiceName ( instance ) , err )
2020-07-15 16:28:04 +02:00
continue
}
instance . ExtraConf = extraConf
instances = append ( instances , instance )
}
}
}
return instances , nil
}
2022-09-14 15:22:08 +01:00
func ( p * Provider ) lookupMiInstances ( ctx context . Context , client * awsClient , clusterName * string , ecsDatas map [ string ] * ecs . Task ) ( map [ string ] * ssm . InstanceInformation , error ) {
2024-02-19 15:44:03 +01:00
instanceIDs := make ( map [ string ] string )
2022-09-14 15:22:08 +01:00
miInstances := make ( map [ string ] * ssm . InstanceInformation )
var containerInstancesArns [ ] * string
var instanceArns [ ] * string
for _ , task := range ecsDatas {
if task . ContainerInstanceArn != nil {
containerInstancesArns = append ( containerInstancesArns , task . ContainerInstanceArn )
}
}
for _ , arns := range p . chunkIDs ( containerInstancesArns ) {
resp , err := client . ecs . DescribeContainerInstancesWithContext ( ctx , & ecs . DescribeContainerInstancesInput {
ContainerInstances : arns ,
Cluster : clusterName ,
} )
if err != nil {
return nil , fmt . Errorf ( "describing container instances: %w" , err )
}
for _ , container := range resp . ContainerInstances {
2024-02-19 15:44:03 +01:00
instanceIDs [ aws . StringValue ( container . Ec2InstanceId ) ] = aws . StringValue ( container . ContainerInstanceArn )
2022-09-14 15:22:08 +01:00
// Disallow EC2 Instance IDs
// This prevents considering EC2 instances in ECS
// and getting InvalidInstanceID.Malformed error when calling the describe-instances endpoint.
if ! strings . HasPrefix ( aws . StringValue ( container . Ec2InstanceId ) , "mi-" ) {
continue
}
instanceArns = append ( instanceArns , container . Ec2InstanceId )
}
}
if len ( instanceArns ) > 0 {
for _ , ids := range p . chunkIDs ( instanceArns ) {
input := & ssm . DescribeInstanceInformationInput {
Filters : [ ] * ssm . InstanceInformationStringFilter {
{
Key : aws . String ( "InstanceIds" ) ,
Values : ids ,
} ,
} ,
}
err := client . ssm . DescribeInstanceInformationPagesWithContext ( ctx , input , func ( page * ssm . DescribeInstanceInformationOutput , lastPage bool ) bool {
if len ( page . InstanceInformationList ) > 0 {
for _ , i := range page . InstanceInformationList {
if i . InstanceId != nil {
2024-02-19 15:44:03 +01:00
miInstances [ instanceIDs [ aws . StringValue ( i . InstanceId ) ] ] = i
2022-09-14 15:22:08 +01:00
}
}
}
return ! lastPage
} )
if err != nil {
return nil , fmt . Errorf ( "describing instances: %w" , err )
}
}
}
return miInstances , nil
}
2020-07-15 16:28:04 +02:00
func ( p * Provider ) lookupEc2Instances ( ctx context . Context , client * awsClient , clusterName * string , ecsDatas map [ string ] * ecs . Task ) ( map [ string ] * ec2 . Instance , error ) {
2024-02-19 15:44:03 +01:00
instanceIDs := make ( map [ string ] string )
2020-07-15 16:28:04 +02:00
ec2Instances := make ( map [ string ] * ec2 . Instance )
var containerInstancesArns [ ] * string
var instanceArns [ ] * string
for _ , task := range ecsDatas {
if task . ContainerInstanceArn != nil {
containerInstancesArns = append ( containerInstancesArns , task . ContainerInstanceArn )
}
}
for _ , arns := range p . chunkIDs ( containerInstancesArns ) {
resp , err := client . ecs . DescribeContainerInstancesWithContext ( ctx , & ecs . DescribeContainerInstancesInput {
ContainerInstances : arns ,
Cluster : clusterName ,
} )
if err != nil {
2022-04-05 15:54:07 +02:00
return nil , fmt . Errorf ( "describing container instances: %w" , err )
2020-07-15 16:28:04 +02:00
}
for _ , container := range resp . ContainerInstances {
2024-02-19 15:44:03 +01:00
instanceIDs [ aws . StringValue ( container . Ec2InstanceId ) ] = aws . StringValue ( container . ContainerInstanceArn )
2022-04-28 13:24:08 +01:00
// Disallow Instance IDs of the form mi-*
// This prevents considering external instances in ECS Anywhere setups
// and getting InvalidInstanceID.Malformed error when calling the describe-instances endpoint.
if strings . HasPrefix ( aws . StringValue ( container . Ec2InstanceId ) , "mi-" ) {
continue
}
2020-07-15 16:28:04 +02:00
instanceArns = append ( instanceArns , container . Ec2InstanceId )
}
}
if len ( instanceArns ) > 0 {
for _ , ids := range p . chunkIDs ( instanceArns ) {
input := & ec2 . DescribeInstancesInput {
InstanceIds : ids ,
}
err := client . ec2 . DescribeInstancesPagesWithContext ( ctx , input , func ( page * ec2 . DescribeInstancesOutput , lastPage bool ) bool {
if len ( page . Reservations ) > 0 {
for _ , r := range page . Reservations {
for _ , i := range r . Instances {
if i . InstanceId != nil {
2024-02-19 15:44:03 +01:00
ec2Instances [ instanceIDs [ aws . StringValue ( i . InstanceId ) ] ] = i
2020-07-15 16:28:04 +02:00
}
}
}
}
return ! lastPage
} )
if err != nil {
2022-04-05 15:54:07 +02:00
return nil , fmt . Errorf ( "describing instances: %w" , err )
2020-07-15 16:28:04 +02:00
}
}
}
return ec2Instances , nil
}
func ( p * Provider ) lookupTaskDefinitions ( ctx context . Context , client * awsClient , taskDefArns map [ string ] * ecs . Task ) ( map [ string ] * ecs . TaskDefinition , error ) {
logger := log . FromContext ( ctx )
taskDef := make ( map [ string ] * ecs . TaskDefinition )
for arn , task := range taskDefArns {
if definition , ok := existingTaskDefCache . Get ( arn ) ; ok {
taskDef [ arn ] = definition . ( * ecs . TaskDefinition )
logger . Debugf ( "Found cached task definition for %s. Skipping the call" , arn )
} else {
resp , err := client . ecs . DescribeTaskDefinitionWithContext ( ctx , & ecs . DescribeTaskDefinitionInput {
TaskDefinition : task . TaskDefinitionArn ,
} )
if err != nil {
2022-04-05 15:54:07 +02:00
return nil , fmt . Errorf ( "describing task definition: %w" , err )
2020-07-15 16:28:04 +02:00
}
taskDef [ arn ] = resp . TaskDefinition
existingTaskDefCache . Set ( arn , resp . TaskDefinition , cache . DefaultExpiration )
}
}
return taskDef , nil
}
// chunkIDs ECS expects no more than 100 parameters be passed to a API call;
// thus, pack each string into an array capped at 100 elements.
func ( p * Provider ) chunkIDs ( ids [ ] * string ) [ ] [ ] * string {
2022-02-21 06:40:09 -05:00
var chunked [ ] [ ] * string
2020-07-15 16:28:04 +02:00
for i := 0 ; i < len ( ids ) ; i += 100 {
var sliceEnd int
if i + 100 < len ( ids ) {
sliceEnd = i + 100
} else {
sliceEnd = len ( ids )
}
2022-02-21 06:40:09 -05:00
chunked = append ( chunked , ids [ i : sliceEnd ] )
2020-07-15 16:28:04 +02:00
}
2022-02-21 06:40:09 -05:00
return chunked
2020-07-15 16:28:04 +02:00
}