Skip to content

Commit

Permalink
Terminally exit on unrecoverable exceptions for RCI.
Browse files Browse the repository at this point in the history
  • Loading branch information
BinBin He committed Dec 27, 2024
1 parent 41d593c commit 48fc82a
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 5 deletions.
17 changes: 14 additions & 3 deletions agent/app/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -447,10 +447,15 @@ func (agent *ecsAgent) doStart(containerChangeEventStream *eventstream.EventStre
// Register the container instance
err = agent.registerContainerInstance(client, vpcSubnetAttributes)
if err != nil {
if isTransient(err) {
if isTerminal(err) {
// On unrecoverable error codes, agent should terminally exit.
seelog.Warnf("Agent will terminally exit, unable to register container instance: %v", err)
return exitcodes.ExitTerminal
} else if isTransient(err) {
return exitcodes.ExitError
}
return exitcodes.ExitTerminal
// Other errors are considered recoverable and will be retried.
return exitcodes.ExitError
}

// Load Managed Daemon images asynchronously
Expand Down Expand Up @@ -840,7 +845,13 @@ func (agent *ecsAgent) registerContainerInstance(
logger.Critical("Instance registration attempt with an invalid parameter", logger.Fields{
field.Error: err,
})
return err
return terminalError{err}
}
if utils.IsAWSErrorCodeEqual(err, ecsmodel.ErrCodeClientException) {
logger.Critical("Instance registration attempt with client performing invalid action", logger.Fields{
field.Error: err,
})
return terminalError{err}
}
if _, ok := err.(apierrors.AttributeError); ok {
attributeErrorMsg := ""
Expand Down
129 changes: 127 additions & 2 deletions agent/app/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ func TestDoStartNewTaskEngineError(t *testing.T) {
assert.Equal(t, exitcodes.ExitTerminal, exitCode)
}

func TestDoStartRegisterContainerInstanceErrorTerminal(t *testing.T) {
func TestDoStartRegisterContainerInstanceErrorTransient(t *testing.T) {
ctrl, credentialsManager, state, imageManager, client,
dockerClient, _, _, execCmdMgr, _ := setup(t)
defer ctrl.Finish()
Expand Down Expand Up @@ -282,7 +282,7 @@ func TestDoStartRegisterContainerInstanceErrorTerminal(t *testing.T) {

exitCode := agent.doStart(eventstream.NewEventStream("events", ctx),
credentialsManager, state, imageManager, client, execCmdMgr)
assert.Equal(t, exitcodes.ExitTerminal, exitCode)
assert.Equal(t, exitcodes.ExitCode, exitCode)
}

func TestDoStartRegisterContainerInstanceErrorNonTerminal(t *testing.T) {
Expand Down Expand Up @@ -1499,6 +1499,131 @@ func TestRegisterContainerInstanceInvalidParameterTerminalError(t *testing.T) {
credentialsManager, state, imageManager, client, execCmdMgr)
assert.Equal(t, exitcodes.ExitTerminal, exitCode)
}

func TestRegisterContainerInstanceInvalidClientError(t *testing.T) {
ctrl, credentialsManager, state, imageManager, client,
dockerClient, _, _, execCmdMgr, _ := setup(t)
defer ctrl.Finish()

mockCredentialsProvider := app_mocks.NewMockCredentialsProvider(ctrl)
mockMobyPlugins := mock_mobypkgwrapper.NewMockPlugins(ctrl)
mockEC2Metadata := mock_ec2.NewMockEC2MetadataClient(ctrl)
mockPauseLoader := mock_loader.NewMockLoader(ctrl)

mockPauseLoader.EXPECT().IsLoaded(gomock.Any()).Return(false, nil).AnyTimes()
mockPauseLoader.EXPECT().LoadImage(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil).AnyTimes()
mockEC2Metadata.EXPECT().PrimaryENIMAC().Return("mac", nil)
mockEC2Metadata.EXPECT().VPCID(gomock.Eq("mac")).Return("vpc-id", nil)
mockEC2Metadata.EXPECT().SubnetID(gomock.Eq("mac")).Return("subnet-id", nil)
mockServiceConnectManager := mock_serviceconnect.NewMockManager(ctrl)
mockServiceConnectManager.EXPECT().IsLoaded(gomock.Any()).Return(true, nil).AnyTimes()
mockServiceConnectManager.EXPECT().GetLoadedAppnetVersion().AnyTimes()
mockServiceConnectManager.EXPECT().GetCapabilitiesForAppnetInterfaceVersion("").AnyTimes()
mockServiceConnectManager.EXPECT().SetECSClient(gomock.Any(), gomock.Any()).AnyTimes()

mockDaemonManager := mock_daemonmanager.NewMockDaemonManager(ctrl)
mockDaemonManagers := map[string]dm.DaemonManager{md.EbsCsiDriver: mockDaemonManager}
mockDaemonManager.EXPECT().IsLoaded(gomock.Any()).Return(true, nil).AnyTimes()
mockDaemonManager.EXPECT().LoadImage(gomock.Any(), gomock.Any()).Return(nil, nil).AnyTimes()

dockerClient.EXPECT().SupportedVersions().Return(apiVersions).AnyTimes()

gomock.InOrder(
client.EXPECT().GetHostResources().Return(testHostResource, nil),
mockCredentialsProvider.EXPECT().Retrieve(gomock.Any()).Return(awsv2.Credentials{}, nil),
mockMobyPlugins.EXPECT().Scan().AnyTimes().Return([]string{}, nil),
dockerClient.EXPECT().ListPluginsWithFilters(gomock.Any(), gomock.Any(), gomock.Any(),
gomock.Any()).AnyTimes().Return([]string{}, nil),
client.EXPECT().RegisterContainerInstance(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(),
gomock.Any()).Return("", "", awserr.New("ClientException", "", nil)),
)
mockEC2Metadata.EXPECT().OutpostARN().Return("", nil)

cfg := getTestConfig()
ctx, cancel := context.WithCancel(context.TODO())
// Cancel the context to cancel async routines
defer cancel()
agent := &ecsAgent{
ctx: ctx,
ec2MetadataClient: mockEC2Metadata,
cfg: &cfg,
pauseLoader: mockPauseLoader,
credentialsCache: awsv2.NewCredentialsCache(mockCredentialsProvider),
dockerClient: dockerClient,
mobyPlugins: mockMobyPlugins,
terminationHandler: func(taskEngineState dockerstate.TaskEngineState, dataClient data.Client, taskEngine engine.TaskEngine, cancel context.CancelFunc) {
},
serviceconnectManager: mockServiceConnectManager,
daemonManagers: mockDaemonManagers,
}

exitCode := agent.doStart(eventstream.NewEventStream("events", ctx),
credentialsManager, state, imageManager, client, execCmdMgr)
assert.Equal(t, exitcodes.ExitTerminal, exitCode)
}

func TestRegisterContainerInstanceThrottlingExceptionError(t *testing.T) {
ctrl, credentialsManager, state, imageManager, client,
dockerClient, _, _, execCmdMgr, _ := setup(t)
defer ctrl.Finish()

mockCredentialsProvider := app_mocks.NewMockCredentialsProvider(ctrl)
mockMobyPlugins := mock_mobypkgwrapper.NewMockPlugins(ctrl)
mockEC2Metadata := mock_ec2.NewMockEC2MetadataClient(ctrl)
mockPauseLoader := mock_loader.NewMockLoader(ctrl)

mockPauseLoader.EXPECT().IsLoaded(gomock.Any()).Return(false, nil).AnyTimes()
mockPauseLoader.EXPECT().LoadImage(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil).AnyTimes()
mockEC2Metadata.EXPECT().PrimaryENIMAC().Return("mac", nil)
mockEC2Metadata.EXPECT().VPCID(gomock.Eq("mac")).Return("vpc-id", nil)
mockEC2Metadata.EXPECT().SubnetID(gomock.Eq("mac")).Return("subnet-id", nil)
mockServiceConnectManager := mock_serviceconnect.NewMockManager(ctrl)
mockServiceConnectManager.EXPECT().IsLoaded(gomock.Any()).Return(true, nil).AnyTimes()
mockServiceConnectManager.EXPECT().GetLoadedAppnetVersion().AnyTimes()
mockServiceConnectManager.EXPECT().GetCapabilitiesForAppnetInterfaceVersion("").AnyTimes()
mockServiceConnectManager.EXPECT().SetECSClient(gomock.Any(), gomock.Any()).AnyTimes()

mockDaemonManager := mock_daemonmanager.NewMockDaemonManager(ctrl)
mockDaemonManagers := map[string]dm.DaemonManager{md.EbsCsiDriver: mockDaemonManager}
mockDaemonManager.EXPECT().IsLoaded(gomock.Any()).Return(true, nil).AnyTimes()
mockDaemonManager.EXPECT().LoadImage(gomock.Any(), gomock.Any()).Return(nil, nil).AnyTimes()

dockerClient.EXPECT().SupportedVersions().Return(apiVersions).AnyTimes()

gomock.InOrder(
client.EXPECT().GetHostResources().Return(testHostResource, nil),
mockCredentialsProvider.EXPECT().Retrieve(gomock.Any()).Return(awsv2.Credentials{}, nil),
mockMobyPlugins.EXPECT().Scan().AnyTimes().Return([]string{}, nil),
dockerClient.EXPECT().ListPluginsWithFilters(gomock.Any(), gomock.Any(), gomock.Any(),
gomock.Any()).AnyTimes().Return([]string{}, nil),
client.EXPECT().RegisterContainerInstance(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(),
gomock.Any()).Return("", "", awserr.New("ThrottlingException", "", nil)),
)
mockEC2Metadata.EXPECT().OutpostARN().Return("", nil)

cfg := getTestConfig()
ctx, cancel := context.WithCancel(context.TODO())
// Cancel the context to cancel async routines
defer cancel()
agent := &ecsAgent{
ctx: ctx,
ec2MetadataClient: mockEC2Metadata,
cfg: &cfg,
pauseLoader: mockPauseLoader,
credentialsCache: awsv2.NewCredentialsCache(mockCredentialsProvider),
dockerClient: dockerClient,
mobyPlugins: mockMobyPlugins,
terminationHandler: func(taskEngineState dockerstate.TaskEngineState, dataClient data.Client, taskEngine engine.TaskEngine, cancel context.CancelFunc) {
},
serviceconnectManager: mockServiceConnectManager,
daemonManagers: mockDaemonManagers,
}

exitCode := agent.doStart(eventstream.NewEventStream("events", ctx),
credentialsManager, state, imageManager, client, execCmdMgr)
assert.Equal(t, exitcodes.ExitError, exitCode)
}

func TestMergeTags(t *testing.T) {
ec2Key := "ec2Key"
ec2Value := "ec2Value"
Expand Down
12 changes: 12 additions & 0 deletions agent/app/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,18 @@ func isTransient(err error) bool {
return ok
}

type terminalError struct {
error
}

// isTerminal returns true if the error is already wrapped as an unrecoverable condition
// which will allow agent to exit terminally.
func isTerminal(err error) bool {
// Check if the error is already wrapped as a terminalError
_, terminal := err.(terminalError)
return terminal
}

// clusterMismatchError represents a mismatch in cluster name between the
// state file and the config object
type clusterMismatchError struct {
Expand Down

0 comments on commit 48fc82a

Please sign in to comment.