Skip to content

Commit

Permalink
Merge pull request #4434 from juanluisvaladas/fix-reset-failedcontainerd
Browse files Browse the repository at this point in the history
Make k0s reset fail if it can't reach containerd
  • Loading branch information
juanluisvaladas authored Jun 6, 2024
2 parents 6e20dd5 + 4e7b82a commit 34d4b5e
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 21 deletions.
17 changes: 9 additions & 8 deletions pkg/cleanup/containers.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package cleanup

import (
+ "context"
"errors"
"fmt"
"io/fs"
Expand Down Expand Up @@ -134,20 +135,20 @@ func (c *containers) stopContainerd() {

func (c *containers) stopAllContainers() error {
var errs []error
- logrus.Debugf("trying to list all pods")

var pods []string
+ ctx := context.TODO()
err := retry.Do(func() error {
+ logrus.Debugf("trying to list all pods")
var err error
- pods, err = c.Config.containerRuntime.ListContainers()
+ pods, err = c.Config.containerRuntime.ListContainers(ctx)
if err != nil {
return err
}
return nil
- })
+ }, retry.Context(ctx), retry.LastErrorOnly(true))
if err != nil {
- logrus.Debugf("failed at listing pods %v", err)
- return err
+ return fmt.Errorf("failed at listing pods %w", err)
}
if len(pods) > 0 {
if err := removeMount("kubelet/pods"); err != nil {
Expand All @@ -160,7 +161,7 @@ func (c *containers) stopAllContainers() error {

for _, pod := range pods {
logrus.Debugf("stopping container: %v", pod)
- err := c.Config.containerRuntime.StopContainer(pod)
+ err := c.Config.containerRuntime.StopContainer(ctx, pod)
if err != nil {
if strings.Contains(err.Error(), "443: connect: connection refused") {
// on a single node instance, we will see "connection refused" error. this is to be expected
Expand All @@ -170,13 +171,13 @@ func (c *containers) stopAllContainers() error {
errs = append(errs, fmt.Errorf("failed to stop running pod %s: %w", pod, err))
}
}
- err = c.Config.containerRuntime.RemoveContainer(pod)
+ err = c.Config.containerRuntime.RemoveContainer(ctx, pod)
if err != nil {
errs = append(errs, fmt.Errorf("failed to remove pod %s: %w", pod, err))
}
}

- pods, err = c.Config.containerRuntime.ListContainers()
+ pods, err = c.Config.containerRuntime.ListContainers(ctx)
if err == nil && len(pods) == 0 {
logrus.Info("successfully removed k0s containers!")
}
Expand Down
17 changes: 8 additions & 9 deletions pkg/container/runtime/cri.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ type CRIRuntime struct {
criSocketPath string
}

- func (cri *CRIRuntime) ListContainers() ([]string, error) {
+ func (cri *CRIRuntime) ListContainers(ctx context.Context) ([]string, error) {
client, conn, err := getRuntimeClient(cri.criSocketPath)
defer closeConnection(conn)
if err != nil {
Expand All @@ -43,7 +43,7 @@ func (cri *CRIRuntime) ListContainers() ([]string, error) {
}
request := &pb.ListPodSandboxRequest{}
logrus.Debugf("ListPodSandboxRequest: %v", request)
- r, err := client.ListPodSandbox(context.Background(), request)
+ r, err := client.ListPodSandbox(ctx, request)
logrus.Debugf("ListPodSandboxResponse: %v", r)
if err != nil {
return nil, err
Expand All @@ -55,7 +55,7 @@ func (cri *CRIRuntime) ListContainers() ([]string, error) {
return pods, nil
}

- func (cri *CRIRuntime) RemoveContainer(id string) error {
+ func (cri *CRIRuntime) RemoveContainer(ctx context.Context, id string) error {
client, conn, err := getRuntimeClient(cri.criSocketPath)
defer closeConnection(conn)
if err != nil {
Expand All @@ -66,7 +66,7 @@ func (cri *CRIRuntime) RemoveContainer(id string) error {
}
request := &pb.RemovePodSandboxRequest{PodSandboxId: id}
logrus.Debugf("RemovePodSandboxRequest: %v", request)
- r, err := client.RemovePodSandbox(context.Background(), request)
+ r, err := client.RemovePodSandbox(ctx, request)
logrus.Debugf("RemovePodSandboxResponse: %v", r)
if err != nil {
return err
Expand All @@ -75,7 +75,7 @@ func (cri *CRIRuntime) RemoveContainer(id string) error {
return nil
}

- func (cri *CRIRuntime) StopContainer(id string) error {
+ func (cri *CRIRuntime) StopContainer(ctx context.Context, id string) error {
client, conn, err := getRuntimeClient(cri.criSocketPath)
defer closeConnection(conn)
if err != nil {
Expand All @@ -86,7 +86,7 @@ func (cri *CRIRuntime) StopContainer(id string) error {
}
request := &pb.StopPodSandboxRequest{PodSandboxId: id}
logrus.Debugf("StopPodSandboxRequest: %v", request)
- r, err := client.StopPodSandbox(context.Background(), request)
+ r, err := client.StopPodSandbox(ctx, request)
logrus.Debugf("StopPodSandboxResponse: %v", r)
if err != nil {
return fmt.Errorf("failed to stop pod sandbox: %w", err)
Expand All @@ -105,10 +105,9 @@ func getRuntimeClient(addr string) (pb.RuntimeServiceClient, *grpc.ClientConn, e
}

func getRuntimeClientConnection(addr string) (*grpc.ClientConn, error) {
- conn, err := grpc.Dial(addr, grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock())
+ conn, err := grpc.NewClient(addr, grpc.WithTransportCredentials(insecure.NewCredentials()))
if err != nil {
- errMsg := fmt.Errorf("connect endpoint %s, make sure you are running as root and the endpoint has been started: %w", addr, err)
- logrus.Error(errMsg)
+ return nil, fmt.Errorf("connect endpoint %s, make sure you are running as root and the endpoint has been started: %w", addr, err)
} else {
logrus.Debugf("connected successfully using endpoint: %s", addr)
}
Expand Down
11 changes: 7 additions & 4 deletions pkg/container/runtime/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,15 @@ limitations under the License.

package runtime

- import "net/url"
+ import (
+ "context"
+ "net/url"
+ )

type ContainerRuntime interface {
- ListContainers() ([]string, error)
- RemoveContainer(id string) error
- StopContainer(id string) error
+ ListContainers(ctx context.Context) ([]string, error)
+ RemoveContainer(ctx context.Context, id string) error
+ StopContainer(ctx context.Context, id string) error
}

func NewContainerRuntime(runtimeEndpoint *url.URL) ContainerRuntime {
Expand Down

0 comments on commit 34d4b5e

Please sign in to comment.