From 4e7b82aa490911f9589dfe4d74c8024bddfba060 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan-Luis=20de=20Sousa-Valadas=20Casta=C3=B1o?= Date: Thu, 16 May 2024 13:51:08 +0200 Subject: [PATCH] Make k0s reset fail if it can't reach containerd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prior to this commit, if the containerd unix socket wasn't listening, grpc.Dial would try to connect forever. This commit establishes the connection in the background and the actual call will fail if it has to. We also use a single context for all the operations so that we can cancel the execution with Ctrl+C. Co-authored-by: Tom Wieczorek Signed-off-by: Juan-Luis de Sousa-Valadas Castaño --- pkg/cleanup/containers.go | 17 +++++++++-------- pkg/container/runtime/cri.go | 17 ++++++++--------- pkg/container/runtime/runtime.go | 11 +++++++---- 3 files changed, 24 insertions(+), 21 deletions(-) diff --git a/pkg/cleanup/containers.go b/pkg/cleanup/containers.go index e5f876a7f6ca..a3b063fcc896 100644 --- a/pkg/cleanup/containers.go +++ b/pkg/cleanup/containers.go @@ -17,6 +17,7 @@ limitations under the License. 
package cleanup import ( + "context" "errors" "fmt" "io/fs" @@ -134,20 +135,20 @@ func (c *containers) stopContainerd() { func (c *containers) stopAllContainers() error { var errs []error - logrus.Debugf("trying to list all pods") var pods []string + ctx := context.TODO() err := retry.Do(func() error { + logrus.Debugf("trying to list all pods") var err error - pods, err = c.Config.containerRuntime.ListContainers() + pods, err = c.Config.containerRuntime.ListContainers(ctx) if err != nil { return err } return nil - }) + }, retry.Context(ctx), retry.LastErrorOnly(true)) if err != nil { - logrus.Debugf("failed at listing pods %v", err) - return err + return fmt.Errorf("failed at listing pods %w", err) } if len(pods) > 0 { if err := removeMount("kubelet/pods"); err != nil { @@ -160,7 +161,7 @@ func (c *containers) stopAllContainers() error { for _, pod := range pods { logrus.Debugf("stopping container: %v", pod) - err := c.Config.containerRuntime.StopContainer(pod) + err := c.Config.containerRuntime.StopContainer(ctx, pod) if err != nil { if strings.Contains(err.Error(), "443: connect: connection refused") { // on a single node instance, we will see "connection refused" error. 
this is to be expected @@ -170,13 +171,13 @@ func (c *containers) stopAllContainers() error { errs = append(errs, fmt.Errorf("failed to stop running pod %s: %w", pod, err)) } } - err = c.Config.containerRuntime.RemoveContainer(pod) + err = c.Config.containerRuntime.RemoveContainer(ctx, pod) if err != nil { errs = append(errs, fmt.Errorf("failed to remove pod %s: %w", pod, err)) } } - pods, err = c.Config.containerRuntime.ListContainers() + pods, err = c.Config.containerRuntime.ListContainers(ctx) if err == nil && len(pods) == 0 { logrus.Info("successfully removed k0s containers!") } diff --git a/pkg/container/runtime/cri.go b/pkg/container/runtime/cri.go index ea9a50e890e3..383259ea4f0c 100644 --- a/pkg/container/runtime/cri.go +++ b/pkg/container/runtime/cri.go @@ -32,7 +32,7 @@ type CRIRuntime struct { criSocketPath string } -func (cri *CRIRuntime) ListContainers() ([]string, error) { +func (cri *CRIRuntime) ListContainers(ctx context.Context) ([]string, error) { client, conn, err := getRuntimeClient(cri.criSocketPath) defer closeConnection(conn) if err != nil { @@ -43,7 +43,7 @@ func (cri *CRIRuntime) ListContainers() ([]string, error) { } request := &pb.ListPodSandboxRequest{} logrus.Debugf("ListPodSandboxRequest: %v", request) - r, err := client.ListPodSandbox(context.Background(), request) + r, err := client.ListPodSandbox(ctx, request) logrus.Debugf("ListPodSandboxResponse: %v", r) if err != nil { return nil, err @@ -55,7 +55,7 @@ func (cri *CRIRuntime) ListContainers() ([]string, error) { return pods, nil } -func (cri *CRIRuntime) RemoveContainer(id string) error { +func (cri *CRIRuntime) RemoveContainer(ctx context.Context, id string) error { client, conn, err := getRuntimeClient(cri.criSocketPath) defer closeConnection(conn) if err != nil { @@ -66,7 +66,7 @@ func (cri *CRIRuntime) RemoveContainer(id string) error { } request := &pb.RemovePodSandboxRequest{PodSandboxId: id} logrus.Debugf("RemovePodSandboxRequest: %v", request) - r, err := 
client.RemovePodSandbox(context.Background(), request) + r, err := client.RemovePodSandbox(ctx, request) logrus.Debugf("RemovePodSandboxResponse: %v", r) if err != nil { return err @@ -75,7 +75,7 @@ func (cri *CRIRuntime) RemoveContainer(id string) error { return nil } -func (cri *CRIRuntime) StopContainer(id string) error { +func (cri *CRIRuntime) StopContainer(ctx context.Context, id string) error { client, conn, err := getRuntimeClient(cri.criSocketPath) defer closeConnection(conn) if err != nil { @@ -86,7 +86,7 @@ func (cri *CRIRuntime) StopContainer(id string) error { } request := &pb.StopPodSandboxRequest{PodSandboxId: id} logrus.Debugf("StopPodSandboxRequest: %v", request) - r, err := client.StopPodSandbox(context.Background(), request) + r, err := client.StopPodSandbox(ctx, request) logrus.Debugf("StopPodSandboxResponse: %v", r) if err != nil { return fmt.Errorf("failed to stop pod sandbox: %w", err) @@ -105,10 +105,9 @@ func getRuntimeClient(addr string) (pb.RuntimeServiceClient, *grpc.ClientConn, e } func getRuntimeClientConnection(addr string) (*grpc.ClientConn, error) { - conn, err := grpc.Dial(addr, grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) + conn, err := grpc.NewClient(addr, grpc.WithTransportCredentials(insecure.NewCredentials())) if err != nil { - errMsg := fmt.Errorf("connect endpoint %s, make sure you are running as root and the endpoint has been started: %w", addr, err) - logrus.Error(errMsg) + return nil, fmt.Errorf("connect endpoint %s, make sure you are running as root and the endpoint has been started: %w", addr, err) } else { logrus.Debugf("connected successfully using endpoint: %s", addr) } diff --git a/pkg/container/runtime/runtime.go b/pkg/container/runtime/runtime.go index c2d9fa600041..b175d71a3cf7 100644 --- a/pkg/container/runtime/runtime.go +++ b/pkg/container/runtime/runtime.go @@ -16,12 +16,15 @@ limitations under the License. 
package runtime -import "net/url" +import ( + "context" + "net/url" +) type ContainerRuntime interface { - ListContainers() ([]string, error) - RemoveContainer(id string) error - StopContainer(id string) error + ListContainers(ctx context.Context) ([]string, error) + RemoveContainer(ctx context.Context, id string) error + StopContainer(ctx context.Context, id string) error } func NewContainerRuntime(runtimeEndpoint *url.URL) ContainerRuntime {