From 4dc714d7a96994c883bb3f03ae228388814f6971 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan-Luis=20de=20Sousa-Valadas=20Casta=C3=B1o?= Date: Thu, 16 May 2024 13:51:08 +0200 Subject: [PATCH] Make k0s reset fail if it can't reach containerd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prior to this commit, if the containerd unix socket wasn't listening, grpc.Dial would try to connect forever. This commit establishes the connection in the background and the actual call will fail if it has to. Also, we implement a single context for all the operations so that we can cancel the execution with Ctrl+C. Co-authored-by: Tom Wieczorek Signed-off-by: Juan-Luis de Sousa-Valadas Castaño (cherry picked from commit 4e7b82aa490911f9589dfe4d74c8024bddfba060) (cherry picked from commit f243680919439c853e0516d293b8b1dc1c933971) Signed-off-by: Tom Wieczorek --- pkg/cleanup/containers.go | 17 +++++++++-------- pkg/container/runtime/cri.go | 17 ++++++++--------- pkg/container/runtime/docker.go | 7 ++++--- pkg/container/runtime/runtime.go | 10 +++++++--- 4 files changed, 28 insertions(+), 23 deletions(-) diff --git a/pkg/cleanup/containers.go b/pkg/cleanup/containers.go index b49dcb05c6f9..9c2d01b903e3 100644 --- a/pkg/cleanup/containers.go +++ b/pkg/cleanup/containers.go @@ -17,6 +17,7 @@ limitations under the License. 
package cleanup import ( + "context" "errors" "fmt" "io/fs" @@ -136,20 +137,20 @@ func (c *containers) stopContainerd() { func (c *containers) stopAllContainers() error { var msg []error - logrus.Debugf("trying to list all pods") var pods []string + ctx := context.TODO() err := retry.Do(func() error { + logrus.Debugf("trying to list all pods") var err error - pods, err = c.Config.containerRuntime.ListContainers() + pods, err = c.Config.containerRuntime.ListContainers(ctx) if err != nil { return err } return nil - }) + }, retry.Context(ctx), retry.LastErrorOnly(true)) if err != nil { - logrus.Debugf("failed at listing pods %v", err) - return err + return fmt.Errorf("failed at listing pods %w", err) } if len(pods) > 0 { if err := removeMount("kubelet/pods"); err != nil { @@ -162,7 +163,7 @@ func (c *containers) stopAllContainers() error { for _, pod := range pods { logrus.Debugf("stopping container: %v", pod) - err := c.Config.containerRuntime.StopContainer(pod) + err := c.Config.containerRuntime.StopContainer(ctx, pod) if err != nil { if strings.Contains(err.Error(), "443: connect: connection refused") { // on a single node instance, we will see "connection refused" error. 
this is to be expected @@ -174,13 +175,13 @@ func (c *containers) stopAllContainers() error { msg = append(msg, fmtError) } } - err = c.Config.containerRuntime.RemoveContainer(pod) + err = c.Config.containerRuntime.RemoveContainer(ctx, pod) if err != nil { msg = append(msg, fmt.Errorf("failed to remove pod %v: err: %v", pod, err)) } } - pods, err = c.Config.containerRuntime.ListContainers() + pods, err = c.Config.containerRuntime.ListContainers(ctx) if err == nil && len(pods) == 0 { logrus.Info("successfully removed k0s containers!") } diff --git a/pkg/container/runtime/cri.go b/pkg/container/runtime/cri.go index ea9a50e890e3..3ffa3b97ef46 100644 --- a/pkg/container/runtime/cri.go +++ b/pkg/container/runtime/cri.go @@ -32,7 +32,7 @@ type CRIRuntime struct { criSocketPath string } -func (cri *CRIRuntime) ListContainers() ([]string, error) { +func (cri *CRIRuntime) ListContainers(ctx context.Context) ([]string, error) { client, conn, err := getRuntimeClient(cri.criSocketPath) defer closeConnection(conn) if err != nil { @@ -43,7 +43,7 @@ func (cri *CRIRuntime) ListContainers() ([]string, error) { } request := &pb.ListPodSandboxRequest{} logrus.Debugf("ListPodSandboxRequest: %v", request) - r, err := client.ListPodSandbox(context.Background(), request) + r, err := client.ListPodSandbox(ctx, request) logrus.Debugf("ListPodSandboxResponse: %v", r) if err != nil { return nil, err @@ -55,7 +55,7 @@ func (cri *CRIRuntime) ListContainers() ([]string, error) { return pods, nil } -func (cri *CRIRuntime) RemoveContainer(id string) error { +func (cri *CRIRuntime) RemoveContainer(ctx context.Context, id string) error { client, conn, err := getRuntimeClient(cri.criSocketPath) defer closeConnection(conn) if err != nil { @@ -66,7 +66,7 @@ func (cri *CRIRuntime) RemoveContainer(id string) error { } request := &pb.RemovePodSandboxRequest{PodSandboxId: id} logrus.Debugf("RemovePodSandboxRequest: %v", request) - r, err := client.RemovePodSandbox(context.Background(), request) + r, err 
:= client.RemovePodSandbox(ctx, request) logrus.Debugf("RemovePodSandboxResponse: %v", r) if err != nil { return err @@ -75,7 +75,7 @@ func (cri *CRIRuntime) RemoveContainer(id string) error { return nil } -func (cri *CRIRuntime) StopContainer(id string) error { +func (cri *CRIRuntime) StopContainer(ctx context.Context, id string) error { client, conn, err := getRuntimeClient(cri.criSocketPath) defer closeConnection(conn) if err != nil { @@ -86,7 +86,7 @@ func (cri *CRIRuntime) StopContainer(id string) error { } request := &pb.StopPodSandboxRequest{PodSandboxId: id} logrus.Debugf("StopPodSandboxRequest: %v", request) - r, err := client.StopPodSandbox(context.Background(), request) + r, err := client.StopPodSandbox(ctx, request) logrus.Debugf("StopPodSandboxResponse: %v", r) if err != nil { return fmt.Errorf("failed to stop pod sandbox: %w", err) @@ -105,10 +105,9 @@ func getRuntimeClient(addr string) (pb.RuntimeServiceClient, *grpc.ClientConn, e } func getRuntimeClientConnection(addr string) (*grpc.ClientConn, error) { - conn, err := grpc.Dial(addr, grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) + conn, err := grpc.Dial(addr, grpc.WithTransportCredentials(insecure.NewCredentials())) if err != nil { - errMsg := fmt.Errorf("connect endpoint %s, make sure you are running as root and the endpoint has been started: %w", addr, err) - logrus.Error(errMsg) + return nil, fmt.Errorf("connect endpoint %s, make sure you are running as root and the endpoint has been started: %w", addr, err) } else { logrus.Debugf("connected successfully using endpoint: %s", addr) } diff --git a/pkg/container/runtime/docker.go b/pkg/container/runtime/docker.go index 208b38951fbe..0fd118ea4947 100644 --- a/pkg/container/runtime/docker.go +++ b/pkg/container/runtime/docker.go @@ -17,6 +17,7 @@ limitations under the License. 
package runtime import ( + "context" "fmt" "os/exec" "strings" @@ -28,7 +29,7 @@ type DockerRuntime struct { criSocketPath string } -func (d *DockerRuntime) ListContainers() ([]string, error) { +func (d *DockerRuntime) ListContainers(context.Context) ([]string, error) { out, err := exec.Command("docker", "--host", d.criSocketPath, "ps", "-a", "--filter", "name=k8s_", "-q").CombinedOutput() if err != nil { return nil, fmt.Errorf("failed to list containers: output: %s: %w", string(out), err) @@ -36,7 +37,7 @@ func (d *DockerRuntime) ListContainers() ([]string, error) { return strings.Fields(string(out)), nil } -func (d *DockerRuntime) RemoveContainer(id string) error { +func (d *DockerRuntime) RemoveContainer(_ context.Context, id string) error { out, err := exec.Command("docker", "--host", d.criSocketPath, "rm", "--volumes", id).CombinedOutput() if err != nil { return fmt.Errorf("failed to remove container %s: output: %s: %w", id, string(out), err) @@ -44,7 +45,7 @@ func (d *DockerRuntime) RemoveContainer(id string) error { return nil } -func (d *DockerRuntime) StopContainer(id string) error { +func (d *DockerRuntime) StopContainer(_ context.Context, id string) error { out, err := exec.Command("docker", "--host", d.criSocketPath, "stop", id).CombinedOutput() if err != nil { return fmt.Errorf("failed to stop running container %s: output: %s: %w", id, string(out), err) diff --git a/pkg/container/runtime/runtime.go b/pkg/container/runtime/runtime.go index d04d2a0fae92..6758d315971d 100644 --- a/pkg/container/runtime/runtime.go +++ b/pkg/container/runtime/runtime.go @@ -16,10 +16,14 @@ limitations under the License. 
package runtime +import ( + "context" +) + type ContainerRuntime interface { - ListContainers() ([]string, error) - RemoveContainer(id string) error - StopContainer(id string) error + ListContainers(ctx context.Context) ([]string, error) + RemoveContainer(ctx context.Context, id string) error + StopContainer(ctx context.Context, id string) error } func NewContainerRuntime(runtimeType string, criSocketPath string) ContainerRuntime {