Skip to content

Commit

Permalink
Make k0s reset fail if it can't reach containerd
Browse files Browse the repository at this point in the history
Prior to this commit, if the containerd Unix socket wasn't listening,
grpc.Dial would try to connect forever.

This commit establishes the connection in the background, so the actual
call fails if the connection cannot be established.

We also use a single context for all the operations so that we can
cancel the execution with Ctrl+C.

Co-authored-by: Tom Wieczorek <[email protected]>
Signed-off-by: Juan-Luis de Sousa-Valadas Castaño <[email protected]>
(cherry picked from commit 4e7b82a)
(cherry picked from commit f243680)
Signed-off-by: Tom Wieczorek <[email protected]>
  • Loading branch information
juanluisvaladas and twz123 committed Jun 6, 2024
1 parent 473ff86 commit 4dc714d
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 23 deletions.
17 changes: 9 additions & 8 deletions pkg/cleanup/containers.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package cleanup

import (
"context"
"errors"
"fmt"
"io/fs"
Expand Down Expand Up @@ -136,20 +137,20 @@ func (c *containers) stopContainerd() {

func (c *containers) stopAllContainers() error {
var msg []error
logrus.Debugf("trying to list all pods")

var pods []string
ctx := context.TODO()
err := retry.Do(func() error {
logrus.Debugf("trying to list all pods")
var err error
pods, err = c.Config.containerRuntime.ListContainers()
pods, err = c.Config.containerRuntime.ListContainers(ctx)
if err != nil {
return err
}
return nil
})
}, retry.Context(ctx), retry.LastErrorOnly(true))
if err != nil {
logrus.Debugf("failed at listing pods %v", err)
return err
return fmt.Errorf("failed at listing pods %w", err)
}
if len(pods) > 0 {
if err := removeMount("kubelet/pods"); err != nil {
Expand All @@ -162,7 +163,7 @@ func (c *containers) stopAllContainers() error {

for _, pod := range pods {
logrus.Debugf("stopping container: %v", pod)
err := c.Config.containerRuntime.StopContainer(pod)
err := c.Config.containerRuntime.StopContainer(ctx, pod)
if err != nil {
if strings.Contains(err.Error(), "443: connect: connection refused") {
// on a single node instance, we will see "connection refused" error. this is to be expected
Expand All @@ -174,13 +175,13 @@ func (c *containers) stopAllContainers() error {
msg = append(msg, fmtError)
}
}
err = c.Config.containerRuntime.RemoveContainer(pod)
err = c.Config.containerRuntime.RemoveContainer(ctx, pod)
if err != nil {
msg = append(msg, fmt.Errorf("failed to remove pod %v: err: %v", pod, err))
}
}

pods, err = c.Config.containerRuntime.ListContainers()
pods, err = c.Config.containerRuntime.ListContainers(ctx)
if err == nil && len(pods) == 0 {
logrus.Info("successfully removed k0s containers!")
}
Expand Down
17 changes: 8 additions & 9 deletions pkg/container/runtime/cri.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ type CRIRuntime struct {
criSocketPath string
}

func (cri *CRIRuntime) ListContainers() ([]string, error) {
func (cri *CRIRuntime) ListContainers(ctx context.Context) ([]string, error) {
client, conn, err := getRuntimeClient(cri.criSocketPath)
defer closeConnection(conn)
if err != nil {
Expand All @@ -43,7 +43,7 @@ func (cri *CRIRuntime) ListContainers() ([]string, error) {
}
request := &pb.ListPodSandboxRequest{}
logrus.Debugf("ListPodSandboxRequest: %v", request)
r, err := client.ListPodSandbox(context.Background(), request)
r, err := client.ListPodSandbox(ctx, request)
logrus.Debugf("ListPodSandboxResponse: %v", r)
if err != nil {
return nil, err
Expand All @@ -55,7 +55,7 @@ func (cri *CRIRuntime) ListContainers() ([]string, error) {
return pods, nil
}

func (cri *CRIRuntime) RemoveContainer(id string) error {
func (cri *CRIRuntime) RemoveContainer(ctx context.Context, id string) error {
client, conn, err := getRuntimeClient(cri.criSocketPath)
defer closeConnection(conn)
if err != nil {
Expand All @@ -66,7 +66,7 @@ func (cri *CRIRuntime) RemoveContainer(id string) error {
}
request := &pb.RemovePodSandboxRequest{PodSandboxId: id}
logrus.Debugf("RemovePodSandboxRequest: %v", request)
r, err := client.RemovePodSandbox(context.Background(), request)
r, err := client.RemovePodSandbox(ctx, request)
logrus.Debugf("RemovePodSandboxResponse: %v", r)
if err != nil {
return err
Expand All @@ -75,7 +75,7 @@ func (cri *CRIRuntime) RemoveContainer(id string) error {
return nil
}

func (cri *CRIRuntime) StopContainer(id string) error {
func (cri *CRIRuntime) StopContainer(ctx context.Context, id string) error {
client, conn, err := getRuntimeClient(cri.criSocketPath)
defer closeConnection(conn)
if err != nil {
Expand All @@ -86,7 +86,7 @@ func (cri *CRIRuntime) StopContainer(id string) error {
}
request := &pb.StopPodSandboxRequest{PodSandboxId: id}
logrus.Debugf("StopPodSandboxRequest: %v", request)
r, err := client.StopPodSandbox(context.Background(), request)
r, err := client.StopPodSandbox(ctx, request)
logrus.Debugf("StopPodSandboxResponse: %v", r)
if err != nil {
return fmt.Errorf("failed to stop pod sandbox: %w", err)
Expand All @@ -105,10 +105,9 @@ func getRuntimeClient(addr string) (pb.RuntimeServiceClient, *grpc.ClientConn, e
}

func getRuntimeClientConnection(addr string) (*grpc.ClientConn, error) {
conn, err := grpc.Dial(addr, grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock())
conn, err := grpc.Dial(addr, grpc.WithTransportCredentials(insecure.NewCredentials()))
if err != nil {
errMsg := fmt.Errorf("connect endpoint %s, make sure you are running as root and the endpoint has been started: %w", addr, err)
logrus.Error(errMsg)
return nil, fmt.Errorf("connect endpoint %s, make sure you are running as root and the endpoint has been started: %w", addr, err)
} else {
logrus.Debugf("connected successfully using endpoint: %s", addr)
}
Expand Down
7 changes: 4 additions & 3 deletions pkg/container/runtime/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package runtime

import (
"context"
"fmt"
"os/exec"
"strings"
Expand All @@ -28,23 +29,23 @@ type DockerRuntime struct {
criSocketPath string
}

func (d *DockerRuntime) ListContainers() ([]string, error) {
func (d *DockerRuntime) ListContainers(context.Context) ([]string, error) {
out, err := exec.Command("docker", "--host", d.criSocketPath, "ps", "-a", "--filter", "name=k8s_", "-q").CombinedOutput()
if err != nil {
return nil, fmt.Errorf("failed to list containers: output: %s: %w", string(out), err)
}
return strings.Fields(string(out)), nil
}

func (d *DockerRuntime) RemoveContainer(id string) error {
func (d *DockerRuntime) RemoveContainer(_ context.Context, id string) error {
out, err := exec.Command("docker", "--host", d.criSocketPath, "rm", "--volumes", id).CombinedOutput()
if err != nil {
return fmt.Errorf("failed to remove container %s: output: %s: %w", id, string(out), err)
}
return nil
}

func (d *DockerRuntime) StopContainer(id string) error {
func (d *DockerRuntime) StopContainer(_ context.Context, id string) error {
out, err := exec.Command("docker", "--host", d.criSocketPath, "stop", id).CombinedOutput()
if err != nil {
return fmt.Errorf("failed to stop running container %s: output: %s: %w", id, string(out), err)
Expand Down
10 changes: 7 additions & 3 deletions pkg/container/runtime/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,14 @@ limitations under the License.

package runtime

import (
"context"
)

type ContainerRuntime interface {
ListContainers() ([]string, error)
RemoveContainer(id string) error
StopContainer(id string) error
ListContainers(ctx context.Context) ([]string, error)
RemoveContainer(ctx context.Context, id string) error
StopContainer(ctx context.Context, id string) error
}

func NewContainerRuntime(runtimeType string, criSocketPath string) ContainerRuntime {
Expand Down

0 comments on commit 4dc714d

Please sign in to comment.