From 94636e82163957b134fc7b4cb4f84c1901329aa6 Mon Sep 17 00:00:00 2001 From: Sylvain Baubeau Date: Mon, 23 Dec 2024 02:34:41 +0100 Subject: [PATCH] Make it work for CRI-O --- pkg/sbom/collectors/host/host.go | 2 +- pkg/sbom/collectors/host/request.go | 31 +-- pkg/security/resolvers/sbom/resolver.go | 2 +- pkg/util/trivy/container.go | 9 + pkg/util/trivy/containerd.go | 148 +++++++++++++ pkg/util/trivy/crio.go | 94 +++++++++ pkg/util/trivy/docker.go | 78 +++++++ pkg/util/trivy/overlayfs.go | 266 ++++++------------------ pkg/util/trivy/trivy.go | 113 +--------- pkg/util/trivy/trivy_containerd.go | 166 --------------- 10 files changed, 406 insertions(+), 503 deletions(-) create mode 100644 pkg/util/trivy/container.go create mode 100644 pkg/util/trivy/crio.go delete mode 100644 pkg/util/trivy/trivy_containerd.go diff --git a/pkg/sbom/collectors/host/host.go b/pkg/sbom/collectors/host/host.go index bb792b0e2f3134..6798fb4790f587 100644 --- a/pkg/sbom/collectors/host/host.go +++ b/pkg/sbom/collectors/host/host.go @@ -54,7 +54,7 @@ func (c *Collector) Scan(ctx context.Context, request sbom.ScanRequest) sbom.Sca } log.Infof("host scan request [%v]", hostScanRequest.ID()) - report, err := c.trivyCollector.ScanFilesystem(ctx, hostScanRequest.FS, hostScanRequest.Path, c.opts) + report, err := c.trivyCollector.ScanFilesystem(ctx, hostScanRequest.Path, c.opts) return sbom.ScanResult{ Error: err, Report: report, diff --git a/pkg/sbom/collectors/host/request.go b/pkg/sbom/collectors/host/request.go index 9de8579fc6eb39..3509ec79e56881 100644 --- a/pkg/sbom/collectors/host/request.go +++ b/pkg/sbom/collectors/host/request.go @@ -6,9 +6,7 @@ package host import ( - "io/fs" "os" - "path/filepath" "github.com/DataDog/datadog-agent/pkg/sbom/types" ) @@ -17,34 +15,11 @@ import ( // hashable to be pushed in the work queue for processing. 
type scanRequest struct { Path string - FS fs.FS -} - -type relFS struct { - root string - fs fs.FS -} - -func newFS(root string) fs.FS { - fs := os.DirFS(root) - return &relFS{root: "/", fs: fs} -} - -func (f *relFS) Open(name string) (fs.File, error) { - if filepath.IsAbs(name) { - var err error - name, err = filepath.Rel(f.root, name) - if err != nil { - return nil, err - } - } - - return f.fs.Open(name) } // NewScanRequest creates a new scan request -func NewScanRequest(path string, fs fs.FS) types.ScanRequest { - return scanRequest{Path: path, FS: fs} +func NewScanRequest(path string) types.ScanRequest { + return scanRequest{Path: path} } // NewHostScanRequest creates a new scan request for the root filesystem @@ -54,7 +29,7 @@ func NewHostScanRequest() types.ScanRequest { // if hostRoot := os.Getenv("HOST_ROOT"); env.IsContainerized() && hostRoot != "" { scanPath = hostRoot } - return NewScanRequest("/", newFS(scanPath)) + return NewScanRequest(scanPath) } // Collector returns the collector name diff --git a/pkg/security/resolvers/sbom/resolver.go b/pkg/security/resolvers/sbom/resolver.go index e282dbd6f23d9d..6fe1b6f3441639 100644 --- a/pkg/security/resolvers/sbom/resolver.go +++ b/pkg/security/resolvers/sbom/resolver.go @@ -274,7 +274,7 @@ func (r *Resolver) generateSBOM(root string) (report *trivy.Report, err error) { seclog.Infof("Generating SBOM for %s", root) r.sbomGenerations.Inc() - scanRequest := host.NewScanRequest(root, os.DirFS("/")) + scanRequest := host.NewScanRequest(root) ch := collectors.GetHostScanner().Channel() if ch == nil { return nil, fmt.Errorf("couldn't retrieve global host scanner result channel") diff --git a/pkg/util/trivy/container.go b/pkg/util/trivy/container.go new file mode 100644 index 00000000000000..87e2bb8c07c140 --- /dev/null +++ b/pkg/util/trivy/container.go @@ -0,0 +1,9 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. 
+// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build trivy + +// Package trivy holds the scan components +package trivy diff --git a/pkg/util/trivy/containerd.go b/pkg/util/trivy/containerd.go index d3ed7ddf3f84cf..47d504b897048a 100644 --- a/pkg/util/trivy/containerd.go +++ b/pkg/util/trivy/containerd.go @@ -17,10 +17,14 @@ import ( "strings" "time" + ftypes "github.com/aquasecurity/trivy/pkg/fanal/types" "github.com/containerd/containerd" "github.com/containerd/containerd/content" "github.com/containerd/containerd/images/archive" + "github.com/containerd/containerd/leases" + "github.com/containerd/containerd/mount" "github.com/containerd/containerd/namespaces" + "github.com/containerd/errdefs" refdocker "github.com/distribution/reference" api "github.com/docker/docker/api/types" "github.com/docker/docker/api/types/container" @@ -31,6 +35,9 @@ import ( "github.com/samber/lo" workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" + "github.com/DataDog/datadog-agent/pkg/sbom" + cutil "github.com/DataDog/datadog-agent/pkg/util/containerd" + "github.com/DataDog/datadog-agent/pkg/util/log" ) // ContainerdCollector defines the conttainerd collector name @@ -181,3 +188,144 @@ func inspect(ctx context.Context, imgMeta *workloadmeta.ContainerImageMetadata, }, }, history, ref, nil } + +const ( + cleanupTimeout = 30 * time.Second +) + +type fakeContainerdContainer struct { + *fakeContainer + *image +} + +func (c *fakeContainerdContainer) LayerByDiffID(hash string) (ftypes.LayerPath, error) { + return c.fakeContainer.LayerByDiffID(hash) +} + +func (c *fakeContainerdContainer) LayerByDigest(hash string) (ftypes.LayerPath, error) { + return c.fakeContainer.LayerByDigest(hash) +} + +func (c *fakeContainerdContainer) Layers() (layers []ftypes.LayerPath) { + return c.fakeContainer.Layers() +} + +// ContainerdAccessor is a function that should return a containerd client +type 
ContainerdAccessor func() (cutil.ContainerdItf, error) + +// ScanContainerdImageFromSnapshotter scans containerd image directly from the snapshotter +func (c *Collector) ScanContainerdImageFromSnapshotter(ctx context.Context, imgMeta *workloadmeta.ContainerImageMetadata, img containerd.Image, client cutil.ContainerdItf, scanOptions sbom.ScanOptions) (sbom.Report, error) { + fanalImage, cleanup, err := convertContainerdImage(ctx, client.RawClient(), imgMeta, img) + if cleanup != nil { + defer cleanup() + } + if err != nil { + return nil, err + } + + // Computing duration of containerd lease + deadline, _ := ctx.Deadline() + expiration := deadline.Sub(time.Now().Add(cleanupTimeout)) + clClient := client.RawClient() + imageID := imgMeta.ID + + mounts, err := client.Mounts(ctx, expiration, imgMeta.Namespace, img) + if err != nil { + return nil, fmt.Errorf("unable to get mounts for image %s, err: %w", imgMeta.ID, err) + } + + layers := extractLayersFromOverlayFSMounts(mounts) + if len(layers) == 0 { + return nil, fmt.Errorf("unable to extract layers from overlayfs mounts %+v for image %s", mounts, imgMeta.ID) + } + + ctx = namespaces.WithNamespace(ctx, imgMeta.Namespace) + // Adding a lease to clean up dangling snapshots at expiration + ctx, done, err := clClient.WithLease(ctx, + leases.WithID(imageID), + leases.WithExpiration(expiration), + leases.WithLabels(map[string]string{ + "containerd.io/gc.ref.snapshot." 
+ containerd.DefaultSnapshotter: imageID, + }), + ) + if err != nil && !errdefs.IsAlreadyExists(err) { + return nil, fmt.Errorf("unable to get a lease, err: %w", err) + } + + report, err := c.scanOverlayFS(ctx, layers, &fakeContainerdContainer{ + image: fanalImage, + fakeContainer: &fakeContainer{ + layerPaths: layers, + imgMeta: imgMeta, + layerIDs: fanalImage.inspect.RootFS.Layers, + }, + }, imgMeta, scanOptions) + + if err := done(ctx); err != nil { + log.Warnf("Unable to cancel containerd lease with id: %s, err: %v", imageID, err) + } + + return report, err +} + +// ScanContainerdImage scans containerd image by exporting it and scanning the tarball +func (c *Collector) ScanContainerdImage(ctx context.Context, imgMeta *workloadmeta.ContainerImageMetadata, img containerd.Image, client cutil.ContainerdItf, scanOptions sbom.ScanOptions) (sbom.Report, error) { + fanalImage, cleanup, err := convertContainerdImage(ctx, client.RawClient(), imgMeta, img) + if cleanup != nil { + defer cleanup() + } + if err != nil { + return nil, fmt.Errorf("unable to convert containerd image, err: %w", err) + } + + return c.scanImage(ctx, fanalImage, imgMeta, scanOptions) +} + +// ScanContainerdImageFromFilesystem scans containerd image from file-system +func (c *Collector) ScanContainerdImageFromFilesystem(ctx context.Context, imgMeta *workloadmeta.ContainerImageMetadata, img containerd.Image, client cutil.ContainerdItf, scanOptions sbom.ScanOptions) (sbom.Report, error) { + imagePath, err := os.MkdirTemp("", "containerd-image-*") + if err != nil { + return nil, fmt.Errorf("unable to create temp dir, err: %w", err) + } + defer func() { + err := os.RemoveAll(imagePath) + if err != nil { + log.Errorf("Unable to remove temp dir: %s, err: %v", imagePath, err) + } + }() + + // Computing duration of containerd lease + deadline, _ := ctx.Deadline() + expiration := deadline.Sub(time.Now().Add(cleanupTimeout)) + + cleanUp, err := client.MountImage(ctx, expiration, imgMeta.Namespace, img, 
imagePath) + if err != nil { + return nil, fmt.Errorf("unable to mount containerd image, err: %w", err) + } + + defer func() { + cleanUpContext, cleanUpContextCancel := context.WithTimeout(context.Background(), cleanupTimeout) + err := cleanUp(cleanUpContext) + cleanUpContextCancel() + if err != nil { + log.Errorf("Unable to clean up mounted image, err: %v", err) + } + }() + + return c.scanFilesystem(ctx, imagePath, imgMeta, scanOptions) +} + +func extractLayersFromOverlayFSMounts(mounts []mount.Mount) []string { + var layers []string + for _, mount := range mounts { + for _, opt := range mount.Options { + for _, prefix := range []string{"upperdir=", "lowerdir="} { + trimmedOpt := strings.TrimPrefix(opt, prefix) + if trimmedOpt != opt { + layers = append(layers, strings.Split(trimmedOpt, ":")...) + } + } + } + } + return layers +} diff --git a/pkg/util/trivy/crio.go b/pkg/util/trivy/crio.go new file mode 100644 index 00000000000000..50617fe40198ac --- /dev/null +++ b/pkg/util/trivy/crio.go @@ -0,0 +1,94 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +//go:build trivy + +// Package trivy holds the scan components +package trivy + +import ( + "context" + "fmt" + "path/filepath" + + workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" + "github.com/DataDog/datadog-agent/pkg/sbom" + "github.com/DataDog/datadog-agent/pkg/util/crio" + ftypes "github.com/aquasecurity/trivy/pkg/fanal/types" + v1 "github.com/google/go-containerregistry/pkg/v1" +) + +type fakeCRIOContainer struct { + *fakeContainer +} + +func (c *fakeCRIOContainer) ID() (string, error) { + return c.imgMeta.ID, nil +} + +func (c *fakeCRIOContainer) ConfigFile() (*v1.ConfigFile, error) { + configFile := &v1.ConfigFile{} + for _, layer := range c.imgMeta.Layers { + configFile.History = append(configFile.History, v1.History{ + Author: layer.History.Author, + Created: v1.Time{Time: *layer.History.Created}, + CreatedBy: layer.History.CreatedBy, + Comment: layer.History.Comment, + EmptyLayer: layer.History.EmptyLayer, + }) + + } + return configFile, nil +} + +func (c *fakeCRIOContainer) LayerByDiffID(hash string) (ftypes.LayerPath, error) { + return c.fakeContainer.LayerByDiffID(hash) +} + +func (c *fakeCRIOContainer) LayerByDigest(hash string) (ftypes.LayerPath, error) { + return c.fakeContainer.LayerByDigest(hash) +} + +func (c *fakeCRIOContainer) Layers() (layers []ftypes.LayerPath) { + return c.fakeContainer.Layers() +} + +func (c *fakeCRIOContainer) Name() string { + return c.imgMeta.Name +} + +func (c *fakeCRIOContainer) RepoTags() []string { + return c.imgMeta.RepoTags +} + +func (c *fakeCRIOContainer) RepoDigests() []string { + return c.imgMeta.RepoDigests +} + +// ScanCRIOImageFromOverlayFS scans the CRI-O image layers using OverlayFS. 
+func (c *Collector) ScanCRIOImageFromOverlayFS(ctx context.Context, imgMeta *workloadmeta.ContainerImageMetadata, client crio.Client, scanOptions sbom.ScanOptions) (sbom.Report, error) { + lowerDirs, err := client.GetCRIOImageLayers(imgMeta) + if err != nil { + return nil, fmt.Errorf("failed to retrieve layer directories: %w", err) + } + + var diffIDs []string + for _, dir := range lowerDirs { + diffIDs = append(diffIDs, filepath.Base(filepath.Dir(dir))) + } + + report, err := c.scanOverlayFS(ctx, lowerDirs, &fakeCRIOContainer{ + fakeContainer: &fakeContainer{ + imgMeta: imgMeta, + layerPaths: lowerDirs, + layerIDs: diffIDs, + }, + }, imgMeta, scanOptions) + if err != nil { + return nil, err + } + + return report, nil +} diff --git a/pkg/util/trivy/docker.go b/pkg/util/trivy/docker.go index 10f052e5c75b71..f07de33e871cda 100644 --- a/pkg/util/trivy/docker.go +++ b/pkg/util/trivy/docker.go @@ -11,8 +11,13 @@ import ( "context" "fmt" "os" + "strings" workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" + "github.com/DataDog/datadog-agent/pkg/config/env" + "github.com/DataDog/datadog-agent/pkg/sbom" + containersimage "github.com/DataDog/datadog-agent/pkg/util/containers/image" + ftypes "github.com/aquasecurity/trivy/pkg/fanal/types" "github.com/docker/docker/client" ) @@ -61,3 +66,76 @@ func convertDockerImage(ctx context.Context, client client.ImageAPIClient, imgMe return img, cleanup, nil } + +type fakeDockerContainer struct { + *image + *fakeContainer +} + +func (c *fakeDockerContainer) LayerByDiffID(hash string) (ftypes.LayerPath, error) { + return c.fakeContainer.LayerByDiffID(hash) +} + +func (c *fakeDockerContainer) LayerByDigest(hash string) (ftypes.LayerPath, error) { + return c.fakeContainer.LayerByDigest(hash) +} + +func (c *fakeDockerContainer) Layers() (layers []ftypes.LayerPath) { + return c.fakeContainer.Layers() +} + +// ScanDockerImageFromGraphDriver scans a docker image directly from the graph driver +func (c *Collector) 
ScanDockerImageFromGraphDriver(ctx context.Context, imgMeta *workloadmeta.ContainerImageMetadata, client client.ImageAPIClient, scanOptions sbom.ScanOptions) (sbom.Report, error) { + fanalImage, cleanup, err := convertDockerImage(ctx, client, imgMeta) + if cleanup != nil { + defer cleanup() + } + + if err != nil { + return nil, fmt.Errorf("unable to convert docker image, err: %w", err) + } + + if fanalImage.inspect.GraphDriver.Name == "overlay2" { + var layers []string + if layerDirs, ok := fanalImage.inspect.GraphDriver.Data["LowerDir"]; ok { + layers = append(layers, strings.Split(layerDirs, ":")...) + } + + if layerDirs, ok := fanalImage.inspect.GraphDriver.Data["UpperDir"]; ok { + layers = append(layers, strings.Split(layerDirs, ":")...) + } + + if env.IsContainerized() { + for i, layer := range layers { + layers[i] = containersimage.SanitizeHostPath(layer) + } + } + + fakeContainer := &fakeDockerContainer{ + image: fanalImage, + fakeContainer: &fakeContainer{ + layerIDs: fanalImage.inspect.RootFS.Layers, + layerPaths: layers, + imgMeta: imgMeta, + }, + } + + return c.scanOverlayFS(ctx, layers, fakeContainer, imgMeta, scanOptions) + } + + return nil, fmt.Errorf("unsupported graph driver: %s", fanalImage.inspect.GraphDriver.Name) +} + +// ScanDockerImage scans a docker image by exporting it and scanning the tarball +func (c *Collector) ScanDockerImage(ctx context.Context, imgMeta *workloadmeta.ContainerImageMetadata, client client.ImageAPIClient, scanOptions sbom.ScanOptions) (sbom.Report, error) { + fanalImage, cleanup, err := convertDockerImage(ctx, client, imgMeta) + if cleanup != nil { + defer cleanup() + } + + if err != nil { + return nil, fmt.Errorf("unable to convert docker image, err: %w", err) + } + + return c.scanImage(ctx, fanalImage, imgMeta, scanOptions) +} diff --git a/pkg/util/trivy/overlayfs.go b/pkg/util/trivy/overlayfs.go index f1f8e243983ccb..9f48c333e9e6df 100644 --- a/pkg/util/trivy/overlayfs.go +++ b/pkg/util/trivy/overlayfs.go @@ -10,233 
+10,101 @@ package trivy import ( + "context" "errors" "fmt" - "io/fs" - "os" - "path" - "sort" - "syscall" - "golang.org/x/sys/unix" + workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" + "github.com/DataDog/datadog-agent/pkg/sbom" + "github.com/DataDog/datadog-agent/pkg/util/log" + "github.com/aquasecurity/trivy/pkg/fanal/applier" + local "github.com/aquasecurity/trivy/pkg/fanal/artifact/container" + ftypes "github.com/aquasecurity/trivy/pkg/fanal/types" + v1 "github.com/google/go-containerregistry/pkg/v1" ) -// whiteoutCharDev is defined as zero and is not const only for testing as it -// is not allowed to mknod a 0/0 char dev in userns. -var whiteoutCharDev uint64 // = 0 - -var whiteout *fs.DirEntry - -type filesystem struct { - layers []string -} - -type file struct { - *os.File - ofs filesystem - fi fs.FileInfo - name string -} - -// NewFS returns a fs.ReadDirFS consisting of merging the given layer paths. -func NewFS(layers []string) interface { - fs.FS - fs.ReadDirFS - fs.StatFS -} { - return &filesystem{layers[:]} -} - -// Open implements fs.StatFS. -func (ofs filesystem) Stat(name string) (fs.FileInfo, error) { - name = path.Join("/", name)[1:] - if name == "" { - name = "." - } - _, fi, err := ofs.stat(name) - return fi, err -} - -// Open implements fs.FS. 
-func (ofs filesystem) Open(name string) (fs.File, error) { - name = path.Join("/", name)[1:] - layerIndex, fi, err := ofs.stat(name) - if err != nil { - err.(*os.PathError).Op = "open" - return nil, err - } - f, err := os.Open(ofs.path(layerIndex, name)) - if err != nil { - return nil, &os.PathError{Op: "open", Path: name, Err: err} +type fakeContainer struct { + layerIDs []string + imgMeta *workloadmeta.ContainerImageMetadata + layerPaths []string +} + +func (c *fakeContainer) LayerByDiffID(hash string) (ftypes.LayerPath, error) { + for i, layer := range c.layerIDs { + diffID, _ := v1.NewHash(layer) + if diffID.String() == hash { + return ftypes.LayerPath{ + DiffID: diffID.String(), + Path: c.layerPaths[i], + Digest: c.imgMeta.Layers[i].Digest, + }, nil + } } - return &file{File: f, ofs: ofs, fi: fi, name: name}, nil + return ftypes.LayerPath{}, errors.New("not found") } -func (ofs filesystem) path(layerIndex int, name string) string { - if !fs.ValidPath(name) { - panic(fmt.Errorf("unexpected invalid path name %q", name)) +func (c *fakeContainer) LayerByDigest(hash string) (ftypes.LayerPath, error) { + for i, layer := range c.layerIDs { + diffID, _ := v1.NewHash(layer) + if hash == c.imgMeta.Layers[i].Digest { + return ftypes.LayerPath{ + DiffID: diffID.String(), + Path: c.layerPaths[i], + Digest: c.imgMeta.Layers[i].Digest, + }, nil + } } - return path.Join(ofs.layers[layerIndex], name) + return ftypes.LayerPath{}, errors.New("not found") } -func (ofs filesystem) stat(name string) (int, fs.FileInfo, error) { - var errf error - for layerIndex := range ofs.layers { - fi, err := os.Stat(ofs.path(layerIndex, name)) - if errors.Is(err, syscall.ENOENT) || errors.Is(err, syscall.ENOTDIR) { - // When path does not exist, overlayfs does not verify that a - // whiteout file has been created as one of the parent dir in the - // current layer. Meaning you can open file from lower dirs even - // if a whiteout or opaque directory has been created on an upper - // layer. 
- continue - } - if err != nil { - errf = err - break - } - if isWhiteout(fi) { - break - } - return layerIndex, fi, nil - } - if errf == nil { - errf = syscall.ENOENT +func (c *fakeContainer) Layers() (layers []ftypes.LayerPath) { + for i, layer := range c.layerIDs { + diffID, _ := v1.NewHash(layer) + layers = append(layers, ftypes.LayerPath{ + DiffID: diffID.String(), + Path: c.layerPaths[i], + Digest: c.imgMeta.Layers[i].Digest, + }) } - return 0, nil, &os.PathError{Op: "stat", Path: name, Err: errf} -} -// ReadDir implements fs.ReadDirFS. -func (ofs filesystem) ReadDir(name string) ([]fs.DirEntry, error) { - return ofs.readDirN(name, -1) + return layers } -func (ofs filesystem) readDirN(name string, n int) ([]fs.DirEntry, error) { - name = path.Join("/", name)[1:] - if name == "" { - name = "." - } - - var entriesMap map[string]*fs.DirEntry - var err error - var ok bool - for layerIndex := range ofs.layers { - if ok, err = ofs.readDirLayer(layerIndex, name, n, &entriesMap); ok { - break - } - } - if err == nil && entriesMap == nil { - err = syscall.ENOENT - } +func (c *Collector) scanOverlayFS(ctx context.Context, layers []string, ctr ftypes.Container, imgMeta *workloadmeta.ContainerImageMetadata, scanOptions sbom.ScanOptions) (sbom.Report, error) { + cache, err := c.getCache() if err != nil { - return []fs.DirEntry{}, &os.PathError{Op: "readdirent", Path: name, Err: err} + return nil, err } - entries := make([]fs.DirEntry, 0, len(entriesMap)) - for _, entry := range entriesMap { - if entry != whiteout { - entries = append(entries, *entry) - } + if cache == nil { + return nil, errors.New("failed to get cache for scan") } - sort.Slice(entries, func(i, j int) bool { - return entries[i].Name() < entries[j].Name() - }) - if n > 0 && len(entries) > n { - entries = entries[:n] - } - return entries, nil -} -func (ofs filesystem) readDirLayer(layerIndex int, name string, n int, entriesMap *map[string]*fs.DirEntry) (bool, error) { - fullname := ofs.path(layerIndex, 
name) - - di, err := os.Stat(fullname) - if errors.Is(err, syscall.ENOENT) || errors.Is(err, syscall.ENOTDIR) { - return false, nil - } + containerArtifact, err := local.NewArtifact(ctr, cache, NewFSWalker(), getDefaultArtifactOption(scanOptions)) if err != nil { - return true, err - } - if isWhiteout(di) { - return true, syscall.ENOENT - } - if !di.IsDir() { - return true, syscall.ENOTDIR + return nil, err } - d, err := os.Open(fullname) - if err != nil { - return true, err - } + log.Debugf("Generating SBOM for image %s using overlayfs %+v", imgMeta.ID, layers) - entries, err := d.ReadDir(n) + trivyReport, err := c.scan(ctx, containerArtifact, applier.NewApplier(cache), imgMeta, cache, false) if err != nil { - return true, err - } - if *entriesMap == nil { - *entriesMap = make(map[string]*fs.DirEntry) - } - for entryIndex, entry := range entries { - entryName := entry.Name() - if _, exists := (*entriesMap)[entryName]; !exists { - entryPtr := &entries[entryIndex] - if entry.Type().IsRegular() { - (*entriesMap)[entryName] = entryPtr - } else { - ei, err := entry.Info() - if err != nil { - return true, err - } - if isWhiteout(ei) { - (*entriesMap)[entryName] = whiteout - } else { - (*entriesMap)[entryName] = entryPtr - } - } + if imgMeta != nil { + return nil, fmt.Errorf("unable to marshal report to sbom format for image %s, err: %w", imgMeta.ID, err) } + return nil, fmt.Errorf("unable to marshal report to sbom format, err: %w", err) } - return isOpaqueDir(d), nil -} - -// ReadDir implements fs.ReadDirFile. -func (f *file) ReadDir(n int) ([]fs.DirEntry, error) { - if !f.fi.IsDir() { - return nil, &os.PathError{Op: "readdirent", Path: f.name, Err: syscall.ENOTDIR} + log.Debugf("Found OS: %+v", trivyReport.Metadata.OS) + pkgCount := 0 + for _, results := range trivyReport.Results { + pkgCount += len(results.Packages) } - return f.ofs.readDirN(f.name, n) -} - -// Read implements fs.File. 
-func (f *file) Read(b []byte) (int, error) { - return f.File.Read(b) -} + log.Debugf("Found %d packages", pkgCount) -// Stat implements fs.File. -func (f *file) Stat() (fs.FileInfo, error) { - return f.fi, nil -} - -// Close implements fs.File. -func (f *file) Close() error { - return f.File.Close() -} - -var _ fs.ReadDirFile = &file{} - -func isWhiteout(fm fs.FileInfo) bool { - return fm.Mode()&fs.ModeCharDevice != 0 && uint64(fm.Sys().(*syscall.Stat_t).Rdev) == whiteoutCharDev -} - -func isOpaqueDir(d *os.File) bool { - var data [1]byte - var sz int - var err error - for { - sz, err = unix.Fgetxattr(int(d.Fd()), "trusted.overlay.opaque", data[:]) - if err != unix.EINTR { - break - } - } - return sz == 1 && data[0] == 'y' + return &Report{ + Report: trivyReport, + id: imgMeta.ID, + marshaler: c.marshaler, + }, nil } diff --git a/pkg/util/trivy/trivy.go b/pkg/util/trivy/trivy.go index 6c1b2a411cc0a4..5c114b38a287fd 100644 --- a/pkg/util/trivy/trivy.go +++ b/pkg/util/trivy/trivy.go @@ -12,10 +12,8 @@ import ( "context" "errors" "fmt" - "io/fs" "runtime" "sort" - "strings" "sync" "golang.org/x/xerrors" @@ -25,13 +23,11 @@ import ( "github.com/DataDog/datadog-agent/pkg/sbom" "github.com/DataDog/datadog-agent/pkg/util/log" "github.com/DataDog/datadog-agent/pkg/util/optional" - v1 "github.com/google/go-containerregistry/pkg/v1" "github.com/aquasecurity/trivy-db/pkg/db" "github.com/aquasecurity/trivy/pkg/fanal/analyzer" "github.com/aquasecurity/trivy/pkg/fanal/applier" "github.com/aquasecurity/trivy/pkg/fanal/artifact" - local "github.com/aquasecurity/trivy/pkg/fanal/artifact/container" image2 "github.com/aquasecurity/trivy/pkg/fanal/artifact/image" local2 "github.com/aquasecurity/trivy/pkg/fanal/artifact/local" ftypes "github.com/aquasecurity/trivy/pkg/fanal/types" @@ -227,107 +223,8 @@ func (c *Collector) getCache() (CacheWithCleaner, error) { return c.persistentCache, nil } -type fakeContainer struct { - imgMeta *workloadmeta.ContainerImageMetadata - layers []string 
-} - -func (c *fakeContainer) LayerByDiffID(hash string) (ftypes.LayerPath, error) { - imageLayers := c.layers - for i, layer := range imageLayers { - diffID, _ := v1.NewHash(layer) - if diffID.String() == hash { - return ftypes.LayerPath{ - DiffID: diffID.String(), - Path: c.layers[i], - Digest: c.imgMeta.Layers[i].Digest, - }, nil - } - } - return ftypes.LayerPath{}, errors.New("not found") -} - -func (c *fakeContainer) LayerByDigest(hash string) (ftypes.LayerPath, error) { - imageLayers := c.layers - for i, layer := range imageLayers { - diffID, _ := v1.NewHash(layer) - if hash == c.imgMeta.Layers[i].Digest { - return ftypes.LayerPath{ - DiffID: diffID.String(), - Path: c.layers[i], - Digest: c.imgMeta.Layers[i].Digest, - }, nil - } - } - return ftypes.LayerPath{}, errors.New("not found") -} - -func (c *fakeContainer) Layers() (layers []ftypes.LayerPath) { - imageLayers := c.layers - for i, layer := range imageLayers { - diffID, _ := v1.NewHash(layer) - layers = append(layers, ftypes.LayerPath{ - DiffID: diffID.String(), - Path: c.layers[i], - Digest: c.imgMeta.Layers[i].Digest, - }) - } - - return layers -} - -type dirFS struct { - fs.StatFS -} - -func (d *dirFS) Open(name string) (fs.File, error) { - return d.StatFS.Open(name) -} - -func (d *dirFS) Stat(name string) (fs.FileInfo, error) { - if !strings.HasPrefix(name, "/") { - name = "/" + name - } - return d.StatFS.Stat(name) -} - -func (c *Collector) scanOverlayFS(ctx context.Context, layers []string, ctr ftypes.Container, imgMeta *workloadmeta.ContainerImageMetadata, scanOptions sbom.ScanOptions) (sbom.Report, error) { - cache, err := c.getCache() - if err != nil { - return nil, err - } - - containerArtifact, err := local.NewArtifact(ctr, cache, NewFSWalker(), getDefaultArtifactOption(scanOptions)) - if err != nil { - return nil, err - } - - log.Debugf("Generating SBOM for image %s using overlayfs %+v", imgMeta.ID, layers) - - trivyReport, err := c.scan(ctx, containerArtifact, applier.NewApplier(cache), 
imgMeta, cache, false) - if err != nil { - if imgMeta != nil { - return nil, fmt.Errorf("unable to marshal report to sbom format for image %s, err: %w", imgMeta.ID, err) - } - return nil, fmt.Errorf("unable to marshal report to sbom format, err: %w", err) - } - - log.Debugf("Found OS: %+v", trivyReport.Metadata.OS) - pkgCount := 0 - for _, results := range trivyReport.Results { - pkgCount += len(results.Packages) - } - log.Debugf("Found %d packages", pkgCount) - - return &Report{ - Report: trivyReport, - id: imgMeta.ID, - marshaler: c.marshaler, - }, nil -} - // scanFilesystem scans the specified directory and logs detailed scan steps. -func (c *Collector) scanFilesystem(ctx context.Context, fsys fs.FS, path string, imgMeta *workloadmeta.ContainerImageMetadata, scanOptions sbom.ScanOptions) (sbom.Report, error) { +func (c *Collector) scanFilesystem(ctx context.Context, path string, imgMeta *workloadmeta.ContainerImageMetadata, scanOptions sbom.ScanOptions) (sbom.Report, error) { // For filesystem scans, it is required to walk the filesystem to get the persistentCache key so caching does not add any value. 
// TODO: Cache directly the trivy report for container images cache := newMemoryCache() @@ -360,15 +257,15 @@ func (c *Collector) scanFilesystem(ctx context.Context, fsys fs.FS, path string, } // ScanFilesystem scans file-system -func (c *Collector) ScanFilesystem(ctx context.Context, fsys fs.FS, path string, scanOptions sbom.ScanOptions) (sbom.Report, error) { - return c.scanFilesystem(ctx, fsys, path, nil, scanOptions) +func (c *Collector) ScanFilesystem(ctx context.Context, path string, scanOptions sbom.ScanOptions) (sbom.Report, error) { + return c.scanFilesystem(ctx, path, nil, scanOptions) } type driver struct { applier applier.Applier } -func (d *driver) Scan(ctx context.Context, target, artifactKey string, blobKeys []string, options types.ScanOptions) ( +func (d *driver) Scan(_ context.Context, target, artifactKey string, blobKeys []string, options types.ScanOptions) ( results types.Results, osFound ftypes.OS, err error) { detail, err := d.applier.ApplyLayers(artifactKey, blobKeys) @@ -405,7 +302,7 @@ func (d *driver) Scan(ctx context.Context, target, artifactKey string, blobKeys } result := types.Result{ - Target: fmt.Sprintf("%s (%s %s)", target, detail.OS, detail.OS.Name), + Target: fmt.Sprintf("%s (%s %s)", target, detail.OS.Family, detail.OS.Name), Class: types.ClassOSPkg, Type: scanTarget.OS.Family, } diff --git a/pkg/util/trivy/trivy_containerd.go b/pkg/util/trivy/trivy_containerd.go deleted file mode 100644 index dfc9d664224539..00000000000000 --- a/pkg/util/trivy/trivy_containerd.go +++ /dev/null @@ -1,166 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2016-present Datadog, Inc. 
- -//go:build trivy && containerd - -// Package trivy holds the scan components -package trivy - -import ( - "context" - "fmt" - "os" - "strings" - "time" - - ftypes "github.com/aquasecurity/trivy/pkg/fanal/types" - "github.com/containerd/containerd" - "github.com/containerd/containerd/leases" - "github.com/containerd/containerd/mount" - "github.com/containerd/containerd/namespaces" - "github.com/containerd/errdefs" - - workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" - "github.com/DataDog/datadog-agent/pkg/sbom" - cutil "github.com/DataDog/datadog-agent/pkg/util/containerd" - "github.com/DataDog/datadog-agent/pkg/util/log" -) - -const ( - cleanupTimeout = 30 * time.Second -) - -type fakeContainerdContainer struct { - *fakeContainer - *image -} - -func (c *fakeContainerdContainer) LayerByDiffID(hash string) (ftypes.LayerPath, error) { - return c.fakeContainer.LayerByDiffID(hash) -} - -func (c *fakeContainerdContainer) LayerByDigest(hash string) (ftypes.LayerPath, error) { - return c.fakeContainer.LayerByDigest(hash) -} - -func (c *fakeContainerdContainer) Layers() (layers []ftypes.LayerPath) { - return c.fakeContainer.Layers() -} - -// ContainerdAccessor is a function that should return a containerd client -type ContainerdAccessor func() (cutil.ContainerdItf, error) - -// ScanContainerdImageFromSnapshotter scans containerd image directly from the snapshotter -func (c *Collector) ScanContainerdImageFromSnapshotter(ctx context.Context, imgMeta *workloadmeta.ContainerImageMetadata, img containerd.Image, client cutil.ContainerdItf, scanOptions sbom.ScanOptions) (sbom.Report, error) { - fanalImage, cleanup, err := convertContainerdImage(ctx, client.RawClient(), imgMeta, img) - if cleanup != nil { - defer cleanup() - } - - // Computing duration of containerd lease - deadline, _ := ctx.Deadline() - expiration := deadline.Sub(time.Now().Add(cleanupTimeout)) - clClient := client.RawClient() - imageID := imgMeta.ID - - mounts, err := 
client.Mounts(ctx, expiration, imgMeta.Namespace, img) - if err != nil { - return nil, fmt.Errorf("unable to get mounts for image %s, err: %w", imgMeta.ID, err) - } - - layers := extractLayersFromOverlayFSMounts(mounts) - if len(layers) == 0 { - return nil, fmt.Errorf("unable to extract layers from overlayfs mounts %+v for image %s", mounts, imgMeta.ID) - } - - ctx = namespaces.WithNamespace(ctx, imgMeta.Namespace) - // Adding a lease to cleanup dandling snaphots at expiration - ctx, done, err := clClient.WithLease(ctx, - leases.WithID(imageID), - leases.WithExpiration(expiration), - leases.WithLabels(map[string]string{ - "containerd.io/gc.ref.snapshot." + containerd.DefaultSnapshotter: imageID, - }), - ) - if err != nil && !errdefs.IsAlreadyExists(err) { - return nil, fmt.Errorf("unable to get a lease, err: %w", err) - } - - report, err := c.scanOverlayFS(ctx, layers, &fakeContainerdContainer{ - image: fanalImage, - fakeContainer: &fakeContainer{ - layers: layers, - imgMeta: imgMeta, - }, - }, imgMeta, scanOptions) - - if err := done(ctx); err != nil { - log.Warnf("Unable to cancel containerd lease with id: %s, err: %v", imageID, err) - } - - return report, err -} - -// ScanContainerdImage scans containerd image by exporting it and scanning the tarball -func (c *Collector) ScanContainerdImage(ctx context.Context, imgMeta *workloadmeta.ContainerImageMetadata, img containerd.Image, client cutil.ContainerdItf, scanOptions sbom.ScanOptions) (sbom.Report, error) { - fanalImage, cleanup, err := convertContainerdImage(ctx, client.RawClient(), imgMeta, img) - if cleanup != nil { - defer cleanup() - } - if err != nil { - return nil, fmt.Errorf("unable to convert containerd image, err: %w", err) - } - - return c.scanImage(ctx, fanalImage, imgMeta, scanOptions) -} - -// ScanContainerdImageFromFilesystem scans containerd image from file-system -func (c *Collector) ScanContainerdImageFromFilesystem(ctx context.Context, imgMeta *workloadmeta.ContainerImageMetadata, img 
containerd.Image, client cutil.ContainerdItf, scanOptions sbom.ScanOptions) (sbom.Report, error) { - imagePath, err := os.MkdirTemp("", "containerd-image-*") - if err != nil { - return nil, fmt.Errorf("unable to create temp dir, err: %w", err) - } - defer func() { - err := os.RemoveAll(imagePath) - if err != nil { - log.Errorf("Unable to remove temp dir: %s, err: %v", imagePath, err) - } - }() - - // Computing duration of containerd lease - deadline, _ := ctx.Deadline() - expiration := deadline.Sub(time.Now().Add(cleanupTimeout)) - - cleanUp, err := client.MountImage(ctx, expiration, imgMeta.Namespace, img, imagePath) - if err != nil { - return nil, fmt.Errorf("unable to mount containerd image, err: %w", err) - } - - defer func() { - cleanUpContext, cleanUpContextCancel := context.WithTimeout(context.Background(), cleanupTimeout) - err := cleanUp(cleanUpContext) - cleanUpContextCancel() - if err != nil { - log.Errorf("Unable to clean up mounted image, err: %v", err) - } - }() - - return c.scanFilesystem(ctx, os.DirFS("/"), imagePath, imgMeta, scanOptions) -} - -func extractLayersFromOverlayFSMounts(mounts []mount.Mount) []string { - var layers []string - for _, mount := range mounts { - for _, opt := range mount.Options { - for _, prefix := range []string{"upperdir=", "lowerdir="} { - trimmedOpt := strings.TrimPrefix(opt, prefix) - if trimmedOpt != opt { - layers = append(layers, strings.Split(trimmedOpt, ":")...) - } - } - } - } - return layers -}