From 8b33915cac7ef452687f33809838e10a72f26594 Mon Sep 17 00:00:00 2001 From: Justin Lesko Date: Fri, 31 Jan 2025 09:00:39 -0500 Subject: [PATCH] [CONTINT-4545] Fix docker image layer digests from the docker collector (#33384) --- .../collectors/internal/docker/docker.go | 37 +++- .../collectors/internal/docker/docker_test.go | 189 ++++++++++++++++++ ...-image-layer-digests-ab42e0cd0d2bd16d.yaml | 11 + 3 files changed, 232 insertions(+), 5 deletions(-) create mode 100644 comp/core/workloadmeta/collectors/internal/docker/docker_test.go create mode 100644 releasenotes/notes/fix-missing-docker-runtime-image-layer-digests-ab42e0cd0d2bd16d.yaml diff --git a/comp/core/workloadmeta/collectors/internal/docker/docker.go b/comp/core/workloadmeta/collectors/internal/docker/docker.go index 2dabd74dee5acf..c952e549638bb4 100644 --- a/comp/core/workloadmeta/collectors/internal/docker/docker.go +++ b/comp/core/workloadmeta/collectors/internal/docker/docker.go @@ -646,26 +646,53 @@ func (c *collector) getImageMetadata(ctx context.Context, imageID string, newSBO OSVersion: imgInspect.OsVersion, Architecture: imgInspect.Architecture, Variant: imgInspect.Variant, - Layers: layersFromDockerHistory(imageHistory), + Layers: layersFromDockerHistoryAndInspect(imageHistory, imgInspect), SBOM: sbom, }, nil } -func layersFromDockerHistory(history []image.HistoryResponseItem) []workloadmeta.ContainerImageLayer { +// it has been observed that docker can return layers that are missing all metadata when inherited from a base container +func isInheritedLayer(layer image.HistoryResponseItem) bool { + return layer.CreatedBy == "" && layer.Size == 0 +} + +func layersFromDockerHistoryAndInspect(history []image.HistoryResponseItem, inspect types.ImageInspect) []workloadmeta.ContainerImageLayer { var layers []workloadmeta.ContainerImageLayer - // Docker returns the layers in reverse-chronological order + // Sanity check our current assumption that there cannot be more RootFS layer IDs than history layers + if len(inspect.RootFS.Layers) > len(history) { + log.Warn("The number of RootFS layers exceeded the number of history layers") + return layers + } + + // inspectIdx tracks the current RootFS layer ID index (in Docker, this corresponds to the Diff ID of a layer) + // NOTE: Docker returns the RootFS layers in chronological order + inspectIdx := 0 + + // Docker returns the history layers in reverse-chronological order for i := len(history) - 1; i >= 0; i-- { created := time.Unix(history[i].Created, 0) + isEmptyLayer := history[i].Size == 0 + isInheritedLayer := isInheritedLayer(history[i]) + + digest := "" + if isInheritedLayer || !isEmptyLayer { + if isInheritedLayer { + log.Debugf("detected an inherited layer for image ID: \"%s\", assigning it digest: \"%s\"", inspect.ID, inspect.RootFS.Layers[inspectIdx]) + } + digest = inspect.RootFS.Layers[inspectIdx] + inspectIdx++ + } + layer := workloadmeta.ContainerImageLayer{ - Digest: history[i].ID, + Digest: digest, SizeBytes: history[i].Size, History: &v1.History{ Created: &created, CreatedBy: history[i].CreatedBy, Comment: history[i].Comment, - EmptyLayer: history[i].Size == 0, + EmptyLayer: isEmptyLayer, }, } diff --git a/comp/core/workloadmeta/collectors/internal/docker/docker_test.go b/comp/core/workloadmeta/collectors/internal/docker/docker_test.go new file mode 100644 index 00000000000000..b234ce5184380e --- /dev/null +++ b/comp/core/workloadmeta/collectors/internal/docker/docker_test.go @@ -0,0 +1,189 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build docker + +package docker + +import ( + "testing" + "time" + + "github.com/docker/docker/api/types" + "github.com/docker/docker/api/types/image" + v1 "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/stretchr/testify/assert" + + workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" +) + +func Test_LayersFromDockerHistoryAndInspect(t *testing.T) { + var emptySize int64 + var noDiffCmd = "ENV var=dummy" + + var nonEmptySize int64 = 1 + var cmd = "COPY dummy.sh ." + + var baseTimeUnix int64 + var baseTime = time.Unix(baseTimeUnix, 0) + + var layerID = "dummy id" + + tests := []struct { + name string + history []image.HistoryResponseItem + inspect types.ImageInspect + expected []workloadmeta.ContainerImageLayer + }{ + { + name: "Layer with CreatedBy and positive Size is assigned a digest", + history: []image.HistoryResponseItem{ + { + Size: nonEmptySize, + CreatedBy: cmd, + Created: baseTimeUnix, + }, + }, + inspect: types.ImageInspect{ + RootFS: types.RootFS{ + Layers: []string{layerID}, + }, + }, + expected: []workloadmeta.ContainerImageLayer{ + { + Digest: layerID, + SizeBytes: nonEmptySize, + History: &v1.History{ + Created: &baseTime, + CreatedBy: cmd, + EmptyLayer: false, + }, + }, + }, + }, + { + name: "Inherited layer with no CreatedBy and no Size is detected and is assigned a digest", + history: []image.HistoryResponseItem{ + { + Size: emptySize, + Created: baseTimeUnix, + }, + }, + inspect: types.ImageInspect{ + RootFS: types.RootFS{ + Layers: []string{layerID}, + }, + }, + expected: []workloadmeta.ContainerImageLayer{ + { + Digest: layerID, + SizeBytes: emptySize, + History: &v1.History{ + Created: &baseTime, + EmptyLayer: true, + }, + }, + }, + }, + { + name: "Layer with CreatedBy and empty Size is NOT assigned a digest", + history: []image.HistoryResponseItem{ + { + Size: emptySize, + CreatedBy: noDiffCmd, + Created: baseTimeUnix, + }, + }, + inspect: types.ImageInspect{ + RootFS: types.RootFS{ + Layers: []string{layerID}, + }, + }, + expected: []workloadmeta.ContainerImageLayer{ + { + SizeBytes: emptySize, + History: &v1.History{ + CreatedBy: noDiffCmd, + Created: &baseTime, + EmptyLayer: true, + }, + }, + }, + }, + { + name: "Mix of layers with and without digests are merged in the proper order", + history: []image.HistoryResponseItem{ + { // "2" in the expected field + Size: nonEmptySize, + Created: baseTimeUnix, + CreatedBy: cmd, + }, + { + Size: emptySize, + Created: baseTimeUnix, + CreatedBy: noDiffCmd, + }, + { // "1" in the expected field + Size: emptySize, + Created: baseTimeUnix, + }, + }, + inspect: types.ImageInspect{ + RootFS: types.RootFS{ + Layers: []string{"1", "2"}, + }, + }, + expected: []workloadmeta.ContainerImageLayer{ + { + Digest: "1", + SizeBytes: emptySize, + History: &v1.History{ + Created: &baseTime, + EmptyLayer: true, + }, + }, + { + SizeBytes: emptySize, + History: &v1.History{ + Created: &baseTime, + CreatedBy: noDiffCmd, + EmptyLayer: true, + }, + }, + { + Digest: "2", + SizeBytes: nonEmptySize, + History: &v1.History{ + Created: &baseTime, + CreatedBy: cmd, + EmptyLayer: false, + }, + }, + }, + }, + { + name: "Number of inspect layers exceeds history layers breaks our assumption and results in no layers returned", + history: []image.HistoryResponseItem{ + { + Size: nonEmptySize, + CreatedBy: cmd, + Created: baseTimeUnix, + }, + }, + inspect: types.ImageInspect{ + RootFS: types.RootFS{ + Layers: []string{layerID, layerID}, + }, + }, + expected: []workloadmeta.ContainerImageLayer{}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + layers := layersFromDockerHistoryAndInspect(tt.history, tt.inspect) + assert.ElementsMatchf(t, tt.expected, layers, "Expected layers and actual layers returned do not match") + }) + } +} diff --git a/releasenotes/notes/fix-missing-docker-runtime-image-layer-digests-ab42e0cd0d2bd16d.yaml b/releasenotes/notes/fix-missing-docker-runtime-image-layer-digests-ab42e0cd0d2bd16d.yaml new file mode 100644 index 00000000000000..9206161625778d --- /dev/null +++ b/releasenotes/notes/fix-missing-docker-runtime-image-layer-digests-ab42e0cd0d2bd16d.yaml @@ -0,0 +1,11 @@ +# Each section from every release note are combined when the +# CHANGELOG.rst is rendered. So the text needs to be worded so that +# it does not depend on any information only available in another +# section. This may mean repeating some details, but each section +# must be readable independently of the other. +# +# Each section note must be formatted as reStructuredText. +--- +enhancements: + - | + Image layer digests will no longer report as "" from Docker runtimes.