Skip to content

Commit

Permalink
[CONTINT-4545] Fix <missing> docker image layer digests from the dock…
Browse files Browse the repository at this point in the history
…er collector (#33384)
  • Loading branch information
justin-lesko authored Jan 31, 2025
1 parent 04e2e0a commit 8b33915
Show file tree
Hide file tree
Showing 3 changed files with 232 additions and 5 deletions.
37 changes: 32 additions & 5 deletions comp/core/workloadmeta/collectors/internal/docker/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -646,26 +646,53 @@ func (c *collector) getImageMetadata(ctx context.Context, imageID string, newSBO
OSVersion: imgInspect.OsVersion,
Architecture: imgInspect.Architecture,
Variant: imgInspect.Variant,
Layers: layersFromDockerHistory(imageHistory),
Layers: layersFromDockerHistoryAndInspect(imageHistory, imgInspect),
SBOM: sbom,
}, nil
}

func layersFromDockerHistory(history []image.HistoryResponseItem) []workloadmeta.ContainerImageLayer {
// it has been observed that docker can return layers that are missing all metadata when inherited from a base container
func isInheritedLayer(layer image.HistoryResponseItem) bool {
return layer.CreatedBy == "" && layer.Size == 0
}

func layersFromDockerHistoryAndInspect(history []image.HistoryResponseItem, inspect types.ImageInspect) []workloadmeta.ContainerImageLayer {
var layers []workloadmeta.ContainerImageLayer

// Docker returns the layers in reverse-chronological order
// Sanity check our current assumption that there cannot be more RootFS layer IDs than history layers
if len(inspect.RootFS.Layers) > len(history) {
log.Warn("The number of RootFS layers exceeded the number of history layers")
return layers
}

// inspectIdx tracks the current RootFS layer ID index (in Docker, this corresponds to the Diff ID of a layer)
// NOTE: Docker returns the RootFS layers in chronological order
inspectIdx := 0

// Docker returns the history layers in reverse-chronological order
for i := len(history) - 1; i >= 0; i-- {
created := time.Unix(history[i].Created, 0)

isEmptyLayer := history[i].Size == 0
isInheritedLayer := isInheritedLayer(history[i])

digest := ""
if isInheritedLayer || !isEmptyLayer {
if isInheritedLayer {
log.Debugf("detected an inherited layer for image ID: \"%s\", assigning it digest: \"%s\"", inspect.ID, inspect.RootFS.Layers[inspectIdx])
}
digest = inspect.RootFS.Layers[inspectIdx]
inspectIdx++
}

layer := workloadmeta.ContainerImageLayer{
Digest: history[i].ID,
Digest: digest,
SizeBytes: history[i].Size,
History: &v1.History{
Created: &created,
CreatedBy: history[i].CreatedBy,
Comment: history[i].Comment,
EmptyLayer: history[i].Size == 0,
EmptyLayer: isEmptyLayer,
},
}

Expand Down
189 changes: 189 additions & 0 deletions comp/core/workloadmeta/collectors/internal/docker/docker_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016-present Datadog, Inc.

//go:build docker

package docker

import (
"testing"
"time"

"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/image"
v1 "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/stretchr/testify/assert"

workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def"
)

func Test_LayersFromDockerHistoryAndInspect(t *testing.T) {
var emptySize int64
var noDiffCmd = "ENV var=dummy"

var nonEmptySize int64 = 1
var cmd = "COPY dummy.sh ."

var baseTimeUnix int64
var baseTime = time.Unix(baseTimeUnix, 0)

var layerID = "dummy id"

tests := []struct {
name string
history []image.HistoryResponseItem
inspect types.ImageInspect
expected []workloadmeta.ContainerImageLayer
}{
{
name: "Layer with CreatedBy and positive Size is assigned a digest",
history: []image.HistoryResponseItem{
{
Size: nonEmptySize,
CreatedBy: cmd,
Created: baseTimeUnix,
},
},
inspect: types.ImageInspect{
RootFS: types.RootFS{
Layers: []string{layerID},
},
},
expected: []workloadmeta.ContainerImageLayer{
{
Digest: layerID,
SizeBytes: nonEmptySize,
History: &v1.History{
Created: &baseTime,
CreatedBy: cmd,
EmptyLayer: false,
},
},
},
},
{
name: "Inherited layer with no CreatedBy and no Size is detected and is assigned a digest",
history: []image.HistoryResponseItem{
{
Size: emptySize,
Created: baseTimeUnix,
},
},
inspect: types.ImageInspect{
RootFS: types.RootFS{
Layers: []string{layerID},
},
},
expected: []workloadmeta.ContainerImageLayer{
{
Digest: layerID,
SizeBytes: emptySize,
History: &v1.History{
Created: &baseTime,
EmptyLayer: true,
},
},
},
},
{
name: "Layer with CreatedBy and empty Size is NOT assigned a digest",
history: []image.HistoryResponseItem{
{
Size: emptySize,
CreatedBy: noDiffCmd,
Created: baseTimeUnix,
},
},
inspect: types.ImageInspect{
RootFS: types.RootFS{
Layers: []string{layerID},
},
},
expected: []workloadmeta.ContainerImageLayer{
{
SizeBytes: emptySize,
History: &v1.History{
CreatedBy: noDiffCmd,
Created: &baseTime,
EmptyLayer: true,
},
},
},
},
{
name: "Mix of layers with and without digests are merged in the proper order",
history: []image.HistoryResponseItem{
{ // "2" in the expected field
Size: nonEmptySize,
Created: baseTimeUnix,
CreatedBy: cmd,
},
{
Size: emptySize,
Created: baseTimeUnix,
CreatedBy: noDiffCmd,
},
{ // "1" in the expected field
Size: emptySize,
Created: baseTimeUnix,
},
},
inspect: types.ImageInspect{
RootFS: types.RootFS{
Layers: []string{"1", "2"},
},
},
expected: []workloadmeta.ContainerImageLayer{
{
Digest: "1",
SizeBytes: emptySize,
History: &v1.History{
Created: &baseTime,
EmptyLayer: true,
},
},
{
SizeBytes: emptySize,
History: &v1.History{
Created: &baseTime,
CreatedBy: noDiffCmd,
EmptyLayer: true,
},
},
{
Digest: "2",
SizeBytes: nonEmptySize,
History: &v1.History{
Created: &baseTime,
CreatedBy: cmd,
EmptyLayer: false,
},
},
},
},
{
name: "Number of inspect layers exceeds history layers breaks our assumption and results in no layers returned",
history: []image.HistoryResponseItem{
{
Size: nonEmptySize,
CreatedBy: cmd,
Created: baseTimeUnix,
},
},
inspect: types.ImageInspect{
RootFS: types.RootFS{
Layers: []string{layerID, layerID},
},
},
expected: []workloadmeta.ContainerImageLayer{},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
layers := layersFromDockerHistoryAndInspect(tt.history, tt.inspect)
assert.ElementsMatchf(t, tt.expected, layers, "Expected layers and actual layers returned do not match")
})
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Each section from every release note are combined when the
# CHANGELOG.rst is rendered. So the text needs to be worded so that
# it does not depend on any information only available in another
# section. This may mean repeating some details, but each section
# must be readable independently of the other.
#
# Each section note must be formatted as reStructuredText.
---
enhancements:
- |
Image layer digests will no longer report as "<missing>" from Docker runtimes.

0 comments on commit 8b33915

Please sign in to comment.