Skip to content

Commit 3bdeab0

Browse files
committed
deduplicate layer IDs to handle cases where duplicate layers are in the layer tree
Signed-off-by: Jose R. Gonzalez <[email protected]>
1 parent c8f6f72 commit 3bdeab0

File tree

3 files changed

+109
-34
lines changed

3 files changed

+109
-34
lines changed

internal/policy/container/has_modified_files.go

+60-34
Original file line numberDiff line numberDiff line change
@@ -60,36 +60,69 @@ type packageFilesRef struct {
6060
// Validate runs the check of whether any Red Hat files were modified
6161
func (p *HasModifiedFilesCheck) Validate(ctx context.Context, imgRef image.ImageReference) (bool, error) {
6262
fs := afero.NewOsFs()
63-
layerIDs, packageFiles, packageDist, err := p.gatherDataToValidate(ctx, imgRef, fs)
63+
layerIDs, packageFiles, err := p.gatherDataToValidate(ctx, imgRef, fs)
64+
if err != nil {
65+
return false, fmt.Errorf("could not generate modified files list: %v", err)
66+
}
67+
68+
packageDist, err := p.parsePackageDist(ctx, imgRef.ImageFSPath, fs)
6469
if err != nil {
6570
return false, fmt.Errorf("could not generate modified files list: %v", err)
6671
}
6772

6873
return p.validate(ctx, layerIDs, packageFiles, packageDist)
6974
}
7075

76+
// parsePackageDist returns the platform's distribution value from the
77+
// os-release contents in the extracted image.
78+
func (p *HasModifiedFilesCheck) parsePackageDist(_ context.Context, extractedImageFSPath string, fs afero.Fs) (string, error) {
79+
osRelease, err := fs.Open(filepath.Join(extractedImageFSPath, "etc", "os-release"))
80+
if err != nil {
81+
return "", fmt.Errorf("could not open os-release: %v", err)
82+
}
83+
defer osRelease.Close()
84+
scanner := bufio.NewScanner(osRelease)
85+
packageDist := "unknown"
86+
for scanner.Scan() {
87+
line := scanner.Text()
88+
r, _ := regexp.Compile(`PLATFORM_ID="platform:([[:alnum:]]+)"`)
89+
m := r.FindStringSubmatch(line)
90+
if m == nil {
91+
continue
92+
}
93+
packageDist = m[1]
94+
break
95+
}
96+
97+
if err := scanner.Err(); err != nil {
98+
return "", fmt.Errorf("error while scanning for package dist: %v", err)
99+
}
100+
101+
return packageDist, nil
102+
}
103+
71104
// gatherDataToValidate returns a map from layer digest to a struct containing the list of files
72105
// (packageFilesRef.LayerPackageFiles) installed via packages (packageFilesRef.LayerPackages)
73106
// from the container image, and the list of files (packageFilesRef.LayerFiles) modified/added
74107
// via layers in the image.
75-
func (p *HasModifiedFilesCheck) gatherDataToValidate(ctx context.Context, imgRef image.ImageReference, fs afero.Fs) ([]string, map[string]packageFilesRef, string, error) {
108+
func (p *HasModifiedFilesCheck) gatherDataToValidate(ctx context.Context, imgRef image.ImageReference, fs afero.Fs) ([]string, map[string]packageFilesRef, error) {
76109
logger := logr.FromContextOrDiscard(ctx)
77110

78111
layerDir, err := afero.TempDir(fs, "", "rpm-layers-")
79112
if err != nil {
80-
return nil, nil, "", err
113+
return nil, nil, err
81114
}
82115
defer func() {
83116
_ = fs.RemoveAll(layerDir)
84117
}()
85118

86119
if imgRef.ImageInfo == nil {
87-
return nil, nil, "", fmt.Errorf("image reference invalid")
120+
return nil, nil, fmt.Errorf("image reference invalid")
88121
}
89122

90123
layers, err := imgRef.ImageInfo.Layers()
91124
if err != nil {
92-
return nil, nil, "", err
125+
return nil, nil, err
93126
}
94127

95128
layerIDs := make([]string, 0, len(layers))
@@ -102,21 +135,36 @@ func (p *HasModifiedFilesCheck) gatherDataToValidate(ctx context.Context, imgRef
102135
for idx, layer := range layers {
103136
layerIDHash, err := layer.Digest()
104137
if err != nil {
105-
return nil, nil, "", fmt.Errorf("unable to retrieve diff id for layer: %w", err)
138+
return nil, nil, fmt.Errorf("unable to retrieve diff id for layer: %w", err)
139+
}
140+
141+
// Capture the diff ID to aid in debugging. We don't technically care if
142+
// there's an error returned here because we don't use the layerDiffID
143+
// value for anything meaningful.
144+
layerDiffID := "unknown"
145+
layerDiffHash, err := layer.DiffID()
146+
if err == nil && layerDiffHash.String() != "" {
147+
layerDiffID = layerDiffHash.String()
106148
}
107-
layerID := layerIDHash.String()
108149

109-
layerDir := filepath.Join(layerDir, fmt.Sprintf("%02d-%s", idx, layerID))
150+
rawLayerID := layerIDHash.String()
151+
// Map everything using a combination of the layer index and the layer
152+
// ID to avoid problems when images have multiple scattered layers with
153+
// the same ID.
154+
layerID := fmt.Sprintf("%02d-%s", idx, rawLayerID)
155+
logger.V(log.TRC).Info("generating unique layer ID", "uniqueLayerID", layerID, "layerID", rawLayerID, "layerDiffID", layerDiffID)
156+
157+
layerDir := filepath.Join(layerDir, layerID)
110158
err = fs.Mkdir(layerDir, 0o755)
111159
if err != nil {
112-
return nil, nil, "", fmt.Errorf("could not create layer directory: %w", err)
160+
return nil, nil, fmt.Errorf("could not create layer directory: %w", err)
113161
}
114162

115163
layerIDs = append(layerIDs, layerID)
116164

117165
files, err := generateChangesFor(ctx, layer)
118166
if err != nil {
119-
return nil, nil, "", err
167+
return nil, nil, err
120168
}
121169

122170
found, pkgList := findRPMDB(ctx, layer)
@@ -142,7 +190,7 @@ func (p *HasModifiedFilesCheck) gatherDataToValidate(ctx context.Context, imgRef
142190

143191
packageFiles, err := installedFileMapWithExclusions(ctx, pkgList)
144192
if err != nil {
145-
return nil, nil, "", err
193+
return nil, nil, err
146194
}
147195

148196
layerRefs[layerID] = packageFilesRef{
@@ -153,29 +201,7 @@ func (p *HasModifiedFilesCheck) gatherDataToValidate(ctx context.Context, imgRef
153201
}
154202
}
155203

156-
osRelease, err := fs.Open(filepath.Join(imgRef.ImageFSPath, "etc", "os-release"))
157-
if err != nil {
158-
return nil, nil, "", fmt.Errorf("could not open os-release: %v", err)
159-
}
160-
defer osRelease.Close()
161-
scanner := bufio.NewScanner(osRelease)
162-
packageDist := "unknown"
163-
for scanner.Scan() {
164-
line := scanner.Text()
165-
r, _ := regexp.Compile(`PLATFORM_ID="platform:([[:alnum:]]+)"`)
166-
m := r.FindStringSubmatch(line)
167-
if m == nil {
168-
continue
169-
}
170-
packageDist = m[1]
171-
break
172-
}
173-
174-
if err := scanner.Err(); err != nil {
175-
return nil, nil, "", fmt.Errorf("error while scanning for package dist: %v", err)
176-
}
177-
178-
return layerIDs, layerRefs, packageDist, nil
204+
return layerIDs, layerRefs, nil
179205
}
180206

181207
// validate compares the list of LayerFiles and PackageFiles to see what PackageFiles

internal/policy/container/has_modified_files_test.go

+34
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ import (
55
"context"
66
"path"
77

8+
"github.com/google/go-containerregistry/pkg/crane"
9+
"github.com/spf13/afero"
10+
811
"github.com/redhat-openshift-ecosystem/openshift-preflight/internal/image"
912

1013
"github.com/bombsimon/logrusr/v4"
@@ -402,6 +405,37 @@ var _ = Describe("HasModifiedFiles", func() {
402405
})
403406
})
404407

408+
When("multiple no-op layers with the same IDs split layers containing RPMDB modifications", func() {
409+
// Test case ensures that we properly deduplicate layer hashes in our file mapping to avoid cases where
410+
// a later layer with the same ID as an earlier layer doesn't overwrite the earlier layer's file mapping.
411+
var img image.ImageReference
412+
var actualLayerCount int
413+
BeforeEach(func() {
414+
// TODO: The containerfile that generates this test fixture is stored in-repo tests/containerfiles.
415+
// The external call here avoids having to store the image locally. A crane-built image runs into
416+
// issues because we cannot run `microdnf` commands using Crane, and need to have multiple layers
417+
// containing RPMDBs to test this issue correctly.
418+
const dupeLayerTestFixture = "quay.io/opdev/preflight-test-fixture:duplicate-layers"
419+
cImg, pullError := crane.Pull(dupeLayerTestFixture)
420+
Expect(pullError).ToNot(HaveOccurred())
421+
img = image.ImageReference{
422+
ImageInfo: cImg,
423+
}
424+
425+
layers, err := img.ImageInfo.Layers()
426+
Expect(err).ToNot(HaveOccurred())
427+
actualLayerCount = len(layers)
428+
})
429+
430+
It("should validate and have matching layer counts", func() {
431+
fs := afero.NewOsFs()
432+
layerIDs, layerRefs, err := hasModifiedFiles.gatherDataToValidate(context.TODO(), img, fs)
433+
Expect(err).ToNot(HaveOccurred())
434+
Expect(len(layerIDs)).To(Equal(actualLayerCount))
435+
Expect(len(layerRefs)).To(Equal(actualLayerCount))
436+
})
437+
})
438+
405439
When("calling the top level Validate", func() {
406440
It("should fail with an invalid ImageReference", func() {
407441
passed, err := hasModifiedFiles.Validate(context.TODO(), image.ImageReference{})
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
FROM registry.access.redhat.com/ubi8-minimal@sha256:9e458f41ff8868ceae00608a6fff35b45fd8bbe967bf8655e5ab08da5964f4d0
2+
3+
# This container file backs
4+
# quay.io/opdev/preflight-test-fixture:duplicate-layers, and is intended to test
5+
# an edge case with HasModifiedFiles where multiple duplicate layers were geting
6+
# squashed into a single entry in our layer-to-file map, causing invalid
7+
# modification flags.
8+
#
9+
# The produced artifact is about 100mb, so this fixture exists just to avoid
10+
# having that blob stored in-repo.
11+
12+
COPY example-license.txt /LICENSE
13+
RUN microdnf install gzip -y
14+
COPY example-license.txt /LICENSE
15+
RUN microdnf install gzip -y

0 commit comments

Comments
 (0)