Skip to content

Commit 04908d5

Browse files
committed
Save full list of entries for every source
When we normalize sources by deduplicating on `sourceId`, we end up losing a little bit of data that's important for being able to reconnect `source` objects with their lines in `library/foo`. This resolves that by changing our single `entry` object into a list of `entries`, sorted using the same `SOURCE_DATE_EPOCH` tiebreaker we used previously, so that `.entries[0]` is the same as our old `.entry` while the full list of values is kept for later use/lookup/cross-referencing. I have additionally verified that `meta.jq` is (currently) the only place actively consuming the `.entry` object, so this should be fully sufficient (no changes necessary elsewhere).
1 parent da908ae commit 04908d5

File tree

2 files changed

+28
-21
lines changed

2 files changed

+28
-21
lines changed

meta.jq

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ def needs_build:
1010
# output: string ("Builder", but normalized)
1111
def normalized_builder:
1212
.build.arch as $arch
13-
| .source.entry.Builder
13+
| .source.entries[0].Builder
1414
| if . == "" then
1515
if $arch | startswith("windows-") then
1616
# https://github.com/microsoft/Windows-Containers/issues/34
@@ -57,7 +57,7 @@ def pull_command:
5757
# input: "build" object (with "buildId" top level key)
5858
# output: string "giturl" ("https://github.com/docker-library/golang.git#commit:directory"), used for "docker buildx build giturl"
5959
def git_build_url:
60-
.source.entry
60+
.source.entries[0]
6161
| (
6262
.GitRepo
6363
| if (endswith(".git") | not) then
@@ -78,8 +78,8 @@ def build_annotations($buildUrl):
7878
{
7979
# https://github.com/opencontainers/image-spec/blob/v1.1.0/annotations.md#pre-defined-annotation-keys
8080
"org.opencontainers.image.source": $buildUrl,
81-
"org.opencontainers.image.revision": .source.entry.GitCommit,
82-
"org.opencontainers.image.created": (.source.entry.SOURCE_DATE_EPOCH | strftime("%FT%TZ")), # see notes below about image index vs image manifest
81+
"org.opencontainers.image.revision": .source.entries[0].GitCommit,
82+
"org.opencontainers.image.created": (.source.entries[0].SOURCE_DATE_EPOCH | strftime("%FT%TZ")), # see notes below about image index vs image manifest
8383

8484
# TODO come up with less assuming values here? (Docker Hub assumption, tag ordering assumption)
8585
"org.opencontainers.image.version": ( # value of the first image tag
@@ -138,7 +138,7 @@ def build_command:
138138
| [
139139
(
140140
[
141-
@sh "SOURCE_DATE_EPOCH=\(.source.entry.SOURCE_DATE_EPOCH)",
141+
@sh "SOURCE_DATE_EPOCH=\(.source.entries[0].SOURCE_DATE_EPOCH)",
142142
# TODO EXPERIMENTAL_BUILDKIT_SOURCE_POLICY=<(jq ...)
143143
"docker buildx build --progress=plain",
144144
@sh "--provenance=mode=max,builder-id=\(buildkit_provenance_builder_id)",
@@ -197,7 +197,7 @@ def build_command:
197197
),
198198
"--build-arg BUILDKIT_SYNTAX=\"$BASHBREW_BUILDKIT_SYNTAX\"", # TODO .doi/.bin/bashbrew-buildkit-env-setup.sh
199199
"--build-arg BUILDKIT_DOCKERFILE_CHECK=skip=all", # disable linting (https://github.com/moby/buildkit/pull/4962)
200-
@sh "--file \(.source.entry.File)",
200+
@sh "--file \(.source.entries[0].File)",
201201
($buildUrl | @sh),
202202
empty
203203
] | join(" \\\n\t")
@@ -228,7 +228,7 @@ def build_command:
228228
| [
229229
(
230230
[
231-
@sh "SOURCE_DATE_EPOCH=\(.source.entry.SOURCE_DATE_EPOCH)",
231+
@sh "SOURCE_DATE_EPOCH=\(.source.entries[0].SOURCE_DATE_EPOCH)",
232232
"DOCKER_BUILDKIT=0",
233233
"docker build",
234234
(
@@ -240,7 +240,7 @@ def build_command:
240240
| "--tag " + @sh
241241
),
242242
@sh "--platform \(.source.arches[.build.arch].platformString)",
243-
@sh "--file \(.source.entry.File)",
243+
@sh "--file \(.source.entries[0].File)",
244244
($buildUrl | @sh),
245245
empty
246246
]
@@ -259,14 +259,14 @@ def build_command:
259259
"_git() { git -C \"$gitCache\" \"$@\"; }",
260260
"_git config gc.auto 0",
261261
# "bashbrew fetch" but in Bash (because we have bashbrew, but not the library file -- we could synthesize a library file instead, but six of one half a dozen of another)
262-
@sh "_commit() { _git rev-parse \(.source.entry.GitCommit + "^{commit}"); }",
263-
@sh "if ! _commit &> /dev/null; then _git fetch \(.source.entry.GitRepo) \(.source.entry.GitCommit + ":") || _git fetch \(.source.entry.GitFetch + ":"); fi",
262+
@sh "_commit() { _git rev-parse \(.source.entries[0].GitCommit + "^{commit}"); }",
263+
@sh "if ! _commit &> /dev/null; then _git fetch \(.source.entries[0].GitRepo) \(.source.entries[0].GitCommit + ":") || _git fetch \(.source.entries[0].GitFetch + ":"); fi",
264264
"_commit",
265265

266266
# TODO figure out a good, safe place to store our temporary build/push directory (maybe this is fine? we do it for buildx build too)
267267
"mkdir temp",
268268
# https://github.com/docker-library/bashbrew/blob/5152c0df682515cbe7ac62b68bcea4278856429f/cmd/bashbrew/git.go#L140-L147 (TODO "bashbrew context" ?)
269-
@sh "_git archive --format=tar \(.source.entry.GitCommit + ":" + (.source.entry.Directory | if . == "." then "" else . + "/" end)) | tar -xvC temp",
269+
@sh "_git archive --format=tar \(.source.entries[0].GitCommit + ":" + (.source.entries[0].Directory | if . == "." then "" else . + "/" end)) | tar -xvC temp",
270270

271271
# validate oci-layout file (https://github.com/docker-library/bashbrew/blob/4e0ea8d8aba49d54daf22bd8415fabba65dc83ee/cmd/bashbrew/oci-builder.go#L104-L112)
272272
@sh "jq -s \("
@@ -279,8 +279,8 @@ def build_command:
279279
" | unindent_and_decomment_jq(3)) temp/oci-layout > /dev/null",
280280

281281
# https://github.com/docker-library/bashbrew/blob/4e0ea8d8aba49d54daf22bd8415fabba65dc83ee/cmd/bashbrew/oci-builder.go#L116
282-
if .source.entry.File != "index.json" then
283-
@sh "jq -s \("{ schemaVersion: 2, manifests: . }") \("./" + .source.entry.File) > temp/index.json"
282+
if .source.entries[0].File != "index.json" then
283+
@sh "jq -s \("{ schemaVersion: 2, manifests: . }") \("./" + .source.entries[0].File) > temp/index.json"
284284
else empty end,
285285

286286
@sh "jq -s \("
@@ -311,7 +311,7 @@ def build_command:
311311
| del(.annotations, .urls)
312312
313313
# inject our annotations
314-
| .annotations = \(build_annotations(.source.entry.GitRepo) | @json)
314+
| .annotations = \(build_annotations(.source.entries[0].GitRepo) | @json)
315315
)
316316
" | unindent_and_decomment_jq(3)) temp/index.json > temp/index.json.new",
317317
"mv temp/index.json.new temp/index.json",
@@ -324,7 +324,7 @@ def build_command:
324324
"originalImageManifest=\"$(jq -r '.manifests[0].digest' temp/index.json)\"",
325325
(
326326
[
327-
@sh "SOURCE_DATE_EPOCH=\(.source.entry.SOURCE_DATE_EPOCH)",
327+
@sh "SOURCE_DATE_EPOCH=\(.source.entries[0].SOURCE_DATE_EPOCH)",
328328
"docker buildx build --progress=plain",
329329
"--load=false", "--provenance=false", # explicitly disable a few features we want to avoid
330330
"--build-arg BUILDKIT_DOCKERFILE_CHECK=skip=all", # disable linting (https://github.com/moby/buildkit/pull/4962)

sources.sh

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,15 +50,15 @@ bashbrew cat --build-order --format '
5050
{
5151
"sourceId": {{ join "\n" $sum $file $builder "" | sha256sum | json }},
5252
"reproducibleGitChecksum": {{ $sum | json }},
53-
"entry": {
53+
"entries": [ {
5454
"GitRepo": {{ .ArchGitRepo $a | json }},
5555
"GitFetch": {{ .ArchGitFetch $a | json }},
5656
"GitCommit": {{ .ArchGitCommit $a | json }},
5757
"Directory": {{ .ArchDirectory $a | json }},
5858
"File": {{ $file | json }},
5959
"Builder": {{ $builder | json }},
6060
"SOURCE_DATE_EPOCH": {{ ($.ArchGitTime $a .).Unix | json }}
61-
},
61+
} ],
6262
"arches": {
6363
{{ $a | json }}: {
6464
"tags": {{ $.Tags namespace false . | json }},
@@ -111,10 +111,17 @@ bashbrew cat --build-order --format '
111111
end
112112
)
113113
)
114-
| if .entry.SOURCE_DATE_EPOCH > $in.entry.SOURCE_DATE_EPOCH then
115-
# smallest SOURCE_DATE_EPOCH wins in the face of duplicates for a given sourceId
116-
.entry = $in.entry
117-
else . end
114+
| .entries = (
115+
reduce $in.entries[] as $inE (.entries;
116+
# "unique" but without losing ordering (ie, only add entries we do not already have)
117+
if index($inE) then . else
118+
. + [ $inE ]
119+
end
120+
)
121+
# then prefer lower SOURCE_DATE_EPOCH earlier, so .entries[0] is the "preferred" (oldest) commit/entry
122+
| sort_by(.SOURCE_DATE_EPOCH)
123+
# (this does not lose *significant* ordering because it is a "stable sort", so same SOURCE_DATE_EPOCH gets the same position, unlike "unique_by" which would be destructive, even though it is ultimately what we are emulating with this two-part construction of a new .entries value)
124+
)
118125
end
119126
)
120127
# TODO a lot of this could be removed/parsed during the above reduce, since it has to parse things in build order anyhow

0 commit comments

Comments
 (0)