Skip to content

Commit

Permalink
Speed up sources.sh even more
Browse files Browse the repository at this point in the history
by improving slow jq section in sources.sh
  • Loading branch information
yosifkit committed Feb 28, 2025
1 parent 698b0ab commit 663ded4
Showing 1 changed file with 35 additions and 8 deletions.
43 changes: 35 additions & 8 deletions sources.sh
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,22 @@ jq <<<"$sources" --argjson pins "$externalPinsJson" '
# https://unix.stackexchange.com/a/738744/153467
reduce .[] as $a ([]; if IN(.[]; $a) then . else . += [$a] end)
;
def meld($a; $b):
	# deep merge of two values, similar to the "*" operator, except that lists are appended (with duplicates dropped) rather than replaced
	# adapted from https://stackoverflow.com/a/53666584, with the list case run through unique_unsorted
	#   - two objects: merged key by key, recursing into each value
	#   - two arrays:  concatenated, then de-duplicated via unique_unsorted
	#   - otherwise:   $b wins, unless $b is null, in which case $a is kept
	($a | type) as $left
	| ($b | type) as $right
	| if $left == "array" and $right == "array" then
		($a + $b) | unique_unsorted
	elif $left == "object" and $right == "object" then
		# "$a + $b" is only used to get the union of keys from both sides; a key missing on one side reads as null there and falls through to the scalar branches below
		reduce ($a + $b | keys_unsorted[]) as $key ({};
			.[$key] = meld($a[$key]; $b[$key])
		)
	elif $b != null then
		$b
	else
		$a
	end
;
reduce .[] as $in ({};
.[$in.sourceId] |=
if . == null then
Expand Down Expand Up @@ -254,15 +270,26 @@ jq <<<"$sources" --argjson pins "$externalPinsJson" '
# TODO a lot of this could be removed/parsed during the above reduce, since it has to parse things in build order anyhow
# TODO actually, instead, this bit should be a totally separate script so the use case of "combine sources.json files together" works better 👀
| (
# TODO make this faster, this reduce takes the longest time now
reduce to_entries[] as $e ({};
$e.key as $sourceId
| .[ $e.value.arches[] | .tags[], .archTags[] ] |= (
.[$e.value.arches | keys[]] |= (
. + [$sourceId] | unique_unsorted
)
# creating a lookup of .[tag][arch] to a list of sourceIds
[
.[] as $e
| ( $e.arches[] | .tags[], .archTags[] )
| {
key: .,
value: {
($e.arches | keys[]): [$e.sourceId]
}
}
]
# do not try to code golf this into a single reduce (dropping group_by / from_entries); that is very slow
| group_by(.key)
| [
# many small reduces, one per group of entries sharing the same key (from the group_by above), instead of one very big and expensive reduce
.[] | reduce .[] as $r ({};
meld(.; $r)
)
)
]
| from_entries
) as $tagArches
| map_values(
.arches |= with_entries(
Expand Down

0 comments on commit 663ded4

Please sign in to comment.