Skip to content

Commit a6a2456

Browse files
authored
Canonicalize targets (#108)
This canonicalize all the labels in all attributes found that are label related using the output of `bazel mod dump_repo_mapping ""` for each commit. This addresses #105 It doesn't have any integration tests yet because it would be much better to leverage #104 which is still in the works but it was tested against our internal monorepo and the reproducer in #105 Label like attributes (string defined but has nodep = True) are special and handled as an exception as if they were labels, and thus also converted. (See [this thread](https://bazelbuild.slack.com/archives/CDCMRLS23/p1742821059464199))
1 parent b7b3ade commit a6a2456

File tree

7 files changed

+467
-76
lines changed

7 files changed

+467
-76
lines changed

pkg/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ go_library(
66
"bazel.go",
77
"configurations.go",
88
"hash_cache.go",
9+
"normalizer.go",
910
"target_determinator.go",
1011
"targets_list.go",
1112
"walker.go",
@@ -31,6 +32,7 @@ go_test(
3132
name = "pkg_test",
3233
srcs = [
3334
"hash_cache_test.go",
35+
"normalizer_test.go",
3436
"target_determinator_test.go",
3537
],
3638
data = ["//testdata/HelloWorld:all_srcs"],

pkg/hash_cache.go

Lines changed: 56 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,19 @@ import (
2525
)
2626

2727
// NewTargetHashCache creates a TargetHashCache which uses context for metadata lookups.
28-
func NewTargetHashCache(context map[gazelle_label.Label]map[Configuration]*analysis.ConfiguredTarget, bazelRelease string) *TargetHashCache {
28+
func NewTargetHashCache(
29+
context map[gazelle_label.Label]map[Configuration]*analysis.ConfiguredTarget,
30+
normalizer *Normalizer,
31+
bazelRelease string,
32+
) *TargetHashCache {
2933
bazelVersionSupportsConfiguredRuleInputs := isConfiguredRuleInputsSupported(bazelRelease)
3034

3135
return &TargetHashCache{
3236
context: context,
3337
fileHashCache: &fileHashCache{
3438
cache: make(map[string]*cacheEntry),
3539
},
40+
normalizer: normalizer,
3641
bazelRelease: bazelRelease,
3742
bazelVersionSupportsConfiguredRuleInputs: bazelVersionSupportsConfiguredRuleInputs,
3843
cache: make(map[gazelle_label.Label]map[Configuration]*cacheEntry),
@@ -63,6 +68,8 @@ type TargetHashCache struct {
6368
bazelRelease string
6469
bazelVersionSupportsConfiguredRuleInputs bool
6570

71+
normalizer *Normalizer
72+
6673
frozen bool
6774

6875
cacheLock sync.Mutex
@@ -138,6 +145,10 @@ func (thc *TargetHashCache) Freeze() {
138145
thc.frozen = true
139146
}
140147

148+
func (thc *TargetHashCache) ParseCanonicalLabel(label string) (gazelle_label.Label, error) {
149+
return thc.normalizer.ParseCanonicalLabel(label)
150+
}
151+
141152
// Difference represents a difference of a target between two commits.
142153
// All fields except Category are optional.
143154
type Difference struct {
@@ -264,14 +275,16 @@ func WalkDiffs(before *TargetHashCache, after *TargetHashCache, labelAndConfigur
264275
Before: string(attributeBeforeJson),
265276
})
266277
} else {
267-
if !equivalentAttributes(attributeBefore, attributeAfter) {
278+
normalizedBeforeAttribute := before.AttributeForSerialization(attributeBefore)
279+
normalizedAfterAttribute := after.AttributeForSerialization(attributeAfter)
280+
if !equivalentAttributes(normalizedBeforeAttribute, normalizedAfterAttribute) {
268281
if attributeName == "$rule_implementation_hash" {
269282
differences = append(differences, Difference{
270283
Category: "RuleImplementedChanged",
271284
})
272285
} else {
273-
attributeBeforeJson, _ := protojson.Marshal(AttributeForSerialization(attributeBefore))
274-
attributeAfterJson, _ := protojson.Marshal(AttributeForSerialization(attributeAfter))
286+
attributeBeforeJson, _ := protojson.Marshal(normalizedBeforeAttribute)
287+
attributeAfterJson, _ := protojson.Marshal(normalizedAfterAttribute)
275288
differences = append(differences, Difference{
276289
Category: "AttributeChanged",
277290
Key: attributeName,
@@ -285,7 +298,7 @@ func WalkDiffs(before *TargetHashCache, after *TargetHashCache, labelAndConfigur
285298
sortedAttributeNamesAfter := sortKeys(attributesAfter)
286299
for _, attributeName := range sortedAttributeNamesAfter {
287300
if _, ok := attributesBefore[attributeName]; !ok {
288-
attributeAfterJson, _ := protojson.Marshal(AttributeForSerialization(attributesAfter[attributeName]))
301+
attributeAfterJson, _ := protojson.Marshal(after.AttributeForSerialization(attributesAfter[attributeName]))
289302
differences = append(differences, Difference{
290303
Category: "AttributeAdded",
291304
Key: attributeName,
@@ -308,7 +321,8 @@ func WalkDiffs(before *TargetHashCache, after *TargetHashCache, labelAndConfigur
308321

309322
for _, ruleInputLabelAndConfigurations := range ruleInputLabelsAndConfigurationsAfter {
310323
ruleInputLabel := ruleInputLabelAndConfigurations.Label
311-
if _, ok := ruleInputLabelsToConfigurationsBefore[ruleInputLabel]; !ok {
324+
knownConfigurationsBefore, ok := ruleInputLabelsToConfigurationsBefore[ruleInputLabel]
325+
if !ok {
312326
differences = append(differences, Difference{
313327
Category: "RuleInputAdded",
314328
Key: ruleInputLabel.String(),
@@ -318,7 +332,6 @@ func WalkDiffs(before *TargetHashCache, after *TargetHashCache, labelAndConfigur
318332
// query information, so we could filter away e.g. host changes when we only have a target dep.
319333
// Unfortunately, Bazel doesn't currently expose this.
320334
// See https://github.com/bazelbuild/bazel/issues/14610#issuecomment-1024460141
321-
knownConfigurationsBefore := ruleInputLabelsToConfigurationsBefore[ruleInputLabel]
322335
knownConfigurationsAfter := ruleInputLabelsToConfigurationsAfter[ruleInputLabel]
323336

324337
for _, knownConfigurationAfter := range knownConfigurationsAfter.SortedSlice() {
@@ -369,6 +382,34 @@ func WalkDiffs(before *TargetHashCache, after *TargetHashCache, labelAndConfigur
369382
return differences, nil
370383
}
371384

385+
// AttributeForSerialization redacts details about an attribute which don't affect the output of
386+
// building them, and returns equivalent canonical attribute metadata.
387+
// In particular it redacts:
388+
// - Whether an attribute was explicitly specified (because the effective value is all that
389+
// matters).
390+
// - Any attribute named `generator_location`, because these point to absolute paths for
391+
// built-in `cc_toolchain_suite` targets such as `@local_config_cc//:toolchain`.
392+
func (thc *TargetHashCache) AttributeForSerialization(rawAttr *build.Attribute) *build.Attribute {
393+
normalized := *rawAttr
394+
normalized.ExplicitlySpecified = nil
395+
396+
// Redact generator_location, which typically contains absolute paths but has no bearing on the
397+
// functioning of a rule.
398+
// This is also done in Bazel's internal target hash computation. See:
399+
// https://github.com/bazelbuild/bazel/blob/6971b016f1e258e3bb567a0f9fe7a88ad565d8f2/src/main/java/com/google/devtools/build/lib/query2/query/output/SyntheticAttributeHashCalculator.java#L78-L81
400+
if normalized.Name != nil {
401+
if *normalized.Name == "generator_location" {
402+
normalized.StringValue = nil
403+
}
404+
}
405+
406+
return thc.normalizer.NormalizeAttribute(&normalized)
407+
}
408+
409+
func equivalentAttributes(left, right *build.Attribute) bool {
410+
return proto.Equal(left, right)
411+
}
412+
372413
func indexByLabel(labelsAndConfigurations []LabelAndConfigurations) map[gazelle_label.Label]*ss.SortedSet[Configuration] {
373414
m := make(map[gazelle_label.Label]*ss.SortedSet[Configuration], len(labelsAndConfigurations))
374415
for _, labelAndConfigurations := range labelsAndConfigurations {
@@ -443,7 +484,7 @@ func hashTarget(thc *TargetHashCache, labelAndConfiguration LabelAndConfiguratio
443484
return hashRule(thc, target.Rule, configuredTarget.Configuration)
444485
case build.Target_GENERATED_FILE:
445486
hasher := sha256.New()
446-
generatingLabel, err := ParseCanonicalLabel(*target.GeneratedFile.GeneratingRule)
487+
generatingLabel, err := thc.ParseCanonicalLabel(*target.GeneratedFile.GeneratingRule)
447488
if err != nil {
448489
return nil, fmt.Errorf("failed to parse generated file generating rule label %s: %w", *target.GeneratedFile.GeneratingRule, err)
449490
}
@@ -472,15 +513,19 @@ func hashRule(thc *TargetHashCache, rule *build.Rule, configuration *analysis.Co
472513
hasher.Write([]byte(rule.GetRuleClass()))
473514
hasher.Write([]byte(rule.GetSkylarkEnvironmentHashCode()))
474515
hasher.Write([]byte(configuration.GetChecksum()))
516+
475517
// TODO: Consider using `$internal_attr_hash` from https://github.com/bazelbuild/bazel/blob/6971b016f1e258e3bb567a0f9fe7a88ad565d8f2/src/main/java/com/google/devtools/build/lib/query2/query/output/SyntheticAttributeHashCalculator.java
476518
// rather than hashing attributes ourselves.
477519
// On the plus side, this builds in some heuristics from Bazel (e.g. ignoring `generator_location`).
478520
// On the down side, it would even further decouple our "hashing" and "diffing" procedures.
479521
for _, attr := range rule.GetAttribute() {
480-
protoBytes, err := proto.Marshal(AttributeForSerialization(attr))
522+
normalizedAttribute := thc.AttributeForSerialization(attr)
523+
524+
protoBytes, err := proto.Marshal(normalizedAttribute)
481525
if err != nil {
482526
return nil, err
483527
}
528+
484529
hasher.Write(protoBytes)
485530
}
486531

@@ -512,7 +557,7 @@ func getConfiguredRuleInputs(thc *TargetHashCache, rule *build.Rule, ownConfigur
512557
labelsAndConfigurations := make([]LabelAndConfigurations, 0)
513558
if thc.bazelVersionSupportsConfiguredRuleInputs {
514559
for _, configuredRuleInput := range rule.ConfiguredRuleInput {
515-
ruleInputLabel, err := ParseCanonicalLabel(configuredRuleInput.GetLabel())
560+
ruleInputLabel, err := thc.ParseCanonicalLabel(configuredRuleInput.GetLabel())
516561
if err != nil {
517562
return nil, fmt.Errorf("failed to parse configuredRuleInput label %s: %w", configuredRuleInput.GetLabel(), err)
518563
}
@@ -534,7 +579,7 @@ func getConfiguredRuleInputs(thc *TargetHashCache, rule *build.Rule, ownConfigur
534579
}
535580
} else {
536581
for _, ruleInputLabelString := range rule.RuleInput {
537-
ruleInputLabel, err := ParseCanonicalLabel(ruleInputLabelString)
582+
ruleInputLabel, err := thc.ParseCanonicalLabel(ruleInputLabelString)
538583
if err != nil {
539584
return nil, fmt.Errorf("failed to parse ruleInput label %s: %w", ruleInputLabelString, err)
540585
}

pkg/hash_cache_test.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -380,19 +380,21 @@ func layoutProject(t *testing.T) (string, *analysis.CqueryResult) {
380380
}
381381

382382
func parseResult(t *testing.T, result *analysis.CqueryResult, bazelRelease string) *TargetHashCache {
383-
cqueryResult, err := ParseCqueryResult(result)
383+
n := Normalizer{}
384+
cqueryResult, err := ParseCqueryResult(result, &n)
384385
if err != nil {
385386
t.Fatalf("Failed to parse cquery result: %v", err)
386387
}
387-
return NewTargetHashCache(cqueryResult, bazelRelease)
388+
return NewTargetHashCache(cqueryResult, &n, bazelRelease)
388389
}
389390

390391
func areHashesEqual(left, right []byte) bool {
391392
return reflect.DeepEqual(left, right)
392393
}
393394

394395
func mustParseLabel(s string) label.Label {
395-
l, err := ParseCanonicalLabel(s)
396+
n := Normalizer{}
397+
l, err := n.ParseCanonicalLabel(s)
396398
if err != nil {
397399
panic(err)
398400
}

pkg/normalizer.go

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
package pkg
2+
3+
import (
4+
"github.com/bazel-contrib/target-determinator/third_party/protobuf/bazel/build"
5+
"github.com/bazelbuild/bazel-gazelle/label"
6+
)
7+
8+
// Normalizer is a struct that contains a mapping of non-canonical repository names to canonical repository names.
9+
type Normalizer struct {
10+
Mapping map[string]string
11+
}
12+
13+
// ParseCanonicalLabel parses a label from a string, and removes sources of inconsequential difference which would make comparing two labels fail.
14+
// In particular, it treats @// the same as //
15+
// If the label is not canonical, it will attempt to map the repository to its canonical form coming from `bazel mod dump_repo_mapping ""`.
16+
func (n *Normalizer) ParseCanonicalLabel(s string) (label.Label, error) {
17+
l, err := label.Parse(s)
18+
if err != nil {
19+
return l, err
20+
}
21+
22+
if !l.Canonical && l.Repo != "" {
23+
mappedValue, ok := n.Mapping[l.Repo]
24+
if ok && l.Repo != mappedValue {
25+
l.Repo = mappedValue
26+
l.Canonical = true
27+
}
28+
}
29+
30+
if l.Repo == "@" {
31+
l.Repo = ""
32+
}
33+
34+
return l, nil
35+
}
36+
37+
func (n *Normalizer) NormalizeAttribute(attr *build.Attribute) *build.Attribute {
38+
attrType := attr.GetType()
39+
40+
// An attribute with a nodep property can also hold labels
41+
// It should be handled as an exception, see https://bazelbuild.slack.com/archives/CDCMRLS23/p1742821059464199
42+
isNoDepAttribute := attrType == build.Attribute_STRING && attr.Nodep != nil && *attr.Nodep
43+
44+
if attrType == build.Attribute_OUTPUT || attrType == build.Attribute_LABEL || isNoDepAttribute {
45+
keyLabel, parseErr := n.ParseCanonicalLabel(attr.GetStringValue())
46+
47+
if parseErr == nil {
48+
value := keyLabel.String()
49+
attr.StringValue = &value
50+
}
51+
}
52+
53+
isNoDepListAttribute := attrType == build.Attribute_STRING_LIST && attr.Nodep != nil && *attr.Nodep
54+
55+
if attrType == build.Attribute_OUTPUT_LIST || attrType == build.Attribute_LABEL_LIST || isNoDepListAttribute {
56+
for idx, dep := range attr.GetStringListValue() {
57+
keyLabel, parseErr := n.ParseCanonicalLabel(dep)
58+
59+
if parseErr == nil {
60+
attr.StringListValue[idx] = keyLabel.String()
61+
}
62+
}
63+
}
64+
65+
if attrType == build.Attribute_LABEL_DICT_UNARY {
66+
for idx, dep := range attr.GetLabelDictUnaryValue() {
67+
keyLabel, parseErr := n.ParseCanonicalLabel(*dep.Value)
68+
69+
if parseErr == nil {
70+
newValue := keyLabel.String()
71+
attr.GetLabelDictUnaryValue()[idx].Value = &newValue
72+
}
73+
}
74+
}
75+
76+
if attrType == build.Attribute_LABEL_LIST_DICT {
77+
for idx, dep := range attr.GetLabelListDictValue() {
78+
for key, value := range dep.Value {
79+
l, parseErr := n.ParseCanonicalLabel(value)
80+
81+
if parseErr == nil {
82+
attr.GetLabelListDictValue()[idx].Value[key] = l.String()
83+
}
84+
}
85+
}
86+
}
87+
88+
if attrType == build.Attribute_LABEL_KEYED_STRING_DICT {
89+
for idx, dep := range attr.GetLabelKeyedStringDictValue() {
90+
keyLabel, parseErr := n.ParseCanonicalLabel(*dep.Key)
91+
92+
if parseErr == nil {
93+
newKey := keyLabel.String()
94+
attr.GetLabelKeyedStringDictValue()[idx].Key = &newKey
95+
}
96+
97+
}
98+
}
99+
100+
return attr
101+
}

0 commit comments

Comments
 (0)