Skip to content

Commit 0908131

Browse files
authored
feat: Handle moved features during data ingestion (#1769)
This change introduces logic to handle features that have been marked as "moved" to a new feature key. During data ingestion for WPT and Chromium histograms, if a feature has been moved, this change ensures that the metrics are associated with the new, redirected feature key. This allows for future ingestions to be correctly attributed while upstream sources (like WPT test annotations or Chromium mojom files) are being updated. These checks are implemented at different layers depending on the data source: - For WPT, the migration happens in the workflow layer before scoring. This allows the scores to be calculated with the corrected feature keys. - For Chromium histograms, the migration happens in the spanner adapter layer. This is because the full list of feature keys is not available until this layer, where it is fetched from the database. The implementation also introduces a check to prevent conflicts where both the old and new feature keys are present in the source data. When this conflict occurs, an error is logged with the message "conflict migrating feature key", which will be used for alerting in GCP. We can also add alerting for these messages too: - "migrating feature key for histogram" - "migrating feature key for test" These will let us know to update the upstream sources
1 parent f88731e commit 0908131

File tree

7 files changed

+679
-29
lines changed

7 files changed

+679
-29
lines changed

lib/gcpspanner/spanneradapters/chromium_historgram_enum_consumer.go

Lines changed: 100 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"strings"
2222

2323
"github.com/GoogleChrome/webstatus.dev/lib/gcpspanner"
24+
"github.com/GoogleChrome/webstatus.dev/lib/gen/jsonschema/web_platform_dx__web_features"
2425
"github.com/GoogleChrome/webstatus.dev/lib/metricdatatypes"
2526
"golang.org/x/text/cases"
2627
"golang.org/x/text/language"
@@ -45,18 +46,45 @@ type ChromiumHistogramEnumsClient interface {
4546
UpsertWebFeatureChromiumHistogramEnumValue(context.Context, gcpspanner.WebFeatureChromiumHistogramEnumValue) error
4647
GetIDFromFeatureKey(context.Context, *gcpspanner.FeatureIDFilter) (*string, error)
4748
FetchAllFeatureKeys(context.Context) ([]string, error)
49+
GetAllMovedWebFeatures(ctx context.Context) ([]gcpspanner.MovedWebFeature, error)
4850
}
4951

5052
// Used by GCP Log-based metrics to extract the data about mismatch mappings.
5153
const logMissingFeatureIDMetricMsg = "unable to find feature ID. skipping mapping"
5254

55+
var ErrConflictMigratingFeatureKey = errors.New("conflict migrating feature key")
56+
57+
func (c *ChromiumHistogramEnumConsumer) GetAllMovedWebFeatures(
58+
ctx context.Context) (map[string]web_platform_dx__web_features.FeatureMovedData, error) {
59+
movedFeatures, err := c.client.GetAllMovedWebFeatures(ctx)
60+
if err != nil {
61+
return nil, err
62+
}
63+
64+
return convertGCPSpannerMovedFeaturesToMap(movedFeatures), nil
65+
}
66+
5367
func (c *ChromiumHistogramEnumConsumer) SaveHistogramEnums(
5468
ctx context.Context, data metricdatatypes.HistogramMapping) error {
5569
featureKeys, err := c.client.FetchAllFeatureKeys(ctx)
5670
if err != nil {
5771
return errors.Join(ErrFailedToGetFeatureKeys, err)
5872
}
5973
enumToFeatureKeyMap := createEnumToFeatureKeyMap(featureKeys)
74+
75+
histogramsToEnumMap, histogramsToAllFeatureKeySet := buildHistogramMaps(data, enumToFeatureKeyMap)
76+
77+
// Get the moved features
78+
movedFeatures, err := c.GetAllMovedWebFeatures(ctx)
79+
if err != nil {
80+
return err
81+
}
82+
83+
err = migrateMovedFeatures(ctx, histogramsToEnumMap, histogramsToAllFeatureKeySet, movedFeatures)
84+
if err != nil {
85+
return err
86+
}
87+
6088
// Create mapping of anticipated enums to feature keys
6189
for histogram, enums := range data {
6290
enumID, err := c.client.UpsertChromiumHistogramEnum(ctx, gcpspanner.ChromiumHistogramEnum{
@@ -75,30 +103,27 @@ func (c *ChromiumHistogramEnumConsumer) SaveHistogramEnums(
75103
return errors.Join(ErrFailedToStoreEnumValue, err)
76104
}
77105

78-
featureKey, found := enumToFeatureKeyMap[enum.Label]
79-
if !found {
80-
slog.WarnContext(ctx,
81-
logMissingFeatureIDMetricMsg,
82-
"label", enum.Label)
83-
106+
featureKey := histogramsToEnumMap[histogram][enum.Value]
107+
if featureKey == nil {
84108
continue
85109
}
86110

87111
featureID, err := c.client.GetIDFromFeatureKey(
88-
ctx, gcpspanner.NewFeatureKeyFilter(featureKey))
112+
ctx, gcpspanner.NewFeatureKeyFilter(*featureKey))
89113
if err != nil {
90114
slog.WarnContext(ctx,
91115
logMissingFeatureIDMetricMsg,
92116
"error", err,
93-
"featureKey", featureKey,
117+
"featureKey", *featureKey,
94118
"label", enum.Label)
95119

96120
continue
97121
}
98-
err = c.client.UpsertWebFeatureChromiumHistogramEnumValue(ctx, gcpspanner.WebFeatureChromiumHistogramEnumValue{
99-
WebFeatureID: *featureID,
100-
ChromiumHistogramEnumValueID: *enumValueID,
101-
})
122+
err = c.client.UpsertWebFeatureChromiumHistogramEnumValue(ctx,
123+
gcpspanner.WebFeatureChromiumHistogramEnumValue{
124+
WebFeatureID: *featureID,
125+
ChromiumHistogramEnumValueID: *enumValueID,
126+
})
102127
if err != nil {
103128
return errors.Join(ErrFailedToStoreEnumValueWebFeatureMapping, err)
104129
}
@@ -108,6 +133,69 @@ func (c *ChromiumHistogramEnumConsumer) SaveHistogramEnums(
108133
return nil
109134
}
110135

136+
func buildHistogramMaps(
137+
data metricdatatypes.HistogramMapping,
138+
enumToFeatureKeyMap map[string]string,
139+
) (map[metricdatatypes.HistogramName]map[int64]*string,
140+
map[metricdatatypes.HistogramName]map[string]metricdatatypes.HistogramEnumValue) {
141+
histogramsToEnumMap := map[metricdatatypes.HistogramName]map[int64]*string{}
142+
histogramsToAllFeatureKeySet := make(
143+
map[metricdatatypes.HistogramName]map[string]metricdatatypes.HistogramEnumValue)
144+
145+
for histogram, enums := range data {
146+
enumIDToFeatureKeyMap := make(map[int64]*string, len(enums))
147+
allFeatureKeySet := make(map[string]metricdatatypes.HistogramEnumValue, len(enums))
148+
for _, enum := range enums {
149+
featureKey, found := enumToFeatureKeyMap[enum.Label]
150+
if !found {
151+
enumIDToFeatureKeyMap[enum.Value] = nil
152+
153+
continue
154+
}
155+
enumIDToFeatureKeyMap[enum.Value] = &featureKey
156+
allFeatureKeySet[featureKey] = enum
157+
}
158+
histogramsToEnumMap[histogram] = enumIDToFeatureKeyMap
159+
histogramsToAllFeatureKeySet[histogram] = allFeatureKeySet
160+
}
161+
162+
return histogramsToEnumMap, histogramsToAllFeatureKeySet
163+
}
164+
165+
func migrateMovedFeatures(
166+
ctx context.Context,
167+
histogramsToEnumMap map[metricdatatypes.HistogramName]map[int64]*string,
168+
histogramsToAllFeatureKeySet map[metricdatatypes.HistogramName]map[string]metricdatatypes.HistogramEnumValue,
169+
movedFeatures map[string]web_platform_dx__web_features.FeatureMovedData,
170+
) error {
171+
for histogram, allFeaturesKeySet := range histogramsToAllFeatureKeySet {
172+
for featureKey, featureEnum := range allFeaturesKeySet {
173+
if movedFeatureData, found := movedFeatures[featureKey]; found {
174+
if _, exists := allFeaturesKeySet[movedFeatureData.RedirectTarget]; exists {
175+
// This new key already exists somewhere in the data.
176+
// That means upstream has done a partial migration to the new feature key.
177+
// Instead of assuming, we should error out and Chromium mojom file needs to be updated.
178+
slog.ErrorContext(ctx, "conflict migrating feature key. upstream currently using both keys",
179+
"histogram", histogram,
180+
"old_key", featureKey,
181+
"new_key", movedFeatureData.RedirectTarget,
182+
)
183+
184+
return ErrConflictMigratingFeatureKey
185+
}
186+
slog.WarnContext(ctx, "migrating feature key for histogram",
187+
"histogram", histogram,
188+
"old_key", featureKey,
189+
"new_key", movedFeatureData.RedirectTarget,
190+
)
191+
histogramsToEnumMap[histogram][featureEnum.Value] = &movedFeatureData.RedirectTarget
192+
}
193+
}
194+
}
195+
196+
return nil
197+
}
198+
111199
var (
112200
// ErrFailedToStoreEnum indicates the storage layer failed to store chromium enum.
113201
ErrFailedToStoreEnum = errors.New("failed to store chromium enum")

0 commit comments

Comments
 (0)