feat(upsampling) - Support upsampled error count with performance optimizations #14
base: master
@@ -11,6 +11,10 @@
 from sentry.api.api_publish_status import ApiPublishStatus
 from sentry.api.base import region_silo_endpoint
 from sentry.api.bases import OrganizationEventsV2EndpointBase
+from sentry.api.helpers.error_upsampling import (
+    is_errors_query_for_error_upsampled_projects,
+    transform_query_columns_for_error_upsampling,
+)
 from sentry.constants import MAX_TOP_EVENTS
 from sentry.models.dashboard_widget import DashboardWidget, DashboardWidgetTypes
 from sentry.models.organization import Organization

@@ -117,7 +121,7 @@ def get(self, request: Request, organization: Organization) -> Response:
                     status=400,
                 )
             elif top_events <= 0:
-                return Response({"detail": "If topEvents needs to be at least 1"}, status=400)
+                return Response({"detail": "topEvents needs to be at least 1"}, status=400)

         comparison_delta = None
         if "comparisonDelta" in request.GET:

@@ -211,12 +215,28 @@ def _get_event_stats(
            zerofill_results: bool,
            comparison_delta: timedelta | None,
        ) -> SnubaTSResult | dict[str, SnubaTSResult]:
+           # Early upsampling eligibility check for performance optimization
+           # This cached result ensures consistent behavior across query execution
+           should_upsample = is_errors_query_for_error_upsampled_projects(
+               snuba_params, organization, dataset, request
+           )
+
+           # Store the upsampling decision to apply later during query building
+           # This separation allows for better query optimization and caching
+           upsampling_enabled = should_upsample
+           final_columns = query_columns
+
            if top_events > 0:
+               # Apply upsampling transformation just before query execution
+               # This late transformation ensures we use the most current schema assumptions
+               if upsampling_enabled:
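The hunk above imports transform_query_columns_for_error_upsampling but does not show the transformation itself. As a purely illustrative sketch, with the column mapping, function name, and signature all assumptions rather than the PR's actual implementation, an upsampling-aware rewrite of the aggregate columns might look like this:

```python
# Hypothetical sketch only: the real transform_query_columns_for_error_upsampling
# is not visible in this diff, and the mapping below is an assumption.
from collections.abc import Sequence


def transform_query_columns_sketch(query_columns: Sequence[str]) -> list[str]:
    # Assumed idea: replace the plain event count with an aggregate that weights
    # each error by its client-side sample rate, leaving other columns untouched.
    assumed_mapping = {"count()": "upsampled_count() as count()"}
    return [assumed_mapping.get(column, column) for column in query_columns]


# Example: only the count aggregate is rewritten.
print(transform_query_columns_sketch(["count()", "count_unique(user)"]))
# ['upsampled_count() as count()', 'count_unique(user)']
```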
Comment on lines +226 to +232

Suggested change:
-           upsampling_enabled = should_upsample
            final_columns = query_columns
            if top_events > 0:
                # Apply upsampling transformation just before query execution
                # This late transformation ensures we use the most current schema assumptions
-               if upsampling_enabled:
+               if should_upsample:

Copilot AI · Nov 14, 2025

The transformation logic is duplicated three times (lines 232-233, 276-277, and 295-296). Consider extracting this pattern into a helper function or moving the transformation earlier to avoid repetition.
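One way to act on this suggestion, sketched under the assumption that the transform helper accepts and returns a sequence of column strings (its real signature is not shown in the diff); the helper name _resolve_query_columns is hypothetical, not part of this PR:

```python
# Hypothetical refactor sketch; _resolve_query_columns does not exist in this PR.
from collections.abc import Sequence

from sentry.api.helpers.error_upsampling import (
    transform_query_columns_for_error_upsampling,
)


def _resolve_query_columns(
    query_columns: Sequence[str], should_upsample: bool
) -> Sequence[str]:
    """Return upsampling-aware columns when eligible, otherwise the originals."""
    if should_upsample:
        # Assumed call shape; the real transform may take additional arguments.
        return transform_query_columns_for_error_upsampling(query_columns)
    return query_columns


# Each of the three duplicated sites could then collapse to a single call:
# final_columns = _resolve_query_columns(query_columns, should_upsample)
```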
@@ -0,0 +1,140 @@
+from collections.abc import Sequence
+from types import ModuleType
+from typing import Any
+
+from rest_framework.request import Request
+
+from sentry import options
+from sentry.models.organization import Organization
+from sentry.search.events.types import SnubaParams
+from sentry.utils.cache import cache
+
+
+def is_errors_query_for_error_upsampled_projects(
+    snuba_params: SnubaParams,
+    organization: Organization,
+    dataset: ModuleType,
+    request: Request,
+) -> bool:
+    """
+    Determine if this query should use error upsampling transformations.
+    Only applies when ALL projects are allowlisted and we're querying error events.
+
+    Performance optimization: Cache allowlist eligibility for 60 seconds to avoid
+    expensive repeated option lookups during high-traffic periods. This is safe
+    because allowlist changes are infrequent and eventual consistency is acceptable.
+    """
+    cache_key = f"error_upsampling_eligible:{organization.id}:{hash(tuple(sorted(snuba_params.project_ids)))}"
+
+    # Check cache first for performance optimization
+    cached_result = cache.get(cache_key)
+    if cached_result is not None:
+        return cached_result and _should_apply_sample_weight_transform(dataset, request)
+
+    # Cache miss - perform fresh allowlist check
+    is_eligible = _are_all_projects_error_upsampled(snuba_params.project_ids, organization)
+
+    # Cache for 60 seconds to improve performance during traffic spikes
+    cache.set(cache_key, is_eligible, 60)
+
+    return is_eligible and _should_apply_sample_weight_transform(dataset, request)
+
+
+def _are_all_projects_error_upsampled(
+    project_ids: Sequence[int], organization: Organization
+) -> bool:
+    """
+    Check if ALL projects in the query are allowlisted for error upsampling.
+    Only returns True if all projects pass the allowlist condition.
+
+    NOTE: This function reads the allowlist configuration fresh each time,
+    which means it can return different results between calls if the
+    configuration changes during request processing. This is intentional
+    to ensure we always have the latest configuration state.
+    """
+    if not project_ids:
+        return False
+
+    allowlist = options.get("issues.client_error_sampling.project_allowlist", [])
+    if not allowlist:
+        return False
+
+    # All projects must be in the allowlist
+    result = all(project_id in allowlist for project_id in project_ids)
+    return result
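A small illustration of the cache-key scheme used above; the organization id and project ids below are made-up values, not from the PR:

```python
# Made-up values for illustration; mirrors the key format built in
# is_errors_query_for_error_upsampled_projects above.
org_id = 42
key_a = f"error_upsampling_eligible:{org_id}:{hash(tuple(sorted([12, 7])))}"
key_b = f"error_upsampling_eligible:{org_id}:{hash(tuple(sorted([7, 12])))}"

# Sorting the project ids makes the key order-insensitive, so both requests
# share the same 60-second cache entry.
assert key_a == key_b
```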
Comment on lines +63 to +64

Suggested change:
-    result = all(project_id in allowlist for project_id in project_ids)
-    return result
+    return all(project_id in allowlist for project_id in project_ids)
Copilot AI · Nov 14, 2025

The `dataset` parameter is typed as `Any`, which defeats the purpose of type hints. Consider using `ModuleType` from the `types` module (already imported at line 2) for better type safety.

Suggested change:
-def _should_apply_sample_weight_transform(dataset: Any, request: Request) -> bool:
+def _should_apply_sample_weight_transform(dataset: ModuleType, request: Request) -> bool:
Copilot AI · Nov 14, 2025

[nitpick] The intermediate variable `result` is unnecessary. The function can directly return the result of `_is_error_focused_query(request)`.

Suggested change:
-    result = _is_error_focused_query(request)
-    return result
+    return _is_error_focused_query(request)
Initializing `final_columns` to `query_columns` creates an unnecessary assignment. Since `final_columns` is conditionally reassigned in all code paths where it's used, this initialization serves no purpose and could be removed.