Automated Test: error-upsampling-race-condition #309
@@ -0,0 +1,140 @@

```python
from collections.abc import Sequence
from types import ModuleType
from typing import Any

from rest_framework.request import Request

from sentry import options
from sentry.models.organization import Organization
from sentry.search.events.types import SnubaParams
from sentry.utils.cache import cache


def is_errors_query_for_error_upsampled_projects(
    snuba_params: SnubaParams,
    organization: Organization,
    dataset: ModuleType,
    request: Request,
) -> bool:
    """
    Determine if this query should use error upsampling transformations.
    Only applies when ALL projects are allowlisted and we're querying error events.

    Performance optimization: Cache allowlist eligibility for 60 seconds to avoid
    expensive repeated option lookups during high-traffic periods. This is safe
    because allowlist changes are infrequent and eventual consistency is acceptable.
    """
    cache_key = f"error_upsampling_eligible:{organization.id}:{hash(tuple(sorted(snuba_params.project_ids)))}"

    # Check cache first for performance optimization
    cached_result = cache.get(cache_key)
    if cached_result is not None:
        return cached_result and _should_apply_sample_weight_transform(dataset, request)

    # Cache miss - perform fresh allowlist check
    is_eligible = _are_all_projects_error_upsampled(snuba_params.project_ids, organization)

    # Cache for 60 seconds to improve performance during traffic spikes
    cache.set(cache_key, is_eligible, 60)

    return is_eligible and _should_apply_sample_weight_transform(dataset, request)


def _are_all_projects_error_upsampled(
    project_ids: Sequence[int], organization: Organization
) -> bool:
    """
    Check if ALL projects in the query are allowlisted for error upsampling.
    Only returns True if all projects pass the allowlist condition.

    NOTE: This function reads the allowlist configuration fresh each time,
    which means it can return different results between calls if the
    configuration changes during request processing. This is intentional
    to ensure we always have the latest configuration state.
    """
    if not project_ids:
        return False

    allowlist = options.get("issues.client_error_sampling.project_allowlist", [])
    if not allowlist:
        return False

    # All projects must be in the allowlist
    result = all(project_id in allowlist for project_id in project_ids)
    return result
```
|
Comment on lines
+43
to
+64
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion | 🟠 Major
Two issues here: the `organization` argument is never used inside the function (Ruff flags it as ARG001), and each `project_id in allowlist` check scans the entire list, making the overall check O(projects × allowlist size). Dropping the unused parameter and converting the allowlist to a set fixes both.
Proposed fix:

```diff
 def _are_all_projects_error_upsampled(
-    project_ids: Sequence[int], organization: Organization
+    project_ids: Sequence[int],
 ) -> bool:
     if not project_ids:
         return False

     allowlist = options.get("issues.client_error_sampling.project_allowlist", [])
     if not allowlist:
         return False

+    allowlist_set = set(allowlist)
     # All projects must be in the allowlist
-    result = all(project_id in allowlist for project_id in project_ids)
+    result = all(project_id in allowlist_set for project_id in project_ids)
     return result
```

If the unused `organization` parameter is removed, the call site must be updated as well:

```diff
-    is_eligible = _are_all_projects_error_upsampled(snuba_params.project_ids, organization)
+    is_eligible = _are_all_projects_error_upsampled(snuba_params.project_ids)
```

🧰 Tools: 🪛 Ruff (0.14.14) — [warning] 44-44: Unused function argument: `organization` (ARG001)
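To make the cost difference concrete, a standalone sketch (hypothetical sizes, not Sentry code) of list versus set membership — a list scan touches every element, while a set lookup is an O(1) average-case hash probe:

```python
import timeit

allowlist = list(range(10_000))           # hypothetical allowlist of project IDs
allowlist_set = set(allowlist)
project_ids = list(range(9_900, 10_000))  # 100 projects to check

# Same check, list-backed vs. set-backed, repeated 100 times each.
list_time = timeit.timeit(lambda: all(p in allowlist for p in project_ids), number=100)
set_time = timeit.timeit(lambda: all(p in allowlist_set for p in project_ids), number=100)
print(f"list: {list_time:.4f}s  set: {set_time:.4f}s")
```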
```python
def invalidate_upsampling_cache(organization_id: int, project_ids: Sequence[int]) -> None:
    """
    Invalidate the upsampling eligibility cache for the given organization and projects.

    This should be called when the allowlist configuration changes to ensure
    cache consistency across the system.
    """
    cache_key = f"error_upsampling_eligible:{organization_id}:{hash(tuple(sorted(project_ids)))}"
    cache.delete(cache_key)
```
Comment on lines +67 to +74

Cache invalidation key must match the key used for `cache.set` — both embed the same process-dependent `hash()` value. This function has the same `hash()` instability issue flagged in the comment below, so a delete issued from one worker may not remove the key written by another. Consider whether a prefix-based invalidation or a versioned approach would be more robust.
```python
def transform_query_columns_for_error_upsampling(
    query_columns: Sequence[str],
) -> list[str]:
    """
    Transform aggregation functions to use sum(sample_weight) instead of count()
    for error upsampling. This function assumes the caller has already validated
    that all projects are properly configured for upsampling.

    Note: We rely on the database schema to ensure sample_weight exists for all
    events in allowlisted projects, so no additional null checks are needed here.
    """
    transformed_columns = []
    for column in query_columns:
        column_lower = column.lower().strip()

        if column_lower == "count()":
            # Transform to upsampled count - assumes sample_weight column exists
            # for all events in allowlisted projects per our data model requirements
            transformed_columns.append("upsampled_count() as count")
        else:
            transformed_columns.append(column)

    return transformed_columns


def _should_apply_sample_weight_transform(dataset: Any, request: Request) -> bool:
    """
    Determine if we should apply sample_weight transformations based on the dataset
    and query context. Only apply for error events since sample_weight doesn't exist
    for transactions.
    """
    from sentry.snuba import discover, errors

    # Always apply for the errors dataset
    if dataset == errors:
        return True

    from sentry.snuba import transactions

    # Never apply for the transactions dataset
    if dataset == transactions:
        return False

    # For the discover dataset, check if we're querying errors specifically
    if dataset == discover:
        result = _is_error_focused_query(request)
        return result

    # For other datasets (spans, metrics, etc.), don't apply
    return False


def _is_error_focused_query(request: Request) -> bool:
    """
    Check if a query is focused on error events.
    Reduced to only check for event.type:error to err on the side of caution.
    """
    query = request.GET.get("query", "").lower()

    if "event.type:error" in query:
        return True

    return False
```
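For illustration, what the column transform produces on a mixed list (a sketch of expected behavior: only the exact string `count()` is rewritten, every other column passes through unchanged):

```python
columns = ["count()", "count_unique(user)", "last_seen()"]
print(transform_query_columns_for_error_upsampling(columns))
# ['upsampled_count() as count', 'count_unique(user)', 'last_seen()']
```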
Comment on lines +130 to +140
A query like `!event.type:error` contains the substring `event.type:error`, so the current check treats negated queries as error-focused and applies the transform when it should not.

Proposed fix:

```diff
 def _is_error_focused_query(request: Request) -> bool:
     """
     Check if a query is focused on error events.
-    Reduced to only check for event.type:error to err on the side of caution.
+    Only matches positive event.type:error, not negated queries.
     """
     query = request.GET.get("query", "").lower()

-    if "event.type:error" in query:
+    # Check for event.type:error but not !event.type:error
+    if "event.type:error" in query and "!event.type:error" not in query:
         return True

     return False
```
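As a quick sanity check of the proposed logic — a standalone sketch where `is_error_focused` is a hypothetical extraction of the substring test, not the actual Sentry helper:

```python
def is_error_focused(query: str) -> bool:
    # Mirrors the proposed check: positive match only, negation excluded.
    q = query.lower()
    return "event.type:error" in q and "!event.type:error" not in q


assert is_error_focused("event.type:error browser.name:Chrome")
assert not is_error_focused("!event.type:error")          # negated query now rejected
assert not is_error_focused("transaction:/api/checkout")  # unrelated query
```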
`hash()` is not stable across Python processes — cache keys will diverge in multi-process deployments.

Python's built-in `hash()` is randomized per process for strings (due to `PYTHONHASHSEED`), and its output for tuples is an implementation detail that can differ across Python builds and versions. In a multi-worker deployment, workers can therefore compute different hashes for the same set of project IDs, meaning `invalidate_upsampling_cache` called from one process won't clear keys set by another. Use a deterministic representation instead.

Proposed fix (committable suggestion collapsed in this view): apply the same fix in `invalidate_upsampling_cache` (line 73).
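Since the committable suggestion itself is not shown, here is a minimal sketch of one deterministic approach — an `hashlib.md5` digest over the sorted IDs; the helper name and example values are hypothetical, not the reviewer's exact suggestion:

```python
import hashlib
from collections.abc import Sequence


def _stable_project_ids_digest(project_ids: Sequence[int]) -> str:
    # An md5 digest of the sorted, comma-joined IDs is identical in every
    # process and Python build, unlike hash(tuple(...)), so cache.set,
    # cache.get, and cache.delete all agree on the key.
    joined = ",".join(str(i) for i in sorted(project_ids))
    return hashlib.md5(joined.encode("utf-8")).hexdigest()


# Hypothetical values, purely for illustration:
organization_id = 42
cache_key = f"error_upsampling_eligible:{organization_id}:{_stable_project_ids_digest([3, 1, 2])}"
print(cache_key)
```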