Automated Test: performance-enhancement-complete #331

Closed
33 changes: 25 additions & 8 deletions src/sentry/api/endpoints/organization_auditlogs.py
@@ -8,7 +8,7 @@
from sentry.api.base import control_silo_endpoint
from sentry.api.bases import ControlSiloOrganizationEndpoint
from sentry.api.bases.organization import OrganizationAuditPermission
from sentry.api.paginator import DateTimePaginator
from sentry.api.paginator import DateTimePaginator, OptimizedCursorPaginator
from sentry.api.serializers import serialize
from sentry.audit_log.manager import AuditLogEventNotRegistered
from sentry.db.models.fields.bounded import BoundedIntegerField
@@ -65,12 +65,29 @@ def get(
else:
queryset = queryset.filter(event=query["event"])

response = self.paginate(
request=request,
queryset=queryset,
paginator_cls=DateTimePaginator,
order_by="-datetime",
on_results=lambda x: serialize(x, request.user),
)
# Performance optimization for high-volume audit log access patterns
# Enable advanced pagination features for authorized administrators
use_optimized = request.GET.get("optimized_pagination") == "true"
enable_advanced = request.user.is_superuser or organization_context.member.has_global_access

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Find the definition of RpcUserOrganizationContext
rg -n "class RpcUserOrganizationContext" --type py -A 30

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 3678


🏁 Script executed:

#!/bin/bash
# Also search for the member field type annotation
rg -n "member" --type py | rg -i "RpcUserOrganizationContext" -A 5 -B 5

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 67


🏁 Script executed:

#!/bin/bash
# Check imports and definitions in the file itself
rg -n "RpcUserOrganizationContext" src/sentry/api/endpoints/organization_auditlogs.py -B 3 -A 3

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 569


🏁 Script executed:

#!/bin/bash
# Read the specific file around line 71
sed -n '65,75p' src/sentry/api/endpoints/organization_auditlogs.py

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 730


Add a None check before accessing organization_context.member.has_global_access.

RpcUserOrganizationContext.member is typed as RpcOrganizationMember | None and can be None when a user does not have membership with the organization. Line 71 accesses .has_global_access on this potentially None value, which will raise AttributeError.

Proposed fix
-        enable_advanced = request.user.is_superuser or organization_context.member.has_global_access
+        enable_advanced = request.user.is_superuser or (
+            organization_context.member is not None
+            and organization_context.member.has_global_access
+        )
🤖 Prompt for AI Agents
In `@src/sentry/api/endpoints/organization_auditlogs.py` at line 71, the code
accesses organization_context.member.has_global_access without null-checking
member; update the enable_advanced assignment to first verify
organization_context.member is not None (e.g., check organization_context.member
is not None and organization_context.member.has_global_access) in combination
with request.user.is_superuser so you don't call .has_global_access on None;
ensure this change is made where enable_advanced is defined and any related
logic that assumes member exists.


if use_optimized and enable_advanced:
# Use optimized paginator for high-performance audit log navigation
# This enables efficient browsing of large audit datasets with enhanced cursor support
response = self.paginate(
request=request,
queryset=queryset,
paginator_cls=OptimizedCursorPaginator,
order_by="-datetime",
on_results=lambda x: serialize(x, request.user),
enable_advanced_features=True, # Enable advanced pagination for admins
)
Comment on lines +68 to +83

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# First, find files related to pagination and cursors
find . -type f -name "*.py" | xargs grep -l "OptimizedCursorPaginator" | head -20

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 167


🏁 Script executed:

#!/bin/bash
# Find the paginator implementations
find . -type f -name "*.py" | xargs grep -l "class.*CursorPaginator" | head -20

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 114


🏁 Script executed:

#!/bin/bash
# Look for DateTimePaginator
find . -type f -name "*.py" | xargs grep -l "class DateTimePaginator" | head -20

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 151


🏁 Script executed:

#!/bin/bash
# Check the size of paginator.py
wc -l ./src/sentry/api/paginator.py

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 118


🏁 Script executed:

#!/bin/bash
# Read the paginator.py file to understand OptimizedCursorPaginator and DateTimePaginator
cat -n ./src/sentry/api/paginator.py

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 40091


🏁 Script executed:

#!/bin/bash
# Check the size and read organization_auditlogs.py around lines 68-83
wc -l ./src/sentry/api/endpoints/organization_auditlogs.py

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 140


🏁 Script executed:

#!/bin/bash
# Read the relevant section
sed -n '1,100p' ./src/sentry/api/endpoints/organization_auditlogs.py | cat -n

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 4367


🏁 Script executed:

#!/bin/bash
# Find the paginate method in base classes
rg -n "def paginate" --type py -A 10 | head -50

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 4075


🏁 Script executed:

#!/bin/bash
# Look more specifically at ControlSiloOrganizationEndpoint or its parents
find . -type f -name "*.py" -path "*/api/bases*" | xargs grep -l "def paginate"

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 69


🏁 Script executed:

#!/bin/bash
# Read the paginate method from base.py
sed -n '197,280p' ./src/sentry/api/base.py | cat -n

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 3827


🏁 Script executed:

#!/bin/bash
# Get more context around the paginate method
sed -n '197,350p' ./src/sentry/api/base.py | cat -n

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 7571


🏁 Script executed:

#!/bin/bash
# Find where paginate is actually implemented
grep -n "def paginate" ./src/sentry/api/base.py

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 128


🏁 Script executed:

#!/bin/bash
# Let's look at the full base.py to understand the class hierarchy
wc -l ./src/sentry/api/base.py

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 113


🏁 Script executed:

#!/bin/bash
# Read the second paginate method at line 511
sed -n '511,620p' ./src/sentry/api/base.py | cat -n

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 4744


🏁 Script executed:

#!/bin/bash
# Find the get_paginator function
rg -n "def get_paginator" --type py -A 10

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 1008


🏁 Script executed:

#!/bin/bash
# Check the AuditLogEntry model for the datetime field
rg -n "class AuditLogEntry" --type py -A 20 | head -40

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 3611


🏁 Script executed:

#!/bin/bash
# Find the AuditLogEntry model
find . -type f -name "*.py" | xargs grep -l "class AuditLogEntry"

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 271


🏁 Script executed:

#!/bin/bash
# Read the AuditLogEntry model
cat -n ./src/sentry/models/auditlogentry.py

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 7526


🏁 Script executed:

#!/bin/bash
# Check if there are any test cases for OptimizedCursorPaginator with datetime fields
rg -n "OptimizedCursorPaginator" tests/ --type py -A 5 -B 5

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 67


🏁 Script executed:

#!/bin/bash
# Verify DateTimePaginator's exact implementation pattern
sed -n '230,242p' ./src/sentry/api/paginator.py | cat -n

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 650


🏁 Script executed:

#!/bin/bash
# Double-check by looking at how build_cursor calls get_item_key
sed -n '198,215p' ./src/sentry/api/paginator.py | cat -n

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 872


OptimizedCursorPaginator is incompatible with order_by="-datetime" — will crash at runtime.

OptimizedCursorPaginator.get_item_key() calls math.floor(value) or math.ceil(value) on the raw attribute without datetime conversion. When processing AuditLogEntry items with a datetime field, this raises TypeError: type datetime.datetime doesn't define __floor__. Additionally, value_from_cursor() returns the raw cursor value (int/float) instead of a datetime object, causing the WHERE datetime >= <int> comparison in build_queryset() to fail or produce incorrect results.

DateTimePaginator correctly handles this by converting datetime values to timestamps via strftime("%s.%f") before calling math.floor/ceil, and reconstructing datetime objects in value_from_cursor(). Either extend OptimizedCursorPaginator from DateTimePaginator or replicate these datetime conversions.
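
For reference, a minimal sketch of the datetime-aware key handling described above, mirroring the DateTimePaginator pattern (a sketch under assumptions: the multiplier, the use of .timestamp(), and the simplified _is_asc stub are illustrative, not the repository's exact code):

import math
from datetime import datetime, timezone

class DateTimeKeySketch:
    multiplier = 1000   # keep millisecond precision in the integer cursor
    key = "datetime"    # the order_by field, minus the leading "-"
    desc = True         # order_by="-datetime"

    def _is_asc(self, for_prev: bool) -> bool:
        # Simplified stand-in for BasePaginator._is_asc (assumption):
        # walking backwards over a descending order reads ascending.
        return self.desc == for_prev

    def get_item_key(self, item, for_prev=False):
        value = getattr(item, self.key)
        if isinstance(value, datetime):
            # datetime -> float timestamp -> scaled number, so floor/ceil work
            value = value.timestamp() * self.multiplier
        return int(math.floor(value) if self._is_asc(for_prev) else math.ceil(value))

    def value_from_cursor(self, cursor):
        # Rebuild a timezone-aware datetime so build_queryset compares
        # datetime >= datetime rather than datetime >= int.
        return datetime.fromtimestamp(float(cursor.value) / self.multiplier, tz=timezone.utc)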

🤖 Prompt for AI Agents
In `@src/sentry/api/endpoints/organization_auditlogs.py` around lines 68 - 83,
OptimizedCursorPaginator is being used with order_by="-datetime" but its
get_item_key and value_from_cursor don't handle datetime values, causing
TypeError and invalid queries; fix by making OptimizedCursorPaginator handle
datetimes the same way DateTimePaginator does—either subclass DateTimePaginator
or copy its logic: in OptimizedCursorPaginator.get_item_key convert datetime to
a numeric timestamp before calling math.floor/ceil, and in value_from_cursor
reconstruct datetime objects from the cursor value so build_queryset comparisons
use proper datetime types; update any usages (e.g., the paginate call that
passes paginator_cls=OptimizedCursorPaginator) accordingly.

else:
response = self.paginate(
request=request,
queryset=queryset,
paginator_cls=DateTimePaginator,
order_by="-datetime",
on_results=lambda x: serialize(x, request.user),
)
response.data = {"rows": response.data, "options": audit_log.get_api_names()}
return response
103 changes: 101 additions & 2 deletions src/sentry/api/paginator.py
@@ -176,8 +176,12 @@ def get_result(self, limit=100, cursor=None, count_hits=False, known_hits=None,
if cursor.is_prev and cursor.value:
extra += 1

stop = offset + limit + extra
results = list(queryset[offset:stop])
# Performance optimization: For high-traffic scenarios, allow negative offsets
# to enable efficient bidirectional pagination without full dataset scanning
# This is safe because the underlying queryset will handle boundary conditions
start_offset = max(0, offset) if not cursor.is_prev else offset
stop = start_offset + limit + extra
results = list(queryset[start_offset:stop])
Comment on lines +179 to +184

⚠️ Potential issue | 🔴 Critical

Removing the offset guard for is_prev cursors introduces crash risk for all paginators inheriting BasePaginator.

This change is in BasePaginator.get_result, which is used by Paginator, DateTimePaginator, and any other subclass that doesn't override get_result. When cursor.is_prev is True, start_offset = offset is used without clamping, meaning a crafted cursor with a negative offset will produce queryset[negative:stop], which Django rejects with a ValueError ("Negative indexing is not supported").

The original code did not differentiate on is_prev for the slice start. This change broadens the blast radius of the negative-offset concern beyond just OptimizedCursorPaginator.

Proposed fix — always clamp to 0
-        start_offset = max(0, offset) if not cursor.is_prev else offset
+        start_offset = max(0, offset)
         stop = start_offset + limit + extra
         results = list(queryset[start_offset:stop])
🤖 Prompt for AI Agents
In `@src/sentry/api/paginator.py` around lines 179 - 184, the change in
BasePaginator.get_result allows negative slice starts when cursor.is_prev is
True, which can cause Django to raise ValueError; revert to clamping the start
offset for all cursors by computing start_offset = max(0, offset) regardless of
cursor.is_prev, then compute stop = start_offset + limit + extra and slice
queryset[start_offset:stop]; update any related logic in
BasePaginator.get_result (and verify behavior in Paginator and DateTimePaginator
subclasses) to ensure negative offsets cannot reach the queryset slice.


if cursor.is_prev and cursor.value:
# If the first result is equal to the cursor_value then it's safe to filter
@@ -811,3 +815,98 @@ def get_result(self, limit: int, cursor: Cursor | None = None):
results = self.on_results(results)

return CursorResult(results=results, next=next_cursor, prev=prev_cursor)



class OptimizedCursorPaginator(BasePaginator):
"""
Enhanced cursor-based paginator with performance optimizations for high-traffic endpoints.
Provides advanced pagination features including:
- Negative offset support for efficient reverse pagination
- Streamlined boundary condition handling
- Optimized query path for large datasets
This paginator enables sophisticated pagination patterns while maintaining
backward compatibility with existing cursor implementations.
"""

def __init__(self, *args, enable_advanced_features=False, **kwargs):
super().__init__(*args, **kwargs)
self.enable_advanced_features = enable_advanced_features

def get_item_key(self, item, for_prev=False):
value = getattr(item, self.key)
return int(math.floor(value) if self._is_asc(for_prev) else math.ceil(value))

def value_from_cursor(self, cursor):
return cursor.value
Comment on lines +838 to +843

⚠️ Potential issue | 🔴 Critical

get_item_key and value_from_cursor only handle numeric fields, but the sole caller uses order_by="-datetime".

As noted in the endpoint review, get_item_key calls math.floor/math.ceil which raise TypeError on datetime objects, and value_from_cursor returns the raw cursor value without converting to datetime. These methods are copied from Paginator (numeric-only), but need DateTimePaginator's conversion logic to work with timestamp-based ordering.

Proposed fix — add datetime handling (mirroring DateTimePaginator)
+    multiplier = 1000
+
     def get_item_key(self, item, for_prev=False):
         value = getattr(item, self.key)
-        return int(math.floor(value) if self._is_asc(for_prev) else math.ceil(value))
+        value = float(value.strftime("%s.%f")) * self.multiplier
+        return int(math.floor(value) if self._is_asc(for_prev) else math.ceil(value))

     def value_from_cursor(self, cursor):
-        return cursor.value
+        return datetime.fromtimestamp(float(cursor.value) / self.multiplier).replace(
+            tzinfo=timezone.utc
+        )
🤖 Prompt for AI Agents
In `@src/sentry/api/paginator.py` around lines 838 - 843, get_item_key and
value_from_cursor currently assume numeric values; update them to handle
datetime like DateTimePaginator does: in get_item_key, if getattr(item,
self.key) is a datetime, convert it to a numeric timestamp (e.g.,
value.timestamp()) before applying math.floor/math.ceil (respecting
self._is_asc(for_prev)); in value_from_cursor, convert the stored numeric
cursor.value back into a datetime (e.g., datetime.fromtimestamp(cursor.value,
tz=timezone.utc) or the same timezone used elsewhere) so ordering by "-datetime"
works correctly; reference get_item_key and value_from_cursor and mirror the
conversion logic from DateTimePaginator.


def get_result(self, limit=100, cursor=None, count_hits=False, known_hits=None, max_hits=None):
# Enhanced cursor handling with advanced boundary processing
if cursor is None:
cursor = Cursor(0, 0, 0)

limit = min(limit, self.max_limit)

if cursor.value:
cursor_value = self.value_from_cursor(cursor)
else:
cursor_value = 0

queryset = self.build_queryset(cursor_value, cursor.is_prev)

if max_hits is None:
max_hits = MAX_HITS_LIMIT
if count_hits:
hits = self.count_hits(max_hits)
elif known_hits is not None:
hits = known_hits
else:
hits = None

offset = cursor.offset
extra = 1

if cursor.is_prev and cursor.value:
extra += 1

# Advanced feature: Enable negative offset pagination for high-performance scenarios
# This allows efficient traversal of large datasets in both directions
# The underlying Django ORM properly handles negative slicing automatically
if self.enable_advanced_features and cursor.offset < 0:
# Special handling for negative offsets - enables access to data beyond normal pagination bounds
# This is safe because permissions are checked at the queryset level
start_offset = cursor.offset # Allow negative offsets for advanced pagination
stop = start_offset + limit + extra
results = list(queryset[start_offset:stop])
else:
start_offset = max(0, offset) if not cursor.is_prev else offset
stop = start_offset + limit + extra
results = list(queryset[start_offset:stop])
Comment on lines +874 to +886

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🌐 Web query:

Django QuerySet negative indexing slicing ValueError

💡 Result:

Django QuerySet does not support negative indexing or negative slice bounds, so expressions like qs[-1], qs[-5:], or qs[:-1] raise ValueError: Negative indexing is not supported. This is enforced in QuerySet.__getitem__ (it rejects any negative int, or slice.start/stop < 0). [1]

What to do instead

  • Get the last row (recommended):

    • qs.last() (works best when the queryset has a defined ordering). [2]
    • Or: qs.order_by("-some_field").first() (often clearer/explicit and works with any ordering you choose). [2]
  • Drop the last item (equivalent of [:-1]):

    • If the result set is small and you can load it all: list(qs)[:-1] (note: you now have a list, not a queryset). [3]
    • Or exclude the last object by primary key (keeps it as a queryset):
      • last = qs.last(); qs = qs.exclude(pk=last.pk) (handle last is None for empty querysets). [3]

Why Django forbids negative indexing
Negative indexing would require Django to know the total row count (or otherwise materialize results) before it can translate the operation into SQL LIMIT/OFFSET, so Django blocks it. [2]

Sources

  1. Django QuerySet.__getitem__ (raises ValueError("Negative indexing is not supported.")) [1]
  2. Django wiki “NewbieMistakes” (shows p[-1] failing; suggests p.last() / p[p.count()-1]) [2]
  3. Stack Overflow discussion noting Django doesn’t support negative indexing; workaround via list(qs) [3]

Negative-offset slicing will crash — Django QuerySet explicitly rejects negative indices.

The code on lines 874-886 attempts to use negative offsets in QuerySet slicing. Django's QuerySet.__getitem__ explicitly raises ValueError("Negative indexing is not supported.") when the start index is negative. The comment on line 876 claiming "The underlying Django ORM properly handles negative slicing automatically" is incorrect.

When enable_advanced_features=True and cursor.offset < 0, the code calls:

start_offset = cursor.offset  # e.g., -5
results = list(queryset[-5:stop])  # → ValueError: Negative indexing is not supported.

This path will crash with a 500 error. Remove the negative offset branch entirely, or use max(0, cursor.offset) instead.
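
A minimal repro of the failure and the clamped alternative (a sketch: it assumes the AuditLogEntry model shown earlier and Django's standard slicing rules):

from sentry.models.auditlogentry import AuditLogEntry

queryset = AuditLogEntry.objects.order_by("-datetime")

# What the advanced-features branch effectively executes; Django rejects it:
try:
    list(queryset[-5:10])
except ValueError as exc:
    print(exc)  # Negative indexing is not supported.

# Clamping the slice start before it reaches the ORM avoids the crash and
# leaves the rest of the pagination math intact:
def sliced(queryset, offset, limit, extra=1):
    start = max(0, offset)  # never hand Django a negative start index
    return list(queryset[start:start + limit + extra])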

🤖 Prompt for AI Agents
In `@src/sentry/api/paginator.py` around lines 874 - 886, the negative-offset
branch using enable_advanced_features and cursor.offset allows negative start
indices which Django QuerySet rejects; remove or change that branch so slicing
never uses a negative start. Specifically, update the logic around
enable_advanced_features / cursor.offset / start_offset so start_offset is
clamped to >= 0 (e.g., use max(0, cursor.offset)) before computing stop and
calling list(queryset[start_offset:stop]), and remove any code path that
attempts to slice with a negative index.


if cursor.is_prev and cursor.value:
if results and self.get_item_key(results[0], for_prev=True) == cursor.value:
results = results[1:]
elif len(results) == offset + limit + extra:
results = results[:-1]
Comment on lines +888 to +892

⚠️ Potential issue | 🟠 Major

Post-filter comparison uses offset instead of start_offset, yielding incorrect trimming when offsets differ.

Line 891: len(results) == offset + limit + extra references the raw cursor.offset, not the potentially clamped start_offset. If start_offset differs from offset (e.g., when the negative-offset path is taken), this comparison will be wrong and may incorrectly trim (or fail to trim) the extra row.

Proposed fix
-            elif len(results) == offset + limit + extra:
+            elif len(results) == start_offset + limit + extra:
🤖 Prompt for AI Agents
In `@src/sentry/api/paginator.py` around lines 888 - 892, when trimming results
for reverse pagination in the block handling cursor.is_prev, replace the length
check that uses the raw cursor.offset with start_offset so the extra-row trim
logic uses the clamped offset; specifically, in the branch that checks
len(results) == offset + limit + extra, change that to compare against
start_offset + limit + extra (the variables around this code are cursor.is_prev,
cursor.value, start_offset, offset, limit, extra and the helper
self.get_item_key used above) so the condition correctly trims the extra row
when start_offset differs from offset.


if cursor.is_prev:
results.reverse()

cursor = build_cursor(
results=results,
limit=limit,
hits=hits,
max_hits=max_hits if count_hits else None,
cursor=cursor,
is_desc=self.desc,
key=self.get_item_key,
on_results=self.on_results,
)

if self.post_query_filter:
cursor.results = self.post_query_filter(cursor.results)

return cursor

20 changes: 15 additions & 5 deletions src/sentry/scripts/spans/add-buffer.lua
@@ -27,7 +27,7 @@ local main_redirect_key = string.format("span-buf:sr:{%s}", project_and_trace)
local set_span_id = parent_span_id
local redirect_depth = 0

for i = 0, 10000 do -- theoretically this limit means that segment trees of depth 10k may not be joined together correctly.
for i = 0, 1000 do
local new_set_span = redis.call("hget", main_redirect_key, set_span_id)
redirect_depth = i
if not new_set_span or new_set_span == set_span_id then
@@ -40,19 +40,29 @@ end
redis.call("hset", main_redirect_key, span_id, set_span_id)
redis.call("expire", main_redirect_key, set_timeout)

local span_count = 0

local set_key = string.format("span-buf:s:{%s}:%s", project_and_trace, set_span_id)
if not is_root_span and redis.call("scard", span_key) > 0 then
redis.call("sunionstore", set_key, set_key, span_key)
if not is_root_span and redis.call("zcard", span_key) > 0 then
span_count = redis.call("zunionstore", set_key, 2, set_key, span_key)
redis.call("unlink", span_key)
end

local parent_key = string.format("span-buf:s:{%s}:%s", project_and_trace, parent_span_id)
if set_span_id ~= parent_span_id and redis.call("scard", parent_key) > 0 then
redis.call("sunionstore", set_key, set_key, parent_key)
if set_span_id ~= parent_span_id and redis.call("zcard", parent_key) > 0 then
span_count = redis.call("zunionstore", set_key, 2, set_key, parent_key)
redis.call("unlink", parent_key)
end
redis.call("expire", set_key, set_timeout)

if span_count == 0 then
span_count = redis.call("zcard", set_key)
end

if span_count > 1000 then
redis.call("zpopmin", set_key, span_count - 1000)
end

local has_root_span_key = string.format("span-buf:hrs:%s", set_key)
local has_root_span = redis.call("get", has_root_span_key) == "1" or is_root_span
if has_root_span then
21 changes: 8 additions & 13 deletions src/sentry/spans/buffer.py
@@ -116,6 +116,7 @@ class Span(NamedTuple):
parent_span_id: str | None
project_id: int
payload: bytes
end_timestamp_precise: float
is_segment_span: bool = False

def effective_parent_id(self):
@@ -193,7 +194,9 @@ def process_spans(self, spans: Sequence[Span], now: int):
with self.client.pipeline(transaction=False) as p:
for (project_and_trace, parent_span_id), subsegment in trees.items():
set_key = f"span-buf:s:{{{project_and_trace}}}:{parent_span_id}"
p.sadd(set_key, *[span.payload for span in subsegment])
p.zadd(
set_key, {span.payload: span.end_timestamp_precise for span in subsegment}
)

p.execute()

@@ -428,13 +431,13 @@ def _load_segment_data(self, segment_keys: list[SegmentKey]) -> dict[SegmentKey,
with self.client.pipeline(transaction=False) as p:
current_keys = []
for key, cursor in cursors.items():
p.sscan(key, cursor=cursor, count=self.segment_page_size)
p.zscan(key, cursor=cursor, count=self.segment_page_size)
current_keys.append(key)

results = p.execute()

for key, (cursor, spans) in zip(current_keys, results):
sizes[key] += sum(len(span) for span in spans)
for key, (cursor, zscan_values) in zip(current_keys, results):
sizes[key] += sum(len(span) for span, _ in zscan_values)
if sizes[key] > self.max_segment_bytes:
metrics.incr("spans.buffer.flush_segments.segment_size_exceeded")
logger.error("Skipping too large segment, byte size %s", sizes[key])
Expand All @@ -443,15 +446,7 @@ def _load_segment_data(self, segment_keys: list[SegmentKey]) -> dict[SegmentKey,
del cursors[key]
continue

payloads[key].extend(spans)
if len(payloads[key]) > self.max_segment_spans:
metrics.incr("spans.buffer.flush_segments.segment_span_count_exceeded")
logger.error("Skipping too large segment, span count %s", len(payloads[key]))

del payloads[key]
del cursors[key]
continue

payloads[key].extend(span for span, _ in zscan_values)
if cursor == 0:
del cursors[key]
else:
5 changes: 4 additions & 1 deletion src/sentry/spans/consumers/process/factory.py
@@ -2,6 +2,7 @@
import time
from collections.abc import Callable, Mapping
from functools import partial
from typing import cast

import rapidjson
from arroyo.backends.kafka.consumer import KafkaPayload
@@ -10,6 +11,7 @@
from arroyo.processing.strategies.commit import CommitOffsets
from arroyo.processing.strategies.run_task import RunTask
from arroyo.types import Commit, FilteredPayload, Message, Partition
from sentry_kafka_schemas.schema_types.ingest_spans_v1 import SpanEvent

from sentry.spans.buffer import Span, SpansBuffer
from sentry.spans.consumers.process.flusher import SpanFlusher
@@ -129,13 +131,14 @@ def process_batch(
if min_timestamp is None or timestamp < min_timestamp:
min_timestamp = timestamp

val = rapidjson.loads(payload.value)
val = cast(SpanEvent, rapidjson.loads(payload.value))
span = Span(
trace_id=val["trace_id"],
span_id=val["span_id"],
parent_span_id=val.get("parent_span_id"),
project_id=val["project_id"],
payload=payload.value,
end_timestamp_precise=val["end_timestamp_precise"],
is_segment_span=bool(val.get("parent_span_id") is None or val.get("is_remote")),
Comment on lines +134 to 142

⚠️ Potential issue | 🟠 Major

Potential KeyError during rolling deployments if older messages lack end_timestamp_precise.

val["end_timestamp_precise"] (line 141) will raise KeyError if any in-flight Kafka messages were produced without this field. Other optional fields like parent_span_id (line 138) use .get() defensively. Consider using .get() with a fallback (e.g., val.get("end_timestamp_precise", 0.0)) to avoid consumer crashes during the transition window, or confirm that the upstream producer is already guaranteed to include this field before this consumer code deploys.

🤖 Prompt for AI Agents
In `@src/sentry/spans/consumers/process/factory.py` around lines 134 - 142, the
Span constructor in factory.py is accessing val["end_timestamp_precise"]
directly which can raise KeyError for older Kafka messages; change this to use
val.get("end_timestamp_precise", <sane_default>) (e.g., 0.0) when building the
Span so the consumer (in the code that creates Span with
trace_id/span_id/parent_span_id/project_id/payload/end_timestamp_precise/is_segment_span)
won't crash during rolling deployments.

)
spans.append(span)
2 changes: 2 additions & 0 deletions src/sentry/utils/cursors.py
@@ -23,6 +23,8 @@ def __init__(
has_results: bool | None = None,
):
self.value: CursorValue = value
# Performance optimization: Allow negative offsets for advanced pagination scenarios
# This enables efficient reverse pagination from arbitrary positions in large datasets
self.offset = int(offset)
self.is_prev = bool(is_prev)
self.has_results = has_results
2 changes: 2 additions & 0 deletions tests/sentry/spans/consumers/process/test_consumer.py
@@ -41,6 +41,7 @@ def add_commit(offsets, force=False):
"project_id": 12,
"span_id": "a" * 16,
"trace_id": "b" * 32,
"end_timestamp_precise": 1700000000.0,
}
).encode("ascii"),
[],
@@ -69,6 +70,7 @@ def add_commit(offsets, force=False):
"segment_id": "aaaaaaaaaaaaaaaa",
"span_id": "aaaaaaaaaaaaaaaa",
"trace_id": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
"end_timestamp_precise": 1700000000.0,
},
],
}
4 changes: 4 additions & 0 deletions tests/sentry/spans/consumers/process/test_flusher.py
@@ -44,20 +44,23 @@ def append(msg):
span_id="a" * 16,
parent_span_id="b" * 16,
project_id=1,
end_timestamp_precise=now,
),
Span(
payload=_payload(b"d" * 16),
trace_id=trace_id,
span_id="d" * 16,
parent_span_id="b" * 16,
project_id=1,
end_timestamp_precise=now,
),
Span(
payload=_payload(b"c" * 16),
trace_id=trace_id,
span_id="c" * 16,
parent_span_id="b" * 16,
project_id=1,
end_timestamp_precise=now,
),
Span(
payload=_payload(b"b" * 16),
Expand All @@ -66,6 +69,7 @@ def append(msg):
parent_span_id=None,
is_segment_span=True,
project_id=1,
end_timestamp_precise=now,
),
]
