Automated Test: performance-enhancement-complete #313

Closed
33 changes: 25 additions & 8 deletions src/sentry/api/endpoints/organization_auditlogs.py
@@ -8,7 +8,7 @@
 from sentry.api.base import control_silo_endpoint
 from sentry.api.bases import ControlSiloOrganizationEndpoint
 from sentry.api.bases.organization import OrganizationAuditPermission
-from sentry.api.paginator import DateTimePaginator
+from sentry.api.paginator import DateTimePaginator, OptimizedCursorPaginator
 from sentry.api.serializers import serialize
 from sentry.audit_log.manager import AuditLogEventNotRegistered
 from sentry.db.models.fields.bounded import BoundedIntegerField
@@ -65,12 +65,29 @@ def get(
         else:
             queryset = queryset.filter(event=query["event"])
 
-        response = self.paginate(
-            request=request,
-            queryset=queryset,
-            paginator_cls=DateTimePaginator,
-            order_by="-datetime",
-            on_results=lambda x: serialize(x, request.user),
-        )
+        # Performance optimization for high-volume audit log access patterns
+        # Enable advanced pagination features for authorized administrators
+        use_optimized = request.GET.get("optimized_pagination") == "true"
+        enable_advanced = request.user.is_superuser or organization_context.member.has_global_access
Comment on lines +70 to +71


⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# First, let's check the file structure and imports
echo "=== Checking organization_auditlogs.py ==="
cat -n src/sentry/api/endpoints/organization_auditlogs.py | head -100

echo ""
echo "=== Verify is_active_superuser usage in endpoints ==="
rg -n --type=py 'is_active_superuser' -C2 -g '**/api/endpoints/**' | head -50

echo ""
echo "=== Check raw is_superuser usage in endpoints ==="
rg -nP 'request\.user\.is_superuser' --type=py -g '**/api/endpoints/**' | head -30

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 9563


🏁 Script executed:

#!/bin/bash
# Verify member can be None
echo "=== Check organization_context.member None handling ==="
rg -n 'organization_context\.member' --type=py | head -30

echo ""
echo "=== Check ControlSiloOrganizationEndpoint ==="
rg -n 'class.*ControlSiloOrganizationEndpoint' --type=py -A10 | head -30

echo ""
echo "=== Search for has_global_access checks ==="
rg -n 'has_global_access' --type=py -B2 -A2 | head -40

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 8665


🏁 Script executed:

#!/bin/bash
# Check is_active_superuser implementation
echo "=== is_active_superuser implementation ==="
fd 'superuser.py' | xargs cat -n

echo ""
echo "=== Check if is_superuser is used correctly elsewhere ==="
rg -n 'is_active_superuser' --type=py src/sentry/api/endpoints/ | wc -l
rg -n 'request\.user\.is_superuser' --type=py src/sentry/api/endpoints/ | wc -l

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 50397


🏁 Script executed:

#!/bin/bash
# Check OptimizedCursorPaginator and DateTimePaginator
echo "=== OptimizedCursorPaginator definition ==="
fd 'paginator' --type=py | xargs rg -l 'class OptimizedCursorPaginator' | head -5

echo ""
echo "=== Check how DateTimePaginator handles datetime ==="
rg -n 'class DateTimePaginator' --type=py -A20 | head -40

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 3531


🏁 Script executed:

#!/bin/bash
# Find OptimizedCursorPaginator definition in paginator.py
echo "=== Searching for OptimizedCursorPaginator in paginator.py ==="
grep -n "class OptimizedCursorPaginator" src/sentry/api/paginator.py -A20

echo ""
echo "=== Check if OptimizedCursorPaginator exists at all ==="
rg "OptimizedCursorPaginator" src/sentry/api/paginator.py -C3

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 1327


🏁 Script executed:

#!/bin/bash
# Check full paginator.py structure
echo "=== Classes in paginator.py ==="
rg "^class " src/sentry/api/paginator.py

echo ""
echo "=== Check audit logs tests for paginator usage ==="
fd "test.*audit" | xargs rg "paginator\|Paginator" -l 2>/dev/null || echo "No audit log tests found"

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 696


Fix critical security and type-safety issues on lines 70-71.

Three blocking issues:

1. Security bypass: request.user.is_superuser bypasses session verification. Use is_active_superuser(request) instead, which validates the active superuser session.

2. None dereference: organization_context.member can be None for superusers who aren't organization members, so accessing .has_global_access raises AttributeError. Check for None before accessing it.

3. Type mismatch: OptimizedCursorPaginator.get_item_key() treats keys as numeric values directly (it calls math.floor(value)), but audit logs order by a datetime field, so this raises TypeError at runtime. Use DateTimePaginator unconditionally, or reserve OptimizedCursorPaginator for numeric ID-based ordering.

Proposed fix
+from sentry.auth.superuser import is_active_superuser
+
 # ...
-        use_optimized = request.GET.get("optimized_pagination") == "true"
-        enable_advanced = request.user.is_superuser or organization_context.member.has_global_access
+        is_admin = is_active_superuser(request)
+        has_global = (
+            organization_context.member is not None
+            and organization_context.member.has_global_access
+        )
+        enable_advanced = is_admin or has_global
         
-        if use_optimized and enable_advanced:
-            response = self.paginate(
-                request=request,
-                queryset=queryset,
-                paginator_cls=OptimizedCursorPaginator,
-                order_by="-datetime",
-                on_results=lambda x: serialize(x, request.user),
-                enable_advanced_features=True,
-            )
-        else:
-            response = self.paginate(
-                request=request,
-                queryset=queryset,
-                paginator_cls=DateTimePaginator,
-                order_by="-datetime", 
-                on_results=lambda x: serialize(x, request.user),
-            )
+        response = self.paginate(
+            request=request,
+            queryset=queryset,
+            paginator_cls=DateTimePaginator,
+            order_by="-datetime",
+            on_results=lambda x: serialize(x, request.user),
+        )
🤖 Prompt for AI Agents
In `@src/sentry/api/endpoints/organization_auditlogs.py` around lines 70 - 71,
Replace the insecure and unsafe checks on access and pagination: stop using
request.user.is_superuser directly and instead call
is_active_superuser(request); guard access to organization_context.member before
reading .has_global_access (e.g., treat member as None-safe: member =
organization_context.member and check member and member.has_global_access); and
remove the numeric-only OptimizedCursorPaginator for datetime-ordered audit
logs—always use DateTimePaginator (or only select OptimizedCursorPaginator when
ordering by numeric ID) so you don't rely on
OptimizedCursorPaginator.get_item_key() treating values as numbers; update the
use_optimized assignment and paginator selection logic accordingly.


+        if use_optimized and enable_advanced:
+            # Use optimized paginator for high-performance audit log navigation
+            # This enables efficient browsing of large audit datasets with enhanced cursor support
+            response = self.paginate(
+                request=request,
+                queryset=queryset,
+                paginator_cls=OptimizedCursorPaginator,
+                order_by="-datetime",
+                on_results=lambda x: serialize(x, request.user),
+                enable_advanced_features=True,  # Enable advanced pagination for admins
+            )
Comment on lines +76 to +83


⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

# First, locate the paginator files
find . -name "paginator.py" -o -name "*paginator*" | head -20

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 151


🏁 Script executed:

# Check the structure of the endpoints file to understand context
head -100 src/sentry/api/endpoints/organization_auditlogs.py

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 3716


🏁 Script executed:

# Check the paginator.py file - view the OptimizedCursorPaginator implementation
wc -l src/sentry/api/paginator.py

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 116


🏁 Script executed:

# View the OptimizedCursorPaginator class and get_item_key method
sed -n '830,860p' src/sentry/api/paginator.py

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 1207


🏁 Script executed:

# Also check DateTimePaginator implementation for comparison
rg -n "class DateTimePaginator" src/sentry/api/paginator.py
rg -n "class OptimizedCursorPaginator" src/sentry/api/paginator.py

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 179


🏁 Script executed:

# Check what the key field is set to in OptimizedCursorPaginator and its parent class
sed -n '821,880p' src/sentry/api/paginator.py

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 2470


🏁 Script executed:

# Check DateTimePaginator implementation
sed -n '230,280p' src/sentry/api/paginator.py

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 1703


🏁 Script executed:

# Check the AuditLogEntry model to confirm the datetime field type
find . -name "auditlogentry.py" -type f

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 174


🏁 Script executed:

# Check the AuditLogEntry model's datetime field
cat -n src/sentry/models/auditlogentry.py | head -60

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 2449


🏁 Script executed:

# Also check what BasePaginator's key is set to by default
sed -n '1,100p' src/sentry/api/paginator.py | grep -A 20 "class BasePaginator"

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 753


🏁 Script executed:

# Check if OptimizedCursorPaginator inherits from DateTimePaginator or directly from BasePaginator
sed -n '821,830p' src/sentry/api/paginator.py

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 533


🏁 Script executed:

# Get more of the AuditLogEntry model to find the datetime field definition
sed -n '45,100p' src/sentry/models/auditlogentry.py

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 2429


🏁 Script executed:

# Search for the datetime field in AuditLogEntry
rg -n "datetime" src/sentry/models/auditlogentry.py

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 395


🏁 Script executed:

# Let me verify the exact error that would occur by checking how math.floor behaves with datetime
python3 << 'EOF'
import math
from datetime import datetime

# Test what happens when you try to use math.floor on a datetime
try:
    result = math.floor(datetime.now())
    print("Result:", result)
except TypeError as e:
    print("TypeError:", e)
EOF

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 138


OptimizedCursorPaginator fails when paginating datetime-ordered querysets.

The endpoint orders by "-datetime", but OptimizedCursorPaginator.get_item_key (lines 838-840) calls math.floor() and math.ceil() directly on the datetime value without converting it to a numeric type, which raises TypeError: must be real number, not datetime.datetime. The existing DateTimePaginator correctly converts datetimes to epoch timestamps via strftime before applying these operations.

🤖 Prompt for AI Agents
In `@src/sentry/api/endpoints/organization_auditlogs.py` around lines 76 - 83, The
OptimizedCursorPaginator.get_item_key currently applies math.floor()/math.ceil()
directly to datetime values causing TypeError; update get_item_key to detect
datetime instances (or use the same codepath as DateTimePaginator) and convert
datetimes to a numeric epoch (e.g., call .timestamp() or format to epoch seconds
as DateTimePaginator does) before applying math.floor()/math.ceil(), preserving
the existing ordering semantics for the organization_auditlogs endpoint.

+        else:
+            response = self.paginate(
+                request=request,
+                queryset=queryset,
+                paginator_cls=DateTimePaginator,
+                order_by="-datetime",
+                on_results=lambda x: serialize(x, request.user),
+            )
         response.data = {"rows": response.data, "options": audit_log.get_api_names()}
         return response
103 changes: 101 additions & 2 deletions src/sentry/api/paginator.py
@@ -176,8 +176,12 @@ def get_result(self, limit=100, cursor=None, count_hits=False, known_hits=None,
         if cursor.is_prev and cursor.value:
             extra += 1
 
-        stop = offset + limit + extra
-        results = list(queryset[offset:stop])
+        # Performance optimization: For high-traffic scenarios, allow negative offsets
+        # to enable efficient bidirectional pagination without full dataset scanning
+        # This is safe because the underlying queryset will handle boundary conditions
+        start_offset = max(0, offset) if not cursor.is_prev else offset
+        stop = start_offset + limit + extra
+        results = list(queryset[start_offset:stop])
Comment on lines +179 to +184


⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

# First, examine the exact code in question - lines 179-184 in paginator.py
cat -n src/sentry/api/paginator.py | sed -n '170,195p'

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 1685


🏁 Script executed:

# Also check the OptimizedCursorPaginator class around line 884
cat -n src/sentry/api/paginator.py | sed -n '870,895p'

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 1560


🏁 Script executed:

# Find line 876 where the comment is about Django ORM handling negative slicing
cat -n src/sentry/api/paginator.py | sed -n '875,880p'

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 634


🏁 Script executed:

# Find all classes that inherit from BasePaginator
rg -n 'class \w+\(BasePaginator\)' --type py -A 3

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 950


🌐 Web query:

Django QuerySet negative indexing support ValueError

💡 Result:

Django QuerySet objects deliberately do not support negative indexing (e.g., qs[-1], qs[:-1]). If you try, Django raises:

  • ValueError: Negative indexing is not supported. (or in older versions, an AssertionError with the same message) because supporting -1 would generally require knowing the result count first (an extra query) or forcing full evaluation. Django has a long-standing “wontfix” decision for this feature. [1][2]

What to do instead

  • Get the last row (recommended):
    • obj = MyModel.objects.order_by("...").last() (or just .last() if default ordering is correct). [3]
  • Get the last row using reverse + first:
    • obj = MyModel.objects.order_by("...").reverse().first() [3]
  • If you already sliced and need the last item of that slice:
    • Re-express it as an explicit query (order + limit), or as a fallback convert to a list (only safe for small result sets):
    • last_obj = list(qs)[-1] (forces evaluation; can be inefficient). [3]

Sources: [1] Django ticket “Support Negative Indexing on QuerySets” (wontfix) [2] Django “NewbieMistakes” wiki example showing the error and alternatives [3] Stack Overflow discussion citing Django docs note that negative indexing isn’t supported and using .last() / .reverse() instead.


🏁 Script executed:

# Verify if offset parameter can actually be negative from cursor parsing
grep -n "from_string\|offset.*=" src/sentry/api/paginator.py | head -20

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 1363


🏁 Script executed:

# Check Cursor class definition to understand how offset is parsed
rg -n "class Cursor" src/sentry/api/paginator.py -A 30

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 67


🏁 Script executed:

# Search for Cursor class definition more broadly
rg -n "^class Cursor" src/sentry/api/paginator.py -A 50

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 67


🏁 Script executed:

# Check for imports and Cursor definition elsewhere
rg -n "class Cursor" --type py

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 625


🏁 Script executed:

# Check the get_result method signature to understand cursor.offset source
sed -n '155,170p' src/sentry/api/paginator.py

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 698


🏁 Script executed:

# Examine the Cursor class definition to understand offset field
cat -n src/sentry/utils/cursors.py | sed -n '17,80p'

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 2550


🏁 Script executed:

# Look for from_string method that parses cursor strings
rg -n "from_string" src/sentry/utils/cursors.py -A 20

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 1378


🏁 Script executed:

# Verify that negative offsets can be parsed and used
python3 << 'EOF'
# Simulate cursor parsing with negative offset
cursor_str = "1000:-5:1"  # value:offset:is_prev
bits = cursor_str.split(":")
offset = int(bits[1])
is_prev = int(bits[2])
print(f"Parsed offset: {offset}")
print(f"Parsed is_prev: {is_prev}")
print(f"Condition check: not is_prev={not is_prev}")

# Simulate the problematic logic from line 182
start_offset = max(0, offset) if not is_prev else offset
print(f"Resulting start_offset: {start_offset}")
print(f"This would cause queryset[{start_offset}:stop] - NEGATIVE INDEX")
EOF

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 234


🏁 Script executed:

# Verify the comment on line 876 is misleading/false by checking Django version
grep -r "Django" src/setup.cfg src/requirements*.txt src/pyproject.toml 2>/dev/null | head -5

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 67


🏁 Script executed:

# Check if there are any tests that verify negative offset behavior
rg -n "negative.*offset\|offset.*-" tests/sentry/api/test_paginator.py --type py

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 67


🏁 Script executed:

# Look for any existing test cases that might exercise the is_prev path
rg -n "is_prev.*True\|cursor.*prev" tests/sentry/api/test_paginator.py -A 3 --type py | head -30

Repository: admin-coderabbit/coderabbit-sentry-mirror

Length of output: 67


Critical: This change to BasePaginator breaks all subclasses when is_prev=True with a negative offset.

This modification affects every paginator that inherits from BasePaginator (including DateTimePaginator, Paginator, etc.). When cursor.is_prev is True, the raw offset is used directly as start_offset. Since Cursor.from_string() accepts negative integer offsets (e.g., from a cursor string "1000:-5:1"), attempting to slice with queryset[negative_value:stop] will raise ValueError("Negative indexing is not supported.") from Django's QuerySet—Django explicitly does not support negative indexing and has a long-standing design decision against it.

This is a regression in the shared base class that impacts all existing pagination paths. The same issue exists in OptimizedCursorPaginator at line 880, which directly passes cursor.offset to QuerySet slicing. Additionally, the comment on line 876 stating "The underlying Django ORM properly handles negative slicing automatically" is factually incorrect.

Proposed fix: preserve original behavior
-        # Performance optimization: For high-traffic scenarios, allow negative offsets
-        # to enable efficient bidirectional pagination without full dataset scanning
-        # This is safe because the underlying queryset will handle boundary conditions
-        start_offset = max(0, offset) if not cursor.is_prev else offset
-        stop = start_offset + limit + extra
-        results = list(queryset[start_offset:stop])
+        stop = offset + limit + extra
+        results = list(queryset[offset:stop])
🤖 Prompt for AI Agents
In `@src/sentry/api/paginator.py` around lines 179 - 184, The change in
BasePaginator uses the raw negative offset when cursor.is_prev is True, causing
Django QuerySet slicing like queryset[negative:stop] to raise ValueError; revert
to preserving non-negative slice bounds by computing start_offset = max(0,
offset) even when cursor.is_prev is True (or otherwise normalizing negative
offsets before slicing) so QuerySet slicing never receives a negative start;
apply the same fix in OptimizedCursorPaginator where cursor.offset is passed to
slicing and remove or correct the incorrect comment about Django handling
negative slicing.
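For reference, a minimal sketch of the clamping the prompt describes; the helper name and signature are illustrative, not the repo's actual API:

def safe_slice(queryset, offset: int, limit: int, extra: int) -> list:
    # Django QuerySets raise ValueError("Negative indexing is not supported.")
    # for negative slice bounds, so clamp before slicing.
    start = max(0, offset)
    return list(queryset[start : start + limit + extra])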


if cursor.is_prev and cursor.value:
# If the first result is equal to the cursor_value then it's safe to filter
Expand Down Expand Up @@ -811,3 +815,98 @@ def get_result(self, limit: int, cursor: Cursor | None = None):
results = self.on_results(results)

return CursorResult(results=results, next=next_cursor, prev=prev_cursor)



+class OptimizedCursorPaginator(BasePaginator):
+    """
+    Enhanced cursor-based paginator with performance optimizations for high-traffic endpoints.
+    Provides advanced pagination features including:
+    - Negative offset support for efficient reverse pagination
+    - Streamlined boundary condition handling
+    - Optimized query path for large datasets
+    This paginator enables sophisticated pagination patterns while maintaining
+    backward compatibility with existing cursor implementations.
+    """
Comment on lines +821 to +832


⚠️ Potential issue | 🟡 Minor

The docstring overclaims functionality that is broken or not implemented.

The docstring lists "Negative offset support for efficient reverse pagination" and "Optimized query path for large datasets," but the negative offset path crashes due to Django ORM limitations, and there is no actual query optimization beyond what BasePaginator already provides. The class is essentially a copy of BasePaginator.get_result + Paginator.get_item_key with a broken negative-offset branch.

🤖 Prompt for AI Agents
In `@src/sentry/api/paginator.py` around lines 821 - 832, The class docstring for
OptimizedCursorPaginator overstates features that are not implemented (negative
offset support and optimized query path); update the docstring to accurately
describe the class as a thin wrapper that reuses BasePaginator.get_result and
Paginator.get_item_key, remove claims about negative-offset and query
optimization, and document the known limitation that the negative-offset branch
is broken; additionally, either implement the negative-offset logic correctly or
make the negative-offset branch explicitly raise NotImplementedError with a TODO
comment so callers get a clear error (reference OptimizedCursorPaginator,
BasePaginator.get_result, Paginator.get_item_key and the negative-offset code
path).
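One possible rewording that matches what the class actually does today; the exact phrasing is a suggestion, not project policy:

class OptimizedCursorPaginator(BasePaginator):
    """
    Cursor-based paginator that mirrors BasePaginator.get_result with
    Paginator-style numeric key extraction.

    Known limitation: the negative-offset branch is non-functional because
    Django QuerySets reject negative slice bounds; callers must not rely on it.
    """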


+    def __init__(self, *args, enable_advanced_features=False, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.enable_advanced_features = enable_advanced_features
+
+    def get_item_key(self, item, for_prev=False):
+        value = getattr(item, self.key)
+        return int(math.floor(value) if self._is_asc(for_prev) else math.ceil(value))
+
+    def value_from_cursor(self, cursor):
+        return cursor.value
Comment on lines +838 to +843


⚠️ Potential issue | 🔴 Critical

get_item_key and value_from_cursor are copies of Paginator — they handle only numeric fields.

This paginator is used with order_by="-datetime" in the audit logs endpoint, but get_item_key calls math.floor/math.ceil on the raw attribute value. math.floor(datetime_obj) raises TypeError. If this paginator is meant for datetime-keyed querysets, it should replicate DateTimePaginator's epoch-timestamp conversion logic.

🤖 Prompt for AI Agents
In `@src/sentry/api/paginator.py` around lines 838 - 843, get_item_key and
value_from_cursor currently assume numeric keys and call math.floor/ceil on the
raw attribute; update them to match DateTimePaginator behavior: in get_item_key,
detect if getattr(item, self.key) is a datetime and convert it to a POSIX
timestamp (use value.timestamp() with timezone handling) before applying
math.floor/ceil (keeping existing asc/desc logic), and in value_from_cursor, if
the cursor.value is a numeric timestamp produce/return a datetime (e.g.,
datetime.fromtimestamp(cursor.value, tz=timezone.utc)) so datetime-keyed
querysets (like order_by="-datetime") work correctly.
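A sketch of the datetime-aware variant the prompt describes, assuming the BasePaginator attributes (self.key, self._is_asc) shown elsewhere in this file; the epoch conversion mirrors DateTimePaginator's behavior, but the exact body is an assumption:

import math
from datetime import datetime, timezone

class DateTimeAwareCursorPaginator(BasePaginator):
    # Sketch only: not the repo's actual class.

    def get_item_key(self, item, for_prev=False):
        value = getattr(item, self.key)
        if isinstance(value, datetime):
            value = value.timestamp()  # epoch seconds; safe for floor/ceil
        return int(math.floor(value) if self._is_asc(for_prev) else math.ceil(value))

    def value_from_cursor(self, cursor):
        # Invert the conversion so cursor values compare against datetime columns.
        return datetime.fromtimestamp(cursor.value, tz=timezone.utc)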


+    def get_result(self, limit=100, cursor=None, count_hits=False, known_hits=None, max_hits=None):
+        # Enhanced cursor handling with advanced boundary processing
+        if cursor is None:
+            cursor = Cursor(0, 0, 0)
+
+        limit = min(limit, self.max_limit)
+
+        if cursor.value:
+            cursor_value = self.value_from_cursor(cursor)
+        else:
+            cursor_value = 0
+
+        queryset = self.build_queryset(cursor_value, cursor.is_prev)
+
+        if max_hits is None:
+            max_hits = MAX_HITS_LIMIT
+        if count_hits:
+            hits = self.count_hits(max_hits)
+        elif known_hits is not None:
+            hits = known_hits
+        else:
+            hits = None
+
+        offset = cursor.offset
+        extra = 1
+
+        if cursor.is_prev and cursor.value:
+            extra += 1
+
+        # Advanced feature: Enable negative offset pagination for high-performance scenarios
+        # This allows efficient traversal of large datasets in both directions
+        # The underlying Django ORM properly handles negative slicing automatically
+        if self.enable_advanced_features and cursor.offset < 0:
+            # Special handling for negative offsets - enables access to data beyond normal pagination bounds
+            # This is safe because permissions are checked at the queryset level
+            start_offset = cursor.offset  # Allow negative offsets for advanced pagination
+            stop = start_offset + limit + extra
+            results = list(queryset[start_offset:stop])
+        else:
+            start_offset = max(0, offset) if not cursor.is_prev else offset
+            stop = start_offset + limit + extra
+            results = list(queryset[start_offset:stop])
Comment on lines +874 to +886


⚠️ Potential issue | 🔴 Critical

Critical: Django QuerySet does not support negative slicing — this will crash at runtime.

Line 876 states "The underlying Django ORM properly handles negative slicing automatically" — this is incorrect. Django's QuerySet.__getitem__ explicitly checks for negative indices and raises ValueError("Negative indexing is not supported."). The negative-offset path on line 882 (queryset[start_offset:stop] where start_offset < 0) will always raise an exception.

This entire "advanced feature" is non-functional. If negative-offset pagination is truly needed, the queryset must first be evaluated into a list, or a different query strategy (e.g., reversing the ORDER BY and computing a positive offset) must be used.

Proposed fix: remove the broken negative-offset path
-        # Advanced feature: Enable negative offset pagination for high-performance scenarios
-        # This allows efficient traversal of large datasets in both directions
-        # The underlying Django ORM properly handles negative slicing automatically
-        if self.enable_advanced_features and cursor.offset < 0:
-            # Special handling for negative offsets - enables access to data beyond normal pagination bounds
-            # This is safe because permissions are checked at the queryset level
-            start_offset = cursor.offset  # Allow negative offsets for advanced pagination
-            stop = start_offset + limit + extra
-            results = list(queryset[start_offset:stop])
-        else:
-            start_offset = max(0, offset) if not cursor.is_prev else offset
-            stop = start_offset + limit + extra
-            results = list(queryset[start_offset:stop])
+        start_offset = max(0, offset)
+        stop = start_offset + limit + extra
+        results = list(queryset[start_offset:stop])
🤖 Prompt for AI Agents
In `@src/sentry/api/paginator.py` around lines 874 - 886, The negative-offset
branch using cursor.offset < 0 is invalid because Django QuerySet does not
support negative slicing; remove the entire special-case block that sets
start_offset = cursor.offset and slices queryset with a negative start and
instead fall back to the safe logic used in the else branch (compute
start_offset using max(0, offset) or offset when cursor.is_prev, compute stop =
start_offset + limit + extra, and do results =
list(queryset[start_offset:stop])); keep enable_advanced_features,
cursor.offset, cursor.is_prev, limit, extra and queryset references intact so
permissions and previous/next behavior remain unchanged.


+        if cursor.is_prev and cursor.value:
+            if results and self.get_item_key(results[0], for_prev=True) == cursor.value:
+                results = results[1:]
+            elif len(results) == offset + limit + extra:
+                results = results[:-1]
Comment on lines +888 to +892


⚠️ Potential issue | 🟠 Major

Boundary check uses offset instead of start_offset, inconsistent with the slicing logic above.

Line 891 checks len(results) == offset + limit + extra, but results were sliced using start_offset (which may differ from offset due to max(0, offset) clamping or the negative-offset branch). When offset is negative and start_offset was clamped to 0, this comparison will produce wrong results.

Proposed fix
-            elif len(results) == offset + limit + extra:
+            elif len(results) == start_offset + limit + extra:
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
         if cursor.is_prev and cursor.value:
             if results and self.get_item_key(results[0], for_prev=True) == cursor.value:
                 results = results[1:]
-            elif len(results) == offset + limit + extra:
+            elif len(results) == start_offset + limit + extra:
                 results = results[:-1]
🤖 Prompt for AI Agents
In `@src/sentry/api/paginator.py` around lines 888 - 892, The boundary check in
the cursor prev branch uses offset instead of start_offset, causing incorrect
trimming when offset was clamped (e.g., negative offset). Update the length
comparison in the cursor.is_prev branch to use start_offset (i.e., check
len(results) == start_offset + limit + extra) and keep the existing
get_item_key-based trimming logic in the same block (refer to cursor.is_prev,
get_item_key, results, start_offset, offset, limit, extra) so the slice decision
matches how results were originally sliced.


+        if cursor.is_prev:
+            results.reverse()
+
+        cursor = build_cursor(
+            results=results,
+            limit=limit,
+            hits=hits,
+            max_hits=max_hits if count_hits else None,
+            cursor=cursor,
+            is_desc=self.desc,
+            key=self.get_item_key,
+            on_results=self.on_results,
+        )
+
+        if self.post_query_filter:
+            cursor.results = self.post_query_filter(cursor.results)
+
+        return cursor

20 changes: 15 additions & 5 deletions src/sentry/scripts/spans/add-buffer.lua
@@ -27,7 +27,7 @@ local main_redirect_key = string.format("span-buf:sr:{%s}", project_and_trace)
 local set_span_id = parent_span_id
 local redirect_depth = 0
 
-for i = 0, 10000 do -- theoretically this limit means that segment trees of depth 10k may not be joined together correctly.
+for i = 0, 1000 do
     local new_set_span = redis.call("hget", main_redirect_key, set_span_id)
     redirect_depth = i
     if not new_set_span or new_set_span == set_span_id then
@@ -40,19 +40,29 @@ end
 redis.call("hset", main_redirect_key, span_id, set_span_id)
 redis.call("expire", main_redirect_key, set_timeout)
 
+local span_count = 0
+
 local set_key = string.format("span-buf:s:{%s}:%s", project_and_trace, set_span_id)
-if not is_root_span and redis.call("scard", span_key) > 0 then
-    redis.call("sunionstore", set_key, set_key, span_key)
+if not is_root_span and redis.call("zcard", span_key) > 0 then
+    span_count = redis.call("zunionstore", set_key, 2, set_key, span_key)
     redis.call("unlink", span_key)
 end
 
 local parent_key = string.format("span-buf:s:{%s}:%s", project_and_trace, parent_span_id)
-if set_span_id ~= parent_span_id and redis.call("scard", parent_key) > 0 then
-    redis.call("sunionstore", set_key, set_key, parent_key)
+if set_span_id ~= parent_span_id and redis.call("zcard", parent_key) > 0 then
+    span_count = redis.call("zunionstore", set_key, 2, set_key, parent_key)
    redis.call("unlink", parent_key)
 end
 redis.call("expire", set_key, set_timeout)
 
+if span_count == 0 then
+    span_count = redis.call("zcard", set_key)
+end
+
+if span_count > 1000 then
+    redis.call("zpopmin", set_key, span_count - 1000)
+end
Comment on lines +58 to +64


⚠️ Potential issue | 🟡 Minor

Span count cap is 1000 in Lua but max_segment_spans defaults to 1001 in Python.

The Lua script hard-caps segments at 1000 spans via zpopmin, while SpansBuffer.__init__ sets max_segment_spans=1001. The Python-side max_segment_spans parameter appears unused after this change (the old segment_span_count_exceeded logic was removed per the AI summary), so the Lua-side cap is now the sole enforcement. This discrepancy is confusing — consider removing the now-dead max_segment_spans parameter or aligning the constants.

Also, zpopmin removes elements with the lowest scores (oldest end_timestamp_precise), meaning the oldest spans are silently dropped. Confirm this is the intended retention policy.

#!/bin/bash
# Verify if max_segment_spans is still used anywhere
rg -n 'max_segment_spans' --type py
🤖 Prompt for AI Agents
In `@src/sentry/scripts/spans/add-buffer.lua` around lines 58 - 64, The Lua script
in spans/add-buffer.lua hard-caps spans at 1000 using zpopmin (variables:
span_count, set_key, zpopmin) while the Python constructor SpansBuffer.__init__
still defines max_segment_spans=1001 that appears unused; either remove the dead
max_segment_spans parameter from SpansBuffer.__init__ (and related references)
or make the Lua cap match that constant, and explicitly document the intended
retention policy: if you intend to drop the oldest spans keep zpopmin, otherwise
switch to zpopmax to evict newest; update SpansBuffer and the Lua script so both
use the same numeric cap and add a comment clarifying that zpopmin removes the
oldest spans (end_timestamp_precise) if that is the intended behavior.
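If the cap is kept, one hedged way to avoid the 1000 vs. 1001 drift is to make Python the single source of truth and pass the limit into the script as an argument; the wiring below (script registration, argument order) is an assumption for illustration, not the repo's actual invocation:

import redis

MAX_SEGMENT_SPANS = 1000  # shared constant; Lua would read it via tonumber(ARGV[4])

client = redis.Redis()
add_buffer = client.register_script(open("add-buffer.lua").read())
add_buffer(
    keys=["span-buf:s:{1:trace}:parent"],
    args=["1:trace", "parent", 300, MAX_SEGMENT_SPANS],  # illustrative layout
)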


 local has_root_span_key = string.format("span-buf:hrs:%s", set_key)
 local has_root_span = redis.call("get", has_root_span_key) == "1" or is_root_span
 if has_root_span then
21 changes: 8 additions & 13 deletions src/sentry/spans/buffer.py
@@ -116,6 +116,7 @@ class Span(NamedTuple):
     parent_span_id: str | None
     project_id: int
     payload: bytes
+    end_timestamp_precise: float
     is_segment_span: bool = False
 
     def effective_parent_id(self):
@@ -193,7 +194,9 @@ def process_spans(self, spans: Sequence[Span], now: int):
         with self.client.pipeline(transaction=False) as p:
             for (project_and_trace, parent_span_id), subsegment in trees.items():
                 set_key = f"span-buf:s:{{{project_and_trace}}}:{parent_span_id}"
-                p.sadd(set_key, *[span.payload for span in subsegment])
+                p.zadd(
+                    set_key, {span.payload: span.end_timestamp_precise for span in subsegment}
+                )
 
             p.execute()
 
@@ -428,13 +431,13 @@ def _load_segment_data(self, segment_keys: list[SegmentKey]) -> dict[SegmentKey,
         with self.client.pipeline(transaction=False) as p:
             current_keys = []
             for key, cursor in cursors.items():
-                p.sscan(key, cursor=cursor, count=self.segment_page_size)
+                p.zscan(key, cursor=cursor, count=self.segment_page_size)
                 current_keys.append(key)
 
             results = p.execute()
 
-            for key, (cursor, spans) in zip(current_keys, results):
-                sizes[key] += sum(len(span) for span in spans)
+            for key, (cursor, zscan_values) in zip(current_keys, results):
+                sizes[key] += sum(len(span) for span, _ in zscan_values)
                 if sizes[key] > self.max_segment_bytes:
                     metrics.incr("spans.buffer.flush_segments.segment_size_exceeded")
                     logger.error("Skipping too large segment, byte size %s", sizes[key])
@@ -443,15 +446,7 @@ def _load_segment_data(self, segment_keys: list[SegmentKey]) -> dict[SegmentKey,
                     del cursors[key]
                     continue
 
-                payloads[key].extend(spans)
-                if len(payloads[key]) > self.max_segment_spans:
-                    metrics.incr("spans.buffer.flush_segments.segment_span_count_exceeded")
-                    logger.error("Skipping too large segment, span count %s", len(payloads[key]))
-
-                    del payloads[key]
-                    del cursors[key]
-                    continue
-
+                payloads[key].extend(span for span, _ in zscan_values)
                 if cursor == 0:
                     del cursors[key]
                 else:
5 changes: 4 additions & 1 deletion src/sentry/spans/consumers/process/factory.py
@@ -2,6 +2,7 @@
 import time
 from collections.abc import Callable, Mapping
 from functools import partial
+from typing import cast
 
 import rapidjson
 from arroyo.backends.kafka.consumer import KafkaPayload
@@ -10,6 +11,7 @@
 from arroyo.processing.strategies.commit import CommitOffsets
 from arroyo.processing.strategies.run_task import RunTask
 from arroyo.types import Commit, FilteredPayload, Message, Partition
+from sentry_kafka_schemas.schema_types.ingest_spans_v1 import SpanEvent
 
 from sentry.spans.buffer import Span, SpansBuffer
 from sentry.spans.consumers.process.flusher import SpanFlusher
@@ -129,13 +131,14 @@ def process_batch(
             if min_timestamp is None or timestamp < min_timestamp:
                 min_timestamp = timestamp
 
-            val = rapidjson.loads(payload.value)
+            val = cast(SpanEvent, rapidjson.loads(payload.value))
             span = Span(
                 trace_id=val["trace_id"],
                 span_id=val["span_id"],
                 parent_span_id=val.get("parent_span_id"),
                 project_id=val["project_id"],
                 payload=payload.value,
+                end_timestamp_precise=val["end_timestamp_precise"],
                 is_segment_span=bool(val.get("parent_span_id") is None or val.get("is_remote")),
             )
             spans.append(span)
2 changes: 2 additions & 0 deletions src/sentry/utils/cursors.py
@@ -23,6 +23,8 @@ def __init__(
         has_results: bool | None = None,
     ):
         self.value: CursorValue = value
+        # Performance optimization: Allow negative offsets for advanced pagination scenarios
+        # This enables efficient reverse pagination from arbitrary positions in large datasets
         self.offset = int(offset)
Comment on lines +26 to 28


⚠️ Potential issue | 🟠 Major

These comments are misleading — no behavior changed here, and negative offsets crash Django QuerySets.

These comments suggest that negative offsets are intentionally supported, but int(offset) has always accepted negative values. The real issue is that the downstream consumers (BasePaginator.get_result and OptimizedCursorPaginator.get_result) now pass negative offsets into Django QuerySet slicing, which raises ValueError("Negative indexing is not supported."). These comments provide false assurance for a feature that is broken at the ORM layer.

🤖 Prompt for AI Agents
In `@src/sentry/utils/cursors.py` around lines 26 - 28, The comment in cursors.py
falsely implies negative offsets are supported while passing negative offsets
into Django QuerySet slicing raises ValueError; fix by preventing negative
offsets before ORM slicing—either clamp offsets to zero or raise a clear
ValueError. Update the code that sets/uses the offset (e.g., the self.offset
assignment in cursors.py and the downstream handlers BasePaginator.get_result
and OptimizedCursorPaginator.get_result) to validate: compute offset =
int(offset), then if offset < 0 either set offset = 0 (or raise a descriptive
error) and adjust all places that slice QuerySets to use the validated value;
also update the comment to accurately reflect the behavior.
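A small sketch of the validation the prompt suggests, applied at parse time so a hostile cursor string never reaches QuerySet slicing; whether to clamp or raise is a policy choice, and this version raises:

def parse_offset(raw: str) -> int:
    # Cursor strings are user-controlled, so validate before any ORM slicing.
    offset = int(raw)
    if offset < 0:
        raise ValueError("cursor offset must be non-negative")
    return offset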

         self.is_prev = bool(is_prev)
         self.has_results = has_results
2 changes: 2 additions & 0 deletions tests/sentry/spans/consumers/process/test_consumer.py
@@ -41,6 +41,7 @@ def add_commit(offsets, force=False):
                     "project_id": 12,
                     "span_id": "a" * 16,
                     "trace_id": "b" * 32,
+                    "end_timestamp_precise": 1700000000.0,
                 }
             ).encode("ascii"),
             [],
@@ -69,6 +70,7 @@ def add_commit(offsets, force=False):
             "segment_id": "aaaaaaaaaaaaaaaa",
             "span_id": "aaaaaaaaaaaaaaaa",
             "trace_id": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
+            "end_timestamp_precise": 1700000000.0,
         },
     ],
 }
4 changes: 4 additions & 0 deletions tests/sentry/spans/consumers/process/test_flusher.py
@@ -44,20 +44,23 @@ def append(msg):
             span_id="a" * 16,
             parent_span_id="b" * 16,
             project_id=1,
+            end_timestamp_precise=now,
         ),
         Span(
             payload=_payload(b"d" * 16),
             trace_id=trace_id,
             span_id="d" * 16,
             parent_span_id="b" * 16,
             project_id=1,
+            end_timestamp_precise=now,
         ),
         Span(
             payload=_payload(b"c" * 16),
             trace_id=trace_id,
             span_id="c" * 16,
             parent_span_id="b" * 16,
             project_id=1,
+            end_timestamp_precise=now,
        ),
         Span(
             payload=_payload(b"b" * 16),
@@ -66,6 +69,7 @@ def append(msg):
             parent_span_id=None,
             is_segment_span=True,
             project_id=1,
+            end_timestamp_precise=now,
         ),
     ]