From b94997495ce9787628dca32202f5bed8e7869f3e Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 22 May 2026 10:13:34 +0200 Subject: [PATCH 01/14] feat(pydantic-ai): Support span streaming --- .../pydantic_ai/spans/ai_client.py | 75 +- .../pydantic_ai/spans/execute_tool.py | 49 +- .../pydantic_ai/spans/invoke_agent.py | 24 +- .../integrations/pydantic_ai/spans/utils.py | 20 +- sentry_sdk/integrations/pydantic_ai/utils.py | 39 +- .../pydantic_ai/test_pydantic_ai.py | 665 +++++++++++++++--- 6 files changed, 732 insertions(+), 140 deletions(-) diff --git a/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py b/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py index 800ec16e36..33e170e76d 100644 --- a/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py +++ b/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py @@ -8,6 +8,8 @@ truncate_and_annotate_messages, ) from sentry_sdk.consts import OP, SPANDATA +from sentry_sdk.traces import StreamedSpan +from sentry_sdk.tracing_utils import has_span_streaming_enabled from sentry_sdk.utils import safe_serialize from ..consts import SPAN_ORIGIN @@ -27,7 +29,7 @@ ) if TYPE_CHECKING: - from typing import Any, Dict, List + from typing import Any, Dict, List, Union from pydantic_ai.messages import ModelMessage, SystemPromptPart # type: ignore @@ -97,7 +99,9 @@ def _get_system_instructions( return permanent_instructions, current_instructions -def _set_input_messages(span: "sentry_sdk.tracing.Span", messages: "Any") -> None: +def _set_input_messages( + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", messages: "Any" +) -> None: """Set input messages data on a span.""" if not _should_send_prompts(): return @@ -107,14 +111,24 @@ def _set_input_messages(span: "sentry_sdk.tracing.Span", messages: "Any") -> Non permanent_instructions, current_instructions = _get_system_instructions(messages) if len(permanent_instructions) > 0 or len(current_instructions) > 0: - span.set_data( - SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, - json.dumps( - _transform_system_instructions( - permanent_instructions, current_instructions - ) - ), - ) + if isinstance(span, StreamedSpan): + span.set_attribute( + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, + json.dumps( + _transform_system_instructions( + permanent_instructions, current_instructions + ) + ), + ) + else: + span.set_data( + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, + json.dumps( + _transform_system_instructions( + permanent_instructions, current_instructions + ) + ), + ) try: formatted_messages = [] @@ -198,7 +212,9 @@ def _set_input_messages(span: "sentry_sdk.tracing.Span", messages: "Any") -> Non pass -def _set_output_data(span: "sentry_sdk.tracing.Span", response: "Any") -> None: +def _set_output_data( + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", response: "Any" +) -> None: """Set output data on a span.""" if not _should_send_prompts(): return @@ -206,7 +222,11 @@ def _set_output_data(span: "sentry_sdk.tracing.Span", response: "Any") -> None: if not response: return - span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, response.model_name) + set_on_span = ( + span.set_attribute if isinstance(span, StreamedSpan) else span.set_data + ) + set_on_span(SPANDATA.GEN_AI_RESPONSE_MODEL, response.model_name) + try: # Extract text from ModelResponse if hasattr(response, "parts"): @@ -230,7 +250,7 @@ def _set_output_data(span: "sentry_sdk.tracing.Span", response: "Any") -> None: set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, texts) if tool_calls: - span.set_data( + set_on_span( SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, safe_serialize(tool_calls) ) @@ -257,20 +277,31 @@ def ai_client_span( model_name = _get_model_name(model_obj) or "unknown" - span = sentry_sdk.start_span( - op=OP.GEN_AI_CHAT, - name=f"chat {model_name}", - origin=SPAN_ORIGIN, - ) + span_streaming = has_span_streaming_enabled(sentry_sdk.get_client().options) + if span_streaming: + span = sentry_sdk.traces.start_span( + name=f"chat {model_name}", + attributes={ + "sentry.op": OP.GEN_AI_CHAT, + "sentry.origin": SPAN_ORIGIN, + SPANDATA.GEN_AI_OPERATION_NAME: "chat", + SPANDATA.GEN_AI_RESPONSE_STREAMING: get_is_streaming(), + }, + ) + else: + span = sentry_sdk.start_span( + op=OP.GEN_AI_CHAT, + name=f"chat {model_name}", + origin=SPAN_ORIGIN, + ) - span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "chat") + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "chat") + # Set streaming flag from contextvar + span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, get_is_streaming()) _set_agent_data(span, agent) _set_model_data(span, model, model_settings) - # Set streaming flag from contextvar - span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, get_is_streaming()) - # Add available tools if agent is available agent_obj = agent or get_current_agent() _set_available_tools(span, agent_obj) diff --git a/sentry_sdk/integrations/pydantic_ai/spans/execute_tool.py b/sentry_sdk/integrations/pydantic_ai/spans/execute_tool.py index 5b4cd1ac94..926a9f48a8 100644 --- a/sentry_sdk/integrations/pydantic_ai/spans/execute_tool.py +++ b/sentry_sdk/integrations/pydantic_ai/spans/execute_tool.py @@ -2,13 +2,15 @@ import sentry_sdk from sentry_sdk.consts import OP, SPANDATA +from sentry_sdk.traces import StreamedSpan +from sentry_sdk.tracing_utils import has_span_streaming_enabled from sentry_sdk.utils import safe_serialize from ..consts import SPAN_ORIGIN from ..utils import _set_agent_data, _should_send_prompts if TYPE_CHECKING: - from typing import Any, Optional + from typing import Any, Optional, Union from pydantic_ai._tool_manager import ToolDefinition # type: ignore @@ -27,17 +29,33 @@ def execute_tool_span( agent: The agent executing the tool tool_definition: The definition of the tool, if available """ - span = sentry_sdk.start_span( - op=OP.GEN_AI_EXECUTE_TOOL, - name=f"execute_tool {tool_name}", - origin=SPAN_ORIGIN, - ) + span_streaming = has_span_streaming_enabled(sentry_sdk.get_client().options) + if span_streaming: + span = sentry_sdk.traces.start_span( + name=f"execute_tool {tool_name}", + attributes={ + "sentry.op": OP.GEN_AI_EXECUTE_TOOL, + "sentry.origin": SPAN_ORIGIN, + SPANDATA.GEN_AI_OPERATION_NAME: "execute_tool", + SPANDATA.GEN_AI_TOOL_NAME: tool_name, + }, + ) + + set_on_span = span.set_attribute + else: + span = sentry_sdk.start_span( + op=OP.GEN_AI_EXECUTE_TOOL, + name=f"execute_tool {tool_name}", + origin=SPAN_ORIGIN, + ) - span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "execute_tool") - span.set_data(SPANDATA.GEN_AI_TOOL_NAME, tool_name) + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "execute_tool") + span.set_data(SPANDATA.GEN_AI_TOOL_NAME, tool_name) + + set_on_span = span.set_data if tool_definition is not None and hasattr(tool_definition, "description"): - span.set_data( + set_on_span( SPANDATA.GEN_AI_TOOL_DESCRIPTION, tool_definition.description, ) @@ -45,15 +63,22 @@ def execute_tool_span( _set_agent_data(span, agent) if _should_send_prompts() and tool_args is not None: - span.set_data(SPANDATA.GEN_AI_TOOL_INPUT, safe_serialize(tool_args)) + set_on_span(SPANDATA.GEN_AI_TOOL_INPUT, safe_serialize(tool_args)) return span -def update_execute_tool_span(span: "sentry_sdk.tracing.Span", result: "Any") -> None: +def update_execute_tool_span( + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", result: "Any" +) -> None: """Update the execute tool span with the result.""" if not span: return - if _should_send_prompts() and result is not None: + if not _should_send_prompts() or result is None: + return + + if isinstance(span, StreamedSpan): + span.set_attribute(SPANDATA.GEN_AI_TOOL_OUTPUT, safe_serialize(result)) + else: span.set_data(SPANDATA.GEN_AI_TOOL_OUTPUT, safe_serialize(result)) diff --git a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py index e9fdc90fe0..f98808001c 100644 --- a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py +++ b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py @@ -8,6 +8,7 @@ truncate_and_annotate_messages, ) from sentry_sdk.consts import OP, SPANDATA +from sentry_sdk.tracing_utils import has_span_streaming_enabled from ..consts import SPAN_ORIGIN from ..utils import ( @@ -44,13 +45,24 @@ def invoke_agent_span( if agent and getattr(agent, "name", None): name = agent.name - span = get_start_span_function()( - op=OP.GEN_AI_INVOKE_AGENT, - name=f"invoke_agent {name}", - origin=SPAN_ORIGIN, - ) + span_streaming = has_span_streaming_enabled(sentry_sdk.get_client().options) + if span_streaming: + span = sentry_sdk.traces.start_span( + name=f"invoke_agent {name}", + attributes={ + "sentry.op": OP.GEN_AI_INVOKE_AGENT, + "sentry.origin": SPAN_ORIGIN, + SPANDATA.GEN_AI_OPERATION_NAME: "invoke_agent", + }, + ) + else: + span = get_start_span_function()( + op=OP.GEN_AI_INVOKE_AGENT, + name=f"invoke_agent {name}", + origin=SPAN_ORIGIN, + ) - span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") _set_agent_data(span, agent) _set_model_data(span, model, model_settings) diff --git a/sentry_sdk/integrations/pydantic_ai/spans/utils.py b/sentry_sdk/integrations/pydantic_ai/spans/utils.py index 1564d2d9d0..330496c6b2 100644 --- a/sentry_sdk/integrations/pydantic_ai/spans/utils.py +++ b/sentry_sdk/integrations/pydantic_ai/spans/utils.py @@ -7,6 +7,7 @@ from sentry_sdk.ai.consts import DATA_URL_BASE64_REGEX from sentry_sdk.ai.utils import get_modality_from_mime_type from sentry_sdk.consts import SPANDATA +from sentry_sdk.traces import StreamedSpan if TYPE_CHECKING: from typing import Any, Dict, Union @@ -46,7 +47,8 @@ def _serialize_binary_content_item(item: "Any") -> "Dict[str, Any]": def _set_usage_data( - span: "sentry_sdk.tracing.Span", usage: "Union[RequestUsage, RunUsage]" + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", + usage: "Union[RequestUsage, RunUsage]", ) -> None: """Set token usage data on a span. @@ -60,24 +62,26 @@ def _set_usage_data( if usage is None: return + set_on_span = ( + span.set_attribute if isinstance(span, StreamedSpan) else span.set_data + ) + if hasattr(usage, "input_tokens") and usage.input_tokens is not None: - span.set_data(SPANDATA.GEN_AI_USAGE_INPUT_TOKENS, usage.input_tokens) + set_on_span(SPANDATA.GEN_AI_USAGE_INPUT_TOKENS, usage.input_tokens) # Pydantic AI uses cache_read_tokens (not input_tokens_cached) if hasattr(usage, "cache_read_tokens") and usage.cache_read_tokens is not None: - span.set_data( - SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED, usage.cache_read_tokens - ) + set_on_span(SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED, usage.cache_read_tokens) # Pydantic AI uses cache_write_tokens (not input_tokens_cache_write) if hasattr(usage, "cache_write_tokens") and usage.cache_write_tokens is not None: - span.set_data( + set_on_span( SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE, usage.cache_write_tokens, ) if hasattr(usage, "output_tokens") and usage.output_tokens is not None: - span.set_data(SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS, usage.output_tokens) + set_on_span(SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS, usage.output_tokens) if hasattr(usage, "total_tokens") and usage.total_tokens is not None: - span.set_data(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS, usage.total_tokens) + set_on_span(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS, usage.total_tokens) diff --git a/sentry_sdk/integrations/pydantic_ai/utils.py b/sentry_sdk/integrations/pydantic_ai/utils.py index 896ae20b9d..a82608e543 100644 --- a/sentry_sdk/integrations/pydantic_ai/utils.py +++ b/sentry_sdk/integrations/pydantic_ai/utils.py @@ -4,10 +4,11 @@ import sentry_sdk from sentry_sdk.consts import SPANDATA from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.traces import StreamedSpan from sentry_sdk.utils import event_from_exception, safe_serialize if TYPE_CHECKING: - from typing import Any, Optional + from typing import Any, Optional, Union # Store the current agent context in a contextvar for re-entrant safety @@ -68,7 +69,9 @@ def _should_send_prompts() -> bool: return getattr(integration, "include_prompts", False) -def _set_agent_data(span: "sentry_sdk.tracing.Span", agent: "Any") -> None: +def _set_agent_data( + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", agent: "Any" +) -> None: """Set agent-related data on a span. Args: @@ -82,7 +85,10 @@ def _set_agent_data(span: "sentry_sdk.tracing.Span", agent: "Any") -> None: agent_obj = get_current_agent() if agent_obj and hasattr(agent_obj, "name") and agent_obj.name: - span.set_data(SPANDATA.GEN_AI_AGENT_NAME, agent_obj.name) + if isinstance(span, StreamedSpan): + span.set_attribute(SPANDATA.GEN_AI_AGENT_NAME, agent_obj.name) + else: + span.set_data(SPANDATA.GEN_AI_AGENT_NAME, agent_obj.name) def _get_model_name(model_obj: "Any") -> "Optional[str]": @@ -128,15 +134,19 @@ def _set_model_data( if not model_obj and agent_obj and hasattr(agent_obj, "model"): model_obj = agent_obj.model + set_on_span = ( + span.set_attribute if isinstance(span, StreamedSpan) else span.set_data + ) + if model_obj: # Set system from model if hasattr(model_obj, "system"): - span.set_data(SPANDATA.GEN_AI_SYSTEM, model_obj.system) + set_on_span(SPANDATA.GEN_AI_SYSTEM, model_obj.system) # Set model name model_name = _get_model_name(model_obj) if model_name: - span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model_name) + set_on_span(SPANDATA.GEN_AI_REQUEST_MODEL, model_name) # Extract model settings settings = model_settings @@ -157,17 +167,19 @@ def _set_model_data( for setting_name, spandata_key in settings_map.items(): value = settings.get(setting_name) if value is not None: - span.set_data(spandata_key, value) + set_on_span(spandata_key, value) else: # Fallback for object-style settings for setting_name, spandata_key in settings_map.items(): if hasattr(settings, setting_name): value = getattr(settings, setting_name) if value is not None: - span.set_data(spandata_key, value) + set_on_span(spandata_key, value) -def _set_available_tools(span: "sentry_sdk.tracing.Span", agent: "Any") -> None: +def _set_available_tools( + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", agent: "Any" +) -> None: """Set available tools data on a span from an agent's function toolset. Args: @@ -197,9 +209,14 @@ def _set_available_tools(span: "sentry_sdk.tracing.Span", agent: "Any") -> None: tools.append(tool_info) if tools: - span.set_data( - SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, safe_serialize(tools) - ) + if isinstance(span, StreamedSpan): + span.set_attribute( + SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, safe_serialize(tools) + ) + else: + span.set_data( + SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, safe_serialize(tools) + ) except Exception: # If we can't extract tools, just skip it pass diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index cccf5d49bc..4931b85193 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -55,6 +55,7 @@ def inner(): return inner +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_agent_run_async( @@ -63,6 +64,7 @@ async def test_agent_run_async( capture_items, get_test_agent, stream_gen_ai_spans, + span_streaming, ): """ Test that the integration creates spans for async agent runs. @@ -72,11 +74,60 @@ async def test_agent_run_async( traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) test_agent = get_test_agent() - if stream_gen_ai_spans: + if span_streaming: + items = capture_items("transaction", "span") + + result = await test_agent.run( + ["Message demonstrating the absence of truncation.", "Test input"] + ) + + assert result is not None + assert result.output is not None + + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + + assert spans[1]["name"] == "invoke_agent test_agent" + assert spans[1]["attributes"]["sentry.origin"] == "auto.ai.pydantic_ai" + + assert spans[1]["attributes"]["sentry.op"] == "gen_ai.invoke_agent" + + # Find child span types (invoke_agent is the transaction, not a child span) + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + assert len(chat_spans) >= 1 + + # Check chat span + chat_span = chat_spans[0] + assert "chat" in chat_span["name"] + assert chat_span["attributes"]["gen_ai.operation.name"] == "chat" + assert chat_span["attributes"]["gen_ai.response.streaming"] is False + assert json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + }, + { + "type": "text", + "text": "Test input", + }, + ], + } + ] + assert "gen_ai.usage.input_tokens" in chat_span["attributes"] + assert "gen_ai.usage.output_tokens" in chat_span["attributes"] + elif stream_gen_ai_spans: items = capture_items("transaction", "span") result = await test_agent.run( @@ -95,6 +146,7 @@ async def test_agent_run_async( # The transaction itself should have invoke_agent data assert transaction["contexts"]["trace"]["op"] == "gen_ai.invoke_agent" + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] # Find child span types (invoke_agent is the transaction, not a child span) chat_spans = [ @@ -158,6 +210,7 @@ async def test_agent_run_async( assert "gen_ai.usage.output_tokens" in chat_span["data"] +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_agent_run_async_model_error( @@ -165,11 +218,13 @@ async def test_agent_run_async_model_error( capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) def failing_model(messages, info): @@ -180,7 +235,7 @@ def failing_model(messages, info): name="test_agent", ) - if stream_gen_ai_spans: + if span_streaming: items = capture_items("event", "transaction", "span") with pytest.raises(RuntimeError, match="model exploded"): @@ -189,6 +244,21 @@ def failing_model(messages, info): (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 2 + + assert spans[0]["status"] == "error" + elif stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") + + with pytest.raises(RuntimeError, match="model exploded"): + await agent.run("Test input") + + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" + + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 @@ -208,6 +278,7 @@ def failing_model(messages, info): assert spans[0]["status"] == "internal_error" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_agent_run_sync( sentry_init, @@ -215,6 +286,7 @@ def test_agent_run_sync( capture_items, get_test_agent, stream_gen_ai_spans, + span_streaming, ): """ Test that the integration creates spans for sync agent runs. @@ -224,11 +296,40 @@ def test_agent_run_sync( traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) test_agent = get_test_agent() - if stream_gen_ai_spans: + if span_streaming: + items = capture_items("transaction", "span") + + result = test_agent.run_sync( + ["Message demonstrating the absence of truncation.", "Test input"] + ) + + assert result is not None + assert result.output is not None + + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + + # Find span types + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + + assert spans[1]["name"] == "invoke_agent test_agent" + assert spans[1]["attributes"]["sentry.origin"] == "auto.ai.pydantic_ai" + + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + assert len(chat_spans) >= 1 + + # Verify streaming flag is False for sync + for chat_span in chat_spans: + assert chat_span["attributes"]["gen_ai.response.streaming"] is False + elif stream_gen_ai_spans: items = capture_items("transaction", "span") result = test_agent.run_sync( @@ -246,6 +347,7 @@ def test_agent_run_sync( assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" # Find span types + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" @@ -279,17 +381,20 @@ def test_agent_run_sync( assert chat_span["data"]["gen_ai.response.streaming"] is False +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_agent_run_sync_model_error( sentry_init, capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) def failing_model(messages, info): @@ -300,7 +405,7 @@ def failing_model(messages, info): name="test_agent", ) - if stream_gen_ai_spans: + if span_streaming: items = capture_items("event", "transaction", "span") with pytest.raises(RuntimeError, match="model exploded"): @@ -309,6 +414,21 @@ def failing_model(messages, info): (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 2 + + assert spans[0]["status"] == "error" + elif stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") + + with pytest.raises(RuntimeError, match="model exploded"): + agent.run_sync("Test input") + + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" + + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 @@ -328,6 +448,7 @@ def failing_model(messages, info): assert spans[0]["status"] == "internal_error" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_agent_run_stream( @@ -336,6 +457,7 @@ async def test_agent_run_stream( capture_items, get_test_agent, stream_gen_ai_spans, + span_streaming, ): """ Test that the integration creates spans for streaming agent runs. @@ -345,11 +467,59 @@ async def test_agent_run_stream( traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) test_agent = get_test_agent() - if stream_gen_ai_spans: + if span_streaming: + items = capture_items("transaction", "span") + + async with test_agent.run_stream( + ["Message demonstrating the absence of truncation.", "Test input"] + ) as result: + # Consume the stream + async for _ in result.stream_output(): + pass + + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + + assert spans[1]["name"] == "invoke_agent test_agent" + assert spans[1]["attributes"]["sentry.origin"] == "auto.ai.pydantic_ai" + + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + assert len(chat_spans) >= 1 + + # Verify streaming flag is True for streaming + for chat_span in chat_spans: + assert chat_span["attributes"]["gen_ai.response.streaming"] is True + assert json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + }, + { + "type": "text", + "text": "Test input", + }, + ], + } + ] + assert "gen_ai.usage.input_tokens" in chat_span["attributes"] + # Streaming responses should still have output data + assert ( + "gen_ai.response.text" in chat_span["attributes"] + or "gen_ai.response.model" in chat_span["attributes"] + ) + elif stream_gen_ai_spans: items = capture_items("transaction", "span") async with test_agent.run_stream( @@ -367,6 +537,7 @@ async def test_agent_run_stream( assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" # Find chat spans + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" @@ -430,6 +601,7 @@ async def test_agent_run_stream( ) +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_agent_run_stream_events( @@ -438,6 +610,7 @@ async def test_agent_run_stream_events( capture_items, get_test_agent, stream_gen_ai_spans, + span_streaming, ): """ Test that run_stream_events creates spans (it uses run internally, so non-streaming). @@ -447,12 +620,41 @@ async def test_agent_run_stream_events( traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) # Consume all events test_agent = get_test_agent() - if stream_gen_ai_spans: + if span_streaming: + items = capture_items("transaction", "span") + + if PYDANTIC_AI_VERSION > (2,): + async with test_agent.run_stream_events( + ["Message demonstrating the absence of truncation.", "Test input"] + ) as stream_events: + async for _ in stream_events: + pass + else: + async for _ in test_agent.run_stream_events( + ["Message demonstrating the absence of truncation.", "Test input"] + ): + pass + + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + + assert spans[1]["name"] == "invoke_agent test_agent" + + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + assert len(chat_spans) >= 1 + + # run_stream_events uses run() internally, so streaming should be False + for chat_span in chat_spans: + assert chat_span["attributes"]["gen_ai.response.streaming"] is False + elif stream_gen_ai_spans: items = capture_items("transaction", "span") if PYDANTIC_AI_VERSION > (2,): @@ -474,6 +676,7 @@ async def test_agent_run_stream_events( assert transaction["transaction"] == "invoke_agent test_agent" # Find chat spans + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" @@ -509,6 +712,7 @@ async def test_agent_run_stream_events( assert chat_span["data"]["gen_ai.response.streaming"] is False +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_agent_with_tools( @@ -517,6 +721,7 @@ async def test_agent_with_tools( capture_items, get_test_agent, stream_gen_ai_spans, + span_streaming, ): """ Test that tool execution creates execute_tool spans. @@ -526,6 +731,7 @@ async def test_agent_with_tools( traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) test_agent = get_test_agent() @@ -535,13 +741,14 @@ def add_numbers(a: int, b: int) -> int: """Add two numbers together.""" return a + b - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") result = await test_agent.run("What is 5 + 3?") assert result is not None + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] # Find child span types (invoke_agent is the transaction, not a child span) @@ -606,6 +813,7 @@ def add_numbers(a: int, b: int) -> int: assert "add_numbers" in available_tools_str +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "handled_tool_call_exceptions", @@ -619,6 +827,7 @@ async def test_agent_with_tool_model_retry( get_test_agent, handled_tool_call_exceptions, stream_gen_ai_spans, + span_streaming, ): """ Test that a handled exception is captured when a tool raises ModelRetry. @@ -632,6 +841,7 @@ async def test_agent_with_tool_model_retry( traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) retries = 0 @@ -647,7 +857,7 @@ def add_numbers(a: int, b: int) -> float: raise ModelRetry(message="Try again with the same arguments.") return a + b - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("event", "transaction", "span") result = await test_agent.run("What is 5 + 3?") @@ -659,6 +869,7 @@ def add_numbers(a: int, b: int) -> float: assert error["level"] == "error" assert error["exception"]["values"][0]["mechanism"]["handled"] + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] # Find child span types (invoke_agent is the transaction, not a child span) chat_spans = [ @@ -745,6 +956,7 @@ def add_numbers(a: int, b: int) -> float: assert "add_numbers" in available_tools_str +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "handled_tool_call_exceptions", @@ -758,6 +970,7 @@ async def test_agent_with_tool_validation_error( get_test_agent, handled_tool_call_exceptions, stream_gen_ai_spans, + span_streaming, ): """ Test that a handled exception is captured when a tool has unsatisfiable constraints. @@ -771,6 +984,7 @@ async def test_agent_with_tool_validation_error( traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) test_agent = get_test_agent() @@ -780,7 +994,7 @@ def add_numbers(a: Annotated[int, Field(gt=0, lt=0)], b: int) -> int: """Add two numbers together.""" return a + b - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("event", "transaction", "span") result = None @@ -798,6 +1012,7 @@ def add_numbers(a: Annotated[int, Field(gt=0, lt=0)], b: int) -> int: assert error["level"] == "error" assert error["exception"]["values"][0]["mechanism"]["handled"] + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" @@ -874,6 +1089,7 @@ def add_numbers(a: Annotated[int, Field(gt=0, lt=0)], b: int) -> int: assert "add_numbers" in available_tools_str +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_agent_with_tools_streaming( @@ -882,6 +1098,7 @@ async def test_agent_with_tools_streaming( capture_items, get_test_agent, stream_gen_ai_spans, + span_streaming, ): """ Test that tool execution works correctly with streaming. @@ -891,6 +1108,7 @@ async def test_agent_with_tools_streaming( traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) test_agent = get_test_agent() @@ -900,13 +1118,14 @@ def multiply(a: int, b: int) -> int: """Multiply two numbers.""" return a * b - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") async with test_agent.run_stream("What is 7 times 8?") as result: async for _ in result.stream_output(): pass + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] # Find span types @@ -959,6 +1178,7 @@ def multiply(a: int, b: int) -> int: assert "gen_ai.tool.output" in tool_span["data"] +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_model_settings( @@ -967,6 +1187,7 @@ async def test_model_settings( capture_items, get_test_agent_with_settings, stream_gen_ai_spans, + span_streaming, ): """ Test that model settings are captured in spans. @@ -975,15 +1196,17 @@ async def test_model_settings( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) test_agent_with_settings = get_test_agent_with_settings() - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") await test_agent_with_settings.run("Test input") + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] # Find chat span @@ -1016,6 +1239,7 @@ async def test_model_settings( assert chat_span["data"].get("gen_ai.request.top_p") == 0.9 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( @@ -1034,6 +1258,7 @@ async def test_system_prompt_attribute( send_default_pii, include_prompts, stream_gen_ai_spans, + span_streaming, ): """ Test that system prompts are included as the first message. @@ -1049,13 +1274,15 @@ async def test_system_prompt_attribute( traces_sample_rate=1.0, send_default_pii=send_default_pii, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") await agent.run("Hello") + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] # The transaction IS the invoke_agent span, check for messages in chat spans instead @@ -1104,6 +1331,7 @@ async def test_system_prompt_attribute( assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["data"] +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_error_handling( @@ -1111,6 +1339,7 @@ async def test_error_handling( capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): """ Test error handling in agent execution. @@ -1126,9 +1355,21 @@ async def test_error_handling( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) - if stream_gen_ai_spans: + if span_streaming: + items = capture_items("transaction", "span") + + # Simple run that should succeed + await agent.run("Hello") + + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + + assert spans[1]["is_segment"] is True + assert spans[1]["status"] != "error" # Could be None or some other status + elif stream_gen_ai_spans: items = capture_items("transaction", "span") # Simple run that should succeed @@ -1136,6 +1377,11 @@ async def test_error_handling( # At minimum, we should have a transaction transaction = next(item.payload for item in items if item.type == "transaction") + + assert transaction["transaction"] == "invoke_agent test_error" + # Transaction should complete successfully (status key may not exist if no error) + trace_status = transaction["contexts"]["trace"].get("status") + assert trace_status != "error" # Could be None or some other status else: events = capture_events() @@ -1146,12 +1392,13 @@ async def test_error_handling( assert len(events) >= 1 transaction = [e for e in events if e.get("type") == "transaction"][0] - assert transaction["transaction"] == "invoke_agent test_error" - # Transaction should complete successfully (status key may not exist if no error) - trace_status = transaction["contexts"]["trace"].get("status") - assert trace_status != "error" # Could be None or some other status + assert transaction["transaction"] == "invoke_agent test_error" + # Transaction should complete successfully (status key may not exist if no error) + trace_status = transaction["contexts"]["trace"].get("status") + assert trace_status != "error" # Could be None or some other status +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_without_pii( @@ -1160,6 +1407,7 @@ async def test_without_pii( capture_items, get_test_agent, stream_gen_ai_spans, + span_streaming, ): """ Test that PII is not captured when send_default_pii is False. @@ -1169,14 +1417,16 @@ async def test_without_pii( traces_sample_rate=1.0, send_default_pii=False, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") test_agent = get_test_agent() await test_agent.run("Sensitive input") + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] # Find child spans (invoke_agent is the transaction, not a child span) @@ -1206,6 +1456,7 @@ async def test_without_pii( assert "gen_ai.response.text" not in span["data"] +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_without_pii_tools( @@ -1214,6 +1465,7 @@ async def test_without_pii_tools( capture_items, get_test_agent, stream_gen_ai_spans, + span_streaming, ): """ Test that tool input/output are not captured when send_default_pii is False. @@ -1223,6 +1475,7 @@ async def test_without_pii_tools( traces_sample_rate=1.0, send_default_pii=False, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) test_agent = get_test_agent() @@ -1232,11 +1485,12 @@ def sensitive_tool(data: str) -> str: """A tool with sensitive data.""" return f"Processed: {data}" - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") await test_agent.run("Use sensitive tool with private data") + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] # Find tool spans @@ -1267,6 +1521,7 @@ def sensitive_tool(data: str) -> str: assert "gen_ai.tool.output" not in tool_span["data"] +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_multiple_agents_concurrent( @@ -1275,6 +1530,7 @@ async def test_multiple_agents_concurrent( capture_items, get_test_agent, stream_gen_ai_spans, + span_streaming, ): """ Test that multiple agents can run concurrently without interfering. @@ -1283,6 +1539,7 @@ async def test_multiple_agents_concurrent( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) test_agent = get_test_agent() @@ -1290,7 +1547,21 @@ async def test_multiple_agents_concurrent( async def run_agent(input_text): return await test_agent.run(input_text) - if stream_gen_ai_spans: + if span_streaming: + items = capture_items("transaction", "span") + + # Run 3 agents concurrently + results = await asyncio.gather(*[run_agent(f"Input {i}") for i in range(3)]) + + assert len(results) == 3 + + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + if span["is_segment"] is False: + continue + assert span["name"] == "invoke_agent test_agent" + elif stream_gen_ai_spans: items = capture_items("transaction", "span") # Run 3 agents concurrently @@ -1320,6 +1591,7 @@ async def run_agent(input_text): assert len(transaction["spans"]) >= 1 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_message_history( @@ -1327,6 +1599,7 @@ async def test_message_history( capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): """ Test that full conversation history is captured in chat spans. @@ -1341,6 +1614,7 @@ async def test_message_history( traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) # Second message with history @@ -1356,7 +1630,7 @@ async def test_message_history( ), ] - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") # First message @@ -1364,12 +1638,8 @@ async def test_message_history( await agent.run("What is my name?", message_history=history) - # We should have 2 transactions - events = [item.payload for item in items if item.type == "transaction"] - - # Check the second transaction has the full history - second_transaction = events[1] - spans = second_transaction["spans"] + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" ] @@ -1404,6 +1674,7 @@ async def test_message_history( assert len(messages_data) > 1 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_gen_ai_system( @@ -1412,6 +1683,7 @@ async def test_gen_ai_system( capture_items, get_test_agent, stream_gen_ai_spans, + span_streaming, ): """ Test that gen_ai.system is set from the model. @@ -1420,15 +1692,17 @@ async def test_gen_ai_system( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) test_agent = get_test_agent() - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") await test_agent.run("Test input") + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] # Find chat span @@ -1459,6 +1733,7 @@ async def test_gen_ai_system( assert chat_span["data"]["gen_ai.system"] == "test" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_include_prompts_false( @@ -1467,6 +1742,7 @@ async def test_include_prompts_false( capture_items, get_test_agent, stream_gen_ai_spans, + span_streaming, ): """ Test that prompts are not captured when include_prompts=False. @@ -1476,15 +1752,17 @@ async def test_include_prompts_false( traces_sample_rate=1.0, send_default_pii=True, # Even with PII enabled, prompts should not be captured stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) test_agent = get_test_agent() - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") await test_agent.run("Sensitive prompt") + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] # Find child spans (invoke_agent is the transaction, not a child span) @@ -1513,6 +1791,7 @@ async def test_include_prompts_false( assert "gen_ai.response.text" not in span["data"] +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_include_prompts_true( @@ -1521,6 +1800,7 @@ async def test_include_prompts_true( capture_items, get_test_agent, stream_gen_ai_spans, + span_streaming, ): """ Test that prompts are captured when include_prompts=True (default). @@ -1530,15 +1810,17 @@ async def test_include_prompts_true( traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) test_agent = get_test_agent() - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") await test_agent.run("Test prompt") + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] # Find child spans (invoke_agent is the transaction, not a child span) @@ -1567,6 +1849,7 @@ async def test_include_prompts_true( assert "gen_ai.request.messages" in chat_span["data"] +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_include_prompts_false_with_tools( @@ -1575,6 +1858,7 @@ async def test_include_prompts_false_with_tools( capture_items, get_test_agent, stream_gen_ai_spans, + span_streaming, ): """ Test that tool input/output are not captured when include_prompts=False. @@ -1584,6 +1868,7 @@ async def test_include_prompts_false_with_tools( traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) test_agent = get_test_agent() @@ -1593,11 +1878,12 @@ def test_tool(value: int) -> int: """A test tool.""" return value * 2 - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") await test_agent.run("Use the test tool with value 5") + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] # Find tool spans @@ -1628,6 +1914,7 @@ def test_tool(value: int) -> int: assert "gen_ai.tool.output" not in tool_span["data"] +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_include_prompts_requires_pii( @@ -1636,6 +1923,7 @@ async def test_include_prompts_requires_pii( capture_items, get_test_agent, stream_gen_ai_spans, + span_streaming, ): """ Test that include_prompts requires send_default_pii=True. @@ -1645,15 +1933,17 @@ async def test_include_prompts_requires_pii( traces_sample_rate=1.0, send_default_pii=False, # PII disabled stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) test_agent = get_test_agent() - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") await test_agent.run("Test prompt") + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] # Find child spans (invoke_agent is the transaction, not a child span) @@ -1840,6 +2130,7 @@ async def run_and_check_context(agent, agent_name): # ==================== Additional Coverage Tests ==================== +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_invoke_agent_with_list_user_prompt( @@ -1847,6 +2138,7 @@ async def test_invoke_agent_with_list_user_prompt( capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): """ Test that invoke_agent span handles list user prompts correctly. @@ -1861,15 +2153,40 @@ async def test_invoke_agent_with_list_user_prompt( traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) - if stream_gen_ai_spans: + if span_streaming: + items = capture_items("transaction", "span") + + # Use a list as user prompt + await agent.run(["First part", "Second part"]) + + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + + # Check that the invoke_agent transaction has messages data + # The invoke_agent is the transaction itself + if "gen_ai.request.messages" in spans[0]["attributes"]: + messages_str = spans[0]["attributes"]["gen_ai.request.messages"] + assert "First part" in messages_str + assert "Second part" in messages_str + elif stream_gen_ai_spans: items = capture_items("transaction", "span") # Use a list as user prompt await agent.run(["First part", "Second part"]) (transaction,) = [item.payload for item in items if item.type == "transaction"] + + # Check that the invoke_agent transaction has messages data + # The invoke_agent is the transaction itself + if "gen_ai.request.messages" in transaction["contexts"]["trace"]["data"]: + messages_str = transaction["contexts"]["trace"]["data"][ + "gen_ai.request.messages" + ] + assert "First part" in messages_str + assert "Second part" in messages_str else: events = capture_events() @@ -1878,16 +2195,17 @@ async def test_invoke_agent_with_list_user_prompt( (transaction,) = events - # Check that the invoke_agent transaction has messages data - # The invoke_agent is the transaction itself - if "gen_ai.request.messages" in transaction["contexts"]["trace"]["data"]: - messages_str = transaction["contexts"]["trace"]["data"][ - "gen_ai.request.messages" - ] - assert "First part" in messages_str - assert "Second part" in messages_str + # Check that the invoke_agent transaction has messages data + # The invoke_agent is the transaction itself + if "gen_ai.request.messages" in transaction["contexts"]["trace"]["data"]: + messages_str = transaction["contexts"]["trace"]["data"][ + "gen_ai.request.messages" + ] + assert "First part" in messages_str + assert "Second part" in messages_str +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( @@ -1906,6 +2224,7 @@ async def test_invoke_agent_with_instructions( send_default_pii, include_prompts, stream_gen_ai_spans, + span_streaming, ): """ Test that invoke_agent span handles instructions correctly. @@ -1927,13 +2246,15 @@ async def test_invoke_agent_with_instructions( traces_sample_rate=1.0, send_default_pii=send_default_pii, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") await agent.run("Test input") + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] # The transaction IS the invoke_agent span, check for messages in chat spans instead @@ -2069,6 +2390,7 @@ async def test_model_settings_object_style(sentry_init, capture_items): assert transaction is not None +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_usage_data_partial( @@ -2076,6 +2398,7 @@ async def test_usage_data_partial( capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): """ Test that usage data is correctly handled when only some fields are present. @@ -2089,13 +2412,15 @@ async def test_usage_data_partial( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") await agent.run("Test input") + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] chat_spans = [ @@ -2119,6 +2444,7 @@ async def test_usage_data_partial( assert chat_span is not None +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_agent_data_from_scope( @@ -2126,6 +2452,7 @@ async def test_agent_data_from_scope( capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): """ Test that agent data can be retrieved from Sentry scope when not passed directly. @@ -2140,9 +2467,20 @@ async def test_agent_data_from_scope( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) - if stream_gen_ai_spans: + if span_streaming: + items = capture_items("transaction", "span") + + # The integration automatically sets agent in scope during execution + await agent.run("Test input") + + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + + assert spans[1]["name"] == "invoke_agent test_scope_agent" + elif stream_gen_ai_spans: items = capture_items("transaction", "span") # The integration automatically sets agent in scope during execution @@ -2150,6 +2488,9 @@ async def test_agent_data_from_scope( # Verify agent name is capture (transaction,) = (item.payload for item in items if item.type == "transaction") + + # Verify agent name is captured + assert transaction["transaction"] == "invoke_agent test_scope_agent" else: events = capture_events() @@ -2159,10 +2500,11 @@ async def test_agent_data_from_scope( # Verify agent name is capture (transaction,) = events - # Verify agent name is captured - assert transaction["transaction"] == "invoke_agent test_scope_agent" + # Verify agent name is captured + assert transaction["transaction"] == "invoke_agent test_scope_agent" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_available_tools_without_description( @@ -2171,6 +2513,7 @@ async def test_available_tools_without_description( capture_items, get_test_agent, stream_gen_ai_spans, + span_streaming, ): """ Test that available tools are captured even when description is missing. @@ -2179,6 +2522,7 @@ async def test_available_tools_without_description( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) test_agent = get_test_agent() @@ -2188,11 +2532,12 @@ def tool_without_desc(x: int) -> int: # No docstring = no description return x * 2 - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") await test_agent.run("Use the tool with 5") + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] chat_spans = [ @@ -2219,6 +2564,7 @@ def tool_without_desc(x: int) -> int: assert "tool_without_desc" in tools_str +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_output_with_tool_calls( @@ -2227,6 +2573,7 @@ async def test_output_with_tool_calls( capture_items, get_test_agent, stream_gen_ai_spans, + span_streaming, ): """ Test that tool calls in model response are captured correctly. @@ -2236,6 +2583,7 @@ async def test_output_with_tool_calls( traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) test_agent = get_test_agent() @@ -2245,11 +2593,12 @@ def calc_tool(value: int) -> int: """Calculate something.""" return value + 10 - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") await test_agent.run("Use calc_tool with 5") + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] chat_spans = [ @@ -2284,6 +2633,7 @@ def calc_tool(value: int) -> int: assert "gen_ai.operation.name" in chat_span["data"] +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_message_formatting_with_different_parts( @@ -2291,6 +2641,7 @@ async def test_message_formatting_with_different_parts( capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): """ Test that different message part types are handled correctly in ai_client span. @@ -2307,6 +2658,7 @@ async def test_message_formatting_with_different_parts( traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) # Create message history with different part types @@ -2320,11 +2672,12 @@ async def test_message_formatting_with_different_parts( ), ] - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") await agent.run("What did I say?", message_history=history) + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] chat_spans = [ @@ -2415,6 +2768,7 @@ async def test_update_ai_client_span_with_none_response(sentry_init, capture_ite assert transaction is not None +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_agent_without_name( @@ -2422,6 +2776,7 @@ async def test_agent_without_name( capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): """ Test that agent without a name is handled correctly. @@ -2433,15 +2788,28 @@ async def test_agent_without_name( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) - if stream_gen_ai_spans: + if span_streaming: + items = capture_items("transaction", "span") + + await agent.run("Test input") + + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + + assert "invoke_agent" in spans[1]["name"] + elif stream_gen_ai_spans: items = capture_items("transaction", "span") await agent.run("Test input") # Should still create transaction, just with default name (transaction,) = (item.payload for item in items if item.type == "transaction") + + # Transaction name should be "invoke_agent agent" or similar default + assert "invoke_agent" in transaction["transaction"] else: events = capture_events() @@ -2452,8 +2820,8 @@ async def test_agent_without_name( # Should still create transaction, just with default name assert transaction["type"] == "transaction" - # Transaction name should be "invoke_agent agent" or similar default - assert "invoke_agent" in transaction["transaction"] + # Transaction name should be "invoke_agent agent" or similar default + assert "invoke_agent" in transaction["transaction"] @pytest.mark.asyncio @@ -2606,6 +2974,7 @@ async def test_set_usage_data_with_partial_fields(sentry_init, capture_items): assert transaction is not None +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_message_parts_with_tool_return( @@ -2613,6 +2982,7 @@ async def test_message_parts_with_tool_return( capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): """ Test that ToolReturnPart messages are handled correctly. @@ -2634,14 +3004,16 @@ def test_tool(x: int) -> int: traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") # Run with history containing tool return await agent.run("Use test_tool with 5") + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] chat_spans = [ @@ -3563,6 +3935,7 @@ def _find_binary_content(messages_data, expected_modality, expected_mime_type): return False +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_binary_content_encoding_image( @@ -3570,6 +3943,7 @@ async def test_binary_content_encoding_image( capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): """Test that BinaryContent with image data is properly encoded in messages.""" sentry_init( @@ -3577,9 +3951,36 @@ async def test_binary_content_encoding_image( traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) - if stream_gen_ai_spans: + if span_streaming: + items = capture_items("span") + + with sentry_sdk.traces.start_span( + name="test", attributes={"sentry.op": "test"} + ): + span = sentry_sdk.traces.start_span( + name="custom span", attributes={"sentry.op": "test_span"} + ) + binary_content = BinaryContent( + data=b"fake_image_data_12345", media_type="image/png" + ) + user_part = UserPromptPart(content=["Look at this image:", binary_content]) + mock_msg = MagicMock() + mock_msg.parts = [user_part] + mock_msg.instructions = None + + _set_input_messages(span, [mock_msg]) + span.finish() + + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + + span_data = spans[0]["attributes"] + messages_data = _get_messages_from_span(span_data) + assert _find_binary_content(messages_data, "image", "image/png") + elif stream_gen_ai_spans: items = capture_items("transaction", "span") with sentry_sdk.start_transaction(op="test", name="test"): @@ -3596,6 +3997,9 @@ async def test_binary_content_encoding_image( span.finish() (event,) = (item.payload for item in items if item.type == "transaction") + span_data = event["spans"][0]["data"] + messages_data = _get_messages_from_span(span_data) + assert _find_binary_content(messages_data, "image", "image/png") else: events = capture_events() @@ -3613,11 +4017,12 @@ async def test_binary_content_encoding_image( span.finish() (event,) = events - span_data = event["spans"][0]["data"] - messages_data = _get_messages_from_span(span_data) - assert _find_binary_content(messages_data, "image", "image/png") + span_data = event["spans"][0]["data"] + messages_data = _get_messages_from_span(span_data) + assert _find_binary_content(messages_data, "image", "image/png") +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_binary_content_encoding_mixed_content( @@ -3625,6 +4030,7 @@ async def test_binary_content_encoding_mixed_content( capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): """Test that BinaryContent mixed with text content is properly handled.""" sentry_init( @@ -3632,9 +4038,47 @@ async def test_binary_content_encoding_mixed_content( traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) - if stream_gen_ai_spans: + if span_streaming: + items = capture_items("span") + + with sentry_sdk.traces.start_span( + name="test", attributes={"sentry.op": "test"} + ): + span = sentry_sdk.traces.start_span( + name="custom span", attributes={"sentry.op": "test_span"} + ) + binary_content = BinaryContent( + data=b"fake_image_bytes", media_type="image/jpeg" + ) + user_part = UserPromptPart( + content=["Here is an image:", binary_content, "What do you see?"] + ) + mock_msg = MagicMock() + mock_msg.parts = [user_part] + mock_msg.instructions = None + + _set_input_messages(span, [mock_msg]) + span.finish() + + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + + span_data = spans[0]["attributes"] + messages_data = _get_messages_from_span(span_data) + + # Verify both text and binary content are present + found_text = any( + content_item.get("type") == "text" + for msg in messages_data + if "content" in msg + for content_item in msg["content"] + ) + assert found_text, "Text content should be found" + assert _find_binary_content(messages_data, "image", "image/jpeg") + elif stream_gen_ai_spans: items = capture_items("transaction", "span") with sentry_sdk.start_transaction(op="test", name="test"): @@ -3653,6 +4097,18 @@ async def test_binary_content_encoding_mixed_content( span.finish() (event,) = (item.payload for item in items if item.type == "transaction") + span_data = event["spans"][0]["data"] + messages_data = _get_messages_from_span(span_data) + + # Verify both text and binary content are present + found_text = any( + content_item.get("type") == "text" + for msg in messages_data + if "content" in msg + for content_item in msg["content"] + ) + assert found_text, "Text content should be found" + assert _find_binary_content(messages_data, "image", "image/jpeg") else: events = capture_events() @@ -3672,21 +4128,21 @@ async def test_binary_content_encoding_mixed_content( span.finish() (event,) = events - - span_data = event["spans"][0]["data"] - messages_data = _get_messages_from_span(span_data) - - # Verify both text and binary content are present - found_text = any( - content_item.get("type") == "text" - for msg in messages_data - if "content" in msg - for content_item in msg["content"] - ) - assert found_text, "Text content should be found" - assert _find_binary_content(messages_data, "image", "image/jpeg") + span_data = event["spans"][0]["data"] + messages_data = _get_messages_from_span(span_data) + + # Verify both text and binary content are present + found_text = any( + content_item.get("type") == "text" + for msg in messages_data + if "content" in msg + for content_item in msg["content"] + ) + assert found_text, "Text content should be found" + assert _find_binary_content(messages_data, "image", "image/jpeg") +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_binary_content_in_agent_run( @@ -3694,6 +4150,7 @@ async def test_binary_content_in_agent_run( capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): """Test that BinaryContent in actual agent run is properly captured in spans.""" agent = Agent("test", name="test_binary_agent") @@ -3703,17 +4160,19 @@ async def test_binary_content_in_agent_run( traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) binary_content = BinaryContent( data=b"fake_image_data_for_testing", media_type="image/png" ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") await agent.run(["Analyze this image:", binary_content]) + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" @@ -3744,6 +4203,7 @@ async def test_binary_content_in_agent_run( ) +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_set_usage_data_with_cache_tokens( @@ -3751,15 +4211,42 @@ async def test_set_usage_data_with_cache_tokens( capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): """Test that cache_read_tokens and cache_write_tokens are tracked.""" sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) - if stream_gen_ai_spans: + if span_streaming: + items = capture_items("transaction", "span") + + with sentry_sdk.traces.start_span( + name="test", attributes={"sentry.op": "test"} + ): + span = sentry_sdk.traces.start_span( + name="custom span", attributes={"sentry.op": "test_span"} + ) + usage = RequestUsage( + input_tokens=100, + output_tokens=50, + cache_read_tokens=80, + cache_write_tokens=20, + ) + _set_usage_data(span, usage) + span.finish() + + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + + assert spans[0]["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert ( + spans[0]["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + ) + elif stream_gen_ai_spans: items = capture_items("transaction", "span") with sentry_sdk.start_transaction(op="test", name="test"): @@ -3774,6 +4261,9 @@ async def test_set_usage_data_with_cache_tokens( span.finish() (event,) = (item.payload for item in items if item.type == "transaction") + (span_data,) = event["spans"] + assert span_data["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span_data["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 else: events = capture_events() @@ -3789,12 +4279,12 @@ async def test_set_usage_data_with_cache_tokens( span.finish() (event,) = events - - (span_data,) = event["spans"] - assert span_data["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - assert span_data["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + (span_data,) = event["spans"] + assert span_data["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span_data["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "url,image_url_kwargs,expected_content", @@ -3845,6 +4335,7 @@ def test_image_url_base64_content_in_span( image_url_kwargs, expected_content, stream_gen_ai_spans, + span_streaming, ): from sentry_sdk.integrations.pydantic_ai.spans.ai_client import ai_client_span @@ -3853,13 +4344,16 @@ def test_image_url_base64_content_in_span( traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) found_image = False - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") - with sentry_sdk.start_transaction(op="test", name="test"): + with sentry_sdk.traces.start_span( + name="test", attributes={"sentry.op": "test"} + ): image_url = ImageUrl(url=url, **image_url_kwargs) user_part = UserPromptPart(content=["Look at this image:", image_url]) mock_msg = MagicMock() @@ -3869,6 +4363,7 @@ def test_image_url_base64_content_in_span( span = ai_client_span([mock_msg], None, None, None) span.finish() + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" @@ -3912,6 +4407,7 @@ def test_image_url_base64_content_in_span( assert found_image, "Image content item should be found in messages data" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( @@ -3951,25 +4447,28 @@ async def test_invoke_agent_image_url( image_url_kwargs, expected_content, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) agent = Agent("test", name="test_image_url_agent") image_url = ImageUrl(url=url, **image_url_kwargs) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") await agent.run([image_url, "Describe this image"]) found_image = False + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" @@ -4006,6 +4505,7 @@ async def test_invoke_agent_image_url( assert found_image, "Image content item should be found in messages data" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_tool_description_in_execute_tool_span( @@ -4013,6 +4513,7 @@ async def test_tool_description_in_execute_tool_span( capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): """ Test that tool description from the tool's docstring is included in execute_tool spans. @@ -4033,14 +4534,16 @@ def multiply_numbers(a: int, b: int) -> int: traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") result = await agent.run("What is 5 times 3?") assert result is not None + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] tool_spans = [ From 1368130ebc3f952c31e18a9839e363fc7cba47c6 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 22 May 2026 10:20:30 +0200 Subject: [PATCH 02/14] mypy --- sentry_sdk/integrations/pydantic_ai/spans/ai_client.py | 2 +- sentry_sdk/integrations/pydantic_ai/spans/execute_tool.py | 2 +- sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py | 4 ++-- sentry_sdk/integrations/pydantic_ai/utils.py | 4 +++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py b/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py index 33e170e76d..e53fbbcdca 100644 --- a/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py +++ b/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py @@ -261,7 +261,7 @@ def _set_output_data( def ai_client_span( messages: "Any", agent: "Any", model: "Any", model_settings: "Any" -) -> "sentry_sdk.tracing.Span": +) -> "Union[sentry_sdk.tracing.Span, StreamedSpan]": """Create a span for an AI client call (model request). Args: diff --git a/sentry_sdk/integrations/pydantic_ai/spans/execute_tool.py b/sentry_sdk/integrations/pydantic_ai/spans/execute_tool.py index 926a9f48a8..7648c1418a 100644 --- a/sentry_sdk/integrations/pydantic_ai/spans/execute_tool.py +++ b/sentry_sdk/integrations/pydantic_ai/spans/execute_tool.py @@ -20,7 +20,7 @@ def execute_tool_span( tool_args: "Any", agent: "Any", tool_definition: "Optional[ToolDefinition]" = None, -) -> "sentry_sdk.tracing.Span": +) -> "Union[sentry_sdk.tracing.Span, StreamedSpan]": """Create a span for tool execution. Args: diff --git a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py index f98808001c..108a4149e4 100644 --- a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py +++ b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py @@ -23,7 +23,7 @@ ) if TYPE_CHECKING: - from typing import Any + from typing import Any, Union try: from pydantic_ai.messages import BinaryContent, ImageUrl # type: ignore @@ -38,7 +38,7 @@ def invoke_agent_span( model: "Any", model_settings: "Any", is_streaming: bool = False, -) -> "sentry_sdk.tracing.Span": +) -> "Union[sentry_sdk.tracing.Span, sentry_sdk.traces.StreamedSpan]": """Create a span for invoking the agent.""" # Determine agent name for span name = "agent" diff --git a/sentry_sdk/integrations/pydantic_ai/utils.py b/sentry_sdk/integrations/pydantic_ai/utils.py index a82608e543..340dcf8953 100644 --- a/sentry_sdk/integrations/pydantic_ai/utils.py +++ b/sentry_sdk/integrations/pydantic_ai/utils.py @@ -117,7 +117,9 @@ def _get_model_name(model_obj: "Any") -> "Optional[str]": def _set_model_data( - span: "sentry_sdk.tracing.Span", model: "Any", model_settings: "Any" + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", + model: "Any", + model_settings: "Any", ) -> None: """Set model-related data on a span. From b72b7a190431d96fefadd85a5e58207f361cfbd7 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 22 May 2026 10:25:37 +0200 Subject: [PATCH 03/14] remove unneeded flushes --- tests/integrations/pydantic_ai/test_pydantic_ai.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index 4931b85193..c11f3ca217 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -146,7 +146,6 @@ async def test_agent_run_async( # The transaction itself should have invoke_agent data assert transaction["contexts"]["trace"]["op"] == "gen_ai.invoke_agent" - sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] # Find child span types (invoke_agent is the transaction, not a child span) chat_spans = [ @@ -258,7 +257,6 @@ def failing_model(messages, info): (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 @@ -347,7 +345,6 @@ def test_agent_run_sync( assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" # Find span types - sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" @@ -428,7 +425,6 @@ def failing_model(messages, info): (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 @@ -537,7 +533,6 @@ async def test_agent_run_stream( assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" # Find chat spans - sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" @@ -676,7 +671,6 @@ async def test_agent_run_stream_events( assert transaction["transaction"] == "invoke_agent test_agent" # Find chat spans - sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" From b13cfbe1c2ce53a5b6c6f79e5c47493a2ccef32b Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 22 May 2026 10:29:42 +0200 Subject: [PATCH 04/14] mypy --- sentry_sdk/integrations/pydantic_ai/patches/agent_run.py | 2 +- sentry_sdk/integrations/pydantic_ai/spans/ai_client.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/pydantic_ai/patches/agent_run.py b/sentry_sdk/integrations/pydantic_ai/patches/agent_run.py index 90048e41fc..7adf854def 100644 --- a/sentry_sdk/integrations/pydantic_ai/patches/agent_run.py +++ b/sentry_sdk/integrations/pydantic_ai/patches/agent_run.py @@ -37,7 +37,7 @@ def __init__( self.model_settings = model_settings self.is_streaming = is_streaming self._isolation_scope: "Any" = None - self._span: "Optional[sentry_sdk.tracing.Span]" = None + self._span: "Optional[sentry_sdk.tracing.Span, sentry_sdk.traces.StreamedSpan]" = None self._result: "Any" = None async def __aenter__(self) -> "Any": diff --git a/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py b/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py index e53fbbcdca..dfe898d139 100644 --- a/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py +++ b/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py @@ -314,7 +314,7 @@ def ai_client_span( def update_ai_client_span( - span: "sentry_sdk.tracing.Span", model_response: "Any" + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", model_response: "Any" ) -> None: """Update the AI client span with response data.""" if not span: From fc55e04c9519b74858810da8e7a40dafbac66053 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 22 May 2026 10:33:21 +0200 Subject: [PATCH 05/14] mypy --- sentry_sdk/integrations/pydantic_ai/patches/agent_run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/pydantic_ai/patches/agent_run.py b/sentry_sdk/integrations/pydantic_ai/patches/agent_run.py index 7adf854def..2581ed41bb 100644 --- a/sentry_sdk/integrations/pydantic_ai/patches/agent_run.py +++ b/sentry_sdk/integrations/pydantic_ai/patches/agent_run.py @@ -15,7 +15,7 @@ raise DidNotEnable("pydantic-ai not installed") if TYPE_CHECKING: - from typing import Any, Callable, Optional + from typing import Any, Callable, Optional, Union class _StreamingContextManagerWrapper: @@ -37,7 +37,7 @@ def __init__( self.model_settings = model_settings self.is_streaming = is_streaming self._isolation_scope: "Any" = None - self._span: "Optional[sentry_sdk.tracing.Span, sentry_sdk.traces.StreamedSpan]" = None + self._span: "Optional[Union[sentry_sdk.tracing.Span, sentry_sdk.traces.StreamedSpan]]" = None self._result: "Any" = None async def __aenter__(self) -> "Any": From ed1351ea2d974c78815c4f10224fcfbabe142fd3 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 22 May 2026 10:37:24 +0200 Subject: [PATCH 06/14] test --- tests/integrations/pydantic_ai/test_pydantic_ai.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index c11f3ca217..244a48eac1 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -4345,9 +4345,13 @@ def test_image_url_base64_content_in_span( if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") - with sentry_sdk.traces.start_span( - name="test", attributes={"sentry.op": "test"} - ): + ctx = ( + sentry_sdk.traces.start_span(name="test", attributes={"sentry.op": "test"}) + if span_streaming + else sentry_sdk.start_transaction(op="test", name="test") + ) + + with ctx: image_url = ImageUrl(url=url, **image_url_kwargs) user_part = UserPromptPart(content=["Look at this image:", image_url]) mock_msg = MagicMock() From 61bf6a80f938a4d26656e2d8723dc1f94e6acbc3 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 22 May 2026 10:42:43 +0200 Subject: [PATCH 07/14] update hint --- sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py index 108a4149e4..9b2743aa75 100644 --- a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py +++ b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py @@ -147,7 +147,10 @@ def invoke_agent_span( return span -def update_invoke_agent_span(span: "sentry_sdk.tracing.Span", result: "Any") -> None: +def update_invoke_agent_span( + span: "Union[sentry_sdk.tracing.Span, sentry_sdk.traces.StreamedSpan]", + result: "Any", +) -> None: """Update and close the invoke agent span.""" if not span or not result: return From 1493e3571dc6acbfe15cea82189dc0a1ce7af5b5 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 22 May 2026 10:47:19 +0200 Subject: [PATCH 08/14] mypy --- .../integrations/pydantic_ai/spans/invoke_agent.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py index 9b2743aa75..e4021f3bfa 100644 --- a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py +++ b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py @@ -8,6 +8,7 @@ truncate_and_annotate_messages, ) from sentry_sdk.consts import OP, SPANDATA +from sentry_sdk.traces import StreamedSpan from sentry_sdk.tracing_utils import has_span_streaming_enabled from ..consts import SPAN_ORIGIN @@ -38,7 +39,7 @@ def invoke_agent_span( model: "Any", model_settings: "Any", is_streaming: bool = False, -) -> "Union[sentry_sdk.tracing.Span, sentry_sdk.traces.StreamedSpan]": +) -> "Union[sentry_sdk.tracing.Span, StreamedSpan]": """Create a span for invoking the agent.""" # Determine agent name for span name = "agent" @@ -148,7 +149,7 @@ def invoke_agent_span( def update_invoke_agent_span( - span: "Union[sentry_sdk.tracing.Span, sentry_sdk.traces.StreamedSpan]", + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", result: "Any", ) -> None: """Update and close the invoke agent span.""" @@ -169,7 +170,12 @@ def update_invoke_agent_span( try: response = result.response if hasattr(response, "model_name") and response.model_name: - span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, response.model_name) + if isinstance(span, StreamedSpan): + span.set_attribute( + SPANDATA.GEN_AI_RESPONSE_MODEL, response.model_name + ) + else: + span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, response.model_name) except Exception: # If response access fails, continue without setting model name pass From b158d5d302d9ec6689ce39a67719ae516bf559c3 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 22 May 2026 10:59:30 +0200 Subject: [PATCH 09/14] narrow capture_items --- .../pydantic_ai/test_pydantic_ai.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index 244a48eac1..a1da359c53 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -80,7 +80,7 @@ async def test_agent_run_async( test_agent = get_test_agent() if span_streaming: - items = capture_items("transaction", "span") + items = capture_items("span") result = await test_agent.run( ["Message demonstrating the absence of truncation.", "Test input"] @@ -300,7 +300,7 @@ def test_agent_run_sync( test_agent = get_test_agent() if span_streaming: - items = capture_items("transaction", "span") + items = capture_items("span") result = test_agent.run_sync( ["Message demonstrating the absence of truncation.", "Test input"] @@ -469,7 +469,7 @@ async def test_agent_run_stream( test_agent = get_test_agent() if span_streaming: - items = capture_items("transaction", "span") + items = capture_items("span") async with test_agent.run_stream( ["Message demonstrating the absence of truncation.", "Test input"] @@ -622,7 +622,7 @@ async def test_agent_run_stream_events( test_agent = get_test_agent() if span_streaming: - items = capture_items("transaction", "span") + items = capture_items("span") if PYDANTIC_AI_VERSION > (2,): async with test_agent.run_stream_events( @@ -1353,7 +1353,7 @@ async def test_error_handling( ) if span_streaming: - items = capture_items("transaction", "span") + items = capture_items("span") # Simple run that should succeed await agent.run("Hello") @@ -1542,7 +1542,7 @@ async def run_agent(input_text): return await test_agent.run(input_text) if span_streaming: - items = capture_items("transaction", "span") + items = capture_items("span") # Run 3 agents concurrently results = await asyncio.gather(*[run_agent(f"Input {i}") for i in range(3)]) @@ -2151,7 +2151,7 @@ async def test_invoke_agent_with_list_user_prompt( ) if span_streaming: - items = capture_items("transaction", "span") + items = capture_items("span") # Use a list as user prompt await agent.run(["First part", "Second part"]) @@ -2465,7 +2465,7 @@ async def test_agent_data_from_scope( ) if span_streaming: - items = capture_items("transaction", "span") + items = capture_items("span") # The integration automatically sets agent in scope during execution await agent.run("Test input") @@ -2786,7 +2786,7 @@ async def test_agent_without_name( ) if span_streaming: - items = capture_items("transaction", "span") + items = capture_items("span") await agent.run("Test input") @@ -4216,7 +4216,7 @@ async def test_set_usage_data_with_cache_tokens( ) if span_streaming: - items = capture_items("transaction", "span") + items = capture_items("span") with sentry_sdk.traces.start_span( name="test", attributes={"sentry.op": "test"} From e6e98457c43353dd991567ef9f4f560bf5b73411 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 22 May 2026 12:28:46 +0200 Subject: [PATCH 10/14] add print to see what's in CI --- tests/integrations/pydantic_ai/test_pydantic_ai.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index a1da359c53..d98f954939 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -639,6 +639,7 @@ async def test_agent_run_stream_events( sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] + print("spans are", spans) assert spans[1]["name"] == "invoke_agent test_agent" chat_spans = [ From 06d049e1e7e73e6f857e4a5480d8dac989bb19fd Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 22 May 2026 12:40:11 +0200 Subject: [PATCH 11/14] fix test --- tests/integrations/pydantic_ai/test_pydantic_ai.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index d98f954939..044f46773e 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -639,8 +639,7 @@ async def test_agent_run_stream_events( sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] - print("spans are", spans) - assert spans[1]["name"] == "invoke_agent test_agent" + assert spans[-1]["name"] == "invoke_agent test_agent" chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" From 22caaca9e96b38e23cef9afa26e8ce699efe8f13 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 22 May 2026 14:58:00 +0200 Subject: [PATCH 12/14] clean up list comprehensions in tests --- .../pydantic_ai/test_pydantic_ai.py | 32 ++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index 044f46773e..278c8a3d76 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -90,7 +90,7 @@ async def test_agent_run_async( assert result.output is not None sentry_sdk.flush() - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items] assert spans[1]["name"] == "invoke_agent test_agent" assert spans[1]["attributes"]["sentry.origin"] == "auto.ai.pydantic_ai" @@ -235,7 +235,7 @@ def failing_model(messages, info): ) if span_streaming: - items = capture_items("event", "transaction", "span") + items = capture_items("event", "span") with pytest.raises(RuntimeError, match="model exploded"): await agent.run("Test input") @@ -310,11 +310,7 @@ def test_agent_run_sync( assert result.output is not None sentry_sdk.flush() - spans = [item.payload for item in items if item.type == "span"] - - # Find span types - sentry_sdk.flush() - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items] assert spans[1]["name"] == "invoke_agent test_agent" assert spans[1]["attributes"]["sentry.origin"] == "auto.ai.pydantic_ai" @@ -403,7 +399,7 @@ def failing_model(messages, info): ) if span_streaming: - items = capture_items("event", "transaction", "span") + items = capture_items("event", "span") with pytest.raises(RuntimeError, match="model exploded"): agent.run_sync("Test input") @@ -479,7 +475,7 @@ async def test_agent_run_stream( pass sentry_sdk.flush() - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items] assert spans[1]["name"] == "invoke_agent test_agent" assert spans[1]["attributes"]["sentry.origin"] == "auto.ai.pydantic_ai" @@ -637,7 +633,7 @@ async def test_agent_run_stream_events( pass sentry_sdk.flush() - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items] assert spans[-1]["name"] == "invoke_agent test_agent" @@ -1359,7 +1355,7 @@ async def test_error_handling( await agent.run("Hello") sentry_sdk.flush() - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items] assert spans[1]["is_segment"] is True assert spans[1]["status"] != "error" # Could be None or some other status @@ -1550,7 +1546,7 @@ async def run_agent(input_text): assert len(results) == 3 sentry_sdk.flush() - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items] for span in spans: if span["is_segment"] is False: continue @@ -2157,7 +2153,7 @@ async def test_invoke_agent_with_list_user_prompt( await agent.run(["First part", "Second part"]) sentry_sdk.flush() - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items] # Check that the invoke_agent transaction has messages data # The invoke_agent is the transaction itself @@ -2471,7 +2467,7 @@ async def test_agent_data_from_scope( await agent.run("Test input") sentry_sdk.flush() - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items] assert spans[1]["name"] == "invoke_agent test_scope_agent" elif stream_gen_ai_spans: @@ -2791,7 +2787,7 @@ async def test_agent_without_name( await agent.run("Test input") sentry_sdk.flush() - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items] assert "invoke_agent" in spans[1]["name"] elif stream_gen_ai_spans: @@ -3969,7 +3965,7 @@ async def test_binary_content_encoding_image( span.finish() sentry_sdk.flush() - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items] span_data = spans[0]["attributes"] messages_data = _get_messages_from_span(span_data) @@ -4058,7 +4054,7 @@ async def test_binary_content_encoding_mixed_content( span.finish() sentry_sdk.flush() - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items] span_data = spans[0]["attributes"] messages_data = _get_messages_from_span(span_data) @@ -4234,7 +4230,7 @@ async def test_set_usage_data_with_cache_tokens( span.finish() sentry_sdk.flush() - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items] assert spans[0]["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 assert ( From 8694a9186d3f783fb29ccc259b9bf6015a4eb006 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 22 May 2026 15:14:38 +0200 Subject: [PATCH 13/14] remove outdated docstrings --- tests/integrations/pydantic_ai/test_pydantic_ai.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index 278c8a3d76..cc288ece1b 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -97,7 +97,6 @@ async def test_agent_run_async( assert spans[1]["attributes"]["sentry.op"] == "gen_ai.invoke_agent" - # Find child span types (invoke_agent is the transaction, not a child span) chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" ] @@ -2155,8 +2154,6 @@ async def test_invoke_agent_with_list_user_prompt( sentry_sdk.flush() spans = [item.payload for item in items] - # Check that the invoke_agent transaction has messages data - # The invoke_agent is the transaction itself if "gen_ai.request.messages" in spans[0]["attributes"]: messages_str = spans[0]["attributes"]["gen_ai.request.messages"] assert "First part" in messages_str From f6c63c1cae619e9c1be838a8426e74afe20b6f8e Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 22 May 2026 15:20:44 +0200 Subject: [PATCH 14/14] revert test change --- tests/integrations/pydantic_ai/test_pydantic_ai.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index cc288ece1b..611a158967 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -4338,13 +4338,7 @@ def test_image_url_base64_content_in_span( if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") - ctx = ( - sentry_sdk.traces.start_span(name="test", attributes={"sentry.op": "test"}) - if span_streaming - else sentry_sdk.start_transaction(op="test", name="test") - ) - - with ctx: + with sentry_sdk.start_transaction(op="test", name="test"): image_url = ImageUrl(url=url, **image_url_kwargs) user_part = UserPromptPart(content=["Look at this image:", image_url]) mock_msg = MagicMock()