From 6f67df0c0fd8476d8e0ae0d3204b1bb03760a8a9 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 22 May 2026 11:59:00 +0200 Subject: [PATCH] fix(openai-agents): Remove redundant hosted MCP tool spans --- .../openai_agents/spans/ai_client.py | 2 - .../integrations/openai_agents/utils.py | 24 +- .../openai_agents/test_openai_agents.py | 523 ------------------ 3 files changed, 1 insertion(+), 548 deletions(-) diff --git a/sentry_sdk/integrations/openai_agents/spans/ai_client.py b/sentry_sdk/integrations/openai_agents/spans/ai_client.py index b060c29aaf..564d325416 100644 --- a/sentry_sdk/integrations/openai_agents/spans/ai_client.py +++ b/sentry_sdk/integrations/openai_agents/spans/ai_client.py @@ -5,7 +5,6 @@ from ..consts import SPAN_ORIGIN from ..utils import ( - _create_mcp_execute_tool_spans, _set_agent_data, _set_input_data, _set_output_data, @@ -55,7 +54,6 @@ def update_ai_client_span( if hasattr(response, "output") and response.output: _set_output_data(span, response) - _create_mcp_execute_tool_spans(span, response) if response_model is not None: span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, response_model) diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py index 5ffdb915ba..78f0a90f65 100644 --- a/sentry_sdk/integrations/openai_agents/utils.py +++ b/sentry_sdk/integrations/openai_agents/utils.py @@ -14,7 +14,7 @@ set_data_normalized, truncate_and_annotate_messages, ) -from sentry_sdk.consts import OP, SPANDATA, SPANSTATUS +from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable from sentry_sdk.scope import should_send_default_pii from sentry_sdk.utils import event_from_exception, safe_serialize @@ -215,25 +215,3 @@ def _set_output_data(span: "sentry_sdk.tracing.Span", result: "Any") -> None: set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, output_messages["response"] ) - - -def _create_mcp_execute_tool_spans( - span: "sentry_sdk.tracing.Span", result: "agents.Result" -) -> None: - for output in result.output: - if output.__class__.__name__ == "McpCall": - with sentry_sdk.start_span( - op=OP.GEN_AI_EXECUTE_TOOL, - name=f"execute_tool {output.name}", - start_timestamp=span.start_timestamp, - ) as execute_tool_span: - execute_tool_span.set_data(SPANDATA.GEN_AI_TOOL_NAME, output.name) - if should_send_default_pii(): - execute_tool_span.set_data( - SPANDATA.GEN_AI_TOOL_INPUT, output.arguments - ) - execute_tool_span.set_data( - SPANDATA.GEN_AI_TOOL_OUTPUT, output.output - ) - if output.error: - execute_tool_span.set_status(SPANSTATUS.INTERNAL_ERROR) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 6e49b2b08e..2cc33d6fd7 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -16,7 +16,6 @@ ) from agents.exceptions import MaxTurnsExceeded, ModelBehaviorError from agents.items import ( - McpCall, ResponseFunctionToolCall, ResponseOutputMessage, ResponseOutputText, @@ -3123,528 +3122,6 @@ async def test_span_status_error( assert transaction["contexts"]["trace"]["status"] == "internal_error" -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -@pytest.mark.asyncio -async def test_mcp_tool_execution_spans( - sentry_init, - capture_events, - capture_items, - test_agent, - get_model_response, - stream_gen_ai_spans, -): - """ - Test that MCP (Model Context Protocol) tool calls create execute_tool spans. - """ - client = AsyncOpenAI(api_key="test-key") - model = OpenAIResponsesModel(model="gpt-4", openai_client=client) - agent = test_agent.clone(model=model) - - mcp_response = get_model_response( - Response( - id="resp_mcp_123", - output=[ - McpCall( - id="mcp_call_123", - name="test_mcp_tool", - arguments='{"query": "search term"}', - output="MCP tool executed successfully", - error=None, - type="mcp_call", - server_label="test_server", - ) - ], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="gpt-4.1-2025-04-14", - object="response", - usage=ResponseUsage( - input_tokens=10, - input_tokens_details=InputTokensDetails( - cached_tokens=0, - ), - output_tokens=5, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=0, - ), - total_tokens=15, - ), - ), - serialize_pydantic=True, - ) - - final_response = get_model_response( - Response( - id="resp_final_123", - output=[ - ResponseOutputMessage( - id="msg_final", - type="message", - status="completed", - content=[ - ResponseOutputText( - text="Task completed using MCP tool", - type="output_text", - annotations=[], - ) - ], - role="assistant", - ) - ], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="gpt-4.1-2025-04-14", - object="response", - usage=ResponseUsage( - input_tokens=15, - input_tokens_details=InputTokensDetails( - cached_tokens=0, - ), - output_tokens=10, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=0, - ), - total_tokens=25, - ), - ), - serialize_pydantic=True, - ) - - if stream_gen_ai_spans: - with patch.object( - agent.model._client._client, - "send", - side_effect=[mcp_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, - ) - - items = capture_items("span", "transaction") - - await agents.Runner.run( - agent, - "Please use MCP tool", - run_config=test_run_config, - ) - - spans = [item.payload for item in items if item.type == "span"] - - # Find the MCP execute_tool span - mcp_tool_span = None - for span in spans: - if span.get("name") == "execute_tool test_mcp_tool": - mcp_tool_span = span - break - - # Verify the MCP tool span was created - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" - assert ( - mcp_tool_span["attributes"]["gen_ai.tool.input"] - == '{"query": "search term"}' - ) - assert ( - mcp_tool_span["attributes"]["gen_ai.tool.output"] - == "MCP tool executed successfully" - ) - - # Verify no error status since error was None - assert mcp_tool_span.get("status") != "error" - assert mcp_tool_span.get("tags", {}).get("status") != "error" - else: - with patch.object( - agent.model._client._client, - "send", - side_effect=[mcp_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, - ) - events = capture_events() - - await agents.Runner.run( - agent, - "Please use MCP tool", - run_config=test_run_config, - ) - - (transaction,) = events - spans = transaction["spans"] - - # Find the MCP execute_tool span - mcp_tool_span = None - for span in spans: - if span.get("description") == "execute_tool test_mcp_tool": - mcp_tool_span = span - break - - # Verify the MCP tool span was created - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "search term"}' - assert ( - mcp_tool_span["data"]["gen_ai.tool.output"] - == "MCP tool executed successfully" - ) - - # Verify no error status since error was None - assert mcp_tool_span.get("status") != "internal_error" - assert mcp_tool_span.get("tags", {}).get("status") != "internal_error" - - -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -@pytest.mark.asyncio -async def test_mcp_tool_execution_with_error( - sentry_init, - capture_events, - capture_items, - test_agent, - get_model_response, - stream_gen_ai_spans, -): - """ - Test that MCP tool calls with errors are tracked with error status. - """ - client = AsyncOpenAI(api_key="test-key") - model = OpenAIResponsesModel(model="gpt-4", openai_client=client) - agent = test_agent.clone(model=model) - - mcp_response = get_model_response( - Response( - id="resp_mcp_123", - output=[ - McpCall( - id="mcp_call_error_123", - name="failing_mcp_tool", - arguments='{"query": "test"}', - output=None, - error="MCP tool execution failed", - type="mcp_call", - server_label="test_server", - ) - ], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="gpt-4.1-2025-04-14", - object="response", - usage=ResponseUsage( - input_tokens=10, - input_tokens_details=InputTokensDetails( - cached_tokens=0, - ), - output_tokens=5, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=0, - ), - total_tokens=15, - ), - ), - serialize_pydantic=True, - ) - - final_response = get_model_response( - Response( - id="resp_final_123", - output=[ - ResponseOutputMessage( - id="msg_final", - type="message", - status="completed", - content=[ - ResponseOutputText( - text="Task completed using MCP tool", - type="output_text", - annotations=[], - ) - ], - role="assistant", - ) - ], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="gpt-4.1-2025-04-14", - object="response", - usage=ResponseUsage( - input_tokens=15, - input_tokens_details=InputTokensDetails( - cached_tokens=0, - ), - output_tokens=10, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=0, - ), - total_tokens=25, - ), - ), - serialize_pydantic=True, - ) - - if stream_gen_ai_spans: - with patch.object( - agent.model._client._client, - "send", - side_effect=[mcp_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, - ) - - items = capture_items("span", "transaction") - - await agents.Runner.run( - agent, - "Please use failing MCP tool", - run_config=test_run_config, - ) - - spans = [item.payload for item in items if item.type == "span"] - - # Find the MCP execute_tool span with error - mcp_tool_span = None - for span in spans: - if span.get("name") == "execute_tool failing_mcp_tool": - mcp_tool_span = span - break - - # Verify the MCP tool span was created with error status - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["name"] == "execute_tool failing_mcp_tool" - assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "failing_mcp_tool" - assert mcp_tool_span["attributes"]["gen_ai.tool.input"] == '{"query": "test"}' - - # Verify error status was set - assert mcp_tool_span["status"] == "error" - else: - with patch.object( - agent.model._client._client, - "send", - side_effect=[mcp_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, - ) - events = capture_events() - - await agents.Runner.run( - agent, - "Please use failing MCP tool", - run_config=test_run_config, - ) - - (transaction,) = events - spans = transaction["spans"] - - # Find the MCP execute_tool span with error - mcp_tool_span = None - for span in spans: - if span.get("description") == "execute_tool failing_mcp_tool": - mcp_tool_span = span - break - - # Verify the MCP tool span was created with error status - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["description"] == "execute_tool failing_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.name"] == "failing_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "test"}' - assert mcp_tool_span["data"]["gen_ai.tool.output"] is None - - # Verify error status was set - assert mcp_tool_span["status"] == "internal_error" - assert mcp_tool_span["tags"]["status"] == "internal_error" - - -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -@pytest.mark.asyncio -async def test_mcp_tool_execution_without_pii( - sentry_init, - capture_events, - capture_items, - test_agent, - get_model_response, - stream_gen_ai_spans, -): - """ - Test that MCP tool input/output are not included when send_default_pii is False. - """ - client = AsyncOpenAI(api_key="test-key") - model = OpenAIResponsesModel(model="gpt-4", openai_client=client) - agent = test_agent.clone(model=model) - - mcp_response = get_model_response( - Response( - id="resp_mcp_123", - output=[ - McpCall( - id="mcp_call_pii_123", - name="test_mcp_tool", - arguments='{"query": "sensitive data"}', - output="Result with sensitive info", - error=None, - type="mcp_call", - server_label="test_server", - ) - ], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="gpt-4.1-2025-04-14", - object="response", - usage=ResponseUsage( - input_tokens=10, - input_tokens_details=InputTokensDetails( - cached_tokens=0, - ), - output_tokens=5, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=0, - ), - total_tokens=15, - ), - ), - serialize_pydantic=True, - ) - - final_response = get_model_response( - Response( - id="resp_final_123", - output=[ - ResponseOutputMessage( - id="msg_final", - type="message", - status="completed", - content=[ - ResponseOutputText( - text="Task completed", - type="output_text", - annotations=[], - ) - ], - role="assistant", - ) - ], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="gpt-4.1-2025-04-14", - object="response", - usage=ResponseUsage( - input_tokens=15, - input_tokens_details=InputTokensDetails( - cached_tokens=0, - ), - output_tokens=10, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=5, - ), - total_tokens=25, - ), - ), - serialize_pydantic=True, - ) - - if stream_gen_ai_spans: - with patch.object( - agent.model._client._client, - "send", - side_effect=[mcp_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=False, # PII disabled - stream_gen_ai_spans=stream_gen_ai_spans, - ) - - items = capture_items("span", "transaction") - - await agents.Runner.run( - agent, - "Please use MCP tool", - run_config=test_run_config, - ) - - spans = [item.payload for item in items if item.type == "span"] - - # Find the MCP execute_tool span - mcp_tool_span = None - for span in spans: - if span.get("name") == "execute_tool test_mcp_tool": - mcp_tool_span = span - break - - # Verify the MCP tool span was created but without input/output - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" - - # Verify input and output are not included when send_default_pii is False - assert "gen_ai.tool.input" not in mcp_tool_span["attributes"] - assert "gen_ai.tool.output" not in mcp_tool_span["attributes"] - else: - with patch.object( - agent.model._client._client, - "send", - side_effect=[mcp_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=False, # PII disabled - stream_gen_ai_spans=stream_gen_ai_spans, - ) - events = capture_events() - - await agents.Runner.run( - agent, - "Please use MCP tool", - run_config=test_run_config, - ) - - (transaction,) = events - spans = transaction["spans"] - - # Find the MCP execute_tool span - mcp_tool_span = None - for span in spans: - if span.get("description") == "execute_tool test_mcp_tool": - mcp_tool_span = span - break - - # Verify the MCP tool span was created but without input/output - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" - - # Verify input and output are not included when send_default_pii is False - assert "gen_ai.tool.input" not in mcp_tool_span["data"] - assert "gen_ai.tool.output" not in mcp_tool_span["data"] - - @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_multiple_agents_asyncio(