From 6f67df0c0fd8476d8e0ae0d3204b1bb03760a8a9 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 22 May 2026 11:59:00 +0200
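Subject: [PATCH] fix(openai-agents): Remove redundant hosted MCP tool spans

Stop creating `execute_tool` spans from `McpCall` items found in the
model response output. This removes `_create_mcp_execute_tool_spans`
and its call in `update_ai_client_span`, drops the `OP` and `SPANSTATUS`
imports from the integration's `utils.py`, and deletes the three tests
that asserted on those spans (success, error status, and PII-disabled).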
---
 .../openai_agents/spans/ai_client.py          |   2 -
 .../integrations/openai_agents/utils.py       |  24 +-
 .../openai_agents/test_openai_agents.py       | 523 ------------------
 3 files changed, 1 insertion(+), 548 deletions(-)

diff --git a/sentry_sdk/integrations/openai_agents/spans/ai_client.py b/sentry_sdk/integrations/openai_agents/spans/ai_client.py
index b060c29aaf..564d325416 100644
--- a/sentry_sdk/integrations/openai_agents/spans/ai_client.py
+++ b/sentry_sdk/integrations/openai_agents/spans/ai_client.py
@@ -5,7 +5,6 @@
 
 from ..consts import SPAN_ORIGIN
 from ..utils import (
-    _create_mcp_execute_tool_spans,
     _set_agent_data,
     _set_input_data,
     _set_output_data,
@@ -55,7 +54,6 @@ def update_ai_client_span(
 
     if hasattr(response, "output") and response.output:
         _set_output_data(span, response)
-        _create_mcp_execute_tool_spans(span, response)
 
     if response_model is not None:
         span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, response_model)
diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py
index 5ffdb915ba..78f0a90f65 100644
--- a/sentry_sdk/integrations/openai_agents/utils.py
+++ b/sentry_sdk/integrations/openai_agents/utils.py
@@ -14,7 +14,7 @@
     set_data_normalized,
     truncate_and_annotate_messages,
 )
-from sentry_sdk.consts import OP, SPANDATA, SPANSTATUS
+from sentry_sdk.consts import SPANDATA
 from sentry_sdk.integrations import DidNotEnable
 from sentry_sdk.scope import should_send_default_pii
 from sentry_sdk.utils import event_from_exception, safe_serialize
@@ -215,25 +215,3 @@ def _set_output_data(span: "sentry_sdk.tracing.Span", result: "Any") -> None:
         set_data_normalized(
             span, SPANDATA.GEN_AI_RESPONSE_TEXT, output_messages["response"]
         )
-
-
-def _create_mcp_execute_tool_spans(
-    span: "sentry_sdk.tracing.Span", result: "agents.Result"
-) -> None:
-    for output in result.output:
-        if output.__class__.__name__ == "McpCall":
-            with sentry_sdk.start_span(
-                op=OP.GEN_AI_EXECUTE_TOOL,
-                name=f"execute_tool {output.name}",
-                start_timestamp=span.start_timestamp,
-            ) as execute_tool_span:
-                execute_tool_span.set_data(SPANDATA.GEN_AI_TOOL_NAME, output.name)
-                if should_send_default_pii():
-                    execute_tool_span.set_data(
-                        SPANDATA.GEN_AI_TOOL_INPUT, output.arguments
-                    )
-                    execute_tool_span.set_data(
-                        SPANDATA.GEN_AI_TOOL_OUTPUT, output.output
-                    )
-                if output.error:
-                    execute_tool_span.set_status(SPANSTATUS.INTERNAL_ERROR)
diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index 6e49b2b08e..2cc33d6fd7 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -16,7 +16,6 @@
 )
 from agents.exceptions import MaxTurnsExceeded, ModelBehaviorError
 from agents.items import (
-    McpCall,
     ResponseFunctionToolCall,
     ResponseOutputMessage,
     ResponseOutputText,
@@ -3123,528 +3122,6 @@ async def test_span_status_error(
     assert transaction["contexts"]["trace"]["status"] == "internal_error"
 
 
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
-@pytest.mark.asyncio
-async def test_mcp_tool_execution_spans(
-    sentry_init,
-    capture_events,
-    capture_items,
-    test_agent,
-    get_model_response,
-    stream_gen_ai_spans,
-):
-    """
-    Test that MCP (Model Context Protocol) tool calls create execute_tool spans.
-    """
-    client = AsyncOpenAI(api_key="test-key")
-    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
-    agent = test_agent.clone(model=model)
-
-    mcp_response = get_model_response(
-        Response(
-            id="resp_mcp_123",
-            output=[
-                McpCall(
-                    id="mcp_call_123",
-                    name="test_mcp_tool",
-                    arguments='{"query": "search term"}',
-                    output="MCP tool executed successfully",
-                    error=None,
-                    type="mcp_call",
-                    server_label="test_server",
-                )
-            ],
-            parallel_tool_calls=False,
-            tool_choice="none",
-            tools=[],
-            created_at=10000000,
-            model="gpt-4.1-2025-04-14",
-            object="response",
-            usage=ResponseUsage(
-                input_tokens=10,
-                input_tokens_details=InputTokensDetails(
-                    cached_tokens=0,
-                ),
-                output_tokens=5,
-                output_tokens_details=OutputTokensDetails(
-                    reasoning_tokens=0,
-                ),
-                total_tokens=15,
-            ),
-        ),
-        serialize_pydantic=True,
-    )
-
-    final_response = get_model_response(
-        Response(
-            id="resp_final_123",
-            output=[
-                ResponseOutputMessage(
-                    id="msg_final",
-                    type="message",
-                    status="completed",
-                    content=[
-                        ResponseOutputText(
-                            text="Task completed using MCP tool",
-                            type="output_text",
-                            annotations=[],
-                        )
-                    ],
-                    role="assistant",
-                )
-            ],
-            parallel_tool_calls=False,
-            tool_choice="none",
-            tools=[],
-            created_at=10000000,
-            model="gpt-4.1-2025-04-14",
-            object="response",
-            usage=ResponseUsage(
-                input_tokens=15,
-                input_tokens_details=InputTokensDetails(
-                    cached_tokens=0,
-                ),
-                output_tokens=10,
-                output_tokens_details=OutputTokensDetails(
-                    reasoning_tokens=0,
-                ),
-                total_tokens=25,
-            ),
-        ),
-        serialize_pydantic=True,
-    )
-
-    if stream_gen_ai_spans:
-        with patch.object(
-            agent.model._client._client,
-            "send",
-            side_effect=[mcp_response, final_response],
-        ) as _:
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=True,
-                stream_gen_ai_spans=stream_gen_ai_spans,
-            )
-
-            items = capture_items("span", "transaction")
-
-            await agents.Runner.run(
-                agent,
-                "Please use MCP tool",
-                run_config=test_run_config,
-            )
-
-        spans = [item.payload for item in items if item.type == "span"]
-
-        # Find the MCP execute_tool span
-        mcp_tool_span = None
-        for span in spans:
-            if span.get("name") == "execute_tool test_mcp_tool":
-                mcp_tool_span = span
-                break
-
-        # Verify the MCP tool span was created
-        assert mcp_tool_span is not None, "MCP execute_tool span was not created"
-        assert mcp_tool_span["name"] == "execute_tool test_mcp_tool"
-        assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool"
-        assert (
-            mcp_tool_span["attributes"]["gen_ai.tool.input"]
-            == '{"query": "search term"}'
-        )
-        assert (
-            mcp_tool_span["attributes"]["gen_ai.tool.output"]
-            == "MCP tool executed successfully"
-        )
-
-        # Verify no error status since error was None
-        assert mcp_tool_span.get("status") != "error"
-        assert mcp_tool_span.get("tags", {}).get("status") != "error"
-    else:
-        with patch.object(
-            agent.model._client._client,
-            "send",
-            side_effect=[mcp_response, final_response],
-        ) as _:
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=True,
-                stream_gen_ai_spans=stream_gen_ai_spans,
-            )
-            events = capture_events()
-
-            await agents.Runner.run(
-                agent,
-                "Please use MCP tool",
-                run_config=test_run_config,
-            )
-
-        (transaction,) = events
-        spans = transaction["spans"]
-
-        # Find the MCP execute_tool span
-        mcp_tool_span = None
-        for span in spans:
-            if span.get("description") == "execute_tool test_mcp_tool":
-                mcp_tool_span = span
-                break
-
-        # Verify the MCP tool span was created
-        assert mcp_tool_span is not None, "MCP execute_tool span was not created"
-        assert mcp_tool_span["description"] == "execute_tool test_mcp_tool"
-        assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool"
-        assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "search term"}'
-        assert (
-            mcp_tool_span["data"]["gen_ai.tool.output"]
-            == "MCP tool executed successfully"
-        )
-
-        # Verify no error status since error was None
-        assert mcp_tool_span.get("status") != "internal_error"
-        assert mcp_tool_span.get("tags", {}).get("status") != "internal_error"
-
-
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
-@pytest.mark.asyncio
-async def test_mcp_tool_execution_with_error(
-    sentry_init,
-    capture_events,
-    capture_items,
-    test_agent,
-    get_model_response,
-    stream_gen_ai_spans,
-):
-    """
-    Test that MCP tool calls with errors are tracked with error status.
-    """
-    client = AsyncOpenAI(api_key="test-key")
-    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
-    agent = test_agent.clone(model=model)
-
-    mcp_response = get_model_response(
-        Response(
-            id="resp_mcp_123",
-            output=[
-                McpCall(
-                    id="mcp_call_error_123",
-                    name="failing_mcp_tool",
-                    arguments='{"query": "test"}',
-                    output=None,
-                    error="MCP tool execution failed",
-                    type="mcp_call",
-                    server_label="test_server",
-                )
-            ],
-            parallel_tool_calls=False,
-            tool_choice="none",
-            tools=[],
-            created_at=10000000,
-            model="gpt-4.1-2025-04-14",
-            object="response",
-            usage=ResponseUsage(
-                input_tokens=10,
-                input_tokens_details=InputTokensDetails(
-                    cached_tokens=0,
-                ),
-                output_tokens=5,
-                output_tokens_details=OutputTokensDetails(
-                    reasoning_tokens=0,
-                ),
-                total_tokens=15,
-            ),
-        ),
-        serialize_pydantic=True,
-    )
-
-    final_response = get_model_response(
-        Response(
-            id="resp_final_123",
-            output=[
-                ResponseOutputMessage(
-                    id="msg_final",
-                    type="message",
-                    status="completed",
-                    content=[
-                        ResponseOutputText(
-                            text="Task completed using MCP tool",
-                            type="output_text",
-                            annotations=[],
-                        )
-                    ],
-                    role="assistant",
-                )
-            ],
-            parallel_tool_calls=False,
-            tool_choice="none",
-            tools=[],
-            created_at=10000000,
-            model="gpt-4.1-2025-04-14",
-            object="response",
-            usage=ResponseUsage(
-                input_tokens=15,
-                input_tokens_details=InputTokensDetails(
-                    cached_tokens=0,
-                ),
-                output_tokens=10,
-                output_tokens_details=OutputTokensDetails(
-                    reasoning_tokens=0,
-                ),
-                total_tokens=25,
-            ),
-        ),
-        serialize_pydantic=True,
-    )
-
-    if stream_gen_ai_spans:
-        with patch.object(
-            agent.model._client._client,
-            "send",
-            side_effect=[mcp_response, final_response],
-        ) as _:
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=True,
-                stream_gen_ai_spans=stream_gen_ai_spans,
-            )
-
-            items = capture_items("span", "transaction")
-
-            await agents.Runner.run(
-                agent,
-                "Please use failing MCP tool",
-                run_config=test_run_config,
-            )
-
-        spans = [item.payload for item in items if item.type == "span"]
-
-        # Find the MCP execute_tool span with error
-        mcp_tool_span = None
-        for span in spans:
-            if span.get("name") == "execute_tool failing_mcp_tool":
-                mcp_tool_span = span
-                break
-
-        # Verify the MCP tool span was created with error status
-        assert mcp_tool_span is not None, "MCP execute_tool span was not created"
-        assert mcp_tool_span["name"] == "execute_tool failing_mcp_tool"
-        assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "failing_mcp_tool"
-        assert mcp_tool_span["attributes"]["gen_ai.tool.input"] == '{"query": "test"}'
-
-        # Verify error status was set
-        assert mcp_tool_span["status"] == "error"
-    else:
-        with patch.object(
-            agent.model._client._client,
-            "send",
-            side_effect=[mcp_response, final_response],
-        ) as _:
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=True,
-                stream_gen_ai_spans=stream_gen_ai_spans,
-            )
-            events = capture_events()
-
-            await agents.Runner.run(
-                agent,
-                "Please use failing MCP tool",
-                run_config=test_run_config,
-            )
-
-        (transaction,) = events
-        spans = transaction["spans"]
-
-        # Find the MCP execute_tool span with error
-        mcp_tool_span = None
-        for span in spans:
-            if span.get("description") == "execute_tool failing_mcp_tool":
-                mcp_tool_span = span
-                break
-
-        # Verify the MCP tool span was created with error status
-        assert mcp_tool_span is not None, "MCP execute_tool span was not created"
-        assert mcp_tool_span["description"] == "execute_tool failing_mcp_tool"
-        assert mcp_tool_span["data"]["gen_ai.tool.name"] == "failing_mcp_tool"
-        assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "test"}'
-        assert mcp_tool_span["data"]["gen_ai.tool.output"] is None
-
-        # Verify error status was set
-        assert mcp_tool_span["status"] == "internal_error"
-        assert mcp_tool_span["tags"]["status"] == "internal_error"
-
-
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
-@pytest.mark.asyncio
-async def test_mcp_tool_execution_without_pii(
-    sentry_init,
-    capture_events,
-    capture_items,
-    test_agent,
-    get_model_response,
-    stream_gen_ai_spans,
-):
-    """
-    Test that MCP tool input/output are not included when send_default_pii is False.
-    """
-    client = AsyncOpenAI(api_key="test-key")
-    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
-    agent = test_agent.clone(model=model)
-
-    mcp_response = get_model_response(
-        Response(
-            id="resp_mcp_123",
-            output=[
-                McpCall(
-                    id="mcp_call_pii_123",
-                    name="test_mcp_tool",
-                    arguments='{"query": "sensitive data"}',
-                    output="Result with sensitive info",
-                    error=None,
-                    type="mcp_call",
-                    server_label="test_server",
-                )
-            ],
-            parallel_tool_calls=False,
-            tool_choice="none",
-            tools=[],
-            created_at=10000000,
-            model="gpt-4.1-2025-04-14",
-            object="response",
-            usage=ResponseUsage(
-                input_tokens=10,
-                input_tokens_details=InputTokensDetails(
-                    cached_tokens=0,
-                ),
-                output_tokens=5,
-                output_tokens_details=OutputTokensDetails(
-                    reasoning_tokens=0,
-                ),
-                total_tokens=15,
-            ),
-        ),
-        serialize_pydantic=True,
-    )
-
-    final_response = get_model_response(
-        Response(
-            id="resp_final_123",
-            output=[
-                ResponseOutputMessage(
-                    id="msg_final",
-                    type="message",
-                    status="completed",
-                    content=[
-                        ResponseOutputText(
-                            text="Task completed",
-                            type="output_text",
-                            annotations=[],
-                        )
-                    ],
-                    role="assistant",
-                )
-            ],
-            parallel_tool_calls=False,
-            tool_choice="none",
-            tools=[],
-            created_at=10000000,
-            model="gpt-4.1-2025-04-14",
-            object="response",
-            usage=ResponseUsage(
-                input_tokens=15,
-                input_tokens_details=InputTokensDetails(
-                    cached_tokens=0,
-                ),
-                output_tokens=10,
-                output_tokens_details=OutputTokensDetails(
-                    reasoning_tokens=5,
-                ),
-                total_tokens=25,
-            ),
-        ),
-        serialize_pydantic=True,
-    )
-
-    if stream_gen_ai_spans:
-        with patch.object(
-            agent.model._client._client,
-            "send",
-            side_effect=[mcp_response, final_response],
-        ) as _:
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=False,  # PII disabled
-                stream_gen_ai_spans=stream_gen_ai_spans,
-            )
-
-            items = capture_items("span", "transaction")
-
-            await agents.Runner.run(
-                agent,
-                "Please use MCP tool",
-                run_config=test_run_config,
-            )
-
-        spans = [item.payload for item in items if item.type == "span"]
-
-        # Find the MCP execute_tool span
-        mcp_tool_span = None
-        for span in spans:
-            if span.get("name") == "execute_tool test_mcp_tool":
-                mcp_tool_span = span
-                break
-
-        # Verify the MCP tool span was created but without input/output
-        assert mcp_tool_span is not None, "MCP execute_tool span was not created"
-        assert mcp_tool_span["name"] == "execute_tool test_mcp_tool"
-        assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool"
-
-        # Verify input and output are not included when send_default_pii is False
-        assert "gen_ai.tool.input" not in mcp_tool_span["attributes"]
-        assert "gen_ai.tool.output" not in mcp_tool_span["attributes"]
-    else:
-        with patch.object(
-            agent.model._client._client,
-            "send",
-            side_effect=[mcp_response, final_response],
-        ) as _:
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=False,  # PII disabled
-                stream_gen_ai_spans=stream_gen_ai_spans,
-            )
-            events = capture_events()
-
-            await agents.Runner.run(
-                agent,
-                "Please use MCP tool",
-                run_config=test_run_config,
-            )
-
-        (transaction,) = events
-        spans = transaction["spans"]
-
-        # Find the MCP execute_tool span
-        mcp_tool_span = None
-        for span in spans:
-            if span.get("description") == "execute_tool test_mcp_tool":
-                mcp_tool_span = span
-                break
-
-        # Verify the MCP tool span was created but without input/output
-        assert mcp_tool_span is not None, "MCP execute_tool span was not created"
-        assert mcp_tool_span["description"] == "execute_tool test_mcp_tool"
-        assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool"
-
-        # Verify input and output are not included when send_default_pii is False
-        assert "gen_ai.tool.input" not in mcp_tool_span["data"]
-        assert "gen_ai.tool.output" not in mcp_tool_span["data"]
-
-
 @pytest.mark.parametrize("stream_gen_ai_spans", [True, False])
 @pytest.mark.asyncio
 async def test_multiple_agents_asyncio(