From dd79d6f27590559c3e7e71352ab9a8616e8be051 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 1 Apr 2026 09:05:45 +0200 Subject: [PATCH 1/4] feat(langchain): Record run_name as gen_ai.pipeline.name on Invoke Agent Spans --- sentry_sdk/integrations/langchain.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 21447a6655..5b1540560d 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -975,7 +975,7 @@ def new_invoke(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": origin=LangchainIntegration.origin, ) as span: if run_name: - span.set_data(SPANDATA.GEN_AI_AGENT_NAME, run_name) + span.set_data(SPANDATA.GEN_AI_PIPELINE_NAME, run_name) span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, False) @@ -1035,7 +1035,7 @@ def new_stream(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": span.__enter__() if run_name: - span.set_data(SPANDATA.GEN_AI_AGENT_NAME, run_name) + span.set_data(SPANDATA.GEN_AI_PIPELINE_NAME, run_name) span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True) From 77af6f29d0f345d07616613e6a37e69e188b9742 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 1 Apr 2026 10:01:38 +0200 Subject: [PATCH 2/4] add tests --- .../integrations/langchain/test_langchain.py | 377 ++++++++++-------- 1 file changed, 215 insertions(+), 162 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index e7edd645f2..b2a2e72f90 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -43,14 +43,6 @@ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder from langchain_core.messages import HumanMessage, SystemMessage -from openai.types.chat.chat_completion_chunk import ( - ChatCompletionChunk, - Choice, - ChoiceDelta, - ChoiceDeltaToolCall, - ChoiceDeltaToolCallFunction, -) - from openai.types.completion import Completion from openai.types.completion_choice import CompletionChoice @@ -581,6 +573,7 @@ def test_langchain_openai_tools_agent( request, get_model_response, server_side_event_chunks, + streaming_chat_completions_model_responses, ): sentry_init( integrations=[ @@ -604,167 +597,18 @@ def test_langchain_openai_tools_agent( ] ) + model_responses = streaming_chat_completions_model_responses() + tool_response = get_model_response( server_side_event_chunks( - [ - ChatCompletionChunk( - id="chatcmpl-turn-1", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - Choice( - index=0, - delta=ChoiceDelta(role="assistant"), - finish_reason=None, - ), - ], - ), - ChatCompletionChunk( - id="chatcmpl-turn-1", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - Choice( - index=0, - delta=ChoiceDelta( - tool_calls=[ - ChoiceDeltaToolCall( - index=0, - id="call_BbeyNhCKa6kYLYzrD40NGm3b", - type="function", - function=ChoiceDeltaToolCallFunction( - name="get_word_length", - arguments="", - ), - ), - ], - ), - finish_reason=None, - ), - ], - ), - ChatCompletionChunk( - id="chatcmpl-turn-1", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - Choice( - index=0, - delta=ChoiceDelta( - tool_calls=[ - ChoiceDeltaToolCall( - index=0, - function=ChoiceDeltaToolCallFunction( - arguments='{"word": "eudca"}', - ), - ), - ], - ), - finish_reason=None, - ), - ], - ), - ChatCompletionChunk( - id="chatcmpl-turn-1", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - Choice( - index=0, - delta=ChoiceDelta(content="5"), - finish_reason=None, - ), - ], - ), - ChatCompletionChunk( - id="chatcmpl-turn-1", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - Choice( - index=0, - delta=ChoiceDelta(), - finish_reason="function_call", - ), - ], - ), - ChatCompletionChunk( - id="chatcmpl-turn-1", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[], - usage=CompletionUsage( - prompt_tokens=142, - completion_tokens=50, - total_tokens=192, - ), - ), - ], + next(model_responses), include_event_type=False, ) ) final_response = get_model_response( server_side_event_chunks( - [ - ChatCompletionChunk( - id="chatcmpl-turn-2", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - Choice( - index=0, - delta=ChoiceDelta(role="assistant"), - finish_reason=None, - ), - ], - ), - ChatCompletionChunk( - id="chatcmpl-turn-2", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - Choice( - index=0, - delta=ChoiceDelta(content="The word eudca has 5 letters."), - finish_reason=None, - ), - ], - ), - ChatCompletionChunk( - id="chatcmpl-turn-2", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[ - Choice( - index=0, - delta=ChoiceDelta(), - finish_reason="stop", - ), - ], - ), - ChatCompletionChunk( - id="chatcmpl-turn-2", - object="chat.completion.chunk", - created=10000000, - model="gpt-3.5-turbo", - choices=[], - usage=CompletionUsage( - prompt_tokens=89, - completion_tokens=28, - total_tokens=117, - ), - ), - ], + next(model_responses), include_event_type=False, ) ) @@ -784,7 +628,12 @@ def test_langchain_openai_tools_agent( side_effect=[tool_response, final_response], ) as _: with start_transaction(): - list(agent_executor.stream({"input": "How many letters in the word eudca"})) + list( + agent_executor.invoke( + {"input": "How many letters in the word eudca"}, + {"run_name": "my-snazzy-pipeline"}, + ) + ) tx = events[0] assert tx["type"] == "transaction" @@ -801,6 +650,210 @@ def test_langchain_openai_tools_agent( assert chat_spans[1]["origin"] == "auto.ai.langchain" assert tool_exec_span["origin"] == "auto.ai.langchain" + assert invoke_agent_span["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" + + # We can't guarantee anything about the "shape" of the langchain execution graph + assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 + + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 + + if "gen_ai.usage.input_tokens" in chat_spans[1]["data"]: + assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 + + if send_default_pii and include_prompts: + assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] + assert 5 == int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + + param_id = request.node.callspec.id + if "string" in param_id: + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + else: + assert [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + + assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Verify tool calls are recorded when PII is enabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get("data", {}), ( + "Tool calls should be recorded when send_default_pii=True and include_prompts=True" + ) + tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + assert isinstance(tool_calls_data, (list, str)) # Could be serialized + if isinstance(tool_calls_data, str): + assert "get_word_length" in tool_calls_data + elif isinstance(tool_calls_data, list) and len(tool_calls_data) > 0: + # Check if tool calls contain expected function name + tool_call_str = str(tool_calls_data) + assert "get_word_length" in tool_call_str + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("data", {}) + + # Verify tool calls are NOT recorded when PII is disabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + + # Verify finish_reasons is always an array of strings + assert chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "function_call" + ] + assert chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["stop"] + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [ + (True, True), + (True, False), + (False, True), + (False, False), + ], +) +@pytest.mark.parametrize( + "system_instructions_content", + [ + "You are very powerful assistant, but don't know current events", + ["You are a helpful assistant.", "Be concise and clear."], + [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + ], + ids=["string", "list", "blocks"], +) +def test_langchain_openai_tools_agent_stream( + sentry_init, + capture_events, + send_default_pii, + include_prompts, + system_instructions_content, + request, + get_model_response, + server_side_event_chunks, + streaming_chat_completions_model_responses, +): + sentry_init( + integrations=[ + LangchainIntegration( + include_prompts=include_prompts, + ) + ], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + system_instructions_content, + ), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + + model_responses = streaming_chat_completions_model_responses() + + tool_response = get_model_response( + server_side_event_chunks( + next(model_responses), + include_event_type=False, + ) + ) + + final_response = get_model_response( + server_side_event_chunks( + next(model_responses), + include_event_type=False, + ) + ) + + llm = ChatOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt) + + agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + with start_transaction(): + list( + agent_executor.stream( + {"input": "How many letters in the word eudca"}, + {"run_name": "my-snazzy-pipeline"}, + ) + ) + + tx = events[0] + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + invoke_agent_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.invoke_agent") + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + tool_exec_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") + + assert len(chat_spans) == 2 + + assert invoke_agent_span["origin"] == "auto.ai.langchain" + assert chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[1]["origin"] == "auto.ai.langchain" + assert tool_exec_span["origin"] == "auto.ai.langchain" + + assert invoke_agent_span["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" + # We can't guarantee anything about the "shape" of the langchain execution graph assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 From d02f1cad11b3f3751c7e93dc2d677152cb4f1edf Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 1 Apr 2026 10:17:14 +0200 Subject: [PATCH 3/4] add conftest --- tests/conftest.py | 169 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 6a15d3668f..00dafe6ce2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1126,6 +1126,175 @@ def nonstreaming_chat_completions_model_response(): ) +@pytest.fixture +def streaming_chat_completions_model_responses(): + def inner(): + yield [ + openai.types.chat.chat_completion_chunk.ChatCompletionChunk( + id="chatcmpl-turn-1", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + openai.types.chat.chat_completion_chunk.Choice( + index=0, + delta=openai.types.chat.chat_completion_chunk.ChoiceDelta( + role="assistant" + ), + finish_reason=None, + ), + ], + ), + openai.types.chat.chat_completion_chunk.ChatCompletionChunk( + id="chatcmpl-turn-1", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + openai.types.chat.chat_completion_chunk.Choice( + index=0, + delta=openai.types.chat.chat_completion_chunk.ChoiceDelta( + tool_calls=[ + openai.types.chat.chat_completion_chunk.ChoiceDeltaToolCall( + index=0, + id="call_BbeyNhCKa6kYLYzrD40NGm3b", + type="function", + function=openai.types.chat.chat_completion_chunk.ChoiceDeltaToolCallFunction( + name="get_word_length", + arguments="", + ), + ), + ], + ), + finish_reason=None, + ), + ], + ), + openai.types.chat.chat_completion_chunk.ChatCompletionChunk( + id="chatcmpl-turn-1", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + openai.types.chat.chat_completion_chunk.Choice( + index=0, + delta=openai.types.chat.chat_completion_chunk.ChoiceDelta( + tool_calls=[ + openai.types.chat.chat_completion_chunk.ChoiceDeltaToolCall( + index=0, + function=openai.types.chat.chat_completion_chunk.ChoiceDeltaToolCallFunction( + arguments='{"word": "eudca"}', + ), + ), + ], + ), + finish_reason=None, + ), + ], + ), + openai.types.chat.chat_completion_chunk.ChatCompletionChunk( + id="chatcmpl-turn-1", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + openai.types.chat.chat_completion_chunk.Choice( + index=0, + delta=openai.types.chat.chat_completion_chunk.ChoiceDelta( + content="5" + ), + finish_reason=None, + ), + ], + ), + openai.types.chat.chat_completion_chunk.ChatCompletionChunk( + id="chatcmpl-turn-1", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + openai.types.chat.chat_completion_chunk.Choice( + index=0, + delta=openai.types.chat.chat_completion_chunk.ChoiceDelta(), + finish_reason="function_call", + ), + ], + ), + openai.types.chat.chat_completion_chunk.ChatCompletionChunk( + id="chatcmpl-turn-1", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[], + usage=openai.types.chat.chat_completion_chunk.CompletionUsage( + prompt_tokens=142, + completion_tokens=50, + total_tokens=192, + ), + ), + ] + + yield [ + openai.types.chat.chat_completion_chunk.ChatCompletionChunk( + id="chatcmpl-turn-2", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + openai.types.chat.chat_completion_chunk.Choice( + index=0, + delta=openai.types.chat.chat_completion_chunk.ChoiceDelta( + role="assistant" + ), + finish_reason=None, + ), + ], + ), + openai.types.chat.chat_completion_chunk.ChatCompletionChunk( + id="chatcmpl-turn-2", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + openai.types.chat.chat_completion_chunk.Choice( + index=0, + delta=openai.types.chat.chat_completion_chunk.ChoiceDelta( + content="The word eudca has 5 letters." + ), + finish_reason=None, + ), + ], + ), + openai.types.chat.chat_completion_chunk.ChatCompletionChunk( + id="chatcmpl-turn-2", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[ + openai.types.chat.chat_completion_chunk.Choice( + index=0, + delta=openai.types.chat.chat_completion_chunk.ChoiceDelta(), + finish_reason="stop", + ), + ], + ), + openai.types.chat.chat_completion_chunk.ChatCompletionChunk( + id="chatcmpl-turn-2", + object="chat.completion.chunk", + created=10000000, + model="gpt-3.5-turbo", + choices=[], + usage=openai.types.chat.chat_completion_chunk.CompletionUsage( + prompt_tokens=89, + completion_tokens=28, + total_tokens=117, + ), + ), + ] + + return inner + + @pytest.fixture def responses_tool_call_model_responses(): def inner( From bf72fb2ae5248cb070427bf417c3e08f6b2ed70b Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 1 Apr 2026 10:35:16 +0200 Subject: [PATCH 4/4] add tests --- .../integrations/langchain/test_langchain.py | 406 ++++++++++++++++++ 1 file changed, 406 insertions(+) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index b2a2e72f90..31d4789891 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -745,6 +745,209 @@ def test_langchain_openai_tools_agent( assert "get_word_length" in tools_data +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [ + (True, True), + (True, False), + (False, True), + (False, False), + ], +) +@pytest.mark.parametrize( + "system_instructions_content", + [ + "You are very powerful assistant, but don't know current events", + ["You are a helpful assistant.", "Be concise and clear."], + [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + ], + ids=["string", "list", "blocks"], +) +def test_langchain_openai_tools_agent_with_config( + sentry_init, + capture_events, + send_default_pii, + include_prompts, + system_instructions_content, + request, + get_model_response, + server_side_event_chunks, + streaming_chat_completions_model_responses, +): + sentry_init( + integrations=[ + LangchainIntegration( + include_prompts=include_prompts, + ) + ], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + system_instructions_content, + ), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + + model_responses = streaming_chat_completions_model_responses() + + tool_response = get_model_response( + server_side_event_chunks( + next(model_responses), + include_event_type=False, + ) + ) + + final_response = get_model_response( + server_side_event_chunks( + next(model_responses), + include_event_type=False, + ) + ) + + llm = ChatOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt).with_config( + {"run_name": "my-snazzy-pipeline"} + ) + + agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + with start_transaction(): + list( + agent_executor.invoke( + {"input": "How many letters in the word eudca"}, + ) + ) + + tx = events[0] + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + invoke_agent_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.invoke_agent") + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + tool_exec_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") + + assert len(chat_spans) == 2 + + assert invoke_agent_span["origin"] == "auto.ai.langchain" + assert chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[1]["origin"] == "auto.ai.langchain" + assert tool_exec_span["origin"] == "auto.ai.langchain" + + assert invoke_agent_span["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" + + # We can't guarantee anything about the "shape" of the langchain execution graph + assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 + + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 + + if "gen_ai.usage.input_tokens" in chat_spans[1]["data"]: + assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 + + if send_default_pii and include_prompts: + assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] + assert 5 == int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + + param_id = request.node.callspec.id + if "string" in param_id: + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + else: + assert [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + + assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Verify tool calls are recorded when PII is enabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get("data", {}), ( + "Tool calls should be recorded when send_default_pii=True and include_prompts=True" + ) + tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + assert isinstance(tool_calls_data, (list, str)) # Could be serialized + if isinstance(tool_calls_data, str): + assert "get_word_length" in tool_calls_data + elif isinstance(tool_calls_data, list) and len(tool_calls_data) > 0: + # Check if tool calls contain expected function name + tool_call_str = str(tool_calls_data) + assert "get_word_length" in tool_call_str + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("data", {}) + + # Verify tool calls are NOT recorded when PII is disabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + + # Verify finish_reasons is always an array of strings + assert chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "function_call" + ] + assert chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["stop"] + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data + + @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -947,6 +1150,209 @@ def test_langchain_openai_tools_agent_stream( assert "get_word_length" in tools_data +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [ + (True, True), + (True, False), + (False, True), + (False, False), + ], +) +@pytest.mark.parametrize( + "system_instructions_content", + [ + "You are very powerful assistant, but don't know current events", + ["You are a helpful assistant.", "Be concise and clear."], + [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + ], + ids=["string", "list", "blocks"], +) +def test_langchain_openai_tools_agent_stream_with_config( + sentry_init, + capture_events, + send_default_pii, + include_prompts, + system_instructions_content, + request, + get_model_response, + server_side_event_chunks, + streaming_chat_completions_model_responses, +): + sentry_init( + integrations=[ + LangchainIntegration( + include_prompts=include_prompts, + ) + ], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + system_instructions_content, + ), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + + model_responses = streaming_chat_completions_model_responses() + + tool_response = get_model_response( + server_side_event_chunks( + next(model_responses), + include_event_type=False, + ) + ) + + final_response = get_model_response( + server_side_event_chunks( + next(model_responses), + include_event_type=False, + ) + ) + + llm = ChatOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt).with_config( + {"run_name": "my-snazzy-pipeline"} + ) + + agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + with start_transaction(): + list( + agent_executor.stream( + {"input": "How many letters in the word eudca"}, + ) + ) + + tx = events[0] + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + invoke_agent_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.invoke_agent") + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + tool_exec_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") + + assert len(chat_spans) == 2 + + assert invoke_agent_span["origin"] == "auto.ai.langchain" + assert chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[1]["origin"] == "auto.ai.langchain" + assert tool_exec_span["origin"] == "auto.ai.langchain" + + assert invoke_agent_span["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" + + # We can't guarantee anything about the "shape" of the langchain execution graph + assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 + + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 + + if "gen_ai.usage.input_tokens" in chat_spans[1]["data"]: + assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 + + if send_default_pii and include_prompts: + assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] + assert 5 == int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + + param_id = request.node.callspec.id + if "string" in param_id: + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + else: + assert [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + + assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Verify tool calls are recorded when PII is enabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get("data", {}), ( + "Tool calls should be recorded when send_default_pii=True and include_prompts=True" + ) + tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + assert isinstance(tool_calls_data, (list, str)) # Could be serialized + if isinstance(tool_calls_data, str): + assert "get_word_length" in tool_calls_data + elif isinstance(tool_calls_data, list) and len(tool_calls_data) > 0: + # Check if tool calls contain expected function name + tool_call_str = str(tool_calls_data) + assert "get_word_length" in tool_call_str + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("data", {}) + + # Verify tool calls are NOT recorded when PII is disabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + + # Verify finish_reasons is always an array of strings + assert chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "function_call" + ] + assert chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["stop"] + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data + + def test_langchain_error(sentry_init, capture_events): global llm_type llm_type = "acme-llm"