diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 6707f8194b..480db9132d 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -50,8 +50,13 @@
     from sentry_sdk.tracing import Span
     from sentry_sdk._types import TextPart
 
-    from openai.types.responses import ResponseInputParam, SequenceNotStr
-    from openai.types.responses import ResponseStreamEvent
+    from openai.types.responses.response_usage import ResponseUsage
+    from openai.types.responses import (
+        ResponseInputParam,
+        SequenceNotStr,
+        ResponseStreamEvent,
+    )
+    from openai.types import CompletionUsage
     from openai import Omit
 
 try:
@@ -144,44 +149,56 @@ def _capture_exception(exc: "Any", manual_span_cleanup: bool = True) -> None:
     sentry_sdk.capture_event(event, hint=hint)
 
 
-def _get_usage(usage: "Any", names: "List[str]") -> int:
-    for name in names:
-        if hasattr(usage, name) and isinstance(getattr(usage, name), int):
-            return getattr(usage, name)
-    return 0
+def _has_attr_and_is_int(
+    token_usage: "Union[CompletionUsage, ResponseUsage]", attr_name: str
+) -> bool:
+    """Tell whether ``attr_name`` is present on ``token_usage`` with an int value."""
+    value = getattr(token_usage, attr_name, None)
+    return isinstance(value, int)
 
 
-def _calculate_token_usage(
+def _calculate_completions_token_usage(
     messages: "Optional[Iterable[ChatCompletionMessageParam]]",
     response: "Any",
     span: "Span",
     streaming_message_responses: "Optional[List[str]]",
+    streaming_message_total_token_usage: "Optional[CompletionUsage]",
     count_tokens: "Callable[..., Any]",
 ) -> None:
+    """Extract and record token usage from a Chat Completions API response."""
     input_tokens: "Optional[int]" = 0
     input_tokens_cached: "Optional[int]" = 0
     output_tokens: "Optional[int]" = 0
     output_tokens_reasoning: "Optional[int]" = 0
     total_tokens: "Optional[int]" = 0
-
-    if hasattr(response, "usage"):
-        input_tokens = _get_usage(response.usage, ["input_tokens", "prompt_tokens"])
-        if hasattr(response.usage, "input_tokens_details"):
-            input_tokens_cached = _get_usage(
-                response.usage.input_tokens_details, ["cached_tokens"]
+    usage = None
+
+    if streaming_message_total_token_usage is not None:
+        usage = streaming_message_total_token_usage
+    elif hasattr(response, "usage"):
+        usage = response.usage
+
+    if usage is not None:
+        if _has_attr_and_is_int(usage, "prompt_tokens"):
+            input_tokens = usage.prompt_tokens
+        if _has_attr_and_is_int(usage, "completion_tokens"):
+            output_tokens = usage.completion_tokens
+        if _has_attr_and_is_int(usage, "total_tokens"):
+            total_tokens = usage.total_tokens
+
+        if hasattr(usage, "prompt_tokens_details"):
+            cached = getattr(usage.prompt_tokens_details, "cached_tokens", None)
+            if isinstance(cached, int):
+                input_tokens_cached = cached
+
+        if hasattr(usage, "completion_tokens_details"):
+            reasoning = getattr(
+                usage.completion_tokens_details, "reasoning_tokens", None
             )
+            if isinstance(reasoning, int):
+                output_tokens_reasoning = reasoning
 
-        output_tokens = _get_usage(
-            response.usage, ["output_tokens", "completion_tokens"]
-        )
-        if hasattr(response.usage, "output_tokens_details"):
-            output_tokens_reasoning = _get_usage(
-                response.usage.output_tokens_details, ["reasoning_tokens"]
-            )
-
-        total_tokens = _get_usage(response.usage, ["total_tokens"])
-
-    # Manually count tokens
+    # Manually count input tokens
     if input_tokens == 0:
         for message in messages or []:
             if isinstance(message, str):
@@ -191,11 +208,11 @@ def _calculate_token_usage(
                 message_content = message.get("content")
                 if message_content is None:
                     continue
-                # Deliberate use of Completions function for both Completions and Responses input format.
                 text_items = _get_text_items(message_content)
                 input_tokens += sum(count_tokens(text) for text in text_items)
                 continue
 
+    # Manually count output tokens
     if output_tokens == 0:
         if streaming_message_responses is not None:
             for message in streaming_message_responses:
@@ -222,6 +239,84 @@ def _calculate_token_usage(
     )
 
 
+def _calculate_responses_token_usage(
+    input: "Any",
+    response: "Any",
+    span: "Span",
+    streaming_message_responses: "Optional[List[str]]",
+    count_tokens: "Callable[..., Any]",
+) -> None:
+    """Extract and record token usage from a Responses API response.
+
+    Counts on ``response.usage`` take precedence. An input or output count
+    that is missing or zero is estimated with ``count_tokens`` from ``input``
+    and from ``streaming_message_responses`` (or ``response.output`` when that
+    is None). Zero counts are passed to ``record_token_usage`` as None.
+    """
+    input_tokens: "Optional[int]" = 0
+    input_tokens_cached: "Optional[int]" = 0
+    output_tokens: "Optional[int]" = 0
+    output_tokens_reasoning: "Optional[int]" = 0
+    total_tokens: "Optional[int]" = 0
+
+    if hasattr(response, "usage"):
+        usage = response.usage
+
+        if _has_attr_and_is_int(usage, "input_tokens"):
+            input_tokens = usage.input_tokens
+        if _has_attr_and_is_int(usage, "output_tokens"):
+            output_tokens = usage.output_tokens
+        if _has_attr_and_is_int(usage, "total_tokens"):
+            total_tokens = usage.total_tokens
+
+        if hasattr(usage, "input_tokens_details"):
+            cached = getattr(usage.input_tokens_details, "cached_tokens", None)
+            if isinstance(cached, int):
+                input_tokens_cached = cached
+
+        if hasattr(usage, "output_tokens_details"):
+            reasoning = getattr(usage.output_tokens_details, "reasoning_tokens", None)
+            if isinstance(reasoning, int):
+                output_tokens_reasoning = reasoning
+
+    # Manually count input tokens
+    if input_tokens == 0:
+        # The Responses API also accepts a bare string as ``input``; wrap it so
+        # it is counted once as a whole instead of character by character.
+        messages = [input] if isinstance(input, str) else input or []
+        for message in messages:
+            if isinstance(message, str):
+                input_tokens += count_tokens(message)
+            elif isinstance(message, dict):
+                message_content = message.get("content")
+                if message_content is not None:
+                    # Deliberate use of Completions function for both Completions and Responses input format.
+                    text_items = _get_text_items(message_content)
+                    input_tokens += sum(count_tokens(text) for text in text_items)
+
+    # Manually count output tokens
+    if output_tokens == 0:
+        if streaming_message_responses is not None:
+            for message in streaming_message_responses:
+                output_tokens += count_tokens(message)
+        elif hasattr(response, "output"):
+            for output_item in response.output:
+                # ``content`` may be missing or None on some items; skip those.
+                for content_item in getattr(output_item, "content", None) or []:
+                    if hasattr(content_item, "text"):
+                        output_tokens += count_tokens(content_item.text)
+
+    # Do not set token data if it is 0
+    record_token_usage(
+        span,
+        input_tokens=input_tokens or None,
+        input_tokens_cached=input_tokens_cached or None,
+        output_tokens=output_tokens or None,
+        output_tokens_reasoning=output_tokens_reasoning or None,
+        total_tokens=total_tokens or None,
+    )
+
+
 def _set_responses_api_input_data(
     span: "Span",
     kwargs: "dict[str, Any]",
@@ -486,6 +581,7 @@ def _set_common_output_data(
     if hasattr(response, "model"):
         set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, response.model)
 
+    # Chat Completions API
     if hasattr(response, "choices"):
         if should_send_default_pii() and integration.include_prompts:
             response_text = [
@@ -496,11 +592,19 @@ def _set_common_output_data(
             if len(response_text) > 0:
                 set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_text)
 
-        _calculate_token_usage(input, response, span, None, integration.count_tokens)
+        _calculate_completions_token_usage(
+            messages=input,
+            response=response,
+            span=span,
+            streaming_message_responses=None,
+            streaming_message_total_token_usage=None,
+            count_tokens=integration.count_tokens,
+        )
 
         if finish_span:
             span.__exit__(None, None, None)
 
+    # Responses API
     elif hasattr(response, "output"):
         if should_send_default_pii() and integration.include_prompts:
             output_messages: "dict[str, list[Any]]" = {
@@ -532,12 +636,26 @@ def _set_common_output_data(
                     span, SPANDATA.GEN_AI_RESPONSE_TEXT, output_messages["response"]
                 )
 
-        _calculate_token_usage(input, response, span, None, integration.count_tokens)
+        _calculate_responses_token_usage(
+            input=input,
+            response=response,
+            span=span,
+            streaming_message_responses=None,
+            count_tokens=integration.count_tokens,
+        )
 
         if finish_span:
             span.__exit__(None, None, None)
+    # Embeddings API (fallback for responses with neither choices nor output)
     else:
-        _calculate_token_usage(input, response, span, None, integration.count_tokens)
+        _calculate_completions_token_usage(
+            messages=input,
+            response=response,
+            span=span,
+            streaming_message_responses=None,
+            streaming_message_total_token_usage=None,
+            count_tokens=integration.count_tokens,
+        )
         if finish_span:
             span.__exit__(None, None, None)
 
@@ -655,6 +773,7 @@ def _wrap_synchronous_completions_chunk_iterator(
     """
     ttft = None
     data_buf: "list[list[str]]" = []  # one for each choice
+    streaming_message_total_token_usage = None
 
     for x in old_iterator:
         span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.model)
@@ -671,6 +790,8 @@ def _wrap_synchronous_completions_chunk_iterator(
                             data_buf.append([])
                         data_buf[choice_index].append(content or "")
                     choice_index += 1
+            if getattr(x, "usage", None) is not None:  # keep last non-null usage
+                streaming_message_total_token_usage = x.usage
 
         yield x
 
@@ -679,17 +800,20 @@ def _wrap_synchronous_completions_chunk_iterator(
             set_data_normalized(
                 span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
             )
+        all_responses = None
         if len(data_buf) > 0:
             all_responses = ["".join(chunk) for chunk in data_buf]
             if should_send_default_pii() and integration.include_prompts:
                 set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses)
-            _calculate_token_usage(
-                messages,
-                response,
-                span,
-                all_responses,
-                integration.count_tokens,
-            )
+
+        _calculate_completions_token_usage(
+            messages=messages,
+            response=response,
+            span=span,
+            streaming_message_responses=all_responses,
+            streaming_message_total_token_usage=streaming_message_total_token_usage,
+            count_tokens=integration.count_tokens,
+        )
 
     if finish_span:
         span.__exit__(None, None, None)
@@ -711,6 +835,7 @@ async def _wrap_asynchronous_completions_chunk_iterator(
     """
     ttft = None
     data_buf: "list[list[str]]" = []  # one for each choice
+    streaming_message_total_token_usage = None
 
     async for x in old_iterator:
         span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.model)
@@ -727,6 +852,8 @@ async def _wrap_asynchronous_completions_chunk_iterator(
                             data_buf.append([])
                         data_buf[choice_index].append(content or "")
                     choice_index += 1
+            if getattr(x, "usage", None) is not None:  # keep last non-null usage
+                streaming_message_total_token_usage = x.usage
 
         yield x
 
@@ -735,17 +862,20 @@ async def _wrap_asynchronous_completions_chunk_iterator(
             set_data_normalized(
                 span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
             )
+        all_responses = None
         if len(data_buf) > 0:
             all_responses = ["".join(chunk) for chunk in data_buf]
             if should_send_default_pii() and integration.include_prompts:
                 set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses)
-            _calculate_token_usage(
-                messages,
-                response,
-                span,
-                all_responses,
-                integration.count_tokens,
-            )
+
+        _calculate_completions_token_usage(
+            messages=messages,
+            response=response,
+            span=span,
+            streaming_message_responses=all_responses,
+            streaming_message_total_token_usage=streaming_message_total_token_usage,
+            count_tokens=integration.count_tokens,
+        )
 
     if finish_span:
         span.__exit__(None, None, None)
@@ -781,12 +911,12 @@ def _wrap_synchronous_responses_event_iterator(
             if isinstance(x, ResponseCompletedEvent):
                 span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.response.model)
 
-                _calculate_token_usage(
-                    input,
-                    x.response,
-                    span,
-                    None,
-                    integration.count_tokens,
+                _calculate_responses_token_usage(
+                    input=input,
+                    response=x.response,
+                    span=span,
+                    streaming_message_responses=None,
+                    count_tokens=integration.count_tokens,
                 )
                 count_tokens_manually = False
 
@@ -801,13 +931,14 @@ def _wrap_synchronous_responses_event_iterator(
             all_responses = ["".join(chunk) for chunk in data_buf]
             if should_send_default_pii() and integration.include_prompts:
                 set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses)
+
             if count_tokens_manually:
-                _calculate_token_usage(
-                    input,
-                    response,
-                    span,
-                    all_responses,
-                    integration.count_tokens,
+                _calculate_responses_token_usage(
+                    input=input,
+                    response=response,
+                    span=span,
+                    streaming_message_responses=all_responses,
+                    count_tokens=integration.count_tokens,
                 )
 
     if finish_span:
@@ -844,12 +975,12 @@ async def _wrap_asynchronous_responses_event_iterator(
             if isinstance(x, ResponseCompletedEvent):
                 span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.response.model)
 
-                _calculate_token_usage(
-                    input,
-                    x.response,
-                    span,
-                    None,
-                    integration.count_tokens,
+                _calculate_responses_token_usage(
+                    input=input,
+                    response=x.response,
+                    span=span,
+                    streaming_message_responses=None,
+                    count_tokens=integration.count_tokens,
                 )
                 count_tokens_manually = False
 
@@ -865,12 +996,12 @@ async def _wrap_asynchronous_responses_event_iterator(
             if should_send_default_pii() and integration.include_prompts:
                 set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses)
             if count_tokens_manually:
-                _calculate_token_usage(
-                    input,
-                    response,
-                    span,
-                    all_responses,
-                    integration.count_tokens,
+                _calculate_responses_token_usage(
+                    input=input,
+                    response=response,
+                    span=span,
+                    streaming_message_responses=all_responses,
+                    count_tokens=integration.count_tokens,
                 )
     if finish_span:
         span.__exit__(None, None, None)
diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index 0fd049e742..ada2e633de 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -44,7 +44,8 @@
 from sentry_sdk.consts import SPANDATA, OP
 from sentry_sdk.integrations.openai import (
     OpenAIIntegration,
-    _calculate_token_usage,
+    _calculate_completions_token_usage,
+    _calculate_responses_token_usage,
 )
 from sentry_sdk.utils import safe_serialize
 
@@ -610,6 +611,304 @@ def test_streaming_chat_completion_no_prompts(
         pass  # if tiktoken is not installed, we can't guarantee token usage will be calculated properly
 
 
+@pytest.mark.skipif(
+    OPENAI_VERSION <= (1, 1, 0),
+    reason="OpenAI versions <=1.1.0 do not support the stream_options parameter.",
+)  # NOTE(review): stream_options likely arrived after 1.1.0 - confirm this bound
+def test_streaming_chat_completion_with_usage_in_stream(
+    sentry_init,
+    capture_events,
+    get_model_response,
+    server_side_event_chunks,
+):
+    """With stream_options include_usage, span token counts come from the usage carried by the last streamed chunk."""
+    sentry_init(
+        integrations=[OpenAIIntegration(include_prompts=False)],
+        traces_sample_rate=1.0,
+        send_default_pii=False,
+    )
+    events = capture_events()
+
+    client = OpenAI(api_key="z")
+    returned_stream = get_model_response(
+        server_side_event_chunks(
+            [
+                ChatCompletionChunk(
+                    id="1",
+                    choices=[
+                        DeltaChoice(
+                            index=0,
+                            delta=ChoiceDelta(content="hel"),
+                            finish_reason=None,
+                        )
+                    ],
+                    created=100000,
+                    model="model-id",
+                    object="chat.completion.chunk",
+                ),
+                ChatCompletionChunk(
+                    id="1",
+                    choices=[
+                        DeltaChoice(
+                            index=0,
+                            delta=ChoiceDelta(content="lo"),
+                            finish_reason="stop",
+                        )
+                    ],
+                    created=100000,
+                    model="model-id",
+                    object="chat.completion.chunk",
+                    usage=CompletionUsage(  # only the last chunk carries usage
+                        prompt_tokens=20,
+                        completion_tokens=10,
+                        total_tokens=30,
+                    ),
+                ),
+            ],
+            include_event_type=False,
+        )
+    )
+
+    with mock.patch.object(
+        client.chat._client._client,
+        "send",
+        return_value=returned_stream,
+    ):
+        with start_transaction(name="openai tx"):
+            response_stream = client.chat.completions.create(
+                model="some-model",
+                messages=[{"role": "user", "content": "hello"}],
+                stream=True,
+                stream_options={"include_usage": True},
+            )
+            for _ in response_stream:  # drain the stream so the wrapper finishes
+                pass
+
+    tx = events[0]
+    assert tx["type"] == "transaction"
+    span = tx["spans"][0]
+    assert span["op"] == "gen_ai.chat"
+    assert span["data"]["gen_ai.usage.input_tokens"] == 20  # CompletionUsage.prompt_tokens
+    assert span["data"]["gen_ai.usage.output_tokens"] == 10  # CompletionUsage.completion_tokens
+    assert span["data"]["gen_ai.usage.total_tokens"] == 30  # CompletionUsage.total_tokens
+
+
+@pytest.mark.skipif(
+    OPENAI_VERSION <= (1, 1, 0),
+    reason="OpenAI versions <=1.1.0 do not support the stream_options parameter.",
+)  # NOTE(review): stream_options likely arrived after 1.1.0 - confirm this bound
+def test_streaming_chat_completion_empty_content_preserves_token_usage(
+    sentry_init,
+    capture_events,
+    get_model_response,
+    server_side_event_chunks,
+):
+    """Token usage from the stream is recorded even when no content is produced (e.g. content filter)."""
+    sentry_init(
+        integrations=[OpenAIIntegration(include_prompts=False)],
+        traces_sample_rate=1.0,
+        send_default_pii=False,
+    )
+    events = capture_events()
+
+    client = OpenAI(api_key="z")
+    returned_stream = get_model_response(
+        server_side_event_chunks(
+            [
+                ChatCompletionChunk(
+                    id="1",
+                    choices=[],  # the single chunk streams no content deltas
+                    created=100000,
+                    model="model-id",
+                    object="chat.completion.chunk",
+                    usage=CompletionUsage(
+                        prompt_tokens=20,
+                        completion_tokens=0,
+                        total_tokens=20,
+                    ),
+                ),
+            ],
+            include_event_type=False,
+        )
+    )
+
+    with mock.patch.object(
+        client.chat._client._client,
+        "send",
+        return_value=returned_stream,
+    ):
+        with start_transaction(name="openai tx"):
+            response_stream = client.chat.completions.create(
+                model="some-model",
+                messages=[{"role": "user", "content": "hello"}],
+                stream=True,
+                stream_options={"include_usage": True},
+            )
+            for _ in response_stream:  # drain the stream so the wrapper finishes
+                pass
+
+    tx = events[0]
+    assert tx["type"] == "transaction"
+    span = tx["spans"][0]
+    assert span["op"] == "gen_ai.chat"
+    assert span["data"]["gen_ai.usage.input_tokens"] == 20
+    assert "gen_ai.usage.output_tokens" not in span["data"]  # zero output is omitted, not sent as 0
+    assert span["data"]["gen_ai.usage.total_tokens"] == 20
+
+
+@pytest.mark.skipif(
+    OPENAI_VERSION <= (1, 1, 0),
+    reason="OpenAI versions <=1.1.0 do not support the stream_options parameter.",
+)  # NOTE(review): stream_options likely arrived after 1.1.0 - confirm this bound
+@pytest.mark.asyncio
+async def test_streaming_chat_completion_empty_content_preserves_token_usage_async(
+    sentry_init,
+    capture_events,
+    get_model_response,
+    async_iterator,
+    server_side_event_chunks,
+):
+    """Token usage from the stream is recorded even when no content is produced - async variant."""
+    sentry_init(
+        integrations=[OpenAIIntegration(include_prompts=False)],
+        traces_sample_rate=1.0,
+        send_default_pii=False,
+    )
+    events = capture_events()
+
+    client = AsyncOpenAI(api_key="z")
+    returned_stream = get_model_response(
+        async_iterator(
+            server_side_event_chunks(
+                [
+                    ChatCompletionChunk(
+                        id="1",
+                        choices=[],  # the single chunk streams no content deltas
+                        created=100000,
+                        model="model-id",
+                        object="chat.completion.chunk",
+                        usage=CompletionUsage(
+                            prompt_tokens=20,
+                            completion_tokens=0,
+                            total_tokens=20,
+                        ),
+                    ),
+                ],
+                include_event_type=False,
+            )
+        )
+    )
+
+    with mock.patch.object(
+        client.chat._client._client,
+        "send",
+        return_value=returned_stream,
+    ):
+        with start_transaction(name="openai tx"):
+            response_stream = await client.chat.completions.create(
+                model="some-model",
+                messages=[{"role": "user", "content": "hello"}],
+                stream=True,
+                stream_options={"include_usage": True},
+            )
+            async for _ in response_stream:  # drain the stream so the wrapper finishes
+                pass
+
+    tx = events[0]
+    assert tx["type"] == "transaction"
+    span = tx["spans"][0]
+    assert span["op"] == "gen_ai.chat"
+    assert span["data"]["gen_ai.usage.input_tokens"] == 20
+    assert "gen_ai.usage.output_tokens" not in span["data"]  # zero output is omitted, not sent as 0
+    assert span["data"]["gen_ai.usage.total_tokens"] == 20
+
+
+@pytest.mark.skipif(
+    OPENAI_VERSION <= (1, 1, 0),
+    reason="OpenAI versions <=1.1.0 do not support the stream_options parameter.",
+)  # NOTE(review): stream_options likely arrived after 1.1.0 - confirm this bound
+@pytest.mark.asyncio
+async def test_streaming_chat_completion_async_with_usage_in_stream(
+    sentry_init,
+    capture_events,
+    get_model_response,
+    async_iterator,
+    server_side_event_chunks,
+):
+    """With stream_options include_usage, span token counts come from the usage carried by the last streamed chunk (async)."""
+    sentry_init(
+        integrations=[OpenAIIntegration(include_prompts=False)],
+        traces_sample_rate=1.0,
+        send_default_pii=False,
+    )
+    events = capture_events()
+
+    client = AsyncOpenAI(api_key="z")
+    returned_stream = get_model_response(
+        async_iterator(
+            server_side_event_chunks(
+                [
+                    ChatCompletionChunk(
+                        id="1",
+                        choices=[
+                            DeltaChoice(
+                                index=0,
+                                delta=ChoiceDelta(content="hel"),
+                                finish_reason=None,
+                            )
+                        ],
+                        created=100000,
+                        model="model-id",
+                        object="chat.completion.chunk",
+                    ),
+                    ChatCompletionChunk(
+                        id="1",
+                        choices=[
+                            DeltaChoice(
+                                index=0,
+                                delta=ChoiceDelta(content="lo"),
+                                finish_reason="stop",
+                            )
+                        ],
+                        created=100000,
+                        model="model-id",
+                        object="chat.completion.chunk",
+                        usage=CompletionUsage(  # only the last chunk carries usage
+                            prompt_tokens=20,
+                            completion_tokens=10,
+                            total_tokens=30,
+                        ),
+                    ),
+                ],
+                include_event_type=False,
+            )
+        )
+    )
+
+    with mock.patch.object(
+        client.chat._client._client,
+        "send",
+        return_value=returned_stream,
+    ):
+        with start_transaction(name="openai tx"):
+            response_stream = await client.chat.completions.create(
+                model="some-model",
+                messages=[{"role": "user", "content": "hello"}],
+                stream=True,
+                stream_options={"include_usage": True},
+            )
+            async for _ in response_stream:  # drain the stream so the wrapper finishes
+                pass
+
+    tx = events[0]
+    assert tx["type"] == "transaction"
+    span = tx["spans"][0]
+    assert span["op"] == "gen_ai.chat"
+    assert span["data"]["gen_ai.usage.input_tokens"] == 20  # CompletionUsage.prompt_tokens
+    assert span["data"]["gen_ai.usage.output_tokens"] == 10  # CompletionUsage.completion_tokens
+    assert span["data"]["gen_ai.usage.total_tokens"] == 30  # CompletionUsage.total_tokens
+
+
 # noinspection PyTypeChecker
 @pytest.mark.parametrize(
     "messages",
@@ -1780,7 +2079,8 @@ async def test_span_origin_embeddings_async(sentry_init, capture_events):
     assert event["spans"][0]["origin"] == "auto.ai.openai"
 
 
-def test_calculate_token_usage_a():
+def test_completions_token_usage_from_response():
+    """Token counts are extracted from response.usage using Completions API field names."""
     span = mock.MagicMock()
 
     def count_tokens(msg):
@@ -1797,8 +2097,13 @@ def count_tokens(msg):
     with mock.patch(
         "sentry_sdk.integrations.openai.record_token_usage"
     ) as mock_record_token_usage:
-        _calculate_token_usage(
-            messages, response, span, streaming_message_responses, count_tokens
+        _calculate_completions_token_usage(
+            messages=messages,
+            response=response,
+            span=span,
+            streaming_message_responses=streaming_message_responses,
+            streaming_message_total_token_usage=None,
+            count_tokens=count_tokens,
         )
         mock_record_token_usage.assert_called_once_with(
             span,
@@ -1810,7 +2115,46 @@ def count_tokens(msg):
         )
 
 
-def test_calculate_token_usage_b():
+def test_completions_token_usage_with_detailed_fields():
+    """Cached and reasoning token counts are extracted from prompt_tokens_details and completion_tokens_details."""
+    span = mock.MagicMock()
+
+    def count_tokens(msg):  # fallback counter; usage below supplies non-zero counts
+        return len(str(msg))
+
+    response = mock.MagicMock()
+    response.usage = mock.MagicMock()
+    response.usage.prompt_tokens = 20
+    response.usage.prompt_tokens_details = mock.MagicMock()
+    response.usage.prompt_tokens_details.cached_tokens = 5
+    response.usage.completion_tokens = 10
+    response.usage.completion_tokens_details = mock.MagicMock()
+    response.usage.completion_tokens_details.reasoning_tokens = 8
+    response.usage.total_tokens = 30
+
+    with mock.patch(
+        "sentry_sdk.integrations.openai.record_token_usage"
+    ) as mock_record_token_usage:  # intercept the call to inspect its arguments
+        _calculate_completions_token_usage(
+            messages=[],
+            response=response,
+            span=span,
+            streaming_message_responses=[],
+            streaming_message_total_token_usage=None,
+            count_tokens=count_tokens,
+        )
+        mock_record_token_usage.assert_called_once_with(
+            span,
+            input_tokens=20,
+            input_tokens_cached=5,
+            output_tokens=10,
+            output_tokens_reasoning=8,
+            total_tokens=30,
+        )
+
+
+def test_completions_token_usage_manual_input_counting():
+    """When prompt_tokens is missing, input tokens are counted manually from messages."""
     span = mock.MagicMock()
 
     def count_tokens(msg):
@@ -1830,8 +2174,13 @@ def count_tokens(msg):
     with mock.patch(
         "sentry_sdk.integrations.openai.record_token_usage"
     ) as mock_record_token_usage:
-        _calculate_token_usage(
-            messages, response, span, streaming_message_responses, count_tokens
+        _calculate_completions_token_usage(
+            messages=messages,
+            response=response,
+            span=span,
+            streaming_message_responses=streaming_message_responses,
+            streaming_message_total_token_usage=None,
+            count_tokens=count_tokens,
         )
         mock_record_token_usage.assert_called_once_with(
             span,
@@ -1843,7 +2192,8 @@ def count_tokens(msg):
         )
 
 
-def test_calculate_token_usage_c():
+def test_completions_token_usage_manual_output_counting_streaming():
+    """When completion_tokens is missing, output tokens are counted from streaming responses."""
     span = mock.MagicMock()
 
     def count_tokens(msg):
@@ -1863,8 +2213,13 @@ def count_tokens(msg):
     with mock.patch(
         "sentry_sdk.integrations.openai.record_token_usage"
     ) as mock_record_token_usage:
-        _calculate_token_usage(
-            messages, response, span, streaming_message_responses, count_tokens
+        _calculate_completions_token_usage(
+            messages=messages,
+            response=response,
+            span=span,
+            streaming_message_responses=streaming_message_responses,
+            streaming_message_total_token_usage=None,
+            count_tokens=count_tokens,
         )
         mock_record_token_usage.assert_called_once_with(
             span,
@@ -1876,7 +2231,8 @@ def count_tokens(msg):
         )
 
 
-def test_calculate_token_usage_d():
+def test_completions_token_usage_manual_output_counting_choices():
+    """When completion_tokens is missing, output tokens are counted from response.choices."""
     span = mock.MagicMock()
 
     def count_tokens(msg):
@@ -1887,30 +2243,48 @@ def count_tokens(msg):
     response.usage.prompt_tokens = 20
     response.usage.total_tokens = 20
     response.choices = [
-        mock.MagicMock(message="one"),
-        mock.MagicMock(message="two"),
-        mock.MagicMock(message="three"),
+        Choice(
+            index=0,
+            finish_reason="stop",
+            message=ChatCompletionMessage(role="assistant", content="one"),
+        ),
+        Choice(
+            index=1,
+            finish_reason="stop",
+            message=ChatCompletionMessage(role="assistant", content="two"),
+        ),
+        Choice(
+            index=2,
+            finish_reason="stop",
+            message=ChatCompletionMessage(role="assistant", content="three"),
+        ),
     ]
     messages = []
-    streaming_message_responses = []
+    streaming_message_responses = None
 
     with mock.patch(
         "sentry_sdk.integrations.openai.record_token_usage"
     ) as mock_record_token_usage:
-        _calculate_token_usage(
-            messages, response, span, streaming_message_responses, count_tokens
+        _calculate_completions_token_usage(
+            messages=messages,
+            response=response,
+            span=span,
+            streaming_message_responses=streaming_message_responses,
+            streaming_message_total_token_usage=None,
+            count_tokens=count_tokens,
         )
         mock_record_token_usage.assert_called_once_with(
             span,
             input_tokens=20,
             input_tokens_cached=None,
-            output_tokens=None,
+            output_tokens=11,
             output_tokens_reasoning=None,
             total_tokens=20,
         )
 
 
-def test_calculate_token_usage_e():
+def test_completions_token_usage_no_usage_data():
+    """When response has no usage data and no streaming responses, all tokens are None."""
     span = mock.MagicMock()
 
     def count_tokens(msg):
@@ -1923,8 +2297,75 @@ def count_tokens(msg):
     with mock.patch(
         "sentry_sdk.integrations.openai.record_token_usage"
     ) as mock_record_token_usage:
-        _calculate_token_usage(
-            messages, response, span, streaming_message_responses, count_tokens
+        _calculate_completions_token_usage(
+            messages=messages,
+            response=response,
+            span=span,
+            streaming_message_responses=streaming_message_responses,
+            streaming_message_total_token_usage=None,
+            count_tokens=count_tokens,
+        )
+        mock_record_token_usage.assert_called_once_with(
+            span,
+            input_tokens=None,
+            input_tokens_cached=None,
+            output_tokens=None,
+            output_tokens_reasoning=None,
+            total_tokens=None,
+        )
+
+
+@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
+def test_responses_token_usage_from_response():
+    """Counts set on response.usage, including cached and reasoning details, are recorded unchanged."""
+    span = mock.MagicMock()
+
+    def count_tokens(msg):
+        return len(str(msg))  # stand-in tokenizer: one token per character of str(msg)
+
+    response = mock.MagicMock()
+    response.usage = mock.MagicMock()
+    response.usage.input_tokens = 20
+    response.usage.input_tokens_details = mock.MagicMock()
+    response.usage.input_tokens_details.cached_tokens = 5
+    response.usage.output_tokens = 10
+    response.usage.output_tokens_details = mock.MagicMock()
+    response.usage.output_tokens_details.reasoning_tokens = 8
+    response.usage.total_tokens = 30
+    input = []
+
+    with mock.patch(
+        "sentry_sdk.integrations.openai.record_token_usage"
+    ) as mock_record_token_usage:  # intercept the recorder so its arguments can be asserted
+        _calculate_responses_token_usage(input, response, span, None, count_tokens)
+        mock_record_token_usage.assert_called_once_with(
+            span,
+            input_tokens=20,
+            input_tokens_cached=5,
+            output_tokens=10,
+            output_tokens_reasoning=8,
+            total_tokens=30,
+        )
+
+
+@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
+def test_responses_token_usage_no_usage_data():
+    """When Responses API response has no usage data, all tokens are None."""
+    span = mock.MagicMock()
+
+    def count_tokens(msg):
+        return len(str(msg))
+
+    response = mock.MagicMock()
+    response.usage = None
+    input = []
+    streaming_message_responses = None
+
+    with mock.patch(
+        "sentry_sdk.integrations.openai.record_token_usage"
+    ) as mock_record_token_usage:
+        _calculate_responses_token_usage(
+            input, response, span, streaming_message_responses, count_tokens
         )
         mock_record_token_usage.assert_called_once_with(
             span,
@@ -1936,6 +2377,70 @@ def count_tokens(msg):
         )
 
 
+@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
+def test_responses_token_usage_manual_output_counting_response_output():
+    """When usage.output_tokens is not an int, output tokens are counted from the text in response.output."""
+    span = mock.MagicMock()
+
+    def count_tokens(msg):
+        return len(str(msg))  # stand-in tokenizer: one token per character of str(msg)
+
+    response = mock.MagicMock()
+    response.usage = mock.MagicMock()
+    response.usage.input_tokens = 20
+    response.usage.total_tokens = 20  # output_tokens is never assigned, so it stays an auto-created MagicMock attribute
+    response.output = [  # two messages; the second carries two text parts
+        ResponseOutputMessage(
+            id="msg-1",
+            content=[
+                ResponseOutputText(
+                    annotations=[],
+                    text="one",
+                    type="output_text",
+                ),
+            ],
+            role="assistant",
+            status="completed",
+            type="message",
+        ),
+        ResponseOutputMessage(
+            id="msg-2",
+            content=[
+                ResponseOutputText(
+                    annotations=[],
+                    text="two",
+                    type="output_text",
+                ),
+                ResponseOutputText(
+                    annotations=[],
+                    text="three",
+                    type="output_text",
+                ),
+            ],
+            role="assistant",
+            status="completed",
+            type="message",
+        ),
+    ]
+    input = []
+    streaming_message_responses = None
+
+    with mock.patch(
+        "sentry_sdk.integrations.openai.record_token_usage"
+    ) as mock_record_token_usage:  # intercept the recorder so its arguments can be asserted
+        _calculate_responses_token_usage(
+            input, response, span, streaming_message_responses, count_tokens
+        )
+        mock_record_token_usage.assert_called_once_with(
+            span,
+            input_tokens=20,
+            input_tokens_cached=None,
+            output_tokens=11,  # matches len("one") + len("two") + len("three") under this count_tokens
+            output_tokens_reasoning=None,
+            total_tokens=20,
+        )
+
+
 @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
 def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events):
     sentry_init(