From e46017b92cd33404b535d9a1f45610cecb05a070 Mon Sep 17 00:00:00 2001
From: Sahil D Shah <sahildshah1@gmail.com>
Date: Fri, 17 Apr 2026 12:13:35 -0400
Subject: [PATCH 1/3] Initial commit

---
 server/api/views/assistant/assistant_services.py | 0
 server/api/views/assistant/eval_assistant.py     | 0
 server/api/views/assistant/review.ipynb          | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 server/api/views/assistant/assistant_services.py
 create mode 100644 server/api/views/assistant/eval_assistant.py
 create mode 100644 server/api/views/assistant/review.ipynb

diff --git a/server/api/views/assistant/assistant_services.py b/server/api/views/assistant/assistant_services.py
new file mode 100644
index 00000000..e69de29b
diff --git a/server/api/views/assistant/eval_assistant.py b/server/api/views/assistant/eval_assistant.py
new file mode 100644
index 00000000..e69de29b
diff --git a/server/api/views/assistant/review.ipynb b/server/api/views/assistant/review.ipynb
new file mode 100644
index 00000000..e69de29b

From 2ad1024cc60b1497055ad47b7a32ac47a7469563 Mon Sep 17 00:00:00 2001
From: Sahil D Shah <sahildshah1@gmail.com>
Date: Mon, 20 Apr 2026 13:46:27 -0400
Subject: [PATCH 2/3] Scaffold implementation

---
 .../api/views/assistant/assistant_prompts.py  |  38 +++
 .../api/views/assistant/assistant_services.py |  60 ++++
 server/api/views/assistant/eval_assistant.py  |  15 +
 server/api/views/assistant/review.ipynb       |   1 +
 .../assistant/test_assistant_services.py      |   0
 .../views/assistant/test_eval_assistant.py    |   0
 .../api/views/assistant/test_tool_services.py |   0
 server/api/views/assistant/test_views.py      |   0
 server/api/views/assistant/tool_services.py   | 150 +++++++++
 server/api/views/assistant/views.py           | 310 ++----------------
 10 files changed, 292 insertions(+), 282 deletions(-)
 create mode 100644 server/api/views/assistant/assistant_prompts.py
 create mode 100644 server/api/views/assistant/test_assistant_services.py
 create mode 100644 server/api/views/assistant/test_eval_assistant.py
 create mode 100644 server/api/views/assistant/test_tool_services.py
 create mode 100644 server/api/views/assistant/test_views.py
 create mode 100644 server/api/views/assistant/tool_services.py

diff --git a/server/api/views/assistant/assistant_prompts.py b/server/api/views/assistant/assistant_prompts.py
new file mode 100644
index 00000000..44bf9b9b
--- /dev/null
+++ b/server/api/views/assistant/assistant_prompts.py
@@ -0,0 +1,38 @@
+INSTRUCTIONS = """
+You are an AI assistant that helps users find and understand information about bipolar disorder 
+from your internal library of bipolar disorder research sources using semantic search.
+
+IMPORTANT CONTEXT:
+- You have access to a library of sources that the user CANNOT see
+- The user did not upload these sources and doesn't know about them
+- You must explain what information exists in your sources and provide clear references
+
+TOPIC RESTRICTIONS:
+When a prompt is received that is unrelated to bipolar disorder, mental health treatment, 
+or psychiatric medications, respond by saying you are limited to bipolar-specific conversations.
+
+SEMANTIC SEARCH STRATEGY:
+- Always perform semantic search using the search_documents function when users ask questions
+- Use conceptually related terms and synonyms, not just exact keyword matches
+- Search for the meaning and context of the user's question, not just literal words
+- Consider medical terminology, lay terms, and related conditions when searching
+
+FUNCTION USAGE:
+- When a user asks about information that might be in your source library, ALWAYS use the search_documents function first
+- Perform semantic searches using concepts, symptoms, treatments, and related terms from the user's question
+- Only provide answers based on information found through your source searches
+
+RESPONSE FORMAT:
+After gathering information through semantic searches, provide responses that:
+1. Answer the user's question directly using only the found information
+2. Structure responses with clear sections and paragraphs
+3. Explain what information you found in your sources and provide context
+4. Include citations using this exact format: [Name {name}, Page {page_number}]
+5. Only cite information that directly supports your statements
+
+If no relevant information is found in your source library, clearly state that the information 
+is not available in your current sources.
+
+REMEMBER: You are working with an internal library of bipolar disorder sources that the user 
+cannot see. Always search these sources first, explain what you found, and provide proper citations.
+"""
\ No newline at end of file
diff --git a/server/api/views/assistant/assistant_services.py b/server/api/views/assistant/assistant_services.py
index e69de29b..65d1dce0 100644
--- a/server/api/views/assistant/assistant_services.py
+++ b/server/api/views/assistant/assistant_services.py
@@ -0,0 +1,60 @@
+
+def run_assistant():
+    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+
+    tools = [
+        {
+            "type": "function",
+            "name": "search_documents",
+            "description": TOOL_DESCRIPTION,
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "query": {
+                        "type": "string",
+                        "description": TOOL_PROPERTY_DESCRIPTION,
+                    }
+                },
+                "required": ["query"],
+            },
+        }
+    ]
+
+
+    MODEL_DEFAULTS = {
+        "instructions": INSTRUCTIONS,
+        "model": "gpt-5-nano",  # 400,000 token context window
+        # A summary of the reasoning performed by the model. This can be useful for debugging and understanding the model's reasoning process.
+        "reasoning": {"effort": "low", "summary": None},
+        "tools": tools,
+    }
+
+    # We fetch a response and then kick off a loop to handle the response
+
+
+
+    # TODO: Track total duration, cost metrics, and tool_calls_made count
+    # and return them from run_assistant for use in eval_assistant.py CSV output
+
+    if not previous_response_id:
+        response = client.responses.create(
+            input=[
+                {"type": "message", "role": "user", "content": str(message)}
+            ],
+            **MODEL_DEFAULTS,
+        )
+    else:
+        response = client.responses.create(
+            input=[
+                {"type": "message", "role": "user", "content": str(message)}
+            ],
+            previous_response_id=str(previous_response_id),
+            **MODEL_DEFAULTS,
+        )
+
+    
+    
+    final_response_output_text, final_response_id = handle_tool_calls_with_reasoning()
+
+
+
diff --git a/server/api/views/assistant/eval_assistant.py b/server/api/views/assistant/eval_assistant.py
index e69de29b..4b22f8ba 100644
--- a/server/api/views/assistant/eval_assistant.py
+++ b/server/api/views/assistant/eval_assistant.py
@@ -0,0 +1,15 @@
+# uv script (or plain Python), run from the terminal, that writes the assistant's evaluation results to a CSV file
+
+import asyncio
+
+# Set of representative questions
+
+
+# TODO: read the model name (assistant_services.py) and INSTRUCTIONS (assistant_prompts.py) from the source modules, or add a lightweight config endpoint to the backend
+
+
+async def main():
+    """Run the evaluation and write the results to CSV (scaffold: not implemented yet)."""
+    raise NotImplementedError("eval_assistant.main() is not implemented yet")
+if __name__ == "__main__":
+    asyncio.run(main())
\ No newline at end of file
diff --git a/server/api/views/assistant/review.ipynb b/server/api/views/assistant/review.ipynb
index e69de29b..e6da83d0 100644
--- a/server/api/views/assistant/review.ipynb
+++ b/server/api/views/assistant/review.ipynb
@@ -0,0 +1 @@
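+{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["Notebook to review and compare the two CSVs"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 4}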
\ No newline at end of file
diff --git a/server/api/views/assistant/test_assistant_services.py b/server/api/views/assistant/test_assistant_services.py
new file mode 100644
index 00000000..e69de29b
diff --git a/server/api/views/assistant/test_eval_assistant.py b/server/api/views/assistant/test_eval_assistant.py
new file mode 100644
index 00000000..e69de29b
diff --git a/server/api/views/assistant/test_tool_services.py b/server/api/views/assistant/test_tool_services.py
new file mode 100644
index 00000000..e69de29b
diff --git a/server/api/views/assistant/test_views.py b/server/api/views/assistant/test_views.py
new file mode 100644
index 00000000..e69de29b
diff --git a/server/api/views/assistant/tool_services.py b/server/api/views/assistant/tool_services.py
new file mode 100644
index 00000000..a90441a3
--- /dev/null
+++ b/server/api/views/assistant/tool_services.py
@@ -0,0 +1,150 @@
+
+TOOL_DESCRIPTION = """
+Search the user's uploaded documents for information relevant to answering their question.
+Call this function when you need to find specific information from the user's documents
+to provide an accurate, citation-backed response. Always search before answering questions
+about document content.
+"""
+
+TOOL_PROPERTY_DESCRIPTION = """
+A specific search query to find relevant information in the user's documents.
+Use keywords, phrases, or questions related to what the user is asking about.
+Be specific rather than generic - use terms that would appear in the relevant documents.
+"""
+
+def search_documents(query: str, user=user) -> str:
+    """
+    Search through user's uploaded documents using semantic similarity.
+
+    This function performs vector similarity search against the user's document corpus
+    and returns formatted results with context information for the LLM to use.
+
+    Parameters
+    ----------
+    query : str
+        The search query string
+    user : User
+        The authenticated user whose documents to search
+
+    Returns
+    -------
+    str
+        Formatted search results containing document excerpts with metadata
+
+    Raises
+    ------
+    Exception
+        If embedding search fails
+    """
+
+    try:
+        embeddings_results = get_closest_embeddings(
+            user=user, message_data=query.strip()
+        )
+        embeddings_results = convert_uuids(embeddings_results)
+
+        if not embeddings_results:
+            return "No relevant documents found for your query. Please try different search terms or upload documents first."
+
+        # Format results with clear structure and metadata
+        prompt_texts = [
+            f"[Document {i + 1} - File: {obj['file_id']}, Name: {obj['name']}, Page: {obj['page_number']}, Chunk: {obj['chunk_number']}, Similarity: {1 - obj['distance']:.3f}]\n{obj['text']}\n[End Document {i + 1}]"
+            for i, obj in enumerate(embeddings_results)
+        ]
+
+        return "\n\n".join(prompt_texts)
+
+    except Exception as e:
+        return f"Error searching documents: {str(e)}. Please try again if the issue persists."
+
+def handle_tool_calls_with_reasoning():
+    # Open AI Cookbook: Handling Function Calls with Reasoning Models
+    # https://cookbook.openai.com/examples/reasoning_function_calls
+    while True:
+        # Mapping of the tool names we tell the model about and the functions that implement them
+        function_responses = invoke_functions_from_response(
+            response, tool_mapping={"search_documents": search_documents}
+        )
+        if len(function_responses) == 0:  # We're done reasoning
+            logger.info("Reasoning completed")
+            final_response_output_text = response.output_text
+            final_response_id = response.id
+            logger.info(f"Final response: {final_response_output_text}")
+            break
+        else:
+            logger.info("More reasoning required, continuing...")
+            response = client.responses.create(
+                input=function_responses,
+                previous_response_id=response.id,
+                **MODEL_DEFAULTS,
+            )
+            # # Accumulate token usage from reasoning iterations
+            # if hasattr(response, "usage"):
+            #     total_token_usage["input_tokens"] += getattr(
+            #         response.usage, "input_tokens", 0
+            #     )
+            #     total_token_usage["output_tokens"] += getattr(
+            #         response.usage, "output_tokens", 0
+            #     )
+    
+    
+    
+
+
+
+# Open AI Cookbook: Handling Function Calls with Reasoning Models
+# https://cookbook.openai.com/examples/reasoning_function_calls
+def invoke_functions_from_response(
+    response, tool_mapping: dict[str, Callable]
+) -> list[dict]:
+    """Extract all function calls from the response, look up the corresponding tool function(s) and execute them.
+    (This would be a good place to handle asynchroneous tool calls, or ones that take a while to execute.)
+    This returns a list of messages to be added to the conversation history.
+
+    Parameters
+    ----------
+    response : OpenAI Response
+        The response object from OpenAI containing output items that may include function calls
+    tool_mapping : dict[str, Callable]
+        A dictionary mapping function names (as strings) to their corresponding Python functions.
+        Keys should match the function names defined in the tools schema.
+
+    Returns
+    -------
+    list[dict]
+        List of function call output messages formatted for the OpenAI conversation.
+        Each message contains:
+        - type: "function_call_output"
+        - call_id: The unique identifier for the function call
+        - output: The result returned by the executed function (string or error message)
+    """
+    intermediate_messages = []
+    for response_item in response.output:
+        if response_item.type == "function_call":
+            target_tool = tool_mapping.get(response_item.name)
+            if target_tool:
+                try:
+                    arguments = json.loads(response_item.arguments)
+                    logger.info(
+                        f"Invoking tool: {response_item.name} with arguments: {arguments}"
+                    )
+                    tool_output = target_tool(**arguments)
+                    logger.info(f"Tool {response_item.name} completed successfully")
+                except Exception as e:
+                    msg = f"Error executing function call: {response_item.name}: {e}"
+                    tool_output = msg
+                    logger.error(msg, exc_info=True)
+            else:
+                msg = f"ERROR - No tool registered for function call: {response_item.name}"
+                tool_output = msg
+                logger.error(msg)
+            intermediate_messages.append(
+                {
+                    "type": "function_call_output",
+                    "call_id": response_item.call_id,
+                    "output": tool_output,
+                }
+            )
+        elif response_item.type == "reasoning":
+            logger.info(f"Reasoning step: {response_item.summary}")
+    return intermediate_messages
\ No newline at end of file
diff --git a/server/api/views/assistant/views.py b/server/api/views/assistant/views.py
index e3e8d6f7..6240cd72 100644
--- a/server/api/views/assistant/views.py
+++ b/server/api/views/assistant/views.py
@@ -21,94 +21,37 @@
 # Configure logging
 logger = logging.getLogger(__name__)
 
-GPT_5_NANO_PRICING_DOLLARS_PER_MILLION_TOKENS = {"input": 0.05, "output": 0.40}
 
+# TODO: OpenAI API Dashboard has total duration and cost metrics  
+# GPT_5_NANO_PRICING_DOLLARS_PER_MILLION_TOKENS = {"input": 0.05, "output": 0.40}
 
-def calculate_cost_metrics(token_usage: dict, pricing: dict) -> dict:
-    """
-    Calculate cost metrics based on token usage and pricing
+# def calculate_cost_metrics(token_usage: dict, pricing: dict) -> dict:
+#     """
+#     Calculate cost metrics based on token usage and pricing
 
-    Args:
-        token_usage: Dictionary containing input_tokens and output_tokens
-        pricing: Dictionary containing input and output pricing per million tokens
+#     Args:
+#         token_usage: Dictionary containing input_tokens and output_tokens
+#         pricing: Dictionary containing input and output pricing per million tokens
 
-    Returns:
-        Dictionary containing input_cost, output_cost, and total_cost in USD
-    """
-    TOKENS_PER_MILLION = 1_000_000
+#     Returns:
+#         Dictionary containing input_cost, output_cost, and total_cost in USD
+#     """
+#     TOKENS_PER_MILLION = 1_000_000
 
-    # Pricing is in dollars per million tokens
-    input_cost_dollars = (pricing["input"] / TOKENS_PER_MILLION) * token_usage.get(
-        "input_tokens", 0
-    )
-    output_cost_dollars = (pricing["output"] / TOKENS_PER_MILLION) * token_usage.get(
-        "output_tokens", 0
-    )
-    total_cost_dollars = input_cost_dollars + output_cost_dollars
-
-    return {
-        "input_cost": input_cost_dollars,
-        "output_cost": output_cost_dollars,
-        "total_cost": total_cost_dollars,
-    }
+#     # Pricing is in dollars per million tokens
+#     input_cost_dollars = (pricing["input"] / TOKENS_PER_MILLION) * token_usage.get(
+#         "input_tokens", 0
+#     )
+#     output_cost_dollars = (pricing["output"] / TOKENS_PER_MILLION) * token_usage.get(
+#         "output_tokens", 0
+#     )
+#     total_cost_dollars = input_cost_dollars + output_cost_dollars
 
-
-# Open AI Cookbook: Handling Function Calls with Reasoning Models
-# https://cookbook.openai.com/examples/reasoning_function_calls
-def invoke_functions_from_response(
-    response, tool_mapping: dict[str, Callable]
-) -> list[dict]:
-    """Extract all function calls from the response, look up the corresponding tool function(s) and execute them.
-    (This would be a good place to handle asynchroneous tool calls, or ones that take a while to execute.)
-    This returns a list of messages to be added to the conversation history.
-
-    Parameters
-    ----------
-    response : OpenAI Response
-        The response object from OpenAI containing output items that may include function calls
-    tool_mapping : dict[str, Callable]
-        A dictionary mapping function names (as strings) to their corresponding Python functions.
-        Keys should match the function names defined in the tools schema.
-
-    Returns
-    -------
-    list[dict]
-        List of function call output messages formatted for the OpenAI conversation.
-        Each message contains:
-        - type: "function_call_output"
-        - call_id: The unique identifier for the function call
-        - output: The result returned by the executed function (string or error message)
-    """
-    intermediate_messages = []
-    for response_item in response.output:
-        if response_item.type == "function_call":
-            target_tool = tool_mapping.get(response_item.name)
-            if target_tool:
-                try:
-                    arguments = json.loads(response_item.arguments)
-                    logger.info(
-                        f"Invoking tool: {response_item.name} with arguments: {arguments}"
-                    )
-                    tool_output = target_tool(**arguments)
-                    logger.info(f"Tool {response_item.name} completed successfully")
-                except Exception as e:
-                    msg = f"Error executing function call: {response_item.name}: {e}"
-                    tool_output = msg
-                    logger.error(msg, exc_info=True)
-            else:
-                msg = f"ERROR - No tool registered for function call: {response_item.name}"
-                tool_output = msg
-                logger.error(msg)
-            intermediate_messages.append(
-                {
-                    "type": "function_call_output",
-                    "call_id": response_item.call_id,
-                    "output": tool_output,
-                }
-            )
-        elif response_item.type == "reasoning":
-            logger.info(f"Reasoning step: {response_item.summary}")
-    return intermediate_messages
+#     return {
+#         "input_cost": input_cost_dollars,
+#         "output_cost": output_cost_dollars,
+#         "total_cost": total_cost_dollars,
+#     }
 
 
 @method_decorator(csrf_exempt, name="dispatch")
@@ -133,209 +76,12 @@ class Assistant(APIView):
     def post(self, request):
         try:
             user = request.user
-
-            client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
-
-            TOOL_DESCRIPTION = """
-            Search the user's uploaded documents for information relevant to answering their question.
-            Call this function when you need to find specific information from the user's documents
-            to provide an accurate, citation-backed response. Always search before answering questions
-            about document content.
-            """
-
-            TOOL_PROPERTY_DESCRIPTION = """
-            A specific search query to find relevant information in the user's documents.
-            Use keywords, phrases, or questions related to what the user is asking about.
-            Be specific rather than generic - use terms that would appear in the relevant documents.
-            """
-
-            tools = [
-                {
-                    "type": "function",
-                    "name": "search_documents",
-                    "description": TOOL_DESCRIPTION,
-                    "parameters": {
-                        "type": "object",
-                        "properties": {
-                            "query": {
-                                "type": "string",
-                                "description": TOOL_PROPERTY_DESCRIPTION,
-                            }
-                        },
-                        "required": ["query"],
-                    },
-                }
-            ]
-
-            def search_documents(query: str, user=user) -> str:
-                """
-                Search through user's uploaded documents using semantic similarity.
-
-                This function performs vector similarity search against the user's document corpus
-                and returns formatted results with context information for the LLM to use.
-
-                Parameters
-                ----------
-                query : str
-                    The search query string
-                user : User
-                    The authenticated user whose documents to search
-
-                Returns
-                -------
-                str
-                    Formatted search results containing document excerpts with metadata
-
-                Raises
-                ------
-                Exception
-                    If embedding search fails
-                """
-
-                try:
-                    embeddings_results = get_closest_embeddings(
-                        user=user, message_data=query.strip()
-                    )
-                    embeddings_results = convert_uuids(embeddings_results)
-
-                    if not embeddings_results:
-                        return "No relevant documents found for your query. Please try different search terms or upload documents first."
-
-                    # Format results with clear structure and metadata
-                    prompt_texts = [
-                        f"[Document {i + 1} - File: {obj['file_id']}, Name: {obj['name']}, Page: {obj['page_number']}, Chunk: {obj['chunk_number']}, Similarity: {1 - obj['distance']:.3f}]\n{obj['text']}\n[End Document {i + 1}]"
-                        for i, obj in enumerate(embeddings_results)
-                    ]
-
-                    return "\n\n".join(prompt_texts)
-
-                except Exception as e:
-                    return f"Error searching documents: {str(e)}. Please try again if the issue persists."
-
-            INSTRUCTIONS = """
-            You are an AI assistant that helps users find and understand information about bipolar disorder 
-            from your internal library of bipolar disorder research sources using semantic search.
-            
-            IMPORTANT CONTEXT:
-            - You have access to a library of sources that the user CANNOT see
-            - The user did not upload these sources and doesn't know about them
-            - You must explain what information exists in your sources and provide clear references
-            
-            TOPIC RESTRICTIONS:
-            When a prompt is received that is unrelated to bipolar disorder, mental health treatment, 
-            or psychiatric medications, respond by saying you are limited to bipolar-specific conversations.
-            
-            SEMANTIC SEARCH STRATEGY:
-            - Always perform semantic search using the search_documents function when users ask questions
-            - Use conceptually related terms and synonyms, not just exact keyword matches
-            - Search for the meaning and context of the user's question, not just literal words
-            - Consider medical terminology, lay terms, and related conditions when searching
-            
-            FUNCTION USAGE:
-            - When a user asks about information that might be in your source library, ALWAYS use the search_documents function first
-            - Perform semantic searches using concepts, symptoms, treatments, and related terms from the user's question
-            - Only provide answers based on information found through your source searches
-            
-            RESPONSE FORMAT:
-            After gathering information through semantic searches, provide responses that:
-            1. Answer the user's question directly using only the found information
-            2. Structure responses with clear sections and paragraphs
-            3. Explain what information you found in your sources and provide context
-            4. Include citations using this exact format: [Name {name}, Page {page_number}]
-            5. Only cite information that directly supports your statements
-            
-            If no relevant information is found in your source library, clearly state that the information 
-            is not available in your current sources.
-            
-            REMEMBER: You are working with an internal library of bipolar disorder sources that the user 
-            cannot see. Always search these sources first, explain what you found, and provide proper citations.
-            """
-
-            MODEL_DEFAULTS = {
-                "instructions": INSTRUCTIONS,
-                "model": "gpt-5-nano",  # 400,000 token context window
-                # A summary of the reasoning performed by the model. This can be useful for debugging and understanding the model's reasoning process.
-                "reasoning": {"effort": "low", "summary": None},
-                "tools": tools,
-            }
-
-            # We fetch a response and then kick off a loop to handle the response
-
+    
             message = request.data.get("message", None)
             previous_response_id = request.data.get("previous_response_id", None)
+            
+            = run_assistant()
 
-            # Track total duration and cost metrics
-            start_time = time.time()
-            total_token_usage = {"input_tokens": 0, "output_tokens": 0}
-
-            if not previous_response_id:
-                response = client.responses.create(
-                    input=[
-                        {"type": "message", "role": "user", "content": str(message)}
-                    ],
-                    **MODEL_DEFAULTS,
-                )
-            else:
-                response = client.responses.create(
-                    input=[
-                        {"type": "message", "role": "user", "content": str(message)}
-                    ],
-                    previous_response_id=str(previous_response_id),
-                    **MODEL_DEFAULTS,
-                )
-
-            # Accumulate token usage from initial response
-            if hasattr(response, "usage"):
-                total_token_usage["input_tokens"] += getattr(
-                    response.usage, "input_tokens", 0
-                )
-                total_token_usage["output_tokens"] += getattr(
-                    response.usage, "output_tokens", 0
-                )
-
-            # Open AI Cookbook: Handling Function Calls with Reasoning Models
-            # https://cookbook.openai.com/examples/reasoning_function_calls
-            while True:
-                # Mapping of the tool names we tell the model about and the functions that implement them
-                function_responses = invoke_functions_from_response(
-                    response, tool_mapping={"search_documents": search_documents}
-                )
-                if len(function_responses) == 0:  # We're done reasoning
-                    logger.info("Reasoning completed")
-                    final_response_output_text = response.output_text
-                    final_response_id = response.id
-                    logger.info(f"Final response: {final_response_output_text}")
-                    break
-                else:
-                    logger.info("More reasoning required, continuing...")
-                    response = client.responses.create(
-                        input=function_responses,
-                        previous_response_id=response.id,
-                        **MODEL_DEFAULTS,
-                    )
-                    # Accumulate token usage from reasoning iterations
-                    if hasattr(response, "usage"):
-                        total_token_usage["input_tokens"] += getattr(
-                            response.usage, "input_tokens", 0
-                        )
-                        total_token_usage["output_tokens"] += getattr(
-                            response.usage, "output_tokens", 0
-                        )
-
-            # Calculate total duration and cost metrics
-            total_duration = time.time() - start_time
-            cost_metrics = calculate_cost_metrics(
-                total_token_usage, GPT_5_NANO_PRICING_DOLLARS_PER_MILLION_TOKENS
-            )
-
-            # Log cost and duration metrics
-            logger.info(
-                f"Request completed: "
-                f"Duration: {total_duration:.2f}s, "
-                f"Input tokens: {total_token_usage['input_tokens']}, "
-                f"Output tokens: {total_token_usage['output_tokens']}, "
-                f"Total cost: ${cost_metrics['total_cost']:.6f}"
-            )
 
             return Response(
                 {

From 2a9434a39cb98639e1eef0081e9d9f5fd8356139 Mon Sep 17 00:00:00 2001
From: Sahil D Shah <sahildshah1@gmail.com>
Date: Thu, 23 Apr 2026 14:27:02 -0400
Subject: [PATCH 3/3] Extract logic so it can be called without going through
 HTTP endpoint

---
 .../api/views/assistant/assistant_services.py |  76 ++++++----
 server/api/views/assistant/tool_services.py   | 140 +++++++++++++-----
 server/api/views/assistant/views.py           |  49 +-----
 3 files changed, 152 insertions(+), 113 deletions(-)

diff --git a/server/api/views/assistant/assistant_services.py b/server/api/views/assistant/assistant_services.py
index 65d1dce0..ac339b9f 100644
--- a/server/api/views/assistant/assistant_services.py
+++ b/server/api/views/assistant/assistant_services.py
@@ -1,40 +1,57 @@
+import os
+import logging
 
-def run_assistant():
-    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+from openai import OpenAI
+
+from .assistant_prompts import INSTRUCTIONS
+from .tool_services import (
+    SEARCH_TOOLS_SCHEMA,
+    make_search_tool_mapping,
+    handle_tool_calls_with_reasoning,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def run_assistant(
+    message: str,
+    user,
+    previous_response_id: str | None = None,
+) -> tuple[str, str]:
+    """Wire together the OpenAI client, retrieval, and the agentic reasoning loop.
 
-    tools = [
-        {
-            "type": "function",
-            "name": "search_documents",
-            "description": TOOL_DESCRIPTION,
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "query": {
-                        "type": "string",
-                        "description": TOOL_PROPERTY_DESCRIPTION,
-                    }
-                },
-                "required": ["query"],
-            },
-        }
-    ]
+    Parameters
+    ----------
+    message : str
+        The user's input message.
+    user : User
+        The Django user object used for document access control in search_documents.
+    previous_response_id : str | None
+        ID of a prior response for multi-turn conversation continuity.
 
+    Returns
+    -------
+    tuple[str, str]
+        (final_response_output_text, final_response_id)
+    """
+    # TODO: Track total duration, cost metrics, and tool_calls_made count
+    # and return them from run_assistant for use in eval_assistant.py CSV output
+
+    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
 
     MODEL_DEFAULTS = {
         "instructions": INSTRUCTIONS,
         "model": "gpt-5-nano",  # 400,000 token context window
         # A summary of the reasoning performed by the model. This can be useful for debugging and understanding the model's reasoning process.
         "reasoning": {"effort": "low", "summary": None},
-        "tools": tools,
+        "tools": SEARCH_TOOLS_SCHEMA,
     }
 
-    # We fetch a response and then kick off a loop to handle the response
-
-
-
-    # TODO: Track total duration, cost metrics, and tool_calls_made count
-    # and return them from run_assistant for use in eval_assistant.py CSV output
+    # SEARCH_TOOLS_SCHEMA tells the model what tools exist and what arguments to generate.
+    # tool_mapping wires those tool names to the Python functions that execute them.
+    # They are separate because the model generates arguments (schema concern) but
+    # cannot supply request-time values like user (mapping concern).
+    tool_mapping = make_search_tool_mapping(user)
 
     if not previous_response_id:
         response = client.responses.create(
@@ -52,9 +69,4 @@ def run_assistant():
             **MODEL_DEFAULTS,
         )
 
-    
-    
-    final_response_output_text, final_response_id = handle_tool_calls_with_reasoning()
-
-
-
+    return handle_tool_calls_with_reasoning(response, client, MODEL_DEFAULTS, tool_mapping)
diff --git a/server/api/views/assistant/tool_services.py b/server/api/views/assistant/tool_services.py
index a90441a3..0fb96cef 100644
--- a/server/api/views/assistant/tool_services.py
+++ b/server/api/views/assistant/tool_services.py
@@ -1,3 +1,11 @@
+import json
+import logging
+from typing import Callable
+
+from ...services.embedding_services import get_closest_embeddings
+from ...services.conversions_services import convert_uuids
+
+logger = logging.getLogger(__name__)
 
 TOOL_DESCRIPTION = """
 Search the user's uploaded documents for information relevant to answering their question.
@@ -12,7 +20,55 @@
 Be specific rather than generic - use terms that would appear in the relevant documents.
 """
 
-def search_documents(query: str, user=user) -> str:
+# SEARCH_TOOLS_SCHEMA defines the search_documents tool for the OpenAI API.
+# The model reads this schema to know what tools are available and what
+# arguments to generate — it can only generate arguments declared here.
+SEARCH_TOOLS_SCHEMA = [
+    {
+        "type": "function",
+        "name": "search_documents",
+        "description": TOOL_DESCRIPTION,
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": TOOL_PROPERTY_DESCRIPTION,
+                }
+            },
+            "required": ["query"],
+        },
+    }
+]
+
+
+# TODO: Add get_tools_schema() and make_tool_mapping(user) aggregation functions
+# that combine all tool schemas and mappings so assistant_services.py never needs
+# to change when a new tool is added — only tool_services.py does.
+
+def make_search_tool_mapping(user) -> dict[str, Callable]:
+    # make_search_tool_mapping binds user to search_documents at call time.
+    # user is a request-time value the model cannot generate, so it must be
+    # captured here and kept out of the schema.
+    """Return a tool mapping with search_documents bound to the given user.
+
+    Parameters
+    ----------
+    user : User
+        The Django user object used for document access control.
+
+    Returns
+    -------
+    dict[str, Callable]
+        Tool mapping ready to pass to invoke_functions_from_response.
+    """
+    def bound_search(query: str) -> str:
+        return search_documents(query, user)
+
+    return {"search_documents": bound_search}
+
+
+def search_documents(query: str, user) -> str:
     """
     Search through user's uploaded documents using semantic similarity.
 
@@ -57,43 +113,7 @@ def search_documents(query: str, user=user) -> str:
     except Exception as e:
         return f"Error searching documents: {str(e)}. Please try again if the issue persists."
 
-def handle_tool_calls_with_reasoning():
-    # Open AI Cookbook: Handling Function Calls with Reasoning Models
-    # https://cookbook.openai.com/examples/reasoning_function_calls
-    while True:
-        # Mapping of the tool names we tell the model about and the functions that implement them
-        function_responses = invoke_functions_from_response(
-            response, tool_mapping={"search_documents": search_documents}
-        )
-        if len(function_responses) == 0:  # We're done reasoning
-            logger.info("Reasoning completed")
-            final_response_output_text = response.output_text
-            final_response_id = response.id
-            logger.info(f"Final response: {final_response_output_text}")
-            break
-        else:
-            logger.info("More reasoning required, continuing...")
-            response = client.responses.create(
-                input=function_responses,
-                previous_response_id=response.id,
-                **MODEL_DEFAULTS,
-            )
-            # # Accumulate token usage from reasoning iterations
-            # if hasattr(response, "usage"):
-            #     total_token_usage["input_tokens"] += getattr(
-            #         response.usage, "input_tokens", 0
-            #     )
-            #     total_token_usage["output_tokens"] += getattr(
-            #         response.usage, "output_tokens", 0
-            #     )
-    
-    
-    
-
-
 
-# Open AI Cookbook: Handling Function Calls with Reasoning Models
-# https://cookbook.openai.com/examples/reasoning_function_calls
 def invoke_functions_from_response(
     response, tool_mapping: dict[str, Callable]
 ) -> list[dict]:
@@ -118,6 +138,10 @@ def invoke_functions_from_response(
         - call_id: The unique identifier for the function call
         - output: The result returned by the executed function (string or error message)
     """
+    
+    # Open AI Cookbook: Handling Function Calls with Reasoning Models
+    # https://cookbook.openai.com/examples/reasoning_function_calls
+    
     intermediate_messages = []
     for response_item in response.output:
         if response_item.type == "function_call":
@@ -147,4 +171,44 @@ def invoke_functions_from_response(
             )
         elif response_item.type == "reasoning":
             logger.info(f"Reasoning step: {response_item.summary}")
-    return intermediate_messages
\ No newline at end of file
+    return intermediate_messages
+
+def handle_tool_calls_with_reasoning(
+    response, client, model_defaults: dict, tool_mapping: dict[str, Callable]
+) -> tuple[str, str]:
+    """Run the agentic loop until the model stops emitting function calls.
+
+    Parameters
+    ----------
+    response : OpenAI Response
+        The initial response from the model.
+    client : OpenAI
+        The OpenAI client instance.
+    model_defaults : dict
+        Keyword arguments forwarded to every client.responses.create call.
+    tool_mapping : dict[str, Callable]
+        Maps function names to their implementations.
+
+    Returns
+    -------
+    tuple[str, str]
+        (final_response_output_text, final_response_id)
+    """
+    # Open AI Cookbook: Handling Function Calls with Reasoning Models
+    # https://cookbook.openai.com/examples/reasoning_function_calls
+    while True:
+        # tool_mapping pairs each tool name we tell the model about with the function that implements it
+        function_responses = invoke_functions_from_response(response, tool_mapping)
+        if len(function_responses) == 0: # We're done reasoning
+            logger.info("Reasoning completed")
+            final_response_output_text = response.output_text
+            final_response_id = response.id
+            logger.info(f"Final response: {final_response_output_text}")
+            return final_response_output_text, final_response_id
+        else:
+            logger.info("More reasoning required, continuing...")
+            response = client.responses.create(
+                input=function_responses,
+                previous_response_id=response.id,
+                **model_defaults,
+            )
diff --git a/server/api/views/assistant/views.py b/server/api/views/assistant/views.py
index 6240cd72..d7b10a4b 100644
--- a/server/api/views/assistant/views.py
+++ b/server/api/views/assistant/views.py
@@ -1,8 +1,4 @@
-import os
-import json
 import logging
-import time
-from typing import Callable
 
 from rest_framework.views import APIView
 from rest_framework.response import Response
@@ -13,47 +9,11 @@
 from drf_spectacular.utils import extend_schema, inline_serializer
 from rest_framework import serializers as drf_serializers
 
-from openai import OpenAI
+from .assistant_services import run_assistant
 
-from ...services.embedding_services import get_closest_embeddings
-from ...services.conversions_services import convert_uuids
-
-# Configure logging
 logger = logging.getLogger(__name__)
 
 
-# TODO: OpenAI API Dashboard has total duration and cost metrics  
-# GPT_5_NANO_PRICING_DOLLARS_PER_MILLION_TOKENS = {"input": 0.05, "output": 0.40}
-
-# def calculate_cost_metrics(token_usage: dict, pricing: dict) -> dict:
-#     """
-#     Calculate cost metrics based on token usage and pricing
-
-#     Args:
-#         token_usage: Dictionary containing input_tokens and output_tokens
-#         pricing: Dictionary containing input and output pricing per million tokens
-
-#     Returns:
-#         Dictionary containing input_cost, output_cost, and total_cost in USD
-#     """
-#     TOKENS_PER_MILLION = 1_000_000
-
-#     # Pricing is in dollars per million tokens
-#     input_cost_dollars = (pricing["input"] / TOKENS_PER_MILLION) * token_usage.get(
-#         "input_tokens", 0
-#     )
-#     output_cost_dollars = (pricing["output"] / TOKENS_PER_MILLION) * token_usage.get(
-#         "output_tokens", 0
-#     )
-#     total_cost_dollars = input_cost_dollars + output_cost_dollars
-
-#     return {
-#         "input_cost": input_cost_dollars,
-#         "output_cost": output_cost_dollars,
-#         "total_cost": total_cost_dollars,
-#     }
-
-
 @method_decorator(csrf_exempt, name="dispatch")
 class Assistant(APIView):
     permission_classes = [AllowAny]
@@ -80,8 +40,11 @@ def post(self, request):
             message = request.data.get("message", None)
             previous_response_id = request.data.get("previous_response_id", None)
             
-            = run_assistant()
-
+            final_response_output_text, final_response_id = run_assistant(
+                message=message,
+                user=user,
+                previous_response_id=previous_response_id,
+            )
 
             return Response(
                 {