mishrabp commited on
Commit
bfe9585
Β·
verified Β·
1 Parent(s): 6778a49

Upload folder using huggingface_hub

Browse files
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

ENV PYTHONUNBUFFERED=1 \
    DEBIAN_FRONTEND=noninteractive

WORKDIR /app

# System deps
RUN apt-get update && apt-get install -y --no-install-recommends \
    git build-essential curl \
    && rm -rf /var/lib/apt/lists/*

# Install uv
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
ENV PATH="/root/.local/bin:$PATH"

# Copy project metadata first so the dependency layer is cached across code changes
COPY pyproject.toml .
COPY uv.lock .

# Install dependencies using uv.
# NOTE(fix): `uv sync` has no `--system` flag (that belongs to `uv pip install`);
# it installs into a project virtualenv at /app/.venv instead.
RUN uv sync --frozen --no-dev

# Put the project virtualenv on PATH so `streamlit` in CMD resolves.
ENV PATH="/app/.venv/bin:$PATH"

# Copy your source code
COPY . .

EXPOSE 7860

CMD ["streamlit", "run", "ui/app.py", "--server.port=7860", "--server.address=0.0.0.0", "--server.headless=true"]
README.md CHANGED
@@ -1,10 +1,50 @@
1
  ---
2
- title: Deep Research
3
- emoji: 🏒
4
- colorFrom: indigo
5
- colorTo: indigo
6
- sdk: docker
 
 
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: AI Deep Researcher # Give your app a title
3
+ emoji: πŸ€– # Pick an emoji
4
+ colorFrom: indigo # Theme start color
5
+ colorTo: blue # Theme end color
6
+ sdk: docker # SDK type
7
+ sdk_version: "4.39.0" # Ignored by the docker SDK; kept only for reference
8
+ app_file: ui/app.py # <-- points to your app.py inside ui/
9
  pinned: false
10
  ---
11
 
12
+ # AI Deep Researcher
13
+
14
+ **AI Deep Researcher** is a generative AI learning project built using the OpenAI Agentic Framework. This app performs deep-level web research based on user queries and generates a well-structured, consolidated report.
15
+
16
+ To achieve this, the project integrates the following technologies and AI features:
17
+ - **OpenAI SDK**
18
+ - **OpenAI Agents**
19
+ - **OpenAI WebSearch Tool**
20
+ - **Serper API** - a free alternative to OpenAI WebSearch Tool (https://serper.dev/api-keys)
21
+ - **News API** (https://newsapi.org/v2/everything)
22
+ - **SendGrid** (for emailing report)
23
+ - **LLMs** - (OpenAI, Gemini, Groq)
24
+
25
+
26
+ ## How it works
27
+ The system is a multi-agent solution, where each agent has a specific responsibility:
28
+
29
+ 1. **Planner Agent**
30
+ - Receives the user query and builds a structured query plan.
31
+
32
+ 2. **Guardrail Agent**
33
+ - Validates user input and ensures compliance.
34
+ - Stops the workflow if the input contains inappropriate or unparliamentary words.
35
+
36
+ 3. **Search Agent**
37
+ - Executes the query plan.
38
+ - Runs multiple web searches in parallel to gather data.
39
+
40
+ 4. **Writer Agent**
41
+ - Reads results from all search agents.
42
+ - Generates a well-formatted, consolidated report.
43
+
44
+ 5. **Email Agent**
45
+ - Responsible for sending the report via email using SendGrid.
46
+
47
+ 6. **Orchestrator**
48
+ - The entry point of the system.
49
+ - Facilitates communication and workflow between all agents.
50
+
appagents/__init__.py ADDED
File without changes
appagents/email_agent.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from typing import Dict

import sendgrid
from sendgrid.helpers.mail import Email, Mail, Content, To
from agents import Agent, function_tool
from core.logger import log_call


@function_tool
@log_call
def send_email(subject: str, html_body: str) -> Dict[str, str]:
    """Send an email with the given subject and HTML body.

    Sender and recipient default to the project addresses but can be
    overridden via the EMAIL_FROM / EMAIL_TO environment variables, so the
    tool is usable outside the author's own account.
    """
    sg = sendgrid.SendGridAPIClient(api_key=os.environ.get('SENDGRID_API_KEY'))
    from_email = Email(os.environ.get("EMAIL_FROM", "bm80177@gmail.com"))  # verified sender
    to_email = To(os.environ.get("EMAIL_TO", "bibhup_mishra@yahoo.com"))   # recipient
    content = Content("text/html", html_body)
    mail = Mail(from_email, to_email, subject, content).get()
    response = sg.client.mail.send.post(request_body=mail)
    print("Email response", response.status_code)
    return {"status": "success"}


INSTRUCTIONS = """You are able to send a nicely formatted HTML email based on a detailed report.
You will be provided with a detailed report. You should use your tool to send one email, providing the
report converted into clean, well presented HTML with an appropriate subject line."""

# Agent whose only capability is the send_email tool above.
email_agent = Agent(
    name="Email agent",
    instructions=INSTRUCTIONS,
    tools=[send_email],
    model="gpt-4o-mini",
)
appagents/guardrail_agent.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from pydantic import BaseModel
from agents import (
    Agent,
    Runner,
    input_guardrail,
    GuardrailFunctionOutput,
)
from tools.time_tools import TimeTools
from openai import AsyncOpenAI


class UnparliamentaryCheckOutput(BaseModel):
    """Structured verdict emitted by the language-check agent."""
    has_unparliamentary_language: bool
    explanation: str


# LLM agent that classifies the user's input against the language policy.
guardrail_agent = Agent(
    name="Unparliamentary language check",
    instructions=(
        "Analyze the user input and determine if it contains any unparliamentary, "
        "offensive, or disrespectful language. "
        "If it does, set has_unparliamentary_language=true and explain briefly why. "
        "Otherwise, set it to false."
    ),
    output_type=UnparliamentaryCheckOutput,
    model="gpt-4o-mini",
)


@input_guardrail
async def guardrail_against_unparliamentary(ctx, agent, message: str):
    """Guardrail function that blocks messages with unparliamentary words."""
    verdict = await Runner.run(guardrail_agent, message, context=ctx.context)
    flagged = verdict.final_output.has_unparliamentary_language

    # Expose the full structured verdict so callers can surface the explanation.
    return GuardrailFunctionOutput(
        output_info={"found_unparliamentary_word": verdict.final_output.model_dump()},
        tripwire_triggered=flagged,
    )
appagents/orchestrator.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from agents import Runner, trace, gen_trace_id, SQLiteSession
from appagents.search_agent import search_agent
from appagents.planner_agent import planner_agent, WebSearchItem, WebSearchPlan
from appagents.writer_agent import writer_agent, ReportData
from appagents.email_agent import email_agent
from agents.exceptions import InputGuardrailTripwireTriggered
from core.logger import log_call
import asyncio


class Orchestrator:
    """Coordinates the planner, search, writer and email agents end-to-end."""

    def __init__(self, session: SQLiteSession | None = None):
        # NOTE(fix): SQLiteSession requires a session id as its first argument;
        # the original bare SQLiteSession() call raised TypeError whenever no
        # session was passed in.
        self.session = session or SQLiteSession("deep_research")

    @log_call
    async def run(self, query: str):
        """Run the deep research process, yielding status updates and the final report."""
        trace_id = gen_trace_id()
        with trace("Deep Research Orchestrator", trace_id=trace_id):
            print(f"View trace: https://platform.openai.com/traces/trace?trace_id={trace_id}")
            yield f"View trace: https://platform.openai.com/traces/trace?trace_id={trace_id}"
            print("Starting research...")
            search_plan = await self.plan_searches(query)

            # An empty plan means the guardrail tripped or planning failed.
            if not search_plan or not getattr(search_plan, "searches", []):
                note = getattr(search_plan, "note", "")
                if "unparliamentary" in note.lower():
                    print("⚠️ Guardrail triggered – unparliamentary language detected.")
                    yield note
                else:
                    yield note or "No search results found, ending research."
                return

            yield "Searches planned, starting to search..."
            search_results = await self.perform_searches(search_plan)
            yield "Searches complete, writing report..."
            report = await self.write_report(query, search_results)
            yield "Report written, sending email..."
            # await self.send_email(report)
            # yield "Email sent, research complete"
            yield report.markdown_report

    @log_call
    async def plan_searches(self, query: str) -> WebSearchPlan:
        """Plan the searches to perform for the query.

        Returns an empty plan (with an explanatory ``note``) instead of raising
        when the input guardrail trips or planning fails, so run() can report
        the problem to the UI.
        """
        print("Planning searches...")
        try:
            result = await Runner.run(
                planner_agent,
                f"Query: {query}",
                session=self.session,
            )
            print(f"Will perform {len(result.final_output.searches)} searches")
            return result.final_output_as(WebSearchPlan)

        except InputGuardrailTripwireTriggered as e:
            # NOTE(fix): the SDK exposes the tripped guardrail on
            # e.guardrail_result (the original read a non-existent e.result, so
            # the explanation was always empty). Stay defensive across versions.
            explanation = ""
            guardrail_result = getattr(e, "guardrail_result", None)
            if guardrail_result is not None:
                info = getattr(guardrail_result.output, "output_info", None) or {}
                explanation = info.get("found_unparliamentary_word", {}).get("explanation", "")
            print("⚠️ Guardrail triggered – unparliamentary language detected.")
            return WebSearchPlan(searches=[], note=f"Blocked due to unparliamentary input. {explanation}")

        except Exception as e:
            print(f"❌ Error during planning: {e}")
            return WebSearchPlan(searches=[], note="An error occurred while planning searches.")

    @log_call
    async def perform_searches(self, search_plan: WebSearchPlan) -> list[str]:
        """Run all planned searches concurrently, dropping any that failed."""
        print("Searching...")
        num_completed = 0
        tasks = [asyncio.create_task(self.search(item)) for item in search_plan.searches]
        results = []
        for task in asyncio.as_completed(tasks):
            result = await task
            if result is not None:
                results.append(result)
            num_completed += 1
            print(f"Searching... {num_completed}/{len(tasks)} completed")
        print("Finished searching")
        return results

    @log_call
    async def search(self, item: WebSearchItem) -> str | None:
        """Run a single web search; returns None on failure so one bad search
        does not abort the whole batch."""
        search_input = f"Search term: {item.query}\nReason for searching: {item.reason}"
        try:
            result = await Runner.run(
                search_agent,
                search_input,
            )
            return str(result.final_output)
        except Exception:
            return None

    @log_call
    async def write_report(self, query: str, search_results: list[str]) -> ReportData:
        """Synthesize the final report from the collected search summaries."""
        print("Thinking about report...")
        writer_input = f"Original query: {query}\nSummarized search results: {search_results}"
        result = await Runner.run(
            writer_agent,
            writer_input,
        )
        print("Finished writing report")
        return result.final_output_as(ReportData)

    @log_call
    async def send_email(self, report: ReportData) -> ReportData:
        """Hand the report to the email agent; returns the report for chaining.

        NOTE(fix): the original was annotated ``-> None`` while returning the
        report; the annotation now matches the behavior.
        """
        print("Writing email...")
        await Runner.run(
            email_agent,
            report.markdown_report,
        )
        print("Email sent")
        return report
appagents/planner_agent.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from pydantic import BaseModel, Field
from agents import Agent, OpenAIChatCompletionsModel
from openai import AsyncOpenAI
from tools.time_tools import TimeTools
from appagents.guardrail_agent import guardrail_against_unparliamentary

# Number of search terms the planner is asked to produce.
HOW_MANY_SEARCHES = 10

INSTRUCTIONS = f"You are a helpful research assistant. Given a query, come up with a set of web searches \
to perform to best answer the query. Output {HOW_MANY_SEARCHES} terms to query for. \
Use the tool to find current date & time, and use it where relevant to inform your search and summary."


class WebSearchItem(BaseModel):
    reason: str = Field(description="Your reasoning for why this search is important to the query.")
    query: str = Field(description="The search term to use for the web search.")
    current_date_time: str = Field(description="Current date and time.")


class WebSearchPlan(BaseModel):
    searches: list[WebSearchItem] = Field(description="A list of web searches to perform to best answer the query.")
    # NOTE(fix): the orchestrator constructs WebSearchPlan(searches=[], note=...)
    # to report guardrail/planning failures; without this field pydantic v2
    # silently dropped that message. The default keeps existing outputs valid.
    note: str = Field(default="", description="Optional status note when no searches are planned.")


# Alternative model backends (OpenAI-compatible endpoints).
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash", openai_client=gemini_client)

GROQ_BASE_URL = "https://api.groq.com/openai/v1"
groq_api_key = os.getenv('GROQ_API_KEY')
groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)

openai_model = "gpt-4.1-mini"

# Note: Many models do not like tool call and json output_schema used together.

planner_agent = Agent(
    name="PlannerAgent",
    instructions=INSTRUCTIONS,
    model=openai_model,
    tools=[TimeTools.current_datetime],
    output_type=WebSearchPlan,
    input_guardrails=[guardrail_against_unparliamentary],
)
appagents/search_agent.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from agents import Agent, OpenAIChatCompletionsModel, WebSearchTool
from openai import AsyncOpenAI

from agents.model_settings import ModelSettings
from tools.google_tools import GoogleTools

INSTRUCTIONS = "You are a research assistant. Given a search term, you search the web for that term and \
produce a concise summary of the results. The summary must 3-5 paragraphs and less than 500 \
words. Capture the main points. Write succintly, no need to have complete sentences or good \
grammar. This will be consumed by someone synthesizing a report, so it's vital you capture the \
essence and ignore any fluff. Do not include any additional commentary other than the summary itself."

# Gemini served through its OpenAI-compatible endpoint.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash", openai_client=gemini_client)


# -----------------------------
# CONNECT TO MCP SERVER
# -----------------------------
async def setup_mcp_tools():
    """
    Starts the MCP server via stdio and returns its list of tools
    that can be attached to the agent.

    NOTE(fix): MCPServerStdio was referenced without ever being imported, so
    calling this function raised NameError; it is now imported locally. The
    import path assumes the openai-agents SDK's `agents.mcp` module — confirm
    against the installed SDK version.
    """
    from agents.mcp import MCPServerStdio

    # Absolute path ensures the script is found even from a notebook
    script_path = os.path.abspath("../mcp/search-server.py")

    params = {
        "command": "uvx",  # or "uv" depending on your environment
        "args": ["run", script_path],
    }

    # Start MCP server and list available tools
    async with MCPServerStdio(
        params=params,
        client_session_timeout_seconds=60,
        verbose=True,  # helpful for debugging
    ) as server:
        mcp_tools = await server.list_tools()
        print(f"✅ Connected to MCP server with {len(mcp_tools)} tool(s).")
        return mcp_tools


# Serper-backed Google search tool with a Gemini model. WebSearchTool is the
# OpenAI-hosted alternative, kept commented for easy switching.
search_agent = Agent(
    name="Search agent",
    instructions=INSTRUCTIONS,
    # tools=[WebSearchTool(search_context_size="low")],
    tools=[GoogleTools.search],
    model=gemini_model,
    model_settings=ModelSettings(tool_choice="required"),
)
+ )
87
+
appagents/writer_agent.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from pydantic import BaseModel, Field
from agents import Agent, OpenAIChatCompletionsModel, WebSearchTool
from openai import AsyncOpenAI

INSTRUCTIONS = (
    "You are a senior researcher tasked with writing a cohesive report for a research query. "
    "You will be provided with the original query, and some initial research done by a research assistant.\n"
    "You should first come up with an outline for the report that describes the structure and "
    "flow of the report. Then, generate the report and return that as your final output.\n"
    "The final output should be in markdown format, and it should be lengthy and detailed. Aim "
    "for 5-10 pages of content, at least 1000 words."
)


class ReportData(BaseModel):
    """Structured result produced by the writer agent."""

    short_summary: str = Field(description="A short 2-3 sentence summary of the findings.")
    markdown_report: str = Field(description="The final report")
    follow_up_questions: list[str] = Field(description="Suggested topics to research further")


# Gemini served through its OpenAI-compatible endpoint.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash", openai_client=gemini_client)

writer_agent = Agent(
    name="WriterAgent",
    instructions=INSTRUCTIONS,
    model=gemini_model,
    output_type=ReportData,
)
core/__init__.py ADDED
File without changes
core/logger.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import functools
import datetime
import asyncio


def log_call(func):
    """
    A decorator that logs when a function is called and when it fails.

    NOTE(fix): this decorator is applied to async methods (e.g. in the
    orchestrator). The original sync wrapper returned the coroutine without
    awaiting it, so exceptions raised inside it never reached the except
    block. Coroutine functions now get an async wrapper that awaits the call.
    Async *generator* functions still pass through the sync wrapper, which
    simply returns the generator object — same behavior as before.
    """

    def _format_args(args, kwargs):
        # One repr per positional arg plus k=v pairs for keywords.
        return ", ".join(
            [repr(a) for a in args] + [f"{k}={v!r}" for k, v in kwargs.items()]
        )

    if asyncio.iscoroutinefunction(func):
        @functools.wraps(func)
        async def async_wrapper(*args, **kwargs):
            timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            print(f"[{timestamp}] 🚀 Calling: {func.__name__}({_format_args(args, kwargs)})")
            try:
                return await func(*args, **kwargs)
            except Exception as e:
                print(f"[{timestamp}] ❌ Error in {func.__name__}: {e}")
                raise
        return async_wrapper

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(f"[{timestamp}] 🚀 Calling: {func.__name__}({_format_args(args, kwargs)})")
        try:
            return func(*args, **kwargs)
        except Exception as e:
            print(f"[{timestamp}] ❌ Error in {func.__name__}: {e}")
            raise
    return wrapper
prompts/__init__.py ADDED
File without changes
run.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import subprocess
import sys


def main() -> int:
    """Launch the Streamlit UI and return its exit code."""
    # Use module execution to guarantee Streamlit runs inside the current interpreter
    completed = subprocess.run([
        sys.executable, "-m", "streamlit",
        "run",
        os.path.join("ui", "app.py"),
        "--server.runOnSave", "true",
    ])
    return completed.returncode


if __name__ == "__main__":
    # NOTE(fix): guarded so importing this module no longer launches the
    # server as a side effect; the exit code is propagated to the shell.
    sys.exit(main())
tools/__init__.py ADDED
File without changes
tools/google_tools.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from dotenv import load_dotenv
4
+ from agents import function_tool
5
+ from core.logger import log_call
6
+
7
+ # Load environment variables once
8
+ load_dotenv()
9
+
10
+
11
+ # ============================================================
12
+ # πŸ”Ή GOOGLE SEARCH TOOLSET (Serper.dev API)
13
+ # ============================================================
14
class GoogleTools:
    """
    GoogleTools provides function tools to perform web searches
    using the Serper.dev API (Google Search). It serves as a fallback for
    retrieving recent information from the web.
    """

    @staticmethod
    @function_tool
    @log_call
    def search(query: str, num_results: int = 3) -> str:
        """
        Perform a general Google search using Serper.dev API.

        Parameters:
        -----------
        query : str
            The search query string, e.g., "latest Tesla stock news".
        num_results : int, optional (default=3)
            Maximum number of search results to return.

        Returns:
        --------
        str
            Nicely formatted search results. Never raises: failures come
            back as error-message strings so the calling agent always gets
            usable text.
        """
        try:
            api_key = os.getenv("SERPER_API_KEY")
            if not api_key:
                return "❌ Missing SERPER_API_KEY in environment variables."

            url = "https://google.serper.dev/search"
            headers = {
                "X-API-KEY": api_key,
                "Content-Type": "application/json"
            }
            payload = {
                "q": query,
                "gl": "us",  # country code (optional)
                "hl": "en",  # language code (optional)
                "num": num_results,  # ask the API for only what we need
            }

            # NOTE(fix): a timeout prevents the agent from hanging forever
            # when Serper is slow or unreachable.
            response = requests.post(url, headers=headers, json=payload, timeout=15)
            response.raise_for_status()
            data = response.json()

            organic_results = data.get("organic", [])
            if not organic_results:
                return "No search results found."

            formatted = []
            for item in organic_results[:num_results]:
                title = item.get("title", "No title")
                link = item.get("link", "No link")
                snippet = item.get("snippet", "")
                formatted.append(
                    f"Title: {title}\nLink: {link}\nSnippet: {snippet}\n"
                )

            return "\n".join(formatted)

        except requests.exceptions.RequestException as e:
            return f"⚠️ Network error during Google search: {e}"
        except Exception as e:
            return f"⚠️ Error performing Google search: {e}"
80
+
81
+
82
+ # ============================================================
83
+ # πŸ”Ή OPENAI & OTHER MODEL TOOLS
84
+ # ============================================================
85
class ModelTools:
    """
    ModelTools provides function tools to interact with LLM APIs
    such as OpenAI, Gemini, or Groq.

    Features:
    - Send prompts to a language model.
    - Receive structured text completions.
    - Can be extended to support multiple LLM providers.
    """

    @staticmethod
    @function_tool
    def query_openai(prompt: str, model: str = "gpt-4o-mini") -> str:
        """
        Query an OpenAI language model with a prompt.

        Args:
            prompt: User-provided prompt for the model.
            model: Model name to query (e.g., "gpt-4o-mini", "gpt-4").

        Returns:
            The model's response content as text. On a network/API failure
            an error-message string is returned instead of raising.
        """
        try:
            # Delayed import so the module loads even without the SDK installed.
            from openai import OpenAI

            client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
            completion = client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
            )
            return completion.choices[0].message.content
        except Exception as e:
            return f"Error querying OpenAI API: {e}"
tools/time_tools.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from datetime import datetime
from agents import function_tool
from core.logger import log_call


class TimeTools:
    """Provides tools related to current date and time."""

    @staticmethod
    @function_tool
    @log_call
    def current_datetime(format: str = "%Y-%m-%d %H:%M:%S") -> str:
        """
        Return the current date and time as a formatted string.

        Args:
            format (str): Optional strftime format (default: "YYYY-MM-DD HH:MM:SS").
                The name shadows the builtin `format` but is kept for
                backward compatibility with the tool's schema.

        Returns:
            str: Current date and time in the specified format
        """
        return datetime.now().strftime(format)
ui/app.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import asyncio
3
+ import time
4
+ import html
5
+ from datetime import datetime, UTC
6
+ from io import BytesIO
7
+
8
+ from dotenv import load_dotenv
9
+ from reportlab.platypus import SimpleDocTemplate, Paragraph
10
+ from reportlab.lib.styles import getSampleStyleSheet
11
+
12
+ from appagents.orchestrator import Orchestrator
13
+ from agents import SQLiteSession
14
+
15
+ load_dotenv(override=True)
16
+
17
+ # --------------------
18
+ # Page config
19
+ # --------------------
20
+ st.set_page_config(page_title="Deep Research AI", layout="wide")
21
+
22
+ # --------------------
23
+ # Session-state init
24
+ # --------------------
25
+ if "session_store" not in st.session_state:
26
+ st.session_state.session_store = {}
27
+
28
+ if "session_id" not in st.session_state:
29
+ st.session_state.session_id = str(id(st))
30
+
31
+ if "final_report" not in st.session_state:
32
+ st.session_state.final_report = ""
33
+
34
+ if "button_disabled" not in st.session_state:
35
+ st.session_state.button_disabled = False
36
+
37
+
38
+ # (dark mode removed - UI uses single light theme)
39
+
40
+ # --------------------
41
+ # CSS for light/dark and layout
42
+ # --------------------
43
+ LIGHT_CSS = """
44
+ <style>
45
+ .block-container { max-width: 90% !important; margin-left:5% !important; margin-right:5% !important; padding-top:1.5rem; padding-bottom:2rem; }
46
+ h1 { font-size:2.2rem !important; text-align:center; color: #0b1220 !important; }
47
+ h2 { color: #0b1220 !important; }
48
+ h3 { color: #0b1220 !important; }
49
+ .report-box { background:#ffffff; padding:24px; border-radius:12px; border:1px solid #e9ecef; box-shadow:0 6px 18px rgba(23,43,77,0.04); font-size:1.05rem; line-height:1.65; white-space:pre-wrap; word-wrap:break-word; overflow-wrap:break-word; }
50
+ textarea, .stTextArea>div>div>textarea { font-size:1.05rem !important; }
51
+ .button-row { display:flex; justify-content:flex-start; gap:12px; margin-top:15px; margin-bottom:15px; flex-wrap: wrap; align-items: center; }
52
+ .button-row [data-testid="column"] { flex: 0 !important; }
53
+ .stButton { width: 100% !important; }
54
+ .stButton>button { padding: 0 !important; width: 180px !important; height: 48px !important; border-radius: 6px !important; border: 1px solid #d0d0d0 !important; font-weight: 500 !important; font-size: 0.9rem !important; white-space: normal !important; background-color: #f8f9fa !important; color: #0b1220 !important; transition: all 0.2s !important; display: flex !important; align-items: center !important; justify-content: center !important; line-height: 1.2 !important; }
55
+ .stButton>button:hover { background-color: #e9ecef !important; border-color: #999 !important; }
56
+ .stButton>button:active { background-color: #dee2e6 !important; }
57
+ .sidebar-title { font-weight:700; margin-bottom:8px; }
58
+ .history-item { padding:6px 8px; border-radius:8px; margin-bottom:6px; background: #fafafa; border:1px solid #eee; }
59
+ .small-muted { color:#6c757d; font-size:0.9rem; }
60
+ /* Remove truncation constraints on markdown output */
61
+ [data-testid="stMarkdown"] { max-height: none !important; height: auto !important; }
62
+ </style>
63
+ """
64
+
65
+ st.markdown(LIGHT_CSS, unsafe_allow_html=True)
66
+
67
+ # --------------------
68
+ # Helpers: orchestrator streaming
69
+ # --------------------
70
+ async def run_async_chunks(query: str, session_id: str):
71
+ if session_id not in st.session_state.session_store:
72
+ st.session_state.session_store[session_id] = SQLiteSession(f"session_{session_id}.db")
73
+ session = st.session_state.session_store[session_id]
74
+ orchestrator = Orchestrator(session=session)
75
+ async for chunk in orchestrator.run(query):
76
+ yield chunk
77
+
78
+ def safe_title_from_query(q: str):
79
+ q = q.strip()
80
+ if not q:
81
+ return "Untitled Report"
82
+ first_line = q.splitlines()[0]
83
+ # limit length for title
84
+ return (first_line[:80] + "...") if len(first_line) > 80 else first_line
85
+
86
+ # --------------------
87
+ # Export helpers
88
+ # --------------------
89
+ def make_pdf_bytes(text: str) -> bytes:
90
+ """Convert markdown text to PDF with proper formatting."""
91
+ buf = BytesIO()
92
+ doc = SimpleDocTemplate(buf, topMargin=0.5*72, bottomMargin=0.5*72, leftMargin=0.75*72, rightMargin=0.75*72)
93
+ styles = getSampleStyleSheet()
94
+ story = []
95
+
96
+ # parse markdown: headings, lists, bold, italic
97
+ lines = text.split("\n")
98
+ for line in lines:
99
+ stripped = line.strip()
100
+
101
+ if not stripped:
102
+ story.append(Paragraph(" ", styles["Normal"])) # empty line
103
+ continue
104
+
105
+ # heading levels
106
+ if stripped.startswith("# "):
107
+ story.append(Paragraph(html.escape(stripped[2:]), styles["Heading1"]))
108
+ elif stripped.startswith("## "):
109
+ story.append(Paragraph(html.escape(stripped[3:]), styles["Heading2"]))
110
+ elif stripped.startswith("### "):
111
+ story.append(Paragraph(html.escape(stripped[4:]), styles["Heading3"]))
112
+ elif stripped.startswith("- ") or stripped.startswith("* "):
113
+ # bullet list
114
+ story.append(Paragraph("β€’ " + html.escape(stripped[2:]), styles["Normal"]))
115
+ elif stripped[0].isdigit() and ". " in stripped[:4]:
116
+ # numbered list
117
+ story.append(Paragraph(html.escape(stripped), styles["Normal"]))
118
+ else:
119
+ # regular paragraph with basic markdown formatting
120
+ # escape first, then replace with safe formatting tags
121
+ p_text = html.escape(stripped)
122
+
123
+ # handle **bold** (convert escaped ** back and wrap in <b> tags)
124
+ p_text = p_text.replace("&lt;b&gt;", "<b>").replace("&lt;/b&gt;", "</b>")
125
+ # Simple approach: replace **text** with <b>text</b>
126
+ import re
127
+ p_text = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', p_text)
128
+ p_text = re.sub(r'__(.+?)__', r'<b>\1</b>', p_text)
129
+ # handle *italic* β†’ <i>italic</i> carefully (avoid double replacement)
130
+ p_text = re.sub(r'\*([^*]+?)\*', r'<i>\1</i>', p_text)
131
+ p_text = re.sub(r'_([^_]+?)_', r'<i>\1</i>', p_text)
132
+
133
+ story.append(Paragraph(p_text, styles["Normal"]))
134
+
135
+ doc.build(story)
136
+ buf.seek(0)
137
+ return buf.read()
138
+
139
def make_md_bytes(text: str) -> bytes:
    """Return the report markdown encoded as UTF-8 bytes for download."""
    return bytes(text, "utf-8")
141
+
142
def make_html_bytes(text: str, title="Deep Research Report") -> bytes:
    """Wrap the escaped report text in a minimal standalone HTML page.

    Content is HTML-escaped and newlines become <br/> so plain text is
    preserved visually; returns the document as UTF-8 bytes.
    """
    escaped = html.escape(text)
    body = "<br/>".join(escaped.split("\n"))
    safe_title = html.escape(title)
    page = f"""<!doctype html>
<html>
<head>
<meta charset="utf-8">
<title>{safe_title}</title>
<style>body{{font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial; padding:24px; max-width:900px; margin:auto; line-height:1.6; color: #0b1220; background: #ffffff }}</style>
</head>
<body>
<h1>{safe_title}</h1>
<div>{body}</div>
</body>
</html>"""
    return page.encode("utf-8")
158
+
159
+ # --------------------
160
+ # Streaming runner (final output replaces trace)
161
+ # --------------------
162
def run_streaming(query: str, final_ph, status_ph):
    """Run the research pipeline for *query* and stream output into the UI.

    Chunks are accumulated starting from the first chunk whose stripped text
    begins with '#' (the report heading — earlier chunks are agent planning
    traces) and live-rendered into ``status_ph``. On completion the report is
    stored in ``st.session_state.final_report``, rendered into ``final_ph``,
    and the Run button is re-enabled.

    Parameters:
        query: the research topic (caller passes it already stripped).
        final_ph: st.empty() placeholder that receives the final report.
        status_ph: st.empty() placeholder for live status / streamed markdown.
    """
    session_id = st.session_state.session_id

    progress_ph = st.empty()

    # reset any previous report and streaming state
    st.session_state.final_report = ""
    collected = ""  # accumulated report markdown
    progress_val = 0
    progress_bar = progress_ph.progress(progress_val)

    # ensure any prior final output is cleared while streaming
    try:
        final_ph.empty()
    except Exception:
        pass

    async def _stream():
        nonlocal progress_val, collected
        status_ph.info("Streaming... receiving data")
        collecting = False
        async for chunk in run_async_chunks(query, session_id):
            # start collecting chunks once we see one beginning with '#'
            if not collecting and chunk.strip().startswith("#"):
                collecting = True

            if collecting:
                collected += chunk
                # render accumulated markdown in real time so the user
                # sees the report streaming in
                status_ph.markdown(collected)

            # progress is synthetic (chunk count, capped below 100 until done)
            progress_val = min(progress_val + 2, 98)
            progress_bar.progress(progress_val)

    try:
        try:
            asyncio.run(_stream())
        except RuntimeError:
            # asyncio.run() refuses to start when a loop is already running;
            # fall back to a dedicated loop. Fix: close the loop in a finally
            # so it is released even when the coroutine raises, and let such
            # errors reach the outer handler (previously they escaped
            # unhandled, leaving the Run button permanently disabled).
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                loop.run_until_complete(_stream())
            finally:
                loop.close()
    except Exception as e:
        # on any failure: re-enable the button and surface the error
        st.session_state.button_disabled = False
        status_ph.error(f"❌ Error during research: {str(e)}")
        progress_ph.empty()
        return

    # finalize
    progress_bar.progress(100)
    status_ph.success("✅ Research complete!")

    # keep only the accumulated report text (trim surrounding whitespace)
    md_text = collected.strip()
    st.session_state.final_report = md_text
    progress_ph.empty()

    # re-enable button after completion
    st.session_state.button_disabled = False

    # render final output as Markdown into the dedicated placeholder so
    # headings, lists and links display correctly
    if st.session_state.final_report:
        final_ph.markdown(st.session_state.final_report)
    else:
        final_ph.empty()

    # rerun to reflect the re-enabled button and final output
    st.rerun()
238
+
239
+ # Sidebar removed per UI request. Dark-mode and history removed.
240
+
241
+
242
+ # --------------------
243
+ # Main UI
244
+ # --------------------
245
# --------------------
# Main UI (top-level Streamlit script: re-executed on every rerun)
# --------------------
st.title("🧠 Deep Research (Powered by Agentic AI)")
st.write("What topic would you like to research?")

# Topic input; label hidden because the st.write above serves as the prompt.
query = st.text_area("Enter your research topic", value="The impact of AI on the Healthcare Industry.", height=50, label_visibility="collapsed")

# Action row with buttons
col1, col2, col3, col4 = st.columns([2.0, 2.0, 2.0, 2.0])

with col1:
    # Disabled while a research run is in flight (see button_disabled below).
    run_clicked = st.button("🚀 Run Deep Research", key="run", disabled=st.session_state.button_disabled)

# PDF and MD download buttons appear inline after a final_report exists
if st.session_state.final_report:
    with col2:
        # PDF generator stream - create bytes on demand
        pdf_bytes = make_pdf_bytes(st.session_state.final_report)
        st.download_button("📄 Download PDF", data=pdf_bytes, file_name="report.pdf", mime="application/pdf")

    with col3:
        # Markdown
        md_bytes = make_md_bytes(st.session_state.final_report)
        st.download_button("📝 Download MD", data=md_bytes, file_name="report.md", mime="text/markdown")

# placeholder for final report (used so streaming traces can be cleared)
final_ph = st.empty()

# placeholder for streaming status and progress updates
status_ph = st.empty()

# Run research if requested; disable button on click and re-run.
# Two-step dance: clicking only flips button_disabled and reruns, so the
# button is redrawn disabled BEFORE the long-running research starts.
if run_clicked and query.strip():
    st.session_state.button_disabled = True
    st.rerun()

# Execute streaming if button was disabled (i.e., on the rerun after click)
if st.session_state.button_disabled and query.strip():
    run_streaming(query.strip(), final_ph, status_ph)
elif not st.session_state.button_disabled:
    # if final_report exists (e.g., from previous run), show it in the final placeholder
    if st.session_state.final_report:
        # NOTE(review): unsafe_allow_html=True renders any raw HTML present in
        # the model-generated report — confirm this is intended.
        final_ph.markdown(st.session_state.final_report, unsafe_allow_html=True)
    else:
        st.info("Enter a topic and press Run. Final report will replace streaming traces.")

# small debug caption
st.caption(f"Session: {st.session_state.session_id}")