Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- Dockerfile +29 -0
- README.md +46 -6
- appagents/__init__.py +0 -0
- appagents/email_agent.py +32 -0
- appagents/guardrail_agent.py +45 -0
- appagents/orchestrator.py +119 -0
- appagents/planner_agent.py +45 -0
- appagents/search_agent.py +87 -0
- appagents/writer_agent.py +41 -0
- core/__init__.py +0 -0
- core/logger.py +22 -0
- prompts/__init__.py +0 -0
- run.py +11 -0
- tools/__init__.py +0 -0
- tools/google_tools.py +132 -0
- tools/time_tools.py +22 -0
- ui/app.py +291 -0
Dockerfile
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FROM python:3.11-slim

ENV PYTHONUNBUFFERED=1 \
    DEBIAN_FRONTEND=noninteractive

WORKDIR /app

# System deps
RUN apt-get update && apt-get install -y \
    git build-essential curl \
    && rm -rf /var/lib/apt/lists/*

# Install uv
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
ENV PATH="/root/.local/bin:$PATH"

# Copy project metadata
COPY pyproject.toml .
COPY uv.lock .

# Install dependencies using uv.
# NOTE: `--system` is not a valid `uv sync` flag (it belongs to `uv pip install`);
# `uv sync` installs into the project virtualenv at /app/.venv.
RUN uv sync --frozen --no-dev

# Put the project virtualenv on PATH so `streamlit` (installed by uv sync)
# is resolvable by the CMD below — without this the container fails to start.
ENV PATH="/app/.venv/bin:$PATH"

# Copy your source code
COPY . .

EXPOSE 7860

CMD ["streamlit", "run", "ui/app.py", "--server.port=7860", "--server.address=0.0.0.0", "--server.headless=true"]
|
README.md
CHANGED
|
@@ -1,10 +1,50 @@
|
|
| 1 |
---
|
| 2 |
-
title: Deep
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom: indigo
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk: docker
|
|
|
|
|
|
|
| 7 |
pinned: false
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: AI Deep Researcher # Give your app a title
|
| 3 |
+
emoji: π€ # Pick an emoji
|
| 4 |
+
colorFrom: indigo # Theme start color
|
| 5 |
+
colorTo: blue # Theme end color
|
| 6 |
+
sdk: docker # SDK type
|
| 7 |
+
sdk_version: "4.39.0" # SDK version pin (not used by the docker SDK; kept for reference)
|
| 8 |
+
app_file: ui/app.py # <-- points to your app.py inside ui/
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# AI Deep Researcher
|
| 13 |
+
|
| 14 |
+
**AI Deep Researcher** is a generative AI learning project built using the OpenAI Agentic Framework. This app performs deep-level web research based on user queries and generates a well-structured, consolidated report.
|
| 15 |
+
|
| 16 |
+
To achieve this, the project integrates the following technologies and AI features:
|
| 17 |
+
- **OpenAI SDK**
|
| 18 |
+
- **OpenAI Agents**
|
| 19 |
+
- **OpenAI WebSearch Tool**
|
| 20 |
+
- **Serper API** - a free alternative to OpenAI WebSearch Tool (https://serper.dev/api-keys)
|
| 21 |
+
- **News API** (https://newsapi.org/v2/everything)
|
| 22 |
+
- **SendGrid** (for emailing report)
|
| 23 |
+
- **LLMs** - (OpenAI, Gemini, Groq)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
## How it works
|
| 27 |
+
The system is a multi-agent solution, where each agent has a specific responsibility:
|
| 28 |
+
|
| 29 |
+
1. **Planner Agent**
|
| 30 |
+
- Receives the user query and builds a structured query plan.
|
| 31 |
+
|
| 32 |
+
2. **Guardrail Agent**
|
| 33 |
+
- Validates user input and ensures compliance.
|
| 34 |
+
- Stops the workflow if the input contains inappropriate or unparliamentary words.
|
| 35 |
+
|
| 36 |
+
3. **Search Agent**
|
| 37 |
+
- Executes the query plan.
|
| 38 |
+
- Runs multiple web searches in parallel to gather data.
|
| 39 |
+
|
| 40 |
+
4. **Writer Agent**
|
| 41 |
+
- Reads results from all search agents.
|
| 42 |
+
- Generates a well-formatted, consolidated report.
|
| 43 |
+
|
| 44 |
+
5. **Email Agent**
|
| 45 |
+
- Responsible for sending the report via email using SendGrid.
|
| 46 |
+
|
| 47 |
+
6. **Orchestrator**
|
| 48 |
+
- The entry point of the system.
|
| 49 |
+
- Facilitates communication and workflow between all agents.
|
| 50 |
+
|
appagents/__init__.py
ADDED
|
File without changes
|
appagents/email_agent.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import Dict
|
| 3 |
+
|
| 4 |
+
import sendgrid
|
| 5 |
+
from sendgrid.helpers.mail import Email, Mail, Content, To
|
| 6 |
+
from agents import Agent, function_tool
|
| 7 |
+
from core.logger import log_call
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
@function_tool
@log_call
def send_email(subject: str, html_body: str) -> Dict[str, str]:
    """Send an email with the given subject and HTML body via SendGrid.

    The sender and recipient default to the original hard-coded addresses but
    can be overridden through the EMAIL_FROM / EMAIL_TO environment variables,
    so the app is deployable without editing source code.

    Args:
        subject: Email subject line.
        html_body: Email body as an HTML string.

    Returns:
        A status dict; always {"status": "success"} when no exception is raised.
    """
    sg = sendgrid.SendGridAPIClient(api_key=os.environ.get('SENDGRID_API_KEY'))
    # Sender must be a SendGrid-verified address.
    from_email = Email(os.environ.get("EMAIL_FROM", "bm80177@gmail.com"))
    to_email = To(os.environ.get("EMAIL_TO", "bibhup_mishra@yahoo.com"))
    content = Content("text/html", html_body)
    mail = Mail(from_email, to_email, subject, content).get()
    response = sg.client.mail.send.post(request_body=mail)
    print("Email response", response.status_code)
    return {"status": "success"}
|
| 22 |
+
|
| 23 |
+
# Prompt for the email agent: convert the supplied report to HTML and send
# exactly one email through the send_email tool.
INSTRUCTIONS = """You are able to send a nicely formatted HTML email based on a detailed report.
You will be provided with a detailed report. You should use your tool to send one email, providing the
report converted into clean, well presented HTML with an appropriate subject line."""

email_agent = Agent(
    name="Email agent",
    instructions=INSTRUCTIONS,
    tools=[send_email],
    model="gpt-4o-mini",
)
|
appagents/guardrail_agent.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from agents import (
|
| 4 |
+
Agent,
|
| 5 |
+
Runner,
|
| 6 |
+
input_guardrail,
|
| 7 |
+
GuardrailFunctionOutput,
|
| 8 |
+
)
|
| 9 |
+
from tools.time_tools import TimeTools
|
| 10 |
+
from openai import AsyncOpenAI
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# Step 1: structured verdict returned by the guardrail LLM.
class UnparliamentaryCheckOutput(BaseModel):
    """Verdict of the unparliamentary-language check."""

    # True when the analyzed input contains offensive/unparliamentary language.
    has_unparliamentary_language: bool
    # Brief human-readable justification for the verdict.
    explanation: str
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
# Step 2: LLM agent that performs the check and emits the schema above.
guardrail_agent = Agent(
    name="Unparliamentary language check",
    instructions=(
        "Analyze the user input and determine if it contains any unparliamentary, "
        "offensive, or disrespectful language. "
        "If it does, set has_unparliamentary_language=true and explain briefly why. "
        "Otherwise, set it to false."
    ),
    output_type=UnparliamentaryCheckOutput,
    model="gpt-4o-mini",
)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# Step 3: expose the check as an input guardrail for other agents.
@input_guardrail
async def guardrail_against_unparliamentary(ctx, agent, message: str):
    """Guardrail function that blocks messages with unparliamentary words."""
    check = await Runner.run(guardrail_agent, message, context=ctx.context)
    verdict = check.final_output
    return GuardrailFunctionOutput(
        output_info={"found_unparliamentary_word": verdict.model_dump()},
        tripwire_triggered=verdict.has_unparliamentary_language,
    )
|
appagents/orchestrator.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from agents import Runner, trace, gen_trace_id, SQLiteSession
|
| 2 |
+
from appagents.search_agent import search_agent
|
| 3 |
+
from appagents.planner_agent import planner_agent, WebSearchItem, WebSearchPlan
|
| 4 |
+
from appagents.writer_agent import writer_agent, ReportData
|
| 5 |
+
from appagents.email_agent import email_agent
|
| 6 |
+
from agents.exceptions import InputGuardrailTripwireTriggered
|
| 7 |
+
from core.logger import log_call
|
| 8 |
+
import asyncio
|
| 9 |
+
|
| 10 |
+
class Orchestrator:
    """Coordinates the planner, search, writer and email agents to run a
    full deep-research pass for a user query."""

    def __init__(self, session: SQLiteSession | None = None):
        # SQLiteSession requires a session id; the original bare
        # SQLiteSession() call raised TypeError. A stable default id keeps
        # conversation state across runs. TODO confirm against installed SDK.
        self.session = session or SQLiteSession("deep_research")

    @log_call
    async def run(self, query: str):
        """ Run the deep research process, yielding the status updates and the final report"""
        trace_id = gen_trace_id()
        with trace("Deep Research Orchestrator", trace_id=trace_id):
            print(f"View trace: https://platform.openai.com/traces/trace?trace_id={trace_id}")
            yield f"View trace: https://platform.openai.com/traces/trace?trace_id={trace_id}"
            print("Starting research...")
            search_plan = await self.plan_searches(query)

            # Empty plan: either the guardrail blocked the input or planning
            # failed; surface the note and stop.
            if not search_plan or not getattr(search_plan, "searches", []):
                note = getattr(search_plan, "note", "") or ""
                if "unparliamentary" in note.lower():
                    print("⚠️ Guardrail triggered — unparliamentary language detected.")
                    yield note
                else:
                    yield note or "No search results found, ending research."
                return

            yield "Searches planned, starting to search..."
            search_results = await self.perform_searches(search_plan)
            yield "Searches complete, writing report..."
            report = await self.write_report(query, search_results)
            yield "Report written, sending email..."
            # await self.send_email(report)
            # yield "Email sent, research complete"
            yield report.markdown_report

    @log_call
    async def plan_searches(self, query: str) -> WebSearchPlan:
        """Plan the searches to perform for the query."""
        print("Planning searches...")
        try:
            result = await Runner.run(
                planner_agent,
                f"Query: {query}",
                session=self.session,
            )
            print(f"Will perform {len(result.final_output.searches)} searches")
            return result.final_output_as(WebSearchPlan)

        except InputGuardrailTripwireTriggered as e:
            # The tripwire exception carries a guardrail-result OBJECT, not a
            # dict: the original `getattr(e, "result", {}).get(...)` chain
            # raised AttributeError whenever the attribute was present. Walk
            # the attributes defensively instead. NOTE(review): attribute name
            # `guardrail_result` per the Agents SDK — confirm on upgrade.
            explanation = ""
            guardrail_output = getattr(getattr(e, "guardrail_result", None), "output", None)
            if guardrail_output is not None:
                info = getattr(guardrail_output, "output_info", None) or {}
                explanation = info.get("found_unparliamentary_word", {}).get("explanation", "")
            print("⚠️ Guardrail triggered — unparliamentary language detected.")
            return WebSearchPlan(searches=[], note=f"Blocked due to unparliamentary input. {explanation}")

        except Exception as e:
            print(f"❌ Error during planning: {e}")
            return WebSearchPlan(searches=[], note="An error occurred while planning searches.")

    @log_call
    async def perform_searches(self, search_plan: WebSearchPlan) -> list[str]:
        """ Perform the searches to perform for the query """
        print("Searching...")
        num_completed = 0
        # Run all searches concurrently; failed searches return None and are
        # simply dropped from the results.
        tasks = [asyncio.create_task(self.search(item)) for item in search_plan.searches]
        results = []
        for task in asyncio.as_completed(tasks):
            result = await task
            if result is not None:
                results.append(result)
            num_completed += 1
            print(f"Searching... {num_completed}/{len(tasks)} completed")
        print("Finished searching")
        return results

    @log_call
    async def search(self, item: WebSearchItem) -> str | None:
        """ Perform a search for the query """
        input = f"Search term: {item.query}\nReason for searching: {item.reason}"
        try:
            result = await Runner.run(
                search_agent,
                input,
            )
            return str(result.final_output)
        except Exception:
            # Best-effort: a single failed search must not abort the batch.
            return None

    @log_call
    async def write_report(self, query: str, search_results: list[str]) -> ReportData:
        """ Write the report for the query """
        print("Thinking about report...")
        input = f"Original query: {query}\nSummarized search results: {search_results}"
        result = await Runner.run(
            writer_agent,
            input,
        )
        print("Finished writing report")
        return result.final_output_as(ReportData)

    @log_call
    async def send_email(self, report: ReportData) -> None:
        """Send the report via the email agent. Fire-and-forget: returns None
        (the original returned `report` despite the -> None annotation)."""
        print("Writing email...")
        await Runner.run(
            email_agent,
            report.markdown_report,
        )
        print("Email sent")
|
appagents/planner_agent.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
from pydantic import BaseModel, Field
from agents import Agent, OpenAIChatCompletionsModel
from openai import AsyncOpenAI
from tools.time_tools import TimeTools
from appagents.guardrail_agent import guardrail_against_unparliamentary

HOW_MANY_SEARCHES = 10

INSTRUCTIONS = f"You are a helpful research assistant. Given a query, come up with a set of web searches \
to perform to best answer the query. Output {HOW_MANY_SEARCHES} terms to query for. \
Use the tool to find current date & time, and use it where relevant to inform your search and summary."


class WebSearchItem(BaseModel):
    """One planned web search with its justification."""
    reason: str = Field(description="Your reasoning for why this search is important to the query.")
    query: str = Field(description="The search term to use for the web search.")
    current_date_time: str = Field(description="Current date and time.")


class WebSearchPlan(BaseModel):
    """The planner's full output: a list of searches plus an optional note."""
    searches: list[WebSearchItem] = Field(description="A list of web searches to perform to best answer the query.")
    # The orchestrator constructs WebSearchPlan(searches=[], note=...) on
    # guardrail/error paths and reads it back; without this field pydantic
    # silently dropped the note and callers always saw "". The default keeps
    # the model's normal output unchanged.
    note: str = Field(default="", description="Optional status note, e.g. why no searches were planned.")


# Alternative model backends (Gemini, Groq) kept configured; the active
# planner model is the OpenAI one below.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash", openai_client=gemini_client)

GROQ_BASE_URL = "https://api.groq.com/openai/v1"
groq_api_key = os.getenv('GROQ_API_KEY')
groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)

openai_model = "gpt-4.1-mini"

# Note: Many models do not like tool call and json output_schema used together.

planner_agent = Agent(
    name="PlannerAgent",
    instructions=INSTRUCTIONS,
    model=openai_model,
    tools=[TimeTools.current_datetime],
    output_type=WebSearchPlan,
    input_guardrails=[guardrail_against_unparliamentary],
)
|
appagents/search_agent.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
from agents import Agent, OpenAIChatCompletionsModel, WebSearchTool
from openai import AsyncOpenAI

from agents.model_settings import ModelSettings
from tools.google_tools import GoogleTools

INSTRUCTIONS = "You are a research assistant. Given a search term, you search the web for that term and \
produce a concise summary of the results. The summary must 3-5 paragraphs and less than 500 \
words. Capture the main points. Write succintly, no need to have complete sentences or good \
grammar. This will be consumed by someone synthesizing a report, so it's vital you capture the \
essence and ignore any fluff. Do not include any additional commentary other than the summary itself."

# Gemini served through its OpenAI-compatible endpoint.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash", openai_client=gemini_client)


# -----------------------------
# CONNECT TO MCP SERVER
# -----------------------------
async def setup_mcp_tools():
    """
    Starts the MCP server via stdio and returns its list of tools
    that can be attached to the agent.
    """
    # BUG FIX: MCPServerStdio was referenced but never imported, so calling
    # this helper raised NameError. Import it locally so the MCP dependency
    # is only needed when the helper is actually used.
    # NOTE(review): import path per the Agents SDK (`agents.mcp`) — confirm.
    from agents.mcp import MCPServerStdio

    # Absolute path ensures the script is found even from a notebook
    script_path = os.path.abspath("../mcp/search-server.py")

    params = {
        "command": "uvx",  # or "uv" depending on your environment
        "args": ["run", script_path],
    }

    # Start MCP server and list available tools
    async with MCPServerStdio(
        params=params,
        client_session_timeout_seconds=60,
        verbose=True,  # helpful for debugging
    ) as server:
        mcp_tools = await server.list_tools()
        print(f"✅ Connected to MCP server with {len(mcp_tools)} tool(s).")
        return mcp_tools


# Active search agent: Gemini model + Serper-backed Google search tool.
# (WebSearchTool is the OpenAI-hosted alternative; GoogleTools.search is the
# free Serper.dev fallback.)
search_agent = Agent(
    name="Search agent",
    instructions=INSTRUCTIONS,
    # tools=[WebSearchTool(search_context_size="low")],
    tools=[GoogleTools.search],
    model=gemini_model,
    model_settings=ModelSettings(tool_choice="required"),
)
appagents/writer_agent.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
from pydantic import BaseModel, Field
from agents import Agent, OpenAIChatCompletionsModel, WebSearchTool
from openai import AsyncOpenAI

# Prompt for the report writer: outline first, then a long markdown report.
INSTRUCTIONS = (
    "You are a senior researcher tasked with writing a cohesive report for a research query. "
    "You will be provided with the original query, and some initial research done by a research assistant.\n"
    "You should first come up with an outline for the report that describes the structure and "
    "flow of the report. Then, generate the report and return that as your final output.\n"
    "The final output should be in markdown format, and it should be lengthy and detailed. Aim "
    "for 5-10 pages of content, at least 1000 words."
)


class ReportData(BaseModel):
    """Structured output produced by the writer agent."""

    short_summary: str = Field(description="A short 2-3 sentence summary of the findings.")
    markdown_report: str = Field(description="The final report")
    follow_up_questions: list[str] = Field(description="Suggested topics to research further")


# Gemini served through its OpenAI-compatible endpoint.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash", openai_client=gemini_client)

writer_agent = Agent(
    name="WriterAgent",
    instructions=INSTRUCTIONS,
    model=gemini_model,
    output_type=ReportData,
)
|
core/__init__.py
ADDED
|
File without changes
|
core/logger.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import functools
import datetime
import inspect


def log_call(func):
    """
    Decorator that logs each call to *func* (timestamp, name, arguments)
    and logs then re-raises any exception.

    Handles both sync and async functions: the original version wrapped
    coroutine functions with a sync wrapper, so exceptions raised inside
    the awaited coroutine escaped the logging path entirely.
    """

    def _announce(args, kwargs):
        # Render positional and keyword arguments like a Python call site.
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        arg_list = ", ".join(
            [repr(a) for a in args] + [f"{k}={v!r}" for k, v in kwargs.items()]
        )
        print(f"[{timestamp}] 📞 Calling: {func.__name__}({arg_list})")
        return timestamp

    if inspect.iscoroutinefunction(func):
        @functools.wraps(func)
        async def async_wrapper(*args, **kwargs):
            timestamp = _announce(args, kwargs)
            try:
                return await func(*args, **kwargs)
            except Exception as e:
                print(f"[{timestamp}] ❌ Error in {func.__name__}: {e}")
                raise
        return async_wrapper

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        timestamp = _announce(args, kwargs)
        try:
            return func(*args, **kwargs)
        except Exception as e:
            print(f"[{timestamp}] ❌ Error in {func.__name__}: {e}")
            raise
    return wrapper
|
prompts/__init__.py
ADDED
|
File without changes
|
run.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import subprocess
import sys


def main() -> None:
    """Launch the Streamlit UI.

    Uses module execution (`python -m streamlit`) to guarantee Streamlit
    runs inside the current interpreter and its environment, rather than
    whatever `streamlit` binary happens to be first on PATH.
    """
    subprocess.run([
        sys.executable, "-m", "streamlit",
        "run",
        os.path.join("ui", "app.py"),
        "--server.runOnSave", "true",
    ])


# Guard the entry point so importing this module has no side effects
# (the original launched Streamlit at import time).
if __name__ == "__main__":
    main()
|
tools/__init__.py
ADDED
|
File without changes
|
tools/google_tools.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import requests
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
from agents import function_tool
|
| 5 |
+
from core.logger import log_call
|
| 6 |
+
|
| 7 |
+
# Load environment variables once
|
| 8 |
+
load_dotenv()
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# ============================================================
|
| 12 |
+
# πΉ GOOGLE SEARCH TOOLSET (Serper.dev API)
|
| 13 |
+
# ============================================================
|
| 14 |
+
class GoogleTools:
    """
    GoogleTools provides function tools to perform web searches
    using the Serper.dev API (Google Search), as a fallback for
    retrieving recent information from the web.
    """

    @staticmethod
    @function_tool
    @log_call
    def search(query: str, num_results: int = 3) -> str:
        """
        Perform a general Google search using Serper.dev API.

        Parameters:
        -----------
        query : str
            The search query string, e.g., "latest Tesla stock news".
        num_results : int, optional (default=3)
            Maximum number of search results to return.

        Returns:
        --------
        str
            Nicely formatted search results, or an error-message string on
            failure (this tool never raises — errors are returned as text
            so the calling agent can react to them).
        """
        try:
            api_key = os.getenv("SERPER_API_KEY")
            if not api_key:
                return "❌ Missing SERPER_API_KEY in environment variables."

            url = "https://google.serper.dev/search"
            headers = {
                "X-API-KEY": api_key,
                "Content-Type": "application/json"
            }
            payload = {
                "q": query,
                "gl": "us",  # country code (optional)
                "hl": "en",  # language code (optional)
            }

            # BUG FIX: no timeout means a hung upstream call blocks the whole
            # search pipeline indefinitely — bound the request.
            response = requests.post(url, headers=headers, json=payload, timeout=15)
            response.raise_for_status()
            data = response.json()

            organic_results = data.get("organic", [])
            if not organic_results:
                return "No search results found."

            formatted = []
            for item in organic_results[:num_results]:
                title = item.get("title", "No title")
                link = item.get("link", "No link")
                snippet = item.get("snippet", "")
                formatted.append(
                    f"Title: {title}\nLink: {link}\nSnippet: {snippet}\n"
                )

            return "\n".join(formatted)

        except requests.exceptions.RequestException as e:
            return f"⚠️ Network error during Google search: {e}"
        except Exception as e:
            return f"⚠️ Error performing Google search: {e}"
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
# ============================================================
|
| 83 |
+
# πΉ OPENAI & OTHER MODEL TOOLS
|
| 84 |
+
# ============================================================
|
| 85 |
+
class ModelTools:
    """
    ModelTools provides function tools to interact with LLM APIs
    such as OpenAI, Gemini, or Groq.

    Features:
    - Send prompts to a language model.
    - Receive structured text completions.
    - Can be extended to support multiple LLM providers.
    """

    @staticmethod
    @function_tool
    def query_openai(prompt: str, model: str = "gpt-4o-mini") -> str:
        """
        Query an OpenAI language model with a prompt.

        Parameters:
        -----------
        prompt : str
            User-provided prompt for the model.
        model : str, optional (default="gpt-4o-mini")
            Model name to query (e.g., "gpt-4o-mini", "gpt-4").

        Returns:
        --------
        str
            Model's response content as text; on any failure (network/API)
            an error-message string is returned instead of raising.
        """
        try:
            # Delayed import keeps the openai dependency optional at import time.
            from openai import OpenAI
            api_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
            completion = api_client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
            )
            return completion.choices[0].message.content
        except Exception as e:
            return f"Error querying OpenAI API: {e}"
|
tools/time_tools.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime
|
| 2 |
+
from agents import function_tool
|
| 3 |
+
from core.logger import log_call
|
| 4 |
+
|
| 5 |
+
class TimeTools:
    """Provides tools related to current date and time."""

    @staticmethod
    @function_tool
    @log_call
    def current_datetime(format: str = "%Y-%m-%d %H:%M:%S") -> str:
        """
        Returns the current date and time as a formatted string.

        Args:
            format (str): Optional datetime format (default: "YYYY-MM-DD HH:MM:SS")

        Returns:
            str: Current date and time in the specified format
        """
        # Single expression: fetch the clock and format in one step.
        return datetime.now().strftime(format)
ui/app.py
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import asyncio
|
| 3 |
+
import time
|
| 4 |
+
import html
|
| 5 |
+
from datetime import datetime, UTC
|
| 6 |
+
from io import BytesIO
|
| 7 |
+
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
from reportlab.platypus import SimpleDocTemplate, Paragraph
|
| 10 |
+
from reportlab.lib.styles import getSampleStyleSheet
|
| 11 |
+
|
| 12 |
+
from appagents.orchestrator import Orchestrator
|
| 13 |
+
from agents import SQLiteSession
|
| 14 |
+
|
| 15 |
+
load_dotenv(override=True)

# --------------------
# Page config
# --------------------
st.set_page_config(page_title="Deep Research AI", layout="wide")

# --------------------
# Session-state init
# --------------------
# Map of session_id -> SQLiteSession, populated lazily by run_async_chunks.
if "session_store" not in st.session_state:
    st.session_state.session_store = {}

# BUG FIX: the previous value, str(id(st)), was the id of the shared
# streamlit module object — identical for every browser session in the
# process, so all users shared one session id (and one SQLite session
# file). A uuid4 gives each Streamlit session a unique identifier.
if "session_id" not in st.session_state:
    import uuid
    st.session_state.session_id = uuid.uuid4().hex

# Markdown of the most recently completed research report ("" = none yet).
if "final_report" not in st.session_state:
    st.session_state.final_report = ""

# True while a research run is in flight (disables the Run button).
if "button_disabled" not in st.session_state:
    st.session_state.button_disabled = False


# (dark mode removed - UI uses single light theme)
|
| 39 |
+
|
| 40 |
+
# --------------------
|
| 41 |
+
# CSS for light/dark and layout
|
| 42 |
+
# --------------------
|
| 43 |
+
# Single light theme for the whole app: widened main container, report
# "card" styling, fixed-size action buttons, and removal of Streamlit's
# markdown height clamp so long reports render in full. Injected once
# via st.markdown(..., unsafe_allow_html=True) below.
LIGHT_CSS = """
<style>
.block-container { max-width: 90% !important; margin-left:5% !important; margin-right:5% !important; padding-top:1.5rem; padding-bottom:2rem; }
h1 { font-size:2.2rem !important; text-align:center; color: #0b1220 !important; }
h2 { color: #0b1220 !important; }
h3 { color: #0b1220 !important; }
.report-box { background:#ffffff; padding:24px; border-radius:12px; border:1px solid #e9ecef; box-shadow:0 6px 18px rgba(23,43,77,0.04); font-size:1.05rem; line-height:1.65; white-space:pre-wrap; word-wrap:break-word; overflow-wrap:break-word; }
textarea, .stTextArea>div>div>textarea { font-size:1.05rem !important; }
.button-row { display:flex; justify-content:flex-start; gap:12px; margin-top:15px; margin-bottom:15px; flex-wrap: wrap; align-items: center; }
.button-row [data-testid="column"] { flex: 0 !important; }
.stButton { width: 100% !important; }
.stButton>button { padding: 0 !important; width: 180px !important; height: 48px !important; border-radius: 6px !important; border: 1px solid #d0d0d0 !important; font-weight: 500 !important; font-size: 0.9rem !important; white-space: normal !important; background-color: #f8f9fa !important; color: #0b1220 !important; transition: all 0.2s !important; display: flex !important; align-items: center !important; justify-content: center !important; line-height: 1.2 !important; }
.stButton>button:hover { background-color: #e9ecef !important; border-color: #999 !important; }
.stButton>button:active { background-color: #dee2e6 !important; }
.sidebar-title { font-weight:700; margin-bottom:8px; }
.history-item { padding:6px 8px; border-radius:8px; margin-bottom:6px; background: #fafafa; border:1px solid #eee; }
.small-muted { color:#6c757d; font-size:0.9rem; }
/* Remove truncation constraints on markdown output */
[data-testid="stMarkdown"] { max-height: none !important; height: auto !important; }
</style>
"""

st.markdown(LIGHT_CSS, unsafe_allow_html=True)
|
| 66 |
+
|
| 67 |
+
# --------------------
|
| 68 |
+
# Helpers: orchestrator streaming
|
| 69 |
+
# --------------------
|
| 70 |
+
async def run_async_chunks(query: str, session_id: str):
    """Yield report chunks from the Orchestrator for one user session.

    Lazily creates (and caches in st.session_state) a SQLiteSession per
    session_id so repeated runs in the same browser session share
    conversation state.
    """
    store = st.session_state.session_store
    if session_id not in store:
        store[session_id] = SQLiteSession(f"session_{session_id}.db")
    async for piece in Orchestrator(session=store[session_id]).run(query):
        yield piece
|
| 77 |
+
|
| 78 |
+
def safe_title_from_query(q: str):
    """Derive a short report title from the first line of *q*.

    Returns "Untitled Report" for blank/whitespace input; otherwise the
    first line of the query, truncated to 80 characters with a "..."
    suffix when longer.
    """
    text = q.strip()
    if not text:
        return "Untitled Report"
    headline = text.splitlines()[0]
    # keep titles at a readable length
    if len(headline) > 80:
        return headline[:80] + "..."
    return headline
|
| 85 |
+
|
| 86 |
+
# --------------------
|
| 87 |
+
# Export helpers
|
| 88 |
+
# --------------------
|
| 89 |
+
def make_pdf_bytes(text: str) -> bytes:
    """Convert markdown text to PDF with proper formatting.

    Supports # / ## / ### headings, bullet ("- ", "* ") and numbered
    lists, and basic **bold** / *italic* inline markup; everything else
    renders as a plain paragraph. Returns the finished PDF as bytes.
    """
    # FIX: was `import re` inside the per-line loop; import and compile
    # the inline-markup patterns once for the whole document.
    import re
    bold_star = re.compile(r'\*\*(.+?)\*\*')
    bold_under = re.compile(r'__(.+?)__')
    ital_star = re.compile(r'\*([^*]+?)\*')
    ital_under = re.compile(r'_([^_]+?)_')

    buf = BytesIO()
    doc = SimpleDocTemplate(buf, topMargin=0.5*72, bottomMargin=0.5*72, leftMargin=0.75*72, rightMargin=0.75*72)
    styles = getSampleStyleSheet()
    story = []

    # parse markdown: headings, lists, bold, italic
    lines = text.split("\n")
    for line in lines:
        stripped = line.strip()

        if not stripped:
            story.append(Paragraph(" ", styles["Normal"]))  # empty line
            continue

        # heading levels
        if stripped.startswith("# "):
            story.append(Paragraph(html.escape(stripped[2:]), styles["Heading1"]))
        elif stripped.startswith("## "):
            story.append(Paragraph(html.escape(stripped[3:]), styles["Heading2"]))
        elif stripped.startswith("### "):
            story.append(Paragraph(html.escape(stripped[4:]), styles["Heading3"]))
        elif stripped.startswith("- ") or stripped.startswith("* "):
            # bullet list
            story.append(Paragraph("β’ " + html.escape(stripped[2:]), styles["Normal"]))
        elif stripped[0].isdigit() and ". " in stripped[:4]:
            # numbered list
            story.append(Paragraph(html.escape(stripped), styles["Normal"]))
        else:
            # regular paragraph: escape HTML first, then re-introduce the
            # safe <b>/<i> tags ReportLab understands for markdown emphasis.
            # (A dead no-op replace("<b>", "<b>") line was removed here.)
            p_text = html.escape(stripped)

            p_text = bold_star.sub(r'<b>\1</b>', p_text)
            p_text = bold_under.sub(r'<b>\1</b>', p_text)
            # handle *italic* after **bold** to avoid double replacement
            p_text = ital_star.sub(r'<i>\1</i>', p_text)
            p_text = ital_under.sub(r'<i>\1</i>', p_text)

            story.append(Paragraph(p_text, styles["Normal"]))

    doc.build(story)
    buf.seek(0)
    return buf.read()
|
| 138 |
+
|
| 139 |
+
def make_md_bytes(text: str) -> bytes:
    """Encode the report markdown as UTF-8 bytes for download."""
    return bytes(text, encoding="utf-8")
|
| 141 |
+
|
| 142 |
+
def make_html_bytes(text: str, title="Deep Research Report") -> bytes:
    """Wrap the report text in a minimal standalone HTML page.

    The content is HTML-escaped and newlines become <br/> so plain text
    keeps its line structure. Returns the document as UTF-8 bytes.
    """
    safe_title = html.escape(title)
    body = html.escape(text).replace("\n", "<br/>")
    html_doc = f"""<!doctype html>
<html>
<head>
<meta charset="utf-8">
<title>{safe_title}</title>
<style>body{{font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial; padding:24px; max-width:900px; margin:auto; line-height:1.6; color: #0b1220; background: #ffffff }}</style>
</head>
<body>
<h1>{safe_title}</h1>
<div>{body}</div>
</body>
</html>"""
    return html_doc.encode("utf-8")
|
| 158 |
+
|
| 159 |
+
# --------------------
|
| 160 |
+
# Streaming runner (final output replaces trace)
|
| 161 |
+
# --------------------
|
| 162 |
+
def run_streaming(query: str, final_ph, status_ph):
    """Drive one research run: stream orchestrator output into the UI.

    Streams chunks into `status_ph`, shows a progress bar, and on
    completion stores the accumulated markdown in
    st.session_state.final_report, re-enables the Run button, and calls
    st.rerun() so the main script re-renders the final report.

    Args:
        query: the research topic (already stripped by the caller).
        final_ph: st.empty() placeholder for the final rendered report.
        status_ph: st.empty() placeholder for status / streaming text.
    """
    session_id = st.session_state.session_id

    # placeholders
    # status_ph = st.empty()
    progress_ph = st.empty()

    # reset final_report
    st.session_state.final_report = ""
    # track only the last received chunk
    last_chunk = ""
    progress_val = 0
    progress_bar = progress_ph.progress(progress_val)

    # ensure any prior final output is cleared while streaming
    try:
        final_ph.empty()
    except Exception:
        pass
    # status_ph.info("π Researching β streaming (final result only)...")

    async def _stream():
        # Consume the orchestrator's async generator, accumulating chunks.
        nonlocal progress_val, last_chunk
        status_ph.info("Streaming... receiving data")
        bStartChunkCollected = False
        async for chunk in run_async_chunks(query, session_id):
            # start collecting chunks once we see one beginning with #
            # (i.e. the first markdown heading marks the report proper;
            # earlier chunks are treated as preamble and discarded)
            if not bStartChunkCollected and chunk.strip().startswith("#"):
                bStartChunkCollected = True

            if bStartChunkCollected:
                last_chunk += chunk
                # render accumulated markdown in real-time so user sees content streaming
                status_ph.markdown(last_chunk)

            # advance progress heuristically; capped at 98 until completion
            progress_val = min(progress_val + 2, 98)
            progress_bar.progress(progress_val)

    # run async generator (compatibility fallback)
    # NOTE(review): asyncio.run raises RuntimeError if a loop is already
    # running in this thread; the fallback spins up a fresh loop instead.
    try:
        asyncio.run(_stream())
    except RuntimeError:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(_stream())
        loop.close()
    except Exception as e:
        # on exception, re-enable button and show error
        st.session_state.button_disabled = False
        status_ph.error(f"β Error during research: {str(e)}")
        progress_ph.empty()
        return

    # finalize
    progress_bar.progress(100)
    status_ph.success("β Research complete!")

    # set final_report to only the last yield (trim surrounding whitespace)
    md_text = last_chunk.strip()
    st.session_state.final_report = md_text
    progress_ph.empty()

    # re-enable button after completion
    st.session_state.button_disabled = False

    # history saving disabled (kept minimal in-memory state only)

    # render final output as Markdown into the dedicated placeholder
    # Use Streamlit's markdown renderer so headings, lists, links render correctly.
    if st.session_state.final_report:
        final_ph.markdown(st.session_state.final_report)
    else:
        final_ph.empty()

    # rerun to reflect button re-enable and final output
    st.rerun()
|
| 238 |
+
|
| 239 |
+
# Sidebar removed per UI request. Dark-mode and history removed.


# --------------------
# Main UI
# --------------------
st.title("π§ Deep Research (Powered by Agentic AI)")
st.write("What topic would you like to research?")

query = st.text_area("Enter your research topic", value="The impact of AI on the Healthcare Industry.", height=50, label_visibility="collapsed")

# Action row with buttons
col1, col2, col3, col4 = st.columns([2.0, 2.0, 2.0, 2.0])

with col1:
    run_clicked = st.button("π Run Deep Research", key="run", disabled=st.session_state.button_disabled)

# PDF and MD download buttons appear inline after a final_report exists
if st.session_state.final_report:
    with col2:
        # PDF generator stream - create bytes on demand
        pdf_bytes = make_pdf_bytes(st.session_state.final_report)
        st.download_button("π Download PDF", data=pdf_bytes, file_name="report.pdf", mime="application/pdf")

    with col3:
        # Markdown
        md_bytes = make_md_bytes(st.session_state.final_report)
        st.download_button("π Download MD", data=md_bytes, file_name="report.md", mime="text/markdown")

# placeholder for final report (used so streaming traces can be cleared)
final_ph = st.empty()

# placeholder for streaming status and progress updates
status_ph = st.empty()

# Two-rerun button pattern: the first click only sets button_disabled and
# reruns, so the button renders as disabled BEFORE the long-running
# research starts on the next pass.
# Run research if requested; disable button on click and re-run
if run_clicked and query.strip():
    st.session_state.button_disabled = True
    st.rerun()

# Execute streaming if button was disabled (i.e., on the rerun after click)
if st.session_state.button_disabled and query.strip():
    run_streaming(query.strip(), final_ph, status_ph)
elif not st.session_state.button_disabled:
    # if final_report exists (e.g., from previous run), show it in the final placeholder
    if st.session_state.final_report:
        # final_ph.markdown(f"<div class='report-box'>{st.session_state.final_report}</div>", unsafe_allow_html=True)
        final_ph.markdown(st.session_state.final_report, unsafe_allow_html=True)
    else:
        st.info("Enter a topic and press Run. Final report will replace streaming traces.")

# small debug caption
st.caption(f"Session: {st.session_state.session_id}")
|