Spaces:
Sleeping
Sleeping
Commit
·
c049b12
0
Parent(s):
add basic version
Browse files- .gitignore +13 -0
- .python-version +1 -0
- README.md +0 -0
- pyproject.toml +14 -0
- src/__init__.py +0 -0
- src/main.py +26 -0
- src/planner.py +28 -0
- src/report_generator.py +31 -0
- src/research_manager.py +73 -0
- src/web_search.py +17 -0
- uv.lock +0 -0
.gitignore
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python-generated files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[oc]
|
| 4 |
+
build/
|
| 5 |
+
dist/
|
| 6 |
+
wheels/
|
| 7 |
+
*.egg-info
|
| 8 |
+
|
| 9 |
+
# Virtual environments
|
| 10 |
+
.venv
|
| 11 |
+
|
| 12 |
+
# Environment variables
|
| 13 |
+
.env
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.12
|
README.md
ADDED
|
File without changes
|
pyproject.toml
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "deep-research"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Deep research agent that plans web searches, summarizes the results and writes a markdown report"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.12"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"black>=25.1.0",
|
| 9 |
+
"dotenv>=0.9.9",
|
| 10 |
+
"gradio>=5.33.1",
|
| 11 |
+
"isort>=6.0.1",
|
| 12 |
+
"openai>=1.86.0",
|
| 13 |
+
"openai-agents>=0.0.17",
|
| 14 |
+
]
|
src/__init__.py
ADDED
|
File without changes
|
src/main.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
|
| 4 |
+
from research_manager import ResearchManager
|
| 5 |
+
|
| 6 |
+
load_dotenv()
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
async def run(query: str):
    """Stream research output for ``query`` to the caller.

    A new ``ResearchManager`` is created for the request and every chunk
    produced by its ``run`` async generator is re-yielded in order.
    """
    manager = ResearchManager()
    async for update in manager.run(query):
        yield update
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# Single-page UI: a query box, a run button and a markdown pane that is
# updated with each chunk yielded by ``run``.
with gr.Blocks(theme=gr.themes.Default(primary_hue="yellow")) as ui:
    gr.Markdown("# Deep Research")
    query_textbox = gr.Textbox(
        label="What topic would you like to research?",
        # Shown as a hint instead of being the initial value, so that pressing
        # "Run" on an untouched box does not submit the hint text (including
        # its "e.g." prefix) as the research query.
        placeholder="e.g. How to create a Deep Research Agent?",
    )
    run_button = gr.Button("Run", variant="primary")
    report = gr.Markdown(label="Report")

    # Clicking the button and pressing Enter in the textbox both start a run.
    run_button.click(fn=run, inputs=query_textbox, outputs=report)
    query_textbox.submit(fn=run, inputs=query_textbox, outputs=report)

ui.launch()
|
src/planner.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from agents import Agent
|
| 2 |
+
from pydantic import BaseModel, Field
|
| 3 |
+
|
| 4 |
+
# Number of search terms the planner prompt asks for.
HOW_MANY_SEARCHES = 5

# System prompt for the planner agent. It is built from adjacent string
# literals rather than a backslash continuation inside a single literal, so
# the indentation of the source lines can never leak into the prompt text.
INSTRUCTIONS = (
    "You are a helpful research assistant. Given a query, come up with a set of web searches "
    f"to perform to best answer the query. Output {HOW_MANY_SEARCHES} terms to query for."
)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# One planned web search. Documented with comments rather than a class
# docstring so that the model definition itself carries no extra text.
class WebSearchItem(BaseModel):
    # Justification for the search; declared before the search term.
    reason: str = Field(
        description="Your reasoning for why this search is important to the query.",
    )
    # The term handed to the search step.
    query: str = Field(
        description="The search term to use for the web search.",
    )
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# Structured output of the planner agent: the list of searches to run.
class WebSearchPlan(BaseModel):
    searches: list[WebSearchItem] = Field(
        description="A list of web searches to perform to best answer the query.",
    )
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# Agent configured with the planner prompt and ``WebSearchPlan`` as its
# declared output type.
planner_agent = Agent(
    name="PlannerAgent",
    model="gpt-4o-mini",
    instructions=INSTRUCTIONS,
    output_type=WebSearchPlan,
)
|
src/report_generator.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from agents import Agent
|
| 2 |
+
from pydantic import BaseModel, Field
|
| 3 |
+
|
| 4 |
+
# System prompt for the writer agent: three newline-separated paragraphs
# covering the role, the outline-then-report workflow and the output format.
INSTRUCTIONS = "\n".join(
    [
        "You are a senior researcher tasked with writing a cohesive report for a research query. "
        "You will be provided with the original query, and some initial research done by a research assistant.",
        "You should first come up with an outline for the report that describes the structure and "
        "flow of the report. Then, generate the report and return that as your final output.",
        "The final output should be in markdown format, and it should be lengthy and detailed. Aim "
        "for 5-10 pages of content, at least 1000 words.",
    ]
)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# Structured output of the writer agent. Documented with comments rather than
# a class docstring so that the model definition itself carries no extra text.
class ReportData(BaseModel):
    # Brief overview of the findings.
    short_summary: str = Field(
        description="A short 2-3 sentence summary of the findings.",
    )

    # The full report text.
    markdown_report: str = Field(
        description="The final report",
    )

    # Ideas for further research.
    follow_up_questions: list[str] = Field(
        description="Suggested topics to research further",
    )
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# Agent configured with the writer prompt and ``ReportData`` as its declared
# output type.
writer_agent = Agent(
    name="WriterAgent",
    model="gpt-4o-mini",
    instructions=INSTRUCTIONS,
    output_type=ReportData,
)
|
src/research_manager.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
|
| 3 |
+
from agents import Runner, gen_trace_id, trace
|
| 4 |
+
|
| 5 |
+
from planner import WebSearchItem, WebSearchPlan, planner_agent
|
| 6 |
+
from report_generator import ReportData, writer_agent
|
| 7 |
+
from web_search import search_agent
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class ResearchManager:
    """Coordinate the plan -> search -> write pipeline for one research query."""

    async def run(self, query: str):
        """Run the deep research process, yielding status updates and the final report.

        Args:
            query: The topic to research.

        Yields:
            Two progress strings, followed by the markdown text of the report.
        """
        trace_id = gen_trace_id()
        with trace("Research trace", trace_id=trace_id):
            print("Starting research...")
            search_plan = await self.plan_searches(query)
            yield "Searches planned, starting to search..."
            search_results = await self.perform_searches(search_plan)
            yield "Searches complete, writing report..."
            report = await self.write_report(query, search_results)
            yield report.markdown_report

    async def plan_searches(self, query: str) -> WebSearchPlan:
        """Ask the planner agent which searches to perform for the query.

        Args:
            query: The topic to research.

        Returns:
            The planner agent's final output as a ``WebSearchPlan``.
        """
        print("Planning searches...")
        result = await Runner.run(planner_agent, f"Query: {query}")
        plan = result.final_output_as(WebSearchPlan)
        print(f"Will perform {len(plan.searches)} searches")
        return plan

    async def perform_searches(self, search_plan: WebSearchPlan) -> list[str]:
        """Run every planned search concurrently and collect the summaries.

        Args:
            search_plan: The plan whose ``searches`` are executed.

        Returns:
            Summaries of the searches that succeeded, in completion order.
            Searches for which ``search`` returned ``None`` are left out.
        """
        print("Searching...")
        tasks = [
            asyncio.create_task(self.search(item)) for item in search_plan.searches
        ]
        results: list[str] = []
        for num_completed, finished in enumerate(asyncio.as_completed(tasks), start=1):
            summary = await finished
            if summary is not None:
                results.append(summary)
            print(f"Searching... {num_completed}/{len(tasks)} completed")
        print("Finished searching")
        return results

    async def search(self, item: WebSearchItem) -> str | None:
        """Run the search agent for a single planned search.

        Args:
            item: The planned search (term and reason).

        Returns:
            The agent's final output converted to ``str``, or ``None`` if the
            agent run raised.
        """
        search_input = f"Search term: {item.query}\nReason for searching: {item.reason}"
        try:
            result = await Runner.run(search_agent, search_input)
        except Exception as exc:
            # Best-effort: one failed search must not abort the whole run, but
            # report it so that a missing summary is not silently unexplained.
            print(f"Search for {item.query!r} failed: {exc!r}")
            return None
        return str(result.final_output)

    async def write_report(self, query: str, search_results: list[str]) -> ReportData:
        """Ask the writer agent to produce the report from the search summaries.

        Args:
            query: The original research topic.
            search_results: Summaries returned by ``perform_searches``.

        Returns:
            The writer agent's final output as ``ReportData``.
        """
        print("Thinking about report...")
        report_input = (
            f"Original query: {query}\nSummarized search results: {search_results}"
        )
        result = await Runner.run(writer_agent, report_input)

        print("Finished writing report")
        return result.final_output_as(ReportData)
|
src/web_search.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from agents import Agent, ModelSettings, WebSearchTool
|
| 2 |
+
|
| 3 |
+
# System prompt for the search agent. Wording fixes relative to the earlier
# text: "must be 2-3 paragraphs", "succinctly", "it's vital".
INSTRUCTIONS = (
    "You are a research assistant. Given a search term, you search the web for that term and "
    "produce a concise summary of the results. The summary must be 2-3 paragraphs and less than 300 "
    "words. Capture the main points. Write succinctly, no need to have complete sentences or good "
    "grammar. This will be consumed by someone synthesizing a report, so it's vital you capture the "
    "essence and ignore any fluff. Do not include any additional commentary other than the summary itself."
)
|
| 10 |
+
|
| 11 |
+
# Agent that summarises web results for one search term. It is given a single
# web-search tool (context size "low") and its tool choice is "required".
search_agent = Agent(
    name="Search agent",
    model="gpt-4o-mini",
    model_settings=ModelSettings(tool_choice="required"),
    tools=[WebSearchTool(search_context_size="low")],
    instructions=INSTRUCTIONS,
)
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|