mishrabp commited on
Commit
bfe9585
Β·
verified Β·
1 Parent(s): 6778a49

Upload folder using huggingface_hub

Browse files
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

ENV PYTHONUNBUFFERED=1 \
    DEBIAN_FRONTEND=noninteractive

WORKDIR /app

# System deps
RUN apt-get update && apt-get install -y --no-install-recommends \
    git build-essential curl \
    && rm -rf /var/lib/apt/lists/*

# Install uv
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
ENV PATH="/root/.local/bin:$PATH"

# Copy project metadata first so the dependency layer is cached across code changes
COPY pyproject.toml .
COPY uv.lock .

# Install dependencies using uv.
# NOTE(fix): `uv sync` has no `--system` flag (that belongs to `uv pip install`);
# it installs into a project virtualenv at /app/.venv instead.
RUN uv sync --frozen --no-dev

# Put the project virtualenv on PATH so `streamlit` in CMD resolves.
ENV PATH="/app/.venv/bin:$PATH"

# Copy your source code
COPY . .

EXPOSE 7860

CMD ["streamlit", "run", "ui/app.py", "--server.port=7860", "--server.address=0.0.0.0", "--server.headless=true"]
README.md CHANGED
@@ -1,10 +1,50 @@
1
  ---
2
- title: Deep Research
3
- emoji: 🏒
4
- colorFrom: indigo
5
- colorTo: indigo
6
- sdk: docker
 
 
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: AI Deep Researcher # Give your app a title
3
+ emoji: πŸ€– # Pick an emoji
4
+ colorFrom: indigo # Theme start color
5
+ colorTo: blue # Theme end color
6
+ sdk: docker # SDK type
7
+ sdk_version: "4.39.0" # Ignored by the docker SDK; kept only for reference
8
+ app_file: ui/app.py # <-- points to your app.py inside ui/
9
  pinned: false
10
  ---
11
 
12
+ # AI Deep Researcher
13
+
14
+ **AI Deep Researcher** is a generative AI learning project built using the OpenAI Agentic Framework. This app performs deep-level web research based on user queries and generates a well-structured, consolidated report.
15
+
16
+ To achieve this, the project integrates the following technologies and AI features:
17
+ - **OpenAI SDK**
18
+ - **OpenAI Agents**
19
+ - **OpenAI WebSearch Tool**
20
+ - **Serper API** - a free alternative to OpenAI WebSearch Tool (https://serper.dev/api-keys)
21
+ - **News API** (https://newsapi.org/v2/everything)
22
+ - **SendGrid** (for emailing report)
23
+ - **LLMs** - (OpenAI, Gemini, Groq)
24
+
25
+
26
+ ## How it works
27
+ The system is a multi-agent solution, where each agent has a specific responsibility:
28
+
29
+ 1. **Planner Agent**
30
+ - Receives the user query and builds a structured query plan.
31
+
32
+ 2. **Guardrail Agent**
33
+ - Validates user input and ensures compliance.
34
+ - Stops the workflow if the input contains inappropriate or unparliamentary words.
35
+
36
+ 3. **Search Agent**
37
+ - Executes the query plan.
38
+ - Runs multiple web searches in parallel to gather data.
39
+
40
+ 4. **Writer Agent**
41
+ - Reads results from all search agents.
42
+ - Generates a well-formatted, consolidated report.
43
+
44
+ 5. **Email Agent**
45
+ - Responsible for sending the report via email using SendGrid.
46
+
47
+ 6. **Orchestrator**
48
+ - The entry point of the system.
49
+ - Facilitates communication and workflow between all agents.
50
+
appagents/__init__.py ADDED
File without changes
appagents/email_agent.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from typing import Dict

import sendgrid
from sendgrid.helpers.mail import Email, Mail, Content, To
from agents import Agent, function_tool
from core.logger import log_call


@function_tool
@log_call
def send_email(subject: str, html_body: str) -> Dict[str, str]:
    """Send an email with the given subject and HTML body.

    Sender and recipient default to the project addresses but can be
    overridden via the EMAIL_FROM / EMAIL_TO environment variables, so the
    tool is usable outside the author's own account.
    """
    sg = sendgrid.SendGridAPIClient(api_key=os.environ.get('SENDGRID_API_KEY'))
    from_email = Email(os.environ.get("EMAIL_FROM", "bm80177@gmail.com"))  # verified sender
    to_email = To(os.environ.get("EMAIL_TO", "bibhup_mishra@yahoo.com"))   # recipient
    content = Content("text/html", html_body)
    mail = Mail(from_email, to_email, subject, content).get()
    response = sg.client.mail.send.post(request_body=mail)
    print("Email response", response.status_code)
    return {"status": "success"}


INSTRUCTIONS = """You are able to send a nicely formatted HTML email based on a detailed report.
You will be provided with a detailed report. You should use your tool to send one email, providing the
report converted into clean, well presented HTML with an appropriate subject line."""

# Agent whose only capability is the send_email tool above.
email_agent = Agent(
    name="Email agent",
    instructions=INSTRUCTIONS,
    tools=[send_email],
    model="gpt-4o-mini",
)
appagents/guardrail_agent.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from pydantic import BaseModel
from agents import (
    Agent,
    Runner,
    input_guardrail,
    GuardrailFunctionOutput,
)
from tools.time_tools import TimeTools
from openai import AsyncOpenAI


class UnparliamentaryCheckOutput(BaseModel):
    """Structured verdict emitted by the language-check agent."""
    has_unparliamentary_language: bool
    explanation: str


# LLM agent that classifies the user's input against the language policy.
guardrail_agent = Agent(
    name="Unparliamentary language check",
    instructions=(
        "Analyze the user input and determine if it contains any unparliamentary, "
        "offensive, or disrespectful language. "
        "If it does, set has_unparliamentary_language=true and explain briefly why. "
        "Otherwise, set it to false."
    ),
    output_type=UnparliamentaryCheckOutput,
    model="gpt-4o-mini",
)


@input_guardrail
async def guardrail_against_unparliamentary(ctx, agent, message: str):
    """Guardrail function that blocks messages with unparliamentary words."""
    verdict = await Runner.run(guardrail_agent, message, context=ctx.context)
    flagged = verdict.final_output.has_unparliamentary_language

    # Expose the full structured verdict so callers can surface the explanation.
    return GuardrailFunctionOutput(
        output_info={"found_unparliamentary_word": verdict.final_output.model_dump()},
        tripwire_triggered=flagged,
    )
appagents/orchestrator.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from agents import Runner, trace, gen_trace_id, SQLiteSession
from appagents.search_agent import search_agent
from appagents.planner_agent import planner_agent, WebSearchItem, WebSearchPlan
from appagents.writer_agent import writer_agent, ReportData
from appagents.email_agent import email_agent
from agents.exceptions import InputGuardrailTripwireTriggered
from core.logger import log_call
import asyncio


class Orchestrator:
    """Coordinates the planner, search, writer and email agents end-to-end."""

    def __init__(self, session: SQLiteSession | None = None):
        # NOTE(fix): SQLiteSession requires a session id as its first argument;
        # the original bare SQLiteSession() call raised TypeError whenever no
        # session was passed in.
        self.session = session or SQLiteSession("deep_research")

    @log_call
    async def run(self, query: str):
        """Run the deep research process, yielding status updates and the final report."""
        trace_id = gen_trace_id()
        with trace("Deep Research Orchestrator", trace_id=trace_id):
            print(f"View trace: https://platform.openai.com/traces/trace?trace_id={trace_id}")
            yield f"View trace: https://platform.openai.com/traces/trace?trace_id={trace_id}"
            print("Starting research...")
            search_plan = await self.plan_searches(query)

            # An empty plan means the guardrail tripped or planning failed.
            if not search_plan or not getattr(search_plan, "searches", []):
                note = getattr(search_plan, "note", "")
                if "unparliamentary" in note.lower():
                    print("⚠️ Guardrail triggered – unparliamentary language detected.")
                    yield note
                else:
                    yield note or "No search results found, ending research."
                return

            yield "Searches planned, starting to search..."
            search_results = await self.perform_searches(search_plan)
            yield "Searches complete, writing report..."
            report = await self.write_report(query, search_results)
            yield "Report written, sending email..."
            # await self.send_email(report)
            # yield "Email sent, research complete"
            yield report.markdown_report

    @log_call
    async def plan_searches(self, query: str) -> WebSearchPlan:
        """Plan the searches to perform for the query.

        Returns an empty plan (with an explanatory ``note``) instead of raising
        when the input guardrail trips or planning fails, so run() can report
        the problem to the UI.
        """
        print("Planning searches...")
        try:
            result = await Runner.run(
                planner_agent,
                f"Query: {query}",
                session=self.session,
            )
            print(f"Will perform {len(result.final_output.searches)} searches")
            return result.final_output_as(WebSearchPlan)

        except InputGuardrailTripwireTriggered as e:
            # NOTE(fix): the SDK exposes the tripped guardrail on
            # e.guardrail_result (the original read a non-existent e.result, so
            # the explanation was always empty). Stay defensive across versions.
            explanation = ""
            guardrail_result = getattr(e, "guardrail_result", None)
            if guardrail_result is not None:
                info = getattr(guardrail_result.output, "output_info", None) or {}
                explanation = info.get("found_unparliamentary_word", {}).get("explanation", "")
            print("⚠️ Guardrail triggered – unparliamentary language detected.")
            return WebSearchPlan(searches=[], note=f"Blocked due to unparliamentary input. {explanation}")

        except Exception as e:
            print(f"❌ Error during planning: {e}")
            return WebSearchPlan(searches=[], note="An error occurred while planning searches.")

    @log_call
    async def perform_searches(self, search_plan: WebSearchPlan) -> list[str]:
        """Run all planned searches concurrently, dropping any that failed."""
        print("Searching...")
        num_completed = 0
        tasks = [asyncio.create_task(self.search(item)) for item in search_plan.searches]
        results = []
        for task in asyncio.as_completed(tasks):
            result = await task
            if result is not None:
                results.append(result)
            num_completed += 1
            print(f"Searching... {num_completed}/{len(tasks)} completed")
        print("Finished searching")
        return results

    @log_call
    async def search(self, item: WebSearchItem) -> str | None:
        """Run a single web search; returns None on failure so one bad search
        does not abort the whole batch."""
        search_input = f"Search term: {item.query}\nReason for searching: {item.reason}"
        try:
            result = await Runner.run(
                search_agent,
                search_input,
            )
            return str(result.final_output)
        except Exception:
            return None

    @log_call
    async def write_report(self, query: str, search_results: list[str]) -> ReportData:
        """Synthesize the final report from the collected search summaries."""
        print("Thinking about report...")
        writer_input = f"Original query: {query}\nSummarized search results: {search_results}"
        result = await Runner.run(
            writer_agent,
            writer_input,
        )
        print("Finished writing report")
        return result.final_output_as(ReportData)

    @log_call
    async def send_email(self, report: ReportData) -> ReportData:
        """Hand the report to the email agent; returns the report for chaining.

        NOTE(fix): the original was annotated ``-> None`` while returning the
        report; the annotation now matches the behavior.
        """
        print("Writing email...")
        await Runner.run(
            email_agent,
            report.markdown_report,
        )
        print("Email sent")
        return report
appagents/planner_agent.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from pydantic import BaseModel, Field
from agents import Agent, OpenAIChatCompletionsModel
from openai import AsyncOpenAI
from tools.time_tools import TimeTools
from appagents.guardrail_agent import guardrail_against_unparliamentary

# Number of search terms the planner is asked to produce.
HOW_MANY_SEARCHES = 10

INSTRUCTIONS = f"You are a helpful research assistant. Given a query, come up with a set of web searches \
to perform to best answer the query. Output {HOW_MANY_SEARCHES} terms to query for. \
Use the tool to find current date & time, and use it where relevant to inform your search and summary."


class WebSearchItem(BaseModel):
    reason: str = Field(description="Your reasoning for why this search is important to the query.")
    query: str = Field(description="The search term to use for the web search.")
    current_date_time: str = Field(description="Current date and time.")


class WebSearchPlan(BaseModel):
    searches: list[WebSearchItem] = Field(description="A list of web searches to perform to best answer the query.")
    # NOTE(fix): the orchestrator constructs WebSearchPlan(searches=[], note=...)
    # to report guardrail/planning failures; without this field pydantic v2
    # silently dropped that message. The default keeps existing outputs valid.
    note: str = Field(default="", description="Optional status note when no searches are planned.")


# Alternative model backends (OpenAI-compatible endpoints).
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash", openai_client=gemini_client)

GROQ_BASE_URL = "https://api.groq.com/openai/v1"
groq_api_key = os.getenv('GROQ_API_KEY')
groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)

openai_model = "gpt-4.1-mini"

# Note: Many models do not like tool call and json output_schema used together.

planner_agent = Agent(
    name="PlannerAgent",
    instructions=INSTRUCTIONS,
    model=openai_model,
    tools=[TimeTools.current_datetime],
    output_type=WebSearchPlan,
    input_guardrails=[guardrail_against_unparliamentary],
)
appagents/search_agent.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from agents import Agent, OpenAIChatCompletionsModel, WebSearchTool
from openai import AsyncOpenAI

from agents.model_settings import ModelSettings
from tools.google_tools import GoogleTools

INSTRUCTIONS = "You are a research assistant. Given a search term, you search the web for that term and \
produce a concise summary of the results. The summary must 3-5 paragraphs and less than 500 \
words. Capture the main points. Write succintly, no need to have complete sentences or good \
grammar. This will be consumed by someone synthesizing a report, so it's vital you capture the \
essence and ignore any fluff. Do not include any additional commentary other than the summary itself."

# Gemini served through its OpenAI-compatible endpoint.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash", openai_client=gemini_client)


# -----------------------------
# CONNECT TO MCP SERVER
# -----------------------------
async def setup_mcp_tools():
    """
    Starts the MCP server via stdio and returns its list of tools
    that can be attached to the agent.

    NOTE(fix): MCPServerStdio was referenced without ever being imported, so
    calling this function raised NameError; it is now imported locally. The
    import path assumes the openai-agents SDK's `agents.mcp` module — confirm
    against the installed SDK version.
    """
    from agents.mcp import MCPServerStdio

    # Absolute path ensures the script is found even from a notebook
    script_path = os.path.abspath("../mcp/search-server.py")

    params = {
        "command": "uvx",  # or "uv" depending on your environment
        "args": ["run", script_path],
    }

    # Start MCP server and list available tools
    async with MCPServerStdio(
        params=params,
        client_session_timeout_seconds=60,
        verbose=True,  # helpful for debugging
    ) as server:
        mcp_tools = await server.list_tools()
        print(f"✅ Connected to MCP server with {len(mcp_tools)} tool(s).")
        return mcp_tools


# Serper-backed Google search tool with a Gemini model. WebSearchTool is the
# OpenAI-hosted alternative, kept commented for easy switching.
search_agent = Agent(
    name="Search agent",
    instructions=INSTRUCTIONS,
    # tools=[WebSearchTool(search_context_size="low")],
    tools=[GoogleTools.search],
    model=gemini_model,
    model_settings=ModelSettings(tool_choice="required"),
)
+ )
87
+
appagents/writer_agent.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from pydantic import BaseModel, Field
from agents import Agent, OpenAIChatCompletionsModel, WebSearchTool
from openai import AsyncOpenAI

INSTRUCTIONS = (
    "You are a senior researcher tasked with writing a cohesive report for a research query. "
    "You will be provided with the original query, and some initial research done by a research assistant.\n"
    "You should first come up with an outline for the report that describes the structure and "
    "flow of the report. Then, generate the report and return that as your final output.\n"
    "The final output should be in markdown format, and it should be lengthy and detailed. Aim "
    "for 5-10 pages of content, at least 1000 words."
)


class ReportData(BaseModel):
    """Structured result produced by the writer agent."""

    short_summary: str = Field(description="A short 2-3 sentence summary of the findings.")
    markdown_report: str = Field(description="The final report")
    follow_up_questions: list[str] = Field(description="Suggested topics to research further")


# Gemini served through its OpenAI-compatible endpoint.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash", openai_client=gemini_client)

writer_agent = Agent(
    name="WriterAgent",
    instructions=INSTRUCTIONS,
    model=gemini_model,
    output_type=ReportData,
)
core/__init__.py ADDED
File without changes
core/logger.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import functools
import datetime
import asyncio


def log_call(func):
    """
    A decorator that logs when a function is called and when it fails.

    NOTE(fix): this decorator is applied to async methods (e.g. in the
    orchestrator). The original sync wrapper returned the coroutine without
    awaiting it, so exceptions raised inside it never reached the except
    block. Coroutine functions now get an async wrapper that awaits the call.
    Async *generator* functions still pass through the sync wrapper, which
    simply returns the generator object — same behavior as before.
    """

    def _format_args(args, kwargs):
        # One repr per positional arg plus k=v pairs for keywords.
        return ", ".join(
            [repr(a) for a in args] + [f"{k}={v!r}" for k, v in kwargs.items()]
        )

    if asyncio.iscoroutinefunction(func):
        @functools.wraps(func)
        async def async_wrapper(*args, **kwargs):
            timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            print(f"[{timestamp}] 🚀 Calling: {func.__name__}({_format_args(args, kwargs)})")
            try:
                return await func(*args, **kwargs)
            except Exception as e:
                print(f"[{timestamp}] ❌ Error in {func.__name__}: {e}")
                raise
        return async_wrapper

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(f"[{timestamp}] 🚀 Calling: {func.__name__}({_format_args(args, kwargs)})")
        try:
            return func(*args, **kwargs)
        except Exception as e:
            print(f"[{timestamp}] ❌ Error in {func.__name__}: {e}")
            raise
    return wrapper
prompts/__init__.py ADDED
File without changes
run.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import subprocess
import sys


def main() -> int:
    """Launch the Streamlit UI and return its exit code."""
    # Use module execution to guarantee Streamlit runs inside the current interpreter
    completed = subprocess.run([
        sys.executable, "-m", "streamlit",
        "run",
        os.path.join("ui", "app.py"),
        "--server.runOnSave", "true",
    ])
    return completed.returncode


if __name__ == "__main__":
    # NOTE(fix): guarded so importing this module no longer launches the
    # server as a side effect; the exit code is propagated to the shell.
    sys.exit(main())
tools/__init__.py ADDED
File without changes
tools/google_tools.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from dotenv import load_dotenv
4
+ from agents import function_tool
5
+ from core.logger import log_call
6
+
7
+ # Load environment variables once
8
+ load_dotenv()
9
+
10
+
11
+ # ============================================================
12
+ # πŸ”Ή GOOGLE SEARCH TOOLSET (Serper.dev API)
13
+ # ============================================================
14
class GoogleTools:
    """
    GoogleTools provides function tools to perform web searches
    using the Serper.dev API (Google Search). It serves as a fallback for
    retrieving recent information from the web.
    """

    @staticmethod
    @function_tool
    @log_call
    def search(query: str, num_results: int = 3) -> str:
        """
        Perform a general Google search using Serper.dev API.

        Parameters:
        -----------
        query : str
            The search query string, e.g., "latest Tesla stock news".
        num_results : int, optional (default=3)
            Maximum number of search results to return.

        Returns:
        --------
        str
            Nicely formatted search results. Never raises: failures come
            back as error-message strings so the calling agent always gets
            usable text.
        """
        try:
            api_key = os.getenv("SERPER_API_KEY")
            if not api_key:
                return "❌ Missing SERPER_API_KEY in environment variables."

            url = "https://google.serper.dev/search"
            headers = {
                "X-API-KEY": api_key,
                "Content-Type": "application/json"
            }
            payload = {
                "q": query,
                "gl": "us",  # country code (optional)
                "hl": "en",  # language code (optional)
                "num": num_results,  # ask the API for only what we need
            }

            # NOTE(fix): a timeout prevents the agent from hanging forever
            # when Serper is slow or unreachable.
            response = requests.post(url, headers=headers, json=payload, timeout=15)
            response.raise_for_status()
            data = response.json()

            organic_results = data.get("organic", [])
            if not organic_results:
                return "No search results found."

            formatted = []
            for item in organic_results[:num_results]:
                title = item.get("title", "No title")
                link = item.get("link", "No link")
                snippet = item.get("snippet", "")
                formatted.append(
                    f"Title: {title}\nLink: {link}\nSnippet: {snippet}\n"
                )

            return "\n".join(formatted)

        except requests.exceptions.RequestException as e:
            return f"⚠️ Network error during Google search: {e}"
        except Exception as e:
            return f"⚠️ Error performing Google search: {e}"
80
+
81
+
82
+ # ============================================================
83
+ # πŸ”Ή OPENAI & OTHER MODEL TOOLS
84
+ # ============================================================
85
class ModelTools:
    """
    ModelTools provides function tools to interact with LLM APIs
    such as OpenAI, Gemini, or Groq.

    Features:
    - Send prompts to a language model.
    - Receive structured text completions.
    - Can be extended to support multiple LLM providers.
    """

    @staticmethod
    @function_tool
    def query_openai(prompt: str, model: str = "gpt-4o-mini") -> str:
        """
        Query an OpenAI language model with a prompt.

        Args:
            prompt: User-provided prompt for the model.
            model: Model name to query (e.g., "gpt-4o-mini", "gpt-4").

        Returns:
            The model's response content as text. On a network/API failure
            an error-message string is returned instead of raising.
        """
        try:
            # Delayed import so the module loads even without the SDK installed.
            from openai import OpenAI

            client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
            completion = client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
            )
            return completion.choices[0].message.content
        except Exception as e:
            return f"Error querying OpenAI API: {e}"
tools/time_tools.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from datetime import datetime
from agents import function_tool
from core.logger import log_call


class TimeTools:
    """Provides tools related to current date and time."""

    @staticmethod
    @function_tool
    @log_call
    def current_datetime(format: str = "%Y-%m-%d %H:%M:%S") -> str:
        """
        Return the current date and time as a formatted string.

        Args:
            format (str): Optional strftime format (default: "YYYY-MM-DD HH:MM:SS").
                The name shadows the builtin `format` but is kept for
                backward compatibility with the tool's schema.

        Returns:
            str: Current date and time in the specified format
        """
        return datetime.now().strftime(format)
ui/app.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import asyncio
3
+ import time
4
+ import html
5
+ from datetime import datetime, UTC
6
+ from io import BytesIO
7
+
8
+ from dotenv import load_dotenv
9
+ from reportlab.platypus import SimpleDocTemplate, Paragraph
10
+ from reportlab.lib.styles import getSampleStyleSheet
11
+
12
+ from appagents.orchestrator import Orchestrator
13
+ from agents import SQLiteSession
14
+
15
+ load_dotenv(override=True)
16
+
17
+ # --------------------
18
+ # Page config
19
+ # --------------------
20
+ st.set_page_config(page_title="Deep Research AI", layout="wide")
21
+
22
+ # --------------------
23
+ # Session-state init
24
+ # --------------------
25
+ if "session_store" not in st.session_state:
26
+ st.session_state.session_store = {}
27
+
28
+ if "session_id" not in st.session_state:
29
+ st.session_state.session_id = str(id(st))
30
+
31
+ if "final_report" not in st.session_state:
32
+ st.session_state.final_report = ""
33
+
34
+ if "button_disabled" not in st.session_state:
35
+ st.session_state.button_disabled = False
36
+
37
+
38
+ # (dark mode removed - UI uses single light theme)
39
+
40
+ # --------------------
41
+ # CSS for light/dark and layout
42
+ # --------------------
43
+ LIGHT_CSS = """
44
+ <style>
45
+ .block-container { max-width: 90% !important; margin-left:5% !important; margin-right:5% !important; padding-top:1.5rem; padding-bottom:2rem; }
46
+ h1 { font-size:2.2rem !important; text-align:center; color: #0b1220 !important; }
47
+ h2 { color: #0b1220 !important; }
48
+ h3 { color: #0b1220 !important; }
49
+ .report-box { background:#ffffff; padding:24px; border-radius:12px; border:1px solid #e9ecef; box-shadow:0 6px 18px rgba(23,43,77,0.04); font-size:1.05rem; line-height:1.65; white-space:pre-wrap; word-wrap:break-word; overflow-wrap:break-word; }
50
+ textarea, .stTextArea>div>div>textarea { font-size:1.05rem !important; }
51
+ .button-row { display:flex; justify-content:flex-start; gap:12px; margin-top:15px; margin-bottom:15px; flex-wrap: wrap; align-items: center; }
52
+ .button-row [data-testid="column"] { flex: 0 !important; }
53
+ .stButton { width: 100% !important; }
54
+ .stButton>button { padding: 0 !important; width: 180px !important; height: 48px !important; border-radius: 6px !important; border: 1px solid #d0d0d0 !important; font-weight: 500 !important; font-size: 0.9rem !important; white-space: normal !important; background-color: #f8f9fa !important; color: #0b1220 !important; transition: all 0.2s !important; display: flex !important; align-items: center !important; justify-content: center !important; line-height: 1.2 !important; }
55
+ .stButton>button:hover { background-color: #e9ecef !important; border-color: #999 !important; }
56
+ .stButton>button:active { background-color: #dee2e6 !important; }
57
+ .sidebar-title { font-weight:700; margin-bottom:8px; }
58
+ .history-item { padding:6px 8px; border-radius:8px; margin-bottom:6px; background: #fafafa; border:1px solid #eee; }
59
+ .small-muted { color:#6c757d; font-size:0.9rem; }
60
+ /* Remove truncation constraints on markdown output */
61
+ [data-testid="stMarkdown"] { max-height: none !important; height: auto !important; }
62
+ </style>
63
+ """
64
+
65
+ st.markdown(LIGHT_CSS, unsafe_allow_html=True)
66
+
67
+ # --------------------
68
+ # Helpers: orchestrator streaming
69
+ # --------------------
70
+ async def run_async_chunks(query: str, session_id: str):
71
+ if session_id not in st.session_state.session_store:
72
+ st.session_state.session_store[session_id] = SQLiteSession(f"session_{session_id}.db")
73
+ session = st.session_state.session_store[session_id]
74
+ orchestrator = Orchestrator(session=session)
75
+ async for chunk in orchestrator.run(query):
76
+ yield chunk
77
+
78
+ def safe_title_from_query(q: str):
79
+ q = q.strip()
80
+ if not q:
81
+ return "Untitled Report"
82
+ first_line = q.splitlines()[0]
83
+ # limit length for title
84
+ return (first_line[:80] + "...") if len(first_line) > 80 else first_line
85
+
86
+ # --------------------
87
+ # Export helpers
88
+ # --------------------
89
+ def make_pdf_bytes(text: str) -> bytes:
90
+ """Convert markdown text to PDF with proper formatting."""
91
+ buf = BytesIO()
92
+ doc = SimpleDocTemplate(buf, topMargin=0.5*72, bottomMargin=0.5*72, leftMargin=0.75*72, rightMargin=0.75*72)
93
+ styles = getSampleStyleSheet()
94
+ story = []
95
+
96
+ # parse markdown: headings, lists, bold, italic
97
+ lines = text.split("\n")
98
+ for line in lines:
99
+ stripped = line.strip()
100
+
101
+ if not stripped:
102
+ story.append(Paragraph(" ", styles["Normal"])) # empty line
103
+ continue
104
+
105
+ # heading levels
106
+ if stripped.startswith("# "):
107
+ story.append(Paragraph(html.escape(stripped[2:]), styles["Heading1"]))
108
+ elif stripped.startswith("## "):
109
+ story.append(Paragraph(html.escape(stripped[3:]), styles["Heading2"]))
110
+ elif stripped.startswith("### "):
111
+ story.append(Paragraph(html.escape(stripped[4:]), styles["Heading3"]))
112
+ elif stripped.startswith("- ") or stripped.startswith("* "):
113
+ # bullet list
114
+ story.append(Paragraph("β€’ " + html.escape(stripped[2:]), styles["Normal"]))
115
+ elif stripped[0].isdigit() and ". " in stripped[:4]:
116
+ # numbered list
117
+ story.append(Paragraph(html.escape(stripped), styles["Normal"]))
118
+ else:
119
+ # regular paragraph with basic markdown formatting
120
+ # escape first, then replace with safe formatting tags
121
+ p_text = html.escape(stripped)
122
+
123
+ # handle **bold** (convert escaped ** back and wrap in <b> tags)
124
+ p_text = p_text.replace("&lt;b&gt;", "<b>").replace("&lt;/b&gt;", "</b>")
125
+ # Simple approach: replace **text** with <b>text</b>
126
+ import re
127
+ p_text = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', p_text)
128
+ p_text = re.sub(r'__(.+?)__', r'<b>\1</b>', p_text)
129
+ # handle *italic* β†’ <i>italic</i> carefully (avoid double replacement)
130
+ p_text = re.sub(r'\*([^*]+?)\*', r'<i>\1</i>', p_text)
131
+ p_text = re.sub(r'_([^_]+?)_', r'<i>\1</i>', p_text)
132
+
133
+ story.append(Paragraph(p_text, styles["Normal"]))
134
+
135
+ doc.build(story)
136
+ buf.seek(0)
137
+ return buf.read()
138
+
139
def make_md_bytes(text: str) -> bytes:
    """Return the report markdown encoded as UTF-8 bytes for download."""
    return bytes(text, "utf-8")
141
+
142
def make_html_bytes(text: str, title="Deep Research Report") -> bytes:
    """Wrap the escaped report text in a minimal standalone HTML page.

    Content is HTML-escaped and newlines become <br/> so plain text is
    preserved visually; returns the document as UTF-8 bytes.
    """
    escaped = html.escape(text)
    body = "<br/>".join(escaped.split("\n"))
    safe_title = html.escape(title)
    page = f"""<!doctype html>
<html>
<head>
<meta charset="utf-8">
<title>{safe_title}</title>
<style>body{{font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial; padding:24px; max-width:900px; margin:auto; line-height:1.6; color: #0b1220; background: #ffffff }}</style>
</head>
<body>
<h1>{safe_title}</h1>
<div>{body}</div>
</body>
</html>"""
    return page.encode("utf-8")
158
+
159
+ # --------------------
160
+ # Streaming runner (final output replaces trace)
161
+ # --------------------
162
def run_streaming(query: str, final_ph, status_ph):
    """Run the research pipeline for *query* and stream output into the UI.

    Chunks are accumulated starting from the first chunk whose stripped text
    begins with '#' (the report heading — earlier chunks are agent planning
    traces) and live-rendered into ``status_ph``. On completion the report is
    stored in ``st.session_state.final_report``, rendered into ``final_ph``,
    and the Run button is re-enabled.

    Parameters:
        query: the research topic (caller passes it already stripped).
        final_ph: st.empty() placeholder that receives the final report.
        status_ph: st.empty() placeholder for live status / streamed markdown.
    """
    session_id = st.session_state.session_id

    progress_ph = st.empty()

    # reset any previous report and streaming state
    st.session_state.final_report = ""
    collected = ""  # accumulated report markdown
    progress_val = 0
    progress_bar = progress_ph.progress(progress_val)

    # ensure any prior final output is cleared while streaming
    try:
        final_ph.empty()
    except Exception:
        pass

    async def _stream():
        nonlocal progress_val, collected
        status_ph.info("Streaming... receiving data")
        collecting = False
        async for chunk in run_async_chunks(query, session_id):
            # start collecting chunks once we see one beginning with '#'
            if not collecting and chunk.strip().startswith("#"):
                collecting = True

            if collecting:
                collected += chunk
                # render accumulated markdown in real time so the user
                # sees the report streaming in
                status_ph.markdown(collected)

            # progress is synthetic (chunk count, capped below 100 until done)
            progress_val = min(progress_val + 2, 98)
            progress_bar.progress(progress_val)

    try:
        try:
            asyncio.run(_stream())
        except RuntimeError:
            # asyncio.run() refuses to start when a loop is already running;
            # fall back to a dedicated loop. Fix: close the loop in a finally
            # so it is released even when the coroutine raises, and let such
            # errors reach the outer handler (previously they escaped
            # unhandled, leaving the Run button permanently disabled).
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                loop.run_until_complete(_stream())
            finally:
                loop.close()
    except Exception as e:
        # on any failure: re-enable the button and surface the error
        st.session_state.button_disabled = False
        status_ph.error(f"❌ Error during research: {str(e)}")
        progress_ph.empty()
        return

    # finalize
    progress_bar.progress(100)
    status_ph.success("✅ Research complete!")

    # keep only the accumulated report text (trim surrounding whitespace)
    md_text = collected.strip()
    st.session_state.final_report = md_text
    progress_ph.empty()

    # re-enable button after completion
    st.session_state.button_disabled = False

    # render final output as Markdown into the dedicated placeholder so
    # headings, lists and links display correctly
    if st.session_state.final_report:
        final_ph.markdown(st.session_state.final_report)
    else:
        final_ph.empty()

    # rerun to reflect the re-enabled button and final output
    st.rerun()
238
+
239
+ # Sidebar removed per UI request. Dark-mode and history removed.
240
+
241
+
242
+ # --------------------
243
+ # Main UI
244
+ # --------------------
245
# --------------------
# Main UI (top-level Streamlit script: re-executed on every rerun)
# --------------------
st.title("🧠 Deep Research (Powered by Agentic AI)")
st.write("What topic would you like to research?")

# Topic input; label hidden because the st.write above serves as the prompt.
query = st.text_area("Enter your research topic", value="The impact of AI on the Healthcare Industry.", height=50, label_visibility="collapsed")

# Action row with buttons
col1, col2, col3, col4 = st.columns([2.0, 2.0, 2.0, 2.0])

with col1:
    # Disabled while a research run is in flight (see button_disabled below).
    run_clicked = st.button("🚀 Run Deep Research", key="run", disabled=st.session_state.button_disabled)

# PDF and MD download buttons appear inline after a final_report exists
if st.session_state.final_report:
    with col2:
        # PDF generator stream - create bytes on demand
        pdf_bytes = make_pdf_bytes(st.session_state.final_report)
        st.download_button("📄 Download PDF", data=pdf_bytes, file_name="report.pdf", mime="application/pdf")

    with col3:
        # Markdown
        md_bytes = make_md_bytes(st.session_state.final_report)
        st.download_button("📝 Download MD", data=md_bytes, file_name="report.md", mime="text/markdown")

# placeholder for final report (used so streaming traces can be cleared)
final_ph = st.empty()

# placeholder for streaming status and progress updates
status_ph = st.empty()

# Run research if requested; disable button on click and re-run.
# Two-step dance: clicking only flips button_disabled and reruns, so the
# button is redrawn disabled BEFORE the long-running research starts.
if run_clicked and query.strip():
    st.session_state.button_disabled = True
    st.rerun()

# Execute streaming if button was disabled (i.e., on the rerun after click)
if st.session_state.button_disabled and query.strip():
    run_streaming(query.strip(), final_ph, status_ph)
elif not st.session_state.button_disabled:
    # if final_report exists (e.g., from previous run), show it in the final placeholder
    if st.session_state.final_report:
        # NOTE(review): unsafe_allow_html=True renders any raw HTML present in
        # the model-generated report — confirm this is intended.
        final_ph.markdown(st.session_state.final_report, unsafe_allow_html=True)
    else:
        st.info("Enter a topic and press Run. Final report will replace streaming traces.")

# small debug caption
st.caption(f"Session: {st.session_state.session_id}")