|
import os |
|
from dotenv import load_dotenv |
|
|
|
|
|
from smolagents import CodeAgent, LiteLLMModel, OpenAIServerModel |
|
|
|
|
|
from smolagents.default_tools import FinalAnswerTool, PythonInterpreterTool |
|
|
|
|
|
from tools import ( |
|
AddDocumentToVectorStoreTool, |
|
ArxivSearchTool, |
|
DownloadFileFromLinkTool, |
|
DuckDuckGoSearchTool, |
|
QueryVectorStoreTool, |
|
ReadFileContentTool, |
|
TranscibeVideoFileTool, |
|
TranscribeAudioTool, |
|
VisitWebpageTool, |
|
WikipediaSearchTool, |
|
image_question_answering, |
|
) |
|
|
|
|
|
from utils import extract_final_answer, replace_tool_mentions |
|
|
|
|
|
class BoomBot:
    """Question-answering agent built on a smolagents ``CodeAgent``.

    The bot picks an LLM backend from a provider name, registers the search,
    download, file-reading, transcription and vector-store tools, and exposes
    :meth:`run` to answer a single task.
    """

    # OpenAI-compatible DeepInfra endpoint shared by every DeepInfra-hosted provider.
    _DEEPINFRA_API_BASE = "https://api.deepinfra.com/v1/openai"

    # Provider name -> model id served through DeepInfra.
    # NOTE(review): "meta" resolves to a Qwen model. The previous code assigned
    # "meta-llama/Llama-3.3-70B-Instruct-Turbo" and immediately overwrote it, so
    # the effective (second) value is what is kept here — confirm the intent.
    _DEEPINFRA_MODELS = {
        "deepinfra": "Qwen/Qwen3-235B-A22B",
        "meta": "Qwen/Qwen2.5-72B-Instruct",
        "google": "google/gemini-2.5-flash",
    }

    def __init__(self, provider="anthropic"):
        """
        Initialize the BoomBot with the specified provider.

        Loads environment variables from a ``.env`` file (API keys are read
        from the environment), then builds the model and the agent.

        Args:
            provider (str): The model provider to use. Supported values are
                "qwen", "gemma", "anthropic", "deepinfra", "meta", "google"
                and "groq".

        Raises:
            ValueError: If ``provider`` is not one of the supported values.
        """
        load_dotenv()
        self.provider = provider
        self.model = self._initialize_model()
        self.agent = self._create_agent()

    def _deepinfra_model(self, model_name):
        """Build a LiteLLM model for ``model_name`` hosted on DeepInfra.

        Args:
            model_name (str): DeepInfra model path, without the "deepinfra/" prefix.

        Returns:
            LiteLLMModel configured with the DeepInfra endpoint and the
            ``DEEPINFRA_API_KEY`` environment variable.
        """
        return LiteLLMModel(
            model_id="deepinfra/" + model_name,
            api_base=self._DEEPINFRA_API_BASE,
            api_key=os.getenv("DEEPINFRA_API_KEY"),
            flatten_messages_as_text=True,
            max_tokens=8192,
            temperature=0.7,
        )

    def _initialize_model(self):
        """
        Initialize the appropriate model based on the provider.

        Returns:
            The initialized model object

        Raises:
            ValueError: If ``self.provider`` is not a supported provider name.
        """
        provider = self.provider

        if provider == "qwen":
            return LiteLLMModel(
                model_id="ollama_chat/qwen3:8b",
                device="cuda",
                num_ctx=32768,
                temperature=0.6,
                top_p=0.95,
            )

        if provider == "gemma":
            return LiteLLMModel(
                model_id="ollama_chat/gemma3:12b-it-qat",
                num_ctx=65536,
                temperature=1.0,
                device="cuda",
                top_k=64,
                top_p=0.95,
                min_p=0.0,
            )

        if provider == "anthropic":
            return LiteLLMModel(
                model_id="anthropic/claude-3-5-haiku-latest",
                temperature=0.6,
                max_tokens=8192,
                api_key=os.getenv("ANTHROPIC_API_KEY"),
            )

        if provider in self._DEEPINFRA_MODELS:
            return self._deepinfra_model(self._DEEPINFRA_MODELS[provider])

        if provider == "groq":
            # NOTE(review): this id names an Anthropic model, not a Groq-hosted
            # one, and no api_key is passed explicitly. Behaviour is kept as-is;
            # confirm which model the "groq" provider is meant to use.
            return LiteLLMModel(
                model_id="claude-3-opus-20240229",
                temperature=0.7,
                max_tokens=8192,
            )

        raise ValueError(f"Unsupported provider: {self.provider}")

    def _create_agent(self):
        """
        Create and configure the agent with all necessary tools.

        Returns:
            The configured CodeAgent
        """
        # Order is preserved from the original registration list.
        agent_tools = [
            DuckDuckGoSearchTool(),
            DownloadFileFromLinkTool(),
            ReadFileContentTool(),
            VisitWebpageTool(),
            TranscribeAudioTool(),
            WikipediaSearchTool(),
            ArxivSearchTool(),
            AddDocumentToVectorStoreTool(),
            QueryVectorStoreTool(),
            PythonInterpreterTool(),
            FinalAnswerTool(),
        ]

        # Modules the agent's generated code is allowed to import.
        additional_imports = [
            # General-purpose / stdlib and web helpers
            "json",
            "os",
            "glob",
            "pathlib",
            "argparse",
            "pickle",
            "io",
            "re",
            "datetime",
            "collections",
            "math",
            "random",
            "csv",
            "zipfile",
            "itertools",
            "functools",
            "requests",
            "bs4",
            # Data handling
            "pandas",
            "numpy",
            "dask",
            "polars",
            "pyarrow",
            "h5py",
            "openpyxl",
            "yaml",
            # Plotting
            "matplotlib",
            "seaborn",
        ]

        agent = CodeAgent(
            tools=agent_tools,
            max_steps=15,
            model=self.model,
            add_base_tools=False,
            stream_outputs=True,
            additional_authorized_imports=additional_imports,
        )

        # Post-process the default system prompt with the project helper
        # (see utils.replace_tool_mentions for what exactly is rewritten).
        agent.system_prompt = replace_tool_mentions(agent.system_prompt)

        return agent

    def _get_system_prompt(self):
        """
        Return the system prompt for the agent.

        The text is prepended to every task in :meth:`run`; it describes the
        expected research workflow and the required final-answer format.

        Returns:
            str: The system prompt
        """
        return """
        YOUR BEHAVIOR GUIDELINES:
        • Do NOT make unfounded assumptions—always ground answers in reliable sources or search results.
        • For math or puzzles: break the problem into code/math, then solve programmatically.

        RESEARCH WORKFLOW:
        1. SEARCH
        - Begin with web_search, wikipedia_search, or arxiv_search.
        - Refine your query if results are weak—don't just retry the same terms.
        - If one search tool yields little, try another before moving on to downloads.

        2. VISIT
        - Use visit_webpage to preview content from promising links.
        - If the content is long, complex, spans multiple pages, or may be needed later, do NOT rely solely on visit_webpage.
        - Move quickly to downloading: avoid repeated visits when the content should be archived.

        3. DOWNLOAD AND ADD TO VECTORSTORE (MANDATORY IF CONTENT IS LONG, DENSE, COMPLEX, MULTIPLE FILES OR LINKS TO VISIT)
        - Use download_file_from_link on all valuable resources (including html pages or pdfs).
        - Especially when a page is detailed, technical, or multi-part, downloading is preferred.
        - You can (and should) download webpages as HTML. Do this whenever the site might be referenced again later.

        4. INDEX & QUERY
        - Immediately add downloaded files to the vector store using add_document_to_vector_store.
        - For complex tasks or unclear answers, prefer querying vector store over re-visiting pages.
        - If you've downloaded a file, **always index it unless clearly irrelevant.**

        5. READ
        - Use read_file_content to analyze file contents (html, pdf, text).
        - You can also use query_downloaded_documents for deeper understanding.

        6. EVALUATE
        - ✅ If the answer is clear from current sources, respond.
        - ❌ If not, continue iterating and analyzing downloaded material.

        FALLBACK & ADAPTATION:
        • If a tool fails, reformulate or switch tools.
        • For arXiv: web_search might help you find the paper; follow with direct download of the PDF via download_file_from_link.

        MANDATORY DOWNLOAD & INDEX WHEN:
        • The page is lengthy or technical (e.g., research papers, government sites, legal docs, blog posts with code).
        • You suspect you'll need to return to the content.
        • You are working on multi-hop reasoning or long-term memory tasks.

        COMMON TOOL CHAINS:
        • FACTUAL Qs:
        web_search → final_answer
        • CURRENT EVENTS:
        web_search → visit_webpage → (download + index if needed) → final_answer
        • DOCUMENT-BASED Qs:
        web_search → download_file_from_link → add_document_to_vector_store → query_downloaded_documents → final_answer
        • ARXIV PAPERS:
        arxiv_search → download_file_from_link → add_document_to_vector_store → query_downloaded_documents → final_answer
        • MEDIA ANALYSIS:
        download_file_from_link → transcribe_audio → final_answer

        FINAL ANSWER FORMAT:
        - Begin with "FINAL ANSWER: "
        - Number → digits only (e.g., 42)
        - String → exact text (e.g., Pope Francis) without quotation marks
        - List → comma-separated, no brackets unless specified (e.g., 2, 3, 4)
        - End with: FINAL ANSWER: <your_answer>
        """

    def run(self, question: str, task_id: str, to_download: bool = False) -> str:
        """
        Run the agent with the given question, task_id, and download flag.

        Args:
            question (str): The question or task for the agent to process
            task_id (str): A unique identifier for the task; used to build the
                attachment URL when ``to_download`` is true
            to_download (bool): Flag indicating whether the prompt should tell
                the agent to download the task's attachment first

        Returns:
            str: The agent's response after passing through
            ``extract_final_answer``
        """
        prompt = self._get_system_prompt()
        prompt += "\nHere is the Task you need to solve:\n\n"
        prompt += f"Task: {question}\n\n"

        if to_download:
            link = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
            prompt += (
                "IMPORTANT: Before solving the task, you must download a required file.\n"
                f"Use the `download_file_from_link` tool with this link: {link}\n"
                "After downloading, use the appropriate tool to read or process the file "
                "before attempting to solve the task.\n\n"
            )

        result = self.agent.run(prompt)
        return extract_final_answer(result)
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Evaluation script: run the agent over the online QA set and log every
    # answer next to the reference answer in a CSV file.
    import csv

    import requests

    from utils import load_online_qas

    CSV_FILE = "evals/llm_eval.csv"
    FIELDNAMES = ["model", "task_id", "question", "llm_answer", "processed_answer", "real_answer"]
    QAS_PATH = r"../../Final_Assignment_Template/allqas.jsonl"
    NOT_ATTEMPTED = "NOT ATTEMPTED"
    # Questions mentioning any of these words are skipped rather than attempted.
    EXCLUDED_KEYWORDS = ("youtube", "video", "chess")
    # Seconds to wait for the scoring server; without it a stalled connection
    # would block the whole evaluation run indefinitely.
    REQUEST_TIMEOUT = 30

    def ensure_csv():
        """Create the CSV file (and its parent directory) with a header if missing."""
        parent = os.path.dirname(CSV_FILE)
        if parent:
            os.makedirs(parent, exist_ok=True)
        if not os.path.isfile(CSV_FILE):
            with open(CSV_FILE, mode="w", newline="", encoding="utf-8") as f:
                csv.DictWriter(f, fieldnames=FIELDNAMES).writeheader()

    def append_results(rows):
        """Append a list of dict rows to the CSV."""
        with open(CSV_FILE, mode="a", newline="", encoding="utf-8") as f:
            csv.DictWriter(f, fieldnames=FIELDNAMES).writerows(rows)

    def attachment_available(task_id):
        """Return True if the scoring server answers 200 for the task's file URL."""
        link = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
        resp = requests.get(link, timeout=REQUEST_TIMEOUT)
        return resp.status_code == 200

    def evaluate_entry(agent, entry, has_file):
        """Run the agent on one QA entry and return the CSV row for it.

        Args:
            agent: The BoomBot instance to query.
            entry: Mapping with "task_id", "Question", "Final answer" and,
                optionally, "file_name".
            has_file: True for entries from the with-attachment set; these are
                only attempted when the attachment URL is reachable.

        Returns:
            dict: One row keyed by FIELDNAMES.
        """
        task_id = entry["task_id"]
        question = entry["Question"]
        real_answer = entry["Final answer"]

        lowered = question.lower()
        if any(kw in lowered for kw in EXCLUDED_KEYWORDS):
            llm_answer = processed = NOT_ATTEMPTED
        else:
            # Top-level boundary: one failing task must not abort the run, so
            # any error is recorded in the row instead of propagating.
            try:
                if has_file and not attachment_available(task_id):
                    llm_answer = processed = NOT_ATTEMPTED
                else:
                    to_download = has_file and bool(entry.get("file_name", ""))
                    llm_answer = agent.run(question, task_id, to_download)
                    processed = extract_final_answer(llm_answer).strip()
            except Exception as e:
                llm_answer = processed = f"[Error] {e}"

        return {
            "model": agent.provider,
            "task_id": task_id,
            "question": question,
            "llm_answer": llm_answer,
            "processed_answer": processed,
            "real_answer": real_answer,
        }

    # Fail fast on an unwritable output path before spending time on LLM calls.
    ensure_csv()

    agent = BoomBot(provider="deepinfra")

    file_online = load_online_qas(file_path=QAS_PATH, has_file=True)
    nofile_online = load_online_qas(file_path=QAS_PATH, has_file=False)

    appended = 0
    for entries, has_file in ((file_online, True), (nofile_online, False)):
        for entry in entries:
            row = evaluate_entry(agent, entry, has_file)
            # Persist each row immediately so an interrupted run keeps its progress.
            append_results([row])
            appended += 1
            print("REAL ANSWER:", row["real_answer"])

    print(f"✅ Appended {appended} rows to {CSV_FILE}")