Final_Assignment_Template

Sleeping

App Files Files Community

Final_Assignment_Template / agent.py

iammartian0

Create agent.py

53cf0f3 verified about 2 months ago

raw

history blame contribute delete

7.82 kB

	import os
	import json
	from dotenv import load_dotenv
	from tavily import TavilyClient
	from cerebras.cloud.sdk import Cerebras

	load_dotenv()

	# --- HELPER TOOLS ---

	class WebSearchTool:
	    """Search the web using Tavily and render the hits as plain text.

	    Attributes:
	        SNIPPET_CHARS: Maximum number of characters of each result's
	            content that is copied into the formatted output.
	    """

	    SNIPPET_CHARS = 300

	    def __init__(self, api_key: str):
	        """Create the underlying Tavily client.

	        Args:
	            api_key: API key handed to ``TavilyClient``.
	        """
	        self.client = TavilyClient(api_key=api_key)

	    def search(self, query: str, max_results: int = 5) -> str:
	        """Search and return formatted results.

	        Args:
	            query: Search string sent to the client.
	            max_results: Upper bound on the number of results requested.

	        Returns:
	            A newline-joined text block: an optional "Quick Answer" line,
	            a "Search Results:" header, then title / URL / snippet lines
	            for each result. On any exception the string
	            ``"Search error: <message>"`` is returned instead of raising.
	        """
	        try:
	            response = self.client.search(
	                query=query,
	                search_depth="advanced",
	                max_results=max_results,
	                include_answer=True,
	            )

	            # Format results
	            output = []

	            if response.get("answer"):
	                output.append(f"Quick Answer: {response['answer']}\n")

	            output.append("Search Results:")
	            for i, result in enumerate(response.get("results") or [], 1):
	                # A result with a missing or None field must not discard
	                # every other result, so fall back to placeholders.
	                title = result.get("title") or "(untitled)"
	                url = result.get("url") or ""
	                content = result.get("content") or ""

	                snippet = content[:self.SNIPPET_CHARS]
	                # Only mark the snippet as cut off when it really was.
	                if len(content) > self.SNIPPET_CHARS:
	                    snippet += "..."

	                output.append(f"\n{i}. {title}")
	                output.append(f" URL: {url}")
	                output.append(f" {snippet}")

	            return "\n".join(output)

	        except Exception as e:
	            # Best-effort tool: callers receive the failure as text.
	            return f"Search error: {str(e)}"

	class FileReaderTool:
	    """Read various file formats and return their content as text."""

	    def read(self, file_path: str) -> str:
	        """Read a file and return its content as text.

	        The reader is chosen from the lower-cased file extension:
	        ``.docx`` (python-docx), ``.pdf`` (pdfplumber), ``.xlsx`` / ``.xls``
	        / ``.csv`` (pandas) and ``.txt`` / ``.md`` / ``.json`` (plain UTF-8
	        read). Optional third-party readers are imported lazily so a
	        missing package only affects its own format.

	        Args:
	            file_path: Path of the file to read.

	        Returns:
	            The extracted text, or a human-readable error string when the
	            file does not exist, the extension is unsupported, a required
	            package is missing, or reading fails. This method does not
	            raise for those cases.
	        """
	        if not os.path.exists(file_path):
	            return f"Error: File not found at {file_path}"

	        ext = os.path.splitext(file_path)[1].lower()

	        try:
	            # DOCX files
	            if ext == '.docx':
	                try:
	                    from docx import Document
	                    doc = Document(file_path)
	                    text = [para.text for para in doc.paragraphs if para.text.strip()]
	                    for table in doc.tables:
	                        for row in table.rows:
	                            cells = [cell.text.strip() for cell in row.cells]
	                            # Plain pipe separator; the previous "\|" was an
	                            # invalid escape that emitted a stray backslash.
	                            text.append(" | ".join(cells))
	                    return "\n".join(text)
	                except ImportError:
	                    return "Error: python-docx not installed."

	            # PDF files
	            elif ext == '.pdf':
	                try:
	                    import pdfplumber
	                    with pdfplumber.open(file_path) as pdf:
	                        # Extract each page once, then drop empty pages.
	                        extracted = [page.extract_text() for page in pdf.pages]
	                        text = [page_text for page_text in extracted if page_text]
	                    return "\n".join(text)
	                except ImportError:
	                    return "Error: pdfplumber not installed."

	            # Excel/CSV files
	            elif ext in ['.xlsx', '.xls', '.csv']:
	                try:
	                    import pandas as pd
	                    if ext == '.csv':
	                        df = pd.read_csv(file_path)
	                    else:
	                        df = pd.read_excel(file_path)
	                    return df.to_string()
	                except ImportError:
	                    return "Error: pandas or openpyxl not installed."

	            # Text files
	            elif ext in ['.txt', '.md', '.json']:
	                with open(file_path, 'r', encoding='utf-8') as f:
	                    return f.read()

	            else:
	                return f"Unsupported file type: {ext}"

	        except Exception as e:
	            # Best-effort tool: report the failure as text.
	            return f"Error reading file: {str(e)}"

	class ImageAnalysisTool:
	    """Extract text from images using OCR (pytesseract + Pillow).

	    Only OCR is implemented here; no vision model is called.
	    """

	    def analyze(self, image_path: str, question: str = "Describe this image") -> str:
	        """Run OCR on an image and return the recognised text.

	        Args:
	            image_path: Path of the image file.
	            question: Accepted for interface compatibility; the OCR path
	                does not use it.

	        Returns:
	            The extracted text prefixed with a label, a notice when OCR
	            finds no text, or an error string when the file is missing,
	            the OCR packages are not installed, or analysis fails.
	        """
	        if not os.path.exists(image_path):
	            return f"Error: Image not found at {image_path}"

	        try:
	            # Try OCR first (fast and simple)
	            import pytesseract
	            from PIL import Image

	            # The context manager closes the image file once OCR is done,
	            # so repeated calls do not leak file handles.
	            with Image.open(image_path) as img:
	                text = pytesseract.image_to_string(img)

	            if text.strip():
	                return f"Text extracted from image:\n{text}"
	            return "No text found in image (OCR returned empty)"

	        except ImportError:
	            return "Error: pytesseract or Pillow not installed."
	        except Exception as e:
	            return f"Error analyzing image: {str(e)}"

	# --- MAIN AGENT CLASS ---

	class BasicAgent:
	    """
	    Question-answering agent: optional web search followed by one LLM call.

	    Renamed from SimpleResearchAgent to match app.py requirements.

	    Attributes:
	        LOGIC_KEYWORDS: Lower-case substrings that mark a question as a
	            logic/trick task for which no web search is performed.
	        MAX_TOKENS: Completion token cap passed to the LLM.
	        MAX_QUERY_CHARS: Longest search query sent to the search tool.
	    """

	    LOGIC_KEYWORDS = (
	        'opposite', 'backwards', 'reversed', 'if you understand', 'python code',
	    )
	    MAX_TOKENS = 200  # Prevent long rambling
	    # NOTE(review): Tavily is reported to reject queries longer than 400
	    # characters - confirm against the current API documentation.
	    MAX_QUERY_CHARS = 400

	    def __init__(self):
	        """Read API keys from the environment and build the LLM and tools.

	        Raises:
	            ValueError: If CEREBRAS_API_KEY or TAVILY_API_KEY is not set.
	        """
	        print("--- Initializing BasicAgent ---")

	        # 1. Load Keys internally
	        self.hf_token = os.getenv("HF_TOKEN")
	        self.cerebras_key = os.getenv("CEREBRAS_API_KEY")
	        self.tavily_key = os.getenv("TAVILY_API_KEY")

	        if not self.cerebras_key or not self.tavily_key:
	            raise ValueError("❌ Missing API Keys. Please check Space Settings.")

	        # 2. Initialize LLM
	        self.llm = Cerebras(api_key=self.cerebras_key)
	        self.model = "gpt-oss-120b" # Or "llama3.1-8b"

	        # 3. Initialize tools
	        self.web_search = WebSearchTool(self.tavily_key)
	        self.file_reader = FileReaderTool()
	        self.image_analyzer = ImageAnalysisTool()

	        print("✅ BasicAgent initialized successfully.")

	    @staticmethod
	    def _build_search_query(question: str, max_chars: int = MAX_QUERY_CHARS) -> str:
	        """Collapse whitespace in *question* and cap it at *max_chars*."""
	        query = " ".join(question.split())
	        return query[:max_chars].rstrip()

	    def _call_llm(self, messages: list, temperature: float = 0.0) -> str:
	        """Call the LLM and return its reply as text.

	        Args:
	            messages: Chat messages in role/content dict form.
	            temperature: Sampling temperature forwarded to the API.

	        Returns:
	            The stripped message content, "Error: Empty response." when the
	            content is empty, or "LLM Error: <message>" if the call raises.
	        """
	        try:
	            response = self.llm.chat.completions.create(
	                model=self.model,
	                messages=messages,
	                temperature=temperature,
	                max_tokens=self.MAX_TOKENS,
	            )
	            content = response.choices[0].message.content
	            return content.strip() if content else "Error: Empty response."
	        except Exception as e:
	            return f"LLM Error: {str(e)}"

	    def answer(self, question: str, mode="context") -> str:
	        """
	        Main method called by app.py.
	        Note: app.py only passes 'question', not 'file_path'.

	        Args:
	            question: The question text.
	            mode: Currently unused; kept so existing callers keep working.

	        Returns:
	            The LLM's answer string (or an error string from the LLM call).
	        """
	        print(f"Processing: {question[:50]}...")

	        # 1. Detect if this is a Logic/Trick question (GAIA style)
	        lowered = question.lower()  # lower-case once, not per keyword
	        is_logic = any(keyword in lowered for keyword in self.LOGIC_KEYWORDS)

	        context_parts = []

	        # 2. Search Web (Skip if it's purely a logic puzzle)
	        if not is_logic:
	            # Normalise whitespace and cap the length so long, multi-line
	            # questions still form a usable search query.
	            query = self._build_search_query(question, self.MAX_QUERY_CHARS)
	            search_results = self.web_search.search(query)
	            context_parts.append(f"Web Search Results:\n{search_results}")
	        else:
	            context_parts.append("Logic/Reasoning Task (No Search Performed)")

	        context = "\n\n".join(context_parts)

	        # 3. Construct System Prompt
	        # We use the GAIA-style prompt for strictness
	        messages = [
	            {
	                "role": "system",
	                "content": (
	                    "You are a precise data extraction engine. "
	                    "Answer with ONLY the exact value requested. "
	                    "No explanations, no preambles, no conversational filler. "
	                    "Examples: '42', 'John Smith', 'Paris', 'right'. "
	                )
	            },
	            {
	                "role": "user",
	                "content": f"Context:\n{context}\n\nQuestion: {question}\n\nExact Answer:"
	            }
	        ]

	        return self._call_llm(messages)

	    def __call__(self, question: str) -> str:
	        """Make the agent callable; delegates to :meth:`answer`."""
	        return self.answer(question)

	# For local testing
	if __name__ == "__main__":
	    # Smoke test: build the agent from environment keys and ask one question.
	    demo_agent = BasicAgent()
	    reply = demo_agent("What is the capital of France?")
	    print(reply)