Spaces:

Hennessy2025
/

Final_Assignment_Template

Sleeping

App Files Files Community

Final_Assignment_Template / agent.py

Hennessy2025

Upload 6 files

7f4cee7 verified 6 months ago

raw

history blame contribute delete

21.6 kB

	import os
	from dotenv import load_dotenv
	from typing import TypedDict, List, Dict, Any, Optional
	from urllib.parse import urlparse
	from langgraph.graph import StateGraph, START, END, MessagesState
	from langchain.agents import create_tool_calling_agent, ConversationalAgent, AgentExecutor, initialize_agent, create_react_agent
	from langchain_google_genai import ChatGoogleGenerativeAI
	from langchain_groq import ChatGroq
	from langchain_core.tools import tool, Tool
	from langchain_core.messages import HumanMessage, SystemMessage
	from langchain.memory import ConversationBufferMemory
	from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
	from langgraph.prebuilt import ToolNode
	from langgraph.prebuilt import tools_condition

	# 1. Web Browsing
	from langchain_community.tools import DuckDuckGoSearchResults
	from langchain_community.document_loaders import ImageCaptionLoader
	import requests, time, yt_dlp
	import pandas as pd
	from pathlib import Path
	from bs4 import BeautifulSoup
	from langchain_community.tools import WikipediaQueryRun
	from langchain_community.utilities import WikipediaAPIWrapper, DuckDuckGoSearchAPIWrapper
	from langchain_community.document_loaders import YoutubeLoader
	from langchain_community.document_loaders import UnstructuredExcelLoader
	from langchain_community.document_loaders import AssemblyAIAudioTranscriptLoader
	from langchain.text_splitter import CharacterTextSplitter
	from langchain_community.utilities import GoogleSerperAPIWrapper

	# Load variables from a .env file (if one exists) into the process
	# environment before any os.getenv() lookup for API keys runs.
	load_dotenv()
	# Base URL of the scoring service; the file-reading tools build attachment
	# URLs as f'{DEFAULT_API_URL}/files/{task_id}'.
	DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

	@tool
	def duckduck_websearch(query: str) -> str:
	    """Search the web through DuckDuckGo.

	    The LangChain DuckDuckGo wrapper is tried first. If it raises, the
	    DuckDuckGo HTML endpoint is scraped directly as a fallback.

	    Args:
	        query: what you want to search

	    Returns:
	        The search results as text, "No search results found." when nothing
	        matched, or an "[ERROR ...]" string when the fallback request fails.
	    """
	    try:
	        search = DuckDuckGoSearchAPIWrapper(max_results=5)
	        results = search.run(query)
	        if not results or results.strip() == "":
	            return "No search results found."
	        return results
	    except Exception as e:
	        print(str(e))
	        print('Try to use request method for duckcudckgo Search')

	    # Fallback path: query the plain-HTML endpoint and parse the result list.
	    try:
	        response = requests.get(
	            "https://html.duckduckgo.com/html",
	            params={"q": query},
	            timeout=10,
	        )
	        response.raise_for_status()
	    except requests.RequestException as e:
	        return f"[ERROR searching DuckDuckGo]: {str(e)}"

	    soup = BeautifulSoup(response.text, 'html.parser')
	    # Collected in a fresh list: the primary path may have failed before it
	    # ever bound a results value.
	    hits = []
	    for result in soup.find_all('div', {'class': 'result'}):
	        title = result.find('a', {'class': 'result__a'})
	        snippet = result.find('a', {'class': 'result__snippet'})
	        if title and snippet:
	            hits.append({
	                'title': title.get_text(),
	                'snippet': snippet.get_text(),
	                'url': title.get('href'),
	            })

	    if not hits:
	        return "No search results found."

	    # Format results, limited to the top 10 hits.
	    formatted_results = [
	        f"[{r['title']}]({r['url']})\n{r['snippet']}\n" for r in hits[:10]
	    ]
	    return "## Search Results\n\n" + "\n".join(formatted_results)

	@tool
	def serper_websearch(query: str) -> str:
	    """Run a web search through the Serper API.

	    Args:
	        query: what you want to search
	    """
	    # The API key is read from the SERPER_API_KEY environment variable.
	    serper_client = GoogleSerperAPIWrapper(serper_api_key=os.getenv("SERPER_API_KEY"))
	    return serper_client.run(query)

	@tool
	def visit_webpage(url: str) -> str:
	    """Download a web page and return the start of its raw HTML.

	    Args:
	        url: the webpage url

	    Returns:
	        The first 5000 characters of the response body, or an
	        "[ERROR fetching ...]" string if anything goes wrong.
	    """
	    try:
	        page = requests.get(url, timeout=5)
	        html = page.text
	        # Only the leading 5000 characters are handed back to the caller.
	        return html[:5000]
	    except Exception as err:
	        return f"[ERROR fetching {url}]: {str(err)}"

	@tool
	def wiki_search(query: str) -> str:
	    """Look a topic up on Wikipedia.

	    Args:
	        query: what you want to wiki
	    """
	    # One result, capped at 100 characters of document content.
	    wikipedia = WikipediaQueryRun(
	        api_wrapper=WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=100)
	    )
	    payload = {"query": query}
	    return wikipedia.run(payload)

	@tool
	def text_splitter(text: str) -> List[str]:
	    """Break text into chunks with LangChain's CharacterTextSplitter.

	    Args:
	        text: A string of text to split.

	    Returns:
	        The chunks produced with chunk_size=450 and chunk_overlap=10.
	    """
	    chunks = CharacterTextSplitter(chunk_size=450, chunk_overlap=10).split_text(text)
	    return chunks

	@tool
	def youtube_transcript(video_url: str) -> str:
	    """Fetch the transcript of a YouTube video as plain text.

	    Args:
	        video_url: YouTube video url

	    Returns:
	        The transcript text, or an "Error fetching transcript: ..." string
	        when the loader fails.
	    """
	    try:
	        documents = YoutubeLoader.from_youtube_url(video_url).load()
	        # load() yields Document objects; join their text so the declared
	        # str return type actually holds.
	        transcript = "\n".join(doc.page_content for doc in documents)
	    except Exception as e:
	        return f"Error fetching transcript: {str(e)}"
	    if not transcript.strip():
	        return "No transcript available for this video."
	    return transcript

	# 4. File Reading
	@tool
	def read_file(task_id: str) -> str:
	    """Download the file attached to a task and return its text content.

	    Args:
	        task_id: the task_id

	    Returns:
	        The file content decoded as UTF-8 (undecodable bytes are replaced),
	        or an "Error reading file: ..." string when the download or read
	        fails.
	    """
	    file_url = f'{DEFAULT_API_URL}/files/{task_id}'
	    try:
	        r = requests.get(file_url, timeout=15, allow_redirects=True)
	        # Without this, an HTTP error page would be returned as file content.
	        r.raise_for_status()
	        # The download is still written to 'temp' so the on-disk copy exists
	        # exactly as before.
	        with open('temp', "wb") as fp:
	            fp.write(r.content)
	        # Explicit encoding: the platform default may not be UTF-8, and
	        # errors="replace" keeps non-text attachments from raising.
	        with open('temp', encoding="utf-8", errors="replace") as f:
	            return f.read()
	    except (requests.RequestException, OSError) as e:
	        return f"Error reading file: {str(e)}"

	@tool
	def excel_read(task_id: str) -> str:
	    """Download the Excel file attached to a task and summarise it.

	    Args:
	        task_id: the task_id

	    Returns:
	        Row/column counts, the column names and df.describe() output, or an
	        "Error analyzing Excel file: ..." string on any failure.
	    """
	    try:
	        file_url = f'{DEFAULT_API_URL}/files/{task_id}'
	        r = requests.get(file_url, timeout=15, allow_redirects=True)
	        # Fail early on HTTP errors instead of parsing an error page.
	        r.raise_for_status()
	        with open('temp.xlsx', "wb") as fp:
	            fp.write(r.content)
	        # Read the Excel file
	        df = pd.read_excel('temp.xlsx')
	        result = (
	            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
	        )
	        # Column labels may be numbers or dates; str.join needs strings.
	        result += f"Columns: {', '.join(map(str, df.columns))}\n\n"
	        # Add summary statistics
	        result += "Summary statistics:\n"
	        result += str(df.describe())
	        return result
	    except Exception as e:
	        return f"Error analyzing Excel file: {str(e)}"

	@tool
	def csv_read(task_id: str) -> str:
	    """Download the CSV file attached to a task and summarise it.

	    Args:
	        task_id: the task_id

	    Returns:
	        Row/column counts, the column names and df.describe() output, or an
	        "Error analyzing CSV file: ..." string on any failure.
	    """
	    try:
	        file_url = f'{DEFAULT_API_URL}/files/{task_id}'
	        r = requests.get(file_url, timeout=15, allow_redirects=True)
	        # Fail early on HTTP errors instead of parsing an error page.
	        r.raise_for_status()
	        with open('temp.csv', "wb") as fp:
	            fp.write(r.content)
	        # Read the CSV file; the path must be a string literal, the bare
	        # name temp.csv is an undefined variable.
	        df = pd.read_csv('temp.csv')
	        result = (
	            f"CSV file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
	        )
	        # Column labels may be numbers; str.join needs strings.
	        result += f"Columns: {', '.join(map(str, df.columns))}\n\n"
	        # Add summary statistics
	        result += "Summary statistics:\n"
	        result += str(df.describe())
	        return result
	    except Exception as e:
	        return f"Error analyzing CSV file: {str(e)}"

	@tool
	def mp3_listen(task_id: str) -> str:
	    """Download the MP3 attached to a task and return its transcript.

	    Args:
	        task_id: the task_id

	    Returns:
	        The page_content of every document produced by the AssemblyAI
	        loader, joined with newlines.
	    """
	    audio_url = f'{DEFAULT_API_URL}/files/{task_id}'
	    download = requests.get(audio_url, timeout=15, allow_redirects=True)
	    with open('temp.mp3', "wb") as audio_file:
	        audio_file.write(download.content)
	    # The API key is read from the AssemblyAI_API_KEY environment variable.
	    transcript_docs = AssemblyAIAudioTranscriptLoader(
	        file_path="temp.mp3",
	        api_key=os.getenv("AssemblyAI_API_KEY"),
	    ).load()
	    return "\n".join([doc.page_content for doc in transcript_docs])

	# 5. Image Open
	@tool
	def image_caption(dir: str) -> str:
	    """Describe what the given image shows.

	    Args:
	        dir: the image url link

	    Returns:
	        The page_content of the first document produced by
	        ImageCaptionLoader for that image.
	    """
	    captions = ImageCaptionLoader(images=[dir]).load()
	    first_caption = captions[0]
	    return first_caption.page_content

	# 2. Coding
	from langchain_experimental.tools import PythonREPLTool
	@tool
	def run_python(code: str):
	    """Execute the given Python code and return what the REPL tool reports.

	    Args:
	        code: the python code
	    """
	    # NOTE(review): the code string is executed as-is by PythonREPLTool;
	    # only expose this tool where the input can be trusted.
	    repl = PythonREPLTool()
	    return repl.run(code)

	@tool
	def multiply(a: float, b: float) -> float:
	    """Return the product of two numbers.

	    Args:
	        a: first float
	        b: second float
	    """
	    product = a * b
	    return product

	@tool
	def add(a: float, b: float) -> float:
	    """Return the sum of two numbers.

	    Args:
	        a: first float
	        b: second float
	    """
	    total = a + b
	    return total

	@tool
	def subtract(a: float, b: float) -> float:
	    """Return the first number minus the second.

	    Args:
	        a: first float
	        b: second float
	    """
	    difference = a - b
	    return difference

	@tool
	def divide(a: float, b: float) -> float:
	    """Return the first number divided by the second.

	    Args:
	        a: first float
	        b: second float

	    Raises:
	        ValueError: if b is zero.
	    """
	    if b != 0:
	        return a / b
	    raise ValueError("Cannot divide by zero.")

	# 3. Multi-Modality
	# - multiply: multiply two numbers, A and B
	# - add: add two numbers, A and B
	# - subtract: Subtract A by B with passing A as the first argument
	# - divide: Divide A by B with passing A as the first argument



	# ("human", f"Question: {question}\nReport to validate: {final_answer}")
	class BasicAgent:
	    """Question-answering agent that drives a Gemini chat model with tools.

	    Instances are callable: pass a task dict with "task_id", "question" and
	    (optionally) "file_name" and the agent's answer comes back as a string.
	    """

	    def __init__(self):
	        """Create the chat model, register the tools and build the executor."""
	        self.model = ChatGoogleGenerativeAI(
	            model="gemini-2.0-flash-lite",
	            temperature=0,
	            max_tokens=1024,
	            candidate_count=1,
	            google_api_key=os.getenv("GEMINI_API_KEY"),
	        )
	        # Standalone system prompt. The executor built by __setup_agent__
	        # uses its own PREFIX; this attribute is not referenced by it.
	        self.sys_prompt = """
	You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template:
	FINAL ANSWER: [YOUR FINAL ANSWER].
	YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separared list of numbers and/or strings.
	If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
	If you are asked for a string, don't use articles, neither abbreviations (eg. for cities), and write the digits in plain text unless specified otherwise.
	If you are asked for a comma separated list, apply the above rules depending of whether the element to put in the list is a number or a string.

	You have access to the following tools:
	- serper_websearch: web search the content of the query by passing the query as input with Serper Search Engine
	- duckduck_websearch: web search the content of the query by passing the query as input with DuckDuckGo Search Engine
	- visit_webpage: visit the given webpage url by passing the url as input
	- wiki_search: wiki search the content of the query by passing the query as input if the question asks for wiki search it
	- text_splitter: split text into chunks
	- youtube_transcript: fetch the transcript of the Youtube video by passing the video url as input if the question asks for watching a Youtube video
	- read_file: read the content of the attached file by passing the TASK-ID as input
	- excel_read: read the content of the attached excel file by passing the TASK-ID as input
	- csv_read: read the content of the attached csv file by passing the TASK-ID as input
	- mp3_listen: listen to the content of the attached mp3 file by passing the TASK-ID as input
	- image_caption: understand the visual content of the attached image by passing the TASK-ID as input
	- run_python: run the python code

	If Task ID is included in the question, remember to call the relevant read tools [ie. read_file, excel_read, csv_read, mp3_listen, image_caption]
	Note: run_python is called when the question mentions the term "Python" or any math calculation.
	"""
	        # (name, callable, description) for every tool offered to the agent.
	        tool_specs = [
	            ("duckduck_websearch", duckduck_websearch, "Search the web for information with DuckDuckGo"),
	            ("serper_websearch", serper_websearch, "Search the web for information with Serper"),
	            ("visit_webpage", visit_webpage, "Directly visit the webpage"),
	            ("wiki_search", wiki_search, "Search the information on Wikipedia"),
	            ("text_splitter", text_splitter, "Split text into chunks"),
	            # Bound method: receives only the URL string from the agent.
	            ("analyze_video", self._analyze_video, "Analyze YouTube video content directly"),
	            ("youtube_transcript", youtube_transcript, "Fetch the transcript of YouTube video"),
	            ("read_file", read_file, "Read the file content"),
	            ("excel_read", excel_read, "Read the content of Excel file"),
	            ("csv_read", csv_read, "Read the content of CSV file"),
	            ("mp3_listen", mp3_listen, "Listen to the MP3 file"),
	            ("image_caption", image_caption, "Understand the image content"),
	            ("run_python", run_python, "Run Python code"),
	        ]
	        self.tools = [
	            Tool(name=name, func=func, description=description)
	            for name, func, description in tool_specs
	        ]
	        # Setup memory
	        self.memory = ConversationBufferMemory(
	            memory_key="chat_history",
	            return_messages=True
	        )
	        self.agent = self.__setup_agent__()
	        print("BasicAgent initialized.")

	    def __call__(self, task: dict) -> str:
	        """Answer one task, retrying the agent run on failure.

	        Args:
	            task: mapping with "task_id" and "question"; an optional
	                non-empty "file_name" signals that the task has an
	                attachment, in which case the task id is appended to the
	                question so the file tools can fetch it.

	        Returns:
	            The agent's answer, or an "Error processing query ..." string
	            after the last failed attempt.
	        """
	        task_id, question = task["task_id"], task["question"]
	        # .get(): tasks without an attachment may omit the key entirely.
	        file_name = task.get("file_name")
	        print(f"Agent received question (first 50 chars): {question[:50]}...")

	        if file_name:
	            question = f"{question} with TASK-ID: {task_id}"

	        max_retries = 5
	        base_sleep = 1
	        for attempt in range(max_retries):
	            try:
	                fixed_answer = self.agent.run(question)
	            except Exception as e:
	                if attempt >= max_retries - 1:
	                    return f"Error processing query after {max_retries} attempts: {str(e)}"
	                # Linear back-off: 1s, 2s, 3s, ...
	                sleep_time = base_sleep * (attempt + 1)
	                print(str(e))
	                print(f"Attempt {attempt + 1} failed. Retrying in {sleep_time} seconds...")
	                time.sleep(sleep_time)
	                continue
	            print(f"Agent returning fixed answer: {fixed_answer}")
	            # Pause for a minute after every successful answer before
	            # returning to the caller.
	            time.sleep(60)
	            return fixed_answer
	        # Only reachable if max_retries is set to 0.
	        return "This is a default answer."

	    def _analyze_video(self, url: str) -> str:
	        """Describe a YouTube video from its title and description.

	        Only metadata extracted with yt-dlp is sent to the chat model; the
	        video itself is not downloaded.

	        Args:
	            url: http(s) URL of a YouTube video.

	        Returns:
	            The model's analysis, or a message explaining why the video
	            could not be analysed.
	        """
	        try:
	            # Validate URL
	            parsed_url = urlparse(url)
	            if not all([parsed_url.scheme, parsed_url.netloc]):
	                return "Please provide a valid video URL with http:// or https:// prefix."

	            # Check if it's a YouTube URL
	            if 'youtube.com' not in url and 'youtu.be' not in url:
	                return "Only YouTube videos are supported at this time."
	        except Exception as e:
	            return f"Error analyzing video: {str(e)}"

	        # Configure yt-dlp with minimal extraction
	        ydl_opts = {
	            'quiet': True,
	            'no_warnings': True,
	            'extract_flat': True,
	            'no_playlist': True,
	            'youtube_include_dash_manifest': False
	        }

	        try:
	            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	                try:
	                    # Try basic info extraction
	                    info = ydl.extract_info(url, download=False, process=False)
	                    if not info:
	                        return "Could not extract video information."

	                    title = info.get('title', 'Unknown')
	                    description = info.get('description', '')

	                    # Create a detailed prompt with available metadata
	                    prompt = f"""Please analyze this YouTube video:
	Title: {title}
	URL: {url}
	Description: {description}
	Please provide a detailed analysis focusing on:
	1. Main topic and key points from the title and description
	2. Expected visual elements and scenes
	3. Overall message or purpose
	4. Target audience"""

	                    # Use the LLM with proper message format
	                    messages = [HumanMessage(content=prompt)]
	                    response = self.model.invoke(messages)
	                    return response.content if hasattr(response, 'content') else str(response)

	                except Exception as e:
	                    if 'Sign in to confirm' in str(e):
	                        return "This video requires age verification or sign-in. Please provide a different video URL."
	                    return f"Error accessing video: {str(e)}"

	        except Exception as e:
	            return f"Error extracting video info: {str(e)}"

	    def __setup_agent__(self) -> AgentExecutor:
	        """Build the conversational agent and wrap it in an AgentExecutor.

	        Returns:
	            An executor over self.tools that uses self.memory for the chat
	            history, allows up to 30 iterations and recovers from output
	            parsing errors.
	        """
	        PREFIX = """
	You are a general AI assistant that can use various tools to answer question. I will ask you a question. Report your thoughts, and finish your answer with the following template:
	FINAL ANSWER: [YOUR FINAL ANSWER].
	YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separared list of numbers and/or strings.
	If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
	If you are asked for a string, don't use articles, neither abbreviations (eg. for cities), and write the digits in plain text unless specified otherwise.
	If you are asked for a comma separated list, apply the above rules depending of whether the element to put in the list is a number or a string.

	NOTE:
	- If Task ID is included in the question, remember to call the relevant read tools [ie. read_file, excel_read, csv_read, mp3_listen, image_caption]
	- run_python is called when the question mentions the term "Python" or any math calculation.
	"""
	        FORMAT_INSTRUCTIONS = """
	To use a tool, use the following format:
	Thought: Do I need to use a tool? Yes
	Action: the action to take, should be one of [{tool_names}]
	Action Input: the input to the action
	Observation: the result of the action
	When you have a response to say to the Human, or if you do not need to use a tool, you MUST use the format:
	Thought: Do I need to use a tool? No
	Final Answer: [your response here]
	Begin! Remember to ALWAYS include 'Thought:', 'Action:', 'Action Input:', and 'Final Answer:' in your responses.
	"""
	        SUFFIX = """
	Previous conversation history:
	{chat_history}
	New question: {input}
	{agent_scratchpad}
	"""
	        agent = ConversationalAgent.from_llm_and_tools(
	            llm=self.model,
	            tools=self.tools,
	            prefix=PREFIX,
	            format_instructions=FORMAT_INSTRUCTIONS,
	            suffix=SUFFIX,
	            # The output parser finishes on "<ai_prefix>:"; align it with
	            # the "Final Answer:" marker requested in FORMAT_INSTRUCTIONS.
	            ai_prefix="Final Answer",
	            # {tool_names} is filled in when the prompt is assembled, so it
	            # is not a run-time input variable.
	            input_variables=["input", "chat_history", "agent_scratchpad"],
	        )
	        return AgentExecutor.from_agent_and_tools(
	            agent=agent,
	            tools=self.tools,
	            memory=self.memory,
	            max_iterations=30,
	            verbose=True,
	            # Parsing-error recovery is an executor option.
	            handle_parsing_errors=True,
	        )