Spaces:
Sleeping
Sleeping
from smolagents import CodeAgent, InferenceClientModel | |
from smolagents.default_tools import PythonInterpreterTool, DuckDuckGoSearchTool | |
from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv, load_dataframe_from_excel | |
from tools import tavily_search_tool, read_python_file_from_path | |
from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby | |
from vlm_tools import image_processing, object_detection_tool, ocr_scan_tool, extract_images_from_video, get_image_from_file_path, get_video_from_file_path | |
from audio_tools import transcribe_audio_tool, get_audio_from_file_path, noise_reduction, audio_segmentation, speaker_diarization | |
from community_tools import community_tools, get_youtube_transcript_from_url, search_tools | |
from browser import browser_manager | |
import os | |
import logging | |
import yaml | |
from typing import List, Optional | |
from smolagents.tools import Tool | |
# Root logger is configured at DEBUG for the whole process.
logging.basicConfig(level=logging.DEBUG)

# Candidate model ids per agent role. Each value is a list; the agents built
# further down this module read element 0 of the list for their role.
MODEL_CHOICES = dict(
    audio=["Qwen/Qwen2.5-Coder-32B-Instruct"],
    vlm=["Qwen/Qwen2.5-Coder-32B-Instruct"],
    math=["Qwen/Qwen2.5-Coder-7B-Instruct"],
    context_search=["Qwen/Qwen2.5-Coder-32B-Instruct"],
    master=["Qwen/Qwen2.5-Coder-32B-Instruct"],
)
def _load_prompt_templates(path: str):
    """Return the parsed contents of the YAML prompt file at *path*.

    The file is opened explicitly as UTF-8 so the result does not depend on
    the platform's default text encoding. Parsing uses ``yaml.safe_load``.

    Raises:
        OSError: if the file cannot be opened.
        yaml.YAMLError: if the file is not valid YAML.
    """
    with open(path, "r", encoding="utf-8") as stream:
        return yaml.safe_load(stream)


# Loaded once at import time; paths are relative to the working directory.
prompt_templates = _load_prompt_templates("prompts/prompts.yaml")
audio_prompt_templates = _load_prompt_templates("prompts/audio_prompts.yaml")
vlm_prompt_templates = _load_prompt_templates("prompts/vlm_prompts.yaml")
context_search_prompt_templates = _load_prompt_templates("prompts/context_search_prompts.yaml")

# Prompt templates keyed by agent name.
PROMPT_TEMPLATE = {
    "master_agent": prompt_templates,
    "audio_agent": audio_prompt_templates,
    "vlm_agent": vlm_prompt_templates,
    "context_search_agent": context_search_prompt_templates,
}
# Consolidated authorized imports for all agents.
#
# Every entry must be followed by a comma: adjacent string literals are
# concatenated by Python, so a missing comma silently fuses two module names
# into one bogus entry (previously "pyxlsb" + "pyplot" -> "pyxlsbpyplot").
#
# NOTE(review): "os" and "pickle" give agent-generated code filesystem/process
# access and arbitrary-object deserialization. Confirm this is intended for
# every agent that receives this list.
AUTHORIZED_IMPORTS = [
    # Audio processing
    "wave", "speech_recognition", "pytube", "pytube3", "youtube_dl", "pydub", "pyAudioAnalysis",
    # Image/Video processing
    "cv2", "cv2.dnn", "cv2.imread", "pytesseract", "onnxruntime", "PIL", "PIL.Image", "bs4", "tesseract",
    # Data processing
    "numpy", "pandas", "sklearn", "scipy", "math", "hmmlearn",
    # File handling
    "base64", "io", "json", "os", "pickle", "openpyxl", "pyxlsb",
    # Visualization
    "pyplot", "matplotlib", "matplotlib.pyplot",
    # Utilities
    "logging", "yaml", "datetime", "typing", "markdownify", "requests", "chess",
]
# --- Audio sub-agent --------------------------------------------------------
# Model backing the audio agent; the API token is read from the environment.
audio_model = InferenceClientModel(
    max_tokens=18000,
    token=os.getenv("HUGGINGFACE_API_KEY"),
    model_id=MODEL_CHOICES["audio"][0],
)

# Tools handed to the audio agent.
_AUDIO_TOOLS = [
    transcribe_audio_tool,
    get_audio_from_file_path,
    noise_reduction,
    audio_segmentation,
    speaker_diarization,
]

audio_agent = CodeAgent(
    name="audio_agent",
    description="This agent is responsible for processing audio, loading mp3 audio and converting it to base64, reducing noise, segmenting audio and transcribing audio (in base64 format). It cannot process videos.",
    model=audio_model,
    tools=_AUDIO_TOOLS,
    prompt_templates=PROMPT_TEMPLATE["audio_agent"],
    additional_authorized_imports=AUTHORIZED_IMPORTS,
    max_steps=4,
    planning_interval=4,
)
# --- Vision sub-agent -------------------------------------------------------
# Model backing the vlm agent; the API token is read from the environment.
vlm_model = InferenceClientModel(
    max_tokens=18000,
    token=os.getenv("HUGGINGFACE_API_KEY"),
    model_id=MODEL_CHOICES["vlm"][0],
)

# Tools handed to the vlm agent.
_VLM_TOOLS = [
    image_processing,
    object_detection_tool,
    ocr_scan_tool,
    extract_images_from_video,
    get_image_from_file_path,
    get_video_from_file_path,
]

vlm_agent = CodeAgent(
    name="vlm_agent",
    description="This agent is responsible for downloading images or videos, processing images or videos, detecting objects in them and extracting text from them. It cannot process audios.",
    model=vlm_model,
    tools=_VLM_TOOLS,
    prompt_templates=PROMPT_TEMPLATE["vlm_agent"],
    additional_authorized_imports=AUTHORIZED_IMPORTS,
    max_steps=4,
    planning_interval=4,
)
# --- Math / dataframe sub-agent ---------------------------------------------
# Model backing the math agent; the API token is read from the environment.
math_model = InferenceClientModel(
    max_tokens=6000,
    token=os.getenv("HUGGINGFACE_API_KEY"),
    model_id=MODEL_CHOICES["math"][0],
)

# Tools handed to the math agent: number helpers first, dataframe helpers after.
_MATH_TOOLS = [
    operate_two_numbers,
    convert_number,
    load_dataframe_from_csv,
    load_dataframe_from_excel,
    to_dataframe,
    to_json,
    get_dataframe_data,
    get_dataframe_column,
    get_dataframe_row,
    get_dataframe_groupby,
]

# Unlike the other sub-agents, no prompt_templates argument is passed here.
math_agent = CodeAgent(
    name="math_agent",
    description="This agent is responsible for performing arithmetic operations on two numbers. It can also perform dataframe operations such as converting data to a dataframe, performing calculations on such dataframe and converting the dataframe back to a json or a csv file",
    model=math_model,
    tools=_MATH_TOOLS,
    additional_authorized_imports=AUTHORIZED_IMPORTS,
    max_steps=4,
    planning_interval=4,
)
# --- Context-search sub-agent -----------------------------------------------
# Model backing the context-search agent; the API token is read from the environment.
context_search_model = InferenceClientModel(
    max_tokens=24000,
    token=os.getenv("HUGGINGFACE_API_KEY"),
    model_id=MODEL_CHOICES["context_search"][0],
)

context_search_agent = CodeAgent(
    name="context_search_agent",
    description="This agent is responsible for searching the web for context using wikipedia for general information and arxiv for scientific information.",
    model=context_search_model,
    # Pass a fresh list so the imported search_tools collection is not shared.
    tools=list(search_tools),
    prompt_templates=PROMPT_TEMPLATE["context_search_agent"],
    additional_authorized_imports=AUTHORIZED_IMPORTS,
    max_steps=4,
    planning_interval=4,
)
# Model backing the master (orchestrating) agent; the API token is read from
# the environment.
master_model = InferenceClientModel(
    max_tokens=24000,
    token=os.getenv("HUGGINGFACE_API_KEY"),
    model_id=MODEL_CHOICES["master"][0],
)
class MasterAgentWrapper:
    """Own the master ``CodeAgent`` and attach browser tools only while a question runs.

    The agent is built once with a fixed base tool set. For each ``run`` call,
    browser tools are obtained from ``browser_manager``, merged into the
    agent's tool mapping for the duration of that call, and removed afterwards.

    NOTE(review): the tool swap mutates ``self.master_agent.tools`` without
    any locking, so overlapping ``run`` calls on the same wrapper would see
    each other's tools. Confirm that callers serialize access.
    """

    def __init__(self):
        # Imported here so the fix needs no change to the module's import block.
        from smolagents.monitoring import LogLevel

        # Tools that are always available to the master agent.
        self.base_tools = [
            sort_list,
            get_youtube_transcript_from_url,
            read_python_file_from_path,
            PythonInterpreterTool(),
            DuckDuckGoSearchTool(),
            tavily_search_tool,
            *community_tools,
        ]
        self.master_agent = CodeAgent(
            model=master_model,
            # NOTE(review): context_search_agent is built in this module and
            # named in the description below, but is not managed here.
            # Confirm whether the omission is deliberate.
            managed_agents=[audio_agent, vlm_agent, math_agent],
            tools=self.base_tools,  # Browser tools are added per run, not here
            add_base_tools=False,
            max_steps=20,  # Step budget for one run
            additional_authorized_imports=AUTHORIZED_IMPORTS,
            # smolagents uses its own LogLevel scale; the stdlib logging.INFO
            # constant (20) is above every LogLevel value and would enable
            # all agent output rather than INFO-level output.
            verbosity_level=LogLevel.INFO,
            planning_interval=5,
            prompt_templates=PROMPT_TEMPLATE["master_agent"],
            name="master_agent",
            description="This agent is responsible for managing audio, vlm, context_search and math agents.",
        )

    def _run_with_browser_tools(self, question: str, browser_tools: List[Tool]) -> str:
        """Run the master agent on *question* with *browser_tools* temporarily attached.

        Args:
            question: The task text passed to the agent's ``run``.
            browser_tools: Tools to add for this run, keyed by their ``name``;
                a browser tool replaces a base tool of the same name.

        Returns:
            Whatever the agent's ``run`` returns.
        """
        original_tools = self.master_agent.tools
        # Build a new mapping so the original one is never mutated.
        merged_tools = dict(original_tools)
        merged_tools.update({tool.name: tool for tool in browser_tools})
        self.master_agent.tools = merged_tools
        try:
            return self.master_agent.run(question)
        finally:
            # Always detach the browser tools, even if the run raised.
            self.master_agent.tools = original_tools

    def run(self, question: str) -> str:
        """Answer *question* with browser tools available for the duration of the call.

        Browser tools are acquired through the ``browser_manager.get_browser_tools()``
        context manager, which is exited once the run finishes or fails.

        Raises:
            Exception: any error from acquiring the tools or running the agent
                is logged with its traceback and re-raised unchanged.
        """
        try:
            with browser_manager.get_browser_tools() as browser_tools:
                return self._run_with_browser_tools(question, browser_tools)
        except Exception as e:
            # logging.exception keeps the traceback that logging.error dropped.
            logging.exception("Error in master agent run: %s", e)
            raise
# Single shared wrapper instance, constructed when this module is imported.
master_agent = MasterAgentWrapper()


# Function-style entry point kept for backward compatibility.
def run_master_agent(question: str) -> str:
    """Forward *question* to the shared ``master_agent`` and return its result."""
    answer = master_agent.run(question)
    return answer
#TESTING 5 | |