Spaces:
Sleeping
Sleeping
File size: 8,129 Bytes
01186d8 b114e5e ad0b84b 69fc68f 0db692a 915154c 2fac6a6 b114e5e d812eb3 dde4764 01186d8 ded1403 59377f3 01186d8 dde4764 3a27209 b114e5e 3a27209 0db692a dde4764 6d2e403 ded1403 6d2e403 b114e5e ded1403 6d2e403 b114e5e ded1403 6aa97d2 1cb8f4e 6aa97d2 1cb8f4e 6aa97d2 f864d65 6aa97d2 1cb8f4e 6aa97d2 1cb8f4e 6aa97d2 dde4764 92a4c5d 5ec6e01 204b035 dde4764 2fac6a6 ca74fff 6aa97d2 ca74fff 8d5a7ee b114e5e da4e402 dde4764 92a4c5d 5ec6e01 204b035 dde4764 915154c ca74fff 6aa97d2 ca74fff 8d5a7ee b114e5e 915154c dde4764 b114e5e 5ec6e01 0db692a dde4764 b114e5e 3a27209 333a8cc 6aa97d2 b114e5e 0db692a dde4764 b114e5e 5ec6e01 b114e5e 0db692a dde4764 b114e5e 3397cf1 3a27209 6aa97d2 b114e5e 3397cf1 0db692a dde4764 92a4c5d 5ec6e01 92a4c5d 1a04a88 3db6b5f b114e5e 3397cf1 b114e5e 1a04a88 b114e5e 1a04a88 3db6b5f b114e5e 1a04a88 90254a0 1a04a88 b114e5e 1a04a88 58c2c51 beb3f4b 59377f3 ff1ad14 59377f3 beb3f4b 59377f3 1a04a88 d812eb3 1a04a88 525242c d9f4863 525242c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
from smolagents import CodeAgent, InferenceClientModel
from smolagents.default_tools import PythonInterpreterTool, DuckDuckGoSearchTool
from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv, load_dataframe_from_excel
from tools import tavily_search_tool, read_python_file_from_path
from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
from vlm_tools import image_processing, object_detection_tool, ocr_scan_tool, extract_images_from_video, get_image_from_file_path, get_video_from_file_path
from audio_tools import transcribe_audio_tool, get_audio_from_file_path, noise_reduction, audio_segmentation, speaker_diarization
from community_tools import community_tools, get_youtube_transcript_from_url, search_tools
from browser import browser_manager
import os
import logging
import yaml
from typing import List, Optional
from smolagents.tools import Tool
# Configure the root logger at DEBUG level for the whole process.
logging.basicConfig(level=logging.DEBUG)

# Candidate model ids keyed by agent role. Each role maps to a list; the
# model constructors in this module read index 0 of the relevant list.
MODEL_CHOICES = dict(
    audio=["Qwen/Qwen2.5-Coder-32B-Instruct"],
    vlm=["Qwen/Qwen2.5-Coder-32B-Instruct"],
    math=["Qwen/Qwen2.5-Coder-7B-Instruct"],
    context_search=["Qwen/Qwen2.5-Coder-32B-Instruct"],
    master=["Qwen/Qwen2.5-Coder-32B-Instruct"],
)
def _load_prompt_templates(path):
    """Parse the YAML file at *path* and return the loaded object.

    The file is opened explicitly as UTF-8 so that parsing does not depend
    on the platform's locale encoding, and it is closed as soon as parsing
    finishes.

    Raises:
        OSError: if the file cannot be opened.
        yaml.YAMLError: if the content is not valid YAML.
    """
    with open(path, "r", encoding="utf-8") as stream:
        return yaml.safe_load(stream)


# One template set per agent, loaded once when this module is imported.
prompt_templates = _load_prompt_templates("prompts/prompts.yaml")
audio_prompt_templates = _load_prompt_templates("prompts/audio_prompts.yaml")
vlm_prompt_templates = _load_prompt_templates("prompts/vlm_prompts.yaml")
context_search_prompt_templates = _load_prompt_templates("prompts/context_search_prompts.yaml")

# Lookup table from agent name to the templates loaded above.
PROMPT_TEMPLATE = {
    "master_agent": prompt_templates,
    "audio_agent": audio_prompt_templates,
    "vlm_agent": vlm_prompt_templates,
    "context_search_agent": context_search_prompt_templates,
}
# Consolidated authorized imports for all agents.
# Every entry must end with a comma: Python silently concatenates adjacent
# string literals, so a missing comma merges two module names into a single
# entry that matches neither of them.
# NOTE(review): "os" and "pickle" are powerful modules to expose to
# model-written code -- confirm they are really needed.
AUTHORIZED_IMPORTS = [
    # Audio processing
    "wave", "speech_recognition", "pytube", "pytube3", "youtube_dl", "pydub", "pyAudioAnalysis",
    # Image/Video processing
    "cv2", "cv2.dnn", "cv2.imread", "pytesseract", "onnxruntime", "PIL", "PIL.Image", "bs4", "tesseract",
    # Data processing
    "numpy", "pandas", "sklearn", "scipy", "math", "hmmlearn",
    # File handling
    "base64", "io", "json", "os", "pickle", "openpyxl", "pyxlsb",
    # Visualization
    "pyplot", "matplotlib", "matplotlib.pyplot",
    # Utilities
    "logging", "yaml", "datetime", "typing", "markdownify", "requests", "chess",
]
# Audio specialist: the model handle, then the agent wrapping the audio tools.
# The API token is read from the HUGGINGFACE_API_KEY environment variable.
audio_model = InferenceClientModel(
    max_tokens=18000,
    token=os.getenv("HUGGINGFACE_API_KEY"),
    model_id=MODEL_CHOICES["audio"][0],
)
audio_agent = CodeAgent(
    name="audio_agent",
    description="This agent is responsible for processing audio, loading mp3 audio and converting it to base64, reducing noise, segmenting audio and transcribing audio (in base64 format). It cannot process videos.",
    model=audio_model,
    prompt_templates=PROMPT_TEMPLATE["audio_agent"],
    tools=[
        transcribe_audio_tool,
        get_audio_from_file_path,
        noise_reduction,
        audio_segmentation,
        speaker_diarization,
    ],
    additional_authorized_imports=AUTHORIZED_IMPORTS,
    max_steps=4,
    planning_interval=4,
)
# Vision specialist: the model handle, then the agent wrapping the image and
# video tools. The API token is read from HUGGINGFACE_API_KEY.
vlm_model = InferenceClientModel(
    max_tokens=18000,
    token=os.getenv("HUGGINGFACE_API_KEY"),
    model_id=MODEL_CHOICES["vlm"][0],
)
vlm_agent = CodeAgent(
    name="vlm_agent",
    description="This agent is responsible for downloading images or videos, processing images or videos, detecting objects in them and extracting text from them. It cannot process audios.",
    model=vlm_model,
    prompt_templates=PROMPT_TEMPLATE["vlm_agent"],
    tools=[
        image_processing,
        object_detection_tool,
        ocr_scan_tool,
        extract_images_from_video,
        get_image_from_file_path,
        get_video_from_file_path,
    ],
    additional_authorized_imports=AUTHORIZED_IMPORTS,
    max_steps=4,
    planning_interval=4,
)
# Math/dataframe specialist. It is the only agent here built from the "math"
# model entry, and it is created without custom prompt templates.
math_model = InferenceClientModel(
    max_tokens=6000,
    token=os.getenv("HUGGINGFACE_API_KEY"),
    model_id=MODEL_CHOICES["math"][0],
)
math_agent = CodeAgent(
    name="math_agent",
    description="This agent is responsible for performing arithmetic operations on two numbers. It can also perform dataframe operations such as converting data to a dataframe, performing calculations on such dataframe and converting the dataframe back to a json or a csv file",
    model=math_model,
    tools=[
        operate_two_numbers,
        convert_number,
        load_dataframe_from_csv,
        load_dataframe_from_excel,
        to_dataframe,
        to_json,
        get_dataframe_data,
        get_dataframe_column,
        get_dataframe_row,
        get_dataframe_groupby,
    ],
    additional_authorized_imports=AUTHORIZED_IMPORTS,
    max_steps=4,
    planning_interval=4,
)
# Context-search specialist: its tool list is a fresh list built from the
# imported search_tools collection.
context_search_model = InferenceClientModel(
    max_tokens=24000,
    token=os.getenv("HUGGINGFACE_API_KEY"),
    model_id=MODEL_CHOICES["context_search"][0],
)
context_search_agent = CodeAgent(
    name="context_search_agent",
    description="This agent is responsible for searching the web for context using wikipedia for general information and arxiv for scientific information.",
    model=context_search_model,
    prompt_templates=PROMPT_TEMPLATE["context_search_agent"],
    tools=list(search_tools),
    additional_authorized_imports=AUTHORIZED_IMPORTS,
    max_steps=4,
    planning_interval=4,
)
# Model handle used by the master (orchestrating) agent; the API token is
# read from the HUGGINGFACE_API_KEY environment variable.
master_model = InferenceClientModel(
    max_tokens=24000,
    token=os.getenv("HUGGINGFACE_API_KEY"),
    model_id=MODEL_CHOICES["master"][0],
)
class MasterAgentWrapper:
    """Own the master CodeAgent and attach browser tools for each run.

    The master agent is built once with a fixed base tool set. Browser tools
    are obtained from ``browser_manager`` on every call to :meth:`run`, added
    to the agent for that run only, and removed again afterwards.

    NOTE(review): the tool swap reassigns ``self.master_agent.tools`` on the
    shared agent without any locking, so concurrent ``run`` calls on the same
    wrapper look unsafe -- confirm how callers use this.
    """

    def __init__(self):
        # Tools that are always registered, independent of any browser session.
        self.base_tools = [
            sort_list,
            get_youtube_transcript_from_url,
            read_python_file_from_path,
            PythonInterpreterTool(),
            DuckDuckGoSearchTool(),
            tavily_search_tool,
            *community_tools,
        ]
        # NOTE(review): the description names a context_search agent, but
        # context_search_agent is not listed in managed_agents -- confirm
        # which of the two is intended.
        # NOTE(review): verbosity_level receives the stdlib constant
        # logging.INFO rather than a smolagents-specific level -- confirm the
        # library interprets it as intended.
        self.master_agent = CodeAgent(
            model=master_model,
            managed_agents=[audio_agent, vlm_agent, math_agent],
            tools=self.base_tools,  # Initialize without browser tools
            add_base_tools=False,
            max_steps=20,  # step budget for one run
            additional_authorized_imports=AUTHORIZED_IMPORTS,
            verbosity_level=logging.INFO,
            planning_interval=5,
            prompt_templates=PROMPT_TEMPLATE["master_agent"],
            name="master_agent",
            description="This agent is responsible for managing audio, vlm, context_search and math agents.",
        )

    def _run_with_browser_tools(self, question: str, browser_tools: List[Tool]) -> str:
        """Run the master agent on *question* with *browser_tools* added.

        The agent's tool mapping is replaced by a copy extended with the
        browser tools, keyed by ``tool.name`` (a browser tool therefore
        overrides an existing tool of the same name). A snapshot of the
        previous mapping is put back in ``finally``, whether or not the run
        raises.

        Args:
            question: The task text passed to the agent's ``run``.
            browser_tools: Tools to expose for this run only.

        Returns:
            Whatever ``self.master_agent.run(question)`` returns.
        """
        # Snapshot first so the restore below never sees the browser tools.
        original_tools = self.master_agent.tools.copy()
        all_tools = original_tools.copy()
        all_tools.update((tool.name, tool) for tool in browser_tools)
        self.master_agent.tools = all_tools
        try:
            return self.master_agent.run(question)
        finally:
            self.master_agent.tools = original_tools

    def run(self, question: str) -> str:
        """Answer *question* with browser tools attached for this call.

        Browser tools are acquired through the ``browser_manager`` context
        manager, so they are released when the run finishes or fails.

        Raises:
            Exception: any error from acquiring the tools or running the
                agent is logged with its traceback and then re-raised.
        """
        try:
            with browser_manager.get_browser_tools() as browser_tools:
                return self._run_with_browser_tools(question, browser_tools)
        except Exception as e:
            # logging.exception logs at ERROR level and appends the traceback,
            # so the failure point is visible in the log itself.
            logging.exception("Error in master agent run: %s", e)
            raise
# Build the wrapped master agent once, at import time.
master_agent = MasterAgentWrapper()


def run_master_agent(question: str) -> str:
    """Forward *question* to the module-level ``master_agent``.

    Function-style entry point kept for backward compatibility; it returns
    whatever ``master_agent.run`` returns.
    """
    answer = master_agent.run(question)
    return answer
#TESTING 5
|