Spaces:
Sleeping
Sleeping
Create agentic2.py
Browse files- agentic2.py +974 -0
agentic2.py
ADDED
@@ -0,0 +1,974 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langgraph.graph import StateGraph, START, END
|
2 |
+
from typing_extensions import TypedDict, Annotated, Literal, Optional
|
3 |
+
from langchain_core.messages import AnyMessage, HumanMessage, SystemMessage
|
4 |
+
from langgraph.graph.message import add_messages
|
5 |
+
from langchain_mistralai import ChatMistralAI
|
6 |
+
from langchain_openai import ChatOpenAI
|
7 |
+
from langgraph.prebuilt import ToolNode, tools_condition
|
8 |
+
from langchain_core.runnables.graph import MermaidDrawMethod
|
9 |
+
from langchain_community.tools import DuckDuckGoSearchRun
|
10 |
+
from langchain_community.tools import WikipediaQueryRun
|
11 |
+
from langchain_community.utilities import WikipediaAPIWrapper
|
12 |
+
from langchain_aws import ChatBedrock
|
13 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
14 |
+
from langchain_community.document_loaders import UnstructuredExcelLoader
|
15 |
+
# from langchain_google_vertexai import ChatVertexAI
|
16 |
+
|
17 |
+
# from langfuse.callback import CallbackHandler
|
18 |
+
|
19 |
+
import base64
|
20 |
+
import json
|
21 |
+
import time
|
22 |
+
import requests
|
23 |
+
|
24 |
+
|
25 |
+
# import boto3
|
26 |
+
|
27 |
+
from yt_dlp import YoutubeDL
|
28 |
+
import os
|
29 |
+
# from urllib.parse import urlparse, parse_qs
|
30 |
+
import re
|
31 |
+
from dotenv import load_dotenv
|
32 |
+
|
33 |
+
# Load env vars from .env file
|
34 |
+
load_dotenv()
|
35 |
+
|
36 |
+
# Initialize Langfuse CallbackHandler for LangGraph/Langchain (tracing)
|
37 |
+
# langfuse_handler = CallbackHandler()
|
38 |
+
|
39 |
+
######## STATE ########
class State(TypedDict):
    """
    A class representing the state of the agent.

    Each `*_node_result` field holds the latest message produced by the
    corresponding graph node so the formatter can read it at the end.
    """
    question: str  # the user question being answered
    messages: Annotated[list[AnyMessage], add_messages]  # running conversation, merged via add_messages
    input_file: str  # name of the task's attached file, if any
    downloaded_file: Optional[str]  # local path of the downloaded attachment (None if absent)
    task_id: str  # identifier used to fetch the attachment from the API
    web_search_node_result: AnyMessage  # latest output of web_search_node
    thinking_node_result: AnyMessage  # latest output of thinking_node
    vision_node_result: AnyMessage  # latest output of vision_node
    video_node_result: AnyMessage  # latest output of video_node
    audio_node_result: AnyMessage  # latest output of audio_node
    code_node_result: AnyMessage  # latest output of code_node
    excel_node_result: AnyMessage  # latest output of excel_node
    next_node: str  # name of the next node to route to
|
57 |
+
|
58 |
+
########################
|
59 |
+
|
60 |
+
######## MODELS ########
|
61 |
+
def get_general_model():
    """
    Build the general-purpose chat model.

    The provider is selected via the LLM_PROVIDER environment variable:
    "mistral" (default) for Mistral, or "aws" for Amazon Bedrock.

    Returns:
        A chat model instance for the configured provider.

    Raises:
        ValueError: if LLM_PROVIDER names an unsupported provider.
            (Previously an unknown provider surfaced as an
            UnboundLocalError on `general_model`.)
    """
    llm_provider = os.getenv("LLM_PROVIDER", "mistral")

    if llm_provider == "mistral":
        return ChatMistralAI(
            model="mistral-large-2411",  # alternatives: "ministral-8b-latest", "mistral-small-latest"
            temperature=0,
            max_retries=2,
            api_key=os.getenv("MISTRAL_API_KEY"),
        )

    if llm_provider == "aws":
        return ChatBedrock(
            model_id="arn:aws:bedrock:us-east-1:416545197702:inference-profile/us.amazon.nova-lite-v1:0",
            # provider="amazon",
            temperature=0,
            region_name="eu-west-3",
            aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
            aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
        )

    raise ValueError(f"Unsupported LLM_PROVIDER: {llm_provider!r}")
|
84 |
+
|
85 |
+
def get_big_model():
    """Return the larger Mistral chat model used for heavyweight tasks."""
    return ChatMistralAI(
        model="mistral-medium-2505",
        temperature=0,
        max_retries=2,
        api_key=os.getenv("MISTRAL_API_KEY"),
    )
|
95 |
+
|
96 |
+
def get_vision_model():
    """
    Build the vision (VLM) chat model.

    The provider is selected via the VLM_PROVIDER environment variable:
    "mistral" (default) or "openai".

    Returns:
        A vision-capable chat model instance.

    Raises:
        ValueError: if VLM_PROVIDER names an unsupported provider.
            (Previously an unknown provider surfaced as an
            UnboundLocalError on `vision_model`.)
    """
    vlm_provider = os.getenv("VLM_PROVIDER", "mistral")

    if vlm_provider == "openai":
        print("Spawning Open AI VLM")
        return ChatOpenAI(
            model="gpt-4o",
            temperature=0,
            max_tokens=None,
            timeout=None,
            max_retries=2,
            api_key=os.getenv("OPENAI_API_KEY"),
        )

    if vlm_provider == "mistral":
        print("Spawning Mistral VLM")
        return ChatMistralAI(
            model="pixtral-12b-2409",  # alternatives: "mistral-small-latest", "pixtral-large-latest"
            temperature=0,
            max_retries=2,
            api_key=os.getenv("MISTRAL_API_KEY"),
        )

    raise ValueError(f"Unsupported VLM_PROVIDER: {vlm_provider!r}")
|
121 |
+
|
122 |
+
def get_video_handler_model():
    """Return the Gemini model used to analyze video content."""
    return ChatGoogleGenerativeAI(
        model="gemini-2.0-flash",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        # other params...
    )
|
134 |
+
|
135 |
+
def get_audio_handler_model():
    """Return the OpenAI audio-preview model used to analyze audio content."""
    return ChatOpenAI(
        model="gpt-4o-audio-preview-2024-12-17",  # alternative: "gpt-4o-mini-audio-preview-2024-12-17"
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        api_key=os.getenv("OPENAI_API_KEY"),
    )
|
146 |
+
|
147 |
+
########################
|
148 |
+
|
149 |
+
######## Functions ########
|
150 |
+
|
151 |
+
def download_youtube_content(url: str, output_path: Optional[str] = None) -> Optional[str]:
    """
    Download YouTube content (single video or playlist) in MP4 format only.

    Args:
        url (str): URL of the YouTube video or playlist
        output_path (str, optional): Directory to save the downloads. Defaults to './downloads'

    Returns:
        Optional[str]: Path to the single downloaded .mp4 file, or None when
        zero or several .mp4 files end up in the output directory.
        (The original annotation said ``-> None`` although a path was returned.)

    Side effects:
        Every file in ``output_path`` other than the returned video is deleted.
    """
    # Set default output path if none provided
    if output_path is None:
        output_path = os.path.join(os.getcwd(), 'downloads')

    # Create output directory if it doesn't exist
    os.makedirs(output_path, exist_ok=True)

    # Configure yt-dlp options for MP4 only
    ydl_opts = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
        'merge_output_format': 'mp4',
        'ignoreerrors': True,
        'no_warnings': False,
        'extract_flat': False,
        # Disable all additional downloads
        'writesubtitles': False,
        'writethumbnail': False,
        'writeautomaticsub': False,
        'postprocessors': [{
            'key': 'FFmpegVideoConvertor',
            'preferedformat': 'mp4',  # yt-dlp's (historically misspelled) option name
        }],
        # Clean up options
        'keepvideo': False,
        'clean_infojson': True
    }

    ydl_opts['outtmpl'] = os.path.join(output_path, '%(title)s.%(ext)s')
    print("Detected single video URL. Downloading video...")

    try:
        with YoutubeDL(ydl_opts) as ydl:
            # Download content
            ydl.download([url])
            print(f"\nDownload completed successfully! Files saved to: {output_path}")

    except Exception as e:
        # Best effort: report and fall through to inspect whatever was written.
        print(f"An error occurred: {str(e)}")

    result = os.listdir(output_path)

    video_file_names = [x for x in result if re.match(r".*\.mp4$", x)]

    if len(video_file_names) == 1:
        video_file_name = f"{output_path}/{video_file_names[0]}"
    else:
        # Ambiguous (playlist) or failed download: no single file to return.
        video_file_name = None

    # Remove every other file so the downloads directory holds only the video.
    for other_files in result:
        if f"{output_path}/{other_files}" != video_file_name:
            print(f"Removing file: {other_files}")
            os.remove(os.path.join(output_path, other_files))

    return video_file_name
|
215 |
+
|
216 |
+
|
217 |
+
# Tool instances shared by the search nodes (instantiated once at import time).
web_search = DuckDuckGoSearchRun()
wikipedia_search = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
|
219 |
+
|
220 |
+
def download_input_file(task_id: str) -> str:
    """
    Download the file specified in state input_file key.
    You only need the task_id to download the file.

    Args:
        task_id (str): The task_id of the file to download.

    Returns:
        str: The path to the downloaded file, or "" when the request fails.
    """
    output_path = os.path.join(os.getcwd(), 'downloads')

    api_url = os.getenv("DEFAULT_API_URL")

    # Create output directory if it doesn't exist
    os.makedirs(output_path, exist_ok=True)

    # Construct the full URL
    url = f"{api_url}/files/{task_id}"

    try:
        # Send a GET request to download the file
        response = requests.get(url, stream=True)
        response.raise_for_status()  # Raise an error for bad status codes

        # requests' headers mapping is case-insensitive; the previous
        # `dict(response.headers)` conversion made the lookup depend on the
        # server's exact casing ("Content-Disposition" would KeyError).
        attachement = response.headers.get("content-disposition", "")

        regex_result = re.search(r'filename="(.*)"', attachement)
        # Fall back to the task id when the server sends no usable filename
        # (previously an unguarded .group(1) raised AttributeError).
        filename = regex_result.group(1) if regex_result else task_id

        # Define the output file path
        output_file_path = os.path.join(output_path, filename)

        # Write the file to the output path
        with open(output_file_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)

        print(f"File downloaded successfully and saved to: {output_file_path}")

        return output_file_path

    except requests.exceptions.RequestException as e:
        print(f"An error occurred while downloading the file: {str(e)}")
        return ""
|
268 |
+
|
269 |
+
########################
|
270 |
+
|
271 |
+
######## LLM associations ########

# Instantiate every model once at import time so all nodes share them.
general_model = get_general_model()
big_model = get_big_model()

vision_model = get_vision_model()
video_handler_model = get_video_handler_model()
audio_handler_model = get_audio_handler_model()

########################

######## Nodes Definition ########

# Tools offered to the web-search agent.
search_tools = [
    web_search,
    wikipedia_search,
]

download_file_tool = [ download_input_file ]

# General model with the search tools bound; one tool call at a time.
web_search_node_agent = general_model.bind_tools(search_tools, parallel_tool_calls=False)
|
292 |
+
|
293 |
+
def thinking_node(state: State) -> dict:
    """
    A powerful node to answer general questions, reflection, maths, deduction, prediction.
    This node does not handle files
    This node does not handle images or pictures
    This node does not handle videos
    This node does not handle audio
    This node does not handle code

    Args:
        state (State): A dictionary containing the current state of the agent, including the 'question' key which holds the question to be answered.

    Returns:
        dict: A dictionary with the key 'thinking_node_result' holding the list of messages generated by the general model.
    """

    prompt = f"""
    You are a powerful assistant that answers general questions, reflection, maths, deduction, prediction.

    1. You need to fully understand the question
    2. You must think hard about what is relevant in the question to make the best answer
    3. If there are calculations or maths, you need to verify twice before answering.
    4. Report your thought process in detail, explaining your reasoning step-by-step.

    Here is the question {state['question']}
    Now provide your response immediately without any preamble in text but not in markdown.
    """

    # Default to an empty result on the first pass through this node.
    # NOTE(review): on the first run this appends "" as a message below —
    # confirm the provider accepts an empty message in the conversation.
    state["thinking_node_result"] = state.get("thinking_node_result", "")

    sys_msg = SystemMessage(content=prompt)

    # Invoke with the system prompt plus any previous result of this node.
    thinking_node_response = [general_model.invoke([sys_msg] + [state["thinking_node_result"]])]

    thinking_node_response[-1].pretty_print()

    return {
        "thinking_node_result": thinking_node_response,
    }
|
332 |
+
|
333 |
+
def code_node(state: State) -> dict:
    """
    A powerful node to handle and understand code.
    This node does not handle images or pictures
    This node does not handle videos
    This node does not handle audio
    This node does not access the web

    Args:
        state (State): A dictionary containing the current state of the agent, including the 'question' key which holds the question to be answered and 'downloaded_file' pointing at the code file to analyze.

    Returns:
        dict: A dictionary with the key 'code_node_result' holding the list of messages generated by the general model.
    """

    # Read as UTF-8 explicitly: the previous bare open() decoded with the
    # platform default encoding, which can fail on non-ASCII source files.
    with open(state["downloaded_file"], "r", encoding="utf-8") as code_file:
        code = code_file.read()

    prompt = f"""
    You are a powerful assistant that handle and understand code.

    1. You need to fully understand the question.
    2. You must think hard about the code and predict the result to answer the question.
    3. Report your thought process in detail, explaining your reasoning step-by-step.

    Here is the question : {state['question']}
    Here is the code : {code}

    Now provide your response immediately without any preamble in text but not in markdown.
    """

    sys_msg = SystemMessage(content=prompt)

    code_node_response = [general_model.invoke([sys_msg])]

    code_node_response[-1].pretty_print()

    return {
        "code_node_result": code_node_response,
    }
|
373 |
+
|
374 |
+
def web_search_node(state: State) -> dict:
    """
    A powerful node to answer questions and make research on the web based on the question provided in the state.
    This node does not handle files
    This node does not handle images or pictures
    This node does not handle videos
    This node does not handle audio
    This node does not handle code

    Args:
        state (State): A dictionary containing the current state of the agent, including the 'question' key which holds the question to be answered.

    Returns:
        dict: A dictionary containing the response from the web search node, with the key 'web_search_node_result' holding the list of messages generated by the general model.
    """

    # The prompt embeds each tool's description/schema so the agent knows
    # what it can call; the actual binding happens in web_search_node_agent.
    prompt = f"""
    You are a powerful assistant that makes research on the web in order to give the best answer to the question.

    1. You need to fully understand the question
    2. You must think hard about what is relevant in the question to make the best search with write words
    3. You must use the best of the tools you have to answer the question precisly
    4. Report your thought process in detail, explaining your reasoning step-by-step.
    5. You must not change the way words or identifiers are written in the web search results.

    Here are the tools available:
    web_search:
    {web_search.description}
    Args:
    {web_search.args_schema}
    Returns:
    {web_search.response_format}

    wikipedia_search:
    {wikipedia_search.description}
    Args:
    {wikipedia_search.args_schema}
    Returns:
    {wikipedia_search.response_format}

    Here is the question {state['question']}
    Now provide your response immediately without any preamble in text but not in markdown.
    """

    # Default to an empty result on the first pass through this node.
    # NOTE(review): on the first run this appends "" as a message below —
    # confirm the provider accepts an empty message in the conversation.
    state["web_search_node_result"] = state.get("web_search_node_result", "")

    sys_msg = SystemMessage(content=prompt)

    # Invoke the tool-bound agent with the prompt plus any previous result.
    web_search_node_response = [web_search_node_agent.invoke([sys_msg] + [state["web_search_node_result"]])]

    web_search_node_response[-1].pretty_print()

    return {
        "web_search_node_result": web_search_node_response,
    }
|
429 |
+
|
430 |
+
def vision_node(state: State) -> dict:
    """
    Vision model that can analyze images and pictures and answer questions about them.
    This node does not handle videos.
    This node does not handle audio.
    This node does not handle code.

    Args:
        state (State): A dictionary containing the current state of the agent, including the 'question' key which holds the question to be answered and the 'downloaded_file' key which holds the path to the image file.
    Returns:
        dict: A dictionary containing the response from the vision node, with the key 'vision_node_result' holding the list of messages generated by the vision model.
        Returns an empty dict when reading or invoking fails.
    """

    prompt = f"""
    You are a powerful vision assistant, you can analyze images and answer question about the picture

    1. You need to fully understand the question.
    2. You must think hard about what is relevant in the image to make the best answer to the question.
    3. Report your thought process in detail, explaining your reasoning step-by-step.

    Here is the question {state['question']}
    Now provide your response immediately without any preamble in text but not in markdown.
    """

    image_base64 = ""
    try:
        with open(state["downloaded_file"], "rb") as image_file:
            image_bytes = image_file.read()

        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # Mistral expects a data-URL style image_url payload.
        # NOTE(review): the mime type is hard-coded to image/png regardless of
        # the actual file type — confirm non-PNG attachments still work.
        mistral_image_handling = {
            "type": "image_url",
            "image_url": f"data:image/png;base64,{image_base64}",
        }

        # OpenAI expects the base64 payload and mime type as separate fields.
        openai_image_handling = {
            "type": "image",
            "source_type": "base64",
            "mime_type": "image/png",  # or image/png, etc.
            "data": image_base64,
        }

        vision_provider = os.getenv("VLM_PROVIDER", "mistral")

        if vision_provider == "openai":
            image_handling = openai_image_handling
        else:
            image_handling = mistral_image_handling

        message = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt,
                    },
                    image_handling
                ]
            }
        ]

        vision_node_response = [vision_model.invoke(
            input=message,
            # config={
            #     "callbacks": [langfuse_handler]
            # }
        )]

        vision_node_response[-1].pretty_print()

        return {
            "vision_node_result": vision_node_response
        }

    except Exception as e:
        # NOTE(review): this broad except swallows both file-read and model
        # errors and returns {}, leaving 'vision_node_result' unset — confirm
        # downstream nodes tolerate a missing result.
        error_msg = f"Error extracting text: {str(e)}"
        print(error_msg)
        return {}
|
511 |
+
|
512 |
+
def video_node(state: State) -> dict:
    """
    Video handler model that can analyze videos and answer questions about them.
    This node does not handle images or pictures.
    This node does not handle audio.
    This node does not handle code.

    Args:
        state (State): A dictionary containing the current state of the agent, including the 'question' key which holds the question to be answered.

    Returns:
        dict: A dictionary containing the response from the video handler node, with the key 'video_node_result' holding the list of messages generated by the video handler model.
    """

    prompt = f"""
    You are a highly capable video analysis assistant. Your task is to watch and analyze the provided video content and answer the user's question as accurately and concisely as possible.

    1. You need to fully understand the question.
    2. Carefully observe the video, paying attention to relevant details, actions, and context.
    3. Focus on the user's question.
    4. If the question requires counting, identifying, or describing, be precise and clear in your response.
    5. If you are unsure, state what you can infer from the video.
    6. Do not make up information that is not visible or inferable from the video.

    Here is the question {state['question']}
    Now provide your response immediately without any preamble in text but not in markdown.
    """

    # If the question contains a YouTube link, fetch that video; otherwise
    # fall back to the task's downloaded attachment.
    if re.search(r'youtube\.com', state["question"]):
        # More flexible regex pattern to match YouTube URLs
        regex_result = re.search(r"(?P<youtube_url>https://(?:www\.)?youtube\.com/watch\?v=[a-zA-Z0-9_-]+)", state["question"])
        if regex_result:
            video_url = regex_result.group("youtube_url")
            downloaded_video = download_youtube_content(url=video_url)
        else:
            # Fallback if regex doesn't match
            print("Could not extract YouTube URL from question. Using question as fallback.")
            downloaded_video = state["downloaded_file"]
    else:
        downloaded_video = state["downloaded_file"]

    print(f"Downloaded video: {downloaded_video}")

    # NOTE(review): mime type is assumed mp4; download_youtube_content only
    # produces .mp4, but direct attachments are not checked.
    video_mime_type = "video/mp4"

    # NOTE(review): downloaded_video may be None when the YouTube download
    # fails — this open() would then raise TypeError; confirm upstream routing
    # guarantees a file here.
    with open(downloaded_video, "rb") as video_file:
        encoded_video = base64.b64encode(video_file.read()).decode("utf-8")

    # The local copy is no longer needed once it is base64-encoded in memory.
    os.remove(downloaded_video)

    message = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": prompt,
                },
                {
                    "type": "media",
                    "data": encoded_video,  # Use base64 string directly
                    "mime_type": video_mime_type,
                },
            ]
        }
    ]

    video_node_response = [video_handler_model.invoke(
        input=message,
        # config={
        #     "callbacks": [langfuse_handler]
        # }
    )]

    video_node_response[-1].pretty_print()

    return {
        "video_node_result": video_node_response
    }
|
591 |
+
|
592 |
+
def audio_node(state: State) -> dict:
    """
    Audio handler model that can analyze audio and answer questions about it.
    This node does not handle images or pictures.
    This node does not handle video.
    This node does not handle code.

    Args:
        state (State): with question key inside

    Returns:
        dict: A dictionary containing the response from the audio handler node, with the key 'audio_node_result' holding the list of messages generated by the audio handler model.
    """

    prompt = f"""
    You are a highly capable audio analysis assistant. Your task is to listen to and analyze the provided audio content and answer the user's question as accurately and concisely as possible.

    1. You need to fully understand the question.
    2. Carefully listen to the audio, paying attention to relevant details, actions, and context.
    3. Focus on the user's question.
    4. If the question requires counting, identifying, or describing, be precise and clear in your response.
    5. If you are unsure, state what you can infer from the audio.
    6. Do not make up information that is not audible or inferable from the audio.

    Here is the question {state['question']}
    Now provide your response immediately without any preamble in text but not in markdown.
    """

    downloaded_audio = state["downloaded_file"]

    print(f"Downloaded audio: {downloaded_audio}")

    # Derive the audio format from the file extension (e.g. "mp3", "wav").
    # NOTE(review): raises AttributeError when the path has no extension.
    audio_format = re.search(r'\.(\w+)$', downloaded_audio).group(1)

    with open(downloaded_audio, "rb") as audio_file:
        encoded_audio = base64.b64encode(audio_file.read()).decode()

    # The local copy is no longer needed once it is base64-encoded in memory.
    os.remove(downloaded_audio)

    message = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": prompt,
                },
                {
                    "type": "input_audio",
                    "input_audio": {
                        "data": encoded_audio,
                        "format": audio_format,
                    }
                },
            ]
        }
    ]

    audio_node_response = [audio_handler_model.invoke(
        input=message,
        # config={
        #     "callbacks": [langfuse_handler]
        # }
    )]

    audio_node_response[-1].pretty_print()

    return {
        "audio_node_result": audio_node_response
    }
|
662 |
+
|
663 |
+
def excel_node(state: State) -> dict:
    """
    Excel handler model that can analyze excel files and answer questions about it.
    This node does not handle images or pictures.
    This node does not handle video.
    This node does not handle code.
    This node does not handle audio.

    Args:
        state (State): with question key inside

    Returns:
        dict: A dictionary containing the response from the excel handler node, with the key 'excel_node_result' holding the message generated by the excel handler model.
    """

    # Load the workbook as Document elements; each element carries the sheet
    # content (including an HTML rendering in its metadata).
    loader = UnstructuredExcelLoader(state["downloaded_file"], mode="elements")
    docs = loader.load()

    prompt = f"""
    You are a powerful assistant which handles excel files.

    1. You need to fully understand the question.
    2. You must analyze the excel file to answer the question.
    3. If the question requires counting, identifying, or describing, be precise and clear in your response.
    4. Do not make up information that is not in the excel file.

    Here is the question {state['question']}
    Here is the excel file loaded in a Document object: {docs}. You will find htlm content of the file in the 'text_as_html' key.

    Now provide your response immediately without any preamble in text but not in markdown.
    """

    # Note: unlike the other nodes, this returns a single message, not a list.
    response = big_model.invoke(
        input=prompt,
        # config={
        #     "callbacks": [langfuse_handler]
        # }
    )

    response.pretty_print()

    return {
        "excel_node_result": response
    }
|
707 |
+
|
708 |
+
def format_answer_node(state: State):
    """
    Format answer node that formats the answer of the last node.
    This node does not handle images or pictures.
    This node does not handle video.
    This node does not handle audio.
    This node does not handle code.

    Args:
        state (State): with question key inside, and all other nodes results

    Returns:
        dict: A dictionary containing the response from the format answer node, with the key 'messages' holding the list of messages generated by the format answer model.
    """

    prompt = """
    You are the best assistant for final answer formating.

    1. You must not change the content of the response of the last node.
    2. You must fully understand the question
    3. You must return the answer by following hard the format and the constraints
    4. Report your thought process in detail, explaining your reasoning step-by-step.

    5. Conclude your answer with the following template:
    FINAL ANSWER: [YOUR FINAL ANSWER]

    ## Response Format
    - If asked for a number:
        For exemple 'How many' or a question asking for a number result
        - Provide the number without commas, dollar signs, percent signs, or any units (unless specified).
        - Provide digits, not words
    - If asked for a string:
        - Write the string without articles (a, an, the).
        - Don't answer a full sentence when a short version is enough.
        - Do not use abbreviations (e.g., for cities).
        - Write digits in text but (e.g., "one" instead of "1") unless specified otherwise.
        - Start the first word with a capital letter.
    - If asked for a comma-separated list:
        - Apply the above rules for numbers and strings to each element in the list.
        - And take care of having a space after each comma.

    ## Constraints
    - You must not answer if the constraints above are not respected.
    - Your final answer should be provided in the format: FINAL ANSWER: [YOUR FINAL ANSWER]
    - Your final answer should be a number, a string, or a comma-separated list of numbers and/or strings, following the specified formatting rules.

    Now provide your response immediately without any preamble in text but not in markdown.
    """

    nodes_response = [HumanMessage(content="Here are the results of the previous nodes")]

    question = [HumanMessage(content=state["question"])]

    for node_result in ["web_search_node_result", "vision_node_result", "video_node_result", "audio_node_result", "thinking_node_result", "code_node_result", "excel_node_result"]:
        result = state.get(node_result, "")
        if result:
            # Normalize the node result to plain text. Some nodes return a
            # single message, others (e.g. audio_node) return a *list* of
            # messages; previously lists fell through to str(result) and were
            # forwarded as raw object reprs.
            if hasattr(result, "content"):
                content = result.content
            elif isinstance(result, list):
                content = "\n".join(
                    m.content if hasattr(m, "content") else str(m)
                    for m in result
                )
            else:
                content = str(result)
            nodes_response.append(HumanMessage(content=content))

    sys_msg = SystemMessage(content=prompt)

    response = [general_model.invoke([sys_msg] + state["messages"] + question + nodes_response)]

    return {
        "messages": response,
    }
|
778 |
+
|
779 |
+
########################
|
780 |
+
|
781 |
+
######## Entry Node ########
|
782 |
+
def entry_node(state: State) -> dict:
    """
    Router node: reads the question (and the optional attached file name) and
    decides which specialized node should handle it. It never answers the
    question itself.

    Args:
        state (State): with question, messages, and optional input_file /
            task_id keys inside

    Returns:
        dict: with 'next_node' (name of the chosen node, or "END" when no
        node name could be parsed from the router's reply) and
        'downloaded_file' (local path of the downloaded input file, or ""
        when there is no file).
    """
    # NOTE: the return annotation was previously `-> str`, but this node
    # returns a state-update dict like every other node.

    system_prompt = f"""
    You are a powerful assistant that handle the user message and manage other nodes in order to provide the best answer to the question.
    You do not handle images or pictures
    You do not handle videos
    You do not handle audio
    You do not handle code
    You do not handle excel files

    1. You need to fully understand the subject of the question
    2. You need to understand the subject of the question with the question itself and the file extension
    For example of extensions:
    - .py is for code
    - .wav or .mp3 is for audio
    - a youtube url is for video
    - a .jpg, .png, .jpeg is for image
    - a .xlsx or .xls is for excel
    3. You must think hard about what is relevant in the question to make the best choice for the next node
    4. You must not answer the question by yourself
    5. Report your thought process in detail, explaining your reasoning step-by-step.

    Here are the nodes you can choose:
    - thinking_node: {thinking_node.__doc__}
    - web_search_node: {web_search_node.__doc__}
    - vision_node: {vision_node.__doc__}
    - video_node: {video_node.__doc__}
    - audio_node: {audio_node.__doc__}
    - code_node: {code_node.__doc__}
    - excel_node: {excel_node.__doc__}

    Here is the question : {state['question']}
    Here is the file : {state.get("input_file", "no file to handle")}

    Now provide your response immediately.
    You must always respect this format in lower case: next node <the node name you choose>.
    """

    downloaded = ""
    # If there's an input file, download it directly:
    if state.get("input_file", None):
        downloaded = download_input_file(state.get("task_id"))

    sys_msg = SystemMessage(content=system_prompt)

    entry_node_response = [general_model.invoke([sys_msg] + state["messages"])]

    entry_node_response[-1].pretty_print()

    # Parse "next node <name>" out of the router's free-text reply.
    regex_result = re.search(
        r'.*next.*(?P<next_node>thinking_node|web_search_node|vision_node|video_node|audio_node|code_node|excel_node)',
        entry_node_response[-1].content,
        re.IGNORECASE,
    )

    # Fall back to "END" when no node name could be extracted.
    next_node = "END"
    if regex_result:
        # Extract the node name (lower-cased, since the match is case-insensitive).
        next_node = regex_result.group("next_node").lower()

    print(f"Next node to invoke: {next_node}")

    return {
        "next_node": next_node,
        "downloaded_file": downloaded
    }
|
846 |
+
|
847 |
+
########################
|
848 |
+
|
849 |
+
######## Build Graph ########
|
850 |
+
|
851 |
+
def buildweb_search_graph():
    """
    Build the web-search subgraph: the search model plus its tool-execution
    loop (model -> tools -> model, until the model stops requesting tools).

    Returns:
        The compiled web-search subgraph.
    """
    builder = StateGraph(State)
    builder.add_node("web_search_node", web_search_node)
    builder.add_node("tools", ToolNode(search_tools))

    builder.add_edge(START, "web_search_node")
    # tools_condition already routes to "tools" when the model requested tool
    # calls and to END otherwise, so the previous unconditional
    # add_edge("web_search_node", END) duplicated the END path (creating a
    # parallel branch to END even when tools were selected); it was removed.
    builder.add_conditional_edges(
        "web_search_node",
        tools_condition,
    )
    builder.add_edge("tools", "web_search_node")

    return builder.compile()
|
865 |
+
|
866 |
+
def build_graph():
    """
    Build the full agent graph: entry router -> one specialized node ->
    answer formatting -> END.

    Returns:
        The compiled agent graph.
    """
    builder = StateGraph(State)
    builder.add_node("entry_node", entry_node)
    builder.add_node("web_search_node", buildweb_search_graph())
    builder.add_node("vision_node", vision_node)
    builder.add_node("video_node", video_node)
    builder.add_node("audio_node", audio_node)
    builder.add_node("code_node", code_node)
    builder.add_node("thinking_node", thinking_node)
    builder.add_node("excel_node", excel_node)
    builder.add_node("format_answer_node", format_answer_node)

    builder.add_edge(START, "entry_node")

    # Conditional routing from entry_node to specialized nodes
    builder.add_conditional_edges(
        "entry_node",
        lambda state: state["next_node"],
        {
            "web_search_node": "web_search_node",
            "vision_node": "vision_node",
            "video_node": "video_node",
            "audio_node": "audio_node",
            "code_node": "code_node",
            "excel_node": "excel_node",
            "thinking_node": "thinking_node",
            # entry_node falls back to "END" when it cannot parse a node name
            # from the router's reply; without this mapping that fallback
            # value had no route and invocation raised an error.
            "END": END,
        }
    )
    # After a specialized node, format the answer, then finish.
    builder.add_edge("web_search_node", "format_answer_node")
    builder.add_edge("vision_node", "format_answer_node")
    builder.add_edge("video_node", "format_answer_node")
    builder.add_edge("audio_node", "format_answer_node")
    builder.add_edge("code_node", "format_answer_node")
    builder.add_edge("excel_node", "format_answer_node")
    builder.add_edge("thinking_node", "format_answer_node")
    builder.add_edge("format_answer_node", END)

    return builder.compile()
|
906 |
+
|
907 |
+
########################
|
908 |
+
|
909 |
+
if __name__ == "__main__":

    agent_graph = build_graph()

    # Rendering the Mermaid diagram as PNG is disabled: Pyppeteer browser
    # launching is unreliable in this environment.
    # with open("graph.png", "wb") as f:
    #     f.write(agent_graph.get_graph(xray=True).draw_mermaid_png())
    # print("Graph saved as graph.png")

    # Expected answers, keyed by task_id, used to grade each run below.
    with open("./responses.json", "r") as responses:
        json_responses = json.loads(responses.read())

    with open("questions.json", "r") as questions:
        json_questions = json.loads(questions.read())

    # `item` (not `input`) so the builtin input() is not shadowed.
    for item in json_questions:

        question = item.get("question", "No question found")
        file_name = item.get("file_name", "")
        task_id = item.get("task_id", "")

        print(f"QUESTION : {question}")
        print(f"FILE: {file_name}")

        user_prompt = [HumanMessage(content="Can you answer the question please ?")]

        user_input = {"messages": user_prompt, "question": question, "input_file": file_name, "task_id": task_id}

        messages = agent_graph.invoke(
            input=user_input,
            config={
                "recursion_limit": 10,
                # "callbacks": [langfuse_handler]
            }
        )

        for m in messages['messages']:
            m.pretty_print()

        final_content = messages['messages'][-1].content
        # re.MULTILINE lets $ match at each end-of-line, so the FINAL ANSWER
        # line is found even when the model appends text after it (without
        # the flag, the pattern only matched when it was the very last line).
        regex_result = re.search(r"FINAL ANSWER:\s*(?P<answer>.*)$", final_content, re.MULTILINE)
        if regex_result:
            answer = regex_result.group("answer")
        else:
            # No FINAL ANSWER marker: fall back to the last line of the reply.
            # (Replaces a bare `except:` that silently swallowed all errors.)
            regex_result = re.search(r"\s*(?P<answer>.*)$", final_content)
            answer = regex_result.group("answer")

        print(answer)
        if answer == json_responses.get(task_id, ""):
            print("The answer is correct !")
        else:
            print("The answer is incorrect !")
            print(f"Expected: {json_responses.get(task_id, '')}")
            print(f"Got: {answer}")
|
974 |
+
|